File: quoting.rb

package info (click to toggle)
ruby-tmail 1.2.7.1-3%2Bdeb7u1
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 1,712 kB
  • sloc: ruby: 15,207; ansic: 482; yacc: 349; makefile: 30
file content (171 lines) | stat: -rw-r--r-- 5,603 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
=begin rdoc

= Quoting methods

=end

class String
  # Heuristic check for whether this string looks like binary data.
  #
  # Returns nil for an empty string. Otherwise returns true when more than
  # 30% of the characters are outside the printable ASCII range (" " to "~")
  # and are not CR or LF. When that ratio is not exceeded, returns the index
  # of the first NUL character, or nil if the string contains none.
  def is_binary_data?
    return if empty?

    # Both arguments are negated sets; count uses their intersection, i.e.
    # characters that are neither printable ASCII nor a line terminator.
    suspicious = count("^ -~", "^\r\n")
    suspicious.fdiv(size) > 0.3 || index("\x00")
  end
end

module TMail
  class Mail
    # Returns the quoted subject after passing it through
    # Unquoter.unquote_and_convert_to with +to_charset+ as the target charset.
    def subject(to_charset = 'utf-8')
      Unquoter.unquote_and_convert_to(quoted_subject, to_charset)
    end

    # Returns the body decoded according to the content transfer encoding
    # (treated as "7bit" when none is set) and converted to +to_charset+.
    #
    # "quoted-printable" and "base64" bodies are decoded and converted,
    # "7bit"/"8bit" bodies are only converted, and anything else (including
    # "binary") is returned as the raw quoted body.
    def unquoted_body(to_charset = 'utf-8')
      source_charset = charset
      encoding = (content_transfer_encoding || "7bit").downcase

      if encoding == "quoted-printable"
        # Many mailers declare (or default to) us-ascii while actually sending
        # ISO-8859-1, so a us-ascii charset is treated as iso-8859-1 here.
        if !source_charset.blank? && source_charset.downcase == 'us-ascii'
          source_charset = 'iso-8859-1'
        end

        Unquoter.unquote_quoted_printable_and_convert_to(quoted_body, to_charset, source_charset, true)
      elsif encoding == "base64"
        Unquoter.unquote_base64_and_convert_to(quoted_body, to_charset, source_charset)
      elsif encoding == "7bit" || encoding == "8bit"
        Unquoter.convert_to(quoted_body, to_charset, source_charset)
      else
        quoted_body
      end
    end

    # Returns the textual body converted to +to_charset+.
    #
    # For a multipart mail the result is the concatenation of every part:
    # nested multiparts are rendered recursively, parts without a
    # content-type header contribute an empty string, attachments are
    # rendered by the given block (which receives the file name, or
    # "(unnamed)"), and all other parts contribute their unquoted body.
    # Without a block, attachments render as "Attachment: <name>\n".
    def body(to_charset = 'utf-8', &block)
      presenter = block || Proc.new { |file_name| "Attachment: #{file_name}\n" }

      return unquoted_body(to_charset) unless multipart?

      rendered_parts = parts.map do |part|
        content_type = part["content-type"]

        if part.multipart?
          part.body(to_charset, &presenter)
        elsif content_type.nil?
          ""
        elsif attachment?(part)
          presenter.call(content_type["name"] || "(unnamed)")
        else
          part.unquoted_body(to_charset)
        end
      end

      rendered_parts.join
    end
  end

  class Attachment

    include TextUtils

    # Reports whether +string+ appears to be encoded: either it contains a
    # two-word-character token wrapped in apostrophes with text on both sides
    # (e.g. "utf-8'en'name"), or it contains an "=?...?.?...?=" encoded-word.
    # Always returns true or false.
    def quoted?(string)
      apostrophe_delimited = /.+'\w\w'.+/
      encoded_word = /=\?.+\?.\?.+\?=/

      [apostrophe_delimited, encoded_word].any? { |pattern| string =~ pattern }
    end

    # Returns the file name, unquoted and converted to +to_charset+ (with a
    # trailing newline removed) when it looks quoted; otherwise returns the
    # quoted file name unchanged.
    def original_filename(to_charset = 'utf-8')
      return quoted_filename unless quoted?(quoted_filename)

      Unquoter.unquote_and_convert_to(quoted_filename, to_charset).chomp
    end
  end

  # Class-level helpers that decode quoted-printable / base64 text and
  # "=?charset?X?payload?=" encoded-words, and convert the result between
  # charsets.
  class Unquoter
    class << self
      # Decodes every encoded-word in +text+ and converts the result to
      # +to_charset+.
      #
      # text::                 string to decode; nil yields "". The caller's
      #                        string is never modified, so frozen strings are
      #                        accepted.
      # to_charset::           name of the target charset.
      # from_charset::         kept for interface compatibility only. Each
      #                        segment is converted using the charset named by
      #                        the encoded-word that terminates it (nil for
      #                        trailing plain text), so this value is never
      #                        consulted.
      # preserve_underscores:: forwarded to the quoted-printable decoder.
      #
      # Raises RuntimeError when an encoded-word uses a quoting method other
      # than Q or B.
      def unquote_and_convert_to(text, to_charset, from_charset = "iso-8859-1", preserve_underscores=false)
        return "" if text.nil?

        # Remove whitespace between adjacent encoded-words. The non-bang
        # gsub is deliberate: gsub! would alter the caller's string and raise
        # on a frozen one.
        joined = text.gsub(/\?=(\s*)=\?/, '?==?')

        joined.gsub(/(.*?)(?:(?:=\?(.*?)\?(.)\?(.*?)\?=)|$)/) do
          # Capture the match groups immediately, into block-local names, so
          # the method's own arguments are not overwritten.
          before, charset, quoting_method, payload = $1, $2, $3, $4

          before = convert_to(before, to_charset, charset) if before.length > 0

          decoded =
            case quoting_method
            when "q", "Q"
              unquote_quoted_printable_and_convert_to(payload, to_charset, charset, preserve_underscores)
            when "b", "B"
              unquote_base64_and_convert_to(payload, to_charset, charset)
            when nil
              # nil when the regex matched plain text up to an end of line
              # rather than an encoded-word.
              ""
            else
              raise "unknown quoting method #{quoting_method.inspect}"
            end

          before + decoded
        end
      end

      # Charset conversion with charset detection and an ISO-8859-1 retry.
      # This becomes the public +convert_to+ via the alias_method calls at
      # the end of this class.
      #
      # Returns +text+ untouched when the target is 'utf-8' and the text
      # already reports itself as UTF-8. When +from+ is blank and the text
      # does not look binary, the source charset is detected with CharDet.
      # If conversion raises Iconv::InvalidCharacter it is retried once with
      # 'iso-8859-1'; if that fails as well, the unconverted text is returned
      # (never nil, so callers can keep concatenating the result).
      def convert_to_with_fallback_on_iso_8859_1(text, to, from)
        return text if to == 'utf-8' && text.isutf8

        if from.blank? && !text.is_binary_data?
          from = CharDet.detect(text)['encoding']

          # CharDet usually reports iso-8859-2 for text that is really
          # iso-8859-1 (Latin1), which produces unwanted characters such as
          # ŕ instead of à. Treating iso-8859-2 as iso-8859-1 is a deliberate,
          # admittedly debatable, trade-off.
          from = 'iso-8859-1' if from =~ /iso-8859-2/i
        end

        begin
          convert_to_without_fallback_on_iso_8859_1(text, to, from)
        rescue Iconv::InvalidCharacter
          # Already on the last-resort charset: give up and hand back the
          # original text instead of nil.
          return text if from == 'iso-8859-1'

          from = 'iso-8859-1'
          retry
        end
      end

      # Decodes quoted-printable +text+ and converts it from charset +from+
      # to charset +to+. Underscores are turned into spaces unless
      # +preserve_underscores+ is true.
      def unquote_quoted_printable_and_convert_to(text, to, from, preserve_underscores=false)
        text = text.gsub(/_/, " ") unless preserve_underscores
        text = text.gsub(/\r\n|\r/, "\n") # normalize newlines
        convert_to(text.unpack("M*").first, to, from)
      end

      # Decodes base64 +text+ via Base64.decode and converts it from charset
      # +from+ to charset +to+.
      def unquote_base64_and_convert_to(text, to, from)
        convert_to(Base64.decode(text), to, from)
      end

      # The base +convert_to+ depends on whether iconv can be loaded.
      begin
        require 'iconv'

        # Converts +text+ from charset +from+ to charset +to+ using Iconv.
        # Returns +text+ unchanged when either charset is missing, and ""
        # when +text+ is nil.
        def convert_to(text, to, from)
          return text unless to && from
          text ? Iconv.iconv(to, from, text).first : ""
        rescue Iconv::IllegalSequence, Iconv::InvalidEncoding, Errno::EINVAL
          # the 'from' parameter specifies a charset other than what the text
          # actually is...not much we can do in this case but just return the
          # unconverted text.
          #
          # Ditto if either parameter represents an unknown charset, like
          # X-UNKNOWN.
          text
        end
      rescue LoadError
        # Not providing quoting support: without iconv the text is returned
        # unconverted and a warning is emitted on every call.
        def convert_to(text, to, from)
          warn "Action Mailer: iconv not loaded; ignoring conversion from #{from} to #{to} (#{__FILE__}:#{__LINE__})"
          text
        end
      end

      # Wrap whichever convert_to was defined above with the fallback logic;
      # the unwrapped version stays reachable under the *_without_* name.
      alias_method :convert_to_without_fallback_on_iso_8859_1, :convert_to
      alias_method :convert_to, :convert_to_with_fallback_on_iso_8859_1

    end
  end
end