File: encodings.rb

package info (click to toggle)
ruby-mail 2.8.1%2Bdfsg1-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 5,704 kB
  • sloc: ruby: 73,709; makefile: 3
file content (314 lines) | stat: -rw-r--r-- 9,792 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
# encoding: utf-8
# frozen_string_literal: true

module Mail
  # Raised when attempting to decode an unknown encoding type
  class UnknownEncodingType < StandardError; end #:nodoc:

  module Encodings
    include Mail::Constants
    extend  Mail::Utilities

    # Registry of transfer encodings. Entries are added by Encodings.register
    # and keyed by the normalised name that Encodings.get_name produces.
    @transfer_encodings = {}

    # Register transfer encoding
    #
    # Example
    #
    # Encodings.register "base64", Mail::Encodings::Base64
    def Encodings.register(name, cls)
      # Store under the normalised name so later lookups are spelling-tolerant.
      key = get_name(name)
      @transfer_encodings[key] = cls
    end

    # Is the encoding we want defined?
    #
    # Example:
    #
    #  Encodings.defined?(:base64) #=> true
    def Encodings.defined?(name)
      # Look the name up in its normalised form, exactly as register stored it.
      key = get_name(name)
      @transfer_encodings.key?(key)
    end

    # Gets a defined encoding type, QuotedPrintable or Base64 for now.
    #
    # Each encoding needs to be defined as a Mail::Encodings::ClassName for
    # this to work, allows us to add other encodings in the future.
    #
    # Example:
    #
    #  Encodings.get_encoding(:base64) #=> Mail::Encodings::Base64
    def Encodings.get_encoding(name)
      # Yields nil when nothing has been registered under the normalised name.
      key = get_name(name)
      @transfer_encodings[key]
    end

    # Returns an Array of everything that has been registered through
    # Encodings.register (the values of the registry, without their names).
    def Encodings.get_all
      @transfer_encodings.values
    end

    # Normalises an encoding name into the key used by the registry: the name
    # is passed through underscoreize and the result is lower-cased.
    def Encodings.get_name(name)
      underscored = underscoreize(name)
      underscored.downcase
    end

    # Transcodes +str+ from +from_charset+ to +to_charset+ (UTF-8 by default)
    # by delegating to Utilities.transcode_charset. When no source charset is
    # given the string is returned untouched.
    def Encodings.transcode_charset(str, from_charset, to_charset = 'UTF-8')
      return str unless from_charset

      Utilities.transcode_charset(str, from_charset, to_charset)
    end

    # Encodes a parameter value using URI Escaping, note the language field 'en' can
    # be set using Mail::Configuration, like so:
    #
    #  Mail.defaults do
    #    param_encode_language 'jp'
    #  end
    #
    # The character set used for encoding will be the encoding on the string passed in.
    #
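    # ASCII-only strings are not URI-escaped: they are returned unchanged, or
    # wrapped in double quotes when they match TOKEN_UNSAFE. Only strings that
    # contain non-ASCII characters are passed on to Utilities.param_encode.
    #
    # Example:
    #
    #  Mail::Encodings.param_encode("fun") #=> "fun"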
    def Encodings.param_encode(str)
      # Only non-ASCII input needs real parameter encoding.
      return Utilities.param_encode(str) unless str.ascii_only?

      # ASCII input is passed through, wrapped in double quotes when it contains
      # anything matched by TOKEN_UNSAFE.
      str =~ TOKEN_UNSAFE ? %Q{"#{str}"} : str
    end

    # Decodes a parameter value using URI Escaping.
    #
    # Example:
    #
    #  Mail::Encodings.param_decode("This%20is%20fun", 'us-ascii') #=> "This is fun"
    #
    #  str = Mail::Encodings.param_decode("This%20is%20fun", 'iso-8859-1')
    #  str.encoding #=> 'ISO-8859-1'      ## Only on Ruby 1.9
    #  str #=> "This is fun"
    def Encodings.param_decode(str, encoding)
      # Pure delegation: both arguments are handed unchanged to
      # Utilities.param_decode, which does the actual work.
      Utilities.param_decode(str, encoding)
    end

    # Decodes or encodes a string as needed for either Base64 or QP encoding types in
    # the =?<encoding>?[QB]?<string>?= format.
    #
    # The output type needs to be :decode to decode the input string or :encode to
    # encode the input string.  The character set used for encoding will be the
    # encoding on the string passed in.
    #
    # On encoding, will only send out Base64 encoded strings.
    def Encodings.decode_encode(str, output_type)
      return Encodings.value_decode(str) if output_type == :decode

      # Every other output_type is treated as a request to encode. Pure ASCII
      # needs no encoded-word wrapper; anything else is Base64 encoded using the
      # string's own encoding as the charset.
      str.ascii_only? ? str : Encodings.b_value_encode(str, str.encoding)
    end

    # Decodes a given string as Base64 or Quoted Printable, depending on what
    # type it is.
    #
    # String has to be of the format =?<encoding>?[QB]?<string>?=
    def Encodings.value_decode(str)
      # Fast path: without any encoded-word the input is returned as is.
      return str unless str =~ ENCODED_VALUE

      segments = collapse_adjacent_encodings(str)

      # Decode each encoded-word in place. The second capture of ENCODED_VALUE
      # decides whether the Base64 or the Quoted-Printable decoder is used.
      segments.each do |segment|
        segment.gsub!(ENCODED_VALUE) do |encoded_word|
          case Regexp.last_match(2)
          when *B_VALUES then b_value_decode(encoded_word)
          when *Q_VALUES then q_value_decode(encoded_word)
          end
        end
      end

      segments.join("")
    end

    # Takes an encoded string of the format =?<encoding>?[QB]?<string>?=,
    # decodes it and converts the decoded text to +to_encoding+.
    def Encodings.unquote_and_convert_to(str, to_encoding)
      output = value_decode(str).to_s # output is already converted to UTF-8

      # Nothing to convert when no target is given, or when the target is UTF-8
      # under any spelling ("utf8", "UTF-8", "utf-8", ...).
      return output unless to_encoding
      return output if to_encoding.to_s.downcase.gsub("-", "") == 'utf8'

      begin
        output.encode(to_encoding)
      rescue EncodingError, Errno::EINVAL
        # String#encode reports failures through EncodingError subclasses
        # (undefined conversion, invalid byte sequence, converter not found),
        # so those are what must be rescued for the fallback to take effect.
        # Errno::EINVAL is kept only so the set of rescued errors never shrinks.
        #
        # Either the text is not really in the charset it claims to be, or the
        # target charset is unknown (like X-UNKNOWN). There is not much that can
        # be done in either case, so the unconverted text is returned.
        output
      end
    end

    # Encodes the non-ASCII parts of an address for use in a header.
    #
    # +address+ may be a single string or an Array of them. Array entries are
    # encoded one by one (nil entries are skipped) and joined with ", ".
    # Returns nil when +address+ is nil or false.
    def Encodings.address_encode(address, charset = 'utf-8')
      return unless address
      return encode_non_usascii(address, charset) unless address.is_a?(Array)

      encoded = address.compact.map { |entry| Encodings.address_encode(entry, charset) }
      encoded.join(", ")
    end

    # Base64-encodes the non-ASCII pieces of +address+ as encoded-words and
    # leaves the ASCII pieces alone. The address is returned unchanged when it
    # is pure ASCII or when +charset+ is nil.
    def Encodings.encode_non_usascii(address, charset)
      return address if address.ascii_only? || charset.nil?

      # Quoted strings go first: each one is encoded as a single unit, with its
      # surrounding quotes removed by unquote.
      address = address.gsub(/("[^"]*[^\/]")/) { |quoted| Encodings.b_value_encode(unquote(quoted), charset) }

      # Whatever is left is handled one whitespace-separated word at a time.
      words = address.split(/\s/)

      encoded_words = map_with_index(words) do |word, index|
        next word if word.ascii_only?

        # A non-ASCII word that directly follows another non-ASCII word gets a
        # leading space inside its encoded-word. Presumably this is because the
        # plain space between two adjacent encoded-words is dropped again on
        # decoding, which would otherwise glue the two words together.
        follows_non_ascii = index > 0 && words[index - 1] && !words[index - 1].ascii_only?
        word = " #{word}" if follows_non_ascii

        Encodings.b_value_encode(word, charset)
      end

      encoded_words.join(' ')
    end

    # Encode a string with Base64 Encoding and returns it ready to be inserted
    # as a value for a field, that is, in the =?<charset>?B?<string>?= format
    #
    # Example:
    #
    #  Encodings.b_value_encode('This is あ string', 'UTF-8')
    #  #=> "=?UTF-8?B?VGhpcyBpcyDjgYIgc3RyaW5n?="
    def Encodings.b_value_encode(string, encoding = nil)
      # Pure ASCII needs no encoded-word wrapper; hand the argument back as is.
      return string if string.to_s.ascii_only?

      # The text is cut into chunks whose Base64 form is at most 60 characters,
      # and each chunk becomes its own encoded-word.
      chunks = Encodings.each_base64_chunk_byterange(string, 60)
      encoded_words = chunks.map do |chunk|
        # Utilities.b_value_encode also returns the charset label to use; that
        # label is passed back in for the next chunk.
        payload, encoding = Utilities.b_value_encode(chunk, encoding)
        "=?#{encoding}?B?#{payload.chomp}?="
      end

      encoded_words.join(" ")
    end

    # Encode a string with Quoted-Printable Encoding and returns it ready to be inserted
    # as a value for a field, that is, in the =?<charset>?Q?<string>?= format
    #
    # Example:
    #
    #  Encodings.q_value_encode('This is あ string', 'UTF-8')
    #  #=> "=?UTF-8?Q?This_is_=E3=81=82_string?="
    def Encodings.q_value_encode(encoded_str, encoding = nil)
      # Pure ASCII needs no encoded-word wrapper; hand the argument back as is.
      return encoded_str if encoded_str.to_s.ascii_only?

      qp_text, charset = Utilities.q_value_encode(encoded_str, encoding)

      # Remove the "=\r\n" sequences from the encoder output; the string has
      # already been limited to the length we want.
      qp_text.gsub!("=\r\n", '')

      # One encoded-word per line, with spaces written as underscores.
      encoded_words = map_lines(qp_text) do |line|
        "=?#{charset}?Q?#{line.chomp.gsub(/ /, '_')}?="
      end

      encoded_words.join(" ")
    end

    private

    # Decodes a Base64 string from the "=?UTF-8?B?VGhpcyBpcyDjgYIgc3RyaW5n?=" format
    #
    # Example:
    #
    #  Encodings.b_value_decode("=?UTF-8?B?VGhpcyBpcyDjgYIgc3RyaW5n?=")
    #  #=> 'This is あ string'
    def Encodings.b_value_decode(str)
      # Pure delegation to Utilities.b_value_decode.
      #
      # NOTE(review): this method is defined with an explicit receiver
      # (def Encodings.…), so the bare `private` keyword earlier in the module
      # does not change its visibility; it stays callable from outside.
      Utilities.b_value_decode(str)
    end

    # Decodes a Quoted-Printable string from the "=?UTF-8?Q?This_is_=E3=81=82_string?=" format
    #
    # Example:
    #
    #  Encodings.q_value_decode("=?UTF-8?Q?This_is_=E3=81=82_string?=")
    #  #=> 'This is あ string'
    def Encodings.q_value_decode(str)
      # Pure delegation to Utilities.q_value_decode.
      Utilities.q_value_decode(str)
    end

    # Gets the encoding type (Q or B) from the string.
    def Encodings.value_encoding_from_string(str)
      # value_decode picks the Base64 or Quoted-Printable decoder from capture 2
      # of ENCODED_VALUE, so that is the group holding the Q/B marker. Capture 1
      # comes from the part before it and is not the encoding type.
      # Returns nil when +str+ contains no encoded-word.
      str[ENCODED_VALUE, 2]
    end

    # Split header line into proper encoded and unencoded parts.
    #
    # String has to be of the format =?<encoding>?[QB]?<string>?=
    #
    # Omit unencoded space after an encoded-word.
    def Encodings.collapse_adjacent_encodings(str)
      results = []
      last_encoded = nil  # Track whether to preserve or drop whitespace

      # NOTE(review): the pairing below only works if FULL_ENCODED_VALUE has a
      # capture group, so that split returns unencoded text and encoded-words
      # alternately — confirm against the constant's definition.
      lines = str.split(FULL_ENCODED_VALUE)
      lines.each_slice(2) do |unencoded, encoded|
        # Assignment, not comparison: remember the encoded part of this pair and
        # branch on whether the pair has one at all.
        if last_encoded = encoded
          # NOTE(review): last_encoded has just been set to a truthy value, so
          # the `!last_encoded && ...` alternative can never be true here. In
          # effect only non-blank unencoded text is kept in front of an
          # encoded-word; blank text is dropped even before the very first
          # encoded-word. Verify whether that is intended before changing it.
          if !Utilities.blank?(unencoded) || (!last_encoded && unencoded != EMPTY)
            results << unencoded
          end

          results << encoded
        else
          # Trailing unencoded text with no encoded-word after it is always kept.
          results << unencoded
        end
      end

      results
    end

    # Partition the string into bounded-size chunks without splitting
    # multibyte characters.
    def Encodings.each_base64_chunk_byterange(str, max_bytesize_per_base64_chunk, &block)
      unless (max_bytesize_per_base64_chunk % 4).zero?
        raise "size per chunk must be multiple of 4"
      end

      # Without a block, hand back an enumerator over the same chunks.
      return enum_for(:each_base64_chunk_byterange, str, max_bytesize_per_base64_chunk) unless block_given?

      # Base64 turns every 3 input bytes into 4 output characters, so a raw
      # chunk may use at most three quarters of the encoded size limit.
      raw_limit = (max_bytesize_per_base64_chunk * 3 / 4.0).floor
      each_chunk_byterange(str, raw_limit, &block)
    end

    # Partition the string into bounded-size chunks without splitting
    # multibyte characters.
    def Encodings.each_chunk_byterange(str, max_bytesize_per_chunk)
      return enum_for(:each_chunk_byterange, str, max_bytesize_per_chunk) unless block_given?

      chunk_start = 0 # byte offset at which the current chunk begins
      chunk_bytes = 0 # bytes collected for the current chunk so far

      str.each_char do |char|
        width = char.bytesize

        # The character still fits: grow the current chunk and move on.
        if chunk_bytes + width <= max_bytesize_per_chunk
          chunk_bytes += width
          next
        end

        # It does not fit: emit what has been collected and start a new chunk
        # with this character, so no character is ever split across chunks.
        yield Utilities.string_byteslice(str, chunk_start, chunk_bytes)
        chunk_start += chunk_bytes
        chunk_bytes = width
      end

      # Emit whatever is left over as the final chunk.
      yield Utilities.string_byteslice(str, chunk_start, chunk_bytes)
    end
  end
end