File: unstructured_field.rb

package info (click to toggle)
ruby-mail 2.6.4%2Bdfsg1-1
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 4,256 kB
  • ctags: 1,327
  • sloc: ruby: 44,678; makefile: 3
file content (205 lines) | stat: -rw-r--r-- 6,524 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
# encoding: utf-8
# frozen_string_literal: true
require 'mail/fields/common/common_field'

module Mail
  # Provides access to an unstructured header field
  #
  # ===Per RFC 2822:
  #  2.2.1. Unstructured Header Field Bodies
  #
  #     Some field bodies in this standard are defined simply as
  #     "unstructured" (which is specified below as any US-ASCII characters,
  #     except for CR and LF) with no further restrictions.  These are
  #     referred to as unstructured field bodies.  Semantically, unstructured
  #     field bodies are simply to be treated as a single line of characters
  #     with no further processing (except for header "folding" and
  #     "unfolding" as described in section 2.2.3).
  class UnstructuredField

    include Mail::CommonField
    include Mail::Utilities

    attr_accessor :charset
    attr_reader :errors

    # Creates an unstructured field from a header name and a raw value.
    #
    # name    - the header field name.
    # value   - the field body. An Array is joined with ', '; any other
    #           object is converted with #to_s.
    # charset - optional charset for the field. When omitted, the encoding
    #           of the (stringified) value is used, or $KCODE if the value
    #           does not respond to #encoding.
    def initialize(name, value, charset = nil)
      @errors = []

      # An Array has probably arrived here from a failed parse of an
      # AddressList field; everything else is coerced to a String.
      value = value.is_a?(Array) ? value.join(', ') : value.to_s

      self.charset =
        if charset
          charset
        elsif value.respond_to?(:encoding)
          value.encoding
        else
          $KCODE
        end

      self.name = name
      self.value = value
      self
    end

    # Returns the field as a complete header line: the folded
    # "name: value" text followed by CRLF, or an empty string when the
    # value is nil.
    def encoded
      do_encode
    end

    # Returns the decoded field value, or nil when the value is blank.
    def decoded
      do_decode
    end

    # Default representation of the field; identical to #decoded.
    def default
      decoded
    end

    # No-op: there is no structure to parse in an unstructured field.
    # Returns self.
    def parse # An unstructured field does not parse
      self
    end

    private

    # Renders the field as one header line terminated by CRLF.
    # Returns an empty string when the value is nil.
    def do_encode
      return '' if value.nil?

      "#{wrapped_value}\r\n"
    end

    # Decodes the raw value through Encodings.decode_encode.
    # Returns nil when the value is blank.
    def do_decode
      return nil if Utilities.blank?(value)

      Encodings.decode_encode(value, :decode)
    end

    # 2.2.3. Long Header Fields
    #
    #  Each header field is logically a single line of characters comprising
    #  the field name, the colon, and the field body.  For convenience
    #  however, and to deal with the 998/78 character limitations per line,
    #  the field body portion of a header field can be split into a multiple
    #  line representation; this is called "folding".  The general rule is
    #  that wherever this standard allows for folding white space (not
    #  simply WSP characters), a CRLF may be inserted before any WSP.  For
    #  example, the header field:
    #
    #          Subject: This is a test
    #
    #  can be represented as:
    #
    #          Subject: This
    #           is a test
    #
    #  Note: Though structured field bodies are defined in such a way that
    #  folding can take place between many of the lexical tokens (and even
    #  within some of the lexical tokens), folding SHOULD be limited to
    #  placing the CRLF at higher-level syntactic breaks.  For instance, if
    #  a field body is defined as comma-separated values, it is recommended
    #  that folding occur after the comma separating the structured items in
    #  preference to other places where the field could be folded, even if
    #  it is allowed elsewhere.
    #
    # Returns the folded "name: value" header text (without the trailing
    # CRLF). The length of the "name: " prefix is passed to #fold so the
    # first line leaves room for it.
    def wrapped_value # :nodoc:
      prefix_length = "#{name}: ".length
      wrap_lines(name, fold(prefix_length))
    end

    # 6.2. Display of 'encoded-word's
    #
    #  When displaying a particular header field that contains multiple
    #  'encoded-word's, any 'linear-white-space' that separates a pair of
    #  adjacent 'encoded-word's is ignored.  (This is to allow the use of
    #  multiple 'encoded-word's to represent long strings of unencoded text,
    #  without having to separate 'encoded-word's where spaces occur in the
    #  unencoded text.)
    #
    # Joins already-folded body lines into a single header string.
    #
    # name         - the header field name; the first line is prefixed with
    #                "name: ".
    # folded_lines - Array of body lines. It is not modified (so it may be
    #                frozen or reused by the caller).
    #
    # Every line after the first is attached with CRLF followed by a single
    # space, i.e. as a folded continuation line. An empty Array yields just
    # "name: ".
    def wrap_lines(name, folded_lines)
      first_line, *continuations = folded_lines
      ["#{name}: #{first_line}", *continuations].join("\r\n\s")
    end

    # Splits the decoded value into the lines of the folded header body.
    #
    # prepend - number of characters already taken on the first line (the
    #           "name: " prefix). It only reduces the limit of the first
    #           line; later lines start from the full 78 characters.
    #
    # When the decoded value contains non-ASCII characters, every line is
    # emitted as a Q encoded-word ("=?CHARSET?Q?...?="); otherwise words are
    # simply re-joined with single spaces.
    #
    # Returns an Array of Strings, one per line, without line terminators.
    def fold(prepend = 0) # :nodoc:
      encoding      = normalized_encoding
      text          = decoded.to_s
      should_encode = text.not_ascii_only?
      pending       = text.split(/[ \t]/)

      if should_encode
        # Encoded lines are joined without separators, so the space is put
        # back in front of every piece but the first. Pure-ASCII pieces are
        # cut into chunks of at most 7 characters.
        pending = pending.each_with_index.map do |piece, index|
          piece = " #{piece}" unless index.zero?
          piece.not_ascii_only? ? piece : piece.scan(/.{7}|.+$/)
        end.flatten
      end

      # "=?", "?Q?" and "?=" wrap the charset name: 7 extra characters.
      wrapper_length = should_encode ? 7 + encoding.length : 0

      folded_lines = []
      until pending.empty?
        limit = 78 - prepend - wrapper_length
        line = String.new
        line_started = false

        until pending.empty?
          candidate = pending.first.dup
          candidate.encode!(charset) if charset && candidate.respond_to?(:encode!)
          candidate = encode(candidate) if should_encode
          candidate = encode_crlf(candidate)

          # Continue on the next line if this word would go past the limit,
          # unless the line is still empty, in which case it is added anyway.
          # Note: a single word longer than 998 characters therefore still
          # breaks the spec; encoded-word encoding would allow splitting it.
          break if !line.empty? && (line.length + candidate.length + 1 > limit)

          # The word fits: take it off the queue and append it, separated
          # by a space from the previous word when not encoding.
          pending.shift
          line << " " if line_started && !should_encode
          line_started = true
          line << candidate
        end

        line = "=?#{encoding}?Q?#{line}?=" if should_encode
        folded_lines << line
        # Only the first line shares its space with the field name.
        prepend = 0
      end
      folded_lines
    end

    # Q-encodes a single word for use inside an encoded-word.
    #
    # The value is packed with the CAPITAL_M directive and every EQUAL_LF
    # sequence is removed; then the characters " ( ) ? and _ are replaced by
    # their =XX escapes and, last, each space becomes an underscore.
    #
    # Returns a new String; the argument is not modified.
    def encode(value)
      # Order matters: '_' must be escaped before spaces are turned into '_'.
      escapes = [
        [/"/,  '=22'],
        [/\(/, '=28'],
        [/\)/, '=29'],
        [/\?/, '=3F'],
        [/_/,  '=5F'],
        [/ /,  '_']
      ]

      quoted = [value].pack(CAPITAL_M).gsub(EQUAL_LF, EMPTY)
      escapes.each { |pattern, replacement| quoted.gsub!(pattern, replacement) }
      quoted
    end

    # Replaces every CR with CR_ENCODED and every LF with LF_ENCODED.
    #
    # The substitution is done in place: the given String is modified and
    # returned.
    def encode_crlf(value)
      [[CR, CR_ENCODED], [LF, LF_ENCODED]].each do |raw, escaped|
        value.gsub!(raw, escaped)
      end
      value
    end

    # Returns the charset name as used in encoded-words: upper-cased, with
    # underscores replaced by hyphens, and 'UTF8' spelled 'UTF-8'.
    def normalized_encoding
      normalized = charset.to_s.upcase.gsub('_', '-')
      # Ruby 1.8.x with $KCODE == 'u' reports the charset as 'UTF8'
      normalized == 'UTF8' ? 'UTF-8' : normalized
    end

  end
end