File: parser.rb

package info (click to toggle)
ruby-multipart-parser 0.1.1-4
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bullseye, forky, sid, trixie
  • size: 128 kB
  • sloc: ruby: 602; makefile: 4
file content (246 lines) | stat: -rw-r--r-- 7,779 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
module MultipartParser
  # A low level, event driven parser for multipart messages,
  # based on the node-formidable parser.
  #
  # Typical use: create a parser, call #init_with_boundary, register
  # callbacks with #on, then feed the message to #write in chunks of
  # any size. Parser state is kept between calls to #write, so a
  # boundary or header may be split across chunks.
  class Parser

    def initialize
      @boundary = nil
      @boundary_chars = nil
      @lookbehind = nil
      @state = :parser_uninitialized
      @index = 0  # Index into boundary or header
      @flags = {}
      @marks = {} # Keep track of different parts
      @callbacks = {}
    end

    # Initializes the parser, using the given boundary
    # (the bare boundary token, without the leading "--").
    # Any parsing state left over from a previous message is discarded;
    # registered callbacks are kept.
    def init_with_boundary(boundary)
      @boundary = "\r\n--" + boundary
      # Scratch space holding the characters of a partially matched
      # boundary, in case the match turns out to be ordinary part data.
      @lookbehind = "\0" * (@boundary.length + 8)
      @state = :start
      @index = 0
      @flags = {}
      @marks = {}

      # Set of characters occurring in the boundary,
      # used by #write to skip over data that cannot contain it.
      @boundary_chars = {}
      @boundary.each_byte do |b|
        @boundary_chars[b.chr] = true
      end
    end

    # Registers a callback to be called when the
    # given event occurs. Each callback is expected to
    # take three parameters: buffer, start_index, and end_index.
    # For the data events (:header_field, :header_value, :part_data)
    # the data is buffer[start_index...end_index]; note that the buffer
    # is not necessarily the one passed to #write (for :part_data it may
    # be an internal lookbehind buffer). For all other events the three
    # parameters are nil.
    # Registering a second callback for an event replaces the first.
    # Valid callbacks are:
    # :end
    # :header_field
    # :header_value
    # :header_end
    # :headers_end
    # :part_begin
    # :part_data
    # :part_end
    def on(event, &callback)
      @callbacks[event] = callback
    end

    # Writes data to the parser.
    # Returns the number of characters parsed (as counted by buffer.length).
    # In practice, this means that if the return value
    # is less than the buffer length, a parse error occurred
    # at that position. Returns 0 if no boundary has been set yet.
    # After a parse error the state is not saved, i.e. the
    # parser state is as it was before the call (apart from marks).
    def write(buffer)
      # Nothing can be parsed without a boundary. This must be checked
      # before touching @boundary, which is still nil at this point.
      return 0 if @state == :parser_uninitialized

      i = 0
      buffer_length = buffer.length
      index = @index
      flags = @flags.dup # Working copy, only committed on success
      state = @state
      lookbehind = @lookbehind
      boundary = @boundary
      boundary_chars = @boundary_chars
      boundary_length = boundary.length
      boundary_end = boundary_length - 1

      while i < buffer_length
        c = buffer[i, 1]
        case state
          when :start
            index = 0
            state = :start_boundary
          when :start_boundary # Differs in that it has no preceding \r\n
            if index == boundary_length - 2
              return i unless c == "\r"
              index += 1
            elsif index - 1 == boundary_length - 2
              return i unless c == "\n"
              # Boundary read successfully, begin next part
              callback(:part_begin)
              state = :header_field_start
            else
              # Compare against the boundary without its leading \r\n
              return i unless c == boundary[index + 2, 1] # Unexpected character
              index += 1
            end
            i += 1
          when :header_field_start
            # Does not consume the character, only marks the start
            state = :header_field
            @marks[:header_field] = i
            index = 0
          when :header_field
            if c == "\r"
              # A \r instead of a header name ends the header section
              @marks.delete :header_field
              state = :headers_almost_done
            else
              index += 1
              unless c == "-" # Skip hyphens
                if c == ":"
                  return i if index == 1 # Empty header field
                  data_callback(:header_field, buffer, i, :clear => true)
                  state = :header_value_start
                else
                  # Only letters (and hyphens) are accepted in header names
                  cl = c.downcase
                  return i if cl < "a" || cl > "z"
                end
              end
            end
            i += 1
          when :header_value_start
            if c == " " # Skip spaces
              i += 1
            else
              @marks[:header_value] = i
              state = :header_value
            end
          when :header_value
            if c == "\r"
              data_callback(:header_value, buffer, i, :clear => true)
              callback(:header_end)
              state = :header_value_almost_done
            end
            i += 1
          when :header_value_almost_done
            return i unless c == "\n"
            state = :header_field_start
            i += 1
          when :headers_almost_done
            return i unless c == "\n"
            callback(:headers_end)
            state = :part_data_start
            i += 1
          when :part_data_start
            # Does not consume the character, only marks the start
            state = :part_data
            @marks[:part_data] = i
          when :part_data
            prev_index = index

            if index == 0
              # Boyer-Moore derived algorithm to safely skip non-boundary data
              # See http://debuggable.com/posts/parsing-file-uploads-at-500-
              # mb-s-with-node-js:4c03862e-351c-4faa-bb67-4365cbdd56cb
              while i + boundary_length <= buffer_length
                break if boundary_chars.key?(buffer[i + boundary_end, 1])
                i += boundary_length
              end
              # i may now equal buffer_length, in which case c is ""
              # and simply fails to match below.
              c = buffer[i, 1]
            end

            if index < boundary_length
              if boundary[index, 1] == c
                # A possible boundary starts here:
                # flush the part data seen so far.
                data_callback(:part_data, buffer, i, :clear => true) if index == 0
                index += 1
              else # It was not the boundary we found, after all
                index = 0
              end
            elsif index == boundary_length
              # The character after the boundary tells which kind it is
              index += 1
              if c == "\r"
                flags[:part_boundary] = true
              elsif c == "-"
                flags[:last_boundary] = true
              else # We did not find a boundary after all
                index = 0
              end
            elsif index - 1 == boundary_length
              # The flag is removed whether or not the boundary is confirmed.
              # A flag left behind by a false alarm would otherwise hide
              # a later closing boundary.
              if flags.delete(:part_boundary)
                index = 0
                if c == "\n"
                  callback(:part_end)
                  callback(:part_begin)
                  state = :header_field_start
                  i += 1
                  next # Ugly way to break out of the case statement
                end
              elsif flags[:last_boundary]
                if c == "-"
                  callback(:part_end)
                  callback(:end)
                  state = :end
                else
                  flags.delete :last_boundary
                  index = 0 # False alarm
                end
              else
                index = 0
              end
            end

            if index > 0
              # When matching a possible boundary, keep a lookbehind
              # reference in case it turns out to be a false lead
              lookbehind[index - 1] = c
            elsif prev_index > 0
              # If our boundary turns out to be rubbish,
              # the captured lookbehind belongs to part_data
              callback(:part_data, lookbehind, 0, prev_index)
              @marks[:part_data] = i

              # Reconsider the current character as it might be the
              # beginning of a new sequence.
              i -= 1
            end

            i += 1
          when :end
            # Everything after the closing boundary is ignored
            i += 1
          else
            return i
        end
      end

      # Flush whatever data element is still open at the end of this
      # buffer; its mark is reset to 0 so that it continues at the
      # start of the next buffer.
      data_callback(:header_field, buffer, buffer_length)
      data_callback(:header_value, buffer, buffer_length)
      data_callback(:part_data, buffer, buffer_length)

      @index = index
      @state = state
      @flags = flags

      return buffer_length
    end

    private

    # Issues a callback, if one is registered for the event.
    # Nothing is issued for an empty data range (start == the_end).
    def callback(event, buffer = nil, start = nil, the_end = nil)
      return if !start.nil? && start == the_end
      if @callbacks.key? event
        @callbacks[event].call(buffer, start, the_end)
      end
    end

    # Issues a data callback for the range from the mark of the
    # given data type up to (not including) the_end.
    # Does nothing if no mark is set for the data type.
    # The only valid option is :clear,
    # which, if true, will remove the mark (the data element is complete).
    # If not specified, the mark will be reset to 0
    # (the data element continues at the start of the next buffer).
    def data_callback(data_type, buffer, the_end, options = {})
      return unless @marks.key? data_type
      callback(data_type, buffer, @marks[data_type], the_end)
      if options[:clear]
        @marks.delete data_type
      else
        @marks[data_type] = 0
      end
    end
  end
end