1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246
|
module MultipartParser
  # A low level, incremental (push) parser for multipart messages,
  # based on the node-formidable parser.
  #
  # Usage: create a parser, call #init_with_boundary, register callbacks
  # with #on, then feed the message to #write in one or more chunks.
  #
  # The parser never copies payload data. Data events hand the listener a
  # string plus a half-open index range (start_index...end_index) into it.
  # Positions are String indices as understood by String#[] and
  # String#length, so pass a binary (ASCII-8BIT) string if byte offsets
  # are required.
  class Parser
    def initialize
      @boundary = nil
      @boundary_chars = nil
      @lookbehind = nil
      @state = :parser_uninitialized
      @index = 0      # Index into the boundary or the current header name
      @flags = {}     # :part_boundary / :last_boundary while finishing a boundary
      @marks = {}     # Start index of the pending data span, per data event
      @callbacks = {}
    end

    # Initializes (or re-initializes) the parser, using the given boundary.
    #
    # boundary - the boundary token WITHOUT the leading "--".
    #
    # All per-message parsing state is reset so that a parser can be reused
    # after a finished, partial or failed parse without leftovers from the
    # previous message producing spurious callbacks. Registered callbacks
    # are kept.
    def init_with_boundary(boundary)
      @boundary = "\r\n--" + boundary
      # Room for the boundary plus the characters that may follow it
      # while we are still unsure whether it is a real boundary.
      @lookbehind = "\0" * (@boundary.length + 8)
      @state = :start
      @index = 0
      @flags = {}
      @marks = {}
      @boundary_chars = {}
      @boundary.each_byte do |b|
        @boundary_chars[b.chr] = true
      end
    end

    # Registers a callback to be called when the given event occurs.
    # Only one callback is kept per event; registering again replaces it.
    #
    # Each callback is called with three arguments:
    # buffer, start_index, and end_index.
    #
    # For the data events (:header_field, :header_value, :part_data) the
    # data is buffer[start_index...end_index]. A single logical value may
    # be delivered in several pieces when it spans more than one #write
    # call. For :part_data the buffer may be an internal scratch string
    # that is overwritten later, so slice it inside the callback.
    # For all other events the three arguments are nil.
    #
    # Valid callbacks are:
    # :end
    # :header_field
    # :header_value
    # :header_end
    # :headers_end
    # :part_begin
    # :part_data
    # :part_end
    def on(event, &callback)
      @callbacks[event] = callback
    end

    # Writes data to the parser.
    #
    # Returns the number of characters parsed. In practice, this means
    # that if the return value is less than the buffer length, a parse
    # error occurred at that position. Writing to a parser that has not
    # been given a boundary consumes nothing and returns 0.
    #
    # When a parse error is reported the saved state/index/flags are left
    # as they were before this call.
    def write(buffer)
      # Without a boundary there is nothing to match against.
      return 0 if @state == :parser_uninitialized

      i = 0
      buffer_length = buffer.length
      index = @index
      flags = @flags.dup
      state = @state
      lookbehind = @lookbehind
      boundary = @boundary
      boundary_chars = @boundary_chars
      boundary_length = boundary.length
      boundary_end = boundary_length - 1

      while i < buffer_length
        c = buffer[i, 1]
        case state
        when :start
          # Does not consume a character; falls into :start_boundary.
          index = 0
          state = :start_boundary
        when :start_boundary
          # Differs from later boundaries in that it has no preceding \r\n,
          # hence the "+ 2" / "- 2" offsets into the stored boundary.
          if index == boundary_length - 2
            return i unless c == "\r"
            index += 1
          elsif index - 1 == boundary_length - 2
            return i unless c == "\n"
            # Boundary read successfully, begin next part
            callback(:part_begin)
            state = :header_field_start
          else
            return i unless c == boundary[index + 2, 1] # Unexpected character
            index += 1
          end
          i += 1
        when :header_field_start
          # Does not consume a character; only marks where the name starts.
          state = :header_field
          @marks[:header_field] = i
          index = 0
        when :header_field
          if c == "\r"
            # Blank line: no more headers for this part.
            @marks.delete :header_field
            state = :headers_almost_done
          else
            index += 1
            unless c == "-" # Hyphens are allowed inside header names
              if c == ":"
                return i if index == 1 # Empty header field
                data_callback(:header_field, buffer, i, :clear => true)
                state = :header_value_start
              else
                cl = c.downcase
                return i if cl < "a" || cl > "z"
              end
            end
          end
          i += 1
        when :header_value_start
          if c == " " # Skip spaces
            i += 1
          else
            @marks[:header_value] = i
            state = :header_value
          end
        when :header_value
          if c == "\r"
            data_callback(:header_value, buffer, i, :clear => true)
            callback(:header_end)
            state = :header_value_almost_done
          end
          i += 1
        when :header_value_almost_done
          return i unless c == "\n"
          state = :header_field_start
          i += 1
        when :headers_almost_done
          return i unless c == "\n"
          callback(:headers_end)
          state = :part_data_start
          i += 1
        when :part_data_start
          # Does not consume a character; only marks where the data starts.
          state = :part_data
          @marks[:part_data] = i
        when :part_data
          prev_index = index
          if index == 0
            # Boyer-Moore derived algorithm to safely skip non-boundary data:
            # if the character one boundary-length ahead cannot occur in the
            # boundary, no boundary can start inside the skipped window.
            # See http://debuggable.com/posts/parsing-file-uploads-at-500-
            # mb-s-with-node-js:4c03862e-351c-4faa-bb67-4365cbdd56cb
            while i + boundary_length <= buffer_length
              break if boundary_chars.key?(buffer[i + boundary_end, 1])
              i += boundary_length
            end
            c = buffer[i, 1]
          end

          if index < boundary_length
            if boundary[index, 1] == c
              # A possible boundary starts here: flush the data before it.
              data_callback(:part_data, buffer, i, :clear => true) if index == 0
              index += 1
            else # It was not the boundary we found, after all
              index = 0
            end
          elsif index == boundary_length
            # The whole boundary string matched; the next two characters
            # decide between "\r\n" (another part) and "--" (end of message).
            index += 1
            # Forget the outcome of any earlier boundary candidate that
            # turned out to be false, so it cannot shadow this decision.
            flags.delete :part_boundary
            flags.delete :last_boundary
            if c == "\r"
              flags[:part_boundary] = true
            elsif c == "-"
              flags[:last_boundary] = true
            else # We did not find a boundary after all
              index = 0
            end
          elsif index - 1 == boundary_length
            if flags[:part_boundary]
              index = 0
              if c == "\n"
                flags.delete :part_boundary
                callback(:part_end)
                callback(:part_begin)
                state = :header_field_start
                i += 1
                next # Skip the lookbehind bookkeeping below
              end
            elsif flags[:last_boundary]
              if c == "-"
                callback(:part_end)
                callback(:end)
                state = :end
              else
                index = 0 # False alarm
              end
            else
              index = 0
            end
          end

          if index > 0
            # When matching a possible boundary, keep a lookbehind
            # reference in case it turns out to be a false lead
            lookbehind[index - 1] = c
          elsif prev_index > 0
            # If our boundary turns out to be rubbish,
            # the captured lookbehind belongs to part_data
            callback(:part_data, lookbehind, 0, prev_index)
            @marks[:part_data] = i
            # Reconsider the current character as it might be the
            # beginning of a new sequence.
            i -= 1
          end
          i += 1
        when :end
          # Anything after the closing boundary is silently consumed.
          i += 1
        else
          return i
        end
      end

      # Flush whatever data span is still open at the end of this chunk;
      # its mark is reset to 0 so it continues at the start of the next one.
      data_callback(:header_field, buffer, buffer_length)
      data_callback(:header_value, buffer, buffer_length)
      data_callback(:part_data, buffer, buffer_length)

      @index = index
      @state = state
      @flags = flags

      buffer_length
    end

    private

    # Issues a callback, if one is registered for the event.
    # Data callbacks with an empty range (start == the_end) are suppressed.
    def callback(event, buffer = nil, start = nil, the_end = nil)
      return if !start.nil? && start == the_end
      if @callbacks.key? event
        @callbacks[event].call(buffer, start, the_end)
      end
    end

    # Issues a data callback for the span from the event's mark up to
    # (but excluding) the_end. Does nothing if no mark is set.
    #
    # The only valid option is :clear.
    # If :clear is true the mark is removed (the span is complete).
    # If not specified, the mark is reset to 0 so that the span
    # continues at the beginning of the next buffer.
    def data_callback(data_type, buffer, the_end, options = {})
      return unless @marks.key? data_type
      callback(data_type, buffer, @marks[data_type], the_end)
      if options[:clear]
        @marks.delete data_type
      else
        @marks[data_type] = 0
      end
    end
  end
end
|