File: parse_cmp.rb

package info (click to toggle)
ruby-ox 2.14.23-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 3,504 kB
  • sloc: xml: 39,683; ansic: 9,626; ruby: 6,441; sh: 47; makefile: 2
file content (264 lines) | stat: -rwxr-xr-x 5,575 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
#!/usr/bin/env ruby -wW1

$: << '../lib'
$: << '../ext'

require 'optparse'
require 'stringio'
require 'ox'

# Verbosity level; every -v on the command line increments it.
$verbose = 0
# Iteration count selected with -i/--iterations (defaults to 100).
$iter = 100

opts = OptionParser.new
opts.on('-v', 'increase verbosity')                            { $verbose += 1 }
# The iteration value is declared optional ("[Int]"), so the block can be
# handed nil when -i is given without a number. Keep the current count in
# that case instead of overwriting it with nil.
opts.on('-i', '--iterations [Int]', Integer, 'iterations')     { |i| $iter = i unless i.nil? }
opts.on('-h', '--help', 'Show this display')                   { puts opts; Process.exit!(0) }
# Whatever is left after option parsing is the list of files to process.
files = opts.parse(ARGV)

### XML conversion to Hash using in memory Ox parsing ###

# Builds a Hash from the children of +element+, which must alternate between
# a <key> element and a value element.
#
# Raises if a child is not an Ox::Element or if a key is expected and some
# other element is found. A trailing key without a value is not stored.
def node_to_dict(element)
  pending_key = nil
  element.nodes.each_with_object({}) do |child, result|
    raise 'A dict can only contain elements.' unless child.is_a?(Ox::Element)

    unless pending_key.nil?
      # A key is waiting, so this child is its value.
      result[pending_key] = node_to_value(child)
      pending_key = nil
      next
    end

    raise "Expected a key, not a #{child.name}." unless child.name == 'key'

    pending_key = first_text(child)
  end
end

# Converts every child of +element+ with node_to_value and returns the
# results as an Array, in document order.
def node_to_array(element)
  element.nodes.map { |n| node_to_value(n) }
end

# Converts a single plist value element into the matching Ruby value.
#
# The element name selects the conversion: string, dict, array, integer,
# real, true or false. Text content is taken from first_text; when there is
# no text, integer and real yield 0 and 0.0 (nil.to_i / nil.to_f) and string
# yields nil.
#
# Raises if +node+ is not an Ox::Element, if it is a <key> (a value was
# expected), or if the element name is not one of the known types.
def node_to_value(node)
  raise 'A dict can only contain elements.' unless node.is_a?(Ox::Element)

  case node.name
  when 'key'
    raise 'Expected a value, not a key.'
  when 'string'
    first_text(node)
  when 'dict'
    node_to_dict(node)
  when 'array'
    node_to_array(node)
  when 'integer'
    first_text(node).to_i
  when 'real'
    first_text(node).to_f
  when 'true'
    true
  when 'false'
    false
  else
    raise "#{node.name} is not a known element type."
  end
end

# Returns the first child of +node+ that is a String, or nil when the node
# has no String child.
def first_text(node)
  node.nodes.find { |child| child.is_a?(String) }
end

# Parses +xml+ into an in-memory Ox document and converts the first element
# found under the document root with node_to_dict.
#
# Returns the resulting Hash, or nil when the root has no element child.
def parse_gen(xml)
  top_level = Ox.parse(xml).root.nodes
  first_element = top_level.find { |child| child.is_a?(Ox::Element) }
  first_element.nil? ? nil : node_to_dict(first_element)
end

### XML conversion to Hash using Ox SAX parser ###

# SAX callback handler (passed to Ox.sax_parse) that assembles plist XML into
# nested Ruby Hashes and Arrays.
#
# State:
#   @key   - key text waiting for its value while inside a dict
#   @type  - name of the most recent scalar element (e.g. :key, :integer)
#   @plist - the first value appended while no container is open
#   @stack - the currently open dict/array containers, innermost last
class Handler
  # The top level value built so far (nil until something has been appended).
  attr_reader :plist

  def initialize
    @key = nil
    @type = nil
    @plist = nil
    @stack = []
  end

  # Called with the text content of an element. Inside a dict with no pending
  # key the text must belong to a <key>; otherwise it is stored as a value.
  #
  # Raises when a key is expected but the current element is not a key.
  def text(value)
    if @stack.last.is_a?(Hash) && @key.nil?
      raise "Expected a key, not #{@type} with a value of #{value}." unless :key == @type

      @key = value
    else
      append(value)
    end
  end

  # Called when an element opens. Containers are attached to their parent
  # first and then pushed on the stack; true/false are stored right away; any
  # other name is remembered so the following text can be converted.
  def start_element(name)
    case name
    when :dict, :array
      container = :dict == name ? {} : []
      append(container)
      @stack.push(container)
    when :true
      append(true)
    when :false
      append(false)
    else
      @type = name
    end
  end

  # Called when an element closes; only containers change the stack.
  def end_element(name)
    @stack.pop if [:dict, :array].include?(name)
  end

  # Stores +value+ in the innermost open container, or as the top level value
  # when no container is open.
  #
  # Raises when the container is a dict and no key is pending.
  def append(value)
    # Only text needs converting. @type still holds the previous scalar
    # element when a container or true/false arrives, so applying it to those
    # values would call to_i/to_f on objects that do not respond to them.
    if value.is_a?(String)
      case @type
      when :integer
        value = value.to_i
      when :real
        value = value.to_f
      end
    end

    parent = @stack.last
    case parent
    when Hash
      raise "Expected a key, not a value of #{value}." if @key.nil?

      parent[@key] = value
      @key = nil
    when Array
      parent.push(value)
    when nil
      @plist = value
    end
  end
end

# Runs the Ox SAX parser over +xml+ with a fresh Handler and returns the
# value the handler assembled.
def parse_sax(xml)
  handler = Handler.new
  Ox.sax_parse(handler, StringIO.new(xml))
  handler.plist
end

### XML conversion to Hash using Ox Object parsing with gsub! replacements ###

# Rewrites the plist XML with plist_to_obj_xml and loads the result with Ox
# in :object mode, returning whatever Ox.load produces.
def convert_parse_obj(xml)
  Ox.load(plist_to_obj_xml(xml), mode: :object)
end

### XML conversion to Hash using Ox Object parsing of XML already converted by gsub! replacements ###

# Loads +xml+ with Ox in :object mode and returns the result. No tag
# rewriting happens here; in this script it is called with XML that has
# already been passed through plist_to_obj_xml.
def parse_obj(xml)
  Ox.load(xml, mode: :object)
end

# Returns a copy of +xml+ with the <plist> wrapper removed and the plist tag
# names replaced by the short tag names listed in the table below. The
# argument itself is not modified.
def plist_to_obj_xml(xml)
  tag_map = {
    '<dict>' => '<h>',
    '</dict>' => '</h>',
    '<dict/>' => '<h/>',
    '<array>' => '<a>',
    '</array>' => '</a>',
    '<array/>' => '<a/>',
    '<string>' => '<s>',
    '</string>' => '</s>',
    '<string/>' => '<s/>',
    '<key>' => '<s>',
    '</key>' => '</s>',
    '<integer>' => '<i>',
    '</integer>' => '</i>',
    '<integer/>' => '<i/>',
    '<real>' => '<f>',
    '</real>' => '</f>',
    '<real/>' => '<f/>',
    '<true/>' => '<y/>',
    '<false/>' => '<n/>'
  }

  # The first substitution is non-destructive so the caller's string is left
  # alone; everything after it edits the copy in place.
  converted = xml.gsub("<plist version=\"1.0\">\n", '')
  converted.gsub!("\n</plist>", '')
  tag_map.each_pair { |plist_tag, short_tag| converted.gsub!(plist_tag, short_tag) }
  converted
end

# Runs the given block $iter times and returns the elapsed wall-clock time
# (the difference between two Time.now readings).
timed = lambda do |&work|
  began = Time.now
  $iter.times(&work)
  Time.now - began
end

# For every file given on the command line: optionally check that all
# approaches agree, then time each approach and print the results.
files.each do |filename|
  xml = File.read(filename)

  if $verbose > 0
    from_gen = parse_gen(xml)
    from_sax = parse_sax(xml)
    from_obj = convert_parse_obj(xml)
    all_equal = from_gen == from_sax && from_sax == from_obj
    puts "--- It is #{all_equal} that all parsers yield the same Hash. ---"
  end

  gen_time = timed.call { parse_gen(xml) }
  sax_time = timed.call { parse_sax(xml) }
  conv_obj_time = timed.call { convert_parse_obj(xml) }

  # Convert once up front so only the Ox load itself is timed below.
  obj_xml = plist_to_obj_xml(xml)
  obj_time = timed.call { parse_obj(obj_xml) }

  puts "In memory parsing and conversion took #{gen_time} for #{$iter} iterations."
  puts "SAX parsing and conversion took #{sax_time} for #{$iter} iterations."
  puts "XML gsub Object parsing and conversion took #{conv_obj_time} for #{$iter} iterations."
  puts "Object parsing and conversion took #{obj_time} for #{$iter} iterations."
end

# Results for a run:
#
# > parse_cmp.rb Sample.graffle -i 1000
# In memory parsing and conversion took 4.135701 for 1000 iterations.
# SAX parsing and conversion took 3.731695 for 1000 iterations.
# XML gsub Object parsing and conversion took 3.292397 for 1000 iterations.
# Object parsing and conversion took 0.808877 for 1000 iterations.