File: sax_benchmark.rb

package info (click to toggle)
ruby-ox 2.14.22-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 3,500 kB
  • sloc: xml: 39,683; ansic: 9,615; ruby: 6,422; sh: 47; makefile: 2
file content (128 lines) | stat: -rw-r--r-- 2,443 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# All credit to https://github.com/hakanensari
# Taken from https://gist.github.com/hakanensari/3078932

require 'benchmark'
require 'stringio'

require 'nokogiri'
require 'ox'

# Sample Amazon-style lookup response used as the benchmark input.
raw_xml = %{
<?xml version="1.0" encoding="UTF-8"?>
<ItemLookupResponse>
  <Items>
    <Item>
      <ASIN>0816614024</ASIN>
      <ItemAttributes>
        <Creator Role="Author">Gilles Deleuze</Creator>
        <Creator Role="Contributor">Felix Guattari</Creator>
        <Title>Thousand Plateaus</Title>
      </ItemAttributes>
    </Item>
    <Item>
      <ASIN>0231081596</ASIN>
      <ItemAttributes>
        <Creator Role="Author">Gilles Deleuze</Creator>
        <Title>Difference and Repetition</Title>
      </ItemAttributes>
    </Item>
  </Items>
</ItemLookupResponse>
}

# Trim the surrounding blank lines and drop the whitespace between adjacent
# tags, so the parsers are fed markup with no whitespace-only text between
# elements.
compact_xml = raw_xml.strip
compact_xml = compact_xml.gsub(/>\s+</, '><')

# Rewindable in-memory stream handed to the parsers.
io = StringIO.new(compact_xml)

# SAX handler for Ox that folds the parsed document into nested Hashes.
#
# Each element becomes a Hash holding its attributes and child elements,
# keyed by the names exactly as they are passed to the callbacks. Text is
# stored under +:__content__+; an element that holds nothing but text is
# represented by that text alone. Elements repeated under the same parent
# are gathered into an Array, in the order they were closed.
class OxHandler < Ox::Sax
  # @return [Hash] container that receives the document's top-level element
  attr_reader :root

  def initialize
    super
    # The root container is also the bottom of the stack, so the outermost
    # element has a parent to be stored into when it closes.
    @stack = [@node = @root = {}]
  end

  # Opens a fresh Hash for the element and makes it the current node.
  # The name is not needed until the element is closed.
  #
  # @param key [Object] element name (unused here)
  def start_element(key)
    @node = {}
    @stack << @node
  end

  # Records an attribute on the element currently being built.
  #
  # @param key [Object] attribute name
  # @param val [Object] attribute value
  def attr(key, val)
    @node[key] = val
  end

  # Records the text of the element currently being built.
  #
  # @param val [Object] text content
  def text(val)
    @node[:__content__] = val
  end

  # Closes the current element and stores it in its parent under +key+.
  #
  # The first occurrence of a name is stored directly; any further
  # occurrence turns the entry into an Array. Previously a repeated
  # element whose earlier sibling had been collapsed to plain text
  # overwrote that sibling instead of being appended.
  #
  # @param key [Object] element name
  def end_element(key)
    child = @stack.pop
    @node = @stack.last

    # An element with nothing but text is represented by the text itself.
    value = child.keys == [:__content__] ? child[:__content__] : child

    existing = @node[key]
    case existing
    when nil
      @node[key] = value
    when Array
      existing << value
    else
      # Second occurrence of this name: promote the entry to a list,
      # whatever shape (Hash or text) the first occurrence had.
      @node[key] = [existing, value]
    end
  end
end

# SAX document handler for Nokogiri that folds the parsed document into
# nested Hashes.
#
# Each element becomes a Hash holding its attributes and child elements,
# keyed by name. Text is stored under +'__content__'+; an element that holds
# nothing but text is represented by that text alone. Elements repeated
# under the same parent are gathered into an Array, in the order they were
# closed.
class NokogiriHandler < Nokogiri::XML::SAX::Document
  # @return [Hash] container that receives the document's top-level element
  attr_reader :root

  # Resets the handler state for a new document.
  def start_document
    # The root container is also the current node and the bottom of the
    # stack, so character data seen before the first element has somewhere
    # to go and the outermost element has a parent to be stored into.
    @stack = [@node = @root = {}]
  end

  # Opens a fresh Hash for the element, makes it the current node and
  # copies the element's attributes into it.
  #
  # @param key [String] element name (unused here)
  # @param attrs [Array<Array>] attribute name/value pairs
  def start_element(key, attrs = [])
    @node = {}
    @stack << @node
    attrs.each { |name, value| @node[name] = value }
  end

  # Appends character data to the current element's text, so text that is
  # delivered in several pieces ends up concatenated.
  #
  # @param val [String] chunk of character data
  def characters(val)
    # +'' yields a mutable buffer even when string literals are frozen.
    (@node['__content__'] ||= +'') << val
  end

  # Closes the current element and stores it in its parent under +key+.
  #
  # The first occurrence of a name is stored directly; any further
  # occurrence turns the entry into an Array. Previously a repeated
  # element whose earlier sibling had been collapsed to plain text
  # overwrote that sibling instead of being appended.
  #
  # @param key [String] element name
  def end_element(key)
    child = @stack.pop
    @node = @stack.last

    # An element with nothing but text is represented by the text itself.
    value = child.keys == ['__content__'] ? child['__content__'] : child

    existing = @node[key]
    case existing
    when nil
      @node[key] = value
    when Array
      existing << value
    else
      # Second occurrence of this name: promote the entry to a list,
      # whatever shape (Hash or text) the first occurrence had.
      @node[key] = [existing, value]
    end
  end
end

# Number of full parses each library performs per measurement.
iterations = 10_000

# bmbm runs a rehearsal pass before the measured pass.
Benchmark.bmbm do |bench|
  bench.report('ox') do
    iterations.times do
      # Every run must start reading from the beginning of the stream.
      io.rewind
      Ox.sax_parse(OxHandler.new, io)
    end
  end

  bench.report('nokogiri') do
    iterations.times do
      io.rewind
      Nokogiri::XML::SAX::Parser.new(NokogiriHandler.new).parse(io)
    end
  end
end