File: parsecount

package info (click to toggle)
ruby-libxml 2.7.0-3
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 2,024 kB
  • ctags: 2,251
  • sloc: xml: 9,628; ansic: 8,409; ruby: 7,806; makefile: 3
file content (170 lines) | stat: -rw-r--r-- 7,231 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
#!/usr/bin/env ruby
#
# Each test has an XML Parser open a 98k XML document and count one type of leaf 
# element (466 entries). This is repeated a total of 100 times, twice for each Parser. 
# Summary measurements are from the second test.
#
# Tests run on a MacBook Pro, Mac OS X 10.5.5, 4GB memory, 2.5 GHz Intel Core 2 Duo.
# 
# Library versions:
#   hpricot 0.6.164
#   libxml: 0.9.2
# 
# All benchmarks were run twice in sequence and the measurements presented here are from 
# the second run. Java automatically optimizes JRuby code which is run many times and the 
# speedup from the first to the second test is from 40 to 75%. There is no essential 
# difference in the speed of the C version of Ruby between the first test and the second.
# 
# Summary:
#
#   100 times: Open 98k XML document and count one type of leaf element (466 entries)
# 
#   JRuby (Java 1.6.0_03-p3-Soylatte using server mode): jdom_document_builder     0.360
#   MRI: libxml v0.9.2                                                             0.383
#   JRuby (Java 1.6.0_07): jdom_document_builder                                   0.401
#   JRuby (Java 1.5.0_16): jdom_document_builder                                   0.428
#   JRuby (Java 1.6.0_03-p3-Soylatte using server mode): hpricot                   1.977
#   JRuby (Java 1.5.0_16): hpricot                                                 2.027
#   JRuby (Java 1.6.0_07): hpricot                                                 2.094
#   MRI: hpricot                                                                   2.140
#   JRuby (Java 1.6.0_03-p3-Soylatte using server mode): rexml                     5.488
#   JRuby (Java 1.6.0_07): rexml                                                   5.569
#   JRuby (Java 1.5.0_16): rexml                                                   5.578
#   MRI: rexml                                                                     8.606
# 
# Ruby version: MRI 1.8.6 (2008-03-03 rev 114), platform: universal-darwin9.0
# --------------------------------------------------------------------------------
#                             user     system      total        real
# rexml                   8.170000   0.100000   8.270000 (  8.606676)
# hpricot                 1.990000   0.040000   2.030000 (  2.140865)
# libxml                  0.350000   0.020000   0.370000 (  0.383475)
# 
# JRuby 1.1.5 (svn r8078) on Java 1.5.0_16
# --------------------------------------------------------------------------------
#                             user     system      total        real
# rexml                   5.578000   0.000000   5.578000 (  5.578371)
# hpricot                 2.028000   0.000000   2.028000 (  2.027348)
# jdom_document_builder   0.429000   0.000000   0.429000 (  0.428713)
# 
# JRuby 1.1.5 (svn r8078) on Java 1.6.0_07
# --------------------------------------------------------------------------------
#                             user     system      total        real
# rexml                   5.569000   0.000000   5.569000 (  5.569135)
# hpricot                 2.094000   0.000000   2.094000 (  2.093635)
# jdom_document_builder   0.401000   0.000000   0.401000 (  0.401115)
# 
# JRuby 1.1.5 (svn r8078) on Java 1.6.0_07 using server mode (-J-server)
# --------------------------------------------------------------------------------
#                             user     system      total        real
# rexml                   5.489000   0.000000   5.489000 (  5.488560)
# hpricot                 1.977000   0.000000   1.977000 (  1.976845)
# jdom_document_builder   0.377000   0.000000   0.377000 (  0.377808)
# 
# JRuby 1.1.5 (svn r8078) on Java 1.6.0_03-p3 (Soylatte) using server mode (-J-server)
# --------------------------------------------------------------------------------
#                             user     system      total        real
# rexml                   5.596000   0.000000   5.596000 (  5.596212)
# hpricot                 1.937000   0.000000   1.937000 (  1.937312)
# jdom_document_builder   0.360000   0.000000   0.360000 (  0.360068)

require 'rubygems'
require 'benchmark'
require "rexml/document"
require 'hpricot'

# Platform-specific setup.
#
# On JRuby: pull in the javax.xml.parsers DocumentBuilder classes and keep a
# DocumentBuilderFactory in @dbf for the JRuby-only benchmark.
# On MRI: activate and load the nokogiri and libxml-ruby gems and create an
# initial LibXML parser in @xml_parser.
#
# Either way @ruby_info ends up holding the one-line banner describing the
# interpreter and platform, which is printed before the benchmarks run.
if RUBY_PLATFORM =~ /java/
  include Java
  import javax.xml.parsers.DocumentBuilder
  import javax.xml.parsers.DocumentBuilderFactory
  @dbf = DocumentBuilderFactory.new_instance

  interpreter = "Ruby version: JRuby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE} rev #{RUBY_PATCHLEVEL}) [i386-jruby#{JRUBY_VERSION}]"
  java_version = java.lang.System.getProperty('java.version')
  @ruby_info = interpreter + ", platform: Java, version #{java_version}"
else
  @ruby_info = "Ruby version: MRI #{RUBY_VERSION} (#{RUBY_RELEASE_DATE} rev #{RUBY_PATCHLEVEL}), platform: #{RUBY_PLATFORM}"

  gem 'nokogiri', '>= 1.0.6'
  require 'nokogiri'

  gem 'libxml-ruby', '>= 0.9.2'
  require 'libxml'
  @xml_parser = LibXML::XML::Parser.new
end

# Read the XML fixture that sits next to this script into memory once; the
# string-based benchmarks below all parse this same in-memory document.
sock_entries_path = File.expand_path("sock_entries.xml", File.dirname(__FILE__))
@bundle_with_466_sock_entries = File.read(sock_entries_path)

# Parses the in-memory XML document with REXML and returns the number of
# <sockEntries> elements that are direct children of the root element.
def rexml_count_socks
  root = REXML::Document.new(@bundle_with_466_sock_entries).root
  root.elements.to_a('./sockEntries').length
end

# Benchmarks that rely on native (C) extensions; only defined when not
# running on JRuby.
unless RUBY_PLATFORM =~ /java/
  # Placeholder for the Nokogiri benchmark. The implementation is commented
  # out, so calling this does no work and returns nil.
  def nokogiri_count_socks
    #doc = Nokogiri(@bundle_with_466_sock_entries)
    #socks = doc.search('//sockEntries').length
  end

  # Parses the in-memory XML document with libxml-ruby and returns the number
  # of nodes matched by the XPath '//sockEntries'. A fresh parser is created
  # on every call and also stored in @xml_parser.
  def libxml_count_socks
    parser = @xml_parser = LibXML::XML::Parser.new
    parser.string = @bundle_with_466_sock_entries
    parser.parse.find('//sockEntries').length
  end
end

# Parses the in-memory XML document with Hpricot and returns the number of
# results for the search expression "//sockEntries".
def hpricot_count_socks
  Hpricot.XML(@bundle_with_466_sock_entries).search("//sockEntries").length
end

# JRuby-only benchmark built on the Java DocumentBuilder obtained from @dbf.
if RUBY_PLATFORM =~ /java/
  # Parses sock_entries.xml (located next to this script) with a new
  # DocumentBuilder and returns the length of the node list that
  # getElementsByTagName("sockEntries") yields on the document element.
  # Unlike the other benchmarks, this one hands the parser a file path
  # rather than the pre-read string.
  def jdom_document_builder_count_socks
    xml_path = File.expand_path("sock_entries.xml", File.dirname(__FILE__))
    root = @dbf.new_document_builder.parse(xml_path).get_document_element
    root.getElementsByTagName("sockEntries").get_length
  end
end

# Benchmark driver.
#
# n is the number of parse-and-count repetitions inside each report.
# The optional first command-line argument says how many times the whole
# benchmark suite is run; anything missing, non-numeric or below 2 means once.
n = 100
test_iterations = [ARGV.first.to_i, 1].max
on_jruby = RUBY_PLATFORM =~ /java/

puts
puts @ruby_info
puts
puts "#{n} times: Open 98k XML document and count one type of leaf element (466 entries)"
puts
print "running benchmark "
puts(test_iterations == 1 ? "once.\n\n" : "#{test_iterations} times.\n\n")

test_iterations.times do
  # bmbm runs a rehearsal pass before the measured pass.
  Benchmark.bmbm do |x|
    x.report("rexml") { n.times { rexml_count_socks } }
    x.report("hpricot") { n.times { hpricot_count_socks } }
    if on_jruby
      x.report("jdom_document_builder") { n.times { jdom_document_builder_count_socks } }
    else
      #x.report("nokogiri") { n.times {nokogiri_count_socks} }
      x.report("libxml") { n.times { libxml_count_socks } }
    end
  end
  puts
end

#
# jrexml doesn't appear to have any speedup in this test
#
# if RUBY_PLATFORM =~ /java/ 
#   require 'jrexml'
#   def rexml_with_jrexml_count_socks
#     doc = REXML::Document.new(@bundle_with_466_sock_entries).root
#     socks = doc.elements.to_a('./sockEntries').length
#   end
# 
#   puts "\nNow add in JREXML to see if it speeds up rexml\n"
#   Benchmark.bmbm do |x|
#     x.report("rexml+jrexml") { n.times {rexml_with_jrexml_count_socks} }
#   end
#   puts
# end