1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167
|
#! /usr/local/bin/ruby
## Ruby version of xmlstats
## 1999 by yoshidam
##
## This sample comes from Clark Cooper's sample of Perl extension
## module XML::Parser.
## (http://www.netheaven.com/~coopercc/xmlparser/samples/xmlstats)
##
## Try XML benchmark (http://www.xml.com/xml/pub/Benchmark/article.html)!
## Ruby is probably faster than Perl.
require 'xml/parser'
begin
  # Prefer the optional 'mbstring' extension when it is installed
  # (presumably it supplies String#mblength natively -- confirm).
  require 'mbstring'
rescue LoadError
  # Pure-Ruby fallback used when 'mbstring' cannot be loaded.
  class String
    # Number of UTF-8 characters held in the receiver.
    #
    # Every byte that is not a UTF-8 continuation byte (bit pattern 10xxxxxx)
    # starts a character, so counting those bytes yields the character count.
    # Working on raw bytes keeps the result independent of whether the
    # running interpreter's String#length counts bytes or characters, and of
    # the encoding tag carried by the string.
    #
    # @return [Integer] character count (0 for an empty string)
    def mblength
      chars = 0
      each_byte { |byte| chars += 1 unless (byte & 0xC0) == 0x80 }
      chars
    end
  end
end
# Ask the interpreter to treat strings and regexps as UTF-8.
# NOTE(review): $KCODE is a Ruby 1.8 setting; later interpreters appear to
# ignore the assignment -- confirm against the Ruby version in use.
$KCODE="UTF8"
# Statistics collected for one element name.
#
# name   - the element name (read-only)
# count  - number of times the element was started
# minlev - shallowest nesting level found for the element; nil until assigned
# seen   - sequence number given when the element name was first encountered
# chars  - amount of character data accumulated inside the element
# empty  - true until the element gets a child element or character data
# ptab   - parent element name => occurrence count
# ktab   - child element name  => occurrence count
# atab   - attribute name      => occurrence count
class Elinfo
  attr_reader :name
  attr_accessor :count, :minlev, :seen, :chars, :empty, :ptab, :ktab, :atab

  # name - element name
  # seen - ordinal recording when this name first appeared
  def initialize(name, seen)
    @name = name
    @seen = seen
    @count = 0
    @chars = 0
    @minlev = nil
    @empty = true
    # Missing keys read as 0 so callers can simply do `tab[key] += 1`.
    @ptab = Hash.new(0)
    @ktab = Hash.new(0)
    @atab = Hash.new(0)
  end

  # Order by minimum nesting level, breaking ties by first-seen order.
  # Returns the raw integer difference (negative, zero or positive).
  # Both operands must already have a non-nil minlev.
  def <=>(b)
    ret = minlev - b.minlev
    ret == 0 ? seen - b.seen : ret
  end
end
# XML::Parser subclass that tallies, for every element name, how often it
# occurs, which parents/children/attributes it has and how much character
# data it holds.
#
# NOTE(review): startElement, endElement and character are presumably the
# event callbacks invoked by XML::Parser#parse -- confirm against the
# xmlparser library.
class StatParser < XML::Parser
  # Arguments are accepted and ignored; super is not called.
  # NOTE(review): presumably XML::Parser.new sets up the native parser
  # itself -- confirm.
  def initialize(*rest)
    @context = []   # stack of Elinfo for the currently open elements
    @root = nil     # name of the first element started with nothing open
    @seen = 0       # counter handing out first-seen ordinals
    @elements = {}  # element name => Elinfo
  end

  # Record one element start: bump its count, link it with the enclosing
  # element (or remember it as the root), tally attribute names, and push
  # it on the open-element stack.
  def startElement(name, attr)
    info = (@elements[name] ||= Elinfo.new(name, @seen += 1))
    info.count += 1

    parent = @context.last
    if parent.nil?
      @root = name
    else
      info.ptab[parent.name] += 1
      parent.ktab[name] += 1
      parent.empty = false
    end

    attr.each_key { |key| info.atab[key] += 1 }
    @context.push(info)
  end

  # Leave the innermost open element.
  def endElement(name)
    @context.pop
  end

  # Credit character data to the innermost open element.
  def character(data)
    current = @context.last
    current.empty = false
    current.chars += data.mblength
  end

  # Assign each element the smallest level at which it is reachable,
  # starting from +name+ (the root when nil) at +level+ and descending
  # through the recorded child tables. An element is revisited only when a
  # smaller level is found, so recursive structures terminate.
  def set_minlev(name, level)
    info = @elements[name.nil? ? @root : name]
    return unless info.minlev.nil? || info.minlev > level

    info.minlev = level
    info.ktab.each_key { |child| set_minlev(child, level + 1) }
  end

  # Yield (name, Elinfo) pairs ordered by Elinfo#<=>.
  def elinf_sort
    ordered = @elements.sort { |left, right| left[1] <=> right[1] }
    ordered.each { |pair| yield(pair[0], pair[1]) }
  end
end
# Print one labelled name/count table to standard output.
#
# label - heading printed above the rows
# tab   - Hash of name => count; nothing at all is printed when it is empty
# dosum - when true and the table has more than one row, a rule and the
#         total of all counts are printed after the rows
#
# Rows are emitted in +tab.sort+ order. Always returns nil.
def showtab(label, tab, dosum)
  return if tab.length == 0

  print "\n ", label, ":\n"
  total = 0
  tab.sort.each do |entry, tally|
    total += tally
    printf(" %-16s %5d\n", entry, tally)
  end
  return unless dosum && tab.length > 1

  print " =====\n"
  printf(" %5d\n", total)
end
# Driver: parse all input supplied through ARGF ($<), then print a report
# for every element, ordered by nesting level and first appearance.
parser = StatParser.new
begin
  parser.parse($<.read)
rescue XML::ParserError
  # Report the parse failure and stop with a non-zero exit status.
  print "#{$0}: #{$!} (in line #{parser.line})\n"
  exit 1
end

# Compute nesting levels from the root (nil selects it) before sorting.
parser.set_minlev(nil, 0)

parser.elinf_sort do |tag, info|
  print "\n================\n"
  print tag, ": ", info.count, "\n"
  print "Had ", info.chars, " bytes of character data\n" if info.chars > 0
  print "Always empty\n" if info.empty
  showtab("Parents", info.ptab, false)
  showtab("Children", info.ktab, true)
  showtab("Attributes", info.atab, false)
end
|