File: xmlstats.rb

package info (click to toggle)
libxml-parser-ruby 0.6.8-2
  • links: PTS
  • area: main
  • in suites: etch, etch-m68k
  • size: 912 kB
  • ctags: 1,523
  • sloc: ruby: 11,080; ansic: 1,958; xml: 467; makefile: 59
file content (167 lines) | stat: -rwxr-xr-x 3,243 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
#! /usr/local/bin/ruby

## Ruby version of xmlstats
## 1999 by yoshidam
##
## This sample is derived from Clark Cooper's xmlstats sample for the Perl
## extension module XML::Parser.
##   (http://www.netheaven.com/~coopercc/xmlparser/samples/xmlstats)
##
## Try XML benchmark (http://www.xml.com/xml/pub/Benchmark/article.html)!
## Ruby is probably faster than Perl.

require 'xml/parser'
begin
  # Prefer the native multibyte extension when it is installed.
  require 'mbstring'
rescue LoadError
  # Pure-Ruby fallback used when the mbstring extension cannot be loaded.
  class String
    # Returns the number of UTF-8 characters in the receiver, looking only at
    # its raw bytes (the string's encoding tag, if any, is not consulted).
    #
    # A well-formed UTF-8 character is one non-continuation byte followed by
    # zero or more continuation bytes (bit pattern 10xxxxxx), so counting the
    # non-continuation bytes counts the characters. Working on bytes gives the
    # same answer on interpreters whose strings are plain byte arrays and on
    # interpreters where String#length already counts characters and a binary
    # regexp cannot be matched against a non-ASCII UTF-8 string.
    def mblength
      cnt = 0
      each_byte { |byte| cnt += 1 unless (byte & 0xC0) == 0x80 }
      cnt
    end
  end
end

# $KCODE is only meaningful on interpreters whose strings carry no encoding;
# elsewhere the assignment is ignored (and may trigger a warning), so skip it.
$KCODE = "UTF8" unless "".respond_to?(:encoding)

# Statistics collected for one element type, i.e. for every element that
# shares the same tag name.
class Elinfo
  # Tag name this record describes (read-only).
  attr_reader :name

  # count  - number of occurrences recorded by the caller
  # minlev - smallest nesting level assigned so far; nil until one is assigned
  # seen   - sequence number handed in at construction time
  # chars  - amount of character data accumulated by the caller
  # empty  - true until the caller records a child or character data
  # ptab   - tally keyed by parent element name
  # ktab   - tally keyed by child element name
  # atab   - tally keyed by attribute name
  attr_accessor :count, :minlev, :seen, :chars, :empty, :ptab, :ktab, :atab

  # name - tag name of the element type
  # seen - sequence number used as the tie-breaker when ordering records
  def initialize(name, seen)
    @name = name
    @seen = seen
    @count = 0
    @chars = 0
    @minlev = nil
    @empty = true
    # Missing keys read as 0 so callers can use `tab[key] += 1` directly.
    @ptab = Hash.new(0)
    @ktab = Hash.new(0)
    @atab = Hash.new(0)
  end

  # Orders records by minlev, then by seen.
  #
  # Returns the numeric difference of the deciding field (negative, zero or
  # positive), not a normalized -1/0/1. Both records must already have a
  # non-nil minlev.
  def <=>(b)
    level_diff = minlev - b.minlev
    level_diff.zero? ? seen - b.seen : level_diff
  end
end

# Parser subclass that gathers per-element statistics: occurrence counts,
# parent/child/attribute tallies and the amount of character data recorded
# for each element type.
class StatParser < XML::Parser
  # Sets up empty bookkeeping. Constructor arguments are accepted but not
  # used by this method.
  def initialize(*rest)
    @context = []   # stack of Elinfo records for the currently open elements
    @root = nil     # name of the element seen while no other element was open
    @seen = 0       # running counter used to number element types
    @elements = {}  # element name => Elinfo
  end

  # Start-tag callback (presumably invoked by XML::Parser during parsing).
  #
  # name - tag name of the element being opened
  # attr - the element's attributes; only its keys are read (via each_key)
  def startElement(name, attr)
    # Create the record on first sight, numbering types in order of appearance.
    info = (@elements[name] ||= Elinfo.new(name, @seen += 1))
    info.count += 1

    parent = @context.last
    if parent.nil?
      # No enclosing element: remember this one as the root.
      @root = name
    else
      info.ptab[parent.name] += 1
      parent.ktab[name] += 1
      parent.empty = false
    end

    attr.each_key { |key| info.atab[key] += 1 }
    @context << info
  end

  # End-tag callback: the innermost open element is closed.
  def endElement(name)
    @context.pop
  end

  # Character-data callback: credits the text to the innermost open element.
  def character(data)
    current = @context.last
    current.empty = false
    current.chars += data.mblength
  end

  # Assigns each element type the smallest nesting level at which it can be
  # reached, walking the recorded child tables recursively.
  #
  # name  - element name to start from; nil means the root element
  # level - nesting level to assign to that element
  def set_minlev(name, level)
    target = name.nil? ? @root : name
    info = @elements[target]
    # Stop when a level at least as small is already recorded; this also
    # terminates the walk when element types contain each other.
    return unless info.minlev.nil? || info.minlev > level

    info.minlev = level
    child_level = level + 1
    info.ktab.each_key { |child| set_minlev(child, child_level) }
  end

  # Yields (name, Elinfo) pairs ordered by Elinfo#<=>.
  def elinf_sort
    ordered = @elements.sort { |left, right| left.last <=> right.last }
    ordered.each { |name, inf| yield(name, inf) }
  end
end

# Prints one labelled tally table to standard output; prints nothing when the
# table is empty.
#
# label - heading printed above the rows
# tab   - hash of name => count; rows are printed in sorted order
# dosum - when true and the table has more than one row, a separator line and
#         the total of all counts are appended
def showtab(label, tab, dosum)
  return if tab.length == 0

  print "\n   ", label, ":\n"

  total = 0
  tab.sort.each do |name, cnt|
    printf("      %-16s      %5d\n", name, cnt)
    total += cnt
  end

  # A total is only worth showing when there is something to add up.
  return unless dosum && tab.length > 1

  print "                            =====\n"
  printf("                            %5d\n", total)
end

# Parse the whole input (files named on the command line, or standard input)
# and collect the statistics.
parser = StatParser.new
begin
  parser.parse($<.read)
rescue XML::ParserError => err
  print "#{$0}: #{err} (in line #{parser.line})\n"
  exit 1
end

# Compute nesting levels starting from the root, then report each element
# type in sorted order.
parser.set_minlev(nil, 0)
parser.elinf_sort do |name, elinf|
  print "\n================\n"
  print name, ": ", elinf.count, "\n"
  print "Had ", elinf.chars, " bytes of character data\n" if elinf.chars > 0
  print "Always empty\n" if elinf.empty
  showtab("Parents", elinf.ptab, false)
  showtab("Children", elinf.ktab, true)
  showtab("Attributes", elinf.atab, false)
end