File: digesttest2.rb

package info (click to toggle)
libxml-parser-ruby 0.6.8-2
  • links: PTS
  • area: main
  • in suites: etch, etch-m68k
  • size: 912 kB
  • ctags: 1,523
  • sloc: ruby: 11,080; ansic: 1,958; xml: 467; makefile: 59
file content (192 lines) | stat: -rwxr-xr-x 4,523 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
#! /usr/local/bin/ruby

## DOMHASH without DOM tree
## 1999 by yoshidam
##
## Namespace support required
##

require 'xml/parser'
require 'md5'
#require 'uconv'

module XML
  ## Parser for an external entity.
  ##
  ## It is created from a parent parser (see +externalEntityRef+ below) and
  ## keeps no state of its own apart from that parent: every handler call is
  ## forwarded unchanged to the parent, so the content of the entity is
  ## processed as if it had appeared in the parent's input.
  class ExtEntParser < Parser
    ## parent:: the parser object that receives the forwarded events
    ## rest::   remaining arguments; passed, together with +parent+, to
    ##          Parser#initialize by the bare +super+
    def initialize(parent, *rest)
      super
      @parent = parent
    end

    ## forward a start tag to the parent
    def startElement(name, attr)
      @parent.startElement(name, attr)
    end

    ## forward an end tag to the parent
    def endElement(name)
      @parent.endElement(name)
    end

    ## forward character data to the parent
    def character(data)
      @parent.character(data)
    end

    ## forward a processing instruction to the parent
    def processingInstruction(target, data)
      @parent.processingInstruction(target, data)
    end

    ## forward a comment to the parent
    def comment(data)
      @parent.comment(data)
    end

    ## Handle an external entity referenced from inside this entity by
    ## parsing the file named +systemId+ with a nested ExtEntParser whose
    ## parent is this parser.
    ##
    ## +base+ and +publicId+ are accepted but not used.
    ##
    ## Raises XML::ParserError (message prefixed with "systemId(line): ")
    ## when the entity is not well-formed, and Errno::ENOENT when the file
    ## does not exist.
    def externalEntityRef(context, base, systemId, publicId)
      extp = ExtEntParser.new(self, context)
      begin
        ## File.read closes the file as soon as it has been read.  It also
        ## treats systemId strictly as a path, whereas Kernel#open would
        ## run a system identifier starting with "|" as a command.
        extp.parse(File.read(systemId))
      rescue XML::ParserError
        raise XML::ParserError.new("#{systemId}(#{extp.line}): #{$!}")
      rescue Errno::ENOENT
        raise Errno::ENOENT.new("#{$!}")
      end
      extp.done
    end
  end

  ## DOMHASH calculator that works directly on parser events, without
  ## building a DOM tree.
  ##
  ## While the document is parsed, the digests of the attributes and
  ## children of every open element are collected; when the element is
  ## closed they are folded into one MD5 digest for that element.  The
  ## digest of the outermost element is available from +getRootDigest+ once
  ## parsing has finished.
  class DigestParser < Parser
    ## Node type codes.  Only ELEMENT_NODE, ATTRIBUTE_NODE, TEXT_NODE and
    ## PROCESSING_INSTRUCTION_NODE are referenced by the digest methods of
    ## this class; each is written as a 32-bit big-endian integer at the
    ## start of the digested data.
    NODE_NODE = 0
    ELEMENT_NODE = 1
    ATTRIBUTE_NODE = 2
    TEXT_NODE = 3
    CDATA_SECTION_NODE = 4
    ENTITY_REFERENCE_NODE = 5
    ENTITY_NODE = 6
    PROCESSING_INSTRUCTION_NODE = 7
    COMMENT_NODE  = 8
    DOCUMENT_NODE = 9
    DOCUMENT_TYPE_NODE = 10
    DOCUMENT_FRAGMENT_NODE = 11
    NOTATION_NODE = 12

    ## All arguments are passed on to Parser#initialize.
    def initialize(*rest)
      super
      ## entries for the elements enclosing the current one
      @elem_stack = []
      ## entry for the current element:
      ##   [name, attribute digests, child digests]
      ## Before the root element is opened it stands for the document.
      @elem_data = [ "#document", [], [] ]
      ## character data collected since the last flush
      @text = ''
      ## digest of the root element; nil until that element is closed
      @root = nil
    end

    ## convert UTF-8 into UTF-16BE
    ##
    ## Code points above U+FFFF are written as a surrogate pair.  Packing
    ## them directly with "n" would keep only their low 16 bits and yield a
    ## different character.
    def tou16(str)
#      Uconv.u16swap(Uconv.u8tou16(str))
      units = []
      str.unpack("U*").each do |cp|
        if cp > 0xFFFF
          cp -= 0x10000
          units << (0xD800 | (cp >> 10)) << (0xDC00 | (cp & 0x3FF))
        else
          units << cp
        end
      end
      units.pack("n*")
    end

    ## create digest value for the text node
    def textDigest(text)
      MD5.new([TEXT_NODE].pack("N") + tou16(text)).digest
    end

    ## create digest value for the element node
    ##
    ## name::     element name (UTF-8)
    ## attrs::    attribute digests, already in their final order
    ## children:: child digests in document order
    def elementDigest(name, attrs, children)
      MD5.new([ELEMENT_NODE].pack("N") +
              tou16(name) +
              "\0\0" +
              [attrs.length].pack("N") +
              attrs.join +
              [children.length].pack("N") +
              children.join).digest
    end

    ## create digest value for the attribute node
    def attrDigest(name, value)
      MD5.new([ATTRIBUTE_NODE].pack("N") +
              tou16(name) + "\0\0" + tou16(value)).digest
    end

    ## create digest value for the processing instruction node
    def processingInstructionDigest(target, data)
      MD5.new([PROCESSING_INSTRUCTION_NODE].pack("N") +
              tou16(target) + "\0\0" + tou16(data)).digest
    end

    ## flush the buffered text: if any character data is pending, add its
    ## digest to the children of the current element and empty the buffer
    def flushText
      unless @text.empty?
        @elem_data[2].push(textDigest(@text))
        @text = ''
      end
    end

    ## start element handler
    ##
    ## Saves the entry of the enclosing element and starts a new one whose
    ## attribute digests are ordered by the UTF-16BE form of the attribute
    ## names.
    def startElement(name, attr)
      flushText
      @elem_stack.push(@elem_data)
      ## sort_by converts every name once instead of once per comparison
      attr_digests = attr.sort_by { |attr_name, _value|
        tou16(attr_name)
      }.map { |attr_name, value|
        attrDigest(attr_name, value)
      }
      @elem_data = [name, attr_digests, []]
    end

    ## end element handler
    ##
    ## Computes the digest of the element being closed and appends it to
    ## the children of the enclosing element.
    def endElement(name)
      flushText
      digest = elementDigest(*@elem_data)
      @elem_data = @elem_stack.pop
      @elem_data[2].push(digest)

      ## digest for root element: nothing is left on the stack once the
      ## outermost element has been closed
      @root = digest if @elem_stack.empty?
    end

    ## character data handler
    def character(data)
      ## Character data must be concatenated because expat may split one
      ## text node into several fragments.
      @text << data
    end

    ## PI handler
    def processingInstruction(target, data)
      flushText
      @elem_data[2].push(processingInstructionDigest(target, data))
    end

    ## comment handler
    def comment(data)
      ## The comment itself adds no digest, but text pending before it is
      ## still flushed here.
      flushText
    end

    ## Handle an external entity by parsing the file named +systemId+ with
    ## an ExtEntParser that forwards every event back to this parser.
    ##
    ## +base+ and +publicId+ are accepted but not used.
    ##
    ## Raises XML::ParserError (message prefixed with "systemId(line): ")
    ## when the entity is not well-formed, and Errno::ENOENT when the file
    ## does not exist.
    def externalEntityRef(context, base, systemId, publicId)
      extp = ExtEntParser.new(self, context)
      begin
        ## File.read closes the file as soon as it has been read.  It also
        ## treats systemId strictly as a path, whereas Kernel#open would
        ## run a system identifier starting with "|" as a command.
        extp.parse(File.read(systemId))
      rescue XML::ParserError
        raise XML::ParserError.new("#{systemId}(#{extp.line}): #{$!}")
      rescue Errno::ENOENT
        raise Errno::ENOENT.new("#{$!}")
      end
      extp.done
    end

    ## digest of the root element, or nil if no root element has been
    ## closed yet
    def getRootDigest
      @root
    end
  end
end

## Command-line driver: parse the XML given as file arguments (or on
## standard input) and print the digest of its root element as upper-case
## hexadecimal followed by a newline.
parser = XML::DigestParser.new(nil, ":") ## nssep must be ':'
if parser.respond_to?(:setParamEntityParsing)
  parser.setParamEntityParsing(XML::Parser::PARAM_ENTITY_PARSING_UNLESS_STANDALONE)
end
begin
  parser.parse($<.read)
rescue XML::ParserError
  ## The message goes to standard output, "file:line: reason".
  print "#{$<.filename}:#{parser.line}: #{$!}\n"
  ## A bare exit would report success; signal the failure to the caller.
  exit 1
end
parser.getRootDigest.each_byte { |c| print "%02X" % c }
print "\n"