File: xmltreebuilder.rb.old

package info (click to toggle)
libxml-parser-ruby 0.5.16-1
  • links: PTS
  • area: main
  • in suites: potato
  • size: 596 kB
  • ctags: 702
  • sloc: ruby: 4,474; ansic: 1,254; xml: 542; makefile: 53
file content (205 lines) | stat: -rw-r--r-- 5,863 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
## -*- Ruby -*-
## XML::SimpleTreeBuilder
## 1998 by yoshidam

require 'xmlparser'
require 'xmltree'
include XML::SimpleTree

module XML
  ## XML::SimpleTreeBuilder
  ##   Feeds an XML string to XMLParser and assembles the reported events
  ##   into a tree of Document / Element / Text / ... nodes.
  class SimpleTreeBuilder
    ## Constructor
    ##  parser = XML::SimpleTreeBuilder.new(level)
    ##    level: 0 -- ignore default events (default)
    ##           1 -- catch default events and create the Comment,
    ##                the EntityReference, the XML declaration (as PI) and
    ##                the non-DOM-compliant DocumentType nodes.
    def initialize(level = 0)
      @tree = nil
      @parser = XMLParser.new
      @level = level
      if @level > 0
        ## An empty `default' singleton method is installed on the parser.
        ## NOTE(review): presumably XMLParser only reports DEFAULT events
        ## when such a method exists -- confirm against xmlparser.
        def @parser.default; end
      end
    end

    ## User redefinable name encoding converter.
    ## Applied to element, attribute, PI target, entity and doctype names.
    ## The stock implementation returns str unchanged.
    def nameConverter(str)
      str
    end

    ## User redefinable cdata encoding converter.
    ## Applied to attribute values, text, comments, PI data and markup
    ## declarations. The stock implementation returns str unchanged.
    def cdataConverter(str)
      str
    end

    ## Parse
    ##   doctree = parser.parse(xml, trim)
    ##     xml:  string of XML contents
    ##     trim: flag of extra-whitespace trimming.
    ##           This was not a good idea and the flag is ignored.
    ##           Use tree.documentElement.normalize; tree.trim
    ##   Returns the Document node that roots the built tree.
    ##
    ##   Strings handed over by the parser are never modified in place;
    ##   accumulated text (markup declarations, external IDs) is built in
    ##   private copies.
    def parse(xml, trim = false)
      tree = Document.new
      current = tree   ## node that receives the next child
      inDocDecl = 0    ## 0: outside DOCTYPE, 1: inside DOCTYPE,
                       ## 2: inside the internal subset ([ ... ])
      decl = ""        ## markup declaration currently being collected
      inDecl = 0       ## 1 while collecting a markup declaration
      idRest = 0       ## number of external ID literals still expected
      extID = nil      ## "SYSTEM ..." / "PUBLIC ..." text of the DOCTYPE
      cdata_f = 0      ## 0: plain text, 1: CDATA boundary marker seen,
                       ## 2: inside a CDATA section
      @parser.parse(xml) do |type, name, data|
        case type
        when XMLParser::START_ELEM
          attr = {}
          data.each do |key, value|
            attr[nameConverter(key)] = cdataConverter(value)
          end
          elem = Element.new(nameConverter(name), attr)
          current.appendChild(elem)
          current = elem

        when XMLParser::END_ELEM
          current = current.parentNode

        when XMLParser::CDATA
          if data == ''
            ## An empty CDATA event is treated as a CDATA section boundary
            ## marker (NOTE(review): presumably emitted by XMLParser at the
            ## start and the end of a section -- confirm). The event is
            ## handed to the default handler so that the following DEFAULT
            ## event can tell '<![CDATA[' from ']]>'.
            if @level > 0
              cdata_f = 1
              @parser.defaultCurrent
            end
            next
          end
          if cdata_f == 2
            cdata = CDATASection.new(cdataConverter(data))
          else
            cdata = Text.new(cdataConverter(data))
          end
          current.appendChild(cdata)

        when XMLParser::PI
          pi = ProcessingInstruction.new(nameConverter(name),
                                         cdataConverter(data))
          ## PI data should not be converted
          current.appendChild(pi)

        when XMLParser::DEFAULT
          if cdata_f == 1
            ## Resolve the pending CDATA boundary marker.
            if data == '<![CDATA['
              cdata_f = 2
            elsif data == ']]>'
              cdata_f = 0
            else
              ## Diagnostics go to stderr so they cannot mix with the
              ## program's regular output.
              $stderr.print "Unexpected default event\n"
              cdata_f = 0
            end
            next
          end
          ## All patterns below are anchored with \A and \z (whole token),
          ## not ^ and $ (any line): a multi-line token such as an entity
          ## value literal must not be classified by one of its inner lines.
          ##
          ## A large comment may crash the regexp engine of Ruby, so the
          ## comment body is cut out by index instead of being captured.
          if data =~ /\A<!--/ && data =~ /-->\z/ && data.length >= 7
            comment = Comment.new(cdataConverter(data[4..-4]))
            ## Comment should not be converted
            current.appendChild(comment)
          elsif data =~ /\A\&(.+);\z/
            eref = EntityReference.new(nameConverter($1))
            current.appendChild(eref)
          elsif data =~ /\A<\?xml\s*([\s\S]*)\?>\z/
            pi = ProcessingInstruction.new("xml",
                                           cdataConverter($1))
            ## PI data should not be converted
            current.appendChild(pi)
          elsif inDocDecl == 0 && data =~ /\A<\!DOCTYPE\z/
            inDocDecl = 1
            inDecl = 0
            idRest = 0
            extID = nil
          elsif inDocDecl == 1
            if data == "["
              inDocDecl = 2
            elsif data == ">"
              ## End of DOCTYPE: store the external ID on the DocumentType
              ## node and step back to its parent.
              if extID
                current.nodeValue = extID
              end
              inDocDecl = 0
              current = current.parentNode
            elsif data == "SYSTEM"
              idRest = 1
              extID = data.dup   ## private copy, appended to below
            elsif data == "PUBLIC"
              idRest = 2
              extID = data.dup   ## private copy, appended to below
            elsif data !~ /\A\s+\z/
              if idRest > 0
                ## SysID or PubID
                extID << " " << data
                idRest -= 1
              else
                ## Root Element Type
                doctype = DocumentType.new(nameConverter(data))
                current.appendChild(doctype)
                current = doctype
              end
            end
          elsif inDocDecl == 2
            if inDecl == 0
              if data == "]"
                inDocDecl = 1
              elsif data =~ /\A<\!/
                decl = data.dup  ## private copy, appended to below
                inDecl = 1
              elsif data =~ /\A%(.+);\z/
                ## PERef
                cdata = Text.new(nameConverter(data))
                current.appendChild(cdata)
              else
                ## WHITESPACE
              end
            else ## inDecl == 1
              if data == ">"
                decl << data
                inDecl = 0
                ## Markup Decl
                cdata = Text.new(cdataConverter(decl))
                ## Markup decl should not be converted
                current.appendChild(cdata)
              elsif data =~ /\A\s+\z/
                ## WHITESPACE: each whitespace token becomes one blank
                decl << " "
              else
                decl << data
              end
            end
          else
            ## maybe WHITESPACE
            cdata = Text.new(cdataConverter(data))
            current.appendChild(cdata)
          end
        end
      end
      tree
    end

    ## Returns the value of the underlying parser's `line'.
    def line
      @parser.line
    end

    ## Returns the value of the underlying parser's `column'.
    def column
      @parser.column
    end

    ## Returns the value of the underlying parser's `byteIndex'.
    def byteIndex
      @parser.byteIndex
    end
  end

  module DOM
    ## XML::DOM::Builder is an alias of XML::SimpleTreeBuilder.
    Builder = SimpleTreeBuilder
  end
end