File: test_filter.py

package info (click to toggle)
python-xml 0.8.4-10.1%2Blenny1
  • links: PTS
  • area: main
  • in suites: lenny
  • size: 4,972 kB
  • ctags: 10,628
  • sloc: python: 46,730; ansic: 14,354; xml: 968; makefile: 201; sh: 20
file content (187 lines) | stat: -rw-r--r-- 5,625 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
import pprint
import sys

from xml.dom import xmlbuilder, expatbuilder, Node
from xml.dom.NodeFilter import NodeFilter

class Filter(xmlbuilder.DOMBuilderFilter):
    """Element-only builder filter whose verdict is chosen by tag name.

    ``startContainer`` reacts to the tags ``skipthis``, ``rejectbefore``
    and ``stopbefore``; ``acceptNode`` reacts to ``skipafter``,
    ``rejectafter`` and ``stopafter``.  Any other element is accepted.
    Both callbacks assert that the node they receive is an element.
    """

    whatToShow = NodeFilter.SHOW_ELEMENT

    # Tag name -> name of the FILTER_* constant to answer with.  The
    # constant itself is looked up on the instance when a callback runs.
    _ON_START = {
        "skipthis": "FILTER_SKIP",
        "rejectbefore": "FILTER_REJECT",
        "stopbefore": "FILTER_INTERRUPT",
    }
    _ON_ACCEPT = {
        "skipafter": "FILTER_SKIP",
        "rejectafter": "FILTER_REJECT",
        "stopafter": "FILTER_INTERRUPT",
    }

    def _verdict(self, table, node):
        # Shared body of both callbacks: check the node type first, then
        # fall back to FILTER_ACCEPT for tags missing from the table.
        assert node.nodeType == Node.ELEMENT_NODE
        constant_name = table.get(node.tagName, "FILTER_ACCEPT")
        return getattr(self, constant_name)

    def startContainer(self, node):
        return self._verdict(self._ON_START, node)

    def acceptNode(self, node):
        return self._verdict(self._ON_ACCEPT, node)


class RecordingFilter:
    # Deliberately NOT derived from xml.dom.xmlbuilder.DOMBuilderFilter:
    # inheriting from it is optional, and this class is here to check
    # that a filter implementing the whole interface by itself works
    # just as well.

    whatToShow = NodeFilter.SHOW_ALL

    def __init__(self):
        # (callback, nodeType, nodeName) tuples, in the order received.
        self.events = []

    def _note(self, callback, node):
        # Log one callback invocation and always let the node through.
        entry = (callback, node.nodeType, str(node.nodeName))
        self.events.append(entry)
        return xmlbuilder.DOMBuilderFilter.FILTER_ACCEPT

    def startContainer(self, node):
        return self._note("start", node)

    def acceptNode(self, node):
        return self._note("accept", node)


# Builder options read by checkResult(): namespaces set to 0 and a
# Filter instance installed as the filter.
simple_options = xmlbuilder.Options()
simple_options.namespaces = 0
simple_options.filter = Filter()

# Builder options read by checkFilterEvents(): namespaces set to 0; the
# filter attribute is assigned by that function on each call.
record_options = xmlbuilder.Options()
record_options.namespaces = 0

def checkResult(src):
    """Parse *src* using ``simple_options`` and print the resulting XML.

    A blank line is printed first, followed by the ``toxml()``
    serialization of the document produced by
    ``expatbuilder.makeBuilder(simple_options).parseString(src)``.
    The document is unlinked afterwards, even if serializing it raises.
    """
    # The parenthesized single-argument form of print gives the same
    # output as a Python 2 print statement and is also valid on
    # Python 3.  A bare "print()" would emit "()" on Python 2, hence
    # the explicit empty string for the blank line.
    print("")
    dom = expatbuilder.makeBuilder(simple_options).parseString(src)
    try:
        print(dom.toxml())
    finally:
        dom.unlink()

def checkFilterEvents(src, record, what=NodeFilter.SHOW_ALL):
    """Parse *src* with a fresh RecordingFilter and compare its events.

    src    -- XML text handed to the builder's ``parseString``.
    record -- expected list of ``(callback, nodeType, nodeName)`` tuples.
    what   -- value assigned to the filter's ``whatToShow`` attribute.

    Nothing is printed when the recorded events equal *record*;
    otherwise both the received and the expected lists are
    pretty-printed.  The parsed document is unlinked in either case,
    even if reporting raises.
    """
    recorder = RecordingFilter()
    recorder.whatToShow = what
    # record_options is shared module state; each call installs its own
    # recorder on it before building the parser.
    record_options.filter = recorder
    dom = expatbuilder.makeBuilder(record_options).parseString(src)
    try:
        if record != recorder.events:
            # Parenthesized single-argument print behaves identically on
            # Python 2 and Python 3; print("") stands in for a bare
            # print, which would emit "()" if written as print() on
            # Python 2.
            print("")
            print("Received filter events:")
            pprint.pprint(recorder.events)
            print("")
            print("Expected filter events:")
            pprint.pprint(record)
    finally:
        dom.unlink()


# Each checkResult() call below prints the document built from its
# argument, so both the order of the calls and the exact text of every
# literal determine this script's output.

# a simple case of skipping an element
checkResult("<doc><e><skipthis>text<e/>more</skipthis>abc</e>xyz</doc>")

# skip an element nested directly within another skipped element
checkResult('''\
<doc>Text.
  <skipthis>Nested text.
    <skipthis>Nested text in skipthis element.</skipthis>
    More nested text.
  </skipthis>Outer text.</doc>
''')

# skip an element nested indirectly within another skipped element
checkResult('''\
<doc>Text.
  <skipthis>Nested text.
    <nested-element>
      <skipthis>Nested text in skipthis element.</skipthis>
      More nested text.
    </nested-element>
    More text.
  </skipthis>Outer text.</doc>
''')

# empty elements for which Filter answers FILTER_REJECT, first from
# startContainer() ("rejectbefore"), then from acceptNode() ("rejectafter")
checkResult("<doc><rejectbefore/></doc>")

checkResult("<doc><rejectafter/></doc>")

# the same two tags, this time with text, a processing instruction, a
# child element and a comment as content
checkResult('''\
<doc><rejectbefore>
  Text.
  <?my processing instruction?>
  <more stuff="foo"/>
  <!-- a comment -->
</rejectbefore></doc>
''')

checkResult('''\
<doc><rejectafter>
  Text.
  <?my processing instruction?>
  <more stuff="foo"/>
  <!-- a comment -->
</rejectafter></doc>
''')

# Make sure the document element is not passed to the filter:
checkResult("<rejectbefore/>")
checkResult("<rejectafter/>")
checkResult("<stopbefore/>")

# tags for which Filter answers FILTER_INTERRUPT, from startContainer()
# ("stopbefore") and from acceptNode() ("stopafter"), with text on both
# sides of the element
checkResult("<doc>text<stopbefore> and </stopbefore>more</doc>")
checkResult("<doc>text<stopafter> and </stopafter>more</doc>")

# FILTER_SKIP answered from acceptNode() for an element between siblings
checkResult("<doc><a/><skipafter>text</skipafter><a/></doc>")

# A document consisting only of its document element (with or without
# an attribute) is expected to produce no filter events at all.
checkFilterEvents("<doc/>", [])
checkFilterEvents("<doc attr='value'/>", [])
# A single child element is expected to be reported twice: once through
# startContainer() ("start") and once through acceptNode() ("accept").
checkFilterEvents("<doc><e/></doc>", [
    ("start", Node.ELEMENT_NODE, "e"),
    ("accept", Node.ELEMENT_NODE, "e"),
    ])

# Fixture shared by the remaining checkFilterEvents() calls: a document
# with a DOCTYPE declaring an entity and a notation, a comment and a
# processing instruction ahead of the document element, and a child
# element holding an attribute, a processing instruction and a comment.
src = """\
<!DOCTYPE doc [
  <!ENTITY e 'foo'>
  <!NOTATION n SYSTEM 'http://xml.python.org/notation/n'>
]>
<!-- comment -->
<?sample pi?>
<doc><e attr='value'><?pi data?><!--comment--></e></doc>
"""

# Default whatToShow (NodeFilter.SHOW_ALL): the complete list of events
# expected for the fixture, in order.  No entry is expected for the
# document element "doc" or for the attribute.
checkFilterEvents(src, [
    ("accept", Node.DOCUMENT_TYPE_NODE, "doc"),
    ("accept", Node.ENTITY_NODE, "e"),
    ("accept", Node.NOTATION_NODE, "n"),
    ("accept", Node.COMMENT_NODE, "#comment"),
    ("accept", Node.PROCESSING_INSTRUCTION_NODE, "sample"),
    ("start", Node.ELEMENT_NODE, "e"),
    ("accept", Node.PROCESSING_INSTRUCTION_NODE, "pi"),
    ("accept", Node.COMMENT_NODE, "#comment"),
    ("accept", Node.ELEMENT_NODE, "e"),
    ])

# Show everything except a couple of things to the filter, to check
# that whatToShow is implemented.  This isn't sufficient to be a
# black-box test, but will get us started.

# SHOW_COMMENT masked out: the expected events are those of the
# SHOW_ALL run minus the two COMMENT_NODE entries.
checkFilterEvents(src, [
    ("accept", Node.DOCUMENT_TYPE_NODE, "doc"),
    ("accept", Node.ENTITY_NODE, "e"),
    ("accept", Node.NOTATION_NODE, "n"),
    ("accept", Node.PROCESSING_INSTRUCTION_NODE, "sample"),
    ("start", Node.ELEMENT_NODE, "e"),
    ("accept", Node.PROCESSING_INSTRUCTION_NODE, "pi"),
    ("accept", Node.ELEMENT_NODE, "e"),
    ], what=NodeFilter.SHOW_ALL & ~NodeFilter.SHOW_COMMENT)

# SHOW_PROCESSING_INSTRUCTION masked out: the expected events are those
# of the SHOW_ALL run minus the two PROCESSING_INSTRUCTION_NODE entries.
checkFilterEvents(src, [
    ("accept", Node.DOCUMENT_TYPE_NODE, "doc"),
    ("accept", Node.ENTITY_NODE, "e"),
    ("accept", Node.NOTATION_NODE, "n"),
    ("accept", Node.COMMENT_NODE, "#comment"),
    ("start", Node.ELEMENT_NODE, "e"),
    ("accept", Node.COMMENT_NODE, "#comment"),
    ("accept", Node.ELEMENT_NODE, "e"),
    ], what=NodeFilter.SHOW_ALL & ~NodeFilter.SHOW_PROCESSING_INSTRUCTION)