File: sax2obj.py

package info (click to toggle)
python-xml 0.8.4-10.1+lenny1
  • links: PTS
  • area: main
  • in suites: lenny
  • size: 4,972 kB
  • ctags: 10,628
  • sloc: python: 46,730; ansic: 14,354; xml: 968; makefile: 201; sh: 20
file content (165 lines) | stat: -rw-r--r-- 4,251 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
"""
A general XML element -> Python object converter based on SAX.
"""

from xml.sax import saxexts,saxlib,saxutils
import re,string

# Matches a run of one or more whitespace characters (every character in
# string.whitespace).  DocHandler.characters uses it to recognise character
# data that consists of whitespace only.
reg_ws=re.compile("[" + string.whitespace + "]+")

class ConvSpec:
    """Holder for the information needed to turn SAX events into Python
    objects.

    The class currently stores nothing: its constructor takes no arguments
    and sets no attributes."""

    def __init__(self):
        # Nothing to initialise yet.
        return None

class SAXObject:
    """A generic record holding named field values.

    Values are kept in the '_fields' dictionary, keyed by field name.  They
    can be read with get_field() or as plain attributes (obj.name), and are
    written with set_field()."""

    def __init__(self):
        self._fields={}

    def has_field(self,field):
        """Return true if a value has been stored under 'field'."""
        return field in self._fields

    def get_fields(self):
        """Return a list with the names of all stored fields."""
        # Wrapped in list() so callers get a real list on every Python
        # version (dict.keys() is a view on Python 3).
        return list(self._fields.keys())

    def get_field(self,field):
        """Return the value stored under 'field'.

        Raises KeyError if the field has not been set."""
        return self._fields[field]

    def set_field(self,field,value):
        """Store 'value' under 'field', replacing any previous value."""
        self._fields[field]=value

    def display(self):
        """Print every field as a 'name=value' line on standard output."""
        for field in list(self._fields.keys()):
            # A single parenthesised argument prints the same text whether
            # print is a statement or a function.
            print("%s=%s" % (field,self._fields[field]))

    def __getattr__(self,attr):
        """Expose stored fields as attributes.

        Raises AttributeError if no field called 'attr' exists."""
        # __getattr__ only runs when normal lookup has failed.  If '_fields'
        # itself is missing (an instance created without __init__, e.g. by
        # copy or pickle), reading self._fields below would call this method
        # again and recurse without end.
        if attr=="_fields":
            raise AttributeError(attr)

        try:
            return self._fields[attr]
        except KeyError:
            # repr(attr) is the same text str() gives for the KeyError.
            raise AttributeError(repr(attr))

    def __cmp__(self,obj):
        """Compare by identity: 0 for the very same object, 1 otherwise.

        Only Python 2 calls this method; Python 3 ignores __cmp__ and its
        default == / != are identity based as well."""
        if id(obj)==id(self):
            return 0
        else:
            return 1

class DocHandler(saxlib.DocumentHandler):
    """SAX document handler that turns elements into SAXObject instances.

    target_elem -- name of the element that starts a new object; its
                   attributes become fields of that object.
    list_elems  -- container whose members are names of elements that
                   become nested SAXObjects, collected in a list stored
                   under the element name on the enclosing object.
    ign_elems   -- container whose members are names of elements that are
                   skipped together with everything inside them.
    rep_field   -- container whose members are names of elements whose
                   text is collected in a list stored under the element
                   name.

    Any other element inside a target element is stored as a field whose
    value is the character data seen since the most recent start tag.
    Finished objects are available from get_objects()."""

    def __init__(self,target_elem,list_elems,ign_elems,rep_field):
        self.target_elem=target_elem
        self.list_elems=list_elems
        self.ign_elems=ign_elems
        self.rep_field=rep_field

        self.ignoring=0     # nesting depth inside ignored elements
        self.objects=[]     # completed target objects
        self.current=None   # object being filled in, None outside a target
        self.cur_data=""    # character data since the last start tag
        self.stack=[]       # enclosing objects of the current list element

    def startElement(self,name,attrs):
        if self.ignoring:
            # endElement decrements the counter for every ignorable element
            # that closes while we are ignoring, so nested ones have to be
            # counted here or the counter would drop below zero.
            if name in self.ign_elems:
                self.ignoring=self.ignoring+1
            return

        if name==self.target_elem:
            self.current=SAXObject()
            # Iterating attrs yields attribute names; indexing by name
            # yields the value.
            for attr in attrs:
                self.current.set_field(attr,attrs[attr])
        elif self.current is None:
            # Outside any target element there is no object to attach
            # list or repeated fields to; only ignoring can start here.
            if name in self.ign_elems:
                self.ignoring=self.ignoring+1
        elif name in self.list_elems:
            if not self.current.has_field(name):
                self.current.set_field(name,[])

            # Remember the enclosing object and collect the fields of the
            # list element in a fresh one until its end tag.
            self.stack.append(self.current)
            self.current=SAXObject()
        elif name in self.rep_field and not self.current.has_field(name):
            self.current.set_field(name,[])
        elif name in self.ign_elems:
            self.ignoring=self.ignoring+1

        self.cur_data=""

    def characters(self,data,start,length):
        if self.ignoring or self.current is None:
            return

        data=data[start:start+length]
        # Chunks that consist of whitespace only are dropped.
        mo=reg_ws.match(data)
        if mo is not None and mo.end(0)==len(data):
            return

        self.cur_data=self.cur_data+data

    def endElement(self,name):
        if self.ignoring:
            # Mirror of the counting in startElement.
            if name in self.ign_elems:
                self.ignoring=self.ignoring-1
            return

        if self.current is None:
            return

        if name==self.target_elem:
            self.objects.append(self.current)
            self.current=None
        elif name in self.list_elems:
            # Restore the enclosing object and append the finished nested
            # object to its list.
            obj=self.current
            self.current=self.stack.pop()
            self.current.get_field(name).append(obj)
        elif name in self.rep_field:
            self.current.get_field(name).append(self.cur_data)
        else:
            self.current.set_field(name,self.cur_data)

    def get_objects(self):
        """Return the list of objects completed so far."""
        return self.objects

def make_objects(url,element,list_elems={},ign_elems={},rep_field={}):
    """Parse 'url' and return the objects built from its 'element' elements.

    A DocHandler is created from 'element', 'list_elems', 'ign_elems' and
    'rep_field' and installed on a parser obtained from
    saxexts.make_parser(), with a saxutils.ErrorPrinter as error handler.
    The return value is whatever the handler's get_objects() returns once
    parsing has finished."""
    handler=DocHandler(element,list_elems,ign_elems,rep_field)
    errors=saxutils.ErrorPrinter()

    sax_parser=saxexts.make_parser()
    sax_parser.setErrorHandler(errors)
    sax_parser.setDocumentHandler(handler)
    sax_parser.parse(url)

    return handler.get_objects()

def make_xml(filename,root_elem,trgt_elem,list):
    """Write the objects in 'list' to the file 'filename' as XML.

    The output is a 'root_elem' element containing one 'trgt_elem' element
    per object.  Each name returned by the object's get_fields() becomes a
    child element whose content is the field value passed through
    escape_markup().  An existing file is overwritten."""
    out=open(filename,"w")
    try:
        out.write("<%s>\n" % root_elem)

        for obj in list:
            out.write("  <%s>\n" % trgt_elem)
            for field in obj.get_fields():
                out.write("    <%s>%s</%s>\n" % \
                          (field,escape_markup(obj.get_field(field)),field))
            out.write("  </%s>\n" % trgt_elem)

        out.write("\n</%s>" % root_elem)
    finally:
        # Close the file even when a write or a field access raises, so
        # the handle is not leaked.
        out.close()

def list2hash(lst,key_field):
    """Return a dictionary that maps each object's 'key_field' value to
    the object itself.

    Every object in 'lst' must provide get_field().  When several objects
    share a key value the last one in 'lst' wins."""
    return dict([(obj.get_field(key_field),obj) for obj in lst])

def escape_markup(str):
    """Return 'str' with the XML markup characters replaced by entities.

    '&' becomes '&amp;', '<' becomes '&lt;' and '>' becomes '&gt;'; every
    other item of 'str' is copied unchanged.  The ampersand has to be
    escaped as well, otherwise text containing '&' produces output that is
    not well-formed XML."""
    entities={"&":"&amp;","<":"&lt;",">":"&gt;"}

    # Translate item by item and join once instead of growing a string
    # with repeated concatenation.
    return "".join([entities.get(ch,ch) for ch in str])