1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
|
from xml.dom import minidom
from datetime import datetime
import re
import sys
# Type used for raw byte strings on the running interpreter: ``str`` when the
# major version is 2, ``bytes`` otherwise.
binary_type = str if sys.version_info[0] == 2 else bytes
class Parser(object):
    """Convert an XML document into nested dicts, lists and scalar values.

    Element names become dictionary keys, with every ``-`` replaced by ``_``.
    An element's ``type`` attribute selects how its content is converted
    (``"array"``, ``"integer"``, ``"boolean"``, ``"datetime"``, ``"date"``);
    untyped elements yield their text, or ``None`` when ``nil="true"``.
    """

    def __init__(self, xml):
        """Parse *xml* into a DOM document stored on ``self.doc``.

        Args:
            xml: The XML document as text, or as UTF-8 encoded bytes.
        """
        if isinstance(xml, binary_type):
            xml = xml.decode('utf-8')
        # Remove whitespace that sits between two tags so that pretty-printed
        # documents do not produce whitespace-only text nodes.  The pattern is
        # a raw string because "\s" is not a valid string escape.
        self.doc = minidom.parseString(re.sub(r">\s+<", "><", xml).strip())

    def parse(self):
        """Return ``{root_tag: parsed_root_content}`` for the document."""
        root = self.doc.documentElement
        return {self.__underscored(root.tagName): self.__parse_node(root)}

    def __parse_node(self, root):
        """Return the Python value for the element *root*.

        Elements with ``type="array"`` become lists, childless elements and
        elements whose first child is a text node become scalars, and every
        other element becomes a dict of its child elements.
        """
        child = root.firstChild
        if self.__get_node_attribute(root, "type") == "array":
            return self.__build_list(child)
        if child is None:
            return self.__node_content(root, None)
        if child.nodeType == minidom.Node.TEXT_NODE:
            return self.__node_content(root, child.nodeValue)
        return self.__build_dict(child)

    def __convert_to_boolean(self, value):
        """Return True for ``"true"`` or ``"1"``; False for anything else."""
        return value in ("true", "1")

    def __convert_to_date(self, value):
        """Parse a ``YYYY-MM-DD`` string into a ``datetime.date``."""
        return datetime.strptime(value, "%Y-%m-%d").date()

    def __convert_to_datetime(self, value):
        """Parse a ``YYYY-MM-DDTHH:MM:SSZ`` string into a naive ``datetime``."""
        return datetime.strptime(value, "%Y-%m-%dT%H:%M:%SZ")

    def __convert_to_list(self, mapping, key):
        """Wrap ``mapping[key]`` in a one-item list, in place, unless it is a list."""
        current = mapping[key]
        if not isinstance(current, list):
            mapping[key] = [current]

    def __build_list(self, child):
        """Parse *child* and its following element siblings into a list.

        Non-element siblings are skipped.  Passing ``None`` yields ``[]``.
        """
        items = []
        while child is not None:
            if child.nodeType == minidom.Node.ELEMENT_NODE:
                items.append(self.__parse_node(child))
            child = child.nextSibling
        return items

    def __build_dict(self, child):
        """Parse *child* and its following element siblings into a dict.

        Array-typed elements and elements whose first child is a text node
        are stored under their tag, replacing any earlier value for that tag.
        Other elements that repeat a tag are collected into a list.
        """
        result = {}
        while child is not None:
            if child.nodeType == minidom.Node.ELEMENT_NODE:
                tag = self.__underscored(child.tagName)
                first = child.firstChild
                is_array = self.__get_node_attribute(child, "type") == "array"
                has_text = (first is not None
                            and first.nodeType == minidom.Node.TEXT_NODE)
                # Test for key presence rather than truthiness: an earlier
                # sibling that parsed to "", None, False or {} must still
                # count as a first occurrence instead of being overwritten.
                if is_array or has_text or tag not in result:
                    result[tag] = self.__parse_node(child)
                else:
                    self.__convert_to_list(result, tag)
                    result[tag].append(self.__parse_node(child))
            child = child.nextSibling
        return result

    def __get_node_attribute(self, node, attribute):
        """Return the value of *attribute* on *node*, or None when it is absent."""
        attribute_node = node.attributes.get(attribute)
        return attribute_node and attribute_node.value

    def __node_content(self, parent, content):
        """Convert the text *content* of *parent* according to its attributes.

        Args:
            parent: The element whose ``type`` and ``nil`` attributes are read.
            content: The element's text, or None when it has no children.

        Returns:
            An ``int``, ``bool``, ``datetime`` or ``date`` for the matching
            ``type``.  Elements typed integer, datetime or date that have no
            content (for example ``nil="true"``) yield None.  A boolean
            element without content yields False.  Untyped elements yield
            None when ``nil="true"``, otherwise the text or ``""``.
        """
        parent_type = self.__get_node_attribute(parent, "type")
        parent_nil = self.__get_node_attribute(parent, "nil")
        # int(None) and strptime(None, ...) raise TypeError, so a typed
        # element with no content is reported as None instead of converted.
        if content is None and parent_type in ("integer", "datetime", "date"):
            return None
        if parent_type == "integer":
            return int(content)
        if parent_type == "boolean":
            return self.__convert_to_boolean(content)
        if parent_type == "datetime":
            return self.__convert_to_datetime(content)
        if parent_type == "date":
            return self.__convert_to_date(content)
        if parent_nil == "true":
            return None
        return content or ""

    def __underscored(self, string):
        """Return *string* with every ``-`` replaced by ``_``."""
        return string.replace("-", "_")
|