1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
|
# -*- coding: utf-8 -*-
from elementtree import ElementTree
def compile(htmlfile, handlers):
    """
    Parse htmlfile and dispatch elements to handlers keyed by their id.

    htmlfile is passed straight to ElementTree.parse, so it must be whatever
    that call accepts. Every element in the tree, at any depth, that carries
    an 'id' attribute matching a key of handlers is passed to the callable
    stored under that key.

    A handler that returns False (the value itself, not merely something
    falsy such as None) has its element removed from its parent. The root
    element has no parent and so cannot be removed; a False result for the
    root is ignored instead of raising KeyError.

    The serialized tree is printed to standard output and also returned.
    """
    dom = ElementTree.parse(htmlfile)
    root = dom.getroot()
    parentmap = GetParentMap(dom)
    # Walk every level of the tree, not just the root's direct children.
    for element in dom.getiterator():
        element_id = element.attrib.get('id')
        if element_id is None or element_id not in handlers:
            continue
        # Only an explicit False requests removal; a handler that returns
        # nothing (None) keeps its element in the tree.
        if handlers[element_id](element) is False:
            # The root never appears as a key in the parent map.
            parent = parentmap.get(element)
            if parent is not None:
                parent.remove(element)
    # Serialize once and reuse the result for both the print and the return.
    output = ElementTree.tostring(root)
    print(output)
    return output
def GetParentMap(dom):
    """
    Build a child -> parent lookup for the elements reachable from dom.

    Each element produced by dom.getiterator() is treated as a parent, and
    each of its direct children becomes a key that maps back to it. An
    element that is nobody's child (such as the root) never appears as a key.
    """
    return dict(
        (child, parent)
        for parent in dom.getiterator()
        for child in parent
    )
|