# basic tests

import sys
import time, string
from xml.parsers import sgmlop, xmllib

try:
    FILE, VERBOSE = sys.argv[1], 2
except IndexError:
    FILE, VERBOSE = "hamlet.xml", 1

print
print "test collecting parsers on", FILE
print

# --------------------------------------------------------------------
# sgmlop

class myCollector:
    """Event collector that is registered with the raw sgmlop parser.

    Every non-text callback is recorded, in call order, as a tuple in
    ``self.data``.  Character data and CDATA sections are buffered in
    ``self.text`` and written to ``self.data`` as a single ``repr()``-ed
    string just before the next non-text event is recorded.  Text that is
    still buffered after the last non-text event stays in ``self.text``.
    """

    def __init__(self):
        self.data = []  # recorded items, in call order
        self.text = []  # text fragments seen since the last recorded event

    def _flush_text(self):
        # Merge the buffered text fragments into one repr()-ed item.
        if self.text:
            self.data.append(repr("".join(self.text)))
            self.text = []

    def finish_starttag(self, tag, data):
        """Record a start tag as ``("start", tag, data)``."""
        self._flush_text()
        # list.append() accepts exactly one argument, so the event has to
        # be wrapped in a tuple explicitly.
        self.data.append(("start", tag, data))

    def handle_proc(self, tag, data):
        """Record a processing instruction as ``("pi", tag, data)``."""
        self._flush_text()
        self.data.append(("pi", tag, data))

    def handle_special(self, data):
        """Record a special item as ``("special", data)``."""
        self._flush_text()
        self.data.append(("special", data))

    def handle_entityref(self, data):
        """Record an entity reference as ``("entity", data)``."""
        self._flush_text()
        self.data.append(("entity", data))

    def handle_data(self, data):
        """Buffer a fragment of character data."""
        self.text.append(data)

    def handle_cdata(self, data):
        """Buffer a CDATA section, marked with a ``"CDATA"`` prefix."""
        self.text.append("CDATA" + data)

t = time.clock()
for i in range(1):
    out = myCollector()
    fp = open(FILE)
    parser = sgmlop.XMLParser()
    parser.register(out)
    b = 0
    while 1:
        data = fp.read(1024)
        if not data:
            break
        parser.feed(data)
        b = b + len(data)
    parser.close()
t1 = time.clock() - t

print "raw sgmlop:", len(out.data), "items;", round(t1, 3), "seconds;",
print round(b / t1 / 1024, 2), "kbytes per second"

# --------------------------------------------------------------------
# xmllib

class FastXMLParser(xmllib.FastXMLParser):
    """``xmllib.FastXMLParser`` subclass that collects parser events.

    Every non-text handler records its event, in call order, as a tuple in
    ``self.data``.  Character data and CDATA sections are buffered in
    ``self.text`` and written to ``self.data`` as a single ``repr()``-ed
    string just before the next non-text event is recorded.  Text that is
    still buffered after the last non-text event stays in ``self.text``.
    """

    def __init__(self):
        xmllib.FastXMLParser.__init__(self)
        self.data = []  # recorded items, in call order
        self.text = []  # text fragments seen since the last recorded event

    # NOTE(review): assumes the base class defines no ``_flush_text`` of
    # its own -- confirm against xmllib.
    def _flush_text(self):
        # Merge the buffered text fragments into one repr()-ed item.
        if self.text:
            self.data.append(repr("".join(self.text)))
            self.text = []

    def unknown_starttag(self, tag, data):
        """Record a start tag as ``("start", tag, data)``."""
        self._flush_text()
        # list.append() accepts exactly one argument, so the event has to
        # be wrapped in a tuple explicitly.
        self.data.append(("start", tag, data))

    def handle_proc(self, tag, data):
        """Record a processing instruction as ``("pi", tag, data)``."""
        self._flush_text()
        self.data.append(("pi", tag, data))

    def handle_special(self, data):
        """Record a special item as ``("special", data)``."""
        self._flush_text()
        self.data.append(("special", data))

    def handle_entityref(self, data):
        """Record an entity reference as ``("entity", data)``."""
        self._flush_text()
        self.data.append(("entity", data))

    def handle_data(self, data):
        """Buffer a fragment of character data."""
        self.text.append(data)

    def handle_cdata(self, data):
        """Buffer a CDATA section, marked with a ``"CDATA"`` prefix."""
        self.text.append("CDATA" + data)

t = time.clock()
for i in range(1):
    fp = open(FILE)
    parser2 = FastXMLParser()
    b = 0
    while 1:
        data = fp.read(1024)
        if not data:
            break
        parser2.feed(data)
        b = b + len(data)
    parser2.close()
t2 = time.clock() - t

print "fast xmllib:", len(parser2.data), "items;", round(t2, 3), "seconds;",
print round(b / t2 / 1024, 2), "kbytes per second"

class SlowXMLParser(xmllib.SlowXMLParser):
    """``xmllib.SlowXMLParser`` subclass that collects parser events.

    Every non-text handler records its event, in call order, as a tuple in
    ``self.data``.  Character data and CDATA sections are buffered in
    ``self.text`` and written to ``self.data`` as a single ``repr()``-ed
    string just before the next non-text event is recorded.  Text that is
    still buffered after the last non-text event stays in ``self.text``.
    """

    def __init__(self):
        xmllib.SlowXMLParser.__init__(self)
        self.data = []  # recorded items, in call order
        self.text = []  # text fragments seen since the last recorded event

    # NOTE(review): assumes the base class defines no ``_flush_text`` of
    # its own -- confirm against xmllib.
    def _flush_text(self):
        # Merge the buffered text fragments into one repr()-ed item.
        if self.text:
            self.data.append(repr("".join(self.text)))
            self.text = []

    def unknown_starttag(self, tag, data):
        """Record a start tag as ``("start", tag, data)``."""
        self._flush_text()
        # list.append() accepts exactly one argument, so the event has to
        # be wrapped in a tuple explicitly.
        self.data.append(("start", tag, data))

    def handle_proc(self, tag, data):
        """Record a processing instruction as ``("pi", tag, data)``."""
        self._flush_text()
        self.data.append(("pi", tag, data))

    def handle_special(self, data):
        """Record a special item as ``("special", data)``."""
        self._flush_text()
        self.data.append(("special", data))

    def handle_entityref(self, data):
        """Record an entity reference as ``("entity", data)``."""
        self._flush_text()
        self.data.append(("entity", data))

    def handle_data(self, data):
        """Buffer a fragment of character data."""
        self.text.append(data)

    def handle_cdata(self, data):
        """Buffer a CDATA section, marked with a ``"CDATA"`` prefix."""
        self.text.append("CDATA" + data)

t = time.clock()
for i in range(1):
    fp = open(FILE)
    parser3 = SlowXMLParser()
    b = 0
    while 1:
        data = fp.read(1024)
        if not data:
            break
        parser3.feed(data)
        b = b + len(data)
    parser3.close()
t3 = time.clock() - t

print "slow xmllib:", len(parser3.data), "items;", round(t3, 3), "seconds;",
print round(b / t3 / 1024, 2), "kbytes per second"

print
print "normalized timing:"
print "slow xmllib", 1.0
print "fast xmllib", round(t2 / t3, 2), "(%sx)" % round(t3 / t2, 1)
print "sgmlop     ", round(t1 / t3, 2), "(%sx)" % round(t3 / t1, 1)
print

print "looking for differences:"

items = min(len(parser2.data), len(parser3.data))

for i in xrange(items):
    if parser2.data[i] != parser3.data[i]:
        for j in range(max(i-5, 0), min(i+5, items)):
            if parser2.data[j] != parser3.data[j]:
                print "+", j+1, parser2.data[j]
                print "*", j+1, parser3.data[j]
            else:
                print "=", j+1, parser2.data[j]
        break
else:
    print "   (none found)"
