#!/usr/bin/python
# mlpatch.py: Run with no arguments for usage
import sys, os
import sgmllib
from htmlentitydefs import entitydefs
import fileinput
from urllib2 import urlopen
# Number of bytes requested from the HTTP response per read() call; each
# chunk read is handed to the parser's feed() in main().
CHUNKSIZE = 8 * 1024
class MyParser(sgmllib.SGMLParser):
def __init__(self):
self.baseclass = sgmllib.SGMLParser
self.baseclass.__init__(self)
self.entitydefs = entitydefs
self.entitydefs["nbsp"] = " "
self.inbody = False
self.complete_line = False
self.discard_gathered()
def discard_gathered(self):
self.gather_data = False
self.gathered_data = ""
def noop(self):
pass
def out(self, data):
sys.stdout.write(data)
def handle_starttag(self, tag, method, attrs):
if not self.inbody: return
self.baseclass.handle_starttag(self, tag, method, attrs)
def handle_endtag(self, tag, method):
if not self.inbody: return
self.baseclass.handle_endtag(self, tag, method)
def handle_data(self, data):
if not self.inbody: return
data = data.replace('\n','')
if len(data) == 0: return
if self.gather_data:
self.gathered_data += data
else:
if self.complete_line:
if data[0] in ('+', '-', ' ', '#') \
or data.startswith("Index:") \
or data.startswith("@@ ") \
or data.startswith("======"):
# Real new line
self.out('\n')
else:
# Presume that we are wrapped
self.out(' ')
self.complete_line = False
self.out(data)
def handle_charref(self, ref):
if not self.inbody: return
self.baseclass.handle_charref(self, ref)
def handle_entityref(self, ref):
if not self.inbody: return
self.baseclass.handle_entityref(self, ref)
def handle_comment(self, comment):
if comment == ' body="start" ':
self.inbody = True
elif comment == ' body="end" ':
self.inbody = False
def handle_decl(self, data):
if not self.inbody: return
print "DECL: " + data
def unknown_starttag(self, tag, attrs):
if not self.inbody: return
print "UNKTAG: %s %s" % (tag, attrs)
def unknown_endtag(self, tag):
if not self.inbody: return
print "UNKTAG: /%s" % (tag)
def do_br(self, attrs):
self.complete_line = True
def do_p(self, attrs):
if self.complete_line:
self.out('\n')
self.out(' ')
self.complete_line = True
def start_a(self, attrs):
self.gather_data = True
def end_a(self):
self.out(self.gathered_data.replace('_at_', '@'))
self.discard_gathered()
def close(self):
if self.complete_line:
self.out('\n')
self.baseclass.close(self)
def main():
    """Fetch one archived mailing-list message and print its patch text.

    Reads ``sys.argv``.  With no arguments the usage text is written to
    stderr and the process exits with status 0.  With anything other than
    exactly four arguments (list, year, month, msgno) an error is written
    to stderr and the process exits with status 1.  Otherwise the message
    URL is built and printed, the page is downloaded in CHUNKSIZE pieces,
    and each piece is fed to a MyParser, which writes the result to stdout.
    """
    argc = len(sys.argv)
    if argc == 1:
        sys.stderr.write(
            "usage:   mlpatch.py dev|users year month msgno > foobar.patch\n"
            "example: mlpatch.py dev 2005 01 0001 > issue-XXXX.patch\n"
            "\n"
            "Very annoyingly, the http://svn.haxx.se/ subversion mailing"
            " list archives\n"
            "mangle inline patches, and provide no raw message download"
            " facility\n"
            "(other than for an entire month's email as an mbox).\n"
            "So, I wrote this script, to demangle them. It's not perfect,"
            " as it has to\n"
            "guess about whitespace, but it does an acceptable job.\n")
        sys.exit(0)
    if argc != 5:
        sys.stderr.write("error: mlpatch.py: Bad parameters - run with no "
                         "parameters for usage\n")
        sys.exit(1)

    list_name, year, month, msgno = sys.argv[1:]
    url = "http://svn.haxx.se/%s/archive-%s-%s/%s.shtml" % (
        list_name, year, month, msgno)
    print("MsgUrl: " + url)

    msgfile = urlopen(url)
    try:
        parser = MyParser()
        chunk = msgfile.read(CHUNKSIZE)
        while chunk:
            parser.feed(chunk)
            chunk = msgfile.read(CHUNKSIZE)
        parser.close()
    finally:
        # Release the connection even if reading or parsing fails part-way.
        msgfile.close()
# Run main() only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
|