import sys, dictdlib, re

# Generator script Copyright (c) 2002 by John Goerzen.
# GPL'd.

# Headword line: starts with an upper-case letter, continues with upper-case
# letters, digits, apostrophes, spaces or hyphens, and ends on an upper-case
# letter (so the captured headword has at least two characters).  The rest of
# the line must contain a '.', ',' or ';' with text on both sides of it.
# Earlier, looser variant kept for reference:
# defpatt = "^([A-Z][^a-z0-9,.@;]+).+[.,;].+$"
defpatt = "^([A-Z][A-Z0-9' -]*[A-Z]).+[.,;].+$"

# Single-letter headword (A, F or M) directly followed by ',', '.' or ';'.
# Only tried while the leading part of a file is still being skipped.
# NOTE(review): presumably these are the one-letter entries that open some of
# the input files -- confirm against the source texts.
firstpatt = "^([AFM])[,.;]"


def extract_intro(lines):
    """Return the text that precedes the first headword line.

    lines -- iterable of lines (each normally ending in a newline).

    Every line is collected until one matches ``defpatt``; that line and
    everything after it is left out.  If no line matches, all of the input
    is returned.
    """
    def_re = re.compile(defpatt)
    intro = []
    for line in lines:
        if def_re.search(line):
            break
        intro.append(line)
    return "".join(intro)


def parse_entries(lines):
    """Split the lines of one input file into dictionary entries.

    lines -- iterable of lines (each normally ending in a newline).

    Returns a list of ``(headword, definition_text)`` tuples in file order.
    A new entry can only start on the first non-blank line after a blank
    line (or at the very top of the input).  Everything before the first
    headword is discarded.  The final entry is included as well, together
    with whatever text follows it up to the end of the input.
    """
    def_re = re.compile(defpatt)
    first_re = re.compile(firstpatt)

    entries = []
    headword = None             # Headword of the entry being collected.
    parts = []                  # Lines of the entry being collected.
    ignoring = True             # Still skipping the leading part of the file.
    readyforstart = True        # Previous line was blank (or start of input).

    for line in lines:
        if len(line) == 1:      # Blank line: nothing but the newline.
            readyforstart = True
            if not ignoring:
                parts.append(line)
            continue

        if not readyforstart:
            # Continuation of the current paragraph; it cannot start an entry.
            if not ignoring:
                parts.append(line)
            continue

        # First line of a paragraph.  Clear the flag so the paragraph's
        # following lines are not tested as headwords.
        readyforstart = False
        if ignoring:
            match = first_re.search(line) or def_re.search(line)
        else:
            match = def_re.search(line)

        if match is None:
            if not ignoring:
                parts.append(line)
            continue

        # Start of a new entry: store the one collected so far, if any.
        if headword is not None:
            entries.append((headword, "".join(parts)))
        headword = match.group(1)
        parts = [line]
        ignoring = False

    # The last entry has no following headword to trigger its output, so it
    # has to be stored here; otherwise it would be lost.
    if headword is not None:
        entries.append((headword, "".join(parts)))
    return entries


def main(argv):
    """Build the 'bouvier' dictionary from the files named in argv[1:].

    The intro text is taken from the first file; every file (including the
    first) is then scanned for entries, which are handed to a
    dictdlib.DictWriter.  Returns the process exit status: 0 on success,
    1 if no input file was given.
    """
    filenames = argv[1:]
    if not filenames:
        sys.stderr.write("Usage: %s FILE [FILE ...]\n" % argv[0])
        return 1

    print("Generating intro text.")

    # Find the intro from the first file.
    fd = open(filenames[0], "rt")
    try:
        introtext = extract_intro(fd)
    finally:
        fd.close()

    print("Processing files.")

    dw = dictdlib.DictWriter('bouvier',
                             'http://www.constitution.org/bouv/bouvier.htm',
                             "Bouvier's Law Dictionary, Revised 6th Ed (1856)",
                             introtext)

    for filename in filenames:
        fd = open(filename, "rt")
        try:
            entries = parse_entries(fd)
        finally:
            fd.close()
        for headword, defstr in entries:
            dw.writeentry(defstr, [headword])

    dw.finish()
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))

