1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
|
#!/usr/bin/python
# make .index file from raw dictd dictionary file
# (containing %h and %d tags)
# to be used with FileDbDict
# .index is written to stdout
# and is not sorted. Sorting it can speed up
# serpento startup a bit
import sys, string
from utils import decb64
def writeindex(header, start, ln):
    """Write one index line to stdout for every headword in `header`.

    Each line has three tab-separated fields followed by a newline:
    the headword, `decb64(start)` and `decb64(ln)`.

    header -- iterable of headwords that all point at the same entry
    start  -- offset of the entry in the dictionary file
    ln     -- length of the entry

    NOTE(review): `decb64` comes from the project's `utils` module; it
    presumably encodes the number the way dictd index files expect --
    confirm against utils.
    """
    row_format = "%s\t%s\t%s\n"
    for headword in header:
        fields = (headword, decb64(start), decb64(ln))
        sys.stdout.write(row_format % fields)
# Path of the raw dictionary file is the first command line argument.
db = sys.argv[1]

header = []      # headwords (%h values) collected for the current entry
entrystart = 0   # offset at which the current entry starts

# NOTE(review): offsets are taken from f.tell() on a file opened in text
# mode; on platforms that translate newlines these may differ from raw
# byte offsets -- confirm this matches what FileDbDict expects.
with open(db, 'r') as f:
    while True:
        # Phase 1: collect every %h headword up to the first %d line.
        while True:
            line = f.readline()
            if not line:
                break
            stripped = line.lstrip()
            if stripped.startswith('%d'):
                break
            if stripped.startswith('%h'):
                # Slice the stripped line so an indented "%h" tag is
                # removed from the headword as well.
                header.append(stripped[2:].strip())
        if not line:
            # End of file before any definition: nothing left to index.
            break

        # Phase 2: skip the definition body until the next %h or EOF.
        while True:
            tp = f.tell()   # offset of the line about to be read
            line = f.readline()
            if not line:
                # Last entry runs to the end of the file.
                end = f.tell()
                break
            stripped = line.lstrip()
            if stripped.startswith('%h'):  # next entry has started
                nextheader = [stripped[2:].strip()]
                newstart = tp
                end = tp - 1
                break

        ln = end - entrystart
        if header:
            writeindex(header, entrystart, ln)
        if not line:
            # EOF: stop before touching newstart/nextheader, which are
            # only assigned when a following %h line was actually seen.
            break
        entrystart = newstart
        header = nextheader
|