File: indexding.py

package info (click to toggle)
serpento 0.3.6
  • links: PTS
  • area: main
  • in suites: woody
  • size: 292 kB
  • ctags: 381
  • sloc: python: 1,644; ansic: 666; perl: 157; sh: 116; makefile: 72
file content (63 lines) | stat: -rwxr-xr-x 1,303 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#!/usr/bin/python

# This script indexes a ding-style dictionary.
# The dictionary consists of lines; each line is one entry and looks like this:
#   first : second
# where "first" is a word in the first language and "second" is its translation.
# " : " is the separator; it can be changed below (sep).
# An entry may list several words:
#   first : second1, second2, second3
# separated by ", " (see sep1 below).

# Separator between the two sides of an entry line; the main loop splits
# every line on it.
sep = " : "
# Separator between the individual words listed on one side of an entry;
# the main loop splits both sides on it.
sep1 = ", "

import sys, string
from utils import decb64


def nulltrans(x):
    """Return *x* unchanged, wrapped in a one-element tuple."""
    return (x,)


def trans(x):
    """Return *x* stripped of surrounding whitespace, as a one-element tuple.

    The result is a tuple because addword() iterates over whatever this
    function returns and prints one record per element.
    """
    # The str method is what the deprecated string.strip() function
    # delegated to; the method form also works on interpreters where the
    # string-module function no longer exists.
    return (x.strip(),)
                           
def addword(word, pos, l):
    """Print one tab-separated index record for every form of *word*.

    word -- raw word text; it is passed through trans() first
    pos  -- position value for the record, passed through decb64()
    l    -- length value for the record, passed through decb64()

    Each record has the form "<form>\\t<decb64(pos)>\\t<decb64(l)>".
    NOTE(review): decb64 comes from the project's utils module and is not
    visible here; presumably it renders the integer in a compact base-64
    style form -- confirm against utils.
    """
    forms = trans(word)
    if not forms:
        return
    pos_field = decb64(pos)
    len_field = decb64(l)
    for form in forms:
        # A single parenthesised argument prints the same text under the
        # Python 2 print statement as under the print() function.
        print("%s\t%s\t%s" % (form, pos_field, len_field))

# The dictionary file to index is named by the first command-line argument.
if len(sys.argv) < 2:
    # Without this check a missing argument surfaces as a bare IndexError
    # traceback instead of a usable hint.
    sys.stderr.write("usage: %s DICTIONARY\n" % sys.argv[0])
    sys.exit(2)
f = open(sys.argv[1], "r")

# Offset of the line currently being processed, counted from the start of
# the file; the main loop advances it by the length of every line read.
pos = 0

# The names below are initialised here but are not referenced anywhere else
# in this script; they are kept so the module's globals stay unchanged.
lastlinepos = 0
inword = 0
word = ""

def readblock(f):
    """Read one block of lines from *f* and return it as a single string.

    A block runs up to and including the next line that consists of a
    lone newline, or up to end of file if no such line follows.  At end
    of file the result is the empty string.
    """
    pieces = []
    while True:
        line = f.readline()
        if not line:
            # End of file: return whatever was collected so far.
            break
        pieces.append(line)
        if line == '\n':
            # The blank line terminates the block and is part of it.
            break
    return "".join(pieces)

# Walk the dictionary file line by line and emit one index record per word.
# `pos` is the offset of the current line from the start of the file, kept
# as the running total of the lengths of all lines read so far.
block = ""  # not referenced elsewhere in this script; kept as a global
try:
    # iter() with a sentinel keeps calling f.readline() until it returns
    # the empty string, i.e. until end of file.
    for line in iter(f.readline, ""):
        # Offset of the next line; taken before the newline is removed so
        # that the line terminator is included in the count.
        endpos = pos + len(line)
        line = line.replace('\n', '')
        sides = line.split(sep)
        if len(sides) >= 2:
            # Only the first two sides are used; each may list several
            # words separated by sep1.
            entries = sides[0].split(sep1) + sides[1].split(sep1)
            for entry in entries:
                addword(entry, pos, len(line))
        # Lines without the separator (blank lines, comments, ...) hold no
        # entry.  They used to abort the run with a ValueError; now they
        # are skipped, but the offset still advances past them.
        pos = endpos
finally:
    # Release the file handle even if indexing fails part-way through.
    f.close()