File: test_trie.py

package info (click to toggle)
python-biopython 1.45-3
  • links: PTS, VCS
  • area: main
  • in suites: lenny
  • size: 18,192 kB
  • ctags: 12,310
  • sloc: python: 83,505; xml: 13,834; ansic: 7,015; cpp: 1,855; sql: 1,144; makefile: 179
file content (101 lines) | stat: -rw-r--r-- 2,624 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
#!/usr/bin/env python

import StringIO
from operator import truth

from Bio import trie

# Populate a trie with keys that share prefixes ("he", "hej", "hello")
# plus one unrelated key.
trieobj = trie.trie()
for key, value in (("hello", 5), ("he", 7), ("hej", 9), ("foo", "bar")):
    trieobj[key] = value

# Sort the key list before printing it.
all_keys = trieobj.keys()
all_keys.sort()
print(all_keys)                  # ["foo", "he", "hej", "hello"]
print(trieobj["hello"])          # 5
print(trieobj.get("bye"))        # None

# Exact-key membership; expected output (one per line): 1, 1, 0
for key in ("hello", "he", "bye"):
    print(trieobj.has_key(key))

# Prefix membership; expected output (one per line): 1, 1, 0, 0
for prefix in ("h", "hel", "foa", "hello world"):
    print(trieobj.has_prefix(prefix))

print(len(trieobj))              # 4

# Keys stored under each prefix, sorted; expected output (one per line):
#   ["he", "hej", "hello"]
#   []
#   ["hej"]
#   []
for prefix in ("he", "l", "hej", "hejk"):
    matching = trieobj.with_prefix(prefix)
    matching.sort()
    print(matching)

# A second trie, starting with a single entry.
trieobj2 = trie.trie()
trieobj2["foo"] = 1

only_keys = trieobj2.keys()
only_keys.sort()
print(only_keys)                 # ["foo"]

only_values = trieobj2.values()
only_values.sort()
print(only_values)               # [1]

# get() with an explicit default for a missing key.
print(trieobj2.get("bar", 99))   # 99

trieobj2["hello"] = '55a'

# Approximate lookups; expected output (one per line):
#   [("foo", 1, 0)]
#   [("foo", 1, 0)]
#   []
#   [("foo", 1, 1)]
for query, limit in (("foo", 0), ("foo", 1), ("foa", 0), ("foa", 1)):
    print(trieobj2.get_approximate(query, limit))

# With a limit of 2, "foa" reaches "foo" three different ways:
#   foo   foo-  foo-
#   foa   f-oa  fo-a
# i.e. mismatch a->o;
#      insertion after f, deletion of o;
#      insertion after o, deletion of o.
approx = trieobj2.get_approximate("foa", 2)
print("found %d matches" % len(approx))     # 3
approx.sort()
print(approx)                 # [("foo", 1, 1), ("foo", 1, 2), ("foo", 1, 2)]

# Count how many times each distinct result tuple is returned.
tally = {}
for hit in trieobj2.get_approximate("foo", 4):
    tally[hit] = tally.get(hit, 0) + 1
counted = tally.items()
counted.sort()
print(counted)                # [(('foo', 1, 0), 1), (('hello', '55a', 4), 6)]

# Round-trip trieobj2 through an in-memory file: save, rewind, load.
buf = StringIO.StringIO()
trie.save(buf, trieobj2)
buf.seek(0)
trieobj3 = trie.load(buf)

# Print each key with the repr of its value, in sorted key order.
# Expected output:
#   foo 1
#   hello '55a'
loaded_keys = trieobj3.keys()
loaded_keys.sort()
for key in loaded_keys:
    print("%s %s" % (key, repr(trieobj3[key])))


# Checks for a bug that was found: insertions and deletions at the end
# of the key were not handled properly.
trieobj = trie.trie()
trieobj["hello"] = 1

# Expected output (one per line):
#   []
#   [('hello', 1, 3)]
#   []
#   [('hello', 1, 4)]
#   [('hello', 1, 4)]
for query, limit in (
    ('he', 2),
    ('he', 3),
    ('hello me!', 3),
    ('hello me!', 4),
    ('hello me!', 5),
):
    print(trieobj.get_approximate(query, limit))