File: test_trie.py

package info (click to toggle)
python-biopython 1.54-1
  • links: PTS, VCS
  • area: main
  • in suites: squeeze
  • size: 25,400 kB
  • ctags: 10,975
  • sloc: python: 116,757; xml: 33,167; ansic: 8,622; sql: 1,488; makefile: 147
file content (114 lines) | stat: -rw-r--r-- 3,125 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#!/usr/bin/env python

# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.

import StringIO
from operator import truth

# Bio.trie is an optional compiled extension.  If it cannot be imported,
# raise MissingExternalDependencyError with a message explaining why.
try:
    from Bio import trie
except ImportError:
    import os
    from Bio import MissingExternalDependencyError
    # Default explanation; replaced below when running under Jython.
    message = "Could not import Bio.trie, check C code was compiled."
    if os.name == "java":
        message = "Not available on Jython, Bio.trie requires compiled C code."
    raise MissingExternalDependencyError(message)

trieobj = trie.trie()

trieobj["hello"] = 5
trieobj["he"] = 7
trieobj["hej"] = 9
trieobj["foo"] = "bar"

k = trieobj.keys()
k.sort()
print k                          # ["foo", "he", "hej", "hello"]
print trieobj["hello"]           # 5
print trieobj.get("bye")         # None

print trieobj.has_key("hello")   # 1
print trieobj.has_key("he")      # 1
print trieobj.has_key("bye")     # 0

print trieobj.has_prefix("h")    # 1
print trieobj.has_prefix("hel")  # 1
print trieobj.has_prefix("foa")  # 0
print trieobj.has_prefix("hello world")   # 0

print len(trieobj)               # 4

k = trieobj.with_prefix("he")
k.sort()
print k                          # ["he", "hej", "hello"]
k = trieobj.with_prefix("l")
k.sort()
print k                          # []
k = trieobj.with_prefix("hej")
k.sort()
print k                          # ["hej"]
k = trieobj.with_prefix("hejk")
k.sort()
print k                          # []

trieobj2 = trie.trie()
trieobj2["foo"] = 1
k = trieobj2.keys()
k.sort()
print k                          # ["foo"]
v = trieobj2.values()
v.sort()
print v                          # [1]

print trieobj2.get("bar", 99)    # 99

trieobj2["hello"] = '55a'

print trieobj2.get_approximate("foo", 0)    # [("foo", 1, 0)]
print trieobj2.get_approximate("foo", 1)    # [("foo", 1, 0)]
print trieobj2.get_approximate("foa", 0)    # []
print trieobj2.get_approximate("foa", 1)    # [("foo", 1, 1)]
x = trieobj2.get_approximate("foa", 2)
print "found %d matches" % len(x)           # 3
x.sort()
print x                       # [("foo", 1, 1), ("foo", 1, 2), ("foo", 1, 2)]
# foo  foo-  foo-
# foa  f-oa  fo-a


#import sys; sys.exit(0)

# mismatch a->o
# insertion after f, deletion of o
# insertion after o, deletion of o

x = trieobj2.get_approximate("foo", 4)
y = {}
for z in x:
    y[z] = y.get(z, 0) + 1
x = y.items()
x.sort()
print x                       # [(('foo', 1, 0), 1), (('hello', '55a', 4), 6)]

h = StringIO.StringIO()
trie.save(h, trieobj2)
h.seek(0)
trieobj3 = trie.load(h)
k = trieobj3.keys()
k.sort()
for m in k:                       # foo 1
    print m, repr(trieobj3[m])    # hello '55a'


# Found bug, doesn't handle insertions and deletions at end properly.
trieobj = trie.trie()
trieobj["hello"] = 1
print trieobj.get_approximate('he', 2)        # []
print trieobj.get_approximate('he', 3)        # [('hello', 1, 3)]
print trieobj.get_approximate('hello me!', 3) # []
print trieobj.get_approximate('hello me!', 4) # [('hello', 1, 4)]
print trieobj.get_approximate('hello me!', 5) # [('hello', 1, 4)]