1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81
|
"""
Aho-Corasick string search algorithm.
Author : Wojciech Muła, wojciech_mula@poczta.onet.pl
WWW : http://0x80.pl
License : public domain
"""
import pyahocorasick
def exportdot(trie, file):
def writeln(text=""):
file.write(text + "\n")
writeln("digraph ahocorasick {")
def walk(node):
queue = [node]
while queue:
node = queue.pop()
yield node
for child in node.children.itervalues():
if child != node:
queue.append(child)
nodes = list(walk(trie.root))
# nodes
for node in nodes:
if node.output != pyahocorasick.nil:
writeln("\tnode%d [shape=doublecircle, label=\"\"]" % id(node))
else:
writeln("\tnode%d [shape=circle, label=\"\"]" % id(node))
# trie edges
for node in nodes:
for letter, child in node.children.iteritems():
nodeid = id(node)
destid = id(child)
if destid == id(trie.root):
# do not show self-links of root node created during make_automaton
continue
if letter.isalnum():
label = letter
else:
label = '%02x' % ord(letter)
writeln("\tnode%d -> node%d [label=\"%s\"]" % (nodeid, destid, label))
# fail links
for node in nodes:
nodeid = id(node)
failid = id(node.fail)
if failid != pyahocorasick.nil:
writeln("\tnode%d -> node%d [color=blue]" % (nodeid, failid))
writeln("}")
if __name__ == '__main__':
A = pyahocorasick.Trie()
A.add_word("he", 0)
A.add_word("her", 1)
A.add_word("hers", 2)
A.add_word("she", 3)
A.add_word("cat", 4)
A.add_word("shield", 5)
with open('trie.dot', 'wt') as f:
exportdot(A, f)
A.make_automaton()
with open('ahocorasick.dot', 'wt') as f:
exportdot(A, f)
|