File: dump2dot.py

package info (click to toggle)
python-pyahocorasick 1.4.1-2
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 748 kB
  • sloc: ansic: 4,554; python: 2,823; sh: 312; makefile: 242
file content (87 lines) | stat: -rw-r--r-- 1,720 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
"""
	Aho-Corasick string search algorithm.

	Author    : Wojciech Muła, wojciech_mula@poczta.onet.pl
	WWW       : http://0x80.pl
	License   : public domain
"""

import ahocorasick
import os

from ahocorasick import EMPTY, TRIE, AHOCORASICK;


def dump2dot(automaton, file):

	def writeln(text=""):
		file.write(text + "\n")

	def nodename(nodeid):
		return 'node%x' % (nodeid & 0xffffffff)

	if automaton.kind == EMPTY:
		writeln("digraph empty {}")
		return

	if automaton.kind == TRIE:
		name = "trie"
	else:
		name = "ahocorasick"


	writeln("digraph %s {" % name)

	nodes, edges, fail = automaton.dump()

	# nodes
	for nodeid, end in nodes:
		if end:
			attr = '[shape=doublecircle, label=""]'
		else:
			attr = '[shape=circle, label=""]'

		writeln("\t%s %s" % (nodename(nodeid), attr))

	def format_label(label):
		label = str(label, 'ascii')
		label = label.replace('"', r'\"')
		return '"%s"' % label

	# trie edges
	for nodeid, label, destid in edges:
		writeln("\t%s -> %s [label=%s]" % (nodename(nodeid), nodename(destid), format_label(label)))

	# fail links
	for nodeid, failid in fail:
		writeln("\t%s -> %s [color=blue]" % (nodename(nodeid), nodename(failid)))

	writeln("}")


def show(automaton):
	path = '/dev/shm/%s.dot' % os.getpid()
	with open(path, 'wt') as f:
		dump2dot(automaton, f)

	os.system("xdot %s" % path)
	#os.system("dotty %s" % path)
	os.unlink(path)


if __name__ == '__main__':
	A = ahocorasick.Automaton(ahocorasick.STORE_LENGTH)

	A.add_word("he")
	A.add_word("her")
	A.add_word("hers")
	A.add_word("she")
	A.add_word("cat")
	A.add_word("shield")

	with open('trie.dot', 'wt') as f:
		dump2dot(A, f)

	A.make_automaton()
	with open('ahocorasick.dot', 'wt') as f:
		dump2dot(A, f)