File: graphparse.py

package info (click to toggle)
pypy3 7.3.19%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 212,236 kB
  • sloc: python: 2,098,316; ansic: 540,565; sh: 21,462; asm: 14,419; cpp: 4,451; makefile: 4,209; objc: 761; xml: 530; exp: 499; javascript: 314; pascal: 244; lisp: 45; csh: 12; awk: 4
file content (142 lines) | stat: -rw-r--r-- 4,897 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
"""
Graph file parsing.
"""
from __future__ import print_function, absolute_import

import sys, re
import subprocess

from dotviewer import msgstruct

from dotviewer.strunicode import forcestr, forceunicode

# Runs of characters that cannot belong to a link word, i.e. anything other
# than ASCII letters, digits, '_' and '.'.  parse_plain() splits label text
# with this pattern to find candidate link words.
# NOTE(review): forcestr() presumably converts each pattern to the same
# string type as the graph data (bytes on Python 3) -- confirm in
# dotviewer.strunicode.
re_nonword = re.compile(forcestr(r'([^0-9a-zA-Z_.]+)'))
# Header line of a .plain file: 'graph' followed by three numeric fields that
# run up to the end of the line.
re_plain   = re.compile(forcestr(r'graph [-0-9.]+ [-0-9.]+ [-0-9.]+$'), re.MULTILINE)
# The stand-alone keywords 'graph' and 'digraph', in any letter case.
re_digraph = re.compile(forcestr(r'\b(graph|digraph)\b'), re.IGNORECASE)

def guess_type(content):
    """Guess which kind of graph description `content` holds.

    Returns 'plain' when `content` already starts with a .plain header
    line, 'dot' for a directed graph (keyword 'digraph'), 'neato' for an
    undirected one (keyword 'graph'), and 'unknown' -- after printing a
    warning on stderr -- when no keyword can be identified.

    XXX not a perfect heuristic
    """
    if re_plain.match(content):
        return 'plain'     # already a .plain file
    # Look for the word 'graph' or 'digraph' followed by a '{'.  The keyword
    # that counts is the last one found before the first opening brace that
    # follows the first keyword.
    bracepos = None
    # Use the same string type as the b'...' literals compared against
    # below, so the "nothing found" case never compares text with bytes.
    lastfound = b''
    for match in re_digraph.finditer(content):
        position = match.start()
        if bracepos is None:
            bracepos = content.find(b'{', position)
            if bracepos < 0:
                break      # no '{' after the first keyword: give up
        elif position > bracepos:
            break          # this keyword is past the opening brace
        lastfound = match.group()
    keyword = lastfound.lower()
    if keyword == b'digraph':
        return 'dot'
    if keyword == b'graph':
        return 'neato'
    # dot2plain_graphviz() runs 'dot' for every type other than 'neato',
    # which is why the message announces 'dot' although 'unknown' is returned.
    print("Warning: could not guess file type, using 'dot'", file=sys.stderr)
    return 'unknown'

def dot2plain_graphviz(content, contenttype, use_codespeak=False):
    """Convert `content` to the .plain format by running Graphviz locally.

    `content` is written to the standard input of 'neato -Tplain' when
    `contenttype` is 'neato', and of 'dot -Tplain' for any other type.
    The command's standard output is returned.  `use_codespeak` is not
    used by this function.

    Raises PlainParseError if the command produces no output at all.
    """
    if contenttype != 'neato':
        cmdline = 'dot -Tplain'
    else:
        cmdline = 'neato -Tplain'
    close_fds = sys.platform != 'win32'
    p = subprocess.Popen(cmdline, shell=True, close_fds=close_fds,
                         stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    (child_in, child_out) = (p.stdin, p.stdout)
    writer = None
    try:
        import threading
    except ImportError:
        # No thread support: feed the child synchronously before reading.
        bkgndwrite(child_in, content)
    else:
        # Feed the child from a separate thread while this one reads the
        # output, so that neither pipe has to buffer everything.
        writer = threading.Thread(target=bkgndwrite, args=(child_in, content))
        writer.start()
    try:
        plaincontent = child_out.read()
    finally:
        child_out.close()
        # Reap the helper thread and the child process; without this the
        # process would be left behind as a zombie.
        if writer is not None:
            writer.join()
        p.wait()
    if not plaincontent:    # 'dot' is likely not installed
        raise PlainParseError("no result from running 'dot'")
    return plaincontent

def bkgndwrite(f, data):
    """Write `data` to the file object `f`, then close it.

    dot2plain_graphviz() uses this as the body of the thread that feeds
    the child process.  `f` is closed even when the write fails, so the
    file object is never leaked; the write error itself still propagates.
    """
    try:
        f.write(data)
    finally:
        f.close()

class PlainParseError(Exception):
    """Raised when .plain graph data is missing or malformed."""

def splitline(line, re_word = re.compile(forcestr(r'[^\s"]\S*|["]["]|["].*?[^\\]["]'))):
    """Split one line of a .plain file into its fields.

    A field is either a run of non-whitespace characters that does not
    start with a double quote, or a double-quoted string.  Unquoted fields
    are returned exactly as matched.  Quoted fields are passed through
    forceunicode() and evaluated as Python string literals, which removes
    the quotes and resolves backslash escapes.
    """
    import ast

    def unquote(field):
        if not field.startswith(b'"'):
            return field
        return ast.literal_eval(forceunicode(field))

    return [unquote(field) for field in re_word.findall(line)]

def parse_plain(graph_id, plaincontent, links={}, fixedfont=False):
    """Generate the viewer messages describing a graph in .plain format.

    `plaincontent` is the raw .plain text.  `links` maps a word to either
    a status-bar text or a `(statusbartext, color)` tuple, where `color`
    has three components.  It is only read, never modified.  `fixedfont`
    requests a CMSG_FIXED_FONT message.

    Yields tuples whose first item is a msgstruct.CMSG_* constant, in this
    order: CMSG_START_GRAPH, one CMSG_ADD_NODE / CMSG_ADD_EDGE per
    node / edge line, one CMSG_ADD_LINK per entry of `links` whose word
    occurs in a node or edge label, optionally CMSG_FIXED_FONT, and
    finally CMSG_STOP_GRAPH.

    Raises PlainParseError on empty input, on a missing 'graph' header and
    on malformed 'node' or 'edge' lines.  As this is a generator, the
    error surfaces while the result is being iterated.
    """
    plaincontent = plaincontent.replace(b'\r\n', b'\n')    # fix Windows EOL
    lines = plaincontent.splitlines(True)
    # Join every line ending in a backslash with the line that follows it.
    # Walking backwards lets a chain of continued lines collapse into one.
    for i in range(len(lines)-2, -1, -1):
        if lines[i].endswith(b'\\\n'):   # line ending in '\'
            lines[i] = lines[i][:-2] + lines[i+1]
            del lines[i+1]
    if not lines:
        raise PlainParseError("should start with 'graph'")
    header = splitline(lines.pop(0))
    if not header or header[0] != b'graph':
        raise PlainParseError("should start with 'graph'")
    yield (msgstruct.CMSG_START_GRAPH, graph_id) + tuple(header[1:])

    texts = []     # labels of all nodes and edges, searched for links below
    for line in lines:
        line = splitline(line)
        if not line:
            continue    # blank line: nothing to parse
        kind = line[0]
        if kind == b'node':
            if len(line) != 11:
                raise PlainParseError("bad 'node'")
            yield (msgstruct.CMSG_ADD_NODE,) + tuple(line[1:])
            texts.append(line[6])
        elif kind == b'edge':
            # line[3] is a count n of coordinate pairs (see the Graphviz
            # plain format); the optional label sits right after them.
            try:
                i = 4 + 2 * int(line[3])
            except (IndexError, ValueError):
                raise PlainParseError("bad 'edge'")
            yield (msgstruct.CMSG_ADD_EDGE,) + tuple(line[1:])
            if len(line) > i + 2:
                texts.append(line[i])
        elif kind == b'stop':
            break

    if links:
        # only include the links that really appear in the graph
        # NOTE(review): labels may be raw fields or unquoted strings (see
        # splitline); confirm that re_nonword and the keys of `links` use
        # a matching string type.
        seen = set()
        for text in texts:
            for word in re_nonword.split(text):
                if not word or word not in links or word in seen:
                    continue
                t = links[word]
                if isinstance(t, tuple):
                    statusbartext, color = t
                else:
                    statusbartext = t
                    color = None
                if color is not None:
                    yield (msgstruct.CMSG_ADD_LINK, word,
                           statusbartext, color[0], color[1], color[2])
                else:
                    yield (msgstruct.CMSG_ADD_LINK, word, statusbartext)
                seen.add(word)

    if fixedfont:
        yield (msgstruct.CMSG_FIXED_FONT,)

    yield (msgstruct.CMSG_STOP_GRAPH,)

def parse_dot(graph_id, content, links={}, fixedfont=False):
    """Parse a graph description into the list of viewer messages.

    `content` may be graph source text or text that is already in .plain
    format; guess_type() decides which.  Anything that is not 'plain' is
    first converted with dot2plain_graphviz().  `graph_id`, `links` and
    `fixedfont` are passed on unchanged to parse_plain(), whose messages
    are collected into the returned list.

    Raises PlainParseError if the conversion or the parsing fails.
    """
    contenttype = guess_type(content)
    if contenttype == 'plain':
        plaincontent = content
    else:
        # A PlainParseError raised by the conversion propagates as is.
        plaincontent = dot2plain_graphviz(content, contenttype)
    return list(parse_plain(graph_id, plaincontent, links, fixedfont))