File: asterisk.py

package info (click to toggle)
pypy3 7.0.0%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 111,848 kB
  • sloc: python: 1,291,746; ansic: 74,281; asm: 5,187; cpp: 3,017; sh: 2,533; makefile: 544; xml: 243; lisp: 45; csh: 21; awk: 4
file content (171 lines) | stat: -rw-r--r-- 5,350 bytes parent folder | download | duplicates (10)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
# some analysis of global imports

"""
The idea:
compile a module's source text and walk recursively
through the code objects. Find out which globals
are used.
Then examine each 'import *' by importing that module
and looking for those globals.
Replace the 'import *' by the list found.
More advanced: If the new import has more than, say, 5 entries,
rewrite the import to use module.name throughout the source.
"""

import dis, cStringIO, sys

def disasm(code):
    """Return the text that ``dis.dis(code)`` prints, as one string.

    ``sys.stdout`` is temporarily replaced by an in-memory buffer while
    ``dis.dis`` runs, and is put back afterwards even if ``dis.dis``
    raises.
    """
    saved_stdout = sys.stdout
    try:
        capture = cStringIO.StringIO()
        sys.stdout = capture
        dis.dis(code)
        return capture.getvalue()
    finally:
        # always undo the redirection, also on errors
        sys.stdout = saved_stdout

def opsequence(code):
    """Yield ``(ofs, op, arg)`` triples parsed from ``disasm(code)``.

    Every line of the disassembly text is cut at its first ``(``.
    In the part before the paren, leading words that are all digits or
    equal to ``>>`` are dropped; the last all-digit word among them is
    remembered as ``ofs``.  The next word is the opcode name ``op``.

    ``arg`` is the text after the paren minus its final character (the
    closing paren), or None if the line has no paren.  For opcodes whose
    name starts with ``JUMP``, ``arg`` is instead the word following the
    opcode name, converted to int.

    Lines that have no word left after the prefix are skipped.
    """
    for text in disasm(code).split('\n'):
        head, paren, tail = text.partition('(')
        if paren:
            arg = tail
        else:
            arg = None
        tokens = head.split()
        # count the leading numeric / '>>' words, keeping the last number
        skip = 0
        for token in tokens:
            if token.isdigit():
                ofs = int(token)
            elif token != '>>':
                break
            skip += 1
        del tokens[:skip]
        if not tokens:
            continue
        op = tokens[0]
        if arg:
            arg = arg[:-1] # )
        if op.startswith('JUMP'):
            arg = int(tokens[1])
        yield ofs, op, arg

def globalsof(code, globrefs=None, stars=None, globals=None):
    """Collect global-name information from a single code object.

    The three accumulators are updated in place and returned, so the
    same objects can be passed in again for further code objects; any
    that is None is created fresh.

    globrefs -- maps each name read with LOAD_GLOBAL to a dict
                ``{code: [offsets]}`` of the places where it is read.
    stars    -- list of ``(module, offset)`` pairs, one per IMPORT_STAR,
                in the order they are encountered.
    globals  -- maps each name stored with STORE_NAME or STORE_GLOBAL to
                ``(imported_name, module)`` when the store directly
                follows an IMPORT_NAME / IMPORT_FROM, else
                ``(None, None)``.

    Returns the tuple ``(globrefs, stars, globals)``.
    """
    if globrefs is None:
        globrefs = {}
    if stars is None:
        stars = []  # do stars in order
    if globals is None:
        globals = {}
    # True while the previous opcode belonged to an import sequence
    in_seq = False
    for ofs, op, arg in opsequence(code):
        if op == 'LOAD_GLOBAL':
            refs = globrefs.setdefault(arg, {})
            refs.setdefault(code, []).append(ofs)
        elif op == 'IMPORT_NAME':
            in_seq = True
            imp_module = arg
            imp_what = None
        elif op == 'IMPORT_FROM':
            in_seq = True
            imp_what = arg
        elif op in ('STORE_NAME', 'STORE_GLOBAL'):
            # we are not interested in local imports, which
            # would generate a STORE_FAST.  STORE_GLOBAL is what a
            # function emits for a name declared 'global'; such a name
            # is defined by the module itself as well.
            if in_seq:
                globals[arg] = imp_what, imp_module
                in_seq = False
            else:
                globals[arg] = None, None
        elif op == 'IMPORT_STAR':
            stars.append((imp_module, ofs))
            in_seq = False
        else:
            in_seq = False
    return globrefs, stars, globals

def offsetmap(c):
    """Map bytecode offsets of code object ``c`` to line numbers.

    ``c.co_lnotab`` is read as consecutive pairs of characters: the
    first of each pair is added to the running offset, the second to
    the running line number, and the resulting offset/line pair is
    recorded.  Offset 0 initially maps to ``c.co_firstlineno - 1``.
    """
    # we count lines from zero, here.
    lnotab = c.co_lnotab
    addr = 0
    line = c.co_firstlineno - 1
    mapping = {addr: line}
    pos = 0
    end = len(lnotab)
    while pos < end:
        addr += ord(lnotab[pos])
        line += ord(lnotab[pos + 1])
        mapping[addr] = line
        pos += 2
    return mapping


class Analyser:
    """Analyse one source file for globals supplied by ``import *``.

    Constructing an instance reads and compiles the file and fills in:

    fname       -- the file name passed in
    source      -- the file's text
    codeobjects -- {code object: offset -> line map} for the module code
                   and every code object nested in it
    globrefs, stars, globals -- the accumulators built by globalsof()
    starimports -- empty until resolve_star_imports() is called
    """

    def __init__(self, fname):
        self.fname = fname
        # use a context manager so the file is closed right away
        with open(fname) as srcfile:
            self.source = srcfile.read()
        self.starimports = []
        self.codeobjects = {}
        self.globrefs, self.stars, self.globals = self.analyse()

    def analyse(self):
        """Compile the source and scan all of its code objects.

        Code objects are visited breadth-first, starting with the module
        code and following the code objects found in ``co_consts``.
        Returns the tuple ``(globrefs, stars, globals)``.
        """
        globrefs = {}
        stars = []
        defined = {}
        root = compile(self.source, self.fname, 'exec')
        # mark code objects when they are queued, so that none is
        # queued (and therefore scanned) more than once
        seen = {root: True}
        todo = [root]
        while todo:
            code = todo.pop(0)
            self.codeobjects[code] = offsetmap(code)
            globalsof(code, globrefs, stars, defined)
            for const in code.co_consts:
                if type(const) is type(code) and const not in seen:
                    seen[const] = True
                    todo.append(const)
        return globrefs, stars, defined

    def get_unknown_globals(self):
        """Return the referenced global names that are neither builtins
        nor recorded in ``self.globals``."""
        try:
            import __builtin__ as builtin_module
        except ImportError:
            # the module is called 'builtins' on Python 3
            import builtins as builtin_module
        bltin = builtin_module.__dict__
        return [name for name in self.globrefs
                if name not in bltin and name not in self.globals]

    def get_from_star(self, modname):
        """Return the namespace produced by ``from <modname> import *``.

        NOTE: this really imports the module, so its top-level code runs.
        """
        namespace = {}
        # the call form is accepted by both the Python 2 exec statement
        # and the Python 3 exec function
        exec("from %s import *" % modname, namespace)
        return namespace

    def resolve_star_imports(self):
        """Work out which unknown globals each star import provides.

        Sets ``self.starimports`` to a list of ``(offset, module, names)``
        tuples, one per entry of ``self.stars`` and in the same order.
        """
        implicit = {}
        which = {}
        for star, ofs in self.stars:
            which[star] = []
            for key in self.get_from_star(star):
                implicit[key] = star
        # sort out in which star import we find what.
        # note that we walked star imports in order,
        # so we are sure to resolve ambiguities correctly.
        for name in self.get_unknown_globals():
            mod = implicit.get(name)
            if mod is None:
                # not provided by any star import (for instance a name
                # that is only defined at run time): nothing to
                # attribute it to, so leave it out
                continue
            which[mod].append(name)
        self.starimports = [(ofs, star, which[star])
                            for star, ofs in self.stars]

    def find_statements(self):
        """Split the source into chunks at the recorded line numbers.

        Sets ``self.linenos`` to the sorted line numbers (counted from
        zero, always including 0) found in the offset maps of all code
        objects, and ``self.statements`` to the source text between
        consecutive line numbers.
        """
        # go through all code objects and collect
        # line numbers. This gives us all statements.
        lineset = set([0])
        for ofs2line in self.codeobjects.values():
            lineset.update(ofs2line.values())
        linenos = sorted(lineset)
        self.linenos = linenos
        # now create statement chunks
        srclines = self.source.split('\n')
        stmts = []
        start = 0
        # a slice bound of None makes the last chunk run to the end
        for lno in linenos[1:] + [None]:
            stmts.append('\n'.join(srclines[start:lno]))
            start = lno
        self.statements = stmts