File: stap-resolve-module-function.py

package info (click to toggle)
systemtap 5.1-5
links: PTS, VCS
area: main
in suites: sid, trixie
size: 47,964 kB
sloc: cpp: 80,838; ansic: 54,757; xml: 49,725; exp: 43,665; sh: 11,527; python: 5,003; perl: 2,252; tcl: 1,312; makefile: 1,006; javascript: 149; lisp: 105; awk: 101; asm: 91; java: 70; sed: 16
file content (561 lines) | stat: -rw-r--r-- 21,494 bytes
parent folder | download | duplicates (3)
from __future__ import print_function

import ast
import warnings
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    # deprecated in python 3.4, but still works as of python 3.7.6
    import imp
import os.path
import re
import sys
from glob import glob


# Module-wide verbosity level. The command-line entry point adds one
# for every '-v' option; any non-zero value makes the functions in
# this file emit diagnostic messages through _eprint().
_verbose = 0


# We want everything that isn't actual results to go to
# stderr. _eprint() is just like print(), but all output automatically
# goes to stderr.
def _eprint(*args, **kwargs):
    """Print ARGS exactly like print() does, but always to stderr.

    Any keyword arguments are handed through to print() unchanged;
    the 'file' keyword is supplied here, so callers must not pass it.
    """
    stream = sys.stderr
    print(*args, file=stream, **kwargs)


def _remove_ext(fullname):
    """Return FULLNAME with its last extension (if it has one) removed."""
    root, _extension = os.path.splitext(fullname)
    return root

# Characters that never need escaping when a pattern is turned into a
# regular expression.
_alphanum = frozenset(
    "0123456789"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz")
# Characters that are treated as wildcard syntax in a pattern.
_regexp_chars = frozenset(("*", "?", "[", "]"))


def _stapre_escape(pattern):
    """Turn a wildcard PATTERN into regular expression source text.

    Each character of PATTERN is translated independently:

    - '*' becomes '[^.]*' (any run of characters other than '.')
    - '?' becomes '.' (any single character)
    - '[' and ']' are passed through untouched
    - letters and digits are passed through untouched
    - a NUL character becomes the text '\\000'
    - every other character is preceded by a backslash

    The result is joined with an empty string of the same type as
    PATTERN.
    """
    def _translate(ch):
        if ch == '*':
            return '[^.]*'
        if ch == '?':
            return '.'
        if ch in _alphanum or ch in _regexp_chars:
            return ch
        if ch == "\000":
            return "\\000"
        return "\\" + ch

    return pattern[:0].join([_translate(ch) for ch in pattern])


def _stapre_name_has_wildcard(pattern):
    """Return True if PATTERN contains any of the wildcard characters
    listed in _regexp_chars, otherwise False."""
    return any(ch in _regexp_chars for ch in pattern)


def _get_default_search_path():
    """Return the list of directories to search for python modules.

    The result is the contents of sys.path preceded by the empty
    string. The first element of sys.path is the directory of this
    script, not the user's current directory, so the empty string
    (standing in for the current directory) is put at the start of
    the list.

    A new list is returned on every call. sys.path itself is never
    modified, so calling this function repeatedly neither changes the
    interpreter's import path nor piles up duplicate '' entries.
    """
    return [''] + list(sys.path)


def _find_wildcarded_modules(modpattern, path=None):
    """Version of imp.find_module() that handles hierarchical module
    names and wildcards.

    MODPATTERN is a (possibly dotted, possibly wildcarded) module
    name. If PATH is omitted or None, the default search path is
    used. Otherwise, PATH must be a list of directory names; each
    directory is searched for 'MODPATTERN.py' files and for package
    directories containing an '__init__.py' file.

    Returns a list with one 4-element tuple (modname, file, pathname,
    description) for every python source module found:

    modname is the dotted module name, file is an open file object
    positioned at the beginning, pathname is the pathname of the file
    found, and description is the 3-element tuple returned by
    imp.find_module() describing the kind of module found. The caller
    is responsible for closing each returned file object.

    If nothing matches, an empty list is returned. Modules that
    imp.find_module() reports as C builtins or C extensions are
    silently skipped. ImportError is raised for any other kind of
    module that isn't python source; in that case every file opened
    by this call is closed before the exception propagates.
    """
    if path is None:
        path = _get_default_search_path()

    # Convert '.' to '/', since 'foo.bar' gets loaded from
    # 'foo/bar.py'.
    mp_path = modpattern.replace('.', '/')
    has_wildcard = _stapre_name_has_wildcard(modpattern)

    def _candidates(pathname):
        # Return the existing files selected by PATHNAME: a glob when
        # the module pattern has wildcards, a plain file test
        # otherwise.
        if has_wildcard:
            found = glob(pathname)
            if found and _verbose:
                _eprint("glob(%s) found: %s" % (pathname, found))
            return found
        if os.path.isfile(pathname):
            if _verbose:
                _eprint("found: %s" % pathname)
            return [pathname]
        return []

    # Each entry is (dotted module name, name to hand to
    # imp.find_module(), list holding the directory to look in).
    modules = []

    # Now that we've got 'foo/bar', try to find it in the path.
    #
    # FIXME: We're going to be searching directories in the path
    # multiple times. To speed things up, we could cache full
    # directory results, then do matching against the cached results.
    for component in path:
        path_prefix = component + '/' if component else ''

        pathname = path_prefix + mp_path + '.py'
        if _verbose:
            if has_wildcard:
                _eprint("globbing '%s'" % pathname)
            else:
                _eprint("looking for '%s'" % pathname)
        found = _candidates(pathname)
        for g in found:
            # Get the part of the path past the prefix, then convert
            # "a/b/c" to "a.b.c".
            full_modname = _remove_ext(g[len(path_prefix):])
            full_modname = full_modname.replace('/', '.')
            modules.append((full_modname,
                            _remove_ext(os.path.basename(g)),
                            [os.path.dirname(g)]))
        if found and _verbose:
            _eprint("module list 1: %s" % modules)

        # We also need to check for package directories containing
        # an '__init__.py' file.
        pathname = path_prefix + mp_path + '/__init__.py'
        found = _candidates(pathname)
        for g in found:
            # Remove the '/__init__.py', get the part of the path
            # past the prefix, then convert "a/b/c" to "a.b.c".
            package_dir = os.path.dirname(g)
            full_modname = package_dir[len(path_prefix):]
            full_modname = full_modname.replace('/', '.')
            modules.append((full_modname, '__init__', [package_dir]))
        if found and _verbose:
            _eprint("module list 2: %s" % modules)

    results = []
    if not modules:
        return results

    if _verbose:
        _eprint("module list: %s" % modules)
    try:
        for (fm, m, p) in modules:
            if _verbose:
                _eprint("trying to load '%s'..." % fm)
            (fh, filename, descr) = imp.find_module(m, p)
            kind = descr[2]
            if kind == imp.PY_SOURCE:
                if _verbose:
                    _eprint("found module '%s'" % fm)
                results.append((fm, fh, filename, descr))
                continue

            # This module isn't going to be returned, so nobody else
            # will get the chance to close its file.
            if fh is not None:
                fh.close()
            if kind == imp.PKG_DIRECTORY:
                # We've found a package directory, which we shouldn't
                # have since the loop above should only return full
                # paths.
                raise ImportError('Unhandled package directory ' + fm)
            if kind not in (imp.C_BUILTIN, imp.C_EXTENSION):
                raise ImportError('Unknown descr: %d for module %s'
                                  % (kind, fm))
            # C builtins and C extensions are just ignored.
    except Exception:
        # The caller never sees the files collected so far; close
        # them instead of leaking them.
        for (_, opened, _, _) in results:
            opened.close()
        raise
    return results


def _parse_function_pattern(function_pattern):
    """Parse the FUNCTION_PATTERN into its parts. See the
    resolve_patterns docstring for a description of FUNCTION_PATTERN.

    Returns a tuple of: (FUNCTION_PATTERN, PATH, LINENO_TYPE, RANGE)

    PATH, LINENO_TYPE and RANGE are None when the corresponding part
    is absent. LINENO_TYPE is ':' or '+'. All returned strings have
    surrounding whitespace removed.

    A separator ('@', ':' or '+') only counts when there is at least
    one character in front of it; a separator in the very first
    position is left as part of the text it starts.

    Raises SyntaxError if the path part contains both a ':' and a
    '+' separator.
    """
    filename = None
    lineno_type = None
    lineno = None

    # First, look for a '@', indicating we've got a filename in
    # the function pattern.
    function_pattern = function_pattern.strip()
    at_sign = function_pattern.find('@')
    if at_sign > 0:
        filename = function_pattern[at_sign + 1:].strip()
        function_pattern = function_pattern[:at_sign].strip()

        # Does the filename have a ':' or '+' in it, indicating a line
        # number?
        colon = filename.find(':')
        plus = filename.find('+')
        if colon > 0 and plus > 0:
            raise SyntaxError("Error: both ':' and '+' specified in '%s'."
                              % filename)

        # At most one of the two is usable at this point. Test each
        # one on its own: the other may sit at index 0 (ignored)
        # rather than being absent (-1).
        pos = -1
        if colon > 0:
            lineno_type = ':'
            pos = colon
        elif plus > 0:
            lineno_type = '+'
            pos = plus
        if pos > 0:
            lineno = filename[pos + 1:].strip()
            filename = filename[:pos].strip()
    return (function_pattern, filename, lineno_type, lineno)


def _select_lines(lineno, lineno_type, lines):
    """Return the line numbers of one function selected by LINENO.

    LINES is the function's line list: LINES[0] is the line of the
    function declaration and the rest are the lines of its body. Only
    body lines are ever selected.

    LINENO is either '*' (every body line) or a comma separated list
    of 'N' and 'N-M' items. When LINENO_TYPE is '+', the numbers are
    relative to the declaration line; otherwise they are absolute.

    Raises SyntaxError if an item isn't a number or a number range.
    """
    body_lines = lines[1:]
    if lineno == '*':
        return list(body_lines)

    # Normalize relative line numbers by adding the declaration line.
    offset = lines[0] if lineno_type == '+' else 0
    selected = []
    for lrange in lineno.split(','):
        lrange = lrange.strip()
        low_text, dash, high_text = lrange.partition('-')
        try:
            low = int(low_text) + offset
            # A lone 'N' is handled as the range 'N-N'.
            high = int(high_text) + offset if dash else low
        except ValueError:
            raise SyntaxError("Error: invalid line number or range '%s'."
                              % lrange)
        selected.extend(l for l in body_lines if low <= l <= high)
    return selected


def resolve_patterns(module_pattern, function_pattern):
    """Resolve the MODULE_PATTERN and FUNCTION_PATTERN into actual
    python module name, function name, source file and lines.

    MODULE_PATTERN is the name of the python module. This part may
    use the "*" and "?" wildcarding operators to match multiple
    names.

    FUNCTION_PATTERN is made up of 3 parts: FUNCTION[@PATH[[:+]RANGE]]

    where:

    - The first part is the name of a python function. This part may
      use the "*" and "?" wildcarding operators to match multiple
      names.

    - The second part is optional and begins with the "@" character. It
      is followed by the path to the python source file containing the
      function, and may include a wildcard pattern.

    - Finally, the third part is optional if the path to the python
      source was given, and identifies the line number in the source
      file preceded by a ":" or a "+".  The line number is assumed to
      be an absolute line number if preceded by a ":", or relative to
      the declaration line of the function if preceded by a "+".  All
      the lines in the function can be matched with ":*".  A range of
      lines x through y can be matched with ":x-y". Ranges and
      specific lines can be mixed using commas, e.g. ":x,y-z".

    Returns a list of strings, one per match. When no line number
    was given, each string has the form 'MODULE FUNCTION@PATH:LINE
    call', where LINE is the declaration line of the function.
    Otherwise there is one 'MODULE FUNCTION@PATH:LINE' string for
    every selected line in the body of the function.

    Raises IOError if an explicitly given source file can't be found,
    and SyntaxError if FUNCTION_PATTERN is malformed.
    """
    if _verbose:
        _eprint("Resolving patterns '%s' '%s'"
                % (module_pattern, function_pattern))

    # Parse the function pattern into its parts.
    (function_pattern, filename, lineno_type, lineno) \
        = _parse_function_pattern(function_pattern)

    # If we've got a filename (which could have wildcards), try to
    # handle it.
    modpattern_list = []
    if filename:
        filename_list = []
        # If we've got an absolute path, we don't need to search
        # for the file.
        if os.path.isabs(filename):
            if _stapre_name_has_wildcard(filename):
                filename_list = glob(filename)
            elif os.path.isfile(filename):
                filename_list.append(filename)
        else:
            for component in _get_default_search_path():
                path_prefix = component + '/' if component else ''
                if _stapre_name_has_wildcard(filename):
                    if _verbose:
                        _eprint("globbing '%s'"
                                % (path_prefix + filename))
                    filename_list.extend(glob(path_prefix + filename))
                elif os.path.isfile(path_prefix + filename):
                    filename_list.append(path_prefix + filename)
        # If we had a explicit filename, but couldn't find it, we've
        # got an error.
        if not filename_list:
            raise IOError("filename '%s' can't be found." % filename)

        # OK, a filename was specified and we found it. Now we need to
        # mangle the module part of the pattern to include the
        # path of the file we found.
        #
        # FIXME: Note we aren't validating the module name against the
        # filename. So, 'module("a*").function("b*@foo.py")' is going
        # to match, even though module 'foo' doesn't match 'a*'.
        #
        # We want the same number of 'levels' in the answer as in
        # the input. If the module pattern contains 'f*.b*', we
        # want the module name to be 'foo.bar'.
        levels = module_pattern.count('.') + 1
        for f in filename_list:
            abspath = os.path.abspath(f)
            parts = _remove_ext(abspath).split('/')
            if os.path.basename(abspath) != '__init__.py':
                mod_name = '.'.join(parts[(len(parts) - levels):])
                mod_path = '/'.join(parts[:(len(parts) - levels)])
            else:
                # The module is named after the package directory,
                # so leave the trailing '__init__' out of it.
                mod_name = '.'.join(parts[(len(parts) - levels - 1):-1])
                mod_path = '/'.join(parts[:(len(parts) - levels) - 1])
            modpattern_list.append((mod_name, [mod_path]))
    else:
        modpattern_list.append((module_pattern, None))

    if _verbose:
        _eprint("modpattern_list: %s" % modpattern_list)
    ret_list = []
    ret_list_format = '%s %s@%s:%d'
    ret_list_flag_format = '%s %s@%s:%d %s'
    for (mp, p) in modpattern_list:
        # Function names are recorded as 'MODULE.FUNCTION', so the
        # two patterns are joined by a literal (escaped) '.'.
        esc_pattern = (_stapre_escape(mp) + '\\.'
                       + _stapre_escape(function_pattern) + '$')
        re_obj = re.compile(esc_pattern)

        # Try to load the source for the module(s).
        if _verbose:
            _eprint("using _find_wildcarded_modules(%s, %s)..." % (mp, p))
        results = _find_wildcarded_modules(mp, p)
        if _verbose:
            _eprint('_find_wildcarded_modules() returned %s' % results)
        for (module, f, filename, descr) in results:
            if _verbose:
                _eprint("Loading source for module '%s'" % module)
            try:
                source = f.read()
            finally:
                f.close()

            if filename is None or source is None:
                raise IOError("Couldn't find module '%s'" % module)

            # Parse the source, turning it into a AST (Abstract Syntax
            # Tree). This doesn't actually load the file (which would run
            # it).
            tree = ast.parse(source, filename, "exec")

            # Walk the AST, looking for function definitions, line number, etc.
            walker = _AstWalker(module, filename)
            modinfo = walker.visit(tree)
            for (func, lines) in modinfo.functions:
                if _verbose:
                    _eprint("matching %s against '%s'" % (func, esc_pattern))
                if not re_obj.match(func):
                    continue
                if _verbose:
                    _eprint("%s matches %s" % (func, esc_pattern))

                # Strip the leading 'MODULE.' from the function name.
                bare_func = func[len(module) + 1:]

                # No line numbers were specified. So, report the
                # function definition line (the first line
                # number). Add the 'call' specifier so that
                # systemtap knows what's going on.
                if lineno_type is None and lineno is None:
                    ret_list.append(ret_list_flag_format
                                    % (module, bare_func,
                                       modinfo.path, lines[0], 'call'))
                    continue

                # Handle the wildcard, absolute or relative line
                # numbers: N, N-M, or N,M,O,P, or a combination
                # thereof...
                for l in _select_lines(lineno, lineno_type, lines):
                    ret_list.append(ret_list_format
                                    % (module, bare_func,
                                       modinfo.path, l))
    if _verbose:
        _eprint('returning %s' % ret_list)
    return ret_list


class _ModuleInfo(object):
    """Record of what was found in one python module: its name, the
    path of its source file, its functions and its line numbers."""

    def __init__(self, name=None, path=None):
        self.name, self.path = name, path
        # List of (function name, line list) tuples, in the order
        # they were added.
        self.functions = []
        # Line list most recently given to add_linelist().
        self.lines = []

    def dump(self):
        """Write a description of this object to stderr."""
        report = ['Name: %s' % self.name, 'Path: %s' % self.path]
        for (funcname, linelist) in self.functions:
            report.append('%s: %s' % (funcname, linelist))
        report.append('%s: %s' % (self.name, self.lines))
        for text in report:
            _eprint(text)

    def add_function(self, funcname, linelist):
        """Remember function FUNCNAME together with its LINELIST."""
        entry = (funcname, linelist)
        self.functions.append(entry)

    def add_linelist(self, linelist):
        """Replace the module's own line list with LINELIST."""
        self.lines = linelist


class _AstWalker(ast.NodeVisitor):
    """AST visitor that collects, into a _ModuleInfo, the functions
    defined in a module and the line numbers seen in each of them.

    Function names are recorded fully qualified: the module name,
    any enclosing class and function names, and the function's own
    name, joined with '.'.
    """

    def __init__(self, mod_name, mod_path, verbose=False):
        self.verbose = verbose
        self.modinfo = _ModuleInfo(mod_name, mod_path)
        # Stack of the names enclosing the node being visited.
        self.names = [mod_name]
        # Line numbers collected for the scope being visited.
        self.linelist = []
        # Highest line number recorded so far.
        self.lastline = -1
        # Nesting depth, only used to indent verbose output.
        self.indent = 0
        self.indent_with = '    '

    def visit_Module(self, node):
        """Walk the whole module and return the filled-in _ModuleInfo."""
        if self.verbose:
            _eprint("found module")
        self.generic_visit(node)
        info = self.modinfo
        info.add_linelist(self.linelist)
        return info

    def body(self, statements):
        """Visit each statement in STATEMENTS, in order."""
        for statement in statements:
            self.visit(statement)

    def visit_ClassDef(self, node):
        """Visit the body of a class with the class name pushed on
        the name stack."""
        if self.verbose:
            _eprint("%sfound class %s (%d)"
                    % (self.indent_with * self.indent, node.name,
                       node.lineno))
        self.names.append(node.name)
        self.indent += 1
        self.body(node.body)
        self.indent -= 1
        self.names.pop()

    def visit_FunctionDef(self, node):
        """Record a function: its qualified name plus a line list
        made of the declaration line followed by the lines collected
        from its body."""
        self.names.append(node.name)
        qualified_name = '.'.join(self.names)
        if self.verbose:
            _eprint("%sfound FunctionDef %s (%d)"
                    % (self.indent_with * self.indent, qualified_name,
                       node.lineno))
        # Collect the body into a list of its own, then put the
        # enclosing scope's list back.
        enclosing_lines, self.linelist = self.linelist, [node.lineno]
        self.indent += 1
        self.body(node.body)
        self.indent -= 1
        self.modinfo.add_function(qualified_name, self.linelist)
        self.linelist = enclosing_lines
        self.names.pop()

    def generic_visit(self, node):
        """Record the node's line number if it is higher than any
        recorded so far, then visit the node's children."""
        try:
            lineno = node.lineno
        except AttributeError:
            # Not every kind of node carries a line number.
            pass
        else:
            if self.lastline < lineno:
                self.lastline = lineno
                if self.verbose:
                    _eprint("%sstmt: %d"
                            % (self.indent_with * self.indent, lineno))
                self.linelist.append(lineno)
        super(_AstWalker, self).generic_visit(node)


def _usage():
    """Write the command line synopsis to stderr and exit with
    status 1."""
    synopsis = "Usage: %s [-v] MODULE_PATTERN FUNCTION_PATTERN" % sys.argv[0]
    _eprint(synopsis)
    raise SystemExit(1)


if __name__ == '__main__':
    import getopt

    # Both patterns are required, so anything shorter than two
    # arguments can be rejected before option parsing.
    cli_args = sys.argv[1:]
    if len(cli_args) < 2:
        _usage()
    try:
        (flags, positional) = getopt.getopt(cli_args, 'v')
    except getopt.GetoptError as err:
        _eprint("Error: %s" % err)
        _usage()

    # Every '-v' raises the verbosity by one.
    _verbose += sum(1 for (flag, _value) in flags if flag == '-v')

    if len(positional) != 2:
        _usage()
    (module_arg, function_arg) = positional

    # Results go to stdout, one per line; errors go to stderr and
    # produce a non-zero exit status.
    try:
        for match in resolve_patterns(module_arg, function_arg) or ():
            print(match)
    except IOError as err:
        _eprint("IOError: %s" % err)
        sys.exit(1)
    except SyntaxError as err:
        _eprint("SyntaxError: %s" % err)
        sys.exit(1)