File: ipy_lookfor.py

package info (click to toggle)
ipython 0.13.1-2%2Bdeb7u1
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 15,752 kB
  • sloc: python: 69,537; makefile: 355; lisp: 272; sh: 80; objc: 37
file content (234 lines) | stat: -rw-r--r-- 7,089 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
"""
IPython extension: %lookfor command for searching docstrings

"""
# Pauli Virtanen <pav@iki.fi>, 2008.

import re, inspect, pkgutil, pydoc

#------------------------------------------------------------------------------
# Lookfor functionality
#------------------------------------------------------------------------------

# Cache for lookfor: {id(module): {name: (docstring, kind, index), ...}...}
# where kind: "func", "class", "module", "object"
# and index: index in breadth-first namespace traversal
_lookfor_caches = {}

# regexp whose match indicates that the string may contain a function signature
_function_signature_re = re.compile(r"[a-z_]+\(.*[,=].*\)", re.I)

def lookfor(what, modules=None, import_modules=True, regenerate=False):
    """
    Search for objects whose documentation contains all given words.
    Shows a summary of matching objects, sorted roughly by relevance.

    Parameters
    ----------
    what : str
        String containing words to look for.

    module : str, module
        Module whose docstrings to go through.
    import_modules : bool
        Whether to import sub-modules in packages.
        Will import only modules in __all__
    regenerate: bool
        Re-generate the docstring cache

    """
    # Cache
    cache = {}
    for module in modules:
        try:
            c = _lookfor_generate_cache(module, import_modules, regenerate)
            cache.update(c)
        except ImportError:
            pass

    # Search
    # XXX: maybe using a real stemming search engine would be better?
    found = []
    whats = str(what).lower().split()
    if not whats: return

    for name, (docstring, kind, index) in cache.iteritems():
        if kind in ('module', 'object'):
            # don't show modules or objects
            continue
        ok = True
        doc = docstring.lower()
        for w in whats:
            if w not in doc:
                ok = False
                break
        if ok:
            found.append(name)

    # Relevance sort
    # XXX: this is full Harrison-Stetson heuristics now,
    # XXX: it probably could be improved

    kind_relevance = {'func': 1000, 'class': 1000,
                      'module': -1000, 'object': -1000}

    def relevance(name, docstr, kind, index):
        r = 0
        # do the keywords occur within the start of the docstring?
        first_doc = "\n".join(docstr.lower().strip().split("\n")[:3])
        r += sum([200 for w in whats if w in first_doc])
        # do the keywords occur in the function name?
        r += sum([30 for w in whats if w in name])
        # is the full name long?
        r += -len(name) * 5
        # is the object of bad type?
        r += kind_relevance.get(kind, -1000)
        # is the object deep in namespace hierarchy?
        r += -name.count('.') * 10
        r += max(-index / 100, -100)
        return r

    def relevance_sort(a, b):
        dr = relevance(b, *cache[b]) - relevance(a, *cache[a])
        if dr != 0: return dr
        else: return cmp(a, b)
    found.sort(relevance_sort)

    # Pretty-print
    s = "Search results for '%s'" % (' '.join(whats))
    help_text = [s, "-"*len(s)]
    for name in found:
        doc, kind, ix = cache[name]

        doclines = [line.strip() for line in doc.strip().split("\n")
                    if line.strip()]

        # find a suitable short description
        try:
            first_doc = doclines[0].strip()
            if _function_signature_re.search(first_doc):
                first_doc = doclines[1].strip()
        except IndexError:
            first_doc = ""
        help_text.append("%s\n    %s" % (name, first_doc))

    # Output
    if len(help_text) > 10:
        pager = pydoc.getpager()
        pager("\n".join(help_text))
    else:
        print "\n".join(help_text)

def _lookfor_generate_cache(module, import_modules, regenerate):
    """
    Generate docstring cache for given module.

    Parameters
    ----------
    module : str, None, module
        Module for which to generate docstring cache
    import_modules : bool
        Whether to import sub-modules in packages.
        Will import only modules in __all__
    regenerate: bool
        Re-generate the docstring cache

    Returns
    -------
    cache : dict {obj_full_name: (docstring, kind, index), ...}
        Docstring cache for the module, either cached one (regenerate=False)
        or newly generated.

    """
    global _lookfor_caches

    if module is None:
        module = "numpy"

    if isinstance(module, str):
        module = __import__(module)

    if id(module) in _lookfor_caches and not regenerate:
        return _lookfor_caches[id(module)]

    # walk items and collect docstrings
    cache = {}
    _lookfor_caches[id(module)] = cache
    seen = {}
    index = 0
    stack = [(module.__name__, module)]
    while stack:
        name, item = stack.pop(0)
        if id(item) in seen: continue
        seen[id(item)] = True

        index += 1
        kind = "object"

        if inspect.ismodule(item):
            kind = "module"
            try:
                _all = item.__all__
            except AttributeError:
                _all = None
            # import sub-packages
            if import_modules and hasattr(item, '__path__'):
                for m in pkgutil.iter_modules(item.__path__):
                    if _all is not None and m[1] not in _all:
                        continue
                    try:
                        __import__("%s.%s" % (name, m[1]))
                    except ImportError:
                        continue
            for n, v in inspect.getmembers(item):
                if _all is not None and n not in _all:
                    continue
                stack.append(("%s.%s" % (name, n), v))
        elif inspect.isclass(item):
            kind = "class"
            for n, v in inspect.getmembers(item):
                stack.append(("%s.%s" % (name, n), v))
        elif callable(item):
            kind = "func"

        doc = inspect.getdoc(item)
        if doc is not None:
            cache[name] = (doc, kind, index)

    return cache

#------------------------------------------------------------------------------
# IPython connectivity
#------------------------------------------------------------------------------

from IPython.core import ipapi
ip = ipapi.get()

_lookfor_modules = ['numpy', 'scipy']

def lookfor_f(self, arg=''):
    r"""
    Search for objects whose documentation contains all given words.
    Shows a summary of matching objects, sorted roughly by relevance.

    Usage
    -----
    %lookfor +numpy  some words
    Search module 'numpy'

    %lookfor_modules numpy scipy
    Set default modules whose docstrings to search

    """
    lookfor(arg, modules=_lookfor_modules)

def lookfor_modules_f(self, arg=''):
    global _lookfor_modules
    if not arg:
        print "Modules included in %lookfor search:", _lookfor_modules
    else:
        _lookfor_modules = arg.split()

ip.define_magic('lookfor', lookfor_f)
ip.define_magic('lookfor_modules', lookfor_modules_f)