File: docextract.py

package info (click to toggle)
pigment-python 0.3.4-2
  • links: PTS
  • area: main
  • in suites: lenny
  • size: 2,856 kB
  • ctags: 2,881
  • sloc: python: 11,567; sh: 9,133; makefile: 227; ansic: 76
file content (263 lines) | stat: -rw-r--r-- 8,739 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
# -*- Mode: Python; py-indent-offset: 4 -*-
'''Simple module for extracting GNOME style doc comments from C
sources, so I can use them for other purposes.'''

import sys, os, string, re

__all__ = ['extract']

# Leading "  * " decoration found on the lines inside a C block comment.
comment_line_lead = re.compile(r'^\s*\*\s*')
# First word of an already-stripped comment line, optionally followed by
# whitespace and a colon; group 1 is used as the function name.
funcname_pat = re.compile(r'^(\w+)\s*:?')
# "SECTION: name" header line; group 1 is used as the section name.
sectionname_pat = re.compile(r'^SECTION\s*:\s*(\w+)\s*$')
# Opening "/**" of a doc comment; it must be followed by whitespace.
comment_start_pat = re.compile(r'^\s*/\*\*\s')
# Closing "*/" at the start of a line (leading whitespace allowed).
comment_end_pat = re.compile(r'^\s*\*+/')

# Line consisting only of whitespace around a newline (a blank comment line).
newline_pat = re.compile(r'^\s*\n\s*$')
# "Returns:", "Return value:" or a bare "Returns", optionally prefixed by "@"
# and matched case-insensitively; group 1 is the keyword, group 2 the rest.
return_pat = re.compile(r'^@?(returns:|return\s+value:|returns\s*)(.*\n?)$',
                        re.IGNORECASE)
# "@name: text" line; group 1 is the name, group 2 the text after the colon.
param_pat = re.compile(r'^@(\S+)\s*:(.*\n?)$')

class Doc:
    """Common base for the records built from doc comments.

    Subclasses supply do_parse_line() to consume the text of each comment
    line once its leading decoration has been removed.
    """
    name = None

    def __init__(self, name=None):
        self.name = name

    @staticmethod
    def new_doc(line):
        """Build the Doc subclass matching the first line of a comment.

        Returns a ClassDoc when line matches sectionname_pat, a FunctionDoc
        when it matches funcname_pat, and None otherwise.
        """
        # The section test has to come first: funcname_pat would also accept
        # a line that starts with the word "SECTION".
        section = sectionname_pat.match(line)
        if section:
            return ClassDoc(section.group(1))
        function = funcname_pat.match(line)
        if function:
            return FunctionDoc(function.group(1))
        return None

    def _strip_comment_lead(self, line):
        """Return line with its leading comment decoration removed."""
        return comment_line_lead.sub('', line)

    def parse_line(self, line):
        """Feed one raw comment line to the Doc.

        Returns False when line closes the comment, True once the line has
        been handed to do_parse_line().
        """
        if comment_end_pat.match(line):
            return False
        self.do_parse_line(self._strip_comment_lead(line))
        return True

    def do_parse_line(self, line):
        """Consume one stripped comment line; subclasses must override."""
        raise NotImplementedError

    def set_name(self, name):
        """Replace the stored name."""
        self.name = name

    def has_enough(self):
        """Tell whether a name has been set."""
        return self.name is not None


class FunctionDoc(Doc):
    """Documentation collected from the doc comment of one function.

    Attributes:
      params         list of (name, description) tuples, in order of addition
      description    free text of the comment
      ret            text that follows the return marker
      current_state  one of the IN_* constants below
    """
    # Parser states: the part of the comment the current line belongs to.
    IN_DESCRIPTION = 1
    IN_RETURN = 2
    IN_PARAM = 3

    def __init__(self, name=None):
        self.params = []
        self.description = ''
        self.ret = ''
        self.current_state = self.IN_DESCRIPTION
        Doc.__init__(self, name)

    @staticmethod
    def _join_text(existing, extra):
        """Concatenate two pieces of text without gluing words together.

        add_param() stores descriptions stripped, so a continuation line
        would otherwise land directly after the last word.  A newline is
        inserted only when neither side has whitespace at the junction.
        """
        if existing and extra and not (existing[-1].isspace()
                                       or extra[0].isspace()):
            return existing + '\n' + extra
        return existing + extra

    def add_param(self, name, description):
        """Append a parameter; '...' is recorded under the name 'Varargs'."""
        if name == '...':
            name = 'Varargs'
        self.params.append((name, description.strip()))

    def append_to_last_param(self, extra):
        """Extend the description of the most recently added parameter."""
        name, description = self.params[-1]
        self.params[-1] = (name, self._join_text(description, extra))

    def append_to_named_param(self, name, extra):
        """Extend the description of the parameter called name.

        When no such parameter exists yet, it is added with extra as its
        description.
        """
        for index, (param, description) in enumerate(self.params):
            if param == name:
                self.params[index] = (name,
                                      self._join_text(description, extra))
                return
        # fall through to adding extra parameter ...
        self.add_param(name, extra)

    def append_description(self, extra):
        """Append extra to the description text."""
        self.description = self.description + extra

    def append_return(self, extra):
        """Append extra to the return text."""
        self.ret = self.ret + extra

    def get_param_description(self, name):
        """Return the description of the first parameter called name, or ''
        when there is none."""
        for param, description in self.params:
            if param == name:
                return description
        return ''

    def set_state_from_line(self, line):
        """Changes .current_state if the (comment) line requires it. Returns
        the match that triggered the change or None."""
        transitions = ((newline_pat, self.IN_DESCRIPTION),
                       (return_pat, self.IN_RETURN),
                       (param_pat, self.IN_PARAM))
        for pat, state in transitions:
            match = pat.match(line)
            if match:
                self.current_state = state
                return match
        return None

    def do_parse_line(self, line):
        """Store one stripped comment line according to the current state.

        A line that matches one of the state patterns first switches the
        state; any other line continues whatever part is being read.
        """
        # Stripping the comment lead also consumes the newline of an empty
        # comment line, so restore it for newline_pat to recognise.
        if not line:
            line = '\n'

        match = self.set_state_from_line(line)

        if self.current_state == self.IN_PARAM:
            if match:
                self.add_param(match.group(1), match.group(2))
            else:
                self.append_to_last_param(line)
        elif self.current_state == self.IN_RETURN:
            if match:
                return_start = match.group(1)
                self.ret = match.group(2)
                # The return marker and its text are also kept as part of
                # the description.
                self.description = self.description + return_start + self.ret
            else:
                self.append_return(line)
        elif self.current_state == self.IN_DESCRIPTION:
            # A blank line only switches the state; it carries no text.
            if not match:
                if line[:12] == 'Description:':
                    line = line[12:]
                self.append_description(line)
        else:
            # current_state holds none of the IN_* values.
            sys.stderr.write("Wooops!\n")

class ClassDoc(Doc):
    """Documentation collected from a "SECTION: name" doc comment.

    Attributes:
      short_description  stripped text of the @short_description line, or
                         None when the comment has none
      see_also           list of names taken from @see_also lines
    """

    def __init__(self, name=None):
        self.short_description = None
        self.see_also = []
        Doc.__init__(self, name)

    def set_short_description(self, desc):
        """Store desc with surrounding whitespace removed."""
        self.short_description = desc.strip()

    def add_see_also_line(self, line):
        """Add the comma-separated names found in line to see_also.

        Whitespace, '#' and '.' are stripped from both ends of every name,
        and names that end up empty are skipped.
        """
        for raw_name in line.split(','):
            # The text handed over by do_parse_line() still carries the
            # line's newline, so whitespace must be stripped together with
            # the '#' and '.' decoration.
            name = raw_name.strip(' #.\t\r\n')
            if name:
                self.see_also.append(name)

    def do_parse_line(self, line):
        """Handle one stripped comment line.

        Only "@short_description:" and "@see_also:" lines are used; every
        other line is ignored.
        """
        match = param_pat.match(line)
        if not match:
            return
        name = match.group(1)
        desc = match.group(2)
        if name == 'short_description':
            self.set_short_description(desc)
        elif name == 'see_also':
            self.add_see_also_line(desc)




def _find_doc(fp):
    """Read comment lines from fp until one of them names a doc.

    Returns the new Doc, or None when the comment closes (or the file ends)
    before any line is recognised by Doc.new_doc().
    """
    line = fp.readline()
    while line and not comment_end_pat.match(line):
        doc = Doc.new_doc(comment_line_lead.sub('', line))
        if doc is not None:
            return doc
        line = fp.readline()
    return None


def parse_file(fp, func_docs, class_docs):
    """Collect the doc comments of one C source file.

    fp is read line by line with readline().  Every "/**" comment whose
    body contains a name line yields a Doc: FunctionDoc objects are stored
    in func_docs and ClassDoc objects in class_docs, keyed by name.  The
    name of each doc found is reported on stderr.

    Raises NotImplementedError when Doc.new_doc() returns any other type.
    """
    line = fp.readline()
    while line:
        # A comment that opens and closes on the same line has no body to
        # scan, so it is treated like ordinary code.
        if comment_start_pat.match(line) and '*/' not in line:
            doc = _find_doc(fp)
            # doc is None when the comment ended without a name line; the
            # scan then simply resumes after the comment.
            if doc is not None:
                sys.stderr.write("    name= " + str(doc.name) + "\n")
                # The name line is consumed; feed the rest of the comment
                # to the doc until it reports the end of the comment.
                body_line = fp.readline()
                while body_line and doc.parse_line(body_line):
                    body_line = fp.readline()
                if isinstance(doc, FunctionDoc):
                    func_docs[doc.name] = doc
                elif isinstance(doc, ClassDoc):
                    class_docs[doc.name] = doc
                else:
                    raise NotImplementedError
        line = fp.readline()

def parse_dir(dir, func_docs, class_docs):
    """Recursively parse every '.c' file found below the directory dir.

    Sub-directories are descended into; regular entries whose name ends in
    '.c' are handed to parse_file() together with func_docs and class_docs.
    """
    for entry in os.listdir(dir):
        if entry in ('.', '..'):
            continue
        path = os.path.join(dir, entry)
        if os.path.isdir(path):
            parse_dir(path, func_docs, class_docs)
        # elif: a directory whose name happens to end in '.c' must not be
        # opened as a source file.
        elif len(entry) > 2 and entry.endswith('.c'):
            fp = open(path, 'r')
            try:
                parse_file(fp, func_docs, class_docs)
            finally:
                fp.close()

def extract(dirs, func_docs=None, class_docs=None):
    """Parse the C sources found below each directory in dirs.

    func_docs and class_docs are the dictionaries to fill; a new one is
    created for each that is left as None.  Dictionaries supplied by the
    caller are updated in place, even when they are empty.

    Returns the tuple (func_docs, class_docs).
    """
    # Compare against None: an empty dictionary passed by the caller is
    # falsy too, and must still be the one that gets filled.
    if func_docs is None:
        func_docs = {}
    if class_docs is None:
        class_docs = {}
    for dir in dirs:
        parse_dir(dir, func_docs, class_docs)
    return (func_docs, class_docs)

# Section header of a template file: "<!-- ##### TYPE name ##### -->".
tmpl_section_pat = re.compile(r'^<!-- ##### (\w+) (\w+) ##### -->$')
def parse_tmpl(fp, doc_dict):
    """Merge the text of one template file into doc_dict.

    fp is read line by line with readline().  Each FUNCTION section selects
    the entry of doc_dict with the section's name, creating a FunctionDoc
    for it when missing; the lines that follow are added to that entry:
      "@Returns: text"  -> append_return(text), unless text is blank
      "@name: text"     -> append_to_named_param(name, text)
      anything else     -> append_description(line)
    Lines of other section types, and lines after the "Unused Parameters"
    marker, are ignored.
    """
    cur_doc = None

    line = fp.readline()
    while line:
        match = tmpl_section_pat.match(line)
        if match:
            cur_doc = None  # new input shouldn't affect the old doc dict
            sect_type, sect_name = match.groups()

            if sect_type == 'FUNCTION':
                cur_doc = doc_dict.get(sect_name)
                if not cur_doc:
                    cur_doc = FunctionDoc()
                    cur_doc.set_name(sect_name)
                    doc_dict[sect_name] = cur_doc
        elif line == '<!-- # Unused Parameters # -->\n':
            cur_doc = None # don't worry about unused params.
        elif cur_doc:
            if line.startswith('@Returns: '):
                returned = line[10:]
                if returned.strip():
                    cur_doc.append_return(returned)
            elif line.startswith('@'):
                pos = line.find(':')
                if pos >= 0:
                    cur_doc.append_to_named_param(line[1:pos], line[pos+1:])
                else:
                    # '@' line without a colon: plain description text.
                    cur_doc.append_description(line)
            else:
                cur_doc.append_description(line)

        line = fp.readline()

def extract_tmpl(dirs, doc_dict=None):
    """Parse the '.sgml' template files directly inside each of dirs.

    Sub-directories are not descended into.  doc_dict is the dictionary to
    fill; a new one is created when it is left as None, and a dictionary
    supplied by the caller is updated in place even when it is empty.

    Returns doc_dict.
    """
    # Compare against None: an empty dictionary passed by the caller is
    # falsy too, and must still be the one that gets filled.
    if doc_dict is None:
        doc_dict = {}
    suffix = '.sgml'
    for directory in dirs:
        for entry in os.listdir(directory):
            if entry in ('.', '..'):
                continue
            path = os.path.join(directory, entry)
            if os.path.isdir(path):
                continue
            # Compare the whole suffix; a two-character slice can never be
            # equal to '.sgml'.
            if len(entry) > len(suffix) and entry.endswith(suffix):
                fp = open(path, 'r')
                try:
                    parse_tmpl(fp, doc_dict)
                finally:
                    fp.close()
    return doc_dict