File: docextract.py

package info (click to toggle)
pigment-python 0.3.4-2
links: PTS
area: main
in suites: lenny
size: 2,856 kB
ctags: 2,881
sloc: python: 11,567; sh: 9,133; makefile: 227; ansic: 76
file content (263 lines) | stat: -rw-r--r-- 8,739 bytes
parent folder | download | duplicates (3)
# -*- Mode: Python; py-indent-offset: 4 -*-
'''Simple module for extracting GNOME style doc comments from C
sources, so I can use them for other purposes.'''

import sys, os, string, re

# Names exported by ``from <module> import *``.
__all__ = ['extract']

# Leading decoration of a comment body line: optional whitespace, one '*',
# and any whitespace after it (this includes the newline of an empty line).
comment_line_lead = re.compile(r'^\s*\*\s*')
# A leading word optionally followed by whitespace and a ':'; group 1 is the
# word.  Used as the name line of a function comment.
funcname_pat = re.compile(r'^(\w+)\s*:?')
# "SECTION: name" on a line of its own; group 1 is the section name.
sectionname_pat = re.compile(r'^SECTION\s*:\s*(\w+)\s*$')
# Opening of a doc comment: '/**' followed by a whitespace character.
comment_start_pat = re.compile(r'^\s*/\*\*\s')
# Closing of a comment: optional whitespace, one or more '*', then '/'.
comment_end_pat = re.compile(r'^\s*\*+/')

# A line made only of whitespace that contains a newline.
newline_pat = re.compile(r'^\s*\n\s*$')
# Start of the return value text, case-insensitive, with an optional leading
# '@'.  Group 1 is the keyword as written ("returns:", "return value:" or a
# bare "returns" plus trailing whitespace); group 2 is the rest of the line.
return_pat = re.compile(r'^@?(returns:|return\s+value:|returns\s*)(.*\n?)$',
                        re.IGNORECASE)
# "@name: text"; group 1 is the name, group 2 the text after the colon
# (including the trailing newline when there is one).
param_pat = re.compile(r'^@(\S+)\s*:(.*\n?)$')

class Doc:
    """Base class for the documentation found in one comment block.

    It holds the documented name and the handling shared by every kind of
    block: detecting the end of the comment and removing the leading ``*``
    decoration.  Subclasses interpret the comment body in do_parse_line().
    """

    name = None

    def __init__(self, name=None):
        self.name = name

    @staticmethod
    def new_doc(line):
        """Build the Doc subclass announced by the name line of a comment.

        ``line`` is expected to have its comment lead already removed.
        Returns a ClassDoc for a "SECTION: name" line, a FunctionDoc for a
        line starting with a word, and None when neither pattern matches.
        """
        # sectionname_pat has to be tried first: a SECTION line also matches
        # the more general funcname_pat.
        for regexp, doc_type in ((sectionname_pat, ClassDoc),
                                 (funcname_pat, FunctionDoc)):
            match = regexp.match(line)
            if match:
                return doc_type(match.group(1))
        return None

    def _strip_comment_lead(self, line):
        """Return ``line`` without its leading whitespace/'*' decoration."""
        return comment_line_lead.sub('', line)

    def parse_line(self, line):
        """Parse one raw line of the comment and fill the Doc accordingly.

        Returns False when ``line`` closes the comment (nothing is parsed in
        that case) and True otherwise.
        """
        if comment_end_pat.match(line):
            return False
        self.do_parse_line(self._strip_comment_lead(line))
        return True

    def do_parse_line(self, line):
        """Interpret one cleaned comment line; must be overridden."""
        raise NotImplementedError

    def set_name(self, name):
        """Set the documented name."""
        self.name = name

    def has_enough(self):
        """Return True once a name has been set."""
        return self.name is not None


class FunctionDoc(Doc):
    """Documentation of a function: description, parameters, return value.

    The comment body is fed line by line to do_parse_line(), which keeps
    track of the part of the comment it is in with ``current_state``:
    a ``@name:`` line starts a parameter, a "Returns:" style line starts the
    return value text and a blank line switches back to the description.
    """

    # Parser states: the part of the comment the current line belongs to.
    IN_DESCRIPTION = 1
    IN_RETURN = 2
    IN_PARAM = 3

    def __init__(self, name=None):
        self.params = []        # (name, description) tuples, in source order
        self.description = ''
        self.ret = ''
        self.current_state = self.IN_DESCRIPTION
        Doc.__init__(self, name)

    def add_param(self, name, description):
        """Record a new parameter; '...' is stored under the name 'Varargs'.

        The description is stored with surrounding whitespace removed.
        """
        if name == '...':
            name = 'Varargs'
        self.params.append((name, description.strip()))

    def append_to_last_param(self, extra):
        """Append a continuation line to the most recently added parameter."""
        name, description = self.params[-1]
        # add_param() strips the first line of the description, so without a
        # separator the continuation would be glued to its last word.
        if description and not description[-1].isspace():
            description = description + ' '
        self.params[-1] = (name, description + extra)

    def append_to_named_param(self, name, extra):
        """Append ``extra`` to the parameter called ``name``.

        When no such parameter exists yet it is added with ``extra`` as its
        description.
        """
        for index, (param, description) in enumerate(self.params):
            if param == name:
                self.params[index] = (name, description + extra)
                return
        # fall through to adding extra parameter ...
        self.add_param(name, extra)

    def append_description(self, extra):
        """Append ``extra`` to the description text."""
        self.description = self.description + extra

    def append_return(self, extra):
        """Append ``extra`` to the return value text."""
        self.ret = self.ret + extra

    def get_param_description(self, name):
        """Return the description of parameter ``name``, or '' if unknown."""
        for param, description in self.params:
            if param == name:
                return description
        return ''

    def set_state_from_line(self, line):
        """Changes .current_state if the (comment) line requires it. Returns
        the match that triggered the change or None."""
        for pat, state in ((newline_pat, self.IN_DESCRIPTION),
                           (return_pat, self.IN_RETURN),
                           (param_pat, self.IN_PARAM)):
            match = pat.match(line)
            if match:
                self.current_state = state
                return match
        return None

    def do_parse_line(self, line):
        """Store one cleaned comment line in the part it belongs to."""
        # Removing the comment lead from an empty " *" line also removes its
        # newline; restore it so the line is recognised as a blank line.
        if not line:
            line = '\n'

        match = self.set_state_from_line(line)

        if self.current_state == self.IN_PARAM:
            if match:
                self.add_param(match.group(1), match.group(2))
            else:
                self.append_to_last_param(line)
        elif self.current_state == self.IN_RETURN:
            if match:
                self.ret = match.group(2)
                # The return text, keyword included, is also appended to the
                # description.
                self.description = (self.description + match.group(1)
                                    + self.ret)
            else:
                self.append_return(line)
        elif self.current_state == self.IN_DESCRIPTION:
            # A match here is the blank line that switched the state back to
            # the description; it carries no text.
            if not match:
                if line.startswith('Description:'):
                    line = line[len('Description:'):]
                self.append_description(line)
        else:
            # Only reachable if current_state was set to an unknown value.
            sys.stderr.write("Wooops!\n")

class ClassDoc(Doc):
    """Documentation of a "SECTION:" comment block.

    Only the ``@short_description:`` and ``@see_also:`` lines of the block
    are kept; every other line is ignored.
    """

    def __init__(self, name=None):
        self.short_description = None
        self.see_also = []      # names listed on the @see_also lines
        Doc.__init__(self, name)

    def set_name(self, name):
        """Set the documented section name."""
        self.name = name

    def set_short_description(self, desc):
        """Store ``desc`` without its surrounding whitespace."""
        self.short_description = desc.strip()

    def add_see_also_line(self, line):
        """Add the comma separated names found in ``line`` to ``see_also``.

        Surrounding whitespace, '#' markers and '.' are removed from each
        name; entries that end up empty are skipped.
        """
        for entry in line.split(','):
            # Whitespace is stripped first: the text handed over by
            # do_parse_line() still ends with a newline, which would
            # otherwise protect a trailing '.' from being removed.
            name = entry.strip().strip(' #.')
            if name:
                self.see_also.append(name)

    def do_parse_line(self, line):
        """Handle one cleaned comment line of the section block."""
        match = param_pat.match(line)
        if not match:
            return
        name = match.group(1)
        desc = match.group(2)
        if name == 'short_description':
            self.set_short_description(desc)
        elif name == 'see_also':
            self.add_see_also_line(desc)




def parse_file(fp, func_docs, class_docs):
    """Collect the doc comments of an open C source file.

    ``fp`` is read line by line.  For every comment opened by
    ``comment_start_pat``, the first line Doc.new_doc() recognises as a name
    line creates the documentation object, and the following lines up to the
    end of the comment are handed to its parse_line().  FunctionDoc objects
    are stored in ``func_docs`` and ClassDoc objects in ``class_docs``, keyed
    by name.  Each name found is also reported on stderr.
    """
    line = fp.readline()
    while line:
        if not comment_start_pat.match(line):
            line = fp.readline()
            continue

        if '*/' in line:
            # The comment is closed on the line that opens it, so it has no
            # name line; the lines after it are ordinary code.
            line = fp.readline()
            continue

        # Look for the name line.  Stop at the end of the comment so that a
        # comment without any name does not turn the code following it into
        # documentation.
        doc = None
        line = fp.readline()
        while line and not comment_end_pat.match(line):
            doc = Doc.new_doc(comment_line_lead.sub('', line))
            if doc:
                break
            line = fp.readline()

        if doc:
            sys.stderr.write("    name= %s\n" % doc.name)
            line = fp.readline() # we skip the line giving the name
            # parse_line() returns False on the line closing the comment.
            while line and doc.parse_line(line):
                line = fp.readline()
            if isinstance(doc, FunctionDoc):
                func_docs[doc.name] = doc
            elif isinstance(doc, ClassDoc):
                class_docs[doc.name] = doc
            else:
                raise NotImplementedError

        # ``line`` is the closing line of the comment (or '' at end of
        # file); move on to the line after it, which is examined normally.
        line = fp.readline()

def parse_dir(dir, func_docs, class_docs):
    """Parse every '.c' file found in ``dir`` and its sub-directories.

    The documentation found is added to ``func_docs`` and ``class_docs`` by
    parse_file().
    """
    for entry in os.listdir(dir):
        if entry in ('.', '..'):
            continue
        path = os.path.join(dir, entry)
        if os.path.isdir(path):
            parse_dir(path, func_docs, class_docs)
        elif len(entry) > 2 and entry.endswith('.c'):
            # A directory is never opened as a file, even when its name ends
            # in '.c'.  The file is closed as soon as it has been parsed.
            fp = open(path, 'r')
            try:
                parse_file(fp, func_docs, class_docs)
            finally:
                fp.close()

def extract(dirs, func_docs=None, class_docs=None):
    """Extract the doc comments of the C sources found under ``dirs``.

    ``func_docs`` and ``class_docs`` are the dictionaries to fill; a new one
    is created for each that is not given.  A dictionary passed by the caller
    is filled in place, even when it is empty.

    Returns the ``(func_docs, class_docs)`` tuple.
    """
    if func_docs is None:
        func_docs = {}
    if class_docs is None:
        class_docs = {}
    for directory in dirs:
        parse_dir(directory, func_docs, class_docs)
    return (func_docs, class_docs)

# Section header of a template file: group 1 is the section type, group 2
# the section name.
tmpl_section_pat = re.compile(r'^<!-- ##### (\w+) (\w+) ##### -->$')
def parse_tmpl(fp, doc_dict):
    """Add the documentation found in an open template file to ``doc_dict``.

    Only FUNCTION sections are used.  The lines of such a section are added
    to the entry of ``doc_dict`` named after the section, which is created as
    a FunctionDoc when missing: "@Returns: " lines go to the return text,
    other "@name:" lines to the named parameter and everything else to the
    description.  Lines after the "Unused Parameters" marker are ignored
    until the next section.
    """
    cur_doc = None

    line = fp.readline()
    while line:
        match = tmpl_section_pat.match(line)
        if match:
            cur_doc = None  # new input shouldn't affect the old doc dict
            sect_type = match.group(1)
            sect_name = match.group(2)

            if sect_type == 'FUNCTION':
                cur_doc = doc_dict.get(sect_name)
                if cur_doc is None:
                    cur_doc = FunctionDoc()
                    cur_doc.set_name(sect_name)
                    doc_dict[sect_name] = cur_doc
        elif line == '<!-- # Unused Parameters # -->\n':
            cur_doc = None # don't worry about unused params.
        elif cur_doc is not None:
            if line.startswith('@Returns: '):
                text = line[len('@Returns: '):]
                # An empty "@Returns: " line carries no information.
                if text.strip():
                    cur_doc.append_return(text)
            elif line.startswith('@'):
                pos = line.find(':')
                if pos >= 0:
                    cur_doc.append_to_named_param(line[1:pos], line[pos+1:])
                else:
                    # '@' without a ':' is ordinary text.
                    cur_doc.append_description(line)
            else:
                cur_doc.append_description(line)

        line = fp.readline()

def extract_tmpl(dirs, doc_dict=None):
    """Parse the '.sgml' template files found directly in ``dirs``.

    Sub-directories are not visited.  ``doc_dict`` is the dictionary to
    fill; a new one is created when it is not given, while a dictionary
    passed by the caller is filled in place, even when it is empty.

    Returns ``doc_dict``.
    """
    suffix = '.sgml'
    if doc_dict is None:
        doc_dict = {}
    for directory in dirs:
        for entry in os.listdir(directory):
            if entry in ('.', '..'):
                continue
            path = os.path.join(directory, entry)
            if os.path.isdir(path):
                continue
            # The whole suffix is compared; a fixed two character slice can
            # never be equal to '.sgml'.
            if len(entry) > len(suffix) and entry.endswith(suffix):
                fp = open(path, 'r')
                try:
                    parse_tmpl(fp, doc_dict)
                finally:
                    fp.close()
    return doc_dict