File: extract.py

package info (click to toggle)
pkgconf 1.8.1-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 2,856 kB
  • sloc: ansic: 6,400; sh: 5,783; makefile: 212; python: 132
file content (149 lines) | stat: -rw-r--r-- 5,641 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
# derived from https://github.com/jeanralphaviles/comment_parser/blob/master/comment_parser/parsers/c_parser.py
# MIT license - https://github.com/jeanralphaviles/comment_parser/blob/master/LICENSE


class Comment:
    """A single comment extracted from a source file.

    Attributes:
        comment: Raw comment text, without the comment delimiters.
        line: Line number associated with the comment.
        multiline: True for a '/* ... */' comment, False for a '//' comment.
    """

    def __init__(self, comment, line, multiline):
        self.comment, self.line, self.multiline = comment, line, multiline

    def __repr__(self):
        return "Comment(comment={!r}, line={!r}, multiline={!r})".format(
            self.comment, self.line, self.multiline)

    @property
    def clean_text(self):
        """Return the comment text with its decoration removed.

        Single-line comments are stripped of surrounding whitespace. For
        multi-line comments, only lines starting with ' * ' are kept (minus
        that prefix); any line exactly two characters long becomes an empty
        line, and every other line is dropped.
        """
        if self.multiline:
            kept = []
            for raw in self.comment.splitlines():
                if raw.startswith(' * '):
                    kept.append(raw[3:])
                elif len(raw) == 2:
                    # Two-character lines (such as a bare ' *') act as
                    # paragraph separators.
                    kept.append('')
            return '\n'.join(kept)
        return self.comment.strip()

    @property
    def doc_text(self):
        """Return the text following a leading '!doc' marker, or None.

        The marker and the single character after it are skipped. Comments
        whose cleaned text does not begin with '!doc' yield None.
        """
        cleaned = self.clean_text
        return cleaned[5:] if cleaned.startswith('!doc') else None


class FileError(Exception):
    """Raised when the source file cannot be opened or read."""


class UnterminatedCommentError(Exception):
    """Raised when a multi-line comment is still open at end of file."""


def extract_comments(filename):
    """Extract the comments from a C family source file.

    C family comments come in two forms:
        - Single-line comments begin with '//' and continue to the end of line.
        - Multi-line comments begin with '/*' and end with '*/' and can span
            multiple lines of code.
    Comment markers that appear inside double-quoted string literals are
    ignored. Language-specific preprocessor directives and character
    literals are not taken into consideration.

    Args:
        filename: String name of the file to extract comments from.
    Returns:
        Python list of Comment objects in the order that they appear in the
        file. Multi-line comments carry the line on which they start.
    Raises:
        FileError: File was unable to be opened or read.
        UnterminatedCommentError: Encountered an unterminated multi-line
            comment.
    """
    # Scanner states.
    (CODE,             # outside of any comment or string literal
     SLASH,            # just saw '/', which may start a comment
     LINE_COMMENT,     # inside a '//' comment
     BLOCK_COMMENT,    # inside a '/* */' comment
     BLOCK_STAR,       # inside a '/* */' comment, just saw '*'
     STRING,           # inside a string literal
     STRING_ESCAPE,    # inside a string literal, just saw a backslash
     ) = range(7)

    try:
        with open(filename, 'r') as source_file:
            text = source_file.read()
    except OSError as exception:
        raise FileError(str(exception)) from exception

    state = CODE
    buffer = []  # characters of the comment currently being collected
    comments = []
    line_counter = 1
    comment_start = 1
    for char in text:
        if state == CODE:
            if char == '/':
                state = SLASH
            elif char == '"':
                state = STRING
        elif state == SLASH:
            # Classify the character following '/'.
            if char == '/':
                state = LINE_COMMENT
            elif char == '*':
                comment_start = line_counter
                state = BLOCK_COMMENT
            elif char == '"':
                # A string opening right after a division operator must
                # still be tracked, or its contents could be mistaken for a
                # comment.
                state = STRING
            else:
                state = CODE
        elif state == LINE_COMMENT:
            if char == '\n':
                comments.append(
                    Comment(''.join(buffer), line_counter, False))
                buffer = []
                state = CODE
            else:
                buffer.append(char)
        elif state == BLOCK_COMMENT:
            if char == '*':
                state = BLOCK_STAR
            else:
                buffer.append(char)
        elif state == BLOCK_STAR:
            if char == '/':
                comments.append(
                    Comment(''.join(buffer), comment_start, True))
                buffer = []
                state = CODE
            else:
                # The pending '*' did not close the comment, so it is part
                # of the text. On another '*' stay in this state so that a
                # run of asterisks can still be closed by '/'.
                buffer.append('*')
                if char != '*':
                    buffer.append(char)
                    state = BLOCK_COMMENT
        elif state == STRING:
            if char == '"':
                state = CODE
            elif char == '\\':
                state = STRING_ESCAPE
        elif state == STRING_ESCAPE:
            # The escaped character is skipped, whatever it is.
            state = STRING
        if char == '\n':
            line_counter += 1

    if state in (BLOCK_COMMENT, BLOCK_STAR):
        raise UnterminatedCommentError()
    if state == LINE_COMMENT:
        # File ended inside a single-line comment without a final newline.
        comments.append(Comment(''.join(buffer), line_counter, False))
    return comments


if __name__ == '__main__':
    # Command-line entry point: print the '!doc' text of every documentation
    # comment found in the source file named by the first argument.
    import sys

    if len(sys.argv) < 2:
        sys.exit('usage: %s <source-file>' % sys.argv[0])

    for comment in extract_comments(sys.argv[1]):
        # doc_text is a computed property; evaluate it once per comment.
        doc_text = comment.doc_text
        if doc_text:
            print(doc_text)