File: combine_src.py

Package: duktape 2.7.0-2
#!/usr/bin/env python2
#
#  Combine a set of source files into a single C file.
#
#  Overview of the process:
#
#    * Parse user supplied C files.  Add automatic #undefs at the end
#      of each C file to avoid defines bleeding from one file to another.
#
#    * Combine the C files in the specified order.  If the sources have
#      ordering dependencies (application dependent), the order may matter.
#
#    * Process #include statements in the combined source, categorizing
#      them either as "internal" (found in specified include path) or
#      "external".  Internal includes, unless explicitly excluded, are
#      inlined into the result while external includes are left as is.
#      Duplicate internal #include statements are replaced with a comment.
#
#  At every step, source and header lines are represented with explicit
#  line objects which keep track of original filename and line.  The
#  output contains #line directives, if requested, to ensure error
#  throwing and other diagnostic info will work in a useful manner when
#  deployed.  It's also possible to generate a combined source with no
#  #line directives.
#
#  Making the process deterministic is important: if users maintain diffs
#  against the combined source, those diffs should keep applying for as
#  long as possible.
#
#  Limitations and notes:
#
#    * While there are automatic #undef's for #define's introduced in each
#      C file, it's not possible to "undefine" structs, unions, etc.  If
#      there are structs/unions/typedefs with conflicting names, these
#      have to be resolved in the source files first.
#
#    * Because duplicate #include statements are suppressed, the process
#      currently assumes that #include statements are not conditional.
#
#    * A system header might be #include'd in multiple source files with
#      different feature defines (like _BSD_SOURCE).  Because the #include
#      file will only appear once in the resulting source, the first
#      occurrence wins.  The result may not work correctly if the feature
#      defines must actually be different between two or more source files.
#
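#  Example invocation (file and directory names are hypothetical, for
#  illustration only; the options are those defined in main() below):
#
#      $ python2 combine_src.py \
#            --include-path src --prologue prologue.txt --line-directives \
#            --output-source duktape.c --output-metadata duktape.json \
#            src/duk_api.c src/duk_heap.c
#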

import logging
import sys
logging.basicConfig(level=logging.INFO, stream=sys.stdout, format='%(name)-21s %(levelname)-7s %(message)s')
logger = logging.getLogger('combine_src.py')
logger.setLevel(logging.INFO)

import os
import re
import json
import optparse

# Include paths for finding include files that are amalgamated.
include_paths = []

# Include files specifically excluded from being inlined.
include_excluded = []

class File:
    filename_full = None
    filename = None
    lines = None

    def __init__(self, filename, lines):
        self.filename = os.path.basename(filename)
        self.filename_full = filename
        self.lines = lines

class Line:
    filename_full = None
    filename = None
    lineno = None
    data = None

    def __init__(self, filename, lineno, data):
        self.filename = os.path.basename(filename)
        self.filename_full = filename
        self.lineno = lineno
        self.data = data

def readFile(filename):
    lines = []

    with open(filename, 'rb') as f:
        lineno = 0
        for line in f:
            lineno += 1
            if len(line) > 0 and line[-1] == '\n':
                line = line[:-1]
            lines.append(Line(filename, lineno, line))

    return File(filename, lines)
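
# A worked example of readFile() above (hypothetical path): the call
# readFile('src/duk_api.c') returns a File whose .lines[0] is a Line with
# filename == 'duk_api.c', lineno == 1, and data holding the first source
# line with its trailing newline stripped.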

def lookupInclude(incfn):
    re_sep = re.compile(r'/|\\')

    inccomp = re.split(re_sep, incfn)  # split include path, support / and \

    for path in include_paths:
        fn = os.path.join(path, *inccomp)
        if os.path.exists(fn):
            return fn  # Return full path to first match

    return None
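
# A worked example of the lookup above (hypothetical paths): with
# include_paths == ['src'] and a file 'src/duk_internal.h' on disk,
# lookupInclude('duk_internal.h') returns 'src/duk_internal.h' and the
# include gets inlined; lookupInclude('stdio.h') returns None and the
# #include line is left as is (external).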

def addAutomaticUndefs(f):
    defined = {}

    re_def = re.compile(r'#define\s+(\w+).*$')
    re_undef = re.compile(r'#undef\s+(\w+).*$')

    for line in f.lines:
        m = re_def.match(line.data)
        if m is not None:
            #logger.debug('DEFINED: %s' % repr(m.group(1)))
            defined[m.group(1)] = True
        m = re_undef.match(line.data)
        if m is not None:
            # Could just ignore #undef's here: we'd then emit
            # reliable #undef's (though maybe duplicates) at
            # the end.
            #logger.debug('UNDEFINED: %s' % repr(m.group(1)))
            if m.group(1) in defined:
                del defined[m.group(1)]

    # Undefine anything that seems to be left defined.  This is not a 100%
    # reliable process because some #undef's might be conditional, which we
    # don't track at the moment.  Note that it's safe to #undef something
    # that's not defined.

    keys = sorted(defined.keys())  # deterministic order
    if len(keys) > 0:
        #logger.debug('STILL DEFINED: %r' % defined.keys())
        f.lines.append(Line(f.filename, len(f.lines) + 1, ''))
        f.lines.append(Line(f.filename, len(f.lines) + 1, '/* automatic undefs */'))
        for k in keys:
            logger.debug('automatic #undef for ' + k)
            f.lines.append(Line(f.filename, len(f.lines) + 1, '#undef %s' % k))
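
# A small example of the automatic undef mechanism (macro names are
# hypothetical): a source file containing
#
#     #define DUK__LOCAL_MACRO 1
#     #define DUK__TEMP 2
#
# gets the following appended, because both macros are still defined at
# the end of the file:
#
#     /* automatic undefs */
#     #undef DUK__LOCAL_MACRO
#     #undef DUK__TEMP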

def createCombined(files, prologue_filename, line_directives):
    res = []
    line_map = []   # records combined source lines where the original file/line position changes
    metadata = {
        'line_map': line_map
    }

    emit_state = [ None, 0 ]  # curr_filename, curr_lineno (0 = nothing emitted yet)

    def emit(line):
        if isinstance(line, (str, unicode)):
            res.append(line)
            emit_state[1] += 1
        else:
            if line.filename != emit_state[0] or line.lineno != emit_state[1]:
                if line_directives:
                    res.append('#line %d "%s"' % (line.lineno, line.filename))
                line_map.append({ 'original_file': line.filename,
                                  'original_line': line.lineno,
                                  'combined_line': len(res) + 1 })
            res.append(line.data)
            emit_state[0] = line.filename
            emit_state[1] = line.lineno + 1
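
    # For example, when line_directives is enabled and an emitted line does
    # not directly continue from the previously emitted file/line position,
    # the output gains a directive such as (values hypothetical):
    #
    #     #line 123 "duk_api_stack.c"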

    included = {}  # headers already included

    if prologue_filename is not None:
        with open(prologue_filename, 'rb') as f:
            for line in f.read().split('\n'):
                res.append(line)

    re_inc = re.compile(r'^#include\s+(<|\")(.*?)(>|\").*$')
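
    # The regexp matches both include forms; group(2) captures the path:
    #
    #     #include <stdio.h>         -> group(2) == 'stdio.h'
    #     #include "duk_internal.h"  -> group(2) == 'duk_internal.h'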

    # Process a file, appending it to the result; the input may be a
    # source or an include file.  #include directives are handled
    # recursively.
    def processFile(f):
        logger.debug('Process file: ' + f.filename)

        for line in f.lines:
            if not line.data.startswith('#include'):
                emit(line)
                continue

            m = re_inc.match(line.data)
            if m is None:
                raise Exception('Couldn\'t match #include line: %s' % repr(line.data))
            incpath = m.group(2)
            if incpath in include_excluded:
                # Specific include files excluded from the
                # inlining / duplicate suppression process.
                emit(line)  # keep as is
                continue

            if incpath in included:
                # We suppress duplicate includes, both internal and
                # external, based on the assumption that includes are
                # not behind #if defined() checks.  This is the case for
                # Duktape (except for the explicitly excluded include files).
                emit('/* #include %s -> already included */' % incpath)
                continue
            included[incpath] = True

            # An include file is considered "internal" and is amalgamated
            # if it is found in the include path provided by the user.

            incfile = lookupInclude(incpath)
            if incfile is not None:
                logger.debug('Include considered internal: %s -> %s' % (repr(line.data), repr(incfile)))
                emit('/* #include %s */' % incpath)
                processFile(readFile(incfile))
            else:
                logger.debug('Include considered external: %s' % repr(line.data))
                emit(line)  # keep as is

    for f in files:
        processFile(f)

    return '\n'.join(res) + '\n', metadata
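
# The metadata returned above (and written out as JSON in main()) has the
# following shape; the values here are illustrative only:
#
#     { "line_map": [ { "original_file": "duk_api.c",
#                       "original_line": 1,
#                       "combined_line": 42 }, ... ] }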

def main():
    global include_paths, include_excluded

    parser = optparse.OptionParser()
    parser.add_option('--include-path', dest='include_paths', action='append', default=[], help='Include directory for "internal" includes, can be specified multiple times')
    parser.add_option('--include-exclude', dest='include_excluded', action='append', default=[], help='Include file excluded from being considered internal (even if found in include dirs)')
    parser.add_option('--prologue', dest='prologue', help='Prologue to prepend to start of file')
    parser.add_option('--output-source', dest='output_source', help='Output source filename')
    parser.add_option('--output-metadata', dest='output_metadata', help='Output metadata filename')
    parser.add_option('--line-directives', dest='line_directives', action='store_true', default=False, help='Use #line directives in combined source')
    parser.add_option('--quiet', dest='quiet', action='store_true', default=False, help='Suppress info messages (show warnings)')
    parser.add_option('--verbose', dest='verbose', action='store_true', default=False, help='Show verbose debug messages')
    (opts, args) = parser.parse_args()

    assert(opts.include_paths is not None)
    include_paths = opts.include_paths  # global for easy access
    include_excluded = opts.include_excluded
    assert(opts.output_source)
    assert(opts.output_metadata)

    # Log level.
    if opts.quiet:
        logger.setLevel(logging.WARNING)
    elif opts.verbose:
        logger.setLevel(logging.DEBUG)

    # Read input files, add automatic #undefs
    sources = args
    files = []
    for fn in sources:
        res = readFile(fn)
        logger.debug('Add automatic undefs for: ' + fn)
        addAutomaticUndefs(res)
        files.append(res)

    combined_source, metadata = \
        createCombined(files, opts.prologue, opts.line_directives)
    with open(opts.output_source, 'wb') as f:
        f.write(combined_source)
    with open(opts.output_metadata, 'wb') as f:
        f.write(json.dumps(metadata, indent=4))

    logger.info('Combined %d source files, %d bytes written to %s' % (len(files), len(combined_source), opts.output_source))

if __name__ == '__main__':
    main()