#!/usr/bin/env python2
#
# Combine a set of C source files into a single C file.
#
# Overview of the process:
#
# * Parse user-supplied C files. Add automatic #undefs at the end
#   of each C file to avoid defines bleeding from one file to another.
#
# * Combine the C files in the specified order. If sources have ordering
#   dependencies (depends on the application), order may matter.
#
# * Process #include statements in the combined source, categorizing
#   them either as "internal" (found in the specified include path) or
#   "external". Internal includes, unless explicitly excluded, are
#   inlined into the result while external includes are left as is.
#   Duplicate internal #include statements are replaced with a comment.
#
# At every step, source and header lines are represented with explicit
# line objects which keep track of the original filename and line. The
# output contains #line directives, if requested, so that errors and
# other diagnostics refer to the original files in a useful manner when
# the combined source is deployed. It's also possible to generate a
# combined source with no #line directives.
#
# Making the process deterministic is important, so that if users have
# diffs that they apply to the combined source, such diffs would apply
# for as long as possible.
#
# Limitations and notes:
#
# * While there are automatic #undef's for #define's introduced in each
#   C file, it's not possible to "undefine" structs, unions, etc. If
#   there are structs/unions/typedefs with conflicting names, these
#   have to be resolved in the source files first.
#
# * Because duplicate #include statements are suppressed, the process
#   currently assumes that #include statements are not conditional.
#
# * A system header might be #include'd in multiple source files with
#   different feature defines (like _BSD_SOURCE). Because the #include
#   file will only appear once in the resulting source, the first
#   occurrence wins. The result may not work correctly if the feature
#   defines must actually be different between two or more source files.
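#
# Example invocation (all filenames here are hypothetical):
#
#   $ python combine_src.py --include-path src --prologue prologue.txt \
#         --line-directives --output-source combined.c \
#         --output-metadata combined.json one.c two.c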
#
import os
import re
import sys
import json
import optparse
import logging

logging.basicConfig(level=logging.INFO, stream=sys.stdout, format='%(name)-21s %(levelname)-7s %(message)s')
logger = logging.getLogger('combine_src.py')
logger.setLevel(logging.INFO)

# Include path for finding include files which are amalgamated.
include_paths = []

# Include files specifically excluded from being inlined.
include_excluded = []

class File:
    filename_full = None
    filename = None
    lines = None

    def __init__(self, filename, lines):
        self.filename = os.path.basename(filename)
        self.filename_full = filename
        self.lines = lines

class Line:
    filename_full = None
    filename = None
    lineno = None
    data = None

    def __init__(self, filename, lineno, data):
        self.filename = os.path.basename(filename)
        self.filename_full = filename
        self.lineno = lineno
        self.data = data

def readFile(filename):
    lines = []

    with open(filename, 'rb') as f:
        lineno = 0
        for line in f:
            lineno += 1
            if len(line) > 0 and line[-1] == '\n':
                line = line[:-1]  # strip trailing newline
            lines.append(Line(filename, lineno, line))

    return File(filename, lines)

def lookupInclude(incfn):
    re_sep = re.compile(r'/|\\')

    inccomp = re.split(re_sep, incfn)  # split include path, support / and \
    for path in include_paths:
        fn = os.path.join(path, *inccomp)
        if os.path.exists(fn):
            return fn  # Return full path to first match

    return None

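# For example, with include_paths == ['src'], lookupInclude('duk/foo.h')
# (hypothetical names) would check os.path.join('src', 'duk', 'foo.h')
# and return that path if the file exists there.
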
def addAutomaticUndefs(f):
    defined = {}

    re_def = re.compile(r'#define\s+(\w+).*$')
    re_undef = re.compile(r'#undef\s+(\w+).*$')

    for line in f.lines:
        m = re_def.match(line.data)
        if m is not None:
            #logger.debug('DEFINED: %s' % repr(m.group(1)))
            defined[m.group(1)] = True
        m = re_undef.match(line.data)
        if m is not None:
            # Could just ignore #undef's here: we'd then emit
            # reliable #undef's (though maybe duplicates) at
            # the end.
            #logger.debug('UNDEFINED: %s' % repr(m.group(1)))
            if m.group(1) in defined:
                del defined[m.group(1)]

    # Undefine anything that seems to be left defined. This is not a 100%
    # reliable process because some #undef's might be conditional, which
    # we don't track at the moment. Note that it's safe to #undef
    # something that's not defined.

    keys = sorted(defined.keys())  # deterministic order
    if len(keys) > 0:
        #logger.debug('STILL DEFINED: %r' % repr(defined.keys()))
        f.lines.append(Line(f.filename, len(f.lines) + 1, ''))
        f.lines.append(Line(f.filename, len(f.lines) + 1, '/* automatic undefs */'))
        for k in keys:
            logger.debug('automatic #undef for ' + k)
            f.lines.append(Line(f.filename, len(f.lines) + 1, '#undef %s' % k))

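# For example, a source file defining DUK__EXAMPLE (hypothetical macro
# name) with no matching #undef gets these lines appended:
#
#   /* automatic undefs */
#   #undef DUK__EXAMPLE
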
def createCombined(files, prologue_filename, line_directives):
    res = []
    line_map = []  # indicate combined source lines where uncombined file/line would change
    metadata = {
        'line_map': line_map
    }

    emit_state = [ None, None ]  # curr_filename, curr_lineno

    def emit(line):
        if isinstance(line, (str, unicode)):
            res.append(line)
            emit_state[1] += 1
        else:
            if line.filename != emit_state[0] or line.lineno != emit_state[1]:
                if line_directives:
                    res.append('#line %d "%s"' % (line.lineno, line.filename))
                line_map.append({ 'original_file': line.filename,
                                  'original_line': line.lineno,
                                  'combined_line': len(res) + 1 })
            res.append(line.data)
            emit_state[0] = line.filename
            emit_state[1] = line.lineno + 1

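    # With line_directives enabled, emit() interleaves directives such as
    # '#line 123 "example.c"' (values illustrative only) whenever the
    # original file/line position changes, so that compiler diagnostics
    # point back at the original sources.
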
    included = {}  # headers already included

    if prologue_filename is not None:
        with open(prologue_filename, 'rb') as f:
            for line in f.read().split('\n'):
                res.append(line)

    re_inc = re.compile(r'^#include\s+(<|\")(.*?)(>|\").*$')

    # Process a file, appending it to the result; the input may be a
    # source or an include file. #include directives are handled
    # recursively.
    def processFile(f):
        logger.debug('Process file: ' + f.filename)

        for line in f.lines:
            if not line.data.startswith('#include'):
                emit(line)
                continue

            m = re_inc.match(line.data)
            if m is None:
                raise Exception('Couldn\'t match #include line: %s' % repr(line.data))
            incpath = m.group(2)
            if incpath in include_excluded:
                # Specific include files are excluded from the
                # inlining / duplicate suppression process.
                emit(line)  # keep as is
                continue

            if incpath in included:
                # We suppress duplicate includes, both internal and
                # external, based on the assumption that includes are
                # not behind #if defined() checks. This is the case for
                # Duktape (except for the include files excluded).
                emit('/* #include %s -> already included */' % incpath)
                continue
            included[incpath] = True

            # An include file is considered "internal" and is amalgamated
            # if it is found in the include path provided by the user.
            incfile = lookupInclude(incpath)
            if incfile is not None:
                logger.debug('Include considered internal: %s -> %s' % (repr(line.data), repr(incfile)))
                emit('/* #include %s */' % incpath)
                processFile(readFile(incfile))
            else:
                logger.debug('Include considered external: %s' % repr(line.data))
                emit(line)  # keep as is

    for f in files:
        processFile(f)

    return '\n'.join(res) + '\n', metadata

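# The metadata file written by main() contains the line map as JSON,
# along the lines of (values illustrative only):
#
#   {
#       "line_map": [
#           {
#               "original_file": "one.c",
#               "original_line": 1,
#               "combined_line": 2
#           }
#       ]
#   }
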
def main():
    global include_paths, include_excluded

    parser = optparse.OptionParser()
    parser.add_option('--include-path', dest='include_paths', action='append', default=[], help='Include directory for "internal" includes, can be specified multiple times')
    parser.add_option('--include-exclude', dest='include_excluded', action='append', default=[], help='Include file excluded from being considered internal (even if found in include dirs)')
    parser.add_option('--prologue', dest='prologue', help='Prologue to prepend to start of file')
    parser.add_option('--output-source', dest='output_source', help='Output source filename')
    parser.add_option('--output-metadata', dest='output_metadata', help='Output metadata filename')
    parser.add_option('--line-directives', dest='line_directives', action='store_true', default=False, help='Use #line directives in combined source')
    parser.add_option('--quiet', dest='quiet', action='store_true', default=False, help='Suppress info messages (show warnings)')
    parser.add_option('--verbose', dest='verbose', action='store_true', default=False, help='Show verbose debug messages')
    (opts, args) = parser.parse_args()

    assert(opts.include_paths is not None)
    include_paths = opts.include_paths  # global for easy access
    include_excluded = opts.include_excluded
    assert(opts.output_source)
    assert(opts.output_metadata)

    # Log level.
    if opts.quiet:
        logger.setLevel(logging.WARNING)
    elif opts.verbose:
        logger.setLevel(logging.DEBUG)

    # Read input files, add automatic #undefs.
    sources = args
    files = []
    for fn in sources:
        res = readFile(fn)
        logger.debug('Add automatic undefs for: ' + fn)
        addAutomaticUndefs(res)
        files.append(res)

    combined_source, metadata = \
        createCombined(files, opts.prologue, opts.line_directives)
    with open(opts.output_source, 'wb') as f:
        f.write(combined_source)
    with open(opts.output_metadata, 'wb') as f:
        f.write(json.dumps(metadata, indent=4))

    logger.info('Combined %d source files, %d bytes written to %s' % (len(files), len(combined_source), opts.output_source))

if __name__ == '__main__':
    main()