1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234
|
#!/usr/bin/env python3
# Tool to bundle multiple C/C++ source files, inlining any includes.
#
# Note: there are two types of exclusion options: the '-x' flag, which besides
# excluding a file also adds an #error directive in place of the #include, and
# the '-k' flag, which keeps the #include and doesn't inline the file. The
# intended use cases are: '-x' for files that would normally be #if'd out, so
# features that 100% won't be used in the amalgamated file, for which every
# occurrence adds the error, and '-k' for headers that we wish to manually
# include, such as a project's public API, for which occurrences after the first
# are removed.
#
# Todo: the error handling could be better, which currently throws and halts
# (which is functional just not very friendly).
#
# Author: Carl Woffenden, Numfum GmbH (this script is released under a CC0 license/Public Domain)
import argparse, re, sys
from pathlib import Path
from typing import Any, List, Optional, Pattern, Set, TextIO
# Set of file roots when searching (equivalent to -I paths for the compiler).
roots: Set[Path] = set()
# Set of (canonical) file Path objects to exclude from inlining (and not only
# exclude but to add a compiler error directive when they're encountered).
excludes: Set[Path] = set()
# Set of (canonical) file Path objects to keep as include directives.
keeps: Set[Path] = set()
# Whether to keep the #pragma once directives (unlikely, since this will result
# in a warning, but the option is there).
keep_pragma: bool = False
# Destination file object (or stdout if no output file was supplied).
destn: TextIO = sys.stdout
# Set of file Path objects previously inlined (and to ignore if reencountering).
found: Set[Path] = set()
# Compiled regex Pattern to handle "#pragma once" in various formats:
#
# #pragma once
# #pragma once
# # pragma once
# #pragma once
# #pragma once // comment
#
# Ignoring commented versions, same as include_regex.
#
pragma_regex: Pattern = re.compile(r'^\s*#\s*pragma\s*once\s*')
# Compiled regex Pattern to handle the following type of file includes:
#
# #include "file"
# #include "file"
# # include "file"
# #include "file"
# #include "file" // comment
# #include "file" // comment with quote "
#
# And all combinations of, as well as ignoring the following:
#
# #include <file>
# //#include "file"
# /*#include "file"*/
#
# We don't try to catch errors since the compiler will do this (and the code is
# expected to be valid before processing) and we don't care what follows the
# file (whether it's a valid comment or not, since anything after the quoted
# string is ignored)
#
include_regex: Pattern = re.compile(r'^\s*#\s*include\s*"(.+?)"')
# Simple tests to prove include_regex's cases.
#
def test_match_include() -> bool:
if (include_regex.match('#include "file"') and
include_regex.match(' #include "file"') and
include_regex.match('# include "file"') and
include_regex.match('#include "file"') and
include_regex.match('#include "file" // comment')):
if (not include_regex.match('#include <file>') and
not include_regex.match('//#include "file"') and
not include_regex.match('/*#include "file"*/')):
found = include_regex.match('#include "file" // "')
if (found and found.group(1) == 'file'):
print('#include match valid')
return True
return False
# Simple tests to prove pragma_regex's cases.
#
def test_match_pragma() -> bool:
if (pragma_regex.match('#pragma once') and
pragma_regex.match(' #pragma once') and
pragma_regex.match('# pragma once') and
pragma_regex.match('#pragma once') and
pragma_regex.match('#pragma once // comment')):
if (not pragma_regex.match('//#pragma once') and
not pragma_regex.match('/*#pragma once*/')):
print('#pragma once match valid')
return True
return False
# Finds 'file'. First the list of 'root' paths are searched, followed by the
# currently processing file's 'parent' path, returning a valid Path in
# canonical form. If no match is found None is returned.
#
def resolve_include(file: str, parent: Optional[Path] = None) -> Optional[Path]:
for root in roots:
found = root.joinpath(file).resolve()
if (found.is_file()):
return found
if (parent):
found = parent.joinpath(file).resolve();
else:
found = Path(file)
if (found.is_file()):
return found
return None
# Helper to resolve lists of files. 'file_list' is passed in from the arguments
# and each entry resolved to its canonical path (like any include entry, either
# from the list of root paths or the owning file's 'parent', which in this case
# is case is the input file). The results are stored in 'resolved'.
#
def resolve_excluded_files(file_list: Optional[List[str]], resolved: Set[Path], parent: Optional[Path] = None) -> None:
if (file_list):
for filename in file_list:
found = resolve_include(filename, parent)
if (found):
resolved.add(found)
else:
error_line(f'Warning: excluded file not found: {filename}')
# Writes 'line' to the open 'destn' (or stdout).
#
def write_line(line: str) -> None:
print(line, file=destn)
# Logs 'line' to stderr. This is also used for general notifications that we
# don't want to go to stdout (so the source can be piped).
#
def error_line(line: Any) -> None:
print(line, file=sys.stderr)
# Inline the contents of 'file' (with any of its includes also inlined, etc.).
#
# Note: text encoding errors are ignored and replaced with ? when reading the
# input files. This isn't ideal, but it's more than likely in the comments than
# code and a) the text editor has probably also failed to read the same content,
# and b) the compiler probably did too.
#
def add_file(file: Path, file_name: str = None) -> None:
if (file.is_file()):
if (not file_name):
file_name = file.name
error_line(f'Processing: {file_name}')
with file.open('r', errors='replace') as opened:
for line in opened:
line = line.rstrip('\n')
match_include = include_regex.match(line);
if (match_include):
# We have a quoted include directive so grab the file
inc_name = match_include.group(1)
resolved = resolve_include(inc_name, file.parent)
if (resolved):
if (resolved in excludes):
# The file was excluded so error if the compiler uses it
write_line(f'#error Using excluded file: {inc_name} (re-amalgamate source to fix)')
error_line(f'Excluding: {inc_name}')
else:
if (resolved not in found):
# The file was not previously encountered
found.add(resolved)
if (resolved in keeps):
# But the include was flagged to keep as included
write_line(f'/**** *NOT* inlining {inc_name} ****/')
write_line(line)
error_line(f'Not inlining: {inc_name}')
else:
# The file was neither excluded nor seen before so inline it
write_line(f'/**** start inlining {inc_name} ****/')
add_file(resolved, inc_name)
write_line(f'/**** ended inlining {inc_name} ****/')
else:
write_line(f'/**** skipping file: {inc_name} ****/')
else:
# The include file didn't resolve to a file
write_line(f'#error Unable to find: {inc_name}')
error_line(f'Error: Unable to find: {inc_name}')
else:
# Skip any 'pragma once' directives, otherwise write the source line
if (keep_pragma or not pragma_regex.match(line)):
write_line(line)
else:
error_line(f'Error: Invalid file: {file}')
# Start here
parser = argparse.ArgumentParser(description='Amalgamate Tool', epilog=f'example: {sys.argv[0]} -r ../my/path -r ../other/path -o out.c in.c')
parser.add_argument('-r', '--root', action='append', type=Path, help='file root search path')
parser.add_argument('-x', '--exclude', action='append', help='file to completely exclude from inlining')
parser.add_argument('-k', '--keep', action='append', help='file to exclude from inlining but keep the include directive')
parser.add_argument('-p', '--pragma', action='store_true', default=False, help='keep any "#pragma once" directives (removed by default)')
parser.add_argument('-o', '--output', type=argparse.FileType('w'), help='output file (otherwise stdout)')
parser.add_argument('input', type=Path, help='input file')
args = parser.parse_args()
# Fail early on an invalid input (and store it so we don't recurse)
args.input = args.input.resolve(strict=True)
found.add(args.input)
# Resolve all of the root paths upfront (we'll halt here on invalid roots)
if (args.root):
for path in args.root:
roots.add(path.resolve(strict=True))
# The remaining params: so resolve the excluded files and #pragma once directive
resolve_excluded_files(args.exclude, excludes, args.input.parent)
resolve_excluded_files(args.keep, keeps, args.input.parent)
keep_pragma = args.pragma;
# Then recursively process the input file
try:
if (args.output):
destn = args.output
add_file(args.input)
finally:
if (destn):
destn.close()
|