1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262
|
#!/usr/bin/env python3
"""
This is a middle-processor for MicroPython source files. It takes the output
of the C preprocessor, has the option to change it, then feeds this into the
C compiler.
It currently has the ability to reorder static hash tables so they are actually
hashed, resulting in faster lookup times at runtime.
To use, configure the Python variables below, and add the following line to the
Makefile:
CFLAGS += -no-integrated-cpp -B$(shell pwd)/../tools
"""
import sys
import os
import re
################################################################################
# these are the configuration variables
# TODO somehow make them externally configurable
# this is the path to the true C compiler
cc1_path = '/usr/lib/gcc/x86_64-unknown-linux-gnu/5.3.0/cc1'
#cc1_path = '/usr/lib/gcc/arm-none-eabi/5.3.0/cc1'
# this must be the same as MICROPY_QSTR_BYTES_IN_HASH
bytes_in_qstr_hash = 2
# this must be 1 or more (can be a decimal)
# larger uses more code size but yields faster lookups
table_size_mult = 1
# these control output during processing
print_stats = True
print_debug = False
# end configuration variables
################################################################################
# precompile regexs
re_preproc_line = re.compile(r'# [0-9]+ ')
re_map_entry = re.compile(r'\{.+?\(MP_QSTR_([A-Za-z0-9_]+)\).+\},')
re_mp_obj_dict_t = re.compile(r'(?P<head>(static )?const mp_obj_dict_t (?P<id>[a-z0-9_]+) = \{ \.base = \{&mp_type_dict\}, \.map = \{ \.all_keys_are_qstrs = 1, \.is_fixed = 1, \.is_ordered = )1(?P<tail>, \.used = .+ };)$')
re_mp_map_t = re.compile(r'(?P<head>(static )?const mp_map_t (?P<id>[a-z0-9_]+) = \{ \.all_keys_are_qstrs = 1, \.is_fixed = 1, \.is_ordered = )1(?P<tail>, \.used = .+ };)$')
re_mp_rom_map_elem_t = re.compile(r'static const mp_rom_map_elem_t [a-z_0-9]+\[\] = {$')
# this must match the equivalent function in qstr.c
def compute_hash(qstr):
hash = 5381
for char in qstr:
hash = (hash * 33) ^ ord(char)
# Make sure that valid hash is never zero, zero means "hash not computed"
return (hash & ((1 << (8 * bytes_in_qstr_hash)) - 1)) or 1
# this algo must match the equivalent in map.c
def hash_insert(map, key, value):
hash = compute_hash(key)
pos = hash % len(map)
start_pos = pos
if print_debug:
print(' insert %s: start at %u/%u -- ' % (key, pos, len(map)), end='')
while True:
if map[pos] is None:
# found empty slot, so key is not in table
if print_debug:
print('put at %u' % pos)
map[pos] = (key, value)
return
else:
# not yet found, keep searching
if map[pos][0] == key:
raise AssertionError("duplicate key '%s'" % (key,))
pos = (pos + 1) % len(map)
assert pos != start_pos
def hash_find(map, key):
hash = compute_hash(key)
pos = hash % len(map)
start_pos = pos
attempts = 0
while True:
attempts += 1
if map[pos] is None:
return attempts, None
elif map[pos][0] == key:
return attempts, map[pos][1]
else:
pos = (pos + 1) % len(map)
if pos == start_pos:
return attempts, None
def process_map_table(file, line, output):
output.append(line)
# consume all lines that are entries of the table and concat them
# (we do it this way because there can be multiple entries on one line)
table_contents = []
while True:
line = file.readline()
if len(line) == 0:
print('unexpected end of input')
sys.exit(1)
line = line.strip()
if len(line) == 0:
# empty line
continue
if re_preproc_line.match(line):
# preprocessor line number comment
continue
if line == '};':
# end of table (we assume it appears on a single line)
break
table_contents.append(line)
# make combined string of entries
entries_str = ''.join(table_contents)
# split into individual entries
entries = []
while entries_str:
# look for single entry, by matching nested braces
match = None
if entries_str[0] == '{':
nested_braces = 0
for i in range(len(entries_str)):
if entries_str[i] == '{':
nested_braces += 1
elif entries_str[i] == '}':
nested_braces -= 1
if nested_braces == 0:
match = re_map_entry.match(entries_str[:i + 2])
break
if not match:
print('unknown line in table:', entries_str)
sys.exit(1)
# extract single entry
line = match.group(0)
qstr = match.group(1)
entries_str = entries_str[len(line):].lstrip()
# add the qstr and the whole line to list of all entries
entries.append((qstr, line))
# sort entries so hash table construction is deterministic
entries.sort()
# create hash table
map = [None] * int(len(entries) * table_size_mult)
for qstr, line in entries:
# We assume that qstr does not have any escape sequences in it.
# This is reasonably safe, since keys in a module or class dict
# should be standard identifiers.
# TODO verify this and raise an error if escape sequence found
hash_insert(map, qstr, line)
# compute statistics
total_attempts = 0
for qstr, _ in entries:
attempts, line = hash_find(map, qstr)
assert line is not None
if print_debug:
print(' %s lookup took %u attempts' % (qstr, attempts))
total_attempts += attempts
if len(entries):
stats = len(map), len(entries) / len(map), total_attempts / len(entries)
else:
stats = 0, 0, 0
if print_debug:
print(' table stats: size=%d, load=%.2f, avg_lookups=%.1f' % stats)
# output hash table
for row in map:
if row is None:
output.append('{ 0, 0 },\n')
else:
output.append(row[1] + '\n')
output.append('};\n')
# skip to next non-blank line
while True:
line = file.readline()
if len(line) == 0:
print('unexpected end of input')
sys.exit(1)
line = line.strip()
if len(line) == 0:
continue
break
# transform the is_ordered param from 1 to 0
match = re_mp_obj_dict_t.match(line)
if match is None:
match = re_mp_map_t.match(line)
if match is None:
print('expecting mp_obj_dict_t or mp_map_t definition')
print(output[0])
print(line)
sys.exit(1)
line = match.group('head') + '0' + match.group('tail') + '\n'
output.append(line)
return (match.group('id'),) + stats
def process_file(filename):
output = []
file_changed = False
with open(filename, 'rt') as f:
while True:
line = f.readline()
if not line:
break
if re_mp_rom_map_elem_t.match(line):
file_changed = True
stats = process_map_table(f, line, output)
if print_stats:
print(' [%s: size=%d, load=%.2f, avg_lookups=%.1f]' % stats)
else:
output.append(line)
if file_changed:
if print_debug:
print(' modifying static maps in', output[0].strip())
with open(filename, 'wt') as f:
for line in output:
f.write(line)
def main():
# run actual C compiler
# need to quote args that have special characters in them
def quote(s):
if s.find('<') != -1 or s.find('>') != -1:
return "'" + s + "'"
else:
return s
ret = os.system(cc1_path + ' ' + ' '.join(quote(s) for s in sys.argv[1:]))
if ret != 0:
ret = (ret & 0x7f) or 127 # make it in range 0-127, but non-zero
sys.exit(ret)
if sys.argv[1] == '-E':
# CPP has been run, now do our processing stage
for i, arg in enumerate(sys.argv):
if arg == '-o':
return process_file(sys.argv[i + 1])
print('%s: could not find "-o" option' % (sys.argv[0],))
sys.exit(1)
elif sys.argv[1] == '-fpreprocessed':
# compiler has been run, nothing more to do
return
else:
# unknown processing stage
print('%s: unknown first option "%s"' % (sys.argv[0], sys.argv[1]))
sys.exit(1)
if __name__ == '__main__':
main()
|