#!/usr/bin/env python3
# Copyright 2018 The Emscripten Authors. All rights reserved.
# Emscripten is available under two separate licenses, the MIT license and the
# University of Illinois/NCSA Open Source License. Both these licenses can be
# found in the LICENSE file.
"""Utility tools that extracts DWARF information encoded in a wasm output
produced by the LLVM tools, and encodes it as a wasm source map. Additionally,
it can collect original sources, change files prefixes, and strip debug
sections from a wasm file.
"""
import argparse
from collections import OrderedDict
import json
import logging
from math import floor, log
import os
import re
from subprocess import Popen, PIPE
from pathlib import Path
import sys
__scriptdir__ = os.path.dirname(os.path.abspath(__file__))
__rootdir__ = os.path.dirname(__scriptdir__)
sys.path.append(__rootdir__)
logger = logging.getLogger('wasm-sourcemap')
def parse_args():
parser = argparse.ArgumentParser(prog='wasm-sourcemap.py', description=__doc__)
parser.add_argument('wasm', help='wasm file')
parser.add_argument('-o', '--output', help='output source map')
  parser.add_argument('-p', '--prefix', nargs='*', help='replace source debug filename prefixes in the source map', default=[])
parser.add_argument('-s', '--sources', action='store_true', help='read and embed source files from file system into source map')
  parser.add_argument('-l', '--load-prefix', nargs='*', help='replace source debug filename prefixes when reading sources from the file system (see also --sources)', default=[])
parser.add_argument('-w', nargs='?', help='set output wasm file')
parser.add_argument('-x', '--strip', action='store_true', help='removes debug and linking sections')
  parser.add_argument('-u', '--source-map-url', nargs='?', help='specifies sourceMappingURL section contents')
parser.add_argument('--dwarfdump', help="path to llvm-dwarfdump executable")
parser.add_argument('--dwarfdump-output', nargs='?', help=argparse.SUPPRESS)
  parser.add_argument('--basepath', help='base path for source files; emitted source paths will be relative to it')
return parser.parse_args()
class Prefixes:
def __init__(self, args):
prefixes = []
for p in args:
if '=' in p:
prefix, replacement = p.split('=')
prefixes.append({'prefix': prefix, 'replacement': replacement})
else:
prefixes.append({'prefix': p, 'replacement': None})
self.prefixes = prefixes
self.cache = {}
def resolve(self, name):
if name in self.cache:
return self.cache[name]
    # Default to the unmodified name when no configured prefix matches.
    result = name
    for p in self.prefixes:
if name.startswith(p['prefix']):
if p['replacement'] is None:
result = name[len(p['prefix'])::]
else:
result = p['replacement'] + name[len(p['prefix'])::]
break
self.cache[name] = result
return result
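# For example, '--prefix /old/src=/new/src' rewrites a DWARF file name
# '/old/src/main.c' to '/new/src/main.c', while a bare '--prefix /old/src'
# simply strips the prefix, leaving '/main.c'.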
# SourceMapPrefixes holds the two prefix resolvers:
#  - "sources": rewrites the names emitted into the source map JSON
#  - "load": rewrites the paths used to read source text from the file system
class SourceMapPrefixes:
def __init__(self, sources, load):
self.sources = sources
self.load = load
def provided(self):
return bool(self.sources.prefixes or self.load.prefixes)
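# Source maps store each mapping field as a base64 VLQ: the sign occupies the
# lowest bit, and the value is emitted 5 bits per character starting with the
# least significant group; a set continuation bit (value 32) means more digits
# follow. For example, encode_vlq(16) yields 'gB' and encode_vlq(-1) yields 'D'.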
def encode_vlq(n):
VLQ_CHARS = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
x = (n << 1) if n >= 0 else ((-n << 1) + 1)
result = ""
while x > 31:
result = result + VLQ_CHARS[32 + (x & 31)]
x = x >> 5
return result + VLQ_CHARS[x]
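# Reads an unsigned LEB128 integer from 'wasm' starting at 'pos': each byte
# contributes 7 bits, least significant group first, and the high bit marks
# continuation. For example, the byte sequence 0xE5 0x8E 0x26 decodes to
# 624485. Returns the decoded value and the position just past it.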
def read_var_uint(wasm, pos):
n = 0
shift = 0
b = ord(wasm[pos:pos + 1])
pos = pos + 1
while b >= 128:
n = n | ((b - 128) << shift)
b = ord(wasm[pos:pos + 1])
pos = pos + 1
shift += 7
return n + (b << shift), pos
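# A wasm module begins with an 8-byte header (the '\0asm' magic plus a version
# word), followed by sections encoded as a varuint id, a varuint size and the
# section body. Debug data lives in custom sections (id 0) named '.debug_*',
# 'reloc..debug_*', 'linking' or 'sourceMappingURL', so stripping simply copies
# every section except those.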
def strip_debug_sections(wasm):
logger.debug('Strip debug sections')
pos = 8
stripped = wasm[:pos]
while pos < len(wasm):
section_start = pos
section_id, pos_ = read_var_uint(wasm, pos)
section_size, section_body = read_var_uint(wasm, pos_)
pos = section_body + section_size
if section_id == 0:
name_len, name_pos = read_var_uint(wasm, section_body)
name_end = name_pos + name_len
name = wasm[name_pos:name_end]
if name == "linking" or name == "sourceMappingURL" or name.startswith("reloc..debug_") or name.startswith(".debug_"):
continue # skip debug related sections
stripped = stripped + wasm[section_start:pos]
return stripped
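# Encodes an unsigned integer as LEB128, the inverse of read_var_uint above.
# For example, encode_uint_var(300) == b'\xac\x02'.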
def encode_uint_var(n):
result = bytearray()
while n > 127:
result.append(128 | (n & 127))
n = n >> 7
result.append(n)
return bytes(result)
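# The sourceMappingURL convention stores the map location in a custom section
# (id 0) whose body is the length-prefixed section name followed by the
# length-prefixed URL; debuggers look this section up to locate the source map.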
def append_source_mapping(wasm, url):
logger.debug('Append sourceMappingURL section')
  section_name = b"sourceMappingURL"
  url_bytes = url.encode()
  section_content = encode_uint_var(len(section_name)) + section_name + encode_uint_var(len(url_bytes)) + url_bytes
return wasm + encode_uint_var(0) + encode_uint_var(len(section_content)) + section_content
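# DWARF addresses for wasm code are relative to the Code section (id 10); the
# offset of the section body returned here is later added to every address so
# that the source map refers to absolute offsets within the wasm file.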
def get_code_section_offset(wasm):
logger.debug('Read sections index')
pos = 8
while pos < len(wasm):
section_id, pos_ = read_var_uint(wasm, pos)
section_size, pos = read_var_uint(wasm, pos_)
if section_id == 10:
return pos
pos = pos + section_size
def remove_dead_entries(entries):
  # Remove entries for dead functions. Heuristic: if a block's starting address
  # is too small to lie past the code section's function count byte and the
  # function's own LEB-encoded size field, the block is dropped as dead code.
block_start = 0
cur_entry = 0
while cur_entry < len(entries):
if not entries[cur_entry]['eos']:
cur_entry += 1
continue
fn_start = entries[block_start]['address']
    # Number of bytes in the LEB128 encoding of the function body size.
    fn_size_length = floor(log(entries[cur_entry]['address'] - fn_start + 1, 128)) + 1
    min_live_offset = 1 + fn_size_length  # 1 byte for the code section's function count
if fn_start < min_live_offset:
# Remove dead code debug info block.
del entries[block_start:cur_entry + 1]
cur_entry = block_start
continue
cur_entry += 1
block_start = cur_entry
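# Obtains the DWARF data either from a precomputed dump (--dwarfdump-output) or
# by running 'llvm-dwarfdump -debug-info -debug-line' on the wasm, then parses
# the text: DW_AT_comp_dir is taken from the .debug_info part, and every
# debug_line table is scanned for its include directories, file table and line
# rows (a sample of that output is reproduced in the comments below).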
def read_dwarf_entries(wasm, options):
if options.dwarfdump_output:
output = Path(options.dwarfdump_output).read_bytes()
elif options.dwarfdump:
logger.debug('Reading DWARF information from %s' % wasm)
if not os.path.exists(options.dwarfdump):
logger.error('llvm-dwarfdump not found: ' + options.dwarfdump)
sys.exit(1)
process = Popen([options.dwarfdump, '-debug-info', '-debug-line', '--recurse-depth=0', wasm], stdout=PIPE)
output, err = process.communicate()
exit_code = process.wait()
if exit_code != 0:
logger.error('Error during llvm-dwarfdump execution (%s)' % exit_code)
sys.exit(1)
else:
logger.error('Please specify either --dwarfdump or --dwarfdump-output')
sys.exit(1)
entries = []
debug_line_chunks = re.split(r"debug_line\[(0x[0-9a-f]*)\]", output.decode('utf-8'))
maybe_debug_info_content = debug_line_chunks[0]
for i in range(1, len(debug_line_chunks), 2):
stmt_list = debug_line_chunks[i]
comp_dir_match = re.search(r"DW_AT_stmt_list\s+\(" + stmt_list + r"\)\s+" +
r"DW_AT_comp_dir\s+\(\"([^\"]+)", maybe_debug_info_content)
comp_dir = comp_dir_match.group(1) if comp_dir_match is not None else ""
line_chunk = debug_line_chunks[i + 1]
# include_directories[ 1] = "/Users/yury/Work/junk/sqlite-playground/src"
# file_names[ 1]:
# name: "playground.c"
# dir_index: 1
# mod_time: 0x00000000
# length: 0x00000000
#
# Address Line Column File ISA Discriminator Flags
# ------------------ ------ ------ ------ --- ------------- -------------
# 0x0000000000000006 22 0 1 0 0 is_stmt
# 0x0000000000000007 23 10 1 0 0 is_stmt prologue_end
# 0x000000000000000f 23 3 1 0 0
# 0x0000000000000010 23 3 1 0 0 end_sequence
# 0x0000000000000011 28 0 1 0 0 is_stmt
include_directories = {'0': comp_dir}
for dir in re.finditer(r"include_directories\[\s*(\d+)\] = \"([^\"]*)", line_chunk):
include_directories[dir.group(1)] = dir.group(2)
files = {}
for file in re.finditer(r"file_names\[\s*(\d+)\]:\s+name: \"([^\"]*)\"\s+dir_index: (\d+)", line_chunk):
dir = include_directories[file.group(3)]
file_path = (dir + '/' if file.group(2)[0] != '/' else '') + file.group(2)
files[file.group(1)] = file_path
for line in re.finditer(r"\n0x([0-9a-f]+)\s+(\d+)\s+(\d+)\s+(\d+)(.*?end_sequence)?", line_chunk):
entry = {'address': int(line.group(1), 16), 'line': int(line.group(2)), 'column': int(line.group(3)), 'file': files[line.group(4)], 'eos': line.group(5) is not None}
if not entry['eos']:
entries.append(entry)
else:
# move end of function to the last END operator
entry['address'] -= 1
if entries[-1]['address'] == entry['address']:
# last entry has the same address, reusing
entries[-1]['eos'] = True
else:
entries.append(entry)
remove_dead_entries(entries)
# return entries sorted by the address field
return sorted(entries, key=lambda entry: entry['address'])
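# For example, normalize_path(r'src\lib//a.c') returns 'src/lib/a.c'.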
def normalize_path(path):
return path.replace('\\', '/').replace('//', '/')
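# Builds a standard source map v3 object. Every 'mappings' segment holds four
# VLQ fields: the generated column (here the byte offset of the instruction
# within the wasm file), the source index, the source line and the source
# column, each encoded as a delta from the previous segment and joined with ','.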
def build_sourcemap(entries, code_section_offset, prefixes, collect_sources, base_path):
sources = []
sources_content = [] if collect_sources else None
mappings = []
sources_map = {}
last_address = 0
last_source_id = 0
last_line = 1
last_column = 1
for entry in entries:
line = entry['line']
column = entry['column']
# ignore entries with line 0
if line == 0:
continue
# start at least at column 1
if column == 0:
column = 1
address = entry['address'] + code_section_offset
file_name = entry['file']
file_name = normalize_path(file_name)
# if prefixes were provided, we use that; otherwise, we emit a relative
# path
if prefixes.provided():
source_name = prefixes.sources.resolve(file_name)
else:
try:
file_name = os.path.relpath(file_name, base_path)
except ValueError:
file_name = os.path.abspath(file_name)
file_name = normalize_path(file_name)
source_name = file_name
if source_name not in sources_map:
source_id = len(sources)
sources_map[source_name] = source_id
sources.append(source_name)
if collect_sources:
load_name = prefixes.load.resolve(file_name)
try:
with open(load_name, 'r') as infile:
source_content = infile.read()
sources_content.append(source_content)
except IOError:
print('Failed to read source: %s' % load_name)
sources_content.append(None)
else:
source_id = sources_map[source_name]
address_delta = address - last_address
source_id_delta = source_id - last_source_id
line_delta = line - last_line
column_delta = column - last_column
mappings.append(encode_vlq(address_delta) + encode_vlq(source_id_delta) + encode_vlq(line_delta) + encode_vlq(column_delta))
last_address = address
last_source_id = source_id
last_line = line
last_column = column
return OrderedDict([('version', 3),
('names', []),
('sources', sources),
('sourcesContent', sources_content),
('mappings', ','.join(mappings))])
def main():
options = parse_args()
wasm_input = options.wasm
with open(wasm_input, 'rb') as infile:
wasm = infile.read()
entries = read_dwarf_entries(wasm_input, options)
code_section_offset = get_code_section_offset(wasm)
prefixes = SourceMapPrefixes(sources=Prefixes(options.prefix), load=Prefixes(options.load_prefix))
logger.debug('Saving to %s' % options.output)
  source_map = build_sourcemap(entries, code_section_offset, prefixes, options.sources, options.basepath)
  with open(options.output, 'w') as outfile:
    json.dump(source_map, outfile, separators=(',', ':'))
if options.strip:
wasm = strip_debug_sections(wasm)
if options.source_map_url:
wasm = append_source_mapping(wasm, options.source_map_url)
if options.w:
logger.debug('Saving wasm to %s' % options.w)
with open(options.w, 'wb') as outfile:
outfile.write(wasm)
logger.debug('Done')
return 0
if __name__ == '__main__':
logging.basicConfig(level=logging.DEBUG if os.environ.get('EMCC_DEBUG') else logging.INFO)
sys.exit(main())