1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161
|
#!/usr/bin/env python
# Copyright 2016 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
__doc__ = """generate_resource_allowlist.py [-o OUTPUT] INPUTS...
INPUTS are paths to unstripped binaries or PDBs containing references to
resources in their debug info.
This script generates a resource allowlist by reading debug info from
INPUTS and writes it to OUTPUT.
"""
# Allowlisted resources are identified by searching the input file for
# instantiations of the special function ui::AllowlistedResource (see
# ui/base/resource/allowlist.h).
import argparse
import os
import subprocess
import sys
import ar
llvm_bindir = os.path.join(os.path.dirname(sys.argv[0]), '..', '..',
'third_party', 'llvm-build', 'Release+Asserts',
'bin')
def ExtractAllowlistFromFile(path, resource_ids):
with open(path, 'rb') as f:
data = f.read()
# When symbol_level=0, only mangled names exist.
# E.g.: _ZN2ui19AllowlistedResourceILi22870EEEvv
prefix = b'AllowlistedResourceILi'
start_idx = 0
while start_idx != -1:
start_idx = data.find(prefix, start_idx)
if start_idx != -1:
end_idx = data.find(b'E', start_idx)
resource_ids.add(int(data[start_idx + len(prefix):end_idx]))
start_idx = end_idx
def GetResourceAllowlistELF(path):
# Produce a resource allowlist by searching for debug info referring to
# AllowlistedResource.
# This used to use "readelf -p .debug_str", but it doesn't seem to work with
# use_debug_fission=true. Reading the raw file is faster anyways.
resource_ids = set()
ExtractAllowlistFromFile(path, resource_ids)
return resource_ids
def GetResourceAllowlistPDB(path):
# Produce a resource allowlist by using llvm-pdbutil to read a PDB file's
# publics stream, which is essentially a symbol table, and searching for
# instantiations of AllowlistedResource. Any such instantiations are demangled
# to extract the resource identifier.
pdbutil = subprocess.Popen(
[os.path.join(llvm_bindir, 'llvm-pdbutil'), 'dump', '-publics', path],
stdout=subprocess.PIPE)
names = ''
for line in pdbutil.stdout:
line = line.decode('utf8')
# Read a line of the form
# "733352 | S_PUB32 [size = 56] `??$AllowlistedResource@$0BFGM@@ui@@YAXXZ`".
if '`' not in line:
continue
sym_name = line[line.find('`') + 1:line.rfind('`')]
# Under certain conditions such as the GN arg `use_clang_coverage = true` it
# is possible for the compiler to emit additional symbols that do not match
# the standard mangled-name format.
# Example: __profd_??$AllowlistedResource@$0BGPH@@ui@@YAXXZ
# C++ mangled names are supposed to begin with `?`, so check for that.
if 'AllowlistedResource' in sym_name and sym_name.startswith('?'):
names += sym_name + '\n'
exit_code = pdbutil.wait()
if exit_code != 0:
raise Exception('llvm-pdbutil exited with exit code %d' % exit_code)
undname = subprocess.Popen([os.path.join(llvm_bindir, 'llvm-undname')],
stdin=subprocess.PIPE,
stdout=subprocess.PIPE)
stdout, _ = undname.communicate(names.encode('utf8'))
resource_ids = set()
for line in stdout.split(b'\n'):
line = line.decode('utf8')
# Read a line of the form
# "void __cdecl ui::AllowlistedResource<5484>(void)".
prefix = ' ui::AllowlistedResource<'
pos = line.find(prefix)
if pos == -1:
continue
try:
resource_ids.add(int(line[pos + len(prefix):line.rfind('>')]))
except ValueError:
continue
exit_code = undname.wait()
if exit_code != 0:
raise Exception('llvm-undname exited with exit code %d' % exit_code)
return resource_ids
def GetResourceAllowlistFileList(file_list_path):
# Creates a list of resources given the list of linker input files.
# Simply grep's them for AllowlistedResource<...>.
with open(file_list_path) as f:
paths = f.read().splitlines()
paths = ar.ExpandThinArchives(paths)
resource_ids = set()
for p in paths:
ExtractAllowlistFromFile(p, resource_ids)
return resource_ids
def WriteResourceAllowlist(args):
resource_ids = set()
for input in args.inputs:
with open(input, 'rb') as f:
magic = f.read(4)
chunk = f.read(60)
if magic == b'\x7fELF':
func = GetResourceAllowlistELF
elif magic == b'Micr':
func = GetResourceAllowlistPDB
elif magic == b'obj/' or b'/obj/' in chunk:
# For secondary toolchain, path will look like android_clang_arm/obj/...
func = GetResourceAllowlistFileList
else:
raise Exception('unknown file format')
resource_ids.update(func(input))
# The last time this broke, exactly two resources were still being found.
if len(resource_ids) < 100:
raise Exception('Suspiciously few resources found. Likely an issue with '
'the regular expression in this script. Found: ' +
','.join(sorted(resource_ids)))
for id in sorted(resource_ids):
args.output.write(str(id) + '\n')
def main():
parser = argparse.ArgumentParser(usage=__doc__)
parser.add_argument('inputs', nargs='+', help='An unstripped binary or PDB.')
parser.add_argument('-o',
dest='output',
type=argparse.FileType('w'),
default=sys.stdout,
help='The resource list path to write (default stdout)')
args = parser.parse_args()
WriteResourceAllowlist(args)
if __name__ == '__main__':
main()
|