File: ninja_parser.py

package info (click to toggle)
chromium 139.0.7258.127-1
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 6,122,068 kB
  • sloc: cpp: 35,100,771; ansic: 7,163,530; javascript: 4,103,002; python: 1,436,920; asm: 946,517; xml: 746,709; pascal: 187,653; perl: 88,691; sh: 88,436; objc: 79,953; sql: 51,488; cs: 44,583; fortran: 24,137; makefile: 22,147; tcl: 15,277; php: 13,980; yacc: 8,984; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (211 lines) | stat: -rwxr-xr-x 7,447 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
#!/usr/bin/env python3
# Copyright 2017 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Extract source file information from .ninja files."""

# Copied from //tools/binary_size/libsupersize

import argparse
import io
import json
import logging
import os
import re
import sys

sys.path.insert(1, os.path.join(os.path.dirname(__file__), '..'))
import action_helpers

# Matches "build <outputs>: <rule> <inputs>" lines; group 1 is the output
# list (everything before ':'), group 2 is the explicit inputs (stops at the
# implicit-dependency separator '|' or end of line). E.g.:
# build obj/.../foo.o: cxx gen/.../foo.cc || obj/.../foo.inputdeps.stamp
# build obj/.../libfoo.a: alink obj/.../a.o obj/.../b.o |
# build ./libchrome.so ./lib.unstripped/libchrome.so: solink a.o b.o ...
# build libmonochrome.so: __chrome_android_libmonochrome___rule | ...
_REGEX = re.compile(r'build ([^:]+): \w+ (.*?)(?: *\||\n|$)')

# Matches the indented "  rlibs = ..." variable line that follows a link
# rule; group 1 is the space-separated .rlib path list.
_RLIBS_REGEX = re.compile(r'  rlibs = (.*?)(?:\n|$)')

# Unmatched paths seem to happen for empty source_sets(). E.g.:
# obj/chrome/browser/ui/libui_public_dependencies.a
# Log the first _MAX_UNMATCHED_TO_LOG; fail hard past _MAX_UNMATCHED_TO_IGNORE.
_MAX_UNMATCHED_TO_LOG = 20
_MAX_UNMATCHED_TO_IGNORE = 200


class _SourceMapper:

  def __init__(self, dep_map, parsed_files):
    self._dep_map = dep_map
    self.parsed_files = parsed_files
    self._unmatched_paths = set()

  def _FindSourceForPathInternal(self, path):
    if not path.endswith(')'):
      if path.startswith('..'):
        return path
      return self._dep_map.get(path)

    # foo/bar.a(baz.o)
    start_idx = path.index('(')
    lib_name = path[:start_idx]
    obj_name = path[start_idx + 1:-1]
    by_basename = self._dep_map.get(lib_name)
    if not by_basename:
      if lib_name.endswith('rlib') and 'std/' in lib_name:
        # Currently we use binary prebuilt static libraries of the Rust
        # stdlib so we can't get source paths. That may change in future.
        return '(Rust stdlib)/%s' % lib_name
      return None
    if lib_name.endswith('.rlib'):
      # Rust doesn't really have the concept of an object file because
      # the compilation unit is the whole 'crate'. Return whichever
      # filename was the crate root.
      return next(iter(by_basename.values()))
    obj_path = by_basename.get(obj_name)
    if not obj_path:
      # Found the library, but it doesn't list the .o file.
      logging.warning('no obj basename for %s %s', path, obj_name)
      return None
    return self._dep_map.get(obj_path)

  def FindSourceForPath(self, path):
    """Returns the source path for the given object path (or None if not found).

    Paths for objects within archives should be in the format: foo/bar.a(baz.o)
    """
    ret = self._FindSourceForPathInternal(path)
    if not ret and path not in self._unmatched_paths:
      if self.unmatched_paths_count < _MAX_UNMATCHED_TO_LOG:
        logging.warning('Could not find source path for %s (empty source_set?)',
                        path)
      self._unmatched_paths.add(path)
    return ret

  @property
  def unmatched_paths_count(self):
    return len(self._unmatched_paths)


def _ParseNinjaPathList(path_list):
  ret = path_list.replace('\\ ', '\b')
  return [s.replace('\b', ' ') for s in ret.split()]


def _OutputsAreObject(outputs):
  return (outputs.endswith('.a') or outputs.endswith('.o')
          or outputs.endswith('.rlib'))


def _ParseOneFile(lines, dep_map, executable_path):
  """Parses one .ninja file's lines.

  Args:
    lines: Iterable of lines (with trailing newlines) from a .ninja file.
    dep_map: Dict updated in-place: .o output -> source path, and
        archive output -> {object basename: object path}.
    executable_path: Output-dir-relative path of the binary to find linker
        inputs for, or None/'' to skip that search.

  Returns:
    Tuple (sub_ninjas, executable_inputs): paths named by subninja
    statements, and the parsed input list of the rule that builds
    executable_path (None if no such rule appears in this file).
  """
  sub_ninjas = []
  executable_inputs = None
  # Outputs of the most recent non-object build rule; a following
  # "rlibs =" variable line belongs to that rule.
  last_executable_paths = []
  for line in lines:
    if line.startswith('subninja '):
      # len('subninja ') == 9; [-1] drops the trailing newline.
      sub_ninjas.append(line[9:-1])
    # Rust .rlibs are listed as implicit dependencies of the main
    # target linking rule, then are given as an extra
    #   rlibs =
    # variable on a subsequent line. Watch out for that line.
    elif m := _RLIBS_REGEX.match(line):
      if executable_path in last_executable_paths:
        executable_inputs.extend(_ParseNinjaPathList(m.group(1)))
    elif m := _REGEX.match(line):
      outputs, srcs = m.groups()
      if _OutputsAreObject(outputs):
        output = outputs.replace('\\ ', ' ')
        assert output not in dep_map, 'Duplicate output: ' + output
        if output[-1] == 'o':
          # A .o file: srcs is the single source path that produced it.
          dep_map[output] = srcs.replace('\\ ', ' ')
        else:
          # An archive (.a / .rlib): map object basenames to full paths.
          obj_paths = _ParseNinjaPathList(srcs)
          dep_map[output] = {os.path.basename(p): p for p in obj_paths}
      elif executable_path:
        last_executable_paths = [
            os.path.normpath(p) for p in _ParseNinjaPathList(outputs)
        ]
        if executable_path in last_executable_paths:
          executable_inputs = _ParseNinjaPathList(srcs)

  return sub_ninjas, executable_inputs


def _Parse(output_directory, executable_path):
  """Parses build.ninja and all subninja files it transitively references.

  Args:
    output_directory: Where to find the root build.ninja.
    executable_path: Path to binary to find inputs for.

  Returns: A tuple of (source_mapper, executable_inputs).
    source_mapper: _SourceMapper instance.
    executable_inputs: List of paths of linker inputs.
  """
  if executable_path:
    # Ninja rules use output-dir-relative paths.
    executable_path = os.path.relpath(executable_path, output_directory)

  pending = ['build.ninja']
  seen_paths = set(pending)
  dep_map = {}
  executable_inputs = None
  while pending:
    ninja_path = os.path.join(output_directory, pending.pop())
    with open(ninja_path, encoding='utf-8', errors='ignore') as ninja_file:
      sub_ninjas, found_inputs = _ParseOneFile(ninja_file, dep_map,
                                               executable_path)
    if found_inputs:
      assert not executable_inputs, (
          'Found multiple inputs for executable_path ' + executable_path)
      executable_inputs = found_inputs
    for sub_path in sub_ninjas:
      assert sub_path not in seen_paths, 'Double include of ' + sub_path
      seen_paths.add(sub_path)
    pending.extend(sub_ninjas)

  assert executable_inputs, 'Failed to find rule that builds ' + executable_path
  return _SourceMapper(dep_map, seen_paths), executable_inputs


def main():
  """Entry point: writes the source paths of --executable's inputs to JSON."""
  parser = argparse.ArgumentParser()
  parser.add_argument('--executable', required=True)
  parser.add_argument('--result-json', required=True)
  parser.add_argument('--depfile')
  args = parser.parse_args()

  # Capture log output in memory so it can be embedded in the result JSON.
  logs_io = io.StringIO()
  logging.basicConfig(level=logging.DEBUG,
                      format='%(levelname).1s %(relativeCreated)6d %(message)s',
                      stream=logs_io)

  source_mapper, object_paths = _Parse('.', args.executable)
  logging.info('Found %d linker inputs', len(object_paths))

  source_paths = []
  for obj_path in object_paths:
    result = source_mapper.FindSourceForPath(obj_path) or obj_path
    if isinstance(result, dict):
      # Archives resolve to {basename: object path}; map each member.
      for member_path in result.values():
        source_paths.append(
            source_mapper.FindSourceForPath(member_path) or member_path)
    else:
      source_paths.append(result)
  logging.info('Found %d source paths', len(source_paths))

  num_unmatched = source_mapper.unmatched_paths_count
  if num_unmatched > _MAX_UNMATCHED_TO_LOG:
    logging.warning('%d paths were missing sources (showed the first %d)',
                    num_unmatched, _MAX_UNMATCHED_TO_LOG)
  if num_unmatched > _MAX_UNMATCHED_TO_IGNORE:
    raise Exception('Too many unmapped files. Likely a bug in ninja_parser.py')

  if args.depfile:
    action_helpers.write_depfile(args.depfile, args.result_json,
                                 source_mapper.parsed_files)

  with open(args.result_json, 'w', encoding='utf-8') as f:
    json.dump({
        'logs': logs_io.getvalue(),
        'source_paths': source_paths,
    }, f)


# Allow direct invocation as a script.
if __name__ == '__main__':
  main()