File: find_features.py

package info (click to toggle)
chromium 139.0.7258.127-1
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 6,122,068 kB
  • sloc: cpp: 35,100,771; ansic: 7,163,530; javascript: 4,103,002; python: 1,436,920; asm: 946,517; xml: 746,709; pascal: 187,653; perl: 88,691; sh: 88,436; objc: 79,953; sql: 51,488; cs: 44,583; fortran: 24,137; makefile: 22,147; tcl: 15,277; php: 13,980; yacc: 8,984; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (136 lines) | stat: -rw-r--r-- 3,875 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
# Copyright 2023 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Python module to find feature names in source code.

These functions are declared in a separate module to allow multiprocessing to
correctly unpickle the called functions again.
"""

import glob
import itertools
import multiprocessing
import pathlib
import re

from typing import List, Set

# Matches a three-argument BASE_FEATURE(...) declaration in raw file bytes.
# The three lazy groups capture the macro arguments in order; callers read the
# feature name from group 2.
BASE_FEATURE_PATTERN = br'BASE_FEATURE\((.*?),(.*?),(.*?)\);'
# DOTALL lets '.' cross newlines so declarations spanning several lines match.
BASE_FEATURE_RE = re.compile(BASE_FEATURE_PATTERN, re.MULTILINE | re.DOTALL)

# Names of the directories directly below the repository root that are
# searched for feature declarations. If your feature is declared outside these
# root directories, then add the directory here.
DIRECTORIES_TO_SEARCH = [
    'android_webview',
    'apps',
    'ash',
    'base',
    'cc',
    'chrome',
    'chromecast',
    'chromeos',
    'clank',
    'components',
    'content',
    'courgette',
    'crypto',
    'dbus',
    'device',
    'extensions',
    'fuchsia_web',
    'gin',
    'google_apis',
    'gpu',
    'headless',
    'infra',
    'internal',
    'ios',
    'ipc',
    'media',
    'mojo',
    'native_client',
    'native_client_sdk',
    'net',
    'pdf',
    'ppapi',
    'printing',
    'remoting',
    'rlz',
    'sandbox',
    'services',
    'skia',
    'sql',
    'storage',
    # third_party/blink is handled separately in _FindDeclaredFeaturesImpl.
    'ui',
    'url',
    'v8',
    'webkit',
    'weblayer',
]


def _FindFeaturesInFile(filepath: str) -> List[str]:
  """Extracts the feature names declared with BASE_FEATURE in one file.

  Args:
    filepath: Path of the file to scan.
  Returns:
    One entry per BASE_FEATURE_RE match, in file order: the second macro
    argument with surrounding whitespace and double quotes removed, decoded
    as UTF-8.
  """
  # Scan raw bytes so that only the extracted names, not the entire file,
  # have to be valid UTF-8.
  with open(filepath, 'rb') as source_file:
    raw_source = source_file.read()

  feature_names = []
  for declaration in BASE_FEATURE_RE.finditer(raw_source):
    # The second macro argument is the quoted feature name.
    quoted_name = declaration.group(2).strip()
    feature_names.append(quoted_name.strip(b'"').decode('utf-8'))
  return feature_names


def _FindDeclaredFeaturesImpl(repository_root: pathlib.Path) -> Set[str]:
  """Collects every feature name declared below |repository_root|.

  Scans *.cc files in the DIRECTORIES_TO_SEARCH folders found directly under
  the root, *.cc files under third_party/blink, and *.mm files under ios.

  Args:
    repository_root: Root directory of the checkout to scan.
  Returns:
    Set of all feature names found in the scanned files.
  """
  root = pathlib.Path(repository_root)

  # Features are supposed to be defined in .cc files, so each allowed
  # top-level directory contributes one recursive *.cc pattern.
  glob_patterns = []
  for entry in root.iterdir():
    if entry.is_dir() and entry.name in DIRECTORIES_TO_SEARCH:
      glob_patterns.append(str(entry / '**' / '*.cc'))

  glob_patterns.extend([
      # blink is the only directory in third_party that should be searched.
      str(root / 'third_party' / 'blink' / '**' / '*.cc'),
      # Additional features for iOS can be found in mm files in the ios
      # directory.
      str(root / 'ios' / '**' / '*.mm'),
  ])

  # iglob yields paths lazily, so the file list is never fully materialized
  # here.
  candidate_files = itertools.chain.from_iterable(
      [glob.iglob(pattern, recursive=True) for pattern in glob_patterns])

  # Limit to 4 processes - disk access becomes the bottleneck with just a few
  # processes, but splitting the search across multiple CPUs does yield a
  # benefit of a few seconds.
  # The exact batch size does not seem to matter much, as long as it is >> 1.
  pool = multiprocessing.Pool(4)
  per_file_features = pool.imap_unordered(_FindFeaturesInFile,
                                          candidate_files,
                                          chunksize=1000)
  # Wait for all workers to finish; the results stay available through the
  # iterator afterwards.
  pool.close()
  pool.join()

  return set(itertools.chain.from_iterable(per_file_features))


def FindDeclaredFeatures(input_api) -> Set[str]:
  """Returns the names of all features declared in the source tree.

  Looks up the repository root through |input_api| and scans the *.cc and
  *.mm files below it for features defined with the BASE_FEATURE macro.

  Args:
    input_api: InputApi instance; its change.RepositoryRoot() supplies the
      directory to scan.
  Returns:
    Set of defined feature names in the source tree.
  """
  repository_root = input_api.change.RepositoryRoot()
  return _FindDeclaredFeaturesImpl(repository_root)


if __name__ == '__main__':
  # Stand-alone run: scan the current working directory and print the result.
  declared_features = _FindDeclaredFeaturesImpl(pathlib.Path('.'))
  print(declared_features)