File: cleanup_stale_fieldtrial_configs.py

# Copyright 2021 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Simple script for cleaning up stale configs from fieldtrial_testing_config.

Methodology:
  Scans for all study names that appear in the fieldtrial config file
  and removes the ones that don't appear anywhere in the codebase.
  The script ignores WebRTC entries as those often lead to false positives.

Usage:
  vpython3 tools/variations/cleanup_stale_fieldtrial_configs.py

Run with --help to get a complete list of options this script supports.
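
For example, to write the cleaned config to a separate file and use more
scanning threads (all flags are optional and default to cleaning the in-tree
config in place with 16 threads; the output path below is illustrative):
  vpython3 tools/variations/cleanup_stale_fieldtrial_configs.py \
      --output_path=/tmp/fieldtrial_testing_config.json --thread_count=32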

If this script removes features that appear to be used in the codebase,
double-check the study or feature name for typos or case differences.
"""

from __future__ import print_function

import json
import optparse
import os
import re
import subprocess
import sys
import threading

CONFIG_PATH = 'testing/variations/fieldtrial_testing_config.json'
PRESUBMIT_SCRIPT = 'testing/variations/PRESUBMIT.py'
THREAD_COUNT = 16

# The following is a list of regexes to run against literals; if a literal
# matches, it is counted as being used. Use this to skip removal of studies
# (and of studies that depend on matching features) that are not visible in
# code, e.g. on ChromeOS, where experiments are passed from ash to platform
# services.
_LITERAL_SKIP_REGEX_STRINGS = ['^CrOSLateBoot.*', '^CrOSEarlyBoot.*']

_LITERAL_SKIP_REGEXES = [
    re.compile(regexp_str) for regexp_str in _LITERAL_SKIP_REGEX_STRINGS
]
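# Memoizes literal -> used lookups so feature names shared across studies are
# only scanned once.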
_LITERAL_CACHE = {}


def is_literal_in_skiplist(literal):
  for regex in _LITERAL_SKIP_REGEXES:
    if regex.match(literal):
      print('Skipping', repr(literal), 'due to', regex)
      return True
  return False


def is_literal_in_git(literal):
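  # Grep the whole checkout for the literal wrapped in double quotes so that
  # only exact string literals match, not substrings of longer names.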
  git_grep_cmd = ('git', 'grep', '--threads', '2', '-l', '\"%s\"' % literal)
  git_grep_proc = subprocess.Popen(git_grep_cmd, stdout=subprocess.PIPE)
  # Check for >1 since fieldtrial_testing_config.json will always be a result.
  return len(git_grep_proc.stdout.read().splitlines()) > 1


def is_literal_in_files(literal, code_files):
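  # |code_files| is piped to xargs on stdin; xargs runs grep over those files
  # looking for the quoted literal. -s silences errors for unreadable files.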
  bash_files_using_literal = subprocess.Popen(
      ('xargs', 'grep', '-s', '-l', '\\\"%s\\\"' % literal),
      stdin=subprocess.PIPE,
      stdout=subprocess.PIPE)
  files_using_literal = bash_files_using_literal.communicate(code_files)[0]
  return len(files_using_literal.splitlines()) > 0


def is_literal_used(literal, code_files):
  """Check if a given string literal is used in the passed code files."""
  if literal in _LITERAL_CACHE:
    return _LITERAL_CACHE[literal]

  used = is_literal_in_skiplist(literal) or is_literal_in_git(
      literal) or is_literal_in_files(literal, code_files)
  if not used:
    print('Did not find', repr(literal))
  _LITERAL_CACHE[literal] = used
  return used


def is_study_used(study_name, configs, code_files):
  """Checks if a given study is used in the codebase."""
  if study_name.startswith('WebRTC-'):
    return True  # Skip WebRTC studies, which give false positives.

  if is_literal_used(study_name, code_files):
    return True
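  # The study name itself may not appear in code; keep the study if any
  # feature it enables or disables is referenced.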
  for config in configs:
    for experiment in config.get('experiments', []):
      for feature in experiment.get('enable_features', []):
        if is_literal_used(feature, code_files):
          return True
      for feature in experiment.get('disable_features', []):
        if is_literal_used(feature, code_files):
          return True
  return False


def thread_func(thread_limiter, studies_map, study_name, configs, code_files):
  """Runs a limited number of tasks and updates the map with the results.

  Args:
    thread_limiter: A semaphore used to limit the number of active threads.
    studies_map: The map where confirmed studies are added to.
    study_name: The name of the study to check.
    configs: The configs for the given study.
    code_files: Newline-separated paths to all code files (.cc or .h files).

  Side-effect:
    This function adds the study to |studies_map| if it is used.
  """
  thread_limiter.acquire()
  try:
    if is_study_used(study_name, configs, code_files):
      studies_map[study_name] = configs
  finally:
    thread_limiter.release()


def main():
  parser = optparse.OptionParser()
  parser.add_option('--input_path',
                    help='Path to the fieldtrial config file to clean.')
  parser.add_option('--output_path',
                    help='Path to write cleaned up fieldtrial config file.')
  parser.add_option('--thread_count',
                    type='int',
                    help='The number of threads to use for scanning.')

  opts, _ = parser.parse_args()
  input_path = os.path.expanduser(opts.input_path or CONFIG_PATH)
  output_path = os.path.expanduser(opts.output_path or CONFIG_PATH)
  thread_limiter = threading.BoundedSemaphore(opts.thread_count or THREAD_COUNT)

  with open(input_path) as fin:
    studies = json.load(fin)
  print('Loaded config from', input_path)

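  # Build a newline-separated list of .h/.cc files, excluding build output and
  # generated directories, to hand to is_literal_in_files() via xargs.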
  bash_list_files = subprocess.Popen(['find', '.', '-type', 'f'],
                                     stdout=subprocess.PIPE)
  bash_code_files = subprocess.Popen(['grep', '-E', '\\.(h|cc)$'],
                                     stdin=bash_list_files.stdout,
                                     stdout=subprocess.PIPE)
  bash_filtered_code_files = subprocess.Popen(
      ('grep', '-Ev', '(/out/|/build/|/gen/)'),
      stdin=bash_code_files.stdout,
      stdout=subprocess.PIPE)
  filtered_code_files = bash_filtered_code_files.stdout.read()

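  # One thread per study; the bounded semaphore acquired in thread_func()
  # caps how many studies are scanned at once.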
  threads = []
  clean_studies = {}
  for study_name, configs in studies.items():
    args = (thread_limiter, clean_studies, study_name, configs,
            filtered_code_files)
    threads.append(threading.Thread(target=thread_func, args=args))

  # Start all threads, then join all threads.
  for t in threads:
    t.start()
  for t in threads:
    t.join()

  with open(output_path, 'wt') as fout:
    json.dump(clean_studies, fout)
  print('Wrote cleaned config to', output_path)

  # Run presubmit script to format config file.
  retcode = subprocess.call(['vpython3', PRESUBMIT_SCRIPT, output_path])
  if retcode != 0:
    print('Failed to format output, manually run:')
    print('vpython3', PRESUBMIT_SCRIPT, output_path)


if __name__ == '__main__':
  sys.exit(main())