File: update_histogram_enum.py

package info (click to toggle)
chromium 120.0.6099.224-1~deb11u1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 6,112,112 kB
  • sloc: cpp: 32,907,025; ansic: 8,148,123; javascript: 3,679,536; python: 2,031,248; asm: 959,718; java: 804,675; xml: 617,256; sh: 111,417; objc: 100,835; perl: 88,443; cs: 53,032; makefile: 29,579; fortran: 24,137; php: 21,162; tcl: 21,147; sql: 20,809; ruby: 17,735; pascal: 12,864; yacc: 8,045; lisp: 3,388; lex: 1,323; ada: 727; awk: 329; jsp: 267; csh: 117; exp: 43; sed: 37
file content (378 lines) | stat: -rw-r--r-- 14,498 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
# Copyright 2014 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Updates enums in histograms.xml file with values read from provided C++ enum.

If the file was pretty-printed, the updated version is pretty-printed too.
"""

import io
import logging
import os
import re
import sys

from xml.dom import minidom

sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'common'))
import path_util

import histogram_paths
import histogram_configuration_model


ENUMS_PATH = histogram_paths.ENUMS_XML


class UserError(Exception):
  """Error whose first constructor argument is a human-readable message."""

  # Exposes the first positional argument given to the constructor.
  message = property(lambda self: self.args[0])


class DuplicatedValue(Exception):
  """Exception raised for duplicated enum values.

  Attributes:
      first_label: First enum label that shares the duplicated enum value.
      second_label: Second enum label that shares the duplicated enum value.
  """
  def __init__(self, first_label, second_label):
    self.first_label = first_label
    self.second_label = second_label


class DuplicatedLabel(Exception):
  """Exception raised for duplicated enum labels.

  Attributes:
      first_value: First enum value that shares the duplicated enum label.
      second_value: Second enum value that shares the duplicated enum label.
  """
  def __init__(self, first_value, second_value):
    self.first_value = first_value
    self.second_value = second_value


def Log(message):
  logging.info(message)


def _CheckForDuplicates(enum_value, label, result):
  """Checks if an enum value or label already exists in the results."""
  if enum_value in result:
    raise DuplicatedValue(result[enum_value], label)
  if label in result.values():
    (dup_value, ) = (k for k, v in result.items() if v == 'label')
    raise DuplicatedLabel(enum_value, dup_value)


def ReadHistogramValues(filename, start_marker, end_marker, strip_k_prefix):
  """Creates a dictionary of enum values, read from a C++ file.

  Lines are scanned one at a time after stripping surrounding whitespace.
  Everything up to and including the first line matching |start_marker| is
  skipped; scanning stops at the first later line matching |end_marker|. An
  enumerator without an explicit decimal initializer gets the previous value
  plus one (starting at 0). Lines that do not start with an identifier are
  ignored.

  Args:
      filename: The unix-style path (relative to src/) of the file to open.
      start_marker: A regex that signifies the start of the enum values.
      end_marker: A regex that signifies the end of the enum values.
      strip_k_prefix: Set to True if enum values are declared as kFoo and the
          'k' should be stripped.

  Returns:
      A dictionary from enum value to enum label.

  Raises:
      DuplicatedValue: An error when two enum labels share the same value.
      DuplicatedLabel: An error when two enum values share the same label.
      ValueError: An enumerator ends with '=' but the following line does not
          start with a decimal value.
      AssertionError: |strip_k_prefix| is set and a label does not start
          with 'k'.
  """
  # Read the file as a list of lines. Decode explicitly as UTF-8 (the same
  # encoding this module uses for ENUMS_PATH) instead of relying on the
  # locale-dependent default.
  with io.open(path_util.GetInputFile(filename), encoding='utf-8') as f:
    content = f.readlines()

  START_REGEX = re.compile(start_marker)
  ITEM_REGEX = re.compile(r'^(\w+)')
  ITEM_REGEX_WITH_INIT = re.compile(r'(\w+)\s*=\s*(\d*)')
  WRAPPED_INIT = re.compile(r'(\d+)')
  END_REGEX = re.compile(end_marker)

  # A single iterator is shared by both loops so the second loop resumes on
  # the line right after the start marker.
  iterator = iter(content)
  # Find the start of the enum
  for line in iterator:
    if START_REGEX.match(line.strip()):
      break

  enum_value = 0
  result = {}
  for line in iterator:
    line = line.strip()
    # Exit condition: we reached last enum value
    if END_REGEX.match(line):
      break
    # Inside enum: generate new xml entry
    m = ITEM_REGEX_WITH_INIT.match(line)
    if m:
      label = m.group(1)
      if m.group(2):
        enum_value = int(m.group(2))
      else:
        # Enum name is so long that the value wrapped to the next line.
        # Consume that line here so it is not parsed as an enumerator.
        next_line = next(iterator, '').strip()
        wrapped = WRAPPED_INIT.match(next_line)
        if wrapped is None:
          raise ValueError(
              'Cannot parse the value of enum {0} in {1}: expected a decimal '
              'value on the line after "=", found "{2}".'.format(
                  label, filename, next_line))
        enum_value = int(wrapped.group(1))
    else:
      m = ITEM_REGEX.match(line)
      if m:
        label = m.group(1)
      else:
        continue
    if strip_k_prefix:
      # Raised explicitly (rather than via an assert statement) so the check
      # still runs when Python is started with -O.
      if not label.startswith('k'):
        raise AssertionError("Enum " + label + " should start with 'k'.")
      label = label[1:]
    _CheckForDuplicates(enum_value, label, result)
    result[enum_value] = label
    enum_value += 1
  return result


def ReadHistogramValuesFromXML(filename, element_name,
                               value_attribute, label_attribute):
  """Creates a dictionary of enum values, read from an XML file.

  Every element named |element_name| in the parsed document contributes one
  entry: its |value_attribute| is converted to an int and used as the key,
  and its |label_attribute| is used as the label.

  Args:
      filename: The unix-style path (relative to src/) of the file to open.
      element_name: Name of elements in the given XML that would be used to
          extract enums.
      value_attribute: The attribute name in source XML that would be mapped to
          |value| attributes in enums.xml.
      label_attribute: The attribute name in source XML that would be mapped to
          |label| attributes in enums.xml.

  Returns:
      A dictionary from enum value (int) to enum label.

  Raises:
      DuplicatedValue: An error when two enum labels share the same value.
      DuplicatedLabel: An error when two enum values share the same label.
  """
  xml_path = path_util.GetInputFile(filename)
  matching_elements = minidom.parse(xml_path).getElementsByTagName(element_name)

  labels_by_value = {}
  for element in matching_elements:
    numeric_value = int(element.getAttribute(value_attribute))
    text_label = element.getAttribute(label_attribute)
    # Reject the entry before storing it if it clashes with an earlier one.
    _CheckForDuplicates(numeric_value, text_label, labels_by_value)
    labels_by_value[numeric_value] = text_label
  return labels_by_value


def CreateEnumItemNode(document, value, label):
  """Builds an <int> element carrying |value| (as a string) and |label|.

  The element is created through |document| but is not attached to any
  parent; the caller is responsible for appending it to an enum node.
  """
  int_element = document.createElement('int')
  # Attributes are assigned in a fixed order: 'value' first, then 'label'.
  for attribute_name, attribute_text in (('value', str(value)),
                                         ('label', label)):
    int_element.attributes[attribute_name] = attribute_text
  return int_element


def UpdateHistogramDefinitions(histogram_enum_name, source_enum_values,
                               source_enum_path, caller_script_name, document):
  """Rewrites the <enum> named |histogram_enum_name| inside |document|.

  The children of the enum node are replaced by, in order:
    1. A generated comment naming |source_enum_path| (backslashes turned into
       forward slashes) and, when |caller_script_name| is truthy, the calling
       script.
    2. Every pre-existing comment that does not start with ' Generated from '.
    3. One <int> item per value, sorted by value. Items come from
       |source_enum_values|; pre-existing <int> items whose value is absent
       from |source_enum_values| are kept as they were.
  Any other pre-existing child is dropped. Preserved comments lose their
  position relative to the items.

  Raises:
      UserError: |document| has no <enum> whose name is |histogram_enum_name|.
  """
  # Locate the first <enum name=|histogram_enum_name| ...> node in |document|.
  enum_node = next(
      (candidate for candidate in document.getElementsByTagName('enum')
       if candidate.attributes['name'].value == histogram_enum_name),
      None)
  if enum_node is None:
    raise UserError('No {0} enum node found'.format(histogram_enum_name))

  # Build the "Generated from (...)" comment text.
  header_text = ' Generated from {0}.'.format(source_enum_path).replace(
      '\\', '/')
  if caller_script_name:
    header_text += '\nCalled by {0}.'.format(caller_script_name)
  kept_comments = [document.createComment(header_text)]

  # Fresh item nodes for each of the source enum values.
  items_by_value = {
      enum_value: CreateEnumItemNode(document, enum_value, enum_label)
      for enum_value, enum_label in source_enum_values.items()
  }

  # Walk the current children and decide what survives the rewrite.
  generated_comment_prefix = re.compile('^ Generated from ')
  for existing in enum_node.childNodes:
    if existing.nodeName == 'int':
      existing_value = int(existing.attributes['value'].value)
      # Old items are kept only when the source does not redefine them.
      if existing_value not in source_enum_values:
        items_by_value[existing_value] = existing
      continue
    if existing.nodeType != minidom.Node.COMMENT_NODE:
      continue
    # A previous 'Generated from' comment is superseded by the new one.
    if generated_comment_prefix.match(existing.data) is None:
      kept_comments.append(existing)

  # Empty |enum_node|, last child first.
  for stale in reversed(list(enum_node.childNodes)):
    enum_node.removeChild(stale)

  # Comments go on top, followed by the items in ascending value order.
  ordered_items = [items_by_value[key] for key in sorted(items_by_value)]
  for replacement in kept_comments + ordered_items:
    enum_node.appendChild(replacement)


def _GetOldAndUpdatedXml(histogram_enum_name, source_enum_values,
                         source_enum_path, caller_script_name):
  """Returns the current and the regenerated contents of |ENUMS_PATH|.

  The file at |ENUMS_PATH| is read as UTF-8 text and parsed. The enum named
  |histogram_enum_name| is then updated in the parsed document from
  |source_enum_values|, crediting |source_enum_path| and
  |caller_script_name|, and the document is pretty-printed.

  Returns:
      A tuple (old_xml, new_xml): the file text as read, and the
      pretty-printed text of the updated document.
  """
  Log('Reading existing histograms from "{0}".'.format(ENUMS_PATH))
  with io.open(ENUMS_PATH, 'r', encoding='utf-8') as enums_file:
    current_text = enums_file.read()
    # Rewind so the parser sees the file from the beginning as well.
    enums_file.seek(0)
    enums_document = minidom.parse(enums_file)

  Log('Comparing histograms enum with new enum definition.')
  UpdateHistogramDefinitions(histogram_enum_name, source_enum_values,
                             source_enum_path, caller_script_name,
                             enums_document)

  regenerated_text = histogram_configuration_model.PrettifyTree(enums_document)
  return (current_text, regenerated_text)


def CheckPresubmitErrors(histogram_enum_name,
                         update_script_name,
                         source_enum_path,
                         start_marker,
                         end_marker,
                         strip_k_prefix=False,
                         histogram_value_reader=ReadHistogramValues):
  """Reads enum values from a source file and reports presubmit violations.

  The values are obtained by calling |histogram_value_reader| on
  |source_enum_path|. Two kinds of violations are reported:
    1. The reader found duplicated values or duplicated labels.
    2. The enums XML file differs from what regenerating it would produce.

  Args:
      histogram_enum_name: The name of the XML <enum> attribute to update.
      update_script_name: The name of an update script to run to update the UMA
          mappings for the enum.
      source_enum_path: A unix-style path, relative to src/, giving
          the source file from which to read the enum.
      start_marker: A regular expression that matches the start of the C++ enum.
      end_marker: A regular expression that matches the end of the C++ enum.
      strip_k_prefix: Set to True if enum values are declared as kFoo and the
          'k' should be stripped.
      histogram_value_reader: A reader function called with four positional
          arguments (source_path, start_marker, end_marker, strip_k_prefix).
          Its result is used as the source enum values. The default is
          ReadHistogramValues(), which parses a C++ source file and returns a
          dictionary from enum value to enum label.

  Returns:
      A string with presubmit failure description, or None (if no failures).
  """
  Log('Reading histogram enum definition from "{0}".'.format(source_enum_path))
  try:
    parsed_values = histogram_value_reader(source_enum_path, start_marker,
                                           end_marker, strip_k_prefix)
  except DuplicatedValue as value_clash:
    details = (histogram_enum_name, value_clash.first_label,
               value_clash.second_label)
    return ('%s enum has been updated and there exist '
            'duplicated values between (%s) and (%s)' % details)
  except DuplicatedLabel as label_clash:
    details = (histogram_enum_name, label_clash.first_value,
               label_clash.second_value)
    return ('%s enum has been updated and there exist '
            'duplicated labels between (%s) and (%s)' % details)

  current_xml, regenerated_xml = _GetOldAndUpdatedXml(
      histogram_enum_name, parsed_values, source_enum_path,
      update_script_name)
  # Identical text means the checked-in mapping is already up to date.
  if current_xml == regenerated_xml:
    return None

  return ('%s enum has been updated and the UMA mapping needs to be '
          'regenerated. Please run %s in src/tools/metrics/histograms/ to '
          'update the mapping.' % (histogram_enum_name, update_script_name))


def UpdateHistogramFromDict(histogram_enum_name, source_enum_values,
                            source_enum_path, caller_script_name):
  """Regenerates the |histogram_enum_name| enum and writes |ENUMS_PATH|.

  |source_enum_values| is a {value: 'key'} dictionary holding the new enum
  contents. The regenerated file credits |source_enum_path| as the origin of
  the values and |caller_script_name| as the requesting script. The file is
  rewritten unconditionally, as UTF-8 and without newline translation.
  """
  # Only the regenerated text is needed here; the old text is discarded.
  _previous_xml, regenerated_xml = _GetOldAndUpdatedXml(
      histogram_enum_name, source_enum_values, source_enum_path,
      caller_script_name)
  with io.open(ENUMS_PATH, 'w', encoding='utf-8', newline='') as enums_file:
    enums_file.write(regenerated_xml)

  Log('Done.')


def UpdateHistogramEnum(histogram_enum_name,
                        source_enum_path,
                        start_marker,
                        end_marker,
                        strip_k_prefix=False,
                        calling_script=None):
  """Reads a C++ enum from a source file and updates the enums XML to match.

  Args:
      histogram_enum_name: The name of the XML <enum> attribute to update.
      source_enum_path: A unix-style path, relative to src/, giving
          the C++ header file from which to read the enum.
      start_marker: A regular expression that matches the start of the C++ enum.
      end_marker: A regular expression that matches the end of the C++ enum.
      strip_k_prefix: Set to True if enum values are declared as kFoo and the
          'k' should be stripped.
      calling_script: Optional name of the script requesting the update; it is
          forwarded to UpdateHistogramFromDict() as the caller script name.
  """
  Log('Reading histogram enum definition from "{0}".'.format(source_enum_path))
  UpdateHistogramFromDict(
      histogram_enum_name,
      ReadHistogramValues(source_enum_path, start_marker, end_marker,
                          strip_k_prefix),
      source_enum_path,
      calling_script)


def UpdateHistogramEnumFromXML(histogram_enum_name, source_enum_path,
                               caller_script_name, element_name,
                               value_attribute, label_attribute):
  """Reads enum entries from an XML file and updates the enums XML to match.

  Args:
      histogram_enum_name: The name of the XML <enum> attribute to update.
      source_enum_path: A unix-style path, relative to src/, giving
          the XML file from which to read the enum.
      caller_script_name: Name of the script calling this function.
      element_name: Name of elements in the given XML that would be used to
          extract enums.
      value_attribute: The attribute name in source XML that would be mapped to
          |value| attributes in enums.xml.
      label_attribute: The attribute name in source XML that would be mapped to
          |label| attributes in enums.xml.
  """
  Log('Reading histogram enum definition from "{0}".'.format(source_enum_path))
  UpdateHistogramFromDict(
      histogram_enum_name,
      ReadHistogramValuesFromXML(source_enum_path, element_name,
                                 value_attribute, label_attribute),
      source_enum_path,
      caller_script_name)