File: validate_format.py

package info (click to toggle)
chromium 138.0.7204.183-1
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 6,071,908 kB
  • sloc: cpp: 34,937,088; ansic: 7,176,967; javascript: 4,110,704; python: 1,419,953; asm: 946,768; xml: 739,971; pascal: 187,324; sh: 89,623; perl: 88,663; objc: 79,944; sql: 50,304; cs: 41,786; fortran: 24,137; makefile: 21,806; php: 13,980; tcl: 13,166; yacc: 8,925; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (123 lines) | stat: -rwxr-xr-x 4,165 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
#!/usr/bin/env python3
# Copyright 2013 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Verifies that the histograms XML file is well-formatted."""

import argparse
import logging
import re
import sys
from typing import List
import xml.dom.minidom

import extract_histograms
import histogram_paths
import merge_xml
import xml_utils

# Histogram namespaces (the lowercased first component of a histogram name)
# that are allowed to be split across more than one XML file. Entries are
# written in lowercase because namespaces are lowercased before being compared
# against this list.
_NAMESPACES_IN_MULTIPLE_FILES = [
    'ash',
    'autocomplete',
    'chromeos',
    'fcminvalidations',
    'graphics',
    'launch',
    'usereducation',
]


def CheckNamespaces(xml_paths: List[str]) -> bool:
  """Checks that histograms from a single namespace are all in the same file.

  Generally we want the histograms from a single namespace to be in the same
  file. There are some exceptions to that which are listed in the
  _NAMESPACES_IN_MULTIPLE_FILES variable.

  The namespace is the first component of the name of the histogram, compared
  case-insensitively. e.g. `Foo.Bar.Baz` has a namespace of `foo`.

  Args:
    xml_paths: A list of paths to the xml files to validate.

  Returns:
    True if at least one namespace that is not in
    _NAMESPACES_IN_MULTIPLE_FILES was found in more than one file, False
    otherwise. Every violation is also reported through logging.error.
  """

  def _GetNamespace(node):
    # Lowercase before splitting so that `Foo.Bar` and `foo.Baz` are treated
    # as the same namespace.
    return node.getAttribute('name').lower().split('.')[0]

  # Maps each namespace to the most recently processed file that contains it.
  namespaces = {}
  has_errors = False
  for path in xml_paths:
    tree = xml.dom.minidom.parse(path)

    # Deduplicate within the file: only cross-file reuse is an error.
    namespaces_in_file = {
        _GetNamespace(node)
        for node in xml_utils.IterElementsWithTag(tree, 'histogram', depth=3)
    }
    # Iterate in sorted order so the sequence of reported errors is the same
    # on every run (plain set iteration order of strings varies between
    # processes).
    for namespace in sorted(namespaces_in_file):
      if (namespace in namespaces
          and namespace not in _NAMESPACES_IN_MULTIPLE_FILES):
        # Values are passed as logging arguments so formatting is left to the
        # logging module instead of being done eagerly with `%`.
        logging.error(
            'Namespace %s has already been used in %s. it\'s recommended to '
            'put histograms with the same namespace in the same file. If you '
            'intentionally want to split a namespace across multiple files, '
            'please add the namespace to the |_NAMESPACES_IN_MULTIPLE_FILES| '
            'in the validate_format.py.', namespace, namespaces[namespace])
        has_errors = True
      namespaces[namespace] = path

  return has_errors


def _CheckVariantsRegistered(xml_paths: List[str]) -> bool:
  """Verifies that every {token} used in a histogram name is registered.

  A token used in a histogram name should be registered in the same file,
  either inline (as a <token> node) or out of line (as a <variants> node).
  Each file is parsed on its own; a token counts as registered when its key
  is among the tokens extracted for that histogram or among the variants
  extracted from that file.

  Args:
    xml_paths: A list of paths to the xml files to validate.

  Returns:
    True if variant or token extraction reported errors, or if any histogram
    name uses a token that is not registered; False otherwise.
  """
  found_problem = False
  for xml_path in xml_paths:
    dom = xml.dom.minidom.parse(xml_path)
    file_variants, variants_errors = (
        extract_histograms.ExtractVariantsFromXmlTree(dom))
    if variants_errors:
      found_problem = True

    for node in xml_utils.IterElementsWithTag(dom, 'histogram', depth=3):
      node_tokens, tokens_errors = extract_histograms.ExtractTokens(
          node, file_variants)
      if tokens_errors:
        found_problem = True

      # Keys accepted for this histogram: its own tokens first, followed by
      # the keys of the variants extracted from the file.
      registered_keys = [entry['key'] for entry in node_tokens]
      registered_keys += file_variants.keys()

      name = node.getAttribute('name')

      # Each `{...}` placeholder in the name must match a registered key.
      for placeholder in re.findall(r'\{(.+?)\}', name):
        if placeholder in registered_keys:
          continue
        logging.error(
            'Token {%s} is not registered in histogram %s in file %s.',
            placeholder, name, xml_path)
        found_problem = True

  return found_problem


def main():
  """Validates the histograms XML files and exits with the result.

  Reads the optional --xml_paths argument (defaulting to
  histogram_paths.ALL_XMLS), merges the files and extracts histograms from
  the merged document, then runs CheckNamespaces and _CheckVariantsRegistered.
  Each later check only runs if nothing before it reported errors. The
  process exits via sys.exit with True when any check failed and False
  otherwise.
  """
  help_text = (
      'An optional list of paths to XML files to validate passed as'
      ' consecutive arguments. Production XML files are validated by default.')
  arg_parser = argparse.ArgumentParser()
  arg_parser.add_argument('--xml_paths',
                          type=str,
                          nargs='*',
                          default=histogram_paths.ALL_XMLS,
                          help=help_text)
  parsed_args = arg_parser.parse_args()
  xml_paths = parsed_args.xml_paths

  merged_doc = merge_xml.MergeFiles(
      xml_paths, expand_owners_and_extract_components=False)
  _histograms, extraction_errors = (
      extract_histograms.ExtractHistogramsFromDom(merged_doc))

  # Stop at the first failing stage; later checks are skipped once an earlier
  # one has reported errors.
  failed = bool(extraction_errors)
  if not failed:
    failed = bool(CheckNamespaces(xml_paths))
  if not failed:
    failed = bool(_CheckVariantsRegistered(xml_paths))
  sys.exit(failed)


# Run the validation only when this file is executed as a script; importing
# the module does not call main().
if __name__ == '__main__':
  main()