1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123
|
#!/usr/bin/env python3
# Copyright 2013 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Verifies that the histograms XML file is well-formatted."""
import argparse
import logging
import re
import sys
from typing import List
import xml.dom.minidom
import extract_histograms
import histogram_paths
import merge_xml
import xml_utils
# The allowlist of namespaces (histogram prefixes, case insensitive) that are
# split across multiple files.
_NAMESPACES_IN_MULTIPLE_FILES = [
'ash', 'autocomplete', 'chromeos', 'fcminvalidations', 'graphics', 'launch',
'usereducation'
]
def CheckNamespaces(xml_paths: List[str]) -> bool:
  """Check that histograms from a single namespace are all in the same file.

  Generally we want the histograms from a single namespace to be in the same
  file. There are some exceptions to that which are listed in the
  _NAMESPACES_IN_MULTIPLE_FILES variable.

  The namespace is the first component of the name of the histogram. e.g.
  `Foo.Bar.Baz` has a namespace of `Foo`.

  Args:
    xml_paths: A list of paths to the xml files to validate.

  Returns:
    True if at least one namespace that is not in the allowlist was found in
    more than one file (an error is logged for each occurrence), False
    otherwise.
  """

  def _GetNamespace(node):
    # Namespaces are compared in lower case, so `Foo.Bar` and `foo.Baz` are
    # treated as the same namespace.
    return node.getAttribute('name').lower().split('.')[0]

  # Maps each namespace seen so far to the most recent file that used it.
  namespaces = {}
  has_errors = False
  for path in xml_paths:
    tree = xml.dom.minidom.parse(path)
    namespaces_in_file = {
        _GetNamespace(node)
        for node in xml_utils.IterElementsWithTag(tree, 'histogram', depth=3)
    }
    # Sorted so that errors are reported in the same order on every run; set
    # iteration order for strings varies between interpreter invocations.
    for namespace in sorted(namespaces_in_file):
      if (namespace in namespaces
          and namespace not in _NAMESPACES_IN_MULTIPLE_FILES):
        # Arguments are passed separately so that logging does the
        # formatting, matching the other checks in this file.
        logging.error(
            'Namespace %s has already been used in %s. it\'s recommended to '
            'put histograms with the same namespace in the same file. If you '
            'intentionally want to split a namespace across multiple files, '
            'please add the namespace to the |_NAMESPACES_IN_MULTIPLE_FILES| '
            'in the validate_format.py.', namespace, namespaces[namespace])
        has_errors = True
      namespaces[namespace] = path
  return has_errors
def _CheckVariantsRegistered(xml_paths: List[str]) -> bool:
"""Checks that all tokens within histograms are registered.
All tokens within histograms should be registered as tokens in the same file
either inline (as a <token> node) or out of line (as a <variants> node).
Args:
xml_paths: A list of paths to the xml files to validate.
"""
has_errors = False
for path in xml_paths:
tree = xml.dom.minidom.parse(path)
variants, variants_errors = extract_histograms.ExtractVariantsFromXmlTree(
tree)
has_errors = has_errors or bool(variants_errors)
for histogram in xml_utils.IterElementsWithTag(tree, 'histogram', depth=3):
tokens, tokens_errors = extract_histograms.ExtractTokens(
histogram, variants)
has_errors = has_errors or bool(tokens_errors)
token_keys = [token['key'] for token in tokens]
token_keys.extend(variants.keys())
histogram_name = histogram.getAttribute('name')
tokens_in_name = re.findall(r'\{(.+?)\}', histogram_name)
for used_token in tokens_in_name:
if used_token not in token_keys:
logging.error(
'Token {%s} is not registered in histogram %s in file %s.',
used_token, histogram_name, path)
has_errors = True
return has_errors
def main():
  """Validates the histograms XML files and exits with the result.

  Reads the files to validate from --xml_paths (histogram_paths.ALL_XMLS when
  the flag is absent), merges them, and runs histogram extraction followed by
  the namespace and token checks. The process exits with status 1 if any step
  reported errors and 0 otherwise.
  """
  arg_parser = argparse.ArgumentParser()
  arg_parser.add_argument(
      '--xml_paths',
      type=str,
      nargs='*',
      default=histogram_paths.ALL_XMLS,
      help='An optional list of paths to XML files to validate passed as'
      ' consecutive arguments. Production XML files are validated by default.')
  args = arg_parser.parse_args()
  paths_to_check = args.xml_paths

  merged_doc = merge_xml.MergeFiles(paths_to_check,
                                    expand_owners_and_extract_components=False)
  _, errors = extract_histograms.ExtractHistogramsFromDom(merged_doc)

  # Each later check only runs when everything before it passed.
  if not errors:
    errors = CheckNamespaces(paths_to_check)
  if not errors:
    errors = _CheckVariantsRegistered(paths_to_check)

  sys.exit(bool(errors))
# Run the validation only when this file is executed as a script, so that
# importing it (e.g. to reuse the checks) has no side effects.
if __name__ == '__main__':
  main()
|