File: generate_permission_element_grd.py

package info (click to toggle)
chromium 138.0.7204.183-1
links: PTS, VCS
area: main
in suites: trixie
size: 6,071,908 kB
sloc: cpp: 34,937,088; ansic: 7,176,967; javascript: 4,110,704; python: 1,419,953; asm: 946,768; xml: 739,971; pascal: 187,324; sh: 89,623; perl: 88,663; objc: 79,944; sql: 50,304; cs: 41,786; fortran: 24,137; makefile: 21,806; php: 13,980; tcl: 13,166; yacc: 8,925; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (233 lines) | stat: -rw-r--r-- 9,374 bytes
parent folder | download | duplicates (2)
# Copyright 2024 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import glob
import os
import re
import sys
from xml.dom.minidom import parse, parseString

# Absolute path five directory levels above the directory holding this script.
# Given the script location recorded in kGrdTemplate's header comment
# (third_party/blink/renderer/build/scripts/), that is the source root.
_SRC_PATH = os.path.abspath(
    os.path.join(os.path.dirname(__file__), '..', '..', '..', '..', '..'))
# tools/grit must be on sys.path before the `grit` package can be imported.
sys.path.append(os.path.join(_SRC_PATH, 'tools', 'grit'))
from grit.extern import tclib

# Skeleton of the generated grd file. generate_grd_file() parses this XML and
# appends one <message> element per translated string under the (initially
# empty) <messages> element before serializing the document.
kGrdTemplate = '''<?xml version="1.0" encoding="utf-8"?>
<!--
This file contains all Permission element strings in all locales.
This is a generated grd file.
The script to generate the grd file is located at
third_party/blink/renderer/build/scripts/generate_permission_element_grd.py
-->
<grit base_dir="." latest_public_release="0" current_release="1"
    source_lang_id="en" enc_check="möl">
<outputs>
    <output filename="grit/permission_element_generated_strings.h" type="rc_header">
      <emit emit_type='prepend'></emit>
    </output>
    <output filename="permission_element_generated_strings.pak" type="data_package" />
</outputs>
<release seq="1" allow_pseudo="false">
    <messages fallback_to_english="true">
    </messages>
</release>
</grit>
'''

# Opening chunk of the generated C++ source file. It deliberately ends right
# after `static constexpr char kLanguages[] =`: generate_cpp_mapping() writes
# the quoted string literal and the terminating `;` immediately afterwards.
kStringMapCcPrefix = '''// Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Generated by third_party/blink/renderer/build/scripts/generate_permission_element_grd.py

#include <stdint.h>

#include <optional>
#include <string_view>
#include <utility>

#include "base/containers/fixed_flat_map.h"
#include "base/containers/fixed_flat_set.h"
#include "base/strings/string_slice.h"
#include "third_party/blink/renderer/core/html/html_permission_element_strings_map.h"
#include "third_party/blink/public/strings/grit/permission_element_generated_strings.h"
#include "third_party/blink/public/strings/grit/permission_element_strings.h"

namespace blink {

namespace {

static constexpr char kLanguages[] =
'''
# Chunk of the generated C++ source emitted after the kLanguages literal. It
# declares LangStringSlice and LookupHelper, then opens
# GetPermissionElementMessageId() up to the initializer list of the fixed flat
# map; generate_cpp_mapping() writes one
# `{{{offset, length}, base_message}, message}` row per translated message
# directly after this text.
kStringMapCcMidfix = '''

using LangStringSlice = base::subtle::StringSlice<sizeof(kLanguages), kLanguages>;

// In C++20, pairs required exactly-matching types to be comparable, i.e.
// std::pair<T1, T2> cannot be compared against std::pair<U1, U2>, even if T1
// and T2 are comparable against U1 and U2. This was addressed in C++23 in
// https://cplusplus.github.io/LWG/issue3865. However, older versions of
// libstdc++ do not support this, so the lookup needs to be wrapped with a
// type that is heterogeneously comparable with the key pair type...
struct LookupHelper {
  std::string_view lang;
  uint16_t id;

  // The style guide requires operator== to be defined if operator<=> is
  // defined. However, operator== is never actually used, which causes a
  // compiler warning. Explicitly delete it instead... if it's ever needed,
  // the build will start failing.
  friend constexpr bool operator==(LookupHelper lhs,
                                   std::pair<LangStringSlice, uint16_t> rhs) = delete;
  friend constexpr auto operator<=>(LookupHelper lhs,
                                    std::pair<LangStringSlice, uint16_t> rhs) {
    return lhs.lang != rhs.first ?  lhs.lang <=> rhs.first : lhs.id <=> rhs.second;
  }
};

}  // namespace

std::optional<uint16_t> GetPermissionElementMessageId(
    std::string_view language_code,
    uint16_t base_message) {
  static constexpr auto kMessageIds =
      base::MakeFixedFlatMap<std::pair<LangStringSlice, uint16_t>, uint16_t>({
'''
# Closing chunk of the generated C++ source: terminates the map initializer
# list, performs the lookup for (language_code, base_message) and closes
# namespace blink.
kStringMapCcSuffix = '''
      });

  auto message = kMessageIds.find(
      LookupHelper{language_code, static_cast<uint16_t>(base_message)});
  return message == kMessageIds.end()
      ? std::nullopt
      : std::optional(message->second);
}

}  // namespace blink
'''


def get_message_id_map_and_orderings(input_base_dir):
    """Index the messages declared in permission_element_strings.grd.

    Args:
        input_base_dir: Directory that contains
            permission_element_strings.grd. A trailing path separator is
            accepted but no longer required.

    Returns:
        A tuple `(id_map, orderings)` where
        * `id_map` maps `tclib.GenerateMessageId()` of each message's
          stripped first-child text to the message's `name` attribute, and
        * `orderings` maps each message `name` to its position in the grd.
    """
    # os.path.join yields the same path as plain concatenation when the
    # caller already supplies a trailing separator, and a valid path when it
    # does not.
    grd_path = os.path.join(input_base_dir, "permission_element_strings.grd")
    messages = parse(grd_path).getElementsByTagName("message")

    id_map = {
        tclib.GenerateMessageId(message.firstChild.data.strip()):
        message.getAttribute("name")
        for message in messages
    }
    # The returned orderings are used to help sort keys for the fixed flat map.
    # Empirically, resource IDs appear to be allocated in the order the messages
    # are listed in the grd.
    orderings = {
        message.getAttribute("name"): position
        for position, message in enumerate(messages)
    }
    return (id_map, orderings)


def generate_grd_file(id_map, file_list, output_file_path):
    """Write a grd file holding every translated permission element string.

    Args:
        id_map: Maps a translation `id` attribute to the base message name.
        file_list: Paths of the translation files to read. Files without any
            <translation> element are skipped.
        output_file_path: Destination of the generated grd (UTF-8 encoded).

    Each translation becomes a non-translateable <message> named
    `<base message name>_<locale suffix>`. The locale suffix is the text
    after the last underscore of the path once its final extension has been
    removed, with every '-' replaced by '_'.
    """
    grd_doc = parseString(kGrdTemplate)
    container = grd_doc.getElementsByTagName("messages")[0]

    for translation_path in file_list:
        translations = parse(translation_path).getElementsByTagName(
            "translation")
        if not translations.length:
            continue

        path_without_extension = translation_path.rsplit('.', 1)[0]
        locale_suffix = path_without_extension.rsplit('_', 1)[1].replace(
            '-', '_')

        for translation in translations:
            base_name = id_map[translation.getAttribute("id")]
            text = translation.firstChild.data.strip()

            node = grd_doc.createElement("message")
            node.setAttribute("name", "_".join((base_name, locale_suffix)))
            node.setAttribute("translateable", "false")
            node.appendChild(grd_doc.createTextNode(text))

            container.appendChild(node)
            # Line break and indentation between consecutive messages.
            container.appendChild(grd_doc.createTextNode('\n      '))

    with open(output_file_path, 'wb') as output_file:
        output_file.write(grd_doc.toxml(encoding='UTF-8'))


def superstring(long, short):
    """Return the shortest string containing both `long` and `short`.

    This is the two-string building block of a crude greedy heuristic; the
    full n-string shortest-superstring problem is NP-complete.

    Args:
        long: The string built so far (may be empty).
        short: The string that must also appear in the result.

    Returns:
        Whichever argument already contains the other; otherwise the two
        strings merged on their largest prefix/suffix overlap; otherwise
        `long + short`.
    """
    # Containment in either direction needs no concatenation at all.
    if short in long:
        return long
    if long in short:
        return short
    # An overlap can never exceed the shorter string. Try the largest overlap
    # first and, for a given size, prefer appending `short` to `long`.
    for overlap in range(min(len(long), len(short)), 0, -1):
        if long[-overlap:] == short[:overlap]:
            return long + short[overlap:]
        if short[-overlap:] == long[:overlap]:
            return short + long[overlap:]
    return long + short


def generate_cpp_mapping(orderings, input_file_path, output_file_path):
    """Write the C++ source mapping (locale, base message) to a message.

    Args:
        orderings: Maps a base message name to its sort position.
        input_file_path: Path of the generated grd whose <message> names have
            the form `<base message>_<locale>`.
        output_file_path: Path of the C++ file to write.

    The output is kStringMapCcPrefix, a single string literal into which all
    locale codes are packed, kStringMapCcMidfix, one row per message that
    refers to its locale by (offset, length) within that literal, and
    kStringMapCcSuffix.
    """
    grd_messages = parse(input_file_path).getElementsByTagName("message")
    with open(output_file_path, 'w') as output_file:
        # These three locales are rewritten to their language-only code
        # because no language-only locale exists for them in the translation
        # lists. The language-only entry is what gets used when a
        # language/country combination is unknown, e.g. `pt-AO` (Portuguese
        # Angola) falls back to `pt`, which therefore carries the `pt-pt`
        # (Portuguese from Portugal) strings.
        custom_locale_mappings = {"en-gb": "en", "pt-pt": "pt", "zh-cn": "zh"}

        message_map = []
        for node in grd_messages:
            message_name = node.getAttribute('name')
            # The base name ends where the first `_<lowercase letter>` starts.
            base_message = re.split('_[a-z]', message_name)[0]
            remainder = message_name.split(base_message)[1]
            locale = remainder.split('_', 1)[1].lower().replace("_", "-")
            locale = custom_locale_mappings.get(locale, locale)
            message_map.append((locale, base_message, message_name))

        # Longest first, so that `ab-cd` is packed before `ab` and `cd`; ties
        # are broken alphabetically to keep the output deterministic.
        locales = sorted({entry[0] for entry in message_map},
                         key=lambda code: (-len(code), code))

        langs = ''
        for locale in locales:
            langs = superstring(langs, locale)

        lang_map = {
            locale: (langs.find(locale), len(locale))
            for locale in locales
        }

        output_file.write(kStringMapCcPrefix)
        output_file.write(f'    "{langs}";\n')
        output_file.write(kStringMapCcMidfix)
        # Pre-sorting is important here to avoid running into constexpr
        # evaluation limits at compile time, since the translation tables can
        # be quite large.
        ordered_rows = sorted(
            message_map, key=lambda entry: (entry[0], orderings[entry[1]]))
        for locale, base_message, message_name in ordered_rows:
            offset, length = lang_map[locale]
            output_file.write(
                f'        {{{{{{{offset}, {length}}}, {base_message}}}, {message_name}}},\n'
            )
        output_file.write(kStringMapCcSuffix)


def main(argv):
    """Generate the translated grd file and the C++ string map.

    Args:
        argv: Command line; must contain `--output_grd <path>`,
            `--output_map <path>` and `--input_base_dir <dir>`.

    Raises:
        ValueError: If one of the three flags is missing.
        IndexError: If a flag is the last argument and has no value.
    """
    output_grd_path = argv[argv.index('--output_grd') + 1]
    output_map_path = argv[argv.index('--output_map') + 1]
    input_base_dir = argv[argv.index('--input_base_dir') + 1]

    id_map, orderings = get_message_id_map_and_orderings(input_base_dir)
    # glob returns matches in arbitrary, filesystem-dependent order; sorting
    # keeps the message order of the generated grd identical across runs and
    # machines.
    translated_files = sorted(
        glob.glob(
            os.path.join(input_base_dir,
                         "translations/permission_element_strings_*")))
    generate_grd_file(id_map, translated_files, output_grd_path)
    # The C++ map is derived from the grd that was just written.
    generate_cpp_mapping(orderings, output_grd_path, output_map_path)


# Run only when executed as a script. main() has no return statement, so
# sys.exit() receives None and the process exits with status 0 on success.
if __name__ == '__main__':
    sys.exit(main(sys.argv))