File: txstats.py

package info (click to toggle)
calamares 3.4.2-1
links: PTS, VCS
area: main
in suites: forky, sid
size: 33,104 kB
sloc: cpp: 71,902; python: 4,365; xml: 1,379; sh: 717; ansic: 105; makefile: 7
file content (284 lines) | stat: -rwxr-xr-x 10,409 bytes
parent folder | download | duplicates (2)
#! /usr/bin/env python3
#
#   SPDX-FileCopyrightText: 2018 Adriaan de Groot <groot@kde.org>
#   SPDX-License-Identifier: BSD-2-Clause
#
# Uses the Transifex API to get a list of enabled languages,
# and outputs CMake settings for inclusion into CMakeLists.txt.
#
# This is a Python3 script.
#
# Run it with a -v command-line option to get extra output on
# actual translation percentages.
import sys
import os
import argparse

class TXError(Exception):
    pass


class TransifexGetter(object):
    """
    Get language data from Transifex.

    The object does all the work in __init__, after that
    the only relevant data is .languages, a dictionary
    of language data.
    """
    def __init__(self):
        token = self.get_tx_credentials()
        if token is None:
            raise TXError("Could not get Transifex API token")

        import requests
        base_url = "https://rest.api.transifex.com/resource_language_stats"
        project_filter = "filter[project]=o:calamares:p:calamares"
        resource_filter = "filter[resource]=o:calamares:p:calamares:r:calamares"
        url = base_url + "?" + project_filter.replace(":", "%3A") + "&" + resource_filter.replace(":", "%3A")
        headers = {
            "accept": "application/vnd.api+json",
            "authorization": "Bearer " + token
        }

        r = requests.get(url, headers=headers)
        if r.status_code != 200:
            raise TXError("Could not get Transifex data from API")

        j = r.json()
        data = j["data"]

        self.languages = dict()

        for d in data:
            translated_count = d["attributes"]["translated_strings"]
            total_count = d["attributes"]["total_strings"]
            language_key = d["relationships"]["language"]["data"]["id"]
            assert language_key.startswith("l:")
            language_key = language_key[2:]
            self.languages[language_key] = dict(translated=dict(stringcount=translated_count, percentage=(translated_count / total_count)))


    def get_tx_credentials(self):
        """
        Gets the API token out of the user's .transifexrc (this is supposed
        to be secure).
        """
        import configparser
        import os
        txconfig_name = os.path.expanduser("~/.transifexrc")
        try:
            with open(txconfig_name, "r") as f:
                parser = configparser.ConfigParser()
                parser.read_file(f)

                return parser.get("https://app.transifex.com", "password")
        except IOError as e:
            return None


class BogusGetter(object):
    """
    Fake language data.

    This object pretends to retrieve data, and returns fixed language lists and percentages,
    for testing purposes without hitting Transifex servers all the time.
    """
    def __init__(self):
        self.languages = dict()
        for lang, completion in ( ("sq", 100), ("ar", 44), ("as", 28), ("de", 15), ("da", 4), ("ts", 82) ):
            self.languages[lang] = dict(translated=dict(stringcount=686, percentage=(completion/100.0)))


class PrintOutputter(object):
    """
    Output via print-statements.
    """
    def __init__(self):
        pass

    def print(self, s):
        print(s)

    def __enter__(self):
        return self

    def __exit__(self, e, v, tb):
        pass


class EditingOutputter(object):
    """
    Edit CMakeLists in-place.
    """
    def __init__(self):
        with open("CMakeLists.txt", "r") as f:
            lines = f.readlines()

        mark = None
        mark_text = None
        for l in lines:
            # Note that we didn't strip the lines, so need the \n here
            if l.startswith("# Total ") and l.endswith(" languages\n"):
                mark = lines.index(l)
                mark_text = l
                break
        if mark is None:
            raise TXError("No CMakeLists.txt lines for TX stats found")
        self.pre_lines = lines[:mark]

        nextmark = mark + 1
        for l in lines[mark+1:]:
            nextmark += 1
            if l.startswith(mark_text):
                break
        if nextmark > mark + 150 or nextmark > len(lines) - 4:
            # Try to catch runaway nextmarks: we know there should
            # be four set-lines, which are unlikely to be 3 lines each;
            # similarly the CMakeLists.txt is supposed to end with
            # some boilerplate.
            #
            # However, gersemi will reformat to one-language-per-line,
            # so we can get really long sections, that's why we use 150 as a limit.
            raise TXError("Could not find end of TX settings in CMakeLists.txt")
        self.post_lines = lines[nextmark:]

        self.mid_lines = []
        print("# Editing CMakeLists.txt in-place")

    def print(self, s):
        # Add the implicit \n from print()
        self.mid_lines.append(s + "\n")
        if s.startswith("#"):
            print(s)

    def __enter__(self):
        return self

    def __exit__(self, e, v, tb):
        if e is None:
            with open("CMakeLists.txt", "w") as f:
                f.write("".join(self.pre_lines + self.mid_lines + self.post_lines))
            print("# CMakeLists.txt updated")


def output_langs(all_langs, outputter, label, filterfunc):
    """
    Output (via print) all of the languages in @p all_langs
    that satisfy the translation-percentage filter @p filterfunc.
    Prints a CMake set() command with the @p label as part
    of the variable name.

    Performs line-wrapping.
    """
    these_langs = [l for s, l in all_langs if filterfunc(s)]
    out = " ".join(["set( _tx_%s" % label, " ".join(sorted(these_langs)), ")"])
    width = 68
    prefix = ""
    trailer = f"  # {len(these_langs)} languages" # Comment at the end of the CMake line
    while len(out) > width - len(prefix):
        chunk = out[:out[:width].rfind(" ")]
        outputter.print("%s%s" % (prefix, chunk))
        out = out[len(chunk)+1:]
        prefix = "    "
    outputter.print(f"{prefix}{out}{trailer}")


def get_tx_stats(languages, outputter, verbose):
    """
    Does an API request to Transifex with the given API @p token, getting
    the translation statistics for the main body of texts. Then prints
    out CMake settings to replace the _tx_* variables in CMakeLists.txt
    according to standard criteria.

    If @p verbose is True, prints out language stats as well.
    """
    # Some languages go into the "incomplete" list by definition,
    # regardless of their completion status: this can have various reasons.
    #
    # - (Esperanto wasn't supported until Qt 5.12.2)
    # - Interlingue still is not supported by the minimum Qt version
    incomplete_languages = (
        "ie",   # Not supported by Qt at least through 5.15.0
        )

    all_langs = []
    mark_text = "# Total %d languages" % len(languages)
    outputter.print(mark_text)
    for lang_name in languages:
        stats = languages[lang_name]["translated"]["percentage"]
        # Make the by-definition-incomplete languages have a percentage
        # lower than zero; this way they end up sorted (in -v output)
        # at the bottom but you can still determine the "actual" percentage.
        if lang_name in incomplete_languages:
            stats = -stats
        all_langs.append((stats, lang_name))

    if verbose:
        for s, l in sorted(all_langs, reverse=True):
            outputter.print("#  %16s\t%6.2f" % (l, s * 100.0))
    output_langs(all_langs, outputter, "complete", lambda s : s == 1.0)
    output_langs(all_langs, outputter, "good", lambda s : 1.0 > s >= 0.75)
    output_langs(all_langs, outputter, "ok", lambda s : 0.75 > s >= 0.05)
    output_langs(all_langs, outputter, "incomplete", lambda s : 0.05 > s)
    outputter.print(mark_text)

    # Audit the languages that are in TX, mapped to git
    for lang_name in languages:
        if not os.path.exists("lang/calamares_{}.ts".format(lang_name)):
            print("# !! Missing translation file for {}".format(lang_name))
        if not os.path.isdir("lang/python/{}/LC_MESSAGES".format(lang_name)):
            print("# !! Missing Python translation file for {}".format(lang_name))

    # Audit the files that are in git, mapped to TX
    special_cases = ("python.pot", "python", "CMakeLists.txt", "txload.cpp", "calamares_i18n.qrc.in")
    for file_name in os.listdir("lang"):
        if file_name in special_cases:
            continue
        elif file_name.startswith("calamares_") and file_name.endswith(".ts"):
            key = file_name[10:-3]
            if not key in languages and not key == "en":
                print("# !! Translation file for {} not in TX".format(key))
        elif file_name.startswith("tz_") and file_name.endswith(".ts"):
            key = file_name[3:-3]
            if not key in languages and not key == "en":
                print("# !! Translation file for TZ {} not in TX".format(key))
        elif file_name.startswith("kb_") and file_name.endswith(".ts"):
            key = file_name[3:-3]
            if not key in languages and not key == "en":
                print("# !! Translation file for KB {} not in TX".format(key))
        else:
            print("# !! Weird translation file {} not in TX".format(file_name))

    # Audit the python translation files that are in git, mapped to TX
    for file_name in os.listdir("lang/python"):
        if file_name not in languages:
            print("# !! Translation file for Python {} not in TX".format(file_name))

    return 0


def main():
    parser = argparse.ArgumentParser(description="Update Transifex Statistics")
    parser.add_argument("--verbose", "-v", help="Show statistics", action="store_true")
    parser.add_argument("--bogus", "-n", help="Use bogus data (do not query Transifex)", action="store_true")
    parser.add_argument("--edit", "-e", help="Edit CMakeLists.txt in-place", action="store_true")
    args = parser.parse_args()
    try:
        if args.bogus:
            getter = BogusGetter()
        else:
            getter = TransifexGetter()
        if args.edit:
            outputter = EditingOutputter()
        else:
            outputter = PrintOutputter()
        with outputter:
            return get_tx_stats(getter.languages, outputter, args.verbose)
    except TXError as e:
        print("! " + str(e))
        return 1;
    return 0

if __name__ == "__main__":
    sys.exit(main())