File: txstats.py

package info (click to toggle)
calamares 3.4.2-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 33,104 kB
  • sloc: cpp: 71,902; python: 4,365; xml: 1,379; sh: 717; ansic: 105; makefile: 7
file content (284 lines) | stat: -rwxr-xr-x 10,409 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
#! /usr/bin/env python3
#
#   SPDX-FileCopyrightText: 2018 Adriaan de Groot <groot@kde.org>
#   SPDX-License-Identifier: BSD-2-Clause
#
# Uses the Transifex API to get a list of enabled languages,
# and outputs CMake settings for inclusion into CMakeLists.txt.
#
# This is a Python3 script.
#
# Run it with a -v command-line option to get extra output on
# actual translation percentages.
import sys
import os
import argparse

class TXError(Exception):
    """
    Error raised by this script when it cannot do its job.

    The message passed to the constructor is what gets reported
    to the user.
    """


class TransifexGetter(object):
    """
    Get language data from Transifex.

    The object does all the work in __init__, after that
    the only relevant data is .languages, a dictionary
    of language data. It maps a language code to
    dict(translated=dict(stringcount=..., percentage=...)),
    where percentage is a fraction (1.0 means fully translated).

    Any failure to obtain the data is reported as a TXError.
    """
    # Seconds to wait on the Transifex server before giving up;
    # without a timeout a stalled connection would hang forever.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        token = self.get_tx_credentials()
        if token is None:
            raise TXError("Could not get Transifex API token")

        import requests
        base_url = "https://rest.api.transifex.com/resource_language_stats"
        project_filter = "filter[project]=o:calamares:p:calamares"
        resource_filter = "filter[resource]=o:calamares:p:calamares:r:calamares"
        url = base_url + "?" + project_filter.replace(":", "%3A") + "&" + resource_filter.replace(":", "%3A")
        headers = {
            "accept": "application/vnd.api+json",
            "authorization": "Bearer " + token
        }

        try:
            r = requests.get(url, headers=headers, timeout=self.REQUEST_TIMEOUT)
        except requests.RequestException as e:
            # Connection problems and timeouts are ordinary failures
            # for this script, not programming errors.
            raise TXError("Could not get Transifex data from API") from e
        if r.status_code != 200:
            raise TXError("Could not get Transifex data from API")

        try:
            data = r.json()["data"]
        except (ValueError, KeyError, TypeError) as e:
            raise TXError("Unexpected response from Transifex API") from e

        self.languages = dict()

        for d in data:
            try:
                translated_count = d["attributes"]["translated_strings"]
                total_count = d["attributes"]["total_strings"]
                language_key = d["relationships"]["language"]["data"]["id"]
            except (KeyError, TypeError) as e:
                raise TXError("Unexpected response from Transifex API") from e
            # Language ids look like "l:<code>"; an explicit check is
            # used because assert is stripped when running with -O.
            if not language_key.startswith("l:"):
                raise TXError("Unexpected language id '{}' from Transifex API".format(language_key))
            language_key = language_key[2:]
            # A resource without any strings counts as 0% translated
            # rather than dividing by zero.
            percentage = (translated_count / total_count) if total_count else 0.0
            self.languages[language_key] = dict(translated=dict(stringcount=translated_count, percentage=percentage))


    def get_tx_credentials(self):
        """
        Gets the API token out of the user's .transifexrc (this is supposed
        to be secure).

        Returns the "password" value of the "https://app.transifex.com"
        section, or None if the file cannot be read, cannot be parsed,
        or does not have that section or option.
        """
        import configparser
        import os
        txconfig_name = os.path.expanduser("~/.transifexrc")
        parser = configparser.ConfigParser()
        try:
            with open(txconfig_name, "r") as f:
                parser.read_file(f)
            return parser.get("https://app.transifex.com", "password")
        except (IOError, configparser.Error):
            # Missing file, malformed file, missing section or option:
            # in all these cases there is no usable token.
            return None


class BogusGetter(object):
    """
    Stand-in for a real getter, with canned data.

    No network access happens here: .languages is filled with a fixed
    handful of languages and completion levels, so the rest of the
    script can be exercised without querying Transifex.
    """
    def __init__(self):
        # (language code, completion in percent)
        canned = (
            ("sq", 100),
            ("ar", 44),
            ("as", 28),
            ("de", 15),
            ("da", 4),
            ("ts", 82),
        )
        # Same shape as real language data: the percentage is stored
        # as a fraction, the string count is an arbitrary constant.
        self.languages = {
            code: {"translated": {"stringcount": 686, "percentage": percent / 100.0}}
            for code, percent in canned
        }


class PrintOutputter(object):
    """
    Outputter that writes every line to standard output.

    Can be used as a context manager; entering and leaving the
    context do nothing.
    """
    def __enter__(self):
        return self

    def __exit__(self, e, v, tb):
        # Nothing to clean up. Returning None means an exception
        # raised inside the with-block is not suppressed.
        return None

    def print(self, s):
        # Inside a method, print is the builtin, not this method.
        print(s)


class EditingOutputter(object):
    """
    Outputter that replaces the TX statistics section of the
    CMakeLists.txt in the current directory.

    The section is delimited by two identical marker lines of the
    form "# Total <something> languages". Everything before the first
    marker and after the second one is kept; what lies in between
    (markers included) is replaced by the lines passed to print().
    The file is rewritten when the context is left without an exception.
    """
    def __init__(self):
        with open("CMakeLists.txt", "r") as cmake_file:
            lines = cmake_file.readlines()

        # Lines are not stripped, hence the \n in the suffix check.
        start = next(
            (index for index, line in enumerate(lines)
             if line.startswith("# Total ") and line.endswith(" languages\n")),
            None,
        )
        if start is None:
            raise TXError("No CMakeLists.txt lines for TX stats found")
        marker = lines[start]
        self.pre_lines = lines[:start]

        # Index just past the closing marker; if there is no closing
        # marker this stays at the end of the file, which the sanity
        # check below rejects.
        end = len(lines)
        for index in range(start + 1, len(lines)):
            if lines[index].startswith(marker):
                end = index + 1
                break

        # Try to catch runaway sections: we know there should
        # be four set-lines, which are unlikely to be 3 lines each;
        # similarly the CMakeLists.txt is supposed to end with
        # some boilerplate.
        #
        # However, gersemi will reformat to one-language-per-line,
        # so we can get really long sections, that's why we use 150 as a limit.
        too_long = end > start + 150
        too_close_to_eof = end > len(lines) - 4
        if too_long or too_close_to_eof:
            raise TXError("Could not find end of TX settings in CMakeLists.txt")
        self.post_lines = lines[end:]

        self.mid_lines = []
        print("# Editing CMakeLists.txt in-place")

    def print(self, s):
        # Store the line for the file (with the \n that print() would
        # add implicitly) and echo only comment lines to the terminal.
        self.mid_lines.append(s + "\n")
        if s.startswith("#"):
            print(s)

    def __enter__(self):
        return self

    def __exit__(self, e, v, tb):
        # Leave the file untouched if the with-block raised.
        if e is not None:
            return
        new_text = "".join(self.pre_lines + self.mid_lines + self.post_lines)
        with open("CMakeLists.txt", "w") as cmake_file:
            cmake_file.write(new_text)
        print("# CMakeLists.txt updated")


def output_langs(all_langs, outputter, label, filterfunc):
    """
    Output (via @p outputter) all of the languages in @p all_langs
    that satisfy the translation-percentage filter @p filterfunc.
    Prints a CMake set() command with the @p label as part
    of the variable name.

    @p all_langs is a sequence of (stats, language-name) pairs;
    @p filterfunc is called with the stats value of each pair and
    the language is included when it returns a true value. The
    languages are output in sorted order, and the last output line
    carries a trailing comment with the number of languages.

    Performs line-wrapping: lines are broken at a space, and
    continuation lines are indented by four spaces. A word that
    is too long to fit is output on an over-long line rather
    than being cut.
    """
    these_langs = [l for s, l in all_langs if filterfunc(s)]
    out = " ".join(["set( _tx_%s" % label, " ".join(sorted(these_langs)), ")"])
    width = 68
    prefix = ""
    trailer = f"  # {len(these_langs)} languages" # Comment at the end of the CMake line
    while len(out) > width - len(prefix):
        cut = out.rfind(" ", 0, width)
        if cut == -1:
            # No space to break at inside the window: break at the
            # first space after it instead. Slicing with the -1 from
            # rfind() would silently drop the last character of the
            # command (the closing parenthesis).
            cut = out.find(" ", width)
            if cut == -1:
                # One unbreakable word left, output it as-is below.
                break
        chunk = out[:cut]
        outputter.print("%s%s" % (prefix, chunk))
        # Skip the space that the line was broken at
        out = out[len(chunk)+1:]
        prefix = "    "
    outputter.print(f"{prefix}{out}{trailer}")


def get_tx_stats(languages, outputter, verbose):
    """
    Turns the translation statistics in @p languages into CMake
    settings, and audits the translation files on disk against them.

    @p languages maps a language code to its statistics; the value
    used here is languages[code]["translated"]["percentage"], a
    fraction where 1.0 means completely translated. The set() commands
    for the _tx_complete, _tx_good, _tx_ok and _tx_incomplete variables
    are sent to @p outputter, in between two identical
    "# Total N languages" marker lines.

    If @p verbose is True, outputs language stats as comment lines
    as well, highest percentage first.

    After that, differences between @p languages and the files found
    under lang/ (relative to the current directory) are reported
    with plain print().

    Always returns 0.
    """
    # Some languages go into the "incomplete" list by definition,
    # regardless of their completion status: this can have various reasons.
    #
    # - (Esperanto wasn't supported until Qt 5.12.2)
    # - Interlingue still is not supported by the minimum Qt version
    incomplete_languages = (
        "ie",   # Not supported by Qt at least through 5.15.0
        )

    def sort_value(code):
        # Make the by-definition-incomplete languages have a percentage
        # lower than zero; this way they end up sorted (in -v output)
        # at the bottom but you can still determine the "actual" percentage.
        fraction = languages[code]["translated"]["percentage"]
        return -fraction if code in incomplete_languages else fraction

    mark_text = "# Total %d languages" % len(languages)
    outputter.print(mark_text)
    all_langs = [(sort_value(code), code) for code in languages]

    if verbose:
        for fraction, code in sorted(all_langs, reverse=True):
            outputter.print("#  %16s\t%6.2f" % (code, fraction * 100.0))

    # CMake variable label and the completion range that belongs to it
    buckets = (
        ("complete", lambda s: s == 1.0),
        ("good", lambda s: 1.0 > s >= 0.75),
        ("ok", lambda s: 0.75 > s >= 0.05),
        ("incomplete", lambda s: 0.05 > s),
    )
    for label, accepts in buckets:
        output_langs(all_langs, outputter, label, accepts)
    outputter.print(mark_text)

    # Audit the languages that are in TX, mapped to git
    for code in languages:
        ts_file = "lang/calamares_{}.ts".format(code)
        if not os.path.exists(ts_file):
            print("# !! Missing translation file for {}".format(code))
        python_dir = "lang/python/{}/LC_MESSAGES".format(code)
        if not os.path.isdir(python_dir):
            print("# !! Missing Python translation file for {}".format(code))

    # Audit the files that are in git, mapped to TX
    special_cases = ("python.pot", "python", "CMakeLists.txt", "txload.cpp", "calamares_i18n.qrc.in")
    # File-name prefix of each family of .ts files, with the complaint
    # to print when the language in the file name is not known to TX.
    ts_families = (
        ("calamares_", "# !! Translation file for {} not in TX"),
        ("tz_", "# !! Translation file for TZ {} not in TX"),
        ("kb_", "# !! Translation file for KB {} not in TX"),
    )
    for file_name in os.listdir("lang"):
        if file_name in special_cases:
            continue
        for prefix, complaint in ts_families:
            if file_name.startswith(prefix) and file_name.endswith(".ts"):
                # Language code is what sits between the prefix and ".ts";
                # English is the source language and never comes from TX.
                key = file_name[len(prefix):-3]
                if key != "en" and key not in languages:
                    print(complaint.format(key))
                break
        else:
            print("# !! Weird translation file {} not in TX".format(file_name))

    # Audit the python translation files that are in git, mapped to TX
    for entry in os.listdir("lang/python"):
        if entry not in languages:
            print("# !! Translation file for Python {} not in TX".format(entry))

    return 0


def main():
    """
    Command-line entry point.

    Parses the command-line options, picks the data source (bogus or
    Transifex) and the output method (in-place edit or print), and
    runs get_tx_stats() with them.

    Returns the result of get_tx_stats(), or 1 when a TXError occurred
    (after printing its message).
    """
    arg_parser = argparse.ArgumentParser(description="Update Transifex Statistics")
    arg_parser.add_argument("--verbose", "-v", help="Show statistics", action="store_true")
    arg_parser.add_argument("--bogus", "-n", help="Use bogus data (do not query Transifex)", action="store_true")
    arg_parser.add_argument("--edit", "-e", help="Edit CMakeLists.txt in-place", action="store_true")
    args = arg_parser.parse_args()

    try:
        # The getter is created first, so that a failure to obtain
        # the data is reported before CMakeLists.txt is even looked at.
        getter = BogusGetter() if args.bogus else TransifexGetter()
        outputter = EditingOutputter() if args.edit else PrintOutputter()
        with outputter:
            return get_tx_stats(getter.languages, outputter, args.verbose)
    except TXError as error:
        print("! " + str(error))
        return 1
    return 0

if __name__ == "__main__":
    # Use the return value of main() as the exit status of the script.
    exit_status = main()
    sys.exit(exit_status)