1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178
|
#! /usr/bin/python3
"""
This program extracts translatable contents from xml files under
the subdirectory "doc/", and builds a PO template file with them.
Currently translatable contents are provided by tags
'title', 'ulink', 'term' and 'para'.
Each entry is preceded by a comment giving a means to access the
translatable text chunk with xml.etree.ElementTree.
How to use this script:
$ devtools/blend-doc-makepot doc/en > path_to_blends.pot
"""
import xml.etree.ElementTree as ET
import sys, re, os, hashlib, collections
sys.path.append(os.path.dirname(__file__))
from utils import normalizedText
def content(tag):
    """
    Returns the inner content of an element as a string

    Parameter:
    ----------

    - tag (xml.etree.ElementTree.Element) the element to serialize

    Returns:
    --------

    the leading text of the element (an empty string when there is none)
    followed by the serialization of each of its direct children, as
    produced by ET.tostring(..., encoding='unicode')
    """
    leading = tag.text if tag.text else ""
    children = [ET.tostring(child, encoding='unicode') for child in tag]
    return leading + ''.join(children)
class PotEntry():
    """
    Class to implement POT entries.

    Every instance registers itself in the class-level mapping "entries",
    keyed by the SHA-256 digest of its formatted text, so entries which
    bear the same text are grouped under a single key.

    Parameters of the constructor:
    ------------------------------

    - comment (str) a comment to access the text in a source file
    - text (str) the original text to process
    """

    # digest of the formatted text -> list of the PotEntry instances which
    # share that text, kept in order of first insertion
    entries = collections.OrderedDict()

    def __init__(self, comment, text):
        self.comment = comment
        # normalizedText is imported from the local module "utils";
        # presumably it yields the escaped lines of the text (verify there).
        # Each item it yields becomes one double-quoted line of the msgid.
        self.text = "\n".join([
            f'"{l}"' for l in normalizedText(text, escape=True)])
        self.hash = hashlib.sha256(self.text.encode("UTF-8")).hexdigest()
        # group this instance with the previous ones bearing the same text
        self.entries.setdefault(self.hash, []).append(self)

    def __str__(self):
        """
        Returns the entry as a msgid/msgstr pair, without any comment
        """
        # this must be an f-string, otherwise the placeholder is printed
        # literally instead of the text of the entry
        return f"""\
msgid ""
{self.text}
msgstr ""
"""

    @staticmethod
    def header():
        """
        Returns a template for the begin of the POT file
        """
        # NOTE(review): the POT-Creation-Date below is a fixed literal, it is
        # not computed when the script runs.
        return """\
# LANG translations for Debian Blends documentation.
# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER
# This file is distributed under the same license as the blends package.
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: blender-doc\\n"
"Report-Msgid-Bugs-To: blender@packages.debian.org\\n"
"POT-Creation-Date: 2024-11-20 10:11+0200\\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"
"Language-Team: LANGUAGE <LL@li.org>\\n"
"Language: \\n"
"MIME-Version: 1.0\\n"
"Content-Type: text/plain; charset=UTF-8\\n"
"Content-Transfer-Encoding: 8bit\\n"
"""

    @staticmethod
    def entries_str():
        """
        Returns every registered entry, formatted for the POT file

        Entries which share the same text are output once, preceded by one
        "#. " comment line for each place where the text was found.
        """
        chunks = []
        for list_p in PotEntry.entries.values():
            comments = "\n".join(f"#. {p.comment}" for p in list_p)
            # all members of the list bear the same text: take the first
            text = list_p[0].text
            chunks.append(f"""
{comments}
msgid ""
{text}
msgstr ""
""")
        return "".join(chunks)

    @staticmethod
    def potfile():
        """
        Returns the whole POT file: the header, then the entries
        """
        return f"""
{PotEntry.header()}
{PotEntry.entries_str()}
"""
def pot_entries(filename):
    """
    collects POT entries from a file

    For every element 'title', 'ulink', 'term' or 'para' of the document,
    the following non-blank text chunks are registered as POT entries:
    the leading text of the element, the tail of each of its direct
    children, the leading text of each direct child 'emphasis' and the
    tail of each child of such an 'emphasis'.

    Each entry is given a comment made of the file name and of a Python
    expression based on root.findall(...), which locates the text chunk.

    Parameter:
    ----------

    - filename (str) path to an xml file

    Returns:
    --------

    None

    Side-effects:
    -------------

    PotEntry.entries will bear all information collected from the file
    """
    tree = ET.parse(filename)
    root = tree.getroot()
    for tag in ("title", "ulink", "term", "para"):
        xpath = ".//" + tag
        for i, t in enumerate(root.findall(xpath)):
            # expression which locates the current element from the root
            base = f'{filename} : root.findall("{xpath}")[{i}]'
            if t.text and t.text.strip():
                # the constructor registers the entry into PotEntry.entries
                PotEntry(f"{base}.text", t.text)
            # how many children of each tag were met so far under t;
            # the position predicates of ElementTree's XPath are 1-based
            dejavu = {}
            for e in t:
                dejavu[e.tag] = dejavu.get(e.tag, 0) + 1
                xpath2 = f"./{e.tag}[{dejavu[e.tag]}]"
                child = f'{base}.findall("{xpath2}")[0]'
                if e.tag == "emphasis":
                    if e.text and e.text.strip():
                        # it is an emphasis inside some translatable text
                        # so we must output its text into the POT file
                        PotEntry(f"{child}.text", e.text)
                    dejavu_em = {}
                    for e_em in e:
                        # we are under one layer of emphasis tag now!
                        # The counter is keyed by the tag name, like the
                        # counter above; keying it by the element itself
                        # would never match, and every sibling would be
                        # given the position 1.
                        dejavu_em[e_em.tag] = dejavu_em.get(e_em.tag, 0) + 1
                        xpath2_em = f"./{e_em.tag}[{dejavu_em[e_em.tag]}]"
                        if e_em.tail and e_em.tail.strip():
                            PotEntry(
                                f'{child}.findall("{xpath2_em}")[0].tail',
                                e_em.tail)
                if e.tail and e.tail.strip():
                    PotEntry(f"{child}.tail", e.tail)
if __name__ == "__main__":
    # write a POT file to the standard output; the directory to scan is the
    # first command-line argument, or the current directory by default
    path = sys.argv[1] if len(sys.argv) > 1 else "."
    for dirpath, _dirnames, filenames in os.walk(path):
        for name in filenames:
            if not name.endswith(".xml"):
                continue
            pot_entries(os.path.join(dirpath, name))
    print(PotEntry.potfile())
|