File: template_i18n_strings.py

package info (click to toggle)
slm 2.12-1
links: PTS, VCS
area: main
in suites: forky, sid
size: 24,520 kB
sloc: python: 15,419; javascript: 5,061; makefile: 184; sh: 182; xml: 57
file content (179 lines) | stat: -rw-r--r-- 5,450 bytes
parent folder | download | duplicates (2)
"""
    template_i18n_strings.py
    - fouille les templates de django et y cherche les fichiers
      de type HTML ou RML, isole les chaînes (pas les tags) et les
      prépare à la façon de : {% translate "la chaîne à traduire" %}

    Copyright (C) 2024 Georges Khaznadar <georgesk@debian.org>

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
"""

import bs4
from bs4 import BeautifulSoup as BS
import sys, os, re
import ply.lex as lex

def markup(s, start='{% translate "', end='" %}'):
    """
    Wrap the meaningful part of a text between two markers.

    The leading and trailing runs of spaces, tabs and newlines stay
    outside the markers: ``start`` is inserted right after the leading
    run and ``end`` right before the trailing run.  A string that is
    empty or made only of whitespace is returned unchanged.

    @param s the string to mark
    @param start opening marker; defaults to '{% translate "'
    @param end closing marker; defaults to '" %}'
    @return the marked string
    """
    if not s.strip():
        return s
    blanks = " \t\n"
    # Split s into <leading blanks><core><trailing blanks>.
    head_len = len(s) - len(s.lstrip(blanks))
    tail_start = len(s.rstrip(blanks))
    core = s.strip(blanks)
    return s[:head_len] + start + core + end + s[tail_start:]

class TransLexer(object):
    """
    PLY lexer that splits the text of a Django template into plain text,
    ``{{ ... }}`` variable blocks and ``{% ... %}`` control blocks, so
    that only the plain text gets wrapped in translation markers.

    NOTE: PLY uses the docstring of every ``t_*`` function as the regular
    expression of that rule; those functions are therefore documented
    with ``#`` comments only.
    """

    tokens = (
        "TEXT",
        "VAR",
        "CTRL",
    )

    states = (
        ('var', 'exclusive'),     # inside {{ ... }}: not to translate
        ('ctrl', 'exclusive'),    # inside {% ... %}: not to translate
    )

    # "{{" opens a variable block; the delimiter itself yields no token.
    def t_begin_var(self, t):
        r'\{\{'
        t.lexer.begin('var')
        return

    # "}}" closes the variable block and goes back to plain text.
    def t_var_end_var(self, t):
        r'\}\}'
        t.lexer.begin('INITIAL')
        return

    # Content of a variable block: anything up to the closing "}}".
    # A lone "}" (not followed by another "}") is part of the content.
    t_var_VAR = r'(?:[^\}]|\}(?!\}))+'

    # "{%" opens a control block; the delimiter itself yields no token.
    def t_begin_ctrl(self, t):
        r'\{%'
        t.lexer.begin('ctrl')
        return

    # "%}" closes the control block and goes back to plain text.
    def t_ctrl_end_ctrl(self, t):
        r'%\}'
        t.lexer.begin('INITIAL')
        return

    # Content of a control block: anything up to the closing "%}".
    # A lone "%" (not followed by "}") is part of the content.
    t_ctrl_CTRL = r'(?:[^%]|%(?!\}))+'

    # Plain text: anything up to the next "{{" or "{%".  A lone "{" that
    # does not open a block is ordinary text.
    t_TEXT = r'(?:[^\{]|\{(?![\{%]))+'

    # Error handlers: PLY raises LexError when an error handler returns
    # without moving the input position, so skip the offending character.
    def t_error(self, t):
        t.lexer.skip(1)

    def t_var_error(self, t):
        t.lexer.skip(1)

    def t_ctrl_error(self, t):
        t.lexer.skip(1)

    def __init__(self, **kwargs):
        """
        Build the PLY lexer from the rules of this instance.

        @param kwargs keyword arguments passed through to ply.lex.lex()
        """
        self.lexer = lex.lex(module=self, **kwargs)
        return

    def i18n(self, s):
        """
        Mark the plain-text parts of a template fragment for translation.

        Plain text is passed through markup(); the content of variable
        and control blocks is copied as is, between its delimiters.

        @param s the template fragment
        @return the fragment with its plain-text parts marked
        """
        # input() does not reset the lexer state: a previous fragment
        # that ended inside an unclosed block would leak its state here.
        self.lexer.begin('INITIAL')
        self.lexer.input(s)
        pieces = []
        while True:
            tok = self.lexer.token()
            if tok is None:
                break
            if tok.type == 'TEXT':
                pieces.append(markup(tok.value))
            elif tok.type == "VAR":
                pieces.append("{{" + tok.value + "}}")
            elif tok.type == "CTRL":
                pieces.append("{%" + tok.value + "%}")
        return "".join(pieces)

# Single lexer instance, built once when the module is loaded;
# prepare_translation() runs every text node through its i18n() method.
TRANSLEXER = TransLexer()

def print_text(s):
    """
    Print a text node on standard output, without a trailing newline.

    Nothing is printed when the argument is not a NavigableString or
    when its text is only whitespace.

    @param s a node of a BeautifulSoup tree
    @return always ""
    """
    if not isinstance(s, bs4.element.NavigableString):
        return ""
    text = str(s)
    if text.strip():
        print(text, end="")
    return ""

def prepare_translation(elt):
    """
    Return the translation-ready form of a text node.

    The text is run through TRANSLEXER.i18n(), which wraps its plain
    parts in translation markers and leaves the template markup
    ({{...}} and {%...%}) as it is.

    @param elt a node of a BeautifulSoup tree
    @return the marked text, or "" when the node must be left alone:
            it is not a text node, it is blank, or it is a special
            string (comment, doctype, declaration, CDATA section,
            processing instruction)
    """
    if not isinstance(elt, bs4.element.NavigableString):
        return ""
    # Comment, Doctype, Declaration, CData and ProcessingInstruction all
    # derive from PreformattedString: they are markup, not readable text,
    # and wrapping them in translation markers would corrupt the document.
    if isinstance(elt, bs4.element.PreformattedString):
        return ""
    text = str(elt)
    if not text.strip():
        return ""
    return TRANSLEXER.i18n(text)
            
def parcours_de_textes(doc, a_faire, skip_tags=("script", "style")):
    """
    Walk a parsed document recursively and call a_faire on every node.

    The document is modified in place; nothing is returned.

    @param doc a BeautifulSoup tree, or one of its tags
    @param a_faire a function called with each child node; when it
           returns a non-empty string, that string replaces the text
           of the node
    @param skip_tags lower-case names of the tags whose content is not
           visited; by default script and style, since JavaScript and
           CSS are not translatable text
    """
    # Iterate over a snapshot: replace_with() edits the list of children
    # while the loop is running.
    for elt in list(doc.children):
        t = a_faire(elt)
        if t:
            # For a text node, .string is the node itself; for a Tag it
            # is its single string child, or None when there is no such
            # child, in which case there is nothing to replace.
            target = elt.string
            if target is not None:
                target.replace_with(t)
        if isinstance(elt, bs4.element.Tag) and \
           elt.name.lower() not in skip_tags:
            parcours_de_textes(elt, a_faire, skip_tags)
    return

if __name__ == "__main__":
    # Command-line entry point: read the template named by the first
    # argument, mark its texts and title attributes for translation, and
    # print the result on standard output.
    if len(sys.argv) < 2:
        sys.exit("usage: {} TEMPLATE_FILE".format(sys.argv[0]))
    # "with" makes sure the input file is closed once it has been read.
    with open(sys.argv[1]) as infile:
        xmlstring = infile.read()
    restoreDocType = False
    if "<!DOCTYPE html>" in xmlstring:
        ############ doctype protection #################
        # Disguised as a {{...}} block, the doctype goes through the
        # lexer untouched; it is restored after the tree is serialized.
        xmlstring = xmlstring.replace("<!DOCTYPE html>", "{{ !DOCTYPE html }}")
        restoreDocType = True
    doc = BS(xmlstring, 'html.parser')
    parcours_de_textes(doc, prepare_translation)
    newxmlstring = doc.prettify()
    if restoreDocType:
        newxmlstring = newxmlstring.replace(
            "{{ !DOCTYPE html }}", "<!DOCTYPE html>")
    ################ marking of the title attributes #############
    # A plain greedy quantifier matches exactly what the possessive
    # "*+" did here, and also compiles on Python older than 3.11.
    pattern = re.compile(r"title=\"([^\"]*)\"")
    newxmlstring = pattern.sub("title=\"{% translate '\\1' %}\"", newxmlstring)
    print(newxmlstring)