File: gladtex.py

package info (click to toggle)
gladtex 2.3.1-3
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 440 kB
  • sloc: python: 4,531; sh: 7; makefile: 3
file content (310 lines) | stat: -rw-r--r-- 15,029 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
#!/usr/bin/env python3
import argparse
import multiprocessing
import os
import posixpath
import re
import sys
import gleetex


class HelpfulCmdParser(argparse.ArgumentParser):
    """Argument parser which shows the complete help text, not just the
    short usage line, whenever a usage error occurs."""

    def error(self, message):
        """Write `message` to stderr, print the full help and terminate the
        process with exit status 2."""
        report = 'error: %s\n' % message
        sys.stderr.write(report)
        self.print_help()
        raise SystemExit(2)



def format_ordinal(number):
    """Return `number` followed by its English ordinal suffix.

    The suffix is chosen from the last digit (1st, 2nd, 3rd, otherwise th),
    except for numbers ending in 11, 12 or 13, which always take 'th'
    (11th, 112th); a lookup on the last digit alone gets those wrong."""
    if number % 100 in (11, 12, 13):
        suffix = 'th'
    else:
        suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(number % 10, 'th')
    return '%d%s' % (number, suffix)

class Main:
    """This class parses command line arguments and deals with the
    conversion. Only the run method needs to be called."""
    def __init__(self):
        self.__encoding = "utf-8"

    def _parse_args(self, args):
        """Parse command line arguments and return option instance.

        `args` is the argument list without the program name. The input file
        is optional and defaults to '-' (read from stdin), as its help text
        promises. On invalid usage, HelpfulCmdParser prints the full help and
        exits with status 2."""
        epilog = "GladTeX %s, http://humenda.github.io/GladTeX" % gleetex.VERSION
        description = ("GladTeX is a preprocessor that enables the use of LaTeX"
            " maths within HTML files. The maths, embedded in <EQ>...</EQ> "
            "tags, as if within \\(..\\) in LaTeX (or $...$ in TeX), is fed "
            "through latex and replaced by images.\n\nIf the environment "
            "variable `DEBUG=1` is set, a full Python traceback, instead of a "
            "short, user-friendly message, will be shown.")
        parser = HelpfulCmdParser(epilog=epilog, description=description)
        parser.add_argument("-a", action="store_true", dest="exclusionfile",
                help=("save text alternatives "
                    "for images which are too long for the alt attribute into a "
                    "single separate file and link images to it"))
        parser.add_argument('-b', dest='background_color',
                help="Set background color for resulting images (default transparent)")
        parser.add_argument('-c', dest='foreground_color',
                help="Set foreground color for resulting images (default 0,0,0)")
        parser.add_argument('-d', dest='directory',
                help=("Directory in which to"
                    " store generated images in (relative path)"))
        parser.add_argument('-e', dest='latex_maths_env',
                help=("Set custom maths environment to surround the formula"
                    " (e.g. flalign)"))
        parser.add_argument('-E', dest='encoding', default=None,
                help="Overwrite encoding to use (default UTF-8)")
        parser.add_argument('-i', metavar='CLASS', dest='inlinemath',
                help="CSS class to assign to inline math (default: 'inlinemath')")
        parser.add_argument('-l', metavar='CLASS', dest='displaymath',
                help="CSS class to assign to block-level math (default: 'displaymath')")
        parser.add_argument('-K', dest='keep_latex_source', action="store_true",
                default=False,
                help="keep LaTeX file(s) when converting formulas (useful for debugging)")
        parser.add_argument('-m', dest='machinereadable', action="store_true",
                default=False,
                help="Print output in machine-readable format (less concise, better parseable)")
        parser.add_argument("-n", action="store_true", dest="notkeepoldcache",
                help=("Purge unreadable caches along with all eqn*.png files. "
                    "Caches can be unreadable if the used GladTeX version is "
                    "incompatible. If this option is unset, GladTeX will "
                    "simply fail when the cache is unreadable."))
        parser.add_argument('-o', metavar='FILENAME', dest='output',
                help=("Set output file name; '-' will print text to stdout (by "
                    "default input file name is used and .htex extension changed "
                    "to .html)"))
        parser.add_argument('-p', metavar='LATEX_STATEMENT', dest="preamble",
                help=("Add given LaTeX code to preamble of document; that'll "
                    "affect the conversion of every image"))
        parser.add_argument('-r', metavar='DPI', dest='dpi', default='115',
                help=("Set resolution (size of images) to 'dpi' (115 for a "
                    "fontsize of 12pt); if the suffix 'pt' is added, the "
                    "resolution will be calculated from the given font size."))
        parser.add_argument('-R', action="store_true", dest='replace_nonascii',
                default=False,
                help=("Replace non-ascii characters in formulas "
                    "through their LaTeX commands"))
        parser.add_argument("-u", metavar="URL", dest='url',
                help="URL to image files (relative links are default)")
        # nargs='?' makes the positional optional so that omitting it really
        # selects stdin, matching the help text below
        parser.add_argument('input', nargs='?', default='-',
                help=("Input .htex file with LaTeX "
                    "formulas (if omitted or -, stdin will be read)"))
        return parser.parse_args(args)

    def exit(self, text, status):
        """Exit function. Could be used to register any clean up action."""
        sys.stderr.write(text)
        if not text.endswith('\n'):
            sys.stderr.write('\n')
        sys.exit(status)

    def validate_options(self, opts):
        """Validate certain arguments suppliedon the command line. The user will
        get a (hopefully) helpful error message if he/she gave an invalid
        parameter."""
        color_regex = re.compile(r"^\d(?:\.\d+)?,\d(?:\.\d+)?,\d(?:\.\d+)?")
        if opts.background_color and not color_regex.match(opts.background_color):
            print("Option -b requires a string in the format " +
                        "num,num,num where num is a broken decimal between 0 " +
                        "and 1.")
            sys.exit(12)
        if opts.foreground_color and not color_regex.match(opts.foreground_color):
            print("Option -c requires a string in the format " +
                        "num,num,num where num is a broken decimal between 0 " +
                        "and 1.")
            sys.exit(13)

    def get_input_output(self, options):
        """Determine whether GladTeX is reading from stdin/file, writing to
        stdout/file and determine base_directory if files are in another
        directory. If no output file name is given and there is a input file to
        read from, output is written to a file ending on .html instead of .htex.
        The returned document is either string or byte, the latter if encoding
        is unknown."""
        data = None
        base_path = options.directory
        output = '-'
        if options.input == '-':
            data = sys.stdin.read()
        else:
            try:
                if options.encoding:
                    with open(options.input) as f:
                        data = f.read()
                else: # read as binary and guess from HTML meta charset
                    with open(options.input, 'rb') as file:
                        data = file.read()
            except UnicodeDecodeError as e:
                self.exit(('Error while reading from %s: %s\nProbably this file'
                    ' has a different encoding, try specifying -E.') % \
                            (options.input, str(e)), 88)
            except IsADirectoryError:
                self.exit("Error: cannot open %s for reading: is a directory." \
                        % options.input, 19)
            except FileNotFoundError:
                self.exit("Error: file %s not found." % options.input, 20)

        # check which output file name to use
        if options.output:
            output = options.output
        elif options.input != '-':
            output = os.path.splitext(options.input)[0] + '.html'
        # else case: output = '-' (see above)
        if not base_path:
            if options.output and os.path.dirname(options.output):
                base_path = os.path.dirname(output)
            elif options.input and os.path.dirname(options.input):
                base_path = os.path.dirname(input)
        if base_path: # if finally a basepath found:, strip \\ if on Windows
            base_path = posixpath.join(*(options.directory.split('\\')))
        # strip base_path from output, if there's one
        output = os.path.basename(output)
        return (data, base_path, output)


    def run(self, args):
        """Run the whole conversion for the given argument vector.

        `args` includes the program name as first element (like sys.argv).
        The steps are: parse and validate the options, read the document,
        feed it to the EqnParser, convert the formulas and write the result
        to stdout or to the output file. A ParseException ends the program
        through self.exit with status 5."""
        options = self._parse_args(args[1:])
        self.validate_options(options)
        self.__encoding = options.encoding
        doc, base_path, output = self.get_input_output(options)
        docparser = gleetex.htmlhandling.EqnParser()
        try:
            docparser.feed(doc)
            # fall back to UTF-8 if the parser reports no encoding
            self.__encoding = docparser.get_encoding() or 'utf-8'
        except gleetex.htmlhandling.ParseException as e:
            source_name = options.input
            if source_name == '-':
                source_name = 'stdin'
            self.exit('Error while parsing {}: {}'.format(source_name,
                str(e)), 5)
        processed = self.convert_images(docparser.get_data(), base_path,
                options)
        formatter = gleetex.htmlhandling.HtmlImageFormatter(
                base_path=base_path, link_path=options.url)
        with formatter as img_fmt:
            img_fmt.set_exclude_long_formulas(True)
            if options.replace_nonascii:
                img_fmt.set_replace_nonascii(True)
            if options.url:
                img_fmt.set_url(options.url)
            if options.inlinemath:
                img_fmt.set_inline_math_css_class(options.inlinemath)
            if options.displaymath:
                img_fmt.set_display_math_css_class(options.displaymath)

            if output != '-':
                with open(output, 'w', encoding=self.__encoding) as out_file:
                    self.write_html(out_file, processed, img_fmt)
            else:
                self.write_html(sys.stdout, processed, img_fmt)

    def write_html(self, file, processed, formatter):
        """Write back altered HTML file with given formatter."""
        # write data back
        for chunk in processed:
            if isinstance(chunk, dict):
                is_displaymath = chunk['displaymath']
                file.write(formatter.format(chunk['pos'], chunk['formula'],
                    chunk['path'], is_displaymath))
            else:
                file.write(chunk)

    def convert_images(self, parsed_htex_document, base_path, options):
        """Convert every formula of the parsed document and return the list
        of chunks to write later on.

        Tuples and lists in `parsed_htex_document` are treated as formulas
        and replaced by the data the converter returns for them, extended by
        the keys 'formula' and 'displaymath'; all other chunks are passed
        through unchanged. An unreadable cache ends the program through
        self.exit with status 78; conversion errors are handed to
        emit_latex_error."""
        def is_formula(chunk):
            return isinstance(chunk, (tuple, list))

        if not base_path or base_path == '.':
            base_path = ''
        try:
            conv = gleetex.convenience.CachedConverter(base_path,
                    not options.notkeepoldcache, encoding=self.__encoding)
        except gleetex.caching.JsonParserException as e:
            self.exit(e.args[0], 78)

        self.set_options(conv, options)
        formulas = [chunk for chunk in parsed_htex_document
                if is_formula(chunk)]
        try:
            conv.convert_all(base_path, formulas)
        except gleetex.convenience.ConversionException as e:
            self.emit_latex_error(e, options.machinereadable,
                    options.replace_nonascii)

        # walk over the chunks again and look up the data of each formula
        result = []
        for chunk in parsed_htex_document:
            if not is_formula(chunk):
                # anything which is not a formula is taken literally
                result.append(chunk)
                continue
            _p, displaymath, formula = chunk
            try:
                data = conv.get_data_for(formula, displaymath)
            except KeyError as e:
                raise KeyError(("formula '%s' not found; that means it was "
                    "not converted which should usually not happen.") % e.args[0])
            data['formula'] = formula
            data['displaymath'] = displaymath
            result.append(data)
        return result


    def set_options(self, conv, options):
        """Apply options from command line parser to the converter."""
        # set options
        options_to_query = ['preamble', 'latex_maths_env',
                'keep_latex_source']
        for option_str in options_to_query:
            option = getattr(options, option_str)
            if option:
                if option in ('True', 'False', 'false', 'true'):
                    option = option == 'True'
                conv.set_option(option_str, option)
        dpi = None
        if options.dpi.endswith('pt'):
            dpi = gleetex.image.fontsize2dpi(float(options.dpi[:-2]))
        else:
            dpi = float(options.dpi)
        conv.set_option("dpi", dpi)
        # colors need special handling
        for option_str in ['foreground_color', 'background_color']:
            option = getattr(options, option_str)
            if option:
                conv.set_option(option_str, tuple(map(float, option.split(','))))
        if options.replace_nonascii:
            conv.set_replace_nonascii(True)

    def emit_latex_error(self, err, machine_readable, escape):
        """Format a LaTeX error in a meaningful way. The argument escape
        speicifies, whether the -R switch had been passed."""
        if 'DEBUG' in os.environ and os.environ['DEBUG'] == '1':
            raise err
        escaped = err.formula
        if escape:
            escaped = gleetex.document.escape_unicode_in_formulas(err.formula)
        msg = None
        additional = ''
        if 'Package inputenc' in err.args[0]:
            additional += ('Add the switch `-R` to automatically replace unicode '
                'characters with LaTeX command sequences.')
        if machine_readable:
            msg = 'Line: {}, {}\nNumber: {}\nFormula: {}{}\nMessage: {}'.format(
                    err.src_line_number, err.src_pos_on_line, err.formula_count,
                    err.formula,
                    ('' if escaped == err.formula
                        else '\nLaTeXified formula: %s' % escaped),
                    err.cause)
            if additional:
                msg += '; ' + additional
        else:
            formula = '    ' + err.formula.replace('\n', '\n    ')
            escaped = ('    ' + escaped.replace('\n', '\n    ') if escaped !=
                    err.formula else '')
            msg = "Error while converting formula %d at line %d, %d:\n" %\
                           (err.formula_count, err.src_line_number, err.src_pos_on_line,)
            msg += '%s%s\n%s' % (formula, ('' if escaped == err.formula
                else '\nFormula without unicode symbols:\n%s' % escaped),
                   err.cause)
            if additional:
                import textwrap
                msg += ' undefined.\n' + '\n'.join(textwrap.wrap(additional, 80))
        self.exit(msg, 91)


if __name__ == '__main__':
    # Script entry point: only executed when this file is run directly.
    # freeze_support() enables multiprocessing on Windows (see the Python
    # documentation of the multiprocessing module); it has to be called
    # before the actual program starts.
    multiprocessing.freeze_support()
    m = Main()
    # pass the complete argument vector; run() skips the program name itself
    m.run(sys.argv)