File: formatter.py

package info (click to toggle)
vlfeat 0.9.21%2Bdfsg0-6%2Bdeb11u1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 6,824 kB
  • sloc: ansic: 23,180; python: 1,782; makefile: 201; xml: 188; sh: 49
file content (629 lines) | stat: -rw-r--r-- 18,928 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
#!/usr/bin/python
# file:        formatter.py
# author:      Andrea Vedaldi
# description: Utility to format MATLAB comments.

# Copyright (C) 2007-12 Andrea Vedaldi and Brian Fulkerson.
# All rights reserved.
#
# This file is part of the VLFeat library and is made available under
# the terms of the BSD license (see the COPYING file).

"""
MDOC formats the help block of a MATLAB M-file based on a simple set
of rules. Paragraphs, verbatim sections, lists and other structures
are automatically instantiated by looking at blank lines, indentation
and a few decoration symbols.

The documentation starts at a conventional indentation level N (by
default 2). A block of non-empty lines prefixed by N characters is
considered a paragraph. For instance

 |  Bla bla bla
 |  bla bla bla.
 |
 |  Bla bla.

generates two paragraphs. If there are more than N white spaces,
then the block is taken verbatim instead (and rendered in <pre> HTML
tags). For instance

 |  Bla bla bla
 |   Code Code Code
 |
 |   Code Code Code

generates one paragraph followed by one verbatim section.
"""

import xml.dom.minidom
import sys
import os
import re

__mpname__           = 'MDocFormatter'
__version__          = '0.1'
__date__             = '2008-01-01'
__description__      = 'MDoc formatting module'
__long_description__ = __doc__
__license__          = 'BSD'
__author__           = 'Andrea Vedaldi'

# terminal
class Terminal:
    """Base class of all terminal symbols (one per input line)."""

    def isa(self, classinfo):
        """Return True if this terminal is an instance of CLASSINFO.

        CLASSINFO is anything accepted by isinstance(): a class or a
        tuple of classes.
        """
        return isinstance(self, classinfo)

# empty terminal
class E(Terminal):
    """Empty terminal; carries no data of its own."""

# blank line
class B(Terminal):
    """Terminal for a blank line."""

    # A blank line has no text.
    content = ""

# non-blank line
class L(Terminal):
    """Terminal for a non-blank line."""

    # Default indentation; instances overwrite it with their own value.
    indent = 0

# regular line
class PL(L):
    """Terminal for a regular (plain text) line."""

# line with bullet
class BL(L):
    """Terminal for a line introduced by a bullet."""

    # The bullet text; None until an instance sets it.
    bullet = None
    # Default indentation of the text that follows the bullet.
    inner_indent = 0

# line with description
class DL(L):
    """Terminal for a line carrying a description."""

# --------------------------------------------------------------------
def lex(line):
# --------------------------------------------------------------------
    """
    Convert the string LINE into a single terminal symbol.

    The patterns are tried in a fixed order and the first one that
    matches wins:

    1. only whitespace             -> B
    2. '  content::inner_content'  -> DL (content, inner_content)
    3. '  - inner_content'         -> BL (bullet, inner_content,
                                          inner_indent, content)
    4. '  content'                 -> PL (content)

    Every non-blank terminal records in `indent` the number of leading
    whitespace characters. Because the description pattern is tested
    before the bullet pattern, a bulleted line containing '::' becomes
    a DL. None is returned if no pattern matches.
    """

    # blank line: nothing but whitespace
    if re.match(r"\s*\n?$", line):
        return B()

    # description line of the type '  content::inner_content'
    found = re.match(r"(\s*)(.*)::(.*)\n?$", line)
    if found:
        lead, term, rest = found.groups()
        token = DL()
        token.indent        = len(lead)
        token.content       = term
        token.inner_content = rest
        return token

    # bullet line of the type '  - inner_content'
    found = re.match(r"(\s*)([-\*#]\s*)(\S.*)\n?$", line)
    if found:
        lead, bullet, rest = found.groups()
        token = BL()
        token.indent        = len(lead)
        token.inner_content = rest
        token.bullet        = bullet
        # the item text starts right after the bullet and its padding
        token.inner_indent  = token.indent + len(bullet)
        token.content       = bullet + rest
        return token

    # regular line of the type '   content'
    found = re.match(r"(\s*)(\S.*)\n?$", line)
    if found:
        lead, text = found.groups()
        token = PL()
        token.indent  = len(lead)
        token.content = text
        return token

    # no pattern matched
    return None

# --------------------------------------------------------------------
class Lexer(object):
# --------------------------------------------------------------------
    """
    l = Lexer(LINES) parses the array of strings LINES into terminals
    (one per line, see lex()). Lexer has a head pointing to the current
    line; it starts just before the first line (position -1). The head
    can be controlled by the following methods:

    next(l) (or, equivalently, l.next()) advances the head and fetches
    the next terminal; an E terminal is returned once the head is past
    the last line.
    l.back() moves back the head.
    l.getpos() returns the head position.
    l.seek(POS) sets the head position to POS.
    l.rewrite(TOKEN) replaces the terminal under the head.
    """
    def __init__(self, lines):
        self.tokens = [lex(line) for line in lines]
        self.pos    = -1

    def __next__(self):
        "Advance the head and return the terminal it now points to."
        self.pos += 1
        if self.pos >= len(self.tokens):
            # past the end of the input: signal it with an empty terminal
            return E()
        return self.tokens[self.pos]

    # The documented interface is l.next(); keep it working alongside
    # the built-in next(l).
    next = __next__

    def seek(self, pos):
        "Set the head position to POS."
        self.pos = pos

    def back(self):
        "Move the head back by one, never before the initial position -1."
        if self.pos >= 0:
            self.pos -= 1

    def rewrite(self, str):
        "Replace the terminal under the head with STR."
        self.tokens[self.pos] = str

    def getpos(self):
        "Return the head position."
        return self.pos

    def __str__(self):
        "Return one line per terminal: index, class name and content."
        return "".join(
            "%5d) %s %s\n" % (i, t.__class__.__name__, t.content)
            for i, t in enumerate(self.tokens))

# --------------------------------------------------------------------
class Formatter:
# --------------------------------------------------------------------
    """
    f = Formatter(LINES) parses the array of strings LINES.

    f = Formatter(LINES, FUNCS) takes the dictionary of functions
    FUNCS. Function names must be uppercase. The dictionary entries
    are used to cross link functions in the generated documentation.

    Formatter(LINES, FUNCS, LINKTYPE) produces links of the specified
    type.  Use 'a' for HTML anchors and 'wiki' for MediaWiki style
    links.

    f.toDOM() processes the data and builds an XML (HTML) representation
    of it.
    """
    def __init__(self, lines, funcs=None, linktype='a'):
        """
        Tokenize LINES and prepare an empty XML document.

        LINES is the sequence of documentation lines. FUNCS maps
        function names to link targets (default: no functions).
        LINKTYPE selects the kind of link generated by addFancyText().

        The number of leading spaces of the first line defines the base
        indentation level of the documentation; it is 0 when LINES is
        empty.
        """
        first = lines[0] if lines else ''
        self.indentinit = len(first) - len(first.lstrip(' '))

        self.tokens = Lexer(lines)
        self.xmldoc = xml.dom.minidom.Document()
        # a fresh dictionary per instance instead of a shared mutable default
        self.funcs = {} if funcs is None else funcs
        self.linktype = linktype

    def toTextNode(self,s):
        return self.xmldoc.createTextNode(s)

    def addAttr(self, tag, attr, val):
        x = self.xmldoc.createAttribute(attr)
        x.nodeValue = val
        tag.setAttributeNode(x)

    def addText(self, tag, s):
        txt = self.toTextNode(s)
        tag.appendChild(txt)

    def addFancyText(self, tag, s):
        "Adds text while transforming function references to links."
        xs = []
        last = -1
        iter = re.finditer(r'(?:'
                           r'(?P<function>[A-Z][A-Z0-9_]*)'
                           r'\([^\)]*\)'
                           r')|(?:'
                           r'<a href="matlab:vl_help\(\''
                           r'(?P<page>[a-zA-Z0-9_]*)'
                           r'\'\)">'
                           r'(?P<text>[^<]*)'
                           r'</a>'
                           r')',s)

                           # r'(?P<page>[a-zA-Z0-9_]*)'
                           # r')', s)



                           # r')', s)

        for i in iter:
            func_name = i.group("function")
            page_name = i.group("page")

            if func_name and func_name.upper() in self.funcs:
                # retrieve function HTML location
                func_href = self.funcs[func_name.upper()]

                # add text so far
                xs.append(self.toTextNode(s[last+1:i.start()]))

                if self.linktype == 'a':
                    # add link to function
                    atag = self.xmldoc.createElement("a")
                    self.addText(atag, i.group('function'))
                    atag.setAttribute("href", "%s" % (func_href))
                    xs.append(atag)
                elif self.linktype == 'wiki':
                    linktxt = "[[%s|%s]]" % (func_href, i.group('function'))
                    xs.append(self.toTextNode(linktxt))

                # set head
                last = i.start()+len(i.group(1))-1

            elif page_name:
                #print "page %s:" % page_name, i.group("text")
                page_href = "%%dox:%s;" % page_name

                # add text so far
                xs.append(self.toTextNode(s[last+1:i.start()]))

                if self.linktype == 'a':
                    # add link to function
                    atag = self.xmldoc.createElement("a")
                    self.addText(atag, i.group('text'))
                    atag.setAttribute("href", "%s" % (page_href))
                    xs.append(atag)
                elif self.linktype == 'wiki':
                    linktxt = "[[%s|%s]]" % (func_href, i.group('function'))
                    xs.append(self.toTextNode(linktxt))

                # set head
                last = i.end()-1

        xs.append(self.toTextNode(s[last+1:]))
        for x in xs:
            tag.appendChild(x)

    # ................................................................
    # E, B, L, PL, BL, DL, ...
    def parse_Terminal(self, T):
        """
        If the next terminal on the stream is of type T, the terminal
        is extracted and returned. Otherwise the head is restored to
        where it was and the function returns None.
        """
        saved = self.tokens.getpos()
        candidate = next(self.tokens)
        if not candidate.isa(T):
            self.tokens.seek(saved)
            return None
        return candidate

    # ................................................................
    # DIV(N) -> (B | P(N) | BL(N) | DL(N) | V(N))+
    def parse_DIV(self, indent):
        """
        Parse a DIV(N) symbol. A DIV(N) is a sequence of blank lines
        (B) or other blocks at indentation level N, such as paragraphs
        P(N), verbatim sections V(N), bullet lists UL(N) and
        description lists DL(N).

        Returns the list of nodes of the parsed blocks (blank lines are
        skipped), or None if no block was found.
        """
        # tried in this order after any blank line has been skipped
        block_parsers = (self.parse_P, self.parse_V,
                         self.parse_UL, self.parse_DL)
        blocks = []
        while True:
            if self.parse_Terminal(B):
                continue
            for parse in block_parsers:
                node = parse(indent)
                if node:
                    blocks.append(node)
                    break
            else:
                # neither a blank line nor a block: the DIV ends here
                break
        if not blocks:
            return None
        return blocks

    # ................................................................
    # P(N) -> PL(N) L(N)*
    def parse_P(self, indent):
        """
        Parse a paragraph P(N): a regular line at indentation INDENT
        followed by zero or more non-blank lines at the same
        indentation. Returns a <p> element, or None (leaving the head
        untouched) if the stream does not start with such a line.
        """
        # Introduced by PL
        first = self.parse_Terminal(PL)
        if not first:
            return None
        if first.indent != indent:
            self.tokens.back()
            return None
        text = "\n" + first.content + "\n"

        # Continued by zero or more L
        while True:
            line = self.parse_Terminal(L)
            if not line:
                break
            if line.indent != indent:
                self.tokens.back()
                break
            text += line.content + "\n"

        ptag = self.xmldoc.createElement("p")
        self.addFancyText(ptag, text)
        return ptag

    # ................................................................
    # V(N) -> L(M)+, M > N
    def parse_V(self, indent):
        """
        Parse a verbatim section V(N): one or more non-blank lines
        indented by more than INDENT, possibly interleaved with blank
        lines. The indentation in excess of INDENT is preserved.
        Returns a <pre> element, or None (restoring the head) if no
        such line is found.
        """
        start = self.tokens.getpos()
        text = "\n"
        found = False
        while True:
            line = self.parse_Terminal(L)
            if line:
                if line.indent > indent:
                    text += " " * (line.indent - indent) + line.content + "\n"
                    found = True
                    continue
                # not indented enough: give the line back
                self.tokens.back()
            if self.parse_Terminal(B):
                text += "\n"
                continue
            break

        if not found:
            self.tokens.seek(start)
            return None

        # remove potential blank line at the end
        if text[-2:] == "\n\n":
            text = text[:-1]
        pretag = self.xmldoc.createElement("pre")
        self.addText(pretag, text)
        return pretag

    # ................................................................
    # UL(N) -> ULI(N)+
    def parse_UL(self, indent):
        """
        Parse a bullet list UL(N): one or more list items at
        indentation INDENT. Returns a <ul> element holding the items,
        or None if there is no item.
        """
        items = []
        while True:
            item = self.parse_ULI(indent)
            if not item:
                break
            items.append(item)
        if not items:
            return None
        ultag = self.xmldoc.createElement("ul")
        for item in items:
            ultag.appendChild(item)
        return ultag

    # ................................................................
    # ULI(N) -> UL(N,M) L(M)* DIV(M), M > N
    def parse_ULI(self, indent):
        """
        Parse a list item ULI(N): a bullet line at indentation INDENT,
        followed by zero or more non-blank lines aligned with the text
        after the bullet, followed by an optional DIV at that same inner
        indentation. Returns an <li> element, or None (leaving the head
        untouched) if the stream does not start with such a bullet line.
        """
        # Introduced by BL
        bullet = self.parse_Terminal(BL)
        if not bullet:
            return None
        if bullet.indent != indent:
            self.tokens.back()
            return None

        # the rest of the item is aligned with the text after the bullet
        inner = bullet.inner_indent
        text = "\n" + bullet.inner_content + "\n"

        # Continued by zero or more L
        while True:
            line = self.parse_Terminal(L)
            if not line:
                break
            if line.indent != inner:
                self.tokens.back()
                break
            text += line.content + "\n"

        litag = self.xmldoc.createElement("li")
        ptag  = self.xmldoc.createElement("p")
        self.addFancyText(ptag, text)
        litag.appendChild(ptag)

        # Continued by DIV
        for child in self.parse_DIV(inner) or []:
            litag.appendChild(child)

        return litag


    # ................................................................
    # DL(N) -> DI(N)+
    def parse_DL(self, indent):
        """
        Parse a description list DL(N): one or more description items
        at indentation INDENT. Returns a <dl> element holding the nodes
        of all the items, or None if there is no item.
        """
        entries = []
        while True:
            nodes = self.parse_DI(indent)
            if not nodes:
                break
            # each item contributes a list of nodes
            entries.extend(nodes)
        if not entries:
            return None
        dltag = self.xmldoc.createElement("dl")
        for entry in entries:
            dltag.appendChild(entry)
        return dltag

    # ................................................................
    # DI(N) -> DL(N) DIV(M)?, M > N
    def parse_DI(self, indent):
        """
        Parse a description item DI(N): a description line
        ('term::defaults') at indentation INDENT, optionally followed
        by a DIV indented more than INDENT.

        Returns a list of nodes: a <dt> element holding the term (plus
        a <span class="defaults"> with the text after '::', if any),
        followed by a <dd> element holding the DIV when there is one.
        Returns None (leaving the head untouched) if the stream does
        not start with such a description line.
        """
        # Introduced by DL
        head = self.parse_Terminal(DL)
        if not head:
            return None
        if head.indent != indent:
            self.tokens.back()
            return None

        # the text after :: is part of the description term dt
        dttag = self.xmldoc.createElement("dt")
        dttag.appendChild(self.toTextNode("\n" + head.content + "\n"))
        defaults = head.inner_content.strip()
        if defaults:
            deftag = self.xmldoc.createElement("span")
            self.addAttr(deftag, "class", "defaults")
            self.addText(deftag, defaults)
            dttag.appendChild(deftag)
        nodes = [dttag]

        # Continued by DIV: peek at the next terminal to find out the
        # indentation level of the description body
        t = next(self.tokens)
        self.tokens.back()
        if t.isa(L) and t.indent > indent:
            body = self.parse_DIV(t.indent)
            # parse_DIV returns None when it finds nothing
            if body:
                ddtag = self.xmldoc.createElement("dd")
                for child in body:
                    ddtag.appendChild(child)
                nodes.append(ddtag)

        return nodes

    # ................................................................
    def toDOM(self):
        """
        Parse the documentation and return it as an XML document whose
        root is a <div class="documentation"> element containing one
        child per parsed block. The root is left empty when the
        documentation contains no block (e.g. only blank lines).
        """
        root = self.xmldoc.createElement("div")
        root.setAttribute("class", "documentation")
        self.xmldoc.appendChild(root)

        # parse documentation; parse_DIV returns None when nothing is found
        for node in self.parse_DIV(self.indentinit) or []:
            root.appendChild(node)

        return self.xmldoc


if __name__ == '__main__':
    # Self-test: format a sample documentation block that exercises
    # paragraphs, links, verbatim sections, nested bullet lists and
    # description lists, then print the resulting XML.
    text=""" Lorem Ipsum is simply dummy text of the printing and typesetting
 industry. Lorem Ipsum has been the industry's standard dummy text
 ever since the 1500s, when an unknown printer took a galley of type
 and scrambled it to make a type specimen book. It has survived not
 only five centuries, but also the leap into electronic typesetting,
 remaining essentially unchanged. It was popularised in the 1960s with
 the release of Letraset sheets containing Lorem Ipsum passages, and
 more recently with desktop publishing software like Aldus PageMaker
 including versions of Lorem Ipsum.

 Also <a href="matlab:vl_help('fisher')">Fisher vectors</a>.

 These are links BL(), BL(A,B) and BLA(A,A) (as long as the dictionary
 cites them).

 Mimamama
   verbatim1
   verbatim2
   verbatim3

   verbatim4
   verbatim5
 Lorem Ipsum is simply dummy text of the printing and typesetting
 industry. Lorem Ipsum has been the industry's standard dummy text
 ever since the 1500s, when an unknown printer took a galley of type
 and scrambled it to make a type specimen book. It has survived not
 only five centuries, but also the leap into electronic typesetting,
 remaining essentially unchanged. It was popularised in the 1960s with
 the release of Letraset sheets containing Lorem Ipsum passages, and
 more recently with desktop publishing software like Aldus PageMaker
 including versions of Lorem Ipsum.

 - outer1 /
   outer1 line 2 /
   outer1 line 3 /

   outer1 new paragarph

   - inner1
   - inner2
   - inner3
     continued on next line
       continued with verbatim

       more verbatim after blank
   - inner4
 - outer again
 - outer
 bla

 - list2
 - list4
 - BL()
 - BL(A,B)

 Test descrition::
     Lorem Ipsum is simply dummy text of the printing
     and typesetting industry. Lorem Ipsum has been the industry's
     standard dummy text ever since the 1500s, when an unknown printer
     took a galley of type and scrambled it to make a type specimen
     book. It has survived not only five centuries, but also the leap
     into electronic typesetting, remaining essentially unchanged. It
     was popularised in the 1960s with the release of Letraset sheets
     containing Lorem Ipsum passages, and more recently with desktop
     publishing software like Aldus PageMaker including versions of
     Lorem Ipsum.

 Ancora::
     Bli bli bli
     Blu blu blu

     - list
     - lust
     - last

     Bli bla

  Verbatimmo
"""
    lines = text.splitlines()
    formatter = Formatter(lines, {'BL':'http://www.google.com'}, 'a')
    # toxml() with an encoding returns bytes; decode them so that the
    # XML itself is printed rather than its b'...' representation
    print(formatter.toDOM().toxml("UTF-8").decode("UTF-8"))