File: html_re.py

package info (click to toggle)
mozjs140 140.7.0-2
  • links: PTS, VCS
  • area: main
  • in suites: forky
  • size: 1,216,752 kB
  • sloc: javascript: 2,267,210; cpp: 1,423,664; python: 966,252; ansic: 632,297; xml: 115,965; sh: 15,392; asm: 13,399; makefile: 10,455; yacc: 4,504; perl: 2,223; lex: 1,414; ruby: 1,064; exp: 756; java: 185; sql: 66; sed: 18
file content (40 lines) | stat: -rw-r--r-- 929 bytes parent folder | download | duplicates (18)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
"""Regexps to match html elements
"""

import re

# --- Attribute pieces -------------------------------------------------------
# Name: starts with a letter, "_" or ":"; continues with letters, digits,
# ":", ".", "_" or "-".
attr_name = r"[a-zA-Z_:][a-zA-Z0-9:._-]*"

# Value, unquoted form: one or more characters other than quotes, "=", "<",
# ">", a backtick, or anything in the range \x00-\x20.
unquoted = r"""[^"'=<>`\x00-\x20]+"""
# Value, quoted forms: anything up to the next matching quote character.
single_quoted = r"'[^']*'"
double_quoted = r'"[^"]*"'

# Any of the three value spellings, wrapped in a non-capturing group.
attr_value = "(?:" + "|".join((unquoted, single_quoted, double_quoted)) + ")"

# One attribute: mandatory leading whitespace, a name, and an optional
# "= value" part (whitespace allowed on both sides of "=").
attribute = rf"(?:\s+{attr_name}(?:\s*=\s*{attr_value})?)"

# --- Individual constructs --------------------------------------------------
# Opening tag: "<name", zero or more attributes, optional whitespace, an
# optional "/" and the closing ">".
open_tag = rf"<[A-Za-z][A-Za-z0-9\-]*{attribute}*\s*\/?>"

# Closing tag: "</name", optional whitespace, ">".
close_tag = r"<\/[A-Za-z][A-Za-z0-9\-]*\s*>"
# Comment: either the empty "<!---->", or a body built from units of an
# optional "-" followed by a non-"-" character (the first unit additionally
# rejects ">"), terminated by "-->".
comment = r"<!---->|<!--(?:-?[^>-])(?:-?[^-])*-->"
# Processing instruction: "<?" ... "?>", shortest match, newlines included.
processing = r"<[?][\s\S]*?[?]>"
# Declaration: "<!" + uppercase letters + whitespace + anything up to ">".
declaration = r"<![A-Z]+\s+[^>]*>"
# CDATA section: "<![CDATA[" ... "]]>", shortest match, newlines included.
cdata = r"<!\[CDATA\[[\s\S]*?\]\]>"

# --- Compiled matchers ------------------------------------------------------
# Matches any one of the constructs above, anchored at the start of the input.
HTML_TAG_RE = re.compile(
    "^(?:"
    + "|".join((open_tag, close_tag, comment, processing, declaration, cdata))
    + ")"
)
# Same anchoring, restricted to opening and closing tags; exposed both as the
# pattern source string and as a compiled pattern.
HTML_OPEN_CLOSE_TAG_STR = f"^(?:{open_tag}|{close_tag})"
HTML_OPEN_CLOSE_TAG_RE = re.compile(HTML_OPEN_CLOSE_TAG_STR)