File: identifier.py

package info (click to toggle)
python-enable 4.3.0-2
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 7,280 kB
  • ctags: 13,899
  • sloc: cpp: 48,447; python: 28,502; ansic: 9,004; makefile: 315; sh: 44
file content (37 lines) | stat: -rw-r--r-- 1,110 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
""" Parse CSS identifiers. More complicated than it sounds"""

from pyparsing import Word, Literal, Regex, Combine, Optional, White, oneOf, ZeroOrMore
import string
import re

class White(White):
    """ Whitespace token whose default character set follows the CSS spec.

    By default it matches space, tab, carriage return, newline and form
    feed.  The ``min``, ``max`` and ``exact`` arguments are handed to the
    base class unchanged.
    """
    def __init__(self, ws=" \t\r\n\f", min=1, max=0, exact=0):
        # Forward by keyword so the mapping onto the base-class parameters
        # is explicit at the call site.
        super(White, self).__init__(ws=ws, min=min, max=max, exact=exact)

# A simple escape: a backslash (dropped from the output) followed by one
# literal character in U+0020-U+007E or U+0080-U+FFFF, which is kept as-is.
escaped = Literal("\\").suppress() + Regex(
    u"[\u0020-\u007e\u0080-\uffff]",
    flags=re.IGNORECASE,
)

def convertToUnicode(t):
    """ Turn the hex digits of a CSS unicode escape into the character.

    Parameters:
        t: the parse tokens; ``t[0]`` is a string of hexadecimal digits
           (without the leading backslash).

    Returns:
        The one-character unicode string for that code point.  If the
        running interpreter cannot represent the code point (for example
        anything above U+10FFFF), U+FFFD REPLACEMENT CHARACTER is returned
        instead of letting the error escape from the parser.
    """
    # ``unichr`` only exists on Python 2; on Python 3 ``chr`` already
    # produces unicode characters.
    try:
        to_char = unichr
    except NameError:
        to_char = chr

    code_point = int(t[0], 16)
    try:
        return to_char(code_point)
    except (ValueError, OverflowError):
        # Up to six hex digits are accepted by the grammar, so values as
        # large as 0xFFFFFF can arrive here; CSS allows substituting the
        # replacement character for escapes outside the Unicode range.
        return u"\ufffd"
# A hexadecimal escape: a backslash (dropped), one to six hex digits in
# either case, then at most one whitespace character, which is consumed
# and dropped as well.
hex_unicode = (
    Literal("\\").suppress()
    + Regex("[0-9a-f]{1,6}", flags=re.IGNORECASE)
    + Optional(White(exact=1)).suppress()
)
# The only token left is the hex digit string; convert it to a character.
hex_unicode.setParseAction(convertToUnicode)


# Either kind of escape.  The hexadecimal form is tried first, so a
# backslash followed by hex digits is never taken as a literal escape.
escape = hex_unicode | escaped

# Any single character outside the 0-127 ASCII range.
nonascii = Regex(u"[^\u0000-\u007f]")

# The single character that may start an identifier: an ASCII letter (any
# case), a non-ASCII character, or an escape.
# NOTE(review): CSS 2.1 additionally permits "_" here and a leading "-" on
# identifiers; confirm whether the narrower set is intended.
nmstart = (
    Regex(u"[A-Z]", flags=re.IGNORECASE)
    | nonascii
    | escape
)

# Any later character of an identifier: as above, plus digits and "-".
nmchar = (
    Regex(u"[0-9A-Z-]", flags=re.IGNORECASE)
    | nonascii
    | escape
)

# A whole identifier: one start character followed by any number of name
# characters, merged into a single string token.
identifier = Combine(nmstart + ZeroOrMore(nmchar))