File: Atoms.pm

package info (click to toggle)
libpegex-perl 0.75-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 908 kB
  • sloc: perl: 3,288; makefile: 43; sh: 2
file content (106 lines) | stat: -rw-r--r-- 2,767 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
package Pegex::Grammar::Atoms;
use Pegex::Base;

#------------------------------------------------------------------------------#
# Pegex regex atoms for grammars
#------------------------------------------------------------------------------#
# Table of named atoms. Each key is an atom name; each value is the regex
# source text that the name stands for. The lowercase whitespace entries
# (ws, ws1, ws2, _, __) are written as `<name>` references to other entries
# of this same table rather than as literal regex text.
my $atoms = {
    # Default whitespace rules for grammars that use '~'
    ws      => '<WS>',
    ws1     => '<ws>*',
    ws2     => '<ws>+',

    # Default whitespace rules for grammars that use '-' and '+'
    _       => '<ws1>',
    __      => '<ws2>',

    # Special rules
    ALWAYS  => '',          # Empty pattern: matches at any position
    NEVER   => '(?!)',      # Empty negative lookahead: can never match

    # Basics
    ALL     => '[\s\S]',    # Every char (including newline and space)
    ANY     => '.',         # Any char (except newline)
    SPACE   => '\ ',        # ASCII space char
    TAB     => '\t',        # Horizontal tab
    WS      => '\s',        # Whitespace
    NS      => '\S',        # Not Space
    NL      => '\n',        # Newline
    BREAK   => '\n',        # Line break (more readable alias for NL)
    CR      => '\r',        # Carriage return
    EOL     => '\r?\n',     # Unix/DOS line ending
    DOS     => '\r\n',      # Windows/DOS line ending
    EOS     => '\z',        # End of stream/string/file
    EMPTY   => '',          # Empty string

    # Common character classes
    WORD    => '\w',
    BLANK   => '[\ \t]',
    ALPHA   => '[a-zA-Z]',
    LOWER   => '[a-z]',
    UPPER   => '[A-Z]',
    DIGIT   => '[0-9]',
    OCTAL   => '[0-7]',
    HEX     => '[0-9a-fA-F]',
    ALNUM   => '[a-zA-Z0-9]',
    CONTROL => '[\x00-\x1f]',           # 0x00 through 0x1f (0x7f is not included)
    HICHAR  => '[\x7f-\x{ffff}]',       # 0x7f through U+FFFF

    # Ranges - for use inside character classes
    # (the same sets as the classes above, minus the enclosing brackets;
    # WORDS spells out an ASCII-only range rather than using \w)
    WORDS   => '0-9A-Za-z_',
    BLANKS  => '\ \t',
    ALPHAS  => 'a-zA-Z',
    LOWERS  => 'a-z',
    UPPERS  => 'A-Z',
    DIGITS  => '0-9',
    OCTALS  => '0-7',
    HEXS    => '0-9a-fA-F',
    ALNUMS  => 'a-zA-Z0-9',
    CONTROLS => '\x00-\x1f',
    HICHARS => '\x7f-\x{ffff}',

    # Paired punctuation
    SINGLE  => "'",
    TICK    => "'",         # Same value as SINGLE
    DOUBLE  => '"',
    GRAVE   => '`',
    LPAREN  => '\(',
    RPAREN  => '\)',
    LCURLY  => '\{',
    RCURLY  => '\}',
    LSQUARE => '\[',
    RSQUARE => '\]',
    LANGLE  => '<',
    RANGLE  => '\>',

    # Other ASCII punctuation
    BANG    => '!',
    AT      => '\@',
    HASH    => '\#',
    DOLLAR  => '\$',
    PERCENT => '%',
    CARET   => '\^',
    AMP     => '&',
    STAR    => '\*',
    TILDE   => '\~',
    UNDER   => '_',
    DASH    => '\-',
    PLUS    => '\+',
    EQUAL   => '=',
    PIPE    => '\|',
    # Perl's single-quote unescaping turns the four backslashes below into
    # two, which is the regex source for one literal backslash.
    BACK    => '\\\\',
    COLON   => ':',
    SEMI    => ';',
    COMMA   => ',',
    DOT     => '\.',
    QMARK   => '\?',
    SLASH   => '/',

    # Special rules for named control chars
    BS      => '\x08',    # Backspace
    FF      => '\x0C',    # Formfeed
};

# Accessor for the atom table.
#
# Returns the hash reference stored in the file-scoped $atoms variable.
# No copy is made, so every call yields the same reference. Arguments
# (including the invocant when called as a method) are not read.
sub atoms {
    return $atoms;
}

1;