File: lexer.cpp

package info (click to toggle)
golang-github-wellington-go-libsass 0.9.2%2Bgit20181130.4ef5b9d-1.1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 2,128 kB
  • sloc: cpp: 28,607; ansic: 839; makefile: 44
file content (181 lines) | stat: -rw-r--r-- 6,244 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
#include "sass.hpp"
#include <cctype>
#include <iostream>
#include <iomanip>
#include "lexer.hpp"
#include "constants.hpp"


namespace Sass {
  using namespace Constants;

  namespace Prelexer {

    //####################################
    // BASIC CHARACTER MATCHERS
    //####################################

    // Match standard control chars
    const char* kwd_at(const char* src) { return exactly<'@'>(src); }
    const char* kwd_dot(const char* src) { return exactly<'.'>(src); }
    const char* kwd_comma(const char* src) { return exactly<','>(src); };
    const char* kwd_colon(const char* src) { return exactly<':'>(src); };
    const char* kwd_star(const char* src) { return exactly<'*'>(src); };
    const char* kwd_plus(const char* src) { return exactly<'+'>(src); };
    const char* kwd_minus(const char* src) { return exactly<'-'>(src); };
    const char* kwd_slash(const char* src) { return exactly<'/'>(src); };

    //####################################
    // implement some function that do exist in the standard
    // but those are locale aware which brought some trouble
    // this even seems to improve performance by quite a bit
    //####################################

    bool is_alpha(const char& chr)
    {
      return unsigned(chr - 'A') <= 'Z' - 'A' ||
             unsigned(chr - 'a') <= 'z' - 'a';
    }

    bool is_space(const char& chr)
    {
      // adapted the technique from is_alpha
      return chr == ' ' || unsigned(chr - '\t') <= '\r' - '\t';
    }

    bool is_digit(const char& chr)
    {
      // adapted the technique from is_alpha
      return unsigned(chr - '0') <= '9' - '0';
    }

    bool is_number(const char& chr)
    {
      // adapted the technique from is_alpha
      return is_digit(chr) || chr == '-' || chr == '+';
    }

    bool is_xdigit(const char& chr)
    {
      // adapted the technique from is_alpha
      return unsigned(chr - '0') <= '9' - '0' ||
             unsigned(chr - 'a') <= 'f' - 'a' ||
             unsigned(chr - 'A') <= 'F' - 'A';
    }

    bool is_punct(const char& chr)
    {
      // locale independent
      return chr == '.';
    }

    bool is_alnum(const char& chr)
    {
      return is_alpha(chr) || is_digit(chr);
    }

    // check if char is outside ascii range
    bool is_unicode(const char& chr)
    {
      // check for unicode range
      return unsigned(chr) > 127;
    }

    // check if char is outside ascii range
    // but with specific ranges (copied from Ruby Sass)
    bool is_nonascii(const char& chr)
    {
      unsigned int cmp = unsigned(chr);
      return (
        (cmp >= 128 && cmp <= 15572911) ||
        (cmp >= 15630464 && cmp <= 15712189) ||
        (cmp >= 4036001920)
      );
    }

    // check if char is within a reduced ascii range
    // valid in a uri (copied from Ruby Sass)
    bool is_uri_character(const char& chr)
    {
      unsigned int cmp = unsigned(chr);
      return (cmp > 41 && cmp < 127) ||
             cmp == ':' || cmp == '/';
    }

    // check if char is within a reduced ascii range
    // valid for escaping (copied from Ruby Sass)
    bool is_escapable_character(const char& chr)
    {
      unsigned int cmp = unsigned(chr);
      return cmp > 31 && cmp < 127;
    }

    // Match word character (look ahead)
    bool is_character(const char& chr)
    {
      // valid alpha, numeric or unicode char (plus hyphen)
      return is_alnum(chr) || is_unicode(chr) || chr == '-';
    }

    //####################################
    // BASIC CLASS MATCHERS
    //####################################

    // create matchers that advance the position
    const char* space(const char* src) { return is_space(*src) ? src + 1 : 0; }
    const char* alpha(const char* src) { return is_alpha(*src) ? src + 1 : 0; }
    const char* unicode(const char* src) { return is_unicode(*src) ? src + 1 : 0; }
    const char* nonascii(const char* src) { return is_nonascii(*src) ? src + 1 : 0; }
    const char* digit(const char* src) { return is_digit(*src) ? src + 1 : 0; }
    const char* xdigit(const char* src) { return is_xdigit(*src) ? src + 1 : 0; }
    const char* alnum(const char* src) { return is_alnum(*src) ? src + 1 : 0; }
    const char* punct(const char* src) { return is_punct(*src) ? src + 1 : 0; }
    const char* hyphen(const char* src) { return *src && *src == '-' ? src + 1 : 0; }
    const char* character(const char* src) { return is_character(*src) ? src + 1 : 0; }
    const char* uri_character(const char* src) { return is_uri_character(*src) ? src + 1 : 0; }
    const char* escapable_character(const char* src) { return is_escapable_character(*src) ? src + 1 : 0; }

    // Match multiple ctype characters.
    const char* spaces(const char* src) { return one_plus<space>(src); }
    const char* digits(const char* src) { return one_plus<digit>(src); }
    const char* hyphens(const char* src) { return one_plus<hyphen>(src); }

    // Whitespace handling.
    const char* no_spaces(const char* src) { return negate< space >(src); }
    const char* optional_spaces(const char* src) { return zero_plus< space >(src); }

    // Match any single character.
    const char* any_char(const char* src) { return *src ? src + 1 : src; }

    // Match word boundary (zero-width lookahead).
    const char* word_boundary(const char* src) { return is_character(*src) || *src == '#' ? 0 : src; }

    // Match linefeed /(?:\n|\r\n?)/
    const char* re_linebreak(const char* src)
    {
      // end of file or unix linefeed return here
      if (*src == 0 || *src == '\n') return src + 1;
      // a carriage return may optionally be followed by a linefeed
      if (*src == '\r') return *(src + 1) == '\n' ? src + 2 : src + 1;
      // no linefeed
      return 0;
    }

    // Assert string boundaries (/\Z|\z|\A/)
    // This is a zero-width positive lookahead
    const char* end_of_line(const char* src)
    {
      // end of file or unix linefeed return here
      return *src == 0 || *src == '\n' || *src == '\r' ? src : 0;
    }

    // Assert end_of_file boundary (/\z/)
    // This is a zero-width positive lookahead
    const char* end_of_file(const char* src)
    {
      // end of file or unix linefeed return here
      return *src == 0 ? src : 0;
    }

  }
}