File: lexer.rb

package info (click to toggle)
ruby-liquid 5.12.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 1,444 kB
  • sloc: ruby: 14,571; makefile: 6
file content (179 lines) | stat: -rw-r--r-- 5,702 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
# frozen_string_literal: true

module Liquid
  # Byte-driven tokenizer for Liquid expression markup.
  #
  # Lexer.tokenize walks a scanner one byte at a time and uses arrays indexed
  # by byte value ("jump tables") to decide how the next token is read. Every
  # token is a two-element array of the form [type, value]; the only exception
  # is the terminating EOS token, which carries just a type.
  class Lexer
    # Pre-built, frozen tokens for fixed punctuation and operators. The same
    # object is pushed to the output every time the token is seen.
    CLOSE_ROUND = [:close_round, ")"].freeze
    CLOSE_SQUARE = [:close_square, "]"].freeze
    COLON = [:colon, ":"].freeze
    COMMA = [:comma, ","].freeze
    COMPARISON_CONTAINS = [:comparison, "contains"].freeze
    COMPARISON_EQUAL = [:comparison, "=="].freeze
    COMPARISON_GREATER_THAN = [:comparison, ">"].freeze
    COMPARISON_GREATER_THAN_OR_EQUAL = [:comparison, ">="].freeze
    COMPARISON_LESS_THAN = [:comparison, "<"].freeze
    COMPARISON_LESS_THAN_OR_EQUAL = [:comparison, "<="].freeze
    COMPARISON_NOT_EQUAL = [:comparison, "!="].freeze
    COMPARISON_NOT_EQUAL_ALT = [:comparison, "<>"].freeze
    # Historical misspelling of COMPARISON_NOT_EQUAL. It refers to the very same
    # object and is kept so that existing references keep working.
    COMPARISION_NOT_EQUAL = COMPARISON_NOT_EQUAL
    DASH = [:dash, "-"].freeze
    DOT = [:dot, "."].freeze
    DOTDOT = [:dotdot, ".."].freeze
    # Byte value of ".", compared against the scanner's next byte to spot "..".
    DOT_ORD = ".".ord
    DOUBLE_STRING_LITERAL = /"[^\"]*"/
    # Appended as the last element of every successful tokenize result.
    EOS = [:end_of_string].freeze
    IDENTIFIER = /[a-zA-Z_][\w-]*\??/
    NUMBER_LITERAL = /-?\d+(\.\d+)?/
    OPEN_ROUND = [:open_round, "("].freeze
    OPEN_SQUARE = [:open_square, "["].freeze
    PIPE = [:pipe, "|"].freeze
    QUESTION = [:question, "?"].freeze
    # Not referenced inside this class.
    RUBY_WHITESPACE = [" ", "\t", "\r", "\n", "\f"].freeze
    SINGLE_STRING_LITERAL = /'[^\']*'/
    WHITESPACE_OR_NOTHING = /\s*/

    # Byte value => token emitted when "<" or ">" is NOT followed by a byte
    # that completes a two-character operator.
    SINGLE_COMPARISON_TOKENS = [].tap do |table|
      table["<".ord] = COMPARISON_LESS_THAN
      table[">".ord] = COMPARISON_GREATER_THAN
      table.freeze
    end

    # First byte => (second byte => token) for operators that are only valid
    # as a pair ("==" and "!="). A lone "=" or "!" is a syntax error.
    TWO_CHARS_COMPARISON_JUMP_TABLE = [].tap do |table|
      table["=".ord] = [].tap do |sub_table|
        sub_table["=".ord] = COMPARISON_EQUAL
        sub_table.freeze
      end
      table["!".ord] = [].tap do |sub_table|
        sub_table["=".ord] = COMPARISON_NOT_EQUAL
        sub_table.freeze
      end
      table.freeze
    end

    # First byte => (second byte => token) for operators whose first byte is
    # also a valid operator on its own ("<", ">"); see SINGLE_COMPARISON_TOKENS
    # for the single-byte fallback.
    COMPARISON_JUMP_TABLE = [].tap do |table|
      table["<".ord] = [].tap do |sub_table|
        sub_table["=".ord] = COMPARISON_LESS_THAN_OR_EQUAL
        sub_table[">".ord] = COMPARISON_NOT_EQUAL_ALT
        sub_table.freeze
      end
      table[">".ord] = [].tap do |sub_table|
        sub_table["=".ord] = COMPARISON_GREATER_THAN_OR_EQUAL
        sub_table.freeze
      end
      table.freeze
    end

    # First byte => [token type, regexp] used to scan identifiers, numbers and
    # quoted strings.
    NEXT_MATCHER_JUMP_TABLE = [].tap do |table|
      "a".upto("z") do |c|
        table[c.ord] = [:id, IDENTIFIER].freeze
        table[c.upcase.ord] = [:id, IDENTIFIER].freeze
      end
      table["_".ord] = [:id, IDENTIFIER].freeze

      "0".upto("9") do |c|
        table[c.ord] = [:number, NUMBER_LITERAL].freeze
      end
      # tokenize consults SPECIAL_TABLE first, which also has a "-" entry, so
      # this entry is not reached from there.
      table["-".ord] = [:number, NUMBER_LITERAL].freeze

      table["'".ord] = [:string, SINGLE_STRING_LITERAL].freeze
      table["\"".ord] = [:string, DOUBLE_STRING_LITERAL].freeze
      table.freeze
    end

    # Byte value => single-character punctuation token. Frozen like every
    # other lookup table in this class so it cannot be altered at runtime.
    SPECIAL_TABLE = [].tap do |table|
      table["|".ord] = PIPE
      table[".".ord] = DOT
      table[":".ord] = COLON
      table[",".ord] = COMMA
      table["[".ord] = OPEN_SQUARE
      table["]".ord] = CLOSE_SQUARE
      table["(".ord] = OPEN_ROUND
      table[")".ord] = CLOSE_ROUND
      table["?".ord] = QUESTION
      table["-".ord] = DASH
      table.freeze
    end

    # Byte value => true for the ASCII digits "0".."9".
    NUMBER_TABLE = [].tap do |table|
      "0".upto("9") do |c|
        table[c.ord] = true
      end
      table.freeze
    end

    # rubocop:disable Metrics/BlockNesting
    class << self
      # Splits the remaining input of a scanner into tokens.
      #
      # @param ss [StringScanner] scanner positioned at the start of the markup;
      #   it must respond to eos?, skip, scan, pos, pos=, peek_byte, scan_byte,
      #   getch and string. The scanner is advanced as tokens are consumed.
      # @return [Array<Array>] tokens in input order, always ending with EOS
      # @raise [SyntaxError] for a character that cannot start a token, for a
      #   lone "=" or "!", or when the input has an invalid byte sequence.
      #   The constant is resolved lexically — presumably Liquid::SyntaxError
      #   when the rest of the library is loaded; verify against the project.
      def tokenize(ss)
        output = []

        until ss.eos?
          ss.skip(WHITESPACE_OR_NOTHING)

          # Trailing whitespace may have consumed the rest of the input.
          break if ss.eos?

          # Remembered so an error can report the character the token began at.
          start_pos = ss.pos
          peeked = ss.peek_byte

          if (special = SPECIAL_TABLE[peeked])
            ss.scan_byte
            # Special case for ".."
            if special == DOT && ss.peek_byte == DOT_ORD
              ss.scan_byte
              output << DOTDOT
            elsif special == DASH
              # Special case for negative numbers
              if (peeked_byte = ss.peek_byte) && NUMBER_TABLE[peeked_byte]
                # Step back over the "-" so NUMBER_LITERAL captures the sign.
                ss.pos -= 1
                output << [:number, ss.scan(NUMBER_LITERAL)]
              else
                output << special
              end
            else
              output << special
            end
          elsif (sub_table = TWO_CHARS_COMPARISON_JUMP_TABLE[peeked])
            ss.scan_byte
            if (peeked_byte = ss.peek_byte) && (found = sub_table[peeked_byte])
              output << found
              ss.scan_byte
            else
              # "=" or "!" without the completing "=" is not a token.
              raise_syntax_error(start_pos, ss)
            end
          elsif (sub_table = COMPARISON_JUMP_TABLE[peeked])
            ss.scan_byte
            if (peeked_byte = ss.peek_byte) && (found = sub_table[peeked_byte])
              output << found
              ss.scan_byte
            else
              # No two-character match: emit the bare "<" or ">".
              output << SINGLE_COMPARISON_TOKENS[peeked]
            end
          else
            type, pattern = NEXT_MATCHER_JUMP_TABLE[peeked]

            if type && (t = ss.scan(pattern))
              # Special case for "contains": it is an operator unless it
              # directly follows a dot, in which case it stays an identifier.
              output << if type == :id && t == "contains" && output.last&.first != :dot
                COMPARISON_CONTAINS
              else
                [type, t]
              end
            else
              raise_syntax_error(start_pos, ss)
            end
          end
        end
        # rubocop:enable Metrics/BlockNesting
        output << EOS
      rescue ::ArgumentError => e
        # Translate the invalid-encoding ArgumentError into a syntax error;
        # every other ArgumentError is passed through untouched.
        if e.message == "invalid byte sequence in #{ss.string.encoding}"
          raise SyntaxError, "Invalid byte sequence in #{ss.string.encoding}"
        else
          raise
        end
      end

      # Rewinds the scanner to start_pos and raises a SyntaxError that names
      # the character found there.
      #
      # @param start_pos [Integer] scanner position where the bad token began
      # @param ss [StringScanner] the scanner being tokenized
      # @raise [SyntaxError] always
      def raise_syntax_error(start_pos, ss)
        ss.pos = start_pos
        # the character could be a UTF-8 character, use getch to get all the bytes
        raise SyntaxError, "Unexpected character #{ss.getch}"
      end
    end
  end
end