1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179
|
# frozen_string_literal: true
module Liquid
  # Hand-written lexer that turns an expression string into a flat list of
  # tokens.
  #
  # Every token is an Array whose first element is the token type Symbol and
  # whose second element (absent only for the end-of-string marker) is the
  # matched text, e.g. [:id, "foo"] or [:pipe, "|"].
  #
  # Dispatch is driven by "jump tables": Arrays indexed by the byte value of
  # the next input character. A nil slot means "this table does not handle
  # that byte".
  class Lexer
    # Pre-built, frozen tokens for fixed punctuation and operators. They are
    # shared between calls instead of being allocated per occurrence.
    CLOSE_ROUND = [:close_round, ")"].freeze
    CLOSE_SQUARE = [:close_square, "]"].freeze
    COLON = [:colon, ":"].freeze
    COMMA = [:comma, ","].freeze
    # NOTE: "COMPARISION" is misspelled; the name is kept as-is because
    # constants are visible to code outside this class.
    COMPARISION_NOT_EQUAL = [:comparison, "!="].freeze
    COMPARISON_CONTAINS = [:comparison, "contains"].freeze
    COMPARISON_EQUAL = [:comparison, "=="].freeze
    COMPARISON_GREATER_THAN = [:comparison, ">"].freeze
    COMPARISON_GREATER_THAN_OR_EQUAL = [:comparison, ">="].freeze
    COMPARISON_LESS_THAN = [:comparison, "<"].freeze
    COMPARISON_LESS_THAN_OR_EQUAL = [:comparison, "<="].freeze
    COMPARISON_NOT_EQUAL_ALT = [:comparison, "<>"].freeze
    DASH = [:dash, "-"].freeze
    DOT = [:dot, "."].freeze
    DOTDOT = [:dotdot, ".."].freeze
    DOT_ORD = ".".ord
    DOUBLE_STRING_LITERAL = /"[^\"]*"/
    EOS = [:end_of_string].freeze
    IDENTIFIER = /[a-zA-Z_][\w-]*\??/
    NUMBER_LITERAL = /-?\d+(\.\d+)?/
    OPEN_ROUND = [:open_round, "("].freeze
    OPEN_SQUARE = [:open_square, "["].freeze
    PIPE = [:pipe, "|"].freeze
    QUESTION = [:question, "?"].freeze
    # Not referenced inside this class.
    RUBY_WHITESPACE = [" ", "\t", "\r", "\n", "\f"].freeze
    SINGLE_STRING_LITERAL = /'[^\']*'/
    WHITESPACE_OR_NOTHING = /\s*/

    # Token emitted for a lone "<" or ">" (no recognised second character).
    SINGLE_COMPARISON_TOKENS = [].tap do |table|
      table["<".ord] = COMPARISON_LESS_THAN
      table[">".ord] = COMPARISON_GREATER_THAN
      table.freeze
    end

    # Operators that are only valid as a two-character sequence ("==", "!=").
    # First index: first byte; second index: second byte.
    TWO_CHARS_COMPARISON_JUMP_TABLE = [].tap do |table|
      table["=".ord] = [].tap do |sub_table|
        sub_table["=".ord] = COMPARISON_EQUAL
        sub_table.freeze
      end
      table["!".ord] = [].tap do |sub_table|
        sub_table["=".ord] = COMPARISION_NOT_EQUAL
        sub_table.freeze
      end
      table.freeze
    end

    # Operators starting with "<" or ">" that may be extended by a second
    # character ("<=", "<>", ">="). When the second byte has no entry, the
    # single-character token from SINGLE_COMPARISON_TOKENS is used instead.
    COMPARISON_JUMP_TABLE = [].tap do |table|
      table["<".ord] = [].tap do |sub_table|
        sub_table["=".ord] = COMPARISON_LESS_THAN_OR_EQUAL
        sub_table[">".ord] = COMPARISON_NOT_EQUAL_ALT
        sub_table.freeze
      end
      table[">".ord] = [].tap do |sub_table|
        sub_table["=".ord] = COMPARISON_GREATER_THAN_OR_EQUAL
        sub_table.freeze
      end
      table.freeze
    end

    # Maps a leading byte to a [token_type, pattern] pair; the pattern is
    # then scanned at the current position to obtain the token text.
    NEXT_MATCHER_JUMP_TABLE = [].tap do |table|
      identifier = [:id, IDENTIFIER].freeze
      number = [:number, NUMBER_LITERAL].freeze

      "a".upto("z") do |c|
        table[c.ord] = identifier
        table[c.upcase.ord] = identifier
      end
      table["_".ord] = identifier

      "0".upto("9") do |c|
        table[c.ord] = number
      end
      # tokenize never reaches this entry: SPECIAL_TABLE also has a "-" slot
      # and is consulted first.
      table["-".ord] = number

      table["'".ord] = [:string, SINGLE_STRING_LITERAL].freeze
      table["\"".ord] = [:string, DOUBLE_STRING_LITERAL].freeze
      table.freeze
    end

    # Single-byte punctuation tokens. "." and "-" get extra handling in
    # tokenize (".." and negative numbers respectively).
    SPECIAL_TABLE = [].tap do |table|
      table["|".ord] = PIPE
      table[".".ord] = DOT
      table[":".ord] = COLON
      table[",".ord] = COMMA
      table["[".ord] = OPEN_SQUARE
      table["]".ord] = CLOSE_SQUARE
      table["(".ord] = OPEN_ROUND
      table[")".ord] = CLOSE_ROUND
      table["?".ord] = QUESTION
      table["-".ord] = DASH
      # Frozen like every other lookup table so the shared constant cannot
      # be mutated by accident.
      table.freeze
    end

    # Truthy for the byte values of "0".."9", nil for everything else.
    NUMBER_TABLE = [].tap do |table|
      "0".upto("9") do |c|
        table[c.ord] = true
      end
      table.freeze
    end

    # rubocop:disable Metrics/BlockNesting
    class << self
      # Splits the remaining content of +ss+ into tokens.
      #
      # @param ss [StringScanner] scanner over the text to tokenize; it must
      #   support +peek_byte+ and +scan_byte+. The scanner is consumed.
      # @return [Array<Array>] tokens in input order, always terminated by
      #   EOS ([:end_of_string]).
      # @raise [SyntaxError] on a character no table can handle, on a lone
      #   "=" or "!", or when the scanner reports an invalid byte sequence.
      #   (The constant is resolved lexically; presumably Liquid::SyntaxError
      #   when the rest of the library is loaded.)
      def tokenize(ss)
        output = []

        until ss.eos?
          ss.skip(WHITESPACE_OR_NOTHING)

          # Only trailing whitespace was left.
          break if ss.eos?

          # Remembered so an error can be reported at the token's first byte.
          start_pos = ss.pos
          peeked = ss.peek_byte

          if (special = SPECIAL_TABLE[peeked])
            ss.scan_byte
            # Two consecutive dots form a single ".." token.
            if special == DOT && ss.peek_byte == DOT_ORD
              ss.scan_byte
              output << DOTDOT
            elsif special == DASH
              # A dash immediately followed by a digit is the sign of a
              # number: step back over the dash and scan the whole literal.
              if (peeked_byte = ss.peek_byte) && NUMBER_TABLE[peeked_byte]
                ss.pos -= 1
                output << [:number, ss.scan(NUMBER_LITERAL)]
              else
                output << special
              end
            else
              output << special
            end
          elsif (sub_table = TWO_CHARS_COMPARISON_JUMP_TABLE[peeked])
            ss.scan_byte
            if (peeked_byte = ss.peek_byte) && (found = sub_table[peeked_byte])
              output << found
              ss.scan_byte
            else
              # "=" or "!" without a following "=" is not a token.
              raise_syntax_error(start_pos, ss)
            end
          elsif (sub_table = COMPARISON_JUMP_TABLE[peeked])
            ss.scan_byte
            if (peeked_byte = ss.peek_byte) && (found = sub_table[peeked_byte])
              output << found
              ss.scan_byte
            else
              # Plain "<" or ">".
              output << SINGLE_COMPARISON_TOKENS[peeked]
            end
          else
            type, pattern = NEXT_MATCHER_JUMP_TABLE[peeked]

            if type && (t = ss.scan(pattern))
              # The word "contains" is a comparison operator, unless it
              # directly follows a dot (then it stays a plain identifier).
              output << if type == :id && t == "contains" && output.last&.first != :dot
                COMPARISON_CONTAINS
              else
                [type, t]
              end
            else
              raise_syntax_error(start_pos, ss)
            end
          end
        end
        # rubocop:enable Metrics/BlockNesting

        output << EOS
      rescue ::ArgumentError => e
        # Translate only the "invalid byte sequence" ArgumentError into a
        # SyntaxError; any other ArgumentError is re-raised unchanged.
        if e.message == "invalid byte sequence in #{ss.string.encoding}"
          raise SyntaxError, "Invalid byte sequence in #{ss.string.encoding}"
        else
          raise
        end
      end

      # Rewinds +ss+ to +start_pos+ and raises a SyntaxError naming the
      # character found there.
      #
      # @param start_pos [Integer] scanner position of the offending token
      # @param ss [StringScanner] scanner being tokenized
      # @raise [SyntaxError] always
      def raise_syntax_error(start_pos, ss)
        ss.pos = start_pos
        # the character could be a UTF-8 character, use getch to get all the bytes
        raise SyntaxError, "Unexpected character #{ss.getch}"
      end
    end
  end
end
|