File: lexer_examples.rb (Debian package ruby-graphql 2.2.17-1)

# frozen_string_literal: true
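
# Tokens come back from the lexer as plain arrays laid out as
# [name, line, col, value, previous_token]; this refinement adds reader
# methods so the specs below can treat them like structs.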
module TokenMethods
  refine Array do
    def name
      self[0]
    end

    def value
      self[3]
    end

    def to_s
      self[3]
    end

    def line
      self[1]
    end

    def col
      self[2]
    end

    def prev_token
      self[4]
    end

    # Long-form alias of #prev_token.
    alias_method :previous_token, :prev_token

    def inspect
      "(#{name} #{value.inspect} [#{line}:#{col}])"
    end
  end
end

using TokenMethods

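# Shared examples for a lexer implementation. The including spec supplies
# `subject`, which must respond to `.tokenize(string)` and return an array of
# token tuples. A hypothetical inclusion (names assumed, not from this file)
# might look like:
#
#   describe GraphQL::Language::Lexer do
#     subject { GraphQL::Language::Lexer }
#     include LexerExamples
#   end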
module LexerExamples
  def self.included(child_mod)
    child_mod.module_eval do
      describe ".tokenize" do
        let(:query_string) { %|
          {
            query getCheese {
              cheese(id: 1) {
                ... cheeseFields
              }
            }
          }
        |}
        let(:tokens) { subject.tokenize(query_string) }

        it "force encodes to utf-8" do
          # string that will be invalid utf-8 once force encoded
          string = "vandflyver \xC5rhus".dup.force_encoding("ASCII-8BIT")
          assert_bad_unicode(string)
        end

        it "makes utf-8 arguments named type" do
          str = "{ a(type: 1) }"
          tokens = subject.tokenize(str)
          assert_equal Encoding::UTF_8, tokens[2].value.encoding
        end

        it "keeps track of previous_token" do
          assert_equal tokens[0], tokens[1].prev_token
        end

        it "handles integers with a leading zero" do
          tokens = subject.tokenize("{ a(id: 04) }")
          assert_equal :INT, tokens[5].name
        end

        it "allows escaped quotes in strings" do
          tokens = subject.tokenize('"a\\"b""c"')
          assert_equal 'a"b', tokens[0].value
          assert_equal 'c', tokens[1].value
        end

        it "handles escaped backslashes before escaped quotes" do
          tokens = subject.tokenize('text: "b\\\\", otherText: "a"')
          assert_equal ['text', ':', 'b\\', 'otherText', ':', 'a'], tokens.map(&:value)
        end

        describe "block strings" do
          let(:query_string) { %|{ a(b: """\nc\n \\""" d\n""" """""e""""")}|}

          it "tokenizes them" do
            assert_equal "c\n \"\"\" d", tokens[5].value
            assert_equal "\"\"e\"\"", tokens[6].value
          end

          it "tokenizes 10 quote edge case correctly" do
            tokens = subject.tokenize('""""""""""')
            assert_equal '""', tokens[0].value # first 8 quotes are a valid block string """"""""
            assert_equal '', tokens[1].value # last 2 quotes are a valid string ""
          end

          it "tokenizes with nested single quote strings correctly" do
            tokens = subject.tokenize('"""{"x"}"""')
            assert_equal '{"x"}', tokens[0].value

            tokens = subject.tokenize('"""{"foo":"bar"}"""')
            assert_equal '{"foo":"bar"}', tokens[0].value
          end

          it "tokenizes empty block strings correctly" do
            empty_block_string = '""""""'
            tokens = subject.tokenize(empty_block_string)

            assert_equal '', tokens[0].value
          end

          it "tokenizes escaped backslashes at the end of blocks" do
            query_str = <<-GRAPHQL
text: """b\\\\""", otherText: "a"
GRAPHQL

            tokens = subject.tokenize(query_str)
            assert_equal ['text', ':', 'b\\', 'otherText', ':', 'a'], tokens.map(&:value)
          end
        end

        it "unescapes escaped characters" do
          assert_equal "\" \\ / \b \f \n \r \t", subject.tokenize('"\\" \\\\ \\/ \\b \\f \\n \\r \\t"').first.to_s
        end

        it "unescapes escaped unicode characters" do
          assert_equal "\t", subject.tokenize('"\\u0009"').first.to_s
          assert_equal "\t", subject.tokenize('"\\u{0009}"').first.to_s
          assert_equal "š˜‘", subject.tokenize('"\\u{10611}"').first.to_s
          assert_equal "šŸ’©", subject.tokenize('"\\u{1F4A9}"').first.to_s
          assert_equal "šŸ’©", subject.tokenize('"\\uD83D\\uDCA9"').first.to_s
        end

        it "accepts the full range of unicode" do
          assert_equal "šŸ’©", subject.tokenize('"šŸ’©"').first.to_s
          assert_equal "⌱", subject.tokenize('"⌱"').first.to_s
          assert_equal "šŸ‚”\nšŸ‚¢", subject.tokenize('"""šŸ‚”
    šŸ‚¢"""').first.to_s
        end

        it "doesn't accept unicode outside strings or comments" do
          assert_equal :UNKNOWN_CHAR, subject.tokenize('😘 ').first.name
        end

        it "rejects bad unicode, even when there's good unicode in the string" do
          assert_bad_unicode('"\\u0XXF \\u0009"', "Bad unicode escape in \"\\\\u0XXF \\\\u0009\"")
        end

        it "rejects truly invalid UTF-8 bytes" do
          error_filename = "spec/support/parser/filename_example_invalid_utf8.graphql"
          text = File.read(error_filename)
          assert_bad_unicode(text)
        end

        it "rejects unicode that's well-formed but results in invalidly-encoded strings" do
          # When this string gets tokenized into an actual `:STRING`, the resulting
          # Ruby string has `valid_encoding?` == false, so application code usually
          # blows up trying to manipulate it.
          text1 = '"\\udc00\\udf2c"'
          assert_bad_unicode(text1, 'Bad unicode escape in "\\xED\\xB0\\x80\\xED\\xBC\\xAC"')
          text2 = '"\\u{dc00}\\u{df2c}"'
          assert_bad_unicode(text2, 'Bad unicode escape in "\\xED\\xB0\\x80\\xED\\xBC\\xAC"')
        end

        it "clears the previous_token between runs" do
          tok_2 = subject.tokenize(query_string)
          assert_nil tok_2[0].prev_token
        end

        it "counts string position properly" do
          tokens = subject.tokenize('{ a(b: "c")}')
          str_token = tokens[5]
          assert_equal :STRING, str_token.name
          assert_equal "c", str_token.value
          assert_equal 8, str_token.col
          assert_equal '(STRING "c" [1:8])', str_token.inspect
          rparen_token = tokens[6]
          assert_equal '(RPAREN ")" [1:11])', rparen_token.inspect
        end

        it "tokenizes block quotes with triple quotes correctly" do
          doc = <<-GRAPHQL
"""

string with \\"""

"""
          GRAPHQL
          tokens = subject.tokenize doc
          token = tokens.first
          assert_equal :STRING, token.name
          assert_equal 'string with """', token.value
        end

        it "counts block string line properly" do
          str = <<-GRAPHQL
          """
          Here is a
          multiline description
          """
          type Query {
            a: B
          }

          "Here's another description"

          type B {
            a: B
          }

          """
          And another
          multiline description
          """


          type C {
            a: B
          }
          GRAPHQL

          tokens = subject.tokenize(str)

          string_tok, type_keyword_tok, query_name_tok,
            _curly, _ident, _colon, _ident, _curly,
            string_tok_2, type_keyword_tok_2, b_name_tok,
            _curly, _ident, _colon, _ident, _curly,
            string_tok_3, type_keyword_tok_3, c_name_tok = tokens

          assert_equal 1, string_tok.line
          assert_equal 5, type_keyword_tok.line
          assert_equal 5, query_name_tok.line

          # Make sure it handles the empty spaces, too
          assert_equal 9, string_tok_2.line
          assert_equal 11, type_keyword_tok_2.line
          assert_equal 11, b_name_tok.line

          assert_equal 15, string_tok_3.line
          assert_equal 21, type_keyword_tok_3.line
          assert_equal 21, c_name_tok.line
        end
      end
    end
  end
end