File: annotation_tokenizer_test.py

#!/usr/bin/env vpython3
# Copyright 2019 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""
Unit tests for annotation_tokenizer.py.
"""

import unittest

from annotation_tokenizer import Tokenizer, SourceCodeParsingError


class AnnotationTokenizerTest(unittest.TestCase):
  def testRealAnnotationDefinition(self):
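    # Drive the tokenizer through a realistic DefineNetworkTrafficAnnotation
    # call, checking each expected token in order.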
    real_definition = """
        DefineNetworkTrafficAnnotation("foobar_fetcher", R"(
          semantics {
            sender: "Foobar Component"
            description: "Fetches Foobars for the user."
            trigger: "The user requests a new Foobar."
            data: "The type of Foobar the user requested."
            destination: GOOGLE_OWNED_SERVICE
          }
          policy {
            cookies_allowed: NO
            setting: "Privacy and Security > Enable Foobars"
            chrome_policy {
              FoobarsEnabled {
                FoobarsEnabled: false
              }
            }
          })");"""
    tokenizer = Tokenizer(real_definition,
                          'components/foobar/foobar_request_handler.cc', 42)
    self.assertEqual('DefineNetworkTrafficAnnotation',
                     tokenizer.advance('symbol'))
    self.assertEqual('(', tokenizer.advance('left_paren'))
    self.assertEqual('foobar_fetcher', tokenizer.advance('string_literal'))
    self.assertEqual(',', tokenizer.advance('comma'))
    self.assertTrue(bool(tokenizer.advance('string_literal')))
    self.assertEqual(')', tokenizer.advance('right_paren'))

  def testAdvanceHappyPath(self):
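    # advance() returns the token's value and consumes it; the raw string
    # R"(world)" is recognized as an ordinary string literal.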
    tokenizer = Tokenizer('"hello", R"(world)", function_name())));',
                          'foo.txt', 33)
    self.assertEqual('hello', tokenizer.advance('string_literal'))
    self.assertEqual(',', tokenizer.advance('comma'))
    self.assertEqual('world', tokenizer.advance('string_literal'))
    self.assertEqual(',', tokenizer.advance('comma'))
    self.assertEqual('function_name', tokenizer.advance('symbol'))
    self.assertEqual('(', tokenizer.advance('left_paren'))
    self.assertEqual(')', tokenizer.advance('right_paren'))
    self.assertEqual(')', tokenizer.advance('right_paren'))

  def testAdvanceMultiline(self):
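    # Raw string literals may span multiple lines; embedded newlines are
    # preserved in the returned value.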
    tokenizer = Tokenizer('\n\tR"(the quick\nbrown\nfox)"', 'foo.txt', 33)
    self.assertEqual(
        'the quick\nbrown\nfox', tokenizer.advance('string_literal'))

  def testAdvanceTextBlock(self):
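    # Triple-quoted text blocks have the leading whitespace on each line
    # stripped from the returned value.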
    tokenizer = Tokenizer('\n """\n  the quick\n  red\n  fox"""', 'foo.txt', 2)
    self.assertEqual('the quick\nred\nfox', tokenizer.advance('string_literal'))

  def testAdvanceErrorPaths(self):
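    # A failed advance() raises SourceCodeParsingError, naming the expected
    # token type and the file:line position.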
    tokenizer = Tokenizer('  hello , ', 'foo.txt', 33)
    tokenizer.advance('symbol')
    with self.assertRaisesRegex(SourceCodeParsingError,
                                'Expected symbol.+at foo.txt:33'):
      # There are no more tokens.
      tokenizer.advance('symbol')

    tokenizer = Tokenizer('"hello"', 'foo.txt', 33)
    with self.assertRaisesRegex(SourceCodeParsingError,
                                'Expected comma.+at foo.txt:33'):
      # The type doesn't match.
      tokenizer.advance('comma')

    tokenizer = Tokenizer('{', 'foo.txt', 33)
    with self.assertRaisesRegex(SourceCodeParsingError,
                                'Expected string_literal.+at foo.txt:33'):
      # Not a valid token at all.
      tokenizer.advance('string_literal')

  def testMaybeAdvance(self):
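    # maybe_advance() returns None instead of raising when the next token
    # does not match the requested type.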
    tokenizer = Tokenizer('"hello", world', 'foo.txt', 33)
    self.assertEqual(None, tokenizer.maybe_advance('symbol'))
    self.assertEqual('hello', tokenizer.maybe_advance('string_literal'))
    self.assertEqual(',', tokenizer.maybe_advance('comma'))
    self.assertEqual(None, tokenizer.maybe_advance('left_paren'))
    self.assertEqual('world', tokenizer.maybe_advance('symbol'))
    self.assertEqual(None, tokenizer.maybe_advance('right_paren'))

  def testEscaping(self):
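    # The backslashes below are doubled for the Python source, so the
    # tokenizer itself sees C-style escapes such as \" and \\ and must
    # unescape them when producing the string literal's value.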
    tokenizer = Tokenizer(
        '''
      "\\"ab\\nc \\\\\\" def \\\\\\""
      "string ends here:\\\\" this is not part of the string"
    ''', 'foo.txt', 33)
    self.assertEqual('"ab\nc \\" def \\"', tokenizer.advance('string_literal'))
    self.assertEqual('string ends here:\\', tokenizer.advance('string_literal'))


if __name__ == '__main__':
  unittest.main()