File: idn_test_case_generator.py

package info (click to toggle)
chromium 120.0.6099.224-1~deb11u1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 6,112,112 kB
  • sloc: cpp: 32,907,025; ansic: 8,148,123; javascript: 3,679,536; python: 2,031,248; asm: 959,718; java: 804,675; xml: 617,256; sh: 111,417; objc: 100,835; perl: 88,443; cs: 53,032; makefile: 29,579; fortran: 24,137; php: 21,162; tcl: 21,147; sql: 20,809; ruby: 17,735; pascal: 12,864; yacc: 8,045; lisp: 3,388; lex: 1,323; ada: 727; awk: 329; jsp: 267; csh: 117; exp: 43; sed: 37
file content (126 lines) | stat: -rwxr-xr-x 4,007 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
#!/usr/bin/env python3
# Copyright 2017 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Utilities for generating IDN test cases.

Either use the command-line interface (see --help) or call make_case directly
from a Python shell (see the make_case documentation).
"""


import argparse
import codecs
import doctest
import sys


def str_to_c_string(string):
  """Converts a Python bytes to a C++ string literal.

    Printable ASCII bytes are emitted unchanged. Double quotes and backslashes
    are backslash-escaped, tab/newline/carriage return use their short escapes,
    and every other byte becomes a two-digit \\xNN hex escape.

    Args:
      string: the bytes object to convert.

    Returns:
      A str containing the double-quoted C++ literal.

    >>> str_to_c_string(b'abc\\x8c')
    '"abc\\\\x8c"'
    >>> str_to_c_string(b'say "hi"')
    '"say \\\\"hi\\\\""'
    """
  # Bytes that need a dedicated escape inside a double-quoted C++ literal.
  # Building the literal byte by byte (instead of rewriting the quotes of
  # repr()) keeps embedded ' and " characters from corrupting the result.
  short_escapes = {
      ord('"'): '\\"',
      ord('\\'): '\\\\',
      ord('\t'): '\\t',
      ord('\n'): '\\n',
      ord('\r'): '\\r',
  }
  pieces = ['"']
  for byte in string:
    if byte in short_escapes:
      pieces.append(short_escapes[byte])
    elif 0x20 <= byte < 0x7f:
      pieces.append(chr(byte))
    else:
      # NOTE(review): C++ hex escapes are not length-limited, so an escape
      # directly followed by a hex-digit character would be read as one longer
      # escape by the compiler. That case is not handled here.
      pieces.append('\\x%02x' % byte)
  pieces.append('"')
  return ''.join(pieces)


def unicode_to_c_ustring(string):
  """Renders a Python unicode string as a C++ u"..." string literal.

    Characters up to U+007F are copied through unchanged, other characters up
    to U+FFFF become \\uXXXX escapes, and characters above U+FFFF become
    \\UXXXXXXXX escapes.

    >>> unicode_to_c_ustring(u'b\\u00fccher.de')
    'u"b\\\\u00fccher.de"'
    """

  def escape(char):
    # Picks the shortest representation that the code point allows.
    code_point = ord(char)
    if code_point <= 0x7f:
      return char
    if code_point <= 0xffff:
      return '\\u%04x' % code_point
    return '\\U%08x' % code_point

  return 'u"' + ''.join(escape(char) for char in string) + '"'


def make_case(unicode_domain, unicode_allowed=True, case_name=None):
  """Prints a C++ test case row for an IDN domain test.

    The output is meant for the IDNTestCase struct in the file
    components/url_formatter/url_formatter_unittest.cc: one row of the
    idn_cases array, describing the test for a single domain.

    |unicode_domain| is the domain as a Unicode string (NOT IDNA-encoded).
    |unicode_allowed| selects the expectation: kSafe when the domain should be
    displayed in Unicode form, kUnsafe when it should be displayed in its
    IDNA/Punycode ASCII encoding. |case_name|, when given, is only printed as a
    comment line above the row.

    The domain is converted to its IDNA form automatically and the row is
    written to standard output in C++ syntax.

    >>> make_case(u'\u5317\u4eac\u5927\u5b78.cn', True, 'Hanzi (Chinese)')
        // Hanzi (Chinese)
        {"xn--1lq90ic7f1rc.cn", u"\\u5317\\u4eac\\u5927\\u5b78.cn", kSafe},
    >>> make_case(u'b\u00fccher.de', True)
        {"xn--bcher-kva.de", u"b\\u00fccher.de", kSafe},

    The Unicode domain is also normalized, as required by the IDNA algorithm.
    This example shows U+210F normalized to U+0127 (it produces the exact same
    test case as u'\u0127ello'):

    >>> make_case(u'\u210fello', True)
        {"xn--ello-4xa", u"\\u0127ello", kSafe},
    """
  encoded = codecs.encode(unicode_domain, 'idna')
  # Decoding the IDNA form again gives back the normalized Unicode domain.
  normalized = codecs.decode(encoded, 'idna')

  if unicode_allowed:
    expectation = 'kSafe'
  else:
    expectation = 'kUnsafe'

  if case_name:
    print('    // %s' % case_name)
  row = (str_to_c_string(encoded), unicode_to_c_ustring(normalized),
         expectation)
  print('    {%s, %s, %s},' % row)


def main(args=None):
  """Command-line entry point: prints one IDN test case or runs the doctests.

    Args:
      args: list of command-line argument strings. Defaults to sys.argv[1:].

    Returns:
      With --test, 1 if any doctest failed and 0 otherwise, so that the value
      can be used as the process exit status. None in all other cases.

    Exits through parser.error() when DOMAIN is missing or contains '://'.
    """
  if args is None:
    args = sys.argv[1:]

  parser = argparse.ArgumentParser(description='Generate an IDN test case.')
  parser.add_argument('domain',
                      metavar='DOMAIN',
                      nargs='?',
                      help='the Unicode domain (not encoded)')
  parser.add_argument('--name',
                      metavar='NAME',
                      help='the name of the test case')
  parser.add_argument('--no-unicode',
                      action='store_false',
                      dest='unicode_allowed',
                      default=True,
                      help='expect the domain to be Punycoded')
  parser.add_argument('--test',
                      action='store_true',
                      dest='run_tests',
                      help='run unit tests')

  args = parser.parse_args(args)

  if args.run_tests:
    # Report doctest failures through the return value; otherwise a failing
    # run would still exit with status 0.
    results = doctest.testmod()
    return 1 if results.failed else 0

  # DOMAIN is declared optional (nargs='?') so that --test works without it;
  # enforce it by hand for the normal mode.
  if not args.domain:
    parser.error('Required argument: DOMAIN')

  if '://' in args.domain:
    parser.error('A URL must not be passed as the domain argument')

  make_case(args.domain,
            unicode_allowed=args.unicode_allowed,
            case_name=args.name)


# Run the command-line interface only when executed as a script; whatever
# main() returns is handed to sys.exit() as the process exit status.
if __name__ == '__main__':
  sys.exit(main())