File: utf8.rb

package info (click to toggle)
chromium 139.0.7258.127-1
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 6,122,068 kB
  • sloc: cpp: 35,100,771; ansic: 7,163,530; javascript: 4,103,002; python: 1,436,920; asm: 946,517; xml: 746,709; pascal: 187,653; perl: 88,691; sh: 88,436; objc: 79,953; sql: 51,488; cs: 44,583; fortran: 24,137; makefile: 22,147; tcl: 15,277; php: 13,980; yacc: 8,984; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (130 lines) | stat: -rw-r--r-- 3,128 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
#!/usr/bin/ruby

require 'google/protobuf'
require 'utf8_pb'
require 'test/unit'

module CaptureWarnings
  @@warnings = nil

  module_function

  def warn(message, category: nil, **kwargs)
    if @@warnings
      @@warnings << message
    else
      super
    end
  end

  def capture
    @@warnings = []
    yield
    @@warnings
  ensure
    @@warnings = nil
  end
end

Warning.extend CaptureWarnings

module Utf8Test
  def test_scalar
    msg = Utf8TestProtos::TestUtf8.new
    assert_bad_utf8 { msg.optional_string = bad_utf8_string() }
  end

  def test_repeated
    msg = Utf8TestProtos::TestUtf8.new
    assert_bad_utf8 { msg.repeated_string << bad_utf8_string() }
  end

  def test_map_key
    msg = Utf8TestProtos::TestUtf8.new
    assert_bad_utf8 { msg.map_string_string[bad_utf8_string()] = "abc" }
  end

  def test_map_value
    msg = Utf8TestProtos::TestUtf8.new
    assert_bad_utf8 { msg.map_string_string["abc"] = bad_utf8_string() }
  end
end

# Tests the case of string objects that are marked UTF-8, but contain invalid
# UTF-8.
#
# This case will raise Encoding::InvalidByteSequenceError
class MarkedUtf8Test < Test::Unit::TestCase
  def assert_bad_utf8(&block)
    assert_raises(Encoding::InvalidByteSequenceError, &block)
  end

  def bad_utf8_string
    str = "\x80"
    assert_false str.valid_encoding?
    str
  end

  include Utf8Test
end

# This test doesn't work in JRuby because JRuby appears to have a bug where
# the "valid" bit on a string's data is not invalidated properly when the
# string is modified: https://github.com/jruby/jruby/issues/8316
if !defined? JRUBY_VERSION
  # Tests the case of string objects that are marked UTF-8, and initially contain
  # valid UTF-8, but are later modified to be invalid UTF-8.  This may put the
  # string into an state of "unknown" validity.
  #
  # This case will raise Encoding::InvalidByteSequenceError
  class MarkedModifiedUtf8Test < Test::Unit::TestCase
    def assert_bad_utf8(&block)
      assert_raises(Encoding::InvalidByteSequenceError, &block)
    end

    def bad_utf8_string
      str = " "
      assert_true str.valid_encoding?
      str[0] = "\x80"
      str
    end

    include Utf8Test
  end
end

# Tests the case of string objects that are marked with a non-UTF-8 encoding,
# but contain invalid UTF-8.
#
# This case will raise Encoding::UndefinedConversionError.
class MarkedNonUtf8Test < Test::Unit::TestCase
  def assert_bad_utf8
    assert_raises(Encoding::UndefinedConversionError) { yield }
  end

  def bad_utf8_string
    str = "\x80".force_encoding(Encoding::ASCII_8BIT)
    assert_true str.valid_encoding?
    str
  end

  include Utf8Test
end

# Tests the case of string objects that are marked with a non-UTF-8 encoding,
# but are invalid even in their source encoding.
#
# This case will raise Encoding::InvalidByteSequenceError
class MarkedNonUtf8Test < Test::Unit::TestCase
  def assert_bad_utf8(&block)
    assert_raises(Encoding::InvalidByteSequenceError, &block)
  end

  def bad_utf8_string
    str = "\x80".force_encoding(Encoding::ASCII)
    assert_false str.valid_encoding?
    str
  end

  include Utf8Test
end