File: test_null.rb

package info (click to toggle)
ruby-nokogiri 1.13.10%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 7,416 kB
  • sloc: ansic: 38,198; xml: 28,086; ruby: 22,271; java: 15,517; cpp: 7,037; yacc: 244; sh: 148; makefile: 136
file content (113 lines) | stat: -rw-r--r-- 3,930 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# encoding: utf-8
# frozen_string_literal: true

require "helper"

# Exercises how the HTML5 parser reports U+0000 NULL characters that show up
# in different places of the input. Each test feeds markup containing a NULL
# and checks how many parse errors were recorded. The numbered references in
# the comments (12.2.5.x, 12.2.6.x) name the tokenizer state or insertion mode
# that is expected to raise each error.
#
# The whole class is only defined when Nokogiri.uses_gumbo? is truthy.
class TestHtml5Null < Nokogiri::TestCase
  # Parses +html+ with Nokogiri::HTML5.fragment, passing max_errors: 10
  # (every test below expects fewer errors than that).
  def fragment(html)
    Nokogiri::HTML5.fragment(html, max_errors: 10)
  end

  def test_null_char_ref
    assert_parse_error_count(1, fragment("&#0;"))
  end

  def test_data_state
    # Two errors expected:
    #   12.2.5.1 Data state: unexpected-null-character parse error
    #   12.2.6.4.7 The "in body" insertion mode: Parse error
    assert_parse_error_count(2, fragment("\u0000"))
  end

  def test_data_rcdata_state
    # 12.2.6.4.7 The "in body" insertion mode: textarea switches to RCDATA
    # state, and then
    # 12.2.5.2 RCDATA state: unexpected-null-character parse error
    assert_parse_error_count(1, fragment("<textarea>\u0000</textarea>"))
  end

  def test_data_scriptdata_state
    # 12.2.6.4.7 The "in body" insertion mode: Process "script" using rules
    # for "in head" insertion mode
    # 12.2.6.4.4 The "in head" insertion mode: "script" switches to script
    # data state
    # 12.2.5.4 Script data state: unexpected-null-character parse error
    assert_parse_error_count(1, fragment("<script>\u0000</script>"))
  end

  def test_data_plaintext_state
    # Two errors expected:
    #   12.2.5.5 PLAINTEXT state: unexpected-null-character parse error
    #   EOF parse error because there's no way to switch out of plaintext!
    assert_parse_error_count(2, fragment("<plaintext>\u0000</plaintext>"))
  end

  def test_data_tag_name_state
    # 12.2.5.8 Tag name state: unexpected-null-character parse error
    assert_parse_error_count(1, fragment("<x\u0000></x\ufffd>"))
  end

  # XXX: There are 6 script states to test.

  def test_attribute_name_state
    # 12.2.5.33 Attribute name state
    assert_parse_error_count(1, fragment("<p \u0000>"))
  end

  def test_attribute_value_states
    # One error per attribute value flavour:
    #   12.2.5.36 Attribute value (double-quoted) state
    #   12.2.5.37 Attribute value (single-quoted) state
    #   12.2.5.38 Attribute value (unquoted) state
    parsed = fragment("<p x=\"\u0000\"><p x='\u0000'><p x=\u0000>")
    assert_parse_error_count(3, parsed)
  end

  def test_bogus_comment_state
    # Two errors expected:
    #   12.2.5.42 Markup declaration open state: incorrectly-opened-comment
    #   parse error
    #   12.2.5.41 Bogus comment state: unexpected-null-character parse error
    assert_parse_error_count(2, fragment("<!\u0000>"))
  end

  def test_comment_state
    # 12.2.5.45 Comment state: unexpected-null-character parse error
    assert_parse_error_count(1, fragment("<!-- \u0000 -->"))
  end

  def test_doctype_name_states
    # There are two missing here for double quoted PUBLIC and SYSTEM values.
    #
    # Unlike the other tests, this one parses a full document rather than a
    # fragment. Seven errors expected:
    #   12.2.5.54 Before DOCTYPE name state: unexpected-null-character parse
    #   error
    #   12.2.5.55 DOCTYPE name state: unexpected-null-character parse error
    #   12.2.5.60 DOCTYPE public identifier (single-quoted) state:
    #   unexpected-null-character parse error
    #   12.2.5.66 DOCTYPE system identifier (single-quoted) state:
    #   unexpected-null-character parse error
    #   12.2.5.67 After DOCTYPE system identifier state:
    #   unexpected-character-after-doctype-system-identifier parse error
    #   12.2.5.68 Bogus DOCTYPE state: unexpected-null-character parse error
    #   12.2.6.4.1 The "initial" insertion mode: parse error
    document = Nokogiri::HTML5.parse(
      "<!DOCTYPE \u0000\u0000 PUBLIC '\u0000' '\u0000' \u0000>",
      max_errors: 10
    )
    assert_parse_error_count(7, document)
  end

  def test_cdata_section_state
    # 12.2.6.5 The rules for parsing tokens in foreign content: parse error
    parsed = fragment("<script>//<![CDATA[\n\u0000\n//]]></script>")
    assert_parse_error_count(1, parsed)
  end

  def test_error_api_with_null
    # The first reported error's string form must contain the offending
    # markup, NULL character included.
    markup = "<p \u0000>"
    reported = fragment(markup).errors
    assert_predicate(reported, :any?)
    assert_includes(reported.first.to_s, markup)
  end

  private

  # Asserts that +parsed+ (anything responding to #errors) recorded exactly
  # +expected+ parse errors.
  def assert_parse_error_count(expected, parsed)
    assert_equal(expected, parsed.errors.length)
  end
end if Nokogiri.uses_gumbo?