1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113
|
# encoding: utf-8
# frozen_string_literal: true
require "helper"
# Checks how many parse errors the HTML5 parser reports when a NUL character
# (U+0000), or a character reference to it, shows up in the input while the
# tokenizer is in various states. Section numbers in the comments refer to the
# HTML parsing specification.
#
# The class is only defined when Nokogiri.uses_gumbo? is truthy (see the
# modifier on the closing `end`).
class TestHtml5Null < Nokogiri::TestCase
  # Parses +s+ as an HTML5 fragment, passing max_errors: 10 so that every
  # test below can inspect the errors collected on the returned fragment.
  def fragment(s)
    Nokogiri::HTML5.fragment(s, max_errors: 10)
  end

  def test_null_char_ref
    # The input must be a numeric character reference to NUL ("&#0;"), not a
    # literal U+FFFD: the reference itself is what triggers the error.
    # Numeric character reference end state: null-character-reference parse
    # error
    frag = fragment("&#0;")
    assert_equal(1, frag.errors.length)
  end

  def test_data_state
    frag = fragment("\u0000")
    # 12.2.5.1 Data state: unexpected-null-character parse error
    # 12.2.6.4.7 The "in body" insertion mode: Parse error
    assert_equal(2, frag.errors.length)
  end

  def test_data_rcdata_state
    # 12.2.6.4.7 The "in body" insertion mode: textarea switches to RCDATA
    # state
    frag = fragment("<textarea>\u0000</textarea>")
    # 12.2.5.2 RCDATA state: unexpected-null-character parse error
    assert_equal(1, frag.errors.length)
  end

  def test_data_scriptdata_state
    # 12.2.6.4.7 The "in body" insertion mode: Process "script" using rules
    # for "in head" insertion mode
    # 12.2.6.4.4 The "in head" insertion mode: "script" switches to script
    # data state
    frag = fragment("<script>\u0000</script>")
    # 12.2.5.4 Script data state: unexpected-null-character parse error
    assert_equal(1, frag.errors.length)
  end

  def test_data_plaintext_state
    frag = fragment("<plaintext>\u0000</plaintext>")
    # 12.2.5.5 PLAINTEXT state: unexpected-null-character parse error
    # EOF parse error because there's no way to switch out of plaintext!
    assert_equal(2, frag.errors.length)
  end

  def test_data_tag_name_state
    # The end tag spells the name with U+FFFD (written as an escape here).
    frag = fragment("<x\u0000></x\ufffd>")
    # 12.2.5.8 Tag name state: unexpected-null-character parse error
    assert_equal(1, frag.errors.length)
  end

  # XXX: There are 6 script states to test.

  def test_attribute_name_state
    frag = fragment("<p \u0000>")
    # 12.2.5.33 Attribute name state
    assert_equal(1, frag.errors.length)
  end

  def test_attribute_value_states
    # One NUL in each attribute-value flavour; one error expected per flavour.
    frag = fragment("<p x=\"\u0000\"><p x='\u0000'><p x=\u0000>")
    # 12.2.5.36 Attribute value (double-quoted) state
    # 12.2.5.37 Attribute value (single-quoted) state
    # 12.2.5.38 Attribute value (unquoted) state
    assert_equal(3, frag.errors.length)
  end

  def test_bogus_comment_state
    frag = fragment("<!\u0000>")
    # 12.2.5.42 Markup declaration open state: incorrectly-opened-comment
    # parse error
    # 12.2.5.41 Bogus comment state: unexpected-null-character parse error
    assert_equal(2, frag.errors.length)
  end

  def test_comment_state
    frag = fragment("<!-- \u0000 -->")
    # 12.2.5.45 Comment state: unexpected-null-character parse error
    assert_equal(1, frag.errors.length)
  end

  def test_doctype_name_states
    # This test parses a whole document rather than a fragment, so it calls
    # Nokogiri::HTML5.parse directly instead of the fragment helper.
    # There are two missing here for double quoted PUBLIC and SYSTEM values.
    doc = Nokogiri::HTML5.parse(
      "<!DOCTYPE \u0000\u0000 PUBLIC '\u0000' '\u0000' \u0000>",
      max_errors: 10
    )
    # 12.2.5.54 Before DOCTYPE name state: unexpected-null-character parse
    # error
    # 12.2.5.55 DOCTYPE name state: unexpected-null-character parse error
    # 12.2.5.60 DOCTYPE public identifier (single-quoted) state:
    # unexpected-null-character parse error
    # 12.2.5.66 DOCTYPE system identifier (single-quoted) state:
    # unexpected-null-character parse error
    # 12.2.5.67 After DOCTYPE system identifier state:
    # unexpected-character-after-doctype-system-identifier parse error
    # 12.2.5.68 Bogus DOCTYPE state: unexpected-null-character parse error
    # 12.2.6.4.1 The "initial" insertion mode: parse error
    assert_equal(7, doc.errors.length)
  end

  def test_cdata_section_state
    frag = fragment("<script>//<![CDATA[\n\u0000\n//]]></script>")
    # 12.2.6.5 The rules for parsing tokens in foreign content: parse error
    assert_equal(1, frag.errors.length)
  end

  def test_error_api_with_null
    frag = fragment("<p \u0000>")
    assert_predicate(frag.errors, :any?)
    # The first error's string form is expected to contain the offending
    # input, embedded NUL included.
    assert_includes(frag.errors[0].to_s, "<p \u0000>")
  end
end if Nokogiri.uses_gumbo?
|