File: parseutils_test.go

package info (click to toggle)
golang-github-mmcdole-gofeed 1.1.3-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 4,468 kB
  • sloc: xml: 2,760; makefile: 3
file content (110 lines) | stat: -rw-r--r-- 3,401 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
package shared

import (
	"testing"

	"github.com/stretchr/testify/assert"
)

// TestDecodeEntities runs DecodeEntities over a table of inputs and checks
// that every call returns a nil error and the expected decoded string.
func TestDecodeEntities(t *testing.T) {
	tests := []struct {
		str string // input handed to DecodeEntities
		res string // expected decoded output
	}{
		// Inputs with no complete entity are expected back unchanged,
		// including bare ampersands and names broken up by a space.
		{"", ""},
		{"foo", "foo"},
		{"skip & normal & amps", "skip & normal & amps"},
		{"not & entity;hello &ne xt;one", "not & entity;hello &ne xt;one"},

		// Named entities.
		{"&lt;foo&gt;", "<foo>"},
		{"a &quot;b&quot; &apos;c&apos;", "a \"b\" 'c'"},
		{"foo &amp;&amp; bar", "foo && bar"},

		// Numeric references (decimal and hexadecimal, with leading zeros
		// and mixed-case hex digits) plus a non-ASCII named entity.
		{"&#34;foo&#34;", "\"foo\""},
		{"&#x61;&#x062;&#x0063;", "abc"},
		{"r&#xe9;sum&#x00E9;", "résumé"},
		{"r&eacute;sum&eacute;", "résumé"},

		// Entity-like prefixes without a terminating semicolon are expected
		// to be left as they are.
		{"&", "&"},
		{"&foo", "&foo"},
		{"&lt", "&lt"},
		{"&#", "&#"},
	}

	for _, test := range tests {
		res, err := DecodeEntities(test.str)
		assert.Nil(t, err, "cannot decode %q", test.str)
		// assert.Equal takes (expected, actual); passing them in that order
		// keeps the labels in testify's failure output correct.
		assert.Equal(t, test.res, res,
			"%q was decoded to %q instead of %q",
			test.str, res, test.res)
	}
}

// TestStripCDATA runs StripCDATA over a table of inputs and checks that each
// result matches the expected string.
func TestStripCDATA(t *testing.T) {
	tests := []struct {
		str string // input handed to StripCDATA
		res string // expected output
	}{
		// A CDATA section followed by plain text: the section markers are
		// expected to disappear while its content is kept verbatim. In the
		// second case the entity outside the section is expected decoded.
		{"<![CDATA[ test ]]>test", " test test"},
		{"<![CDATA[test &]]> &lt;", "test & <"},

		// Inputs without a complete CDATA section are expected unchanged.
		{"", ""},
		{"test", "test"},
		{"]]>", "]]>"},
		{"<![CDATA[", "<![CDATA["},
		{"<![CDATA[testtest", "<![CDATA[testtest"},

		// Multi-line sections containing reserved characters.
		{`<![CDATA[
    Since this is a CDATA section
    I can use all sorts of reserved characters
    like > < " and &
    or write things like
    <foo></bar>
    but my document is still well formed!
]]>`, `
    Since this is a CDATA section
    I can use all sorts of reserved characters
    like > < " and &
    or write things like
    <foo></bar>
    but my document is still well formed!
`},
		{`<![CDATA[
Within this Character Data block I can
use double dashes as much as I want (along with <, &, ', and ")
*and* %MyParamEntity; will be expanded to the text
"Has been expanded" ... however, I can't use
the CEND sequence. If I need to use CEND I must escape one of the
brackets or the greater-than sign using concatenated CDATA sections.
]]>`, `
Within this Character Data block I can
use double dashes as much as I want (along with <, &, ', and ")
*and* %MyParamEntity; will be expanded to the text
"Has been expanded" ... however, I can't use
the CEND sequence. If I need to use CEND I must escape one of the
brackets or the greater-than sign using concatenated CDATA sections.
`},
		// 		{`<![CDATA[ test ]]><!--
		// Within this comment I can use ]]>
		// and other reserved characters like <
		// &, ', and ", but %MyParamEntity; will not be expanded
		// (if I retrieve the text of this node it will contain
		// %MyParamEntity; and not "Has been expanded")
		// and I can't place two dashes next to each other.
		// -->`, ` test <!--
		// Within this comment I can use ]]>
		// and other reserved characters like <
		// &, ', and ", but %MyParamEntity; will not be expanded
		// (if I retrieve the text of this node it will contain
		// %MyParamEntity; and not "Has been expanded")
		// and I can't place two dashes next to each other.
		// -->`,
		// 		},
		{`<![CDATA[ test ]]><!-- test -->`, ` test <!-- test -->`}, // TODO: probably wrong
		{`An example of escaped CENDs`, `An example of escaped CENDs`},

		// Adjacent sections used to escape a literal "]]>": their contents
		// are expected to be concatenated.
		{`<![CDATA[This text contains a CEND ]]]]><![CDATA[>]]>`, `This text contains a CEND ]]>`},
		{`<![CDATA[This text contains a CEND ]]]><![CDATA[]>]]>`, `This text contains a CEND ]]>`},
	}

	for _, test := range tests {
		res := StripCDATA(test.str)
		// Name the input in the failure message so a failing row can be
		// identified without guessing from the expected/actual diff.
		assert.Equal(t, test.res, res, "StripCDATA(%q)", test.str)
	}
}