Package: xapian-core / 1.4.3-2+deb9u3

cve-2018-0499-mset-snippet-escaping.patch Patch series | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
Description: Fix incomplete HTML escaping in MSet::snippet()
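 Characters <, > and & were escaped when copying text up to the end of each
 term, but not when SnipPipe::drain() copied end-of-sentence punctuation or
 the characters preceding the first word character to the output.  This patch
 adds escaping in those cases via a shared append_escaping_xml() helper.
 This issue has been allocated CVE-2018-0499.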
Author: Olly Betts <olly@survex.com>
Bug-Debian: https://bugs.debian.org/902886
Origin: upstream
Last-Update: 2018-07-06

--- a/queryparser/termgenerator_internal.cc
+++ b/queryparser/termgenerator_internal.cc
@@ -432,6 +432,27 @@ SnipPipe::done()
     }
 }
 
+/** Append the bytes in [p, end) to output, escaping XML metacharacters.
+ *
+ *  '&', '<' and '>' become "&amp;", "&lt;" and "&gt;"; the rest is copied.
+ */
+inline void
+append_escaping_xml(const char* p, const char* end, string& output)
+{
+    for (; p != end; ++p) {
+	const char byte = *p;
+	if (byte == '&') {
+	    output += "&amp;";
+	} else if (byte == '<') {
+	    output += "&lt;";
+	} else if (byte == '>') {
+	    output += "&gt;";
+	} else {
+	    output += byte;
+	}
+    }
+}
+
 inline bool
 SnipPipe::drain(const string & input,
 		const string & hi_start,
@@ -465,7 +486,7 @@ SnipPipe::drain(const string & input,
 
 	if (punc) {
 	    // Include end of sentence punctuation.
-	    output.append(input.data() + best_end, i.raw());
+	    append_escaping_xml(input.data() + best_end, i.raw(), output);
 	} else {
 	    // Append "..." or equivalent if this doesn't seem to be the start
 	    // of a sentence.
@@ -523,8 +544,7 @@ SnipPipe::drain(const string & input,
 	while (i != Utf8Iterator()) {
 	    unsigned ch = *i;
 	    if (Unicode::is_wordchar(ch)) {
-		const char * p = input.data() + best_begin;
-		output.append(p, i.raw() - p);
+		append_escaping_xml(input.data() + best_begin, i.raw(), output);
 		best_begin = i.raw() - input.data();
 		break;
 	    }
@@ -537,22 +557,9 @@ SnipPipe::drain(const string & input,
 	if (phrase_len) output += hi_start;
     }
 
-    while (best_begin != word.term_end) {
-	char ch = input[best_begin++];
-	switch (ch) {
-	    case '&':
-		output += "&amp;";
-		break;
-	    case '<':
-		output += "&lt;";
-		break;
-	    case '>':
-		output += "&gt;";
-		break;
-	    default:
-		output += ch;
-	}
-    }
+    const char* p = input.data();
+    append_escaping_xml(p + best_begin, p + word.term_end, output);
+    best_begin = word.term_end;
 
     if (phrase_len && --phrase_len == 0) output += hi_end;
 
--- a/tests/api_snippets.cc
+++ b/tests/api_snippets.cc
@@ -313,3 +313,23 @@ DEFINE_TESTCASE(snippet_empty, backend) {
 
     return true;
 }
+
+/// Check MSet::snippet() escapes <, > and & in its output (CVE-2018-0499).
+DEFINE_TESTCASE(snippet_html_escape, backend) {
+    Xapian::Enquire enquire(get_database("apitest_simpledata"));
+    enquire.set_query(Xapian::Query("foo"));
+
+    Xapian::MSet mset = enquire.get_mset(0, 0);
+
+    Xapian::Stem stem;
+    // Angle brackets either side of the highlighted term must become entities.
+    const char *input = "#include <foo.h> to use libfoo";
+    TEST_STRINGS_EQUAL(mset.snippet(input, 12, stem),
+		       "...&lt;<b>foo</b>.h&gt; to...");
+    // An ampersand directly before the first highlighted term is escaped too.
+    input = "&foo takes the address of foo";
+    TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
+		       "&amp;<b>foo</b> takes the address of <b>foo</b>");
+
+    return true;
+}