Package: appstream-glib / 0.7.18-1+deb11u1

Improve-handling-of-em-and-code-tags.patch Patch series | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
From: "Jan Alexander Steffens (heftig)" <jan.steffens@gmail.com>
Date: Fri, 15 Jul 2022 21:18:47 +0200
Subject: Improve handling of <em> and <code> tags

This is still not great code, but it is at least somewhat of an
improvement. Tests were expanded to showcase the new behavior.

I think, ideally, we would append opening/closing tags to the ancestor
`p` or `li` node's cdata as soon as we encounter the start/end of an
`em` or `code` element. This would then also handle empty elements
correctly.

Origin: https://github.com/hughsie/appstream-glib/pull/446
Applied-upstream: 0.8.1, commit:674490bd54ff206f213ca4547db7fdb591a0fb3d
Bug-Debian: https://bugs.debian.org/1037206
---
 libappstream-glib/as-node.c      | 108 +++++++++++++++++++++++----------------
 libappstream-glib/as-self-test.c |  39 +++++++++++++-
 2 files changed, 101 insertions(+), 46 deletions(-)

diff --git a/libappstream-glib/as-node.c b/libappstream-glib/as-node.c
index 5e19337..655b947 100644
--- a/libappstream-glib/as-node.c
+++ b/libappstream-glib/as-node.c
@@ -674,6 +674,7 @@ as_node_end_element_cb (GMarkupParseContext *context,
 			GError             **error)
 {
 	AsNodeToXmlHelper *helper = (AsNodeToXmlHelper *) user_data;
+	AsNodeData *data = helper->current->data;
 
 	/* do not create a child node for em and code tags */
 	if (g_strcmp0 (element_name, "em") == 0) {
@@ -684,6 +685,42 @@ as_node_end_element_cb (GMarkupParseContext *context,
 		helper->is_code_text = 0;
 		return;
 	}
+
+	if (data->cdata != NULL) {
+		/* split up into lines and add each with spaces stripped */
+		if ((helper->flags & AS_NODE_FROM_XML_FLAG_LITERAL_TEXT) == 0) {
+			AsRefString *cdata = data->cdata;
+			data->cdata = as_node_reflow_text (cdata, strlen (cdata));
+			as_ref_string_unref (cdata);
+		}
+
+		/* intern commonly duplicated tag values and save a bit of memory */
+		if (data->is_tag_valid) {
+			AsNode *root = g_node_get_root (helper->current);
+			switch (data->tag) {
+			case AS_TAG_CATEGORY:
+			case AS_TAG_COMPULSORY_FOR_DESKTOP:
+			case AS_TAG_CONTENT_ATTRIBUTE:
+			case AS_TAG_DEVELOPER_NAME:
+			case AS_TAG_EXTENDS:
+			case AS_TAG_ICON:
+			case AS_TAG_ID:
+			case AS_TAG_KUDO:
+			case AS_TAG_LANG:
+			case AS_TAG_METADATA_LICENSE:
+			case AS_TAG_MIMETYPE:
+			case AS_TAG_PROJECT_GROUP:
+			case AS_TAG_PROJECT_LICENSE:
+			case AS_TAG_SOURCE_PKGNAME:
+			case AS_TAG_URL:
+				as_node_cdata_to_intern (root, data);
+				break;
+			default:
+				break;
+			}
+		}
+	}
+
 	helper->current = helper->current->parent;
 }
 
@@ -715,22 +752,9 @@ as_node_text_cb (GMarkupParseContext *context,
 	if (i >= text_len)
 		return;
 
-	/* split up into lines and add each with spaces stripped */
-	if (data->cdata != NULL) {
-		/* support em and code tags */
-		if (g_strcmp0 (as_tag_data_get_name (data), "p") == 0 ||
-			g_strcmp0 (as_tag_data_get_name (data), "li") == 0) {
-			g_autoptr(GString) str = g_string_new (data->cdata);
-			as_ref_string_unref (data->cdata);
-			if (helper->is_em_text)
-				g_string_append_printf (str, "<em>%s</em>", text);
-			else if (helper->is_code_text)
-				g_string_append_printf (str, "<code>%s</code>", text);
-			else
-				g_string_append (str, text);
-			data->cdata = as_ref_string_new_with_length (str->str, str->len);
-			return;
-		}
+	if (data->cdata != NULL &&
+	    g_strcmp0 (as_tag_data_get_name (data), "p") != 0 &&
+	    g_strcmp0 (as_tag_data_get_name (data), "li") != 0) {
 		g_set_error (error,
 			     AS_NODE_ERROR,
 			     AS_NODE_ERROR_INVALID_MARKUP,
@@ -739,37 +763,33 @@ as_node_text_cb (GMarkupParseContext *context,
 			     data->cdata, text);
 		return;
 	}
-	if ((helper->flags & AS_NODE_FROM_XML_FLAG_LITERAL_TEXT) > 0) {
-		data->cdata = as_ref_string_new_with_length (text, text_len + 1);
-	} else {
-		data->cdata = as_node_reflow_text (text, (gssize) text_len);
-	}
 
-	/* intern commonly duplicated tag values and save a bit of memory */
-	if (data->is_tag_valid && data->cdata != NULL) {
-		AsNode *root = g_node_get_root (helper->current);
-		switch (data->tag) {
-		case AS_TAG_CATEGORY:
-		case AS_TAG_COMPULSORY_FOR_DESKTOP:
-		case AS_TAG_CONTENT_ATTRIBUTE:
-		case AS_TAG_DEVELOPER_NAME:
-		case AS_TAG_EXTENDS:
-		case AS_TAG_ICON:
-		case AS_TAG_ID:
-		case AS_TAG_KUDO:
-		case AS_TAG_LANG:
-		case AS_TAG_METADATA_LICENSE:
-		case AS_TAG_MIMETYPE:
-		case AS_TAG_PROJECT_GROUP:
-		case AS_TAG_PROJECT_LICENSE:
-		case AS_TAG_SOURCE_PKGNAME:
-		case AS_TAG_URL:
-			as_node_cdata_to_intern (root, data);
-			break;
-		default:
-			break;
+	/* support em and code tags */
+	if (helper->is_em_text || helper->is_code_text || data->cdata != NULL) {
+		g_autoptr(GString) str = g_string_new (NULL);
+
+		if (data->cdata != NULL) {
+			g_string_append (str, data->cdata);
+			as_ref_string_unref (data->cdata);
 		}
+
+		if (helper->is_em_text)
+			g_string_append (str, "<em>");
+		if (helper->is_code_text)
+			g_string_append (str, "<code>");
+
+		g_string_append_len (str, text, text_len);
+
+		if (helper->is_code_text)
+			g_string_append (str, "</code>");
+		if (helper->is_em_text)
+			g_string_append (str, "</em>");
+
+		data->cdata = as_ref_string_new_with_length (str->str, str->len);
+		return;
 	}
+
+	data->cdata = as_ref_string_new_with_length (text, text_len);
 }
 
 static void
diff --git a/libappstream-glib/as-self-test.c b/libappstream-glib/as-self-test.c
index 3886e4b..44b32ab 100644
--- a/libappstream-glib/as-self-test.c
+++ b/libappstream-glib/as-self-test.c
@@ -2866,6 +2866,15 @@ as_test_node_xml_func (void)
 			     "It now also supports <em>em</em> and <code>code</code> tags."
 			     "</p>"
 			     "</description>";
+	const gchar *valid_em_code_2 = "<description>"
+			     "<p><em>Emphasis</em> at the start of the paragraph</p>"
+			     "</description>";
+	const gchar *valid_em_code_empty = "<description>"
+			     "<p><em></em></p>"
+			     "</description>";
+	const gchar *valid_em_code_empty_2 = "<description>"
+			     "<p>empty <em></em> emphasis</p>"
+			     "</description>";
 	GError *error = NULL;
 	AsNode *n2;
 	AsNode *root;
@@ -2936,8 +2945,34 @@ as_test_node_xml_func (void)
 
 	n2 = as_node_find (root, "description/p");
 	g_assert (n2 != NULL);
-	printf ("<%s>\n", as_node_get_data (n2));
-	g_assert_cmpstr (as_node_get_data (n2), ==, "It now also supports<em>em</em> and <code>code</code> tags.");
+	g_assert_cmpstr (as_node_get_data (n2), ==, "It now also supports <em>em</em> and <code>code</code> tags.");
+	as_node_unref (root);
+
+	root = as_node_from_xml (valid_em_code_2, 0, &error);
+	g_assert_no_error (error);
+	g_assert (root != NULL);
+
+	n2 = as_node_find (root, "description/p");
+	g_assert (n2 != NULL);
+	g_assert_cmpstr (as_node_get_data (n2), ==, "<em>Emphasis</em> at the start of the paragraph");
+	as_node_unref (root);
+
+	root = as_node_from_xml (valid_em_code_empty, 0, &error);
+	g_assert_no_error (error);
+	g_assert (root != NULL);
+
+	n2 = as_node_find (root, "description/p");
+	g_assert (n2 != NULL);
+	g_assert_cmpstr (as_node_get_data (n2), ==, NULL);
+	as_node_unref (root);
+
+	root = as_node_from_xml (valid_em_code_empty_2, 0, &error);
+	g_assert_no_error (error);
+	g_assert (root != NULL);
+
+	n2 = as_node_find (root, "description/p");
+	g_assert (n2 != NULL);
+	g_assert_cmpstr (as_node_get_data (n2), ==, "empty  emphasis");
 	as_node_unref (root);
 
 	/* keep comments */