File: CVE-2023-34624.patch

package info (click to toggle)
libhtmlcleaner-java 2.24-1%2Bdeb11u1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 2,336 kB
  • sloc: java: 13,856; xml: 767; sh: 3; makefile: 2
file content (134 lines) | stat: -rw-r--r-- 5,160 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
From: Markus Koschany <apo@debian.org>
Date: Mon, 7 Aug 2023 18:25:54 +0200
Subject: CVE-2023-34624

Origin: https://sourceforge.net/p/htmlcleaner/code/603/
Upstream-Bug: https://github.com/amplafi/htmlcleaner/issues/13
---
 .../java/org/htmlcleaner/CleanerProperties.java    | 15 +++++++++-
 src/main/java/org/htmlcleaner/HtmlCleaner.java     | 11 ++++---
 src/test/java/org/htmlcleaner/NestingTest.java     | 34 ++++++++++++++++++++++
 3 files changed, 55 insertions(+), 5 deletions(-)
 create mode 100644 src/test/java/org/htmlcleaner/NestingTest.java

diff --git a/src/main/java/org/htmlcleaner/CleanerProperties.java b/src/main/java/org/htmlcleaner/CleanerProperties.java
index 312190e..5172e7e 100644
--- a/src/main/java/org/htmlcleaner/CleanerProperties.java
+++ b/src/main/java/org/htmlcleaner/CleanerProperties.java
@@ -91,7 +91,18 @@ public class CleanerProperties implements HtmlModificationListener{
 
     private boolean allowInvalidAttributeNames;
     private String invalidAttributeNamePrefix;
-    
+
+    /**
+     * Maximum nesting depth HtmlCleaner recurses into while marking nodes to prune; reset() sets it to 1000
+     */
+    private int maxDepth;
+    public int getMaxDepth() {
+        return maxDepth;
+    }
+    public void setMaxDepth(int maxDepth) {
+        this.maxDepth = maxDepth;
+    }
+
     /**
      * "cause the cleaner cannot keep track of whitespace at that level",
      * there are 2 lists built: one for the head , one for the body. So whitespace that falls outside of the head and body is not preserved
@@ -519,6 +530,7 @@ public class CleanerProperties implements HtmlModificationListener{
      * charset = "UTF-8";
      * trimAttributeValues = true;
      * tagInfoProvider = HTML5TagProvider.INSTANCE
+     * maxDepth = 1000
      */
     public void reset() {
         advancedXmlEscape = true;
@@ -558,6 +570,7 @@ public class CleanerProperties implements HtmlModificationListener{
         trimAttributeValues = true;
         invalidAttributeNamePrefix = "";
         allowInvalidAttributeNames = false;
+        maxDepth = 1000;
     }
 
     private void resetPruneTagSet() {
diff --git a/src/main/java/org/htmlcleaner/HtmlCleaner.java b/src/main/java/org/htmlcleaner/HtmlCleaner.java
index f46248d..b0d6ed2 100644
--- a/src/main/java/org/htmlcleaner/HtmlCleaner.java
+++ b/src/main/java/org/htmlcleaner/HtmlCleaner.java
@@ -490,7 +490,7 @@ public class HtmlCleaner {
         // Some transitions on resulting html require us to have the tag tree structure.
         // i.e. if we want to clear insignificant <br> tags. Thus this place is best for
         // marking nodes to be pruned.
-        while(markNodesToPrune(nodeList, cleanTimeValues)) {
+        while(markNodesToPrune(nodeList, cleanTimeValues, 0)) {
         	if (Thread.currentThread().isInterrupted()) {
         		handleInterruption();
             	return null;
@@ -519,7 +519,10 @@ public class HtmlCleaner {
         return cleanTimeValues.rootNode;
     }
 
-	private boolean markNodesToPrune(List nodeList, CleanTimeValues cleanTimeValues) {
+	private boolean markNodesToPrune(List nodeList, CleanTimeValues cleanTimeValues, int depth) {
+		if (depth > properties.getMaxDepth()) {
+			return false;
+		}
 	    boolean nodesPruned = false;
 		for (Object next :nodeList) {
 			if(next instanceof TagNode && !cleanTimeValues.pruneNodeSet.contains(next)){
@@ -527,7 +530,7 @@ public class HtmlCleaner {
     			if(addIfNeededToPruneSet(node, cleanTimeValues)) {
 			        nodesPruned = true;
     			} else if (!node.isEmpty()){
-    				nodesPruned |= markNodesToPrune(node.getAllChildren(), cleanTimeValues);
+					nodesPruned |= markNodesToPrune(node.getAllChildren(), cleanTimeValues, depth+1);
     			}
     		}
     	}
@@ -1572,4 +1575,4 @@ public class HtmlCleaner {
 		
 	}
 
-}
\ No newline at end of file
+}
diff --git a/src/test/java/org/htmlcleaner/NestingTest.java b/src/test/java/org/htmlcleaner/NestingTest.java
new file mode 100644
index 0000000..90c30a5
--- /dev/null
+++ b/src/test/java/org/htmlcleaner/NestingTest.java
@@ -0,0 +1,34 @@
+package org.htmlcleaner;
+
+import junit.framework.TestCase;
+import org.junit.Test;
+
+public class NestingTest extends TestCase {
+
+    public final static int TOO_DEEP_NESTING = 9999;
+    public final static String TOO_DEEP_DOC = _nestedDoc(TOO_DEEP_NESTING, "<div>", "</div>", "");
+
+    public static String _nestedDoc(int nesting, String open, String close, String content) {
+        StringBuilder sb = new StringBuilder(nesting * (open.length() + close.length()));
+        for (int i = 0; i < nesting; ++i) {
+            sb.append(open);
+            if ((i & 31) == 0) {
+                sb.append("\n");
+            }
+        }
+        sb.append("\n").append(content).append("\n");
+        for (int i = 0; i < nesting; ++i) {
+            sb.append(close);
+            if ((i & 31) == 0) {
+                sb.append("\n");
+            }
+        }
+        return sb.toString();
+    }
+
+    @Test
+    public void testDeepNesting(){
+        HtmlCleaner cleaner = new HtmlCleaner();
+        TagNode root = cleaner.clean(TOO_DEEP_DOC);
+    }
+}