From: Markus Koschany <apo@debian.org>
Date: Mon, 7 Aug 2023 18:25:54 +0200
Subject: CVE-2023-34624: limit recursion depth in HtmlCleaner.markNodesToPrune

Origin: https://sourceforge.net/p/htmlcleaner/code/603/
Upstream-Bug: https://github.com/amplafi/htmlcleaner/issues/13
---
 .../java/org/htmlcleaner/CleanerProperties.java    | 15 +++++++++-
 src/main/java/org/htmlcleaner/HtmlCleaner.java     | 11 ++++---
 src/test/java/org/htmlcleaner/NestingTest.java     | 34 ++++++++++++++++++++++
 3 files changed, 55 insertions(+), 5 deletions(-)
 create mode 100644 src/test/java/org/htmlcleaner/NestingTest.java

diff --git a/src/main/java/org/htmlcleaner/CleanerProperties.java b/src/main/java/org/htmlcleaner/CleanerProperties.java
index 312190e..5172e7e 100644
--- a/src/main/java/org/htmlcleaner/CleanerProperties.java
+++ b/src/main/java/org/htmlcleaner/CleanerProperties.java
@@ -91,7 +91,18 @@ public class CleanerProperties implements HtmlModificationListener{
 
     private boolean allowInvalidAttributeNames;
     private String invalidAttributeNamePrefix;
-    
+
+    /**
+     * Maximum depth to which the cleaner recurses into nested nodes when marking nodes to prune; reset() sets it to 1000.
+     */
+    private int maxDepth;
+    public int getMaxDepth() {
+        return maxDepth;
+    }
+    public void setMaxDepth(int maxDepth) {
+        this.maxDepth = maxDepth;
+    }
+
     /**
      * "cause the cleaner cannot keep track of whitespace at that level",
      * there are 2 lists built: one for the head , one for the body. So whitespace that falls outside of the head and body is not preserved
@@ -519,6 +530,7 @@ public class CleanerProperties implements HtmlModificationListener{
      * charset = "UTF-8";
      * trimAttributeValues = true;
      * tagInfoProvider = HTML5TagProvider.INSTANCE
+     * maxDepth = 1000
      */
     public void reset() {
         advancedXmlEscape = true;
@@ -558,6 +570,7 @@ public class CleanerProperties implements HtmlModificationListener{
         trimAttributeValues = true;
         invalidAttributeNamePrefix = "";
         allowInvalidAttributeNames = false;
+        maxDepth = 1000;
     }
 
     private void resetPruneTagSet() {
diff --git a/src/main/java/org/htmlcleaner/HtmlCleaner.java b/src/main/java/org/htmlcleaner/HtmlCleaner.java
index f46248d..b0d6ed2 100644
--- a/src/main/java/org/htmlcleaner/HtmlCleaner.java
+++ b/src/main/java/org/htmlcleaner/HtmlCleaner.java
@@ -490,7 +490,7 @@ public class HtmlCleaner {
         // Some transitions on resulting html require us to have the tag tree structure.
         // i.e. if we want to clear insignificant <br> tags. Thus this place is best for
         // marking nodes to be pruned.
-        while(markNodesToPrune(nodeList, cleanTimeValues)) {
+        while(markNodesToPrune(nodeList, cleanTimeValues, 0)) {
         	if (Thread.currentThread().isInterrupted()) {
         		handleInterruption();
             	return null;
@@ -519,7 +519,10 @@ public class HtmlCleaner {
         return cleanTimeValues.rootNode;
     }
 
-	private boolean markNodesToPrune(List nodeList, CleanTimeValues cleanTimeValues) {
+	private boolean markNodesToPrune(List nodeList, CleanTimeValues cleanTimeValues, int depth) {
+		if (depth > properties.getMaxDepth()) {
+			return false;
+		}
 	    boolean nodesPruned = false;
 		for (Object next :nodeList) {
 			if(next instanceof TagNode && !cleanTimeValues.pruneNodeSet.contains(next)){
@@ -527,7 +530,7 @@ public class HtmlCleaner {
     			if(addIfNeededToPruneSet(node, cleanTimeValues)) {
 			        nodesPruned = true;
     			} else if (!node.isEmpty()){
-    				nodesPruned |= markNodesToPrune(node.getAllChildren(), cleanTimeValues);
+					nodesPruned |= markNodesToPrune(node.getAllChildren(), cleanTimeValues, depth+1);
     			}
     		}
     	}
@@ -1572,4 +1575,4 @@ public class HtmlCleaner {
 		
 	}
 
-}
\ No newline at end of file
+}
diff --git a/src/test/java/org/htmlcleaner/NestingTest.java b/src/test/java/org/htmlcleaner/NestingTest.java
new file mode 100644
index 0000000..90c30a5
--- /dev/null
+++ b/src/test/java/org/htmlcleaner/NestingTest.java
@@ -0,0 +1,34 @@
+package org.htmlcleaner;
+
+import junit.framework.TestCase;
+import org.junit.Test;
+
+public class NestingTest extends TestCase {
+
+    public final static int TOO_DEEP_NESTING = 9999;
+    public final static String TOO_DEEP_DOC = _nestedDoc(TOO_DEEP_NESTING, "<div>", "</div>", "");
+
+    public static String _nestedDoc(int nesting, String open, String close, String content) {
+        StringBuilder sb = new StringBuilder(nesting * (open.length() + close.length()));
+        for (int i = 0; i < nesting; ++i) {
+            sb.append(open);
+            if ((i & 31) == 0) {
+                sb.append("\n");
+            }
+        }
+        sb.append("\n").append(content).append("\n");
+        for (int i = 0; i < nesting; ++i) {
+            sb.append(close);
+            if ((i & 31) == 0) {
+                sb.append("\n");
+            }
+        }
+        return sb.toString();
+    }
+
+    @Test
+    public void testDeepNesting(){ // regression test for CVE-2023-34624: input nested deeper than maxDepth
+        HtmlCleaner cleaner = new HtmlCleaner();
+        assertNotNull(cleaner.clean(TOO_DEEP_DOC)); // cleaning must finish and still return a root node
+    }
+}
