Description: Handle Python errors caused by DNSDumpster gracefully
 From upstream PR #412:
 - Hardened run(): a failing enumerate() is caught and reported, and results
   are pushed with list-append logic (self.q.append) only if the list exists.
 - Refactored DNSDumpster handling:
   - Updated req() -> get_csrftoken() interaction.
   - Implemented resilient get_csrftoken() that accepts either Response
     objects or raw HTML strings.
   - Added graceful fallback when CSRF token is missing or HTML structure
     changes.
Author: Sven Geuer <sge@debian.org>
Origin: backport, https://github.com/aboul3la/Sublist3r/pull/412
Bug-Debian: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1088685
Forwarded: not-needed
Last-Update: 2025-12-18
---
This patch header follows DEP-3: http://dep.debian.net/deps/dep3/
--- a/sublist3r.py
+++ b/sublist3r.py
@@ -266,9 +266,26 @@
         return
 
     def run(self):
-        domain_list = self.enumerate()
-        for domain in domain_list:
-            self.q.append(domain)
+        # Guard enumerate() so one broken engine doesn't kill the whole scan
+        try:
+            domain_list = self.enumerate()
+        except Exception as e:
+            # engine_name is defined in each subclass (Google, Yahoo, Ask, etc.)
+            try:
+                self.print_("[!] Engine {0} failed: {1}".format(self.engine_name, e))
+            except Exception:
+                # Fallback if print_ or engine_name missing for some reason
+                print("[!] Engine failed: {0}".format(e))
+            domain_list = []
+
+        # Push results into the shared list, if present
+        if self.q is not None:
+            for domain in domain_list:
+                try:
+                    self.q.append(domain)
+                except Exception:
+                    # don't let one bad entry kill the process
+                    pass
 
 
 class GoogleEnum(enumratorBaseThreaded):
@@ -640,13 +658,33 @@
         return self.get_response(resp)
 
     def get_csrftoken(self, resp):
-        csrf_regex = re.compile('<input type="hidden" name="csrfmiddlewaretoken" value="(.*?)">', re.S)
-        token = csrf_regex.findall(resp)[0]
-        return token.strip()
+        """
+        Accepts either a requests.Response object or a raw HTML string.
+        Returns the CSRF token from DNSDumpster HTML; raises Exception if absent.
+        """
+        # If it's a Response object, extract .text
+        if hasattr(resp, "text"):
+            html = resp.text
+        else:
+            # Assume it's already a string
+            html = resp
+
+        match = re.search(
+            r'name="csrfmiddlewaretoken" value="(.*?)"',
+            html,
+        )
+        if not match:
+            raise Exception("Could not find CSRF token on DNSDumpster page")
+        return match.group(1)
 
     def enumerate(self):
         resp = self.req('GET', self.base_url)
-        token = self.get_csrftoken(resp)
+        try:
+            token = self.get_csrftoken(resp)
+        except Exception as e:
+            print("[!] DNSDumpster module failed: {0}".format(e))
+            return []  # gracefully skip this source
+
         params = {'csrfmiddlewaretoken': token, 'targetip': self.domain}
         post_resp = self.req('POST', self.base_url, params)
         self.extract_domains(post_resp)
