From 82ee233ea82a40c626e95a191fe2d52c745db870 Mon Sep 17 00:00:00 2001
From: dsk7 <jensg@posteo.de>
Date: Sat, 23 Apr 2022 19:12:13 +0200
Subject: MAINT: Quadratic runtime while parsing reduced to linear  (#808)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When the PdfFileReader tries to find the xref marker, the readNextEndLine method builds a so-called line by reading byte-for-byte. Every time a new byte is read, it is concatenated with the currently read line. This leads to quadratic runtime O(n²) behavior, where n is the size of the file, because Python strings (and byte-strings) are immutable and have to be copied on every concatenation.
For files where the xref marker cannot be found at the end, this takes an enormous amount of time:

* 1 MB of zeros at the end: 45.54 seconds
* 2 MB of zeros at the end: 357.04 seconds
(measured on a laptop made in 2015)

This pull request changes the relevant section of the code to run in linear time O(n), leading to a runtime of less than a second for both cases mentioned above. Furthermore, this PR adds a regression test.
---
 PyPDF2/pdf.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/PyPDF2/pdf.py b/PyPDF2/pdf.py
index 9979414..8b355e0 100644
--- a/PyPDF2/pdf.py
+++ b/PyPDF2/pdf.py
@@ -1930,7 +1930,7 @@ class PdfFileReader(object):
     def readNextEndLine(self, stream):
         debug = False
         if debug: print(">>readNextEndLine")
-        line = b_("")
+        line_parts = []
         while True:
             # Prevent infinite loops in malformed PDFs
             if stream.tell() == 0:
@@ -1957,10 +1957,10 @@ class PdfFileReader(object):
                 break
             else:
                 if debug: print("  x is neither")
-                line = x + line
-                if debug: print(("  RNEL line:", line))
+                line_parts.append(x)
         if debug: print("leaving RNEL")
-        return line
+        line_parts.reverse()
+        return b"".join(line_parts)
 
     def decrypt(self, password):
         """
-- 
2.30.2

