From fc0b8259e693caa8400fa8b6ac1e494e47ea7798 Mon Sep 17 00:00:00 2001
From: "Jason R. Coombs" <jaraco@jaraco.com>
Date: Wed, 4 Sep 2024 11:52:54 -0400
Subject: [PATCH] [3.11] gh-123270: Replaced SanitizedNames with a more
 surgical fix. (GH-123354) (#123425)

Applies changes from zipp 3.20.1 and jaraco/zippGH-124
(cherry picked from commit 2231286d78d328c2f575e0b05b16fe447d1656d6)

Co-authored-by: Jason R. Coombs <jaraco@jaraco.com>

* Restore the slash-prefixed paths in the malformed_paths test.

Origin: upstream, https://github.com/python/cpython/commit/fc0b8259e693caa8400fa8b6ac1e494e47ea7798
Bug-Debian: https://bugs.debian.org/1080245
---
 Lib/test/test_zipfile.py                      | 72 +++++++++++++++++--
 Lib/zipfile.py                                | 69 +++---------------
 ...-08-26-13-45-20.gh-issue-123270.gXHvNJ.rst |  3 +
 3 files changed, 77 insertions(+), 67 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Library/2024-08-26-13-45-20.gh-issue-123270.gXHvNJ.rst

Index: python3/Lib/test/test_zipfile.py
===================================================================
--- python3.orig/Lib/test/test_zipfile.py
+++ python3/Lib/test/test_zipfile.py
@@ -3546,7 +3546,11 @@ class EncodedMetadataTests(unittest.Test
 
     def test_malformed_paths(self):
         """
-        Path should handle malformed paths.
+        Path should handle malformed paths gracefully.
+
+        Paths with leading slashes are not visible.
+
+        Paths with dots are treated like regular files.
         """
         data = io.BytesIO()
         zf = zipfile.ZipFile(data, "w")
@@ -3555,11 +3559,67 @@ class EncodedMetadataTests(unittest.Test
         zf.writestr("../parent.txt", b"content")
         zf.filename = ''
         root = zipfile.Path(zf)
-        assert list(map(str, root.iterdir())) == [
-            'one-slash.txt',
-            'two-slash.txt',
-            'parent.txt',
-        ]
+        assert list(map(str, root.iterdir())) == ['../']
+        assert root.joinpath('..').joinpath('parent.txt').read_bytes() == b'content'
+
+    def test_unsupported_names(self):
+        """
+        Path segments with special characters are readable.
+
+        On some platforms or file systems, characters like
+        ``:`` and ``?`` are not allowed, but they are valid
+        in the zip file.
+        """
+        data = io.BytesIO()
+        zf = zipfile.ZipFile(data, "w")
+        zf.writestr("path?", b"content")
+        zf.writestr("V: NMS.flac", b"fLaC...")
+        zf.filename = ''
+        root = zipfile.Path(zf)
+        contents = root.iterdir()
+        assert next(contents).name == 'path?'
+        assert next(contents).name == 'V: NMS.flac'
+        assert root.joinpath('V: NMS.flac').read_bytes() == b"fLaC..."
+
+    def test_backslash_not_separator(self):
+        """
+        In a zip file, backslashes are not separators.
+        """
+        data = io.BytesIO()
+        zf = zipfile.ZipFile(data, "w")
+        zf.writestr(DirtyZipInfo.for_name("foo\\bar", zf), b"content")
+        zf.filename = ''
+        root = zipfile.Path(zf)
+        (first,) = root.iterdir()
+        assert not first.is_dir()
+        assert first.name == 'foo\\bar'
+
+
+class DirtyZipInfo(zipfile.ZipInfo):
+    """
+    Bypass name sanitization.
+    """
+
+    def __init__(self, filename, *args, **kwargs):
+        super().__init__(filename, *args, **kwargs)
+        self.filename = filename
+
+    @classmethod
+    def for_name(cls, name, archive):
+        """
+        Construct the same way that ZipFile.writestr does.
+
+        TODO: extract this functionality and re-use
+        """
+        self = cls(filename=name, date_time=time.localtime(time.time())[:6])
+        self.compress_type = archive.compression
+        self.compress_level = archive.compresslevel
+        if self.filename.endswith('/'):  # pragma: no cover
+            self.external_attr = 0o40775 << 16  # drwxrwxr-x
+            self.external_attr |= 0x10  # MS-DOS directory flag
+        else:
+            self.external_attr = 0o600 << 16  # ?rw-------
+        return self
 
 
 if __name__ == "__main__":
Index: python3/Lib/zipfile.py
===================================================================
--- python3.orig/Lib/zipfile.py
+++ python3/Lib/zipfile.py
@@ -2201,7 +2201,7 @@ def _parents(path):
 def _ancestry(path):
     """
     Given a path with elements separated by
-    posixpath.sep, generate all elements of that path
+    posixpath.sep, generate all elements of that path.
 
     >>> list(_ancestry('b/d'))
     ['b/d', 'b']
@@ -2213,9 +2213,14 @@ def _ancestry(path):
     ['b']
     >>> list(_ancestry(''))
     []
+
+    Multiple separators are treated like a single.
+
+    >>> list(_ancestry('//b//d///f//'))
+    ['//b//d///f', '//b//d', '//b']
     """
     path = path.rstrip(posixpath.sep)
-    while path and path != posixpath.sep:
+    while path.rstrip(posixpath.sep):
         yield path
         path, tail = posixpath.split(path)
 
@@ -2232,65 +2237,7 @@ def _difference(minuend, subtrahend):
     return itertools.filterfalse(set(subtrahend).__contains__, minuend)
 
 
-class SanitizedNames:
-    """
-    ZipFile mix-in to ensure names are sanitized.
-    """
-
-    def namelist(self):
-        return list(map(self._sanitize, super().namelist()))
-
-    @staticmethod
-    def _sanitize(name):
-        r"""
-        Ensure a relative path with posix separators and no dot names.
-        Modeled after
-        https://github.com/python/cpython/blob/bcc1be39cb1d04ad9fc0bd1b9193d3972835a57c/Lib/zipfile/__init__.py#L1799-L1813
-        but provides consistent cross-platform behavior.
-        >>> san = SanitizedNames._sanitize
-        >>> san('/foo/bar')
-        'foo/bar'
-        >>> san('//foo.txt')
-        'foo.txt'
-        >>> san('foo/.././bar.txt')
-        'foo/bar.txt'
-        >>> san('foo../.bar.txt')
-        'foo../.bar.txt'
-        >>> san('\\foo\\bar.txt')
-        'foo/bar.txt'
-        >>> san('D:\\foo.txt')
-        'D/foo.txt'
-        >>> san('\\\\server\\share\\file.txt')
-        'server/share/file.txt'
-        >>> san('\\\\?\\GLOBALROOT\\Volume3')
-        '?/GLOBALROOT/Volume3'
-        >>> san('\\\\.\\PhysicalDrive1\\root')
-        'PhysicalDrive1/root'
-        Retain any trailing slash.
-        >>> san('abc/')
-        'abc/'
-        Raises a ValueError if the result is empty.
-        >>> san('../..')
-        Traceback (most recent call last):
-        ...
-        ValueError: Empty filename
-        """
-
-        def allowed(part):
-            return part and part not in {'..', '.'}
-
-        # Remove the drive letter.
-        # Don't use ntpath.splitdrive, because that also strips UNC paths
-        bare = re.sub('^([A-Z]):', r'\1', name, flags=re.IGNORECASE)
-        clean = bare.replace('\\', '/')
-        parts = clean.split('/')
-        joined = '/'.join(filter(allowed, parts))
-        if not joined:
-            raise ValueError("Empty filename")
-        return joined + '/' * name.endswith('/')
-
-
-class CompleteDirs(SanitizedNames, ZipFile):
+class CompleteDirs(ZipFile):
     """
     A ZipFile subclass that ensures that implied directories
     are always included in the namelist.
Index: python3/Misc/NEWS.d/next/Library/2024-08-26-13-45-20.gh-issue-123270.gXHvNJ.rst
===================================================================
--- /dev/null
+++ python3/Misc/NEWS.d/next/Library/2024-08-26-13-45-20.gh-issue-123270.gXHvNJ.rst
@@ -0,0 +1,3 @@
+Applied a more surgical fix for malformed payloads in :class:`zipfile.Path`
+causing infinite loops (gh-122905) without breaking contents using
+legitimate characters.
