File: test_midx.py

package info (click to toggle)
dulwich 1.0.0-2
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 7,388 kB
  • sloc: python: 99,991; makefile: 163; sh: 67
file content (267 lines) | stat: -rw-r--r-- 9,975 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
# test_midx.py -- Compatibility tests for multi-pack-index functionality
# Copyright (C) 2025 Jelmer Vernooij <jelmer@jelmer.uk>
#
# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
# General Public License as published by the Free Software Foundation; version 2.0
# or (at your option) any later version. You can redistribute it and/or
# modify it under the terms of either of these two licenses.

"""Compatibility tests for Git multi-pack-index functionality.

These tests verify that Dulwich's MIDX implementation can read and interact
with MIDX files created by C Git, and that C Git can read MIDX files created
by Dulwich.
"""

import os
import tempfile

from dulwich.midx import load_midx
from dulwich.object_store import DiskObjectStore
from dulwich.repo import Repo

from .utils import CompatTestCase, run_git_or_fail


class MIDXCompatTests(CompatTestCase):
    """Compatibility tests for multi-pack-index functionality.

    Each test builds a throwaway repository with the ``git`` command line and
    then checks that a multi-pack-index (MIDX) file written by one
    implementation (C Git or Dulwich) can be consumed by the other.
    """

    # Multi-pack-index was introduced in Git 2.21.0
    min_git_version = (2, 21, 0)

    def setUp(self):
        """Create a scratch directory and pin the Git identity for commits."""
        super().setUp()
        self.test_dir = tempfile.mkdtemp()
        # The git directory lives at <test_dir>/test-repo; it is populated by
        # create_test_repo_with_packs().
        self.repo_path = os.path.join(self.test_dir, "test-repo")

        # Set up git identity to avoid committer identity errors
        self.overrideEnv("GIT_COMMITTER_NAME", "Test Author")
        self.overrideEnv("GIT_COMMITTER_EMAIL", "test@example.com")
        self.overrideEnv("GIT_AUTHOR_NAME", "Test Author")
        self.overrideEnv("GIT_AUTHOR_EMAIL", "test@example.com")

    def tearDown(self):
        """Remove the scratch directory and run the base-class teardown."""
        from .utils import rmtree_ro

        try:
            # NOTE(review): rmtree_ro presumably also handles read-only files
            # (such as pack files) -- confirm against .utils.
            rmtree_ro(self.test_dir)
        finally:
            # Chain up so base-class cleanup still runs if the removal fails.
            super().tearDown()

    def _midx_path(self):
        """Return the path of the multi-pack-index file inside the test repo."""
        return os.path.join(self.repo_path, "objects", "pack", "multi-pack-index")

    def create_test_repo_with_packs(self):
        """Create a test repository with multiple pack files.

        The git directory is created at ``self.repo_path`` and a separate
        working directory is linked to it through a ``.git`` file.

        Returns:
          Path of the working directory in which git commands should be run.
        """
        # Initialize repository, then move its .git directory to repo_path
        run_git_or_fail(["init"], cwd=self.test_dir)
        os.rename(os.path.join(self.test_dir, ".git"), self.repo_path)

        work_dir = os.path.join(self.test_dir, "work")
        os.makedirs(work_dir)

        # Create .git file pointing to our repo
        with open(os.path.join(work_dir, ".git"), "w") as f:
            f.write(f"gitdir: {self.repo_path}\n")

        # Create some commits and pack them
        for i in range(5):
            filename = f"file{i}.txt"
            with open(os.path.join(work_dir, filename), "w") as f:
                f.write(f"Content {i}\n" * 100)  # Make files bigger to ensure packing

            run_git_or_fail(["add", filename], cwd=work_dir)
            run_git_or_fail(
                [
                    "commit",
                    "-m",
                    f"Commit {i}",
                    "--author",
                    "Test Author <test@example.com>",
                ],
                cwd=work_dir,
            )

            # Repack after every commit except the first, so that the loose
            # objects accumulated so far are packed incrementally and the
            # repository ends up with several pack files.
            if i > 0:
                run_git_or_fail(["repack", "-d"], cwd=work_dir)

        return work_dir

    def test_read_git_midx(self):
        """Test that Dulwich can read a MIDX file created by Git."""
        work_dir = self.create_test_repo_with_packs()

        # Have Git create a MIDX file
        run_git_or_fail(["multi-pack-index", "write"], cwd=work_dir)

        # Verify Git created the MIDX file
        midx_path = self._midx_path()
        self.assertTrue(
            os.path.exists(midx_path), "Git did not create multi-pack-index file"
        )

        # Load the MIDX file with Dulwich
        midx = load_midx(midx_path)
        try:
            # Verify we can read it
            self.assertGreater(len(midx), 0, "MIDX should contain objects")
            self.assertGreater(midx.pack_count, 0, "MIDX should reference packs")

            # Verify the pack names look reasonable
            # Git stores .idx extensions in MIDX files
            for pack_name in midx.pack_names:
                self.assertTrue(pack_name.startswith("pack-"))
                self.assertTrue(pack_name.endswith(".idx"))
        finally:
            midx.close()

    def test_git_uses_dulwich_midx(self):
        """Test that Git can use a MIDX file created by Dulwich."""
        work_dir = self.create_test_repo_with_packs()

        # Use Dulwich to create a MIDX file
        repo = Repo(self.repo_path)
        try:
            store = repo.object_store
            self.assertIsInstance(store, DiskObjectStore)

            # Write MIDX with Dulwich; the returned checksum is expected to be
            # 20 bytes long.
            checksum = store.write_midx()
            self.assertEqual(20, len(checksum))
        finally:
            repo.close()

        # Verify the file was created
        self.assertTrue(os.path.exists(self._midx_path()))

        # Have Git verify the MIDX file (should succeed with return code 0)
        run_git_or_fail(["multi-pack-index", "verify"], cwd=work_dir)

        # Try to use the MIDX with Git commands
        # This should work if the MIDX is valid
        run_git_or_fail(["fsck"], cwd=work_dir)

    def test_midx_object_lookup_matches_git(self):
        """Test that object lookups through MIDX match Git's results."""
        work_dir = self.create_test_repo_with_packs()

        # Have Git create a MIDX file
        run_git_or_fail(["multi-pack-index", "write"], cwd=work_dir)

        # Load with Dulwich
        repo = Repo(self.repo_path)
        try:
            store = repo.object_store

            # Get MIDX
            midx = store.get_midx()
            self.assertIsNotNone(midx, "MIDX should be loaded")

            # Get all objects from Git. Each output line starts with the hex
            # object id, optionally followed by a path; convert the id
            # straight to its binary form.
            rev_list = run_git_or_fail(
                ["rev-list", "--all", "--objects"], cwd=work_dir
            )
            object_ids = [
                bytes.fromhex(line.split()[0])
                for line in rev_list.decode("utf-8").splitlines()
                if line.strip()
            ]

            # Verify we can find these objects through the MIDX
            found_count = 0
            for sha_bin in object_ids:
                # Check if it's in the MIDX
                if sha_bin not in midx:
                    continue
                found_count += 1

                # Verify we can get the object location
                location = midx.object_offset(sha_bin)
                self.assertIsNotNone(location)
                pack_name, offset = location
                self.assertIsInstance(pack_name, str)
                self.assertIsInstance(offset, int)
                self.assertGreater(offset, 0)

            # We should find at least some objects in the MIDX
            self.assertGreater(
                found_count, 0, "Should find at least some objects in MIDX"
            )
        finally:
            repo.close()

    def test_midx_with_multiple_packs(self):
        """Test MIDX functionality with multiple pack files."""
        work_dir = self.create_test_repo_with_packs()

        # Run two further repacks on top of the packs created above
        run_git_or_fail(["repack"], cwd=work_dir)
        run_git_or_fail(["repack"], cwd=work_dir)

        # Create MIDX with Git
        run_git_or_fail(["multi-pack-index", "write"], cwd=work_dir)

        # Load with Dulwich
        midx = load_midx(self._midx_path())
        try:
            # Should have multiple packs
            # (Exact count may vary depending on Git version and repacking,
            # so only a lower bound of one is asserted)
            self.assertGreaterEqual(midx.pack_count, 1)

            # Verify we can iterate over all entries
            entries = list(midx.iterentries())
            self.assertGreater(len(entries), 0)

            # All entries should have valid structure
            for sha, pack_name, offset in entries:
                self.assertEqual(20, len(sha))  # SHA-1 is 20 bytes
                self.assertIsInstance(pack_name, str)
                # Git stores .idx extensions in MIDX files
                self.assertTrue(pack_name.endswith(".idx"))
                self.assertIsInstance(offset, int)
                self.assertGreaterEqual(offset, 0)
        finally:
            midx.close()

    def test_dulwich_object_store_with_git_midx(self):
        """Test that DiskObjectStore can use Git-created MIDX for lookups."""
        work_dir = self.create_test_repo_with_packs()

        # Have Git create a MIDX file
        run_git_or_fail(["multi-pack-index", "write"], cwd=work_dir)

        # Load repo with Dulwich
        repo = Repo(self.repo_path)
        try:
            # Get a commit from the repo
            rev_parse = run_git_or_fail(["rev-parse", "HEAD"], cwd=work_dir)
            head_sha = rev_parse.decode("utf-8").strip().encode("ascii")

            # Verify we can access it through Dulwich
            # This should use the MIDX for lookup
            obj = repo.object_store[head_sha]
            self.assertIsNotNone(obj)
            self.assertEqual(b"commit", obj.type_name)
        finally:
            repo.close()

    def test_repack_with_midx(self):
        """Test that repacking works correctly with MIDX present."""
        work_dir = self.create_test_repo_with_packs()

        # Create MIDX with Dulwich
        repo = Repo(self.repo_path)
        try:
            repo.object_store.write_midx()
        finally:
            repo.close()

        # Verify Git can still repack
        run_git_or_fail(["repack", "-d"], cwd=work_dir)

        # The MIDX should still be readable
        midx_path = self._midx_path()
        if os.path.exists(midx_path):  # Git may remove it during repack
            midx = load_midx(midx_path)
            try:
                # len() can never be negative, so the effective check here is
                # that loading the file and reading its size do not raise.
                self.assertGreaterEqual(len(midx), 0)
            finally:
                midx.close()