# test_diffstat.py -- Tests for diffstat
# SPDX-License-Identifier: MIT
# Copyright (c) 2025 Test Contributor
# All rights reserved.

"""Tests for dulwich.diffstat."""

import os
import tempfile
import unittest

from dulwich.diffstat import (
    diffstat,
    main,
    parse_patch,
)


class ParsePatchTests(unittest.TestCase):
    """Exercise ``parse_patch`` against small hand-written git diffs."""

    def _assert_parsed(
        self, patch_lines, expected_names, expected_binary, expected_counts
    ):
        """Parse ``patch_lines`` and compare each of the three returned lists.

        Args:
          patch_lines: list of byte strings handed to ``parse_patch``
          expected_names: expected first element of the returned triple
          expected_binary: expected second element (per-file binary flags)
          expected_counts: expected third element ((added, deleted) pairs)
        """
        parsed_names, parsed_binary, parsed_counts = parse_patch(patch_lines)
        self.assertEqual(parsed_names, expected_names)
        self.assertEqual(parsed_binary, expected_binary)
        self.assertEqual(parsed_counts, expected_counts)

    def test_empty_input(self):
        """No input lines yield three empty lists."""
        self._assert_parsed([], [], [], [])

    def test_basic_git_diff(self):
        """A single text file with both added and removed lines."""
        header = [
            b"diff --git a/file.txt b/file.txt",
            b"index 1234567..abcdefg 100644",
            b"--- a/file.txt",
            b"+++ b/file.txt",
        ]
        hunk = [
            b"@@ -1,5 +1,7 @@",
            b" unchanged line",
            b"-deleted line",
            b"-another deleted line",
            b"+added line",
            b"+another added line",
            b"+third added line",
            b" unchanged line",
        ]
        # One non-binary file with 3 additions and 2 deletions.
        self._assert_parsed(header + hunk, [b"file.txt"], [False], [(3, 2)])

    def test_chunk_ending_with_nonstandard_line(self):
        """A hunk followed by a line carrying no diff indicator.

        The stray line starts with neither a space, ``+`` nor ``-``; the
        counts of the file before it and of the file after it must both
        still come out right.
        """
        first_file = [
            b"diff --git a/file.txt b/file.txt",
            b"index 1234567..abcdefg 100644",
            b"--- a/file.txt",
            b"+++ b/file.txt",
            b"@@ -1,5 +1,7 @@",
            b" unchanged line",
            b"-deleted line",
            b"+added line",
        ]
        # The line that does not look like hunk content.
        stray = [b"No leading space or indicator"]
        second_file = [
            b"diff --git a/file2.txt b/file2.txt",
            b"index 2345678..bcdefgh 100644",
            b"--- a/file2.txt",
            b"+++ b/file2.txt",
            b"@@ -1,3 +1,4 @@",
            b" unchanged in file2",
            b"+added in file2",
            b" another unchanged in file2",
        ]
        # file.txt: 1 added / 1 deleted; file2.txt: 1 added / 0 deleted.
        self._assert_parsed(
            first_file + stray + second_file,
            [b"file.txt", b"file2.txt"],
            [False, False],
            [(1, 1), (1, 0)],
        )

    def test_binary_files(self):
        """A ``Binary files ... differ`` entry is flagged and has zero counts."""
        patch_lines = [
            b"diff --git a/image.png b/image.png",
            b"index 1234567..abcdefg 100644",
            b"Binary files a/image.png and b/image.png differ",
        ]
        self._assert_parsed(patch_lines, [b"image.png"], [True], [(0, 0)])

    def test_renamed_file(self):
        """A rename is reported as ``old => new`` under a single entry."""
        rename_header = [
            b"diff --git a/oldname.txt b/newname.txt",
            b"similarity index 80%",
            b"rename from oldname.txt",
            b"rename to newname.txt",
            b"index 1234567..abcdefg 100644",
            b"--- a/oldname.txt",
            b"+++ b/newname.txt",
        ]
        hunk = [
            b"@@ -1,3 +1,4 @@",
            b" unchanged line",
            b" another unchanged line",
            b"+added line",
            b" third unchanged line",
        ]
        # Both names appear in the reported name; one line was added.
        self._assert_parsed(
            rename_header + hunk,
            [b"oldname.txt => newname.txt"],
            [False],
            [(1, 0)],
        )

    def test_multiple_files(self):
        """Two consecutive file diffs are counted independently."""
        first_file = [
            b"diff --git a/file1.txt b/file1.txt",
            b"index 1234567..abcdefg 100644",
            b"--- a/file1.txt",
            b"+++ b/file1.txt",
            b"@@ -1,3 +1,4 @@",
            b" unchanged",
            b"+added",
            b" unchanged",
            b" unchanged",
        ]
        second_file = [
            b"diff --git a/file2.txt b/file2.txt",
            b"index 1234567..abcdefg 100644",
            b"--- a/file2.txt",
            b"+++ b/file2.txt",
            b"@@ -1,3 +1,2 @@",
            b" unchanged",
            b"-deleted",
            b" unchanged",
        ]
        # file1.txt gained one line; file2.txt lost one line.
        self._assert_parsed(
            first_file + second_file,
            [b"file1.txt", b"file2.txt"],
            [False, False],
            [(1, 0), (0, 1)],
        )


class DiffstatTests(unittest.TestCase):
    """Tests for diffstat function."""

    @staticmethod
    def _stat_line(result, filename):
        """Return the first line of ``result`` that contains ``filename``.

        Args:
          result: bytes returned by ``diffstat``
          filename: bytes to look for in each newline-separated line
        Raises:
          StopIteration: if no line contains ``filename``
        """
        return next(line for line in result.split(b"\n") if filename in line)

    def test_empty_diff(self):
        """Test generating diffstat for an empty diff."""
        result = diffstat([])
        self.assertEqual(result, b" 0 files changed, 0 insertions(+), 0 deletions(-)")

    def test_basic_diffstat(self):
        """Test generating a basic diffstat."""
        diff = [
            b"diff --git a/file.txt b/file.txt",
            b"index 1234567..abcdefg 100644",
            b"--- a/file.txt",
            b"+++ b/file.txt",
            b"@@ -1,2 +1,3 @@",
            b" unchanged line",
            b"+added line",
            b" unchanged line",
        ]
        result = diffstat(diff)
        # Check that the output contains key elements
        self.assertIn(b"file.txt", result)
        self.assertIn(b"1 files changed", result)
        self.assertIn(b"1 insertions(+)", result)
        self.assertIn(b"0 deletions(-)", result)

    def test_binary_file_diffstat(self):
        """Test generating diffstat with binary files."""
        diff = [
            b"diff --git a/image.png b/image.png",
            b"index 1234567..abcdefg 100644",
            b"Binary files a/image.png and b/image.png differ",
        ]
        result = diffstat(diff)
        self.assertIn(b"image.png", result)
        self.assertIn(b"Bin", result)  # Binary file indicator
        self.assertIn(b"1 files changed", result)
        self.assertIn(b"0 insertions(+)", result)
        self.assertIn(b"0 deletions(-)", result)

    def test_multiple_files_diffstat(self):
        """Test generating diffstat with multiple files."""
        diff = [
            # First file
            b"diff --git a/file1.txt b/file1.txt",
            b"index 1234567..abcdefg 100644",
            b"--- a/file1.txt",
            b"+++ b/file1.txt",
            b"@@ -1,3 +1,5 @@",
            b" unchanged",
            b"+added1",
            b"+added2",
            b" unchanged",
            b" unchanged",
            # Second file
            b"diff --git a/file2.txt b/file2.txt",
            b"index 1234567..abcdefg 100644",
            b"--- a/file2.txt",
            b"+++ b/file2.txt",
            b"@@ -1,3 +1,2 @@",
            b" unchanged",
            b"-deleted",
            b" unchanged",
        ]
        result = diffstat(diff)
        self.assertIn(b"file1.txt", result)
        self.assertIn(b"file2.txt", result)
        self.assertIn(b"2 files changed", result)
        self.assertIn(b"2 insertions(+)", result)
        self.assertIn(b"1 deletions(-)", result)

    def test_custom_width(self):
        """Test diffstat with custom width parameter."""
        diff = [
            b"diff --git a/file.txt b/file.txt",
            b"index 1234567..abcdefg 100644",
            b"--- a/file.txt",
            b"+++ b/file.txt",
            b"@@ -1,2 +1,5 @@",
            b" unchanged line",
            b"+added line 1",
            b"+added line 2",
            b"+added line 3",
            b" unchanged line",
        ]
        narrow_result = diffstat(diff, max_width=30)
        wide_result = diffstat(diff, max_width=120)

        # Whatever the width, the file name and the totals must be reported.
        for result in (narrow_result, wide_result):
            self.assertIn(b"file.txt", result)
            self.assertIn(b"1 files changed", result)
            self.assertIn(b"3 insertions(+)", result)

    def test_histwidth_scaling(self):
        """Test histogram width scaling for various change sizes."""
        # Create a diff with enough changes that a narrow width forces scaling
        diff_lines = [
            b"diff --git a/file.txt b/file.txt",
            b"index 1234567..abcdefg 100644",
            b"--- a/file.txt",
            b"+++ b/file.txt",
            b"@@ -1,50 +1,50 @@",
        ]
        diff_lines.extend(b"+added line %d" % i for i in range(30))
        diff_lines.extend(b"-deleted line %d" % i for i in range(20))

        # Try with a narrow width to force scaling
        result = diffstat(diff_lines, max_width=40)
        self.assertIn(b"file.txt", result)
        self.assertIn(b"50", result)  # Should show 50 changes (30+20)

        # Inspect only the per-file line: the summary line always contains
        # "insertions(+)" and "deletions(-)", so counting "+"/"-" over the
        # whole result would pass even if the histogram were empty.
        file_line = self._stat_line(result, b"file.txt")
        self.assertIn(b"+", file_line)
        self.assertIn(b"-", file_line)

    def test_small_nonzero_changes(self):
        """Test with very small positive changes that would round to zero."""
        # One file with a single addition ...
        normal_diff = [
            b"diff --git a/bigfile.txt b/bigfile.txt",
            b"index 1234567..abcdefg 100644",
            b"--- a/bigfile.txt",
            b"+++ b/bigfile.txt",
            b"@@ -1,1000 +1,1001 @@",
            b"+new line",  # Just one addition
        ]

        # ... next to a file with 1000 additions, so the first file's share
        # of the histogram is tiny.
        lot_of_changes_diff = [
            b"diff --git a/hugefile.txt b/hugefile.txt",
            b"index 1234567..abcdefg 100644",
            b"--- a/hugefile.txt",
            b"+++ b/hugefile.txt",
            b"@@ -1,1000 +1,2000 @@",
        ]
        lot_of_changes_diff.extend(b"+added line %d" % i for i in range(1000))

        combined_diff = normal_diff + lot_of_changes_diff

        # Use a very large width to make the contrast obvious
        result = diffstat(combined_diff, max_width=200)

        self.assertIn(b"bigfile.txt", result)
        self.assertIn(b"hugefile.txt", result)
        self.assertIn(b"2 files changed", result)
        self.assertIn(b"1001 insertions(+)", result)

        # The small change should still have at least one '+' on its line
        bigfile_line = self._stat_line(result, b"bigfile.txt")
        self.assertIn(b"+", bigfile_line)

    def test_big_diff_histogram(self):
        """Test histogram creation with very large diffs."""
        diff_lines = [
            b"diff --git a/bigfile.txt b/bigfile.txt",
            b"index 1234567..abcdefg 100644",
            b"--- a/bigfile.txt",
            b"+++ b/bigfile.txt",
            b"@@ -1,1000 +1,2000 @@",
        ]
        # 1000 additions followed by 500 deletions
        diff_lines.extend(b"+added line %d" % i for i in range(1000))
        diff_lines.extend(b"-deleted line %d" % i for i in range(500))

        # Test with different widths
        narrow_result = diffstat(diff_lines, max_width=40)
        wide_result = diffstat(diff_lines, max_width=120)

        # Both should show the right number of changes
        for result in (narrow_result, wide_result):
            self.assertIn(b"1 files changed", result)
            self.assertIn(b"1000 insertions(+)", result)
            self.assertIn(b"500 deletions(-)", result)

    def test_small_deletions_only(self):
        """Test histogram creation with only a few deletions."""
        # First file: 100 deletions and nothing else
        diff1 = [
            b"diff --git a/file1.txt b/file1.txt",
            b"@@ -1,1000 +1,900 @@",
        ]
        diff1.extend(b"-deleted line %d" % i for i in range(100))

        # Second file: 4000 additions, dwarfing the first file's changes
        diff2 = [
            b"diff --git a/file2.txt b/file2.txt",
            b"@@ -1,1000 +1,5000 @@",
        ]
        diff2.extend(b"+added line %d" % i for i in range(4000))

        # Generate diffstat with a very wide display
        result = diffstat(diff1 + diff2, max_width=200)

        # Make sure both files are reported
        self.assertIn(b"file1.txt", result)
        self.assertIn(b"file2.txt", result)

        # The line for file1.txt should show some - characters
        file1_line = self._stat_line(result, b"file1.txt")
        self.assertIn(b"-", file1_line)

    def test_very_small_deletions_ratio(self):
        """Test histogram with tiny deletion ratio that would round to zero.

        A single deletion sits next to a file with 10000 additions; the
        deletion must still be drawn with at least one ``-`` character.
        """
        diff = [
            b"diff --git a/file1.txt b/file1.txt",
            b"@@ -1,2 +1,1 @@",
            b"-single deleted line",  # Just one deletion
            b" unchanged line",
            b"diff --git a/file2.txt b/file2.txt",
            b"@@ -1,1 +1,10001 @@",
            b" unchanged line",
        ]
        # 10000 additions in file2 make file1's share of the changes tiny
        diff.extend(b"+added line %d" % i for i in range(10000))

        # Generate diffstat with a moderate display width
        result = diffstat(diff, max_width=80)

        # Make sure both files are reported
        self.assertIn(b"file1.txt", result)
        self.assertIn(b"file2.txt", result)

        # Should show at least one - character for the deletion even though
        # its share of the changes is on the order of 1e-4
        file1_line = self._stat_line(result, b"file1.txt")
        self.assertIn(b"-", file1_line)

        # Confirm the summary stats are correct
        self.assertIn(b"2 files changed", result)
        self.assertIn(b"10000 insertions(+)", result)
        self.assertIn(b"1 deletions(-)", result)


class MainFunctionTests(unittest.TestCase):
    """Tests for the main() function.

    ``main`` reads its arguments from ``sys.argv``, so every test swaps that
    list through ``mock.patch.object``; the patch restores the original
    value on exit even when an assertion fails.
    """

    # Same bytes the diff file is expected to contain, trailing newline
    # included (the final empty element produces it).
    SAMPLE_DIFF = b"\n".join(
        [
            b"diff --git a/file.txt b/file.txt",
            b"index 1234567..abcdefg 100644",
            b"--- a/file.txt",
            b"+++ b/file.txt",
            b"@@ -1,3 +1,4 @@",
            b" unchanged line",
            b"+added line",
            b" another unchanged line",
            b" third unchanged line",
            b"",
        ]
    )

    def test_main_with_diff_file(self):
        """Test the main function with a diff file argument."""
        import sys
        from unittest import mock

        # The directory, and the diff file in it, are removed when the
        # ``with`` block exits, whether or not the assertions pass.
        with tempfile.TemporaryDirectory() as tmp_dir:
            diff_path = os.path.join(tmp_dir, "sample.diff")
            with open(diff_path, "wb") as diff_file:
                diff_file.write(self.SAMPLE_DIFF)

            # Test with a file path argument
            with mock.patch.object(sys, "argv", ["diffstat.py", diff_path]):
                self.assertEqual(main(), 0)

        # Test with no args to trigger the self-test
        with mock.patch.object(sys, "argv", ["diffstat.py"]):
            self.assertEqual(main(), 0)

    def test_main_self_test_failure(self):
        """Test the main function when the self-test fails."""
        import contextlib
        import io
        import sys
        from unittest import mock

        def wrong_diffstat(lines, max_width=80):
            """Stand-in for ``diffstat`` that never matches the expected output."""
            return b"WRONG OUTPUT"

        captured_output = io.StringIO()
        with contextlib.ExitStack() as stack:
            # No path argument, so main() runs its built-in self-test.
            stack.enter_context(mock.patch.object(sys, "argv", ["diffstat.py"]))
            # Replace the module-level diffstat that main() calls so that the
            # self-test comparison fails.
            stack.enter_context(
                mock.patch("dulwich.diffstat.diffstat", wrong_diffstat)
            )
            # Capture what main() prints about the failure.
            stack.enter_context(contextlib.redirect_stdout(captured_output))

            return_code = main()

        # The main function should return -1 for self-test failure
        self.assertEqual(return_code, -1)

        # Check that the failure report was printed
        captured = captured_output.getvalue()
        self.assertIn("self test failed", captured)
        self.assertIn("Received:", captured)
        self.assertIn("WRONG OUTPUT", captured)
        self.assertIn("Expected:", captured)


# Run every test case defined in this module when the file is executed
# directly as a script (as opposed to being imported by a test runner).
if __name__ == "__main__":
    unittest.main()
