File: hashhandler.py

package info (click to toggle)
python-cogapp 3.6.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 388 kB
  • sloc: python: 3,449; makefile: 79
file content (175 lines) | stat: -rw-r--r-- 6,348 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
"""Hash handling for cog output verification."""

import base64
import re
from .utils import md5


class HashHandler:
    """Handles checksum generation and verification for cog output.

    Two on-disk hash formats are recognised after the end output marker:

    * old format: ``(checksum: <32 lowercase hex chars>)``
    * new format: ``(sum: <10 base64 chars>)``

    New hashes are written in the new format unless the caller asks for the
    existing format to be preserved.
    """

    def __init__(self, end_output_marker):
        """Initialize the hash handler with the end output marker pattern.

        Args:
            end_output_marker: The end output marker string (e.g., "[[[end]]]")
        """
        self.end_output_marker = end_output_marker
        self._setup_patterns()

    def _setup_patterns(self):
        """Set up regex patterns for hash detection and formatting.

        Sets ``re_end_output_with_hash`` (compiled pattern matching the end
        marker followed by a hash section in either format) and ``end_format``
        (a ``%``-style template producing the marker plus a new-format hash).
        """
        end_output = re.escape(self.end_output_marker)
        # Support both old format (checksum: 32-char hex) and new format (sum: 10-char base64)
        self.re_end_output_with_hash = re.compile(
            end_output
            + r"(?P<hashsect> *\((?:checksum: (?P<hash>[a-f0-9]{32})|sum: (?P<b64hash>[A-Za-z0-9+/]{10}))\))"
        )
        # The marker is user-supplied text: double any "%" so that
        # ``end_format % value`` reproduces the marker literally instead of
        # treating part of it as a conversion specifier.
        self.end_format = self.end_output_marker.replace("%", "%%") + " (sum: %s)"

    def compute_hash(self, content):
        """Compute MD5 hash of the given content.

        Args:
            content: String content to hash (encoded as UTF-8 before hashing)

        Returns:
            str: Hexadecimal hash digest
        """
        hasher = md5()
        hasher.update(content.encode("utf-8"))
        return hasher.hexdigest()

    def compute_lines_hash(self, lines):
        """Compute MD5 hash of a list of lines.

        The lines are fed to the hasher in order, so the result equals the
        hash of their concatenation.

        Args:
            lines: List of line strings

        Returns:
            str: Hexadecimal hash digest
        """
        hasher = md5()
        for line in lines:
            hasher.update(line.encode("utf-8"))
        return hasher.hexdigest()

    def hex_to_base64_hash(self, hex_hash):
        """Convert a 32-character hex hash to a 10-character base64 hash.

        Args:
            hex_hash: 32-character hexadecimal hash string

        Returns:
            str: 10-character base64 hash string

        Raises:
            ValueError: If hex_hash is not valid hexadecimal
                (raised by ``bytes.fromhex``)
        """
        hash_bytes = bytes.fromhex(hex_hash)
        # Only the first 10 base64 characters are kept as the short form.
        return base64.b64encode(hash_bytes).decode("ascii")[:10]

    def extract_hash_from_line(self, line):
        """Extract hash from an end output line if present.

        Args:
            line: The end output line to check

        Returns:
            tuple: (hash_type, hash_value) where hash_type is 'hex' or 'base64'
                   and hash_value is the raw hash value, or (None, None) if not found
        """
        hash_match = self.re_end_output_with_hash.search(line)
        if hash_match is None:
            return (None, None)

        hex_value = hash_match.group("hash")
        if hex_value:
            # Old format: checksum with hex
            return ("hex", hex_value)

        # New format: sum with base64
        b64_value = hash_match.group("b64hash")
        assert b64_value, "Regex matched but no hash group found"
        return ("base64", b64_value)

    def validate_hash(self, line, expected_hash):
        """Validate that the hash in the line matches the expected hash.

        Args:
            line: The end output line containing the hash
            expected_hash: The expected hash value (hex format)

        Returns:
            bool: True if the hash matches or no hash is present. A mismatch
            is never reported through the return value; it raises instead.

        Raises:
            ValueError: If hash is present but doesn't match expected
        """
        hash_type, old_hash = self.extract_hash_from_line(line)
        if hash_type is None:
            return True

        if hash_type == "hex":
            # Compare hex directly
            expected = expected_hash
        else:
            # Convert expected hex to base64 and compare
            assert hash_type == "base64", f"Unknown hash type: {hash_type}"
            expected = self.hex_to_base64_hash(expected_hash)

        if old_hash != expected:
            raise ValueError(
                "Output has been edited! Delete old checksum to unprotect."
            )
        return True

    def format_end_line_with_hash(
        self, line, new_hash, add_hash=True, preserve_format=False
    ):
        """Format the end output line with or without hash.

        Args:
            line: The original end output line
            new_hash: The hash to add if add_hash is True (hex format)
            add_hash: Whether to add hash to the output
            preserve_format: If True and an existing hash is found, preserve its format

        Returns:
            str: The formatted end output line. If add_hash is True but the
            line contains no end output marker, the line is returned unchanged.
        """
        hash_match = self.re_end_output_with_hash.search(line)

        if not add_hash:
            # Remove hash if present. Cut by the match position rather than by
            # text so an identical "(sum: ...)" earlier in the line is untouched.
            if hash_match is None:
                return line
            start, end = hash_match.span("hashsect")
            return line[:start] + line[end:]

        if hash_match is not None and preserve_format and hash_match.group("hash"):
            # Keep the old hex format
            formatted_hash = f" (checksum: {new_hash})"
        else:
            # New format (also used when preserving an existing base64 hash)
            formatted_hash = f" (sum: {self.hex_to_base64_hash(new_hash)})"

        # Built by concatenation, not "%" formatting, so a marker containing
        # "%" is emitted verbatim.
        replacement = self.end_output_marker + formatted_hash

        if hash_match is not None:
            # Replace the existing marker + hash section
            start, end = hash_match.span()
            return line[:start] + replacement + line[end:]

        # Add new hash after the first occurrence of the marker
        head, marker, tail = line.partition(self.end_output_marker)
        if not marker:
            return line
        return head + replacement + tail