File: hash.py

package info (click to toggle)
python-securesystemslib 1.3.0-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 3,316 kB
  • sloc: python: 5,319; sh: 38; makefile: 5
file content (354 lines) | stat: -rwxr-xr-x 11,302 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
"""
<Program Name>
  hash.py

<Author>
  Vladimir Diaz <vladimir.v.diaz@gmail.com>

<Started>
  February 28, 2012.  Based on a previous version of this module.

<Copyright>
  See LICENSE for licensing information.

<Purpose>
  Support secure hashing and message digests. Any hash-related routines that
  securesystemslib requires should be located in this module.  Simplifying the
  creation of digest objects, and providing a central location for hash
  routines are the main goals of this module.  Support routines implemented
  include functions to create digest objects given a filename or file object.
  The standard hashlib library is always supported; pyca/cryptography
  ('pyca_crypto') is supported as well when it is installed.
"""

import hashlib

from securesystemslib import exceptions
from securesystemslib.storage import FilesystemBackend

# Size passed to each main read() call made by digest_fileobject().
DEFAULT_CHUNK_SIZE = 4096
# Hash algorithm used when callers do not pass 'algorithm'.
DEFAULT_HASH_ALGORITHM = "sha256"
# Hash library used when callers do not pass 'hash_library'.
DEFAULT_HASH_LIBRARY = "hashlib"
# Libraries that digest() accepts for 'hash_library'.  "pyca_crypto" is
# appended below when the 'cryptography' package can be imported.
SUPPORTED_LIBRARIES = ["hashlib"]


# If `pyca_crypto` is installed, add it to supported libraries
try:
    import binascii

    from cryptography.hazmat.backends import default_backend
    from cryptography.hazmat.primitives import hashes as _pyca_hashes

    # Maps each algorithm name that digest() accepts for the "pyca_crypto"
    # library to the matching `pyca/cryptography` hash algorithm class.  The
    # values are classes, not instances: digest() instantiates one per call.
    PYCA_DIGEST_OBJECTS_CACHE = {
        "sha224": _pyca_hashes.SHA224,
        "sha256": _pyca_hashes.SHA256,
        "sha384": _pyca_hashes.SHA384,
        "sha512": _pyca_hashes.SHA512,
    }

    SUPPORTED_LIBRARIES.append("pyca_crypto")

    class PycaDiggestWrapper:
        """
        <Purpose>
          Adapter that gives a `cryptography.hazmat.primitives.hashes.Hash`
          object the interface this module expects from digest objects:

            digest_object.digest_size
            digest_object.hexdigest()
            digest_object.update(b'data')
            digest_object.digest()

        <Properties>
          algorithm:
            The 'algorithm' attribute of the wrapped object.

          digest_size:
            The 'digest_size' of the wrapped object's algorithm.

        <Methods>
          digest(self) -> bytes:
            Finalizes the wrapped object and returns the digest as bytes.
            NOTE: a `Hash` instance may only be finalized once, so a copy of
            the wrapped object is taken first and kept as the new wrapped
            object (`_digest_obj`).  This keeps the wrapper usable for further
            `update`, `digest` and `hexdigest` calls.

          hexdigest(self) -> str:
            Returns the digest as a lowercase hexadecimal string.

          update(self, data) -> None:
            Forwards 'data' to the wrapped object's `update` method.
        """

        def __init__(self, digest_obj):
            self._digest_obj = digest_obj

        @property
        def algorithm(self):
            return self._digest_obj.algorithm

        @property
        def digest_size(self):
            return self.algorithm.digest_size

        def digest(self):
            # Take the spare copy *before* finalizing: once finalized, the
            # current object can no longer be copied or updated.
            spare = self._digest_obj.copy()
            raw_digest = self._digest_obj.finalize()
            self._digest_obj = spare
            return raw_digest

        def hexdigest(self):
            return self.digest().hex()

        def update(self, data):
            self._digest_obj.update(data)

except ImportError:  # pragma: no cover
    pass


def digest(algorithm=DEFAULT_HASH_ALGORITHM, hash_library=DEFAULT_HASH_LIBRARY):
    """
    <Purpose>
      Provide the caller with the ability to create digest objects without having
      to worry about crypto library availability or which library to use.  The
      caller also has the option of specifying which hash algorithm and/or
      library to use.

      # Creation of a digest object using defaults or by specifying hash
      # algorithm and library.
      digest_object = securesystemslib.hash.digest()
      digest_object = securesystemslib.hash.digest('sha384')
      digest_object = securesystemslib.hash.digest('sha256', 'hashlib')

      # The expected interface for digest objects.
      digest_object.digest_size
      digest_object.hexdigest()
      digest_object.update(b'data')
      digest_object.digest()

      # Added hash routines by this module.
      digest_object = securesystemslib.hash.digest_fileobject(file_object)
      digest_object = securesystemslib.hash.digest_filename(filename)

    <Arguments>
      algorithm:
        The hash algorithm (e.g., 'sha256', 'sha512').  With the 'hashlib'
        library the special name 'blake2b-256' selects blake2b with a 32-byte
        digest; any other name is passed to hashlib.new() unchanged.

      hash_library:
        The crypto library to use for the given hash algorithm (e.g., 'hashlib').

    <Exceptions>
      securesystemslib.exceptions.UnsupportedAlgorithmError, if an unsupported
      hashing algorithm is specified, or a digest could not be generated with
      the given algorithm.  The underlying error is attached as the cause.

      securesystemslib.exceptions.UnsupportedLibraryError, if an unsupported
      library was requested via 'hash_library'.

    <Side Effects>
      None.

    <Returns>
      Digest object

      e.g.
        hashlib.new(algorithm) or
        PycaDiggestWrapper object
    """

    # Was a hashlib digest object requested and is it supported?
    # If so, return the digest object.
    if hash_library == "hashlib" and hash_library in SUPPORTED_LIBRARIES:
        try:
            if algorithm == "blake2b-256":
                return hashlib.new("blake2b", digest_size=32)

            return hashlib.new(algorithm)

        except (ValueError, TypeError) as err:
            # ValueError: the algorithm value was unknown
            # TypeError: unexpected argument digest_size (on old python), or
            # an 'algorithm' value that is not a string
            raise exceptions.UnsupportedAlgorithmError(algorithm) from err

    # Was a pyca_crypto digest object requested and is it supported?
    if hash_library == "pyca_crypto" and hash_library in SUPPORTED_LIBRARIES:
        # Only the table lookup is guarded, so that errors raised while
        # constructing the Hash object are not mislabelled.
        try:
            hash_class = PYCA_DIGEST_OBJECTS_CACHE[algorithm]

        except (KeyError, TypeError) as err:
            # KeyError: the algorithm name is not in the table
            # TypeError: 'algorithm' is unhashable (e.g. a list); report it
            # the same way the hashlib branch reports bad algorithm values
            raise exceptions.UnsupportedAlgorithmError(algorithm) from err

        return PycaDiggestWrapper(
            _pyca_hashes.Hash(hash_class(), default_backend())
        )

    # The requested hash library is not supported.
    raise exceptions.UnsupportedLibraryError(
        "Unsupported"
        " library requested.  Supported hash"
        " libraries: " + repr(SUPPORTED_LIBRARIES)
    )


def _read_bytes(file_object, size):
    """
    <Purpose>
      Read up to 'size' units from 'file_object' and return them as bytes.
      A str result (as returned by text-mode file objects) is UTF-8 encoded, so
      callers can handle every chunk uniformly as bytes.
    """
    chunk = file_object.read(size)
    if isinstance(chunk, str):
        chunk = chunk.encode("utf-8")

    return chunk


def digest_fileobject(
    file_object,
    algorithm=DEFAULT_HASH_ALGORITHM,
    hash_library=DEFAULT_HASH_LIBRARY,
    normalize_line_endings=False,
):
    """
    <Purpose>
      Generate a digest object given a file object.  The new digest object
      is updated with the contents of 'file_object' prior to returning the
      object to the caller.

    <Arguments>
      file_object:
        File object whose contents will be used as the data
        to update the hash of a digest object to be returned.  It must support
        seek(0) and read(size).  Data read as str is UTF-8 encoded before
        hashing.

      algorithm:
        The hash algorithm (e.g., 'sha256', 'sha512').

      hash_library:
        The library providing the hash algorithms (e.g., 'hashlib').

      normalize_line_endings: (default False)
        Whether or not to normalize line endings for cross-platform support.
        Note that this results in ambiguous hashes (e.g. 'abc\n' and 'abc\r\n'
        will produce the same hash), so be careful to only apply this to text
        files (not binary), when that equivalence is desirable and cannot result
        in easily-maliciously-corrupted files producing the same hash as a valid
        file.

    <Exceptions>
      securesystemslib.exceptions.UnsupportedAlgorithmError, if an unsupported
      hashing algorithm was specified via 'algorithm'.

      securesystemslib.exceptions.UnsupportedLibraryError, if an unsupported
      crypto library was specified via 'hash_library'.

    <Side Effects>
      'file_object' is rewound to position 0 and then read until it is
      exhausted.

    <Returns>
      Digest object

      e.g.
        hashlib.new(algorithm) or
        PycaDiggestWrapper object
    """
    # Digest object returned whose hash will be updated using 'file_object'.
    # digest() raises:
    # securesystemslib.exceptions.UnsupportedAlgorithmError
    # securesystemslib.exceptions.UnsupportedLibraryError
    digest_object = digest(algorithm, hash_library)

    # Defensively seek to beginning, as there's no case where we don't
    # intend to start from the beginning of the file.
    file_object.seek(0)

    # Read the contents of the file object in chunks of at most
    # DEFAULT_CHUNK_SIZE.  Every chunk is converted to bytes up front so that
    # line-ending normalization works for text-mode file objects too.
    while True:
        data = _read_bytes(file_object, DEFAULT_CHUNK_SIZE)
        if not data:
            break

        if normalize_line_endings:
            # A chunk ending in "\r" may be the first half of a "\r\n" pair
            # split across reads.  Keep reading one unit at a time until the
            # chunk no longer ends in "\r" (or the file ends), so the pair is
            # collapsed into a single "\n" below.
            while data[-1:] == b"\r":
                extra = _read_bytes(file_object, 1)
                if not extra:
                    break

                data += extra

            data = (
                data
                # First Windows
                .replace(b"\r\n", b"\n")
                # Then Mac
                .replace(b"\r", b"\n")
            )

        digest_object.update(data)

    return digest_object


def digest_filename(
    filename,
    algorithm=DEFAULT_HASH_ALGORITHM,
    hash_library=DEFAULT_HASH_LIBRARY,
    normalize_line_endings=False,
    storage_backend=None,
):
    """
    <Purpose>
      Open the file named 'filename' through a storage backend, hash its
      contents with digest_fileobject(), and return the resulting digest object.

    <Arguments>
      filename:
        Name of the file whose contents are hashed.  It is passed to the
        storage backend's get() method.

      algorithm:
        The hash algorithm (e.g., 'sha256', 'sha512').

      hash_library:
        The library providing the hash algorithms (e.g., 'hashlib').

      normalize_line_endings:
        Whether or not to normalize line endings for cross-platform support.

      storage_backend:
        An object which implements
        securesystemslib.storage.StorageBackendInterface.  If None (the
        default), a new FilesystemBackend is created and used.

    <Exceptions>
      securesystemslib.exceptions.UnsupportedAlgorithmError, if the given
      'algorithm' is unsupported.

      securesystemslib.exceptions.UnsupportedLibraryError, if the given
      'hash_library' is unsupported.

      securesystemslib.exceptions.StorageError, if the file cannot be opened.

    <Side Effects>
      None.

    <Returns>
      Digest object

      e.g.
        hashlib.new(algorithm) or
        PycaDiggestWrapper object
    """
    backend = FilesystemBackend() if storage_backend is None else storage_backend

    # The backend's get() is used as a context manager around the hashing.
    # digest_fileobject() raises:
    # securesystemslib.exceptions.UnsupportedAlgorithmError
    # securesystemslib.exceptions.UnsupportedLibraryError
    with backend.get(filename) as stream:
        return digest_fileobject(
            stream, algorithm, hash_library, normalize_line_endings
        )