File: fingerprint.py

package info (click to toggle)
python-tuspy 1.0.3-0.1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 212 kB
  • sloc: python: 884; makefile: 3
file content (37 lines) | stat: -rw-r--r-- 1,187 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
"""
An implementation of <tusclient.fingerprint.interface.Fingerprint>,
using hashlib to generate an md5 hash based on the leading block of the file content
"""
from typing import IO
import hashlib
import os

from . import interface


class Fingerprint(interface.Fingerprint):
    """
    Fingerprint generator that combines the stream size with an md5 hash of
    the first ``BLOCK_SIZE`` units read from the start of the stream.
    """

    # Upper bound passed to ``fs.read`` when sampling content for the hash.
    BLOCK_SIZE = 65536

    def get_fingerprint(self, fs: IO):
        """
        Return a fingerprint string value based on the file stream received.

        The content sample is always read from the beginning of the stream, so
        the result does not depend on where the stream happened to be
        positioned when this method was called. The position found on entry
        is restored before returning.

        :Args:
            - fs[IO]: The file stream instance of the file for which a fingerprint would be generated.
              It must support ``seek`` and ``tell``.
        :Returns: fingerprint[str]
        """
        # Remember where the caller left the stream so it can be put back.
        original_position = fs.tell()
        try:
            # Always sample from the start; otherwise a partially consumed
            # stream would yield a different fingerprint for the same file.
            fs.seek(0)
            hasher = hashlib.md5()
            # we encode the content to avoid python 3 unicode errors
            buf = self._encode_data(fs.read(self.BLOCK_SIZE))
            hasher.update(buf)
            # add in the file size to minimize chances of collision
            fs.seek(0, os.SEEK_END)
            file_size = fs.tell()
        finally:
            # Do not leave the caller's stream parked at end-of-file.
            fs.seek(original_position)
        return "size:{}--md5:{}".format(file_size, hasher.hexdigest())

    def _encode_data(self, data):
        """
        Return ``data`` encoded as UTF-8 when it supports ``encode``,
        otherwise return it unchanged.

        :Args:
            - data: The chunk read from the file stream.
        :Returns: the UTF-8 encoded chunk, or ``data`` itself when it has no ``encode`` method.
        """
        try:
            return data.encode("utf-8")
        except AttributeError:
            # in case the content is already binary, this failure would happen.
            return data