#! /usr/bin/python3
#
# This script outputs all files matching a given checksum

from __future__ import print_function
import os
import hashlib
from optparse import OptionParser

class FindByChksum:
    """Find files below a directory whose checksum matches a given value.

    The checksum to look for may be a full hex digest or just a prefix of
    one.  Optionally only the first ``hash_limit`` bytes of each file are
    hashed.

    Parameters
    ----------
    options:
        Object with the attributes ``verbose`` (bool), ``chksum`` (hex
        digest or prefix), ``type`` (``"sha256"`` or ``"md5"``) and
        ``hash_limit`` (anything ``int()`` accepts; values <= 0 mean
        "hash the whole file").
    args:
        Positional arguments; ``args[0]`` is the directory to search.
    """

    # Number of bytes read from a file per iteration while hashing.
    _CHUNK_SIZE = 4096

    # Supported checksum types mapped to their hashlib constructors.  The
    # constructors are only called on demand so that an unused (or locally
    # unavailable) algorithm is never instantiated.
    _HASHERS = {
        'sha256': hashlib.sha256,
        'md5': hashlib.md5,
    }

    def __init__(self, options, args):
        self.__options = options
        self.__args = args

        self.__verbose = options.verbose
        # hexdigest() always yields lowercase hex, so normalise the wanted
        # checksum to make the comparison case-insensitive and tolerant of
        # stray surrounding whitespace.
        self.__chksum = (options.chksum or "").strip().lower()
        self.__type = options.type
        self.__hash_limit = int(options.hash_limit)

        self.__dir = args[0]

    def get_hash(self, filename):
        """Return the hex digest of ``filename`` or ``None`` on failure.

        ``None`` is returned when ``filename`` is not a regular file, when
        the configured checksum type is not supported, or when the file
        cannot be opened or read.  If a positive hash limit is configured,
        at most that many leading bytes of the file are hashed.
        """
        if not os.path.isfile(filename):
            return None

        factory = self._HASHERS.get(self.__type)
        if factory is None:
            return None
        hasher = factory()

        limit = self.__hash_limit
        bytes_read = 0
        try:
            with open(filename, "rb") as fh:
                # A limit <= 0 means "no limit": read until end of file.
                while limit <= 0 or bytes_read < limit:
                    size = self._CHUNK_SIZE
                    if limit > 0:
                        # Never read past the configured limit.
                        size = min(size, limit - bytes_read)
                    buf = fh.read(size)
                    if not buf:
                        break
                    hasher.update(buf)
                    bytes_read += len(buf)
        except (IOError, OSError):
            # Unreadable file (permissions, vanished meanwhile, ...): report
            # it as "no checksum" so the caller skips it instead of aborting
            # the whole directory walk.
            return None

        return hasher.hexdigest()

    def get_file_list(self, base_dir):
        """Return the paths below ``base_dir`` whose checksum matches.

        A file matches when its hex digest starts with the configured
        checksum; a full digest therefore matches exactly, and an empty
        checksum matches every file that could be hashed.  Files for which
        no checksum could be computed are skipped.
        """
        res = []
        for base, _dirs, files in os.walk(base_dir):
            for name in files:
                full_name = os.path.join(base, name)

                if self.__verbose:
                    print("Processing %s" % (full_name))

                digest = self.get_hash(full_name)
                if not digest:
                    if self.__verbose:
                        print("Skip %s due to failed chksum" % (full_name))
                    continue

                if self.__verbose:
                    print("Got chksum %s for %s" % (digest, full_name))

                # startswith() also covers the exact-match case.
                if digest.startswith(self.__chksum):
                    res.append(full_name)
        return res

    def find(self):
        """Search the configured directory and print every matching path."""
        if self.__verbose:
            print("Finding in %s" % (self.__dir))

        for path in self.get_file_list(self.__dir):
            print(path)

if __name__ == "__main__":
    # Command-line entry point: parse the options, validate the directory
    # argument and print every file whose checksum matches.
    parser = OptionParser( usage = "usage: %prog [options] <dir>" )
    parser.add_option( "-v", "--verbose",
                       action  = "store_true",
                       dest    = "verbose",
                       default = False,
                       help    = "verbose output" )
    # type="int" lets optparse reject non-numeric limits with a usage error
    # instead of a ValueError traceback later on.
    parser.add_option( "-l", "--limit",
                       action  = "store",
                       type    = "int",
                       dest    = "hash_limit",
                       default = 0,
                       help    = "set number of bytes used for hashing" )
    parser.add_option( "-s", "--sum",
                       action  = "store",
                       dest    = "chksum",
                       default = "",
                       help    = "set the chksum (or prefix of it) for comparison" )
    # Restricting the choices avoids silently skipping every file when an
    # unsupported checksum type is requested.
    parser.add_option( "-t", "--type",
                       action  = "store",
                       type    = "choice",
                       choices = [ "sha256", "md5" ],
                       dest    = "type",
                       default = "sha256",
                       help    = "set the chksum type (sha256, md5)" )

    ( options, args ) = parser.parse_args()

    # The directory is mandatory; report a usage error rather than failing
    # with an IndexError when it is missing.
    if not args:
        parser.error( "missing <dir> argument" )
    if not os.path.isdir( args[0] ):
        parser.error( "not a directory: %s" % ( args[0] ) )

    s = FindByChksum( options, args )

    s.find()
