python-scripts/scripts/Duplicate File Remover/DuplicateFileRemover.py

import hashlib
import os


# Return the MD5 hex digest of the given file's contents
def hashFile(filename):
    # Read the file in fixed-size blocks so that hashing a large file
    # does not load it into memory all at once
    BLOCKSIZE = 65536
    hasher = hashlib.md5()
    with open(filename, 'rb') as file:
        buf = file.read(BLOCKSIZE)
        while len(buf) > 0:
            hasher.update(buf)
            buf = file.read(BLOCKSIZE)
    return hasher.hexdigest()
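
# Illustrative call (not in the original script; 'empty.txt' is a
# hypothetical file name). The digest shown is MD5 of zero bytes,
# which is a fixed, well-known value:
#     hashFile('empty.txt')  # -> 'd41d8cd98f00b204e9800998ecf8427e'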


if __name__ == "__main__":
    # Maps each content hash to the first file seen with that hash
    hashMap = {}
    # Files removed because their contents duplicated an earlier file
    deletedFiles = []
    filelist = [f for f in os.listdir() if os.path.isfile(f)]
    for f in filelist:
        key = hashFile(f)
        # A hash already in the map means the same content was seen
        # before, so this file is a duplicate and is deleted
        if key in hashMap:
            deletedFiles.append(f)
            os.remove(f)
        else:
            hashMap[key] = f
    if len(deletedFiles) != 0:
        print('Deleted Files')
        for i in deletedFiles:
            print(i)
    else:
        print('No duplicate files found')
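
# Usage sketch (an assumption, not part of the original script): run the
# script from inside the directory you want to deduplicate, for example
#     python DuplicateFileRemover.py
# os.listdir() is non-recursive, so only files directly in the current
# working directory are scanned, and duplicates are removed in place.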