python-scripts/scripts/Duplicate File Remover/DuplicateFileRemover.py

import hashlib
import os


# Return the MD5 hex digest of the given file's contents
def hashFile(filename):
    # Read the file in fixed-size blocks so that hashing a large file
    # does not load it into memory all at once
    BLOCKSIZE = 65536
    hasher = hashlib.md5()
    with open(filename, 'rb') as file:
        buf = file.read(BLOCKSIZE)
        while len(buf) > 0:
            hasher.update(buf)
            buf = file.read(BLOCKSIZE)
    return hasher.hexdigest()
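
# Illustrative call (not in the original script; 'empty.txt' is a
# hypothetical file name). The digest shown is MD5 of zero bytes,
# which is a fixed, well-known value:
#     hashFile('empty.txt')  # -> 'd41d8cd98f00b204e9800998ecf8427e'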


if __name__ == "__main__":
    # Maps each content hash to the first file seen with that hash
    hashMap = {}
    # Files removed because their contents duplicated an earlier file
    deletedFiles = []
    filelist = [f for f in os.listdir() if os.path.isfile(f)]
    for f in filelist:
        key = hashFile(f)
        # A hash already in the map means the same content was seen
        # before, so this file is a duplicate and is deleted
        if key in hashMap:
            deletedFiles.append(f)
            os.remove(f)
        else:
            hashMap[key] = f
    if len(deletedFiles) != 0:
        print('Deleted Files')
        for i in deletedFiles:
            print(i)
    else:
        print('No duplicate files found')
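
# Usage sketch (an assumption, not part of the original script): run the
# script from inside the directory you want to deduplicate, for example
#     python DuplicateFileRemover.py
# os.listdir() is non-recursive, so only files directly in the current
# working directory are scanned, and duplicates are removed in place.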