Added duplicate file remover.py

This commit is contained in:
Rahul Kumar 2022-10-11 01:12:31 +05:30 committed by GitHub
parent 19f0244fd4
commit c330f39b84
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 57 additions and 0 deletions

View File

@ -0,0 +1,40 @@
import hashlib
import os
# Returns the hash string of the given file name
def hashFile(filename):
    """Return the MD5 hex digest of the file at *filename*.

    The file is read in fixed-size binary chunks so that arbitrarily
    large files can be hashed without loading them fully into memory.
    """
    # 64 KiB chunks: large enough for throughput, small enough for memory.
    BLOCKSIZE = 65536
    hasher = hashlib.md5()
    with open(filename, 'rb') as file:
        # read() returns b"" (falsy) at EOF, terminating the loop.
        while buf := file.read(BLOCKSIZE):
            hasher.update(buf)
    return hasher.hexdigest()
if __name__ == "__main__":
    # Maps content hash -> first file seen with that content.
    hashMap = {}
    # Files removed because their content duplicated an earlier file.
    deletedFiles = []
    # Only consider regular files in the current working directory.
    filelist = [f for f in os.listdir() if os.path.isfile(f)]
    for f in filelist:
        key = hashFile(f)
        # A repeated hash means identical content: delete the later file.
        # NOTE: membership test on the dict directly — no .keys() needed.
        if key in hashMap:
            deletedFiles.append(f)
            os.remove(f)
        else:
            hashMap[key] = f
    if deletedFiles:
        print('Deleted Files')
        for i in deletedFiles:
            print(i)
    else:
        print('No duplicate files found')

View File

@ -0,0 +1,17 @@
# Duplicate Files Remover
Duplicate files are removed by this Python script.
- It compares the MD5 hash values of files to determine whether they are duplicates.
  Files found to be duplicates are removed.
# Tech Stack:
- Python
- MD5
# Requirements:
- Python above 3.7 version
# Demo of the Project:
https://user-images.githubusercontent.com/77090462/170878106-8b35f0ae-76c8-4dfd-bb41-2e324855e805.mp4