mirror of
https://github.com/metafy-social/python-scripts.git
synced 2024-11-27 14:01:12 +00:00
Added duplicate file remover.py
This commit is contained in:
parent
19f0244fd4
commit
c330f39b84
40
scripts/Duplicate File Remover/DuplicateFileRemover.py
Normal file
40
scripts/Duplicate File Remover/DuplicateFileRemover.py
Normal file
|
@ -0,0 +1,40 @@
|
|||
import hashlib
|
||||
import os
|
||||
|
||||
# Returns the hash string of the given file name
|
||||
|
||||
|
||||
def hashFile(filename):
    """Return the hexadecimal MD5 digest of the file at ``filename``.

    The file is opened in binary mode and consumed in 64 KiB pieces, so the
    whole content never has to be held in memory at once.
    """
    chunk_size = 65536
    digest = hashlib.md5()
    with open(filename, 'rb') as stream:
        # iter() with a sentinel keeps calling read() until it returns b'',
        # which is what a binary file yields at end-of-file.
        for chunk in iter(lambda: stream.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Removes duplicate files from the current working directory: files
    # whose content hashes are equal are treated as duplicates, the first
    # one seen is kept and every later one is deleted.

    # Maps each content hash to the first file name seen with that hash;
    # that first file is the copy that is kept.
    hashMap = {}

    # Names of the files removed as duplicates, in removal order
    deletedFiles = []

    # os.listdir() returns names in arbitrary order, so sort them: the copy
    # that survives is then always the one whose name sorts first, instead
    # of depending on whatever order the file system happens to report.
    filelist = sorted(f for f in os.listdir() if os.path.isfile(f))
    for f in filelist:
        key = hashFile(f)
        if key in hashMap:
            # Same digest as an earlier file: treat it as a duplicate
            deletedFiles.append(f)
            os.remove(f)
        else:
            hashMap[key] = f

    if deletedFiles:
        print('Deleted Files')
        for i in deletedFiles:
            print(i)
    else:
        print('No duplicate files found')
|
17
scripts/Duplicate File Remover/README.md
Normal file
17
scripts/Duplicate File Remover/README.md
Normal file
|
@ -0,0 +1,17 @@
|
|||
# Duplicate Files Remover
|
||||
|
||||
Duplicate files are removed by using this Python script.
|
||||
|
||||
- It compares the MD5 hash values of the files to determine whether they are duplicates.
|
||||
If they are duplicates, they are removed.
|
||||
|
||||
# Tech Stack:
|
||||
- Python
|
||||
- MD5
|
||||
|
||||
# Requirements:
|
||||
- Python version above 3.7
|
||||
|
||||
# Demo of the Project:
|
||||
|
||||
https://user-images.githubusercontent.com/77090462/170878106-8b35f0ae-76c8-4dfd-bb41-2e324855e805.mp4
|
Loading…
Reference in New Issue
Block a user