mirror of
https://github.com/metafy-social/python-scripts.git
synced 2024-12-18 00:00:17 +00:00
Added duplicate file remover.py
This commit is contained in:
parent
19f0244fd4
commit
c330f39b84
40
scripts/Duplicate File Remover/DuplicateFileRemover.py
Normal file
40
scripts/Duplicate File Remover/DuplicateFileRemover.py
Normal file
|
@ -0,0 +1,40 @@
|
||||||
|
import hashlib
|
||||||
|
import os
|
||||||
|
|
||||||
|
# Returns the hash string of the given file name
|
||||||
|
|
||||||
|
|
||||||
|
def hashFile(filename, blocksize=65536):
    """Return the hexadecimal MD5 digest of a file's contents.

    The file is read in fixed-size chunks so that large files can be
    hashed without loading them into memory all at once.

    Args:
        filename: Path of the file to hash.
        blocksize: Number of bytes to read per chunk. Must be positive.

    Returns:
        The MD5 digest of the file's bytes as a hexadecimal string.

    Raises:
        ValueError: If ``blocksize`` is not positive.
        OSError: If the file cannot be opened or read.
    """
    # read(0) returns b'' immediately, which would silently yield the hash
    # of an empty file; reject such sizes up front.
    if blocksize <= 0:
        raise ValueError("blocksize must be a positive integer")

    hasher = hashlib.md5()
    with open(filename, 'rb') as file:
        # iter() with a sentinel keeps calling read() until it returns b''
        for buf in iter(lambda: file.read(blocksize), b''):
            hasher.update(buf)
    return hasher.hexdigest()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Maps each content hash to the first file seen with that content
    hashMap = {}

    # Names of the files that were actually removed
    deletedFiles = []

    # Only regular files in the current working directory are considered.
    # Symbolic links are skipped: a link hashes the same as its target, so
    # deleting the target as a "duplicate" would leave the link dangling.
    # Sorting makes the choice of which copy is kept reproducible.
    filelist = sorted(
        f for f in os.listdir()
        if os.path.isfile(f) and not os.path.islink(f)
    )

    for f in filelist:
        try:
            key = hashFile(f)
        except OSError as err:
            # One unreadable file must not abort the whole run
            print(f'Skipped {f}: {err}')
            continue

        if key not in hashMap:
            # First file with this content: remember it and keep it
            hashMap[key] = f
            continue

        # Same hash as an earlier file: treat it as a duplicate and delete it
        try:
            os.remove(f)
        except OSError as err:
            print(f'Could not delete {f}: {err}')
        else:
            # Record the file only once the removal has really happened
            deletedFiles.append(f)

    if deletedFiles:
        print('Deleted Files')
        for i in deletedFiles:
            print(i)
    else:
        print('No duplicate files found')
|
17
scripts/Duplicate File Remover/README.md
Normal file
17
scripts/Duplicate File Remover/README.md
Normal file
|
@ -0,0 +1,17 @@
|
||||||
|
# Duplicate Files Remover
|
||||||
|
|
||||||
|
Duplicate files are removed using this Python script.
|
||||||
|
|
||||||
|
- It compares the MD5 hash values of the files to determine whether they are duplicates.
|
||||||
|
If files are duplicates, all but the first one found are removed.
|
||||||
|
|
||||||
|
# Tech Stack:
|
||||||
|
- Python
|
||||||
|
- MD5
|
||||||
|
|
||||||
|
# Requirements:
|
||||||
|
- Python version 3.7 or above
|
||||||
|
|
||||||
|
# Demo of the Project:
|
||||||
|
|
||||||
|
https://user-images.githubusercontent.com/77090462/170878106-8b35f0ae-76c8-4dfd-bb41-2e324855e805.mp4
|
Loading…
Reference in New Issue
Block a user