Added python script to remove duplicate files from a directory (#179)
* create script
* create README.md
* updated README.md
* updated README.md
parent 196815c664
commit 11501c5292
README.md
@@ -155,6 +155,7 @@ So far, the following projects have been integrated to this repo:
|[Send messages to sqs in parallel](send_sqs_messages_in_parallel)|[Jinam Shah](https://github.com/jinamshah)|
|[Codeforces Checker](codeforcesChecker)|[Jinesh Parakh](https://github.com/jineshparakh)|
|[Github repo creator](https://github.com/hastagAB/Awesome-Python-Scripts/tree/master/Git_repo_creator)|[Harish Tiwari](https://github.com/optimist2309)|
|[Remove-Duplicate-Files](Remove-Duplicate-Files)|[Aayushi Varma](https://github.com/aayuv17)|
## How to use :
Remove-Duplicate-Files/README.md (new file, 5 lines)
@@ -0,0 +1,5 @@
# Remove Duplicate Files
A Python script to find and remove duplicate files from a user-specified directory.
# Usage
Simply run removeDuplicateFiles.py from the terminal and enter the path to the target directory when prompted.
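For illustration, a session might look like this (the directory and file names are hypothetical):

```
$ python removeDuplicateFiles.py
Enter relative path to directory: photos
Duplicates in the directory are:
(photos/img_001.jpg, photos/backup/img_001.jpg, )
Enter Y to delete duplicate files: Y
```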
Remove-Duplicate-Files/removeDuplicateFiles.py (new file, 59 lines)
@@ -0,0 +1,59 @@
import os
import hashlib


# Compute the SHA-1 hash of a file, reading it in 1 KiB blocks
# so large files are never loaded into memory all at once.
def computeFileHash(fileName):
    genHash = hashlib.sha1()
    with open(fileName, 'rb') as file:
        block = file.read(1024)
        while block != b'':
            genHash.update(block)
            block = file.read(1024)
    return genHash.hexdigest()


# Recursively collect the paths of all files under a directory.
def getFileList(dirPath):
    listOfFiles = list()
    for (dirpath, dirnames, filenames) in os.walk(dirPath):
        listOfFiles += [os.path.join(dirpath, file) for file in filenames]
    return listOfFiles


def main():
    dirPath = input("Enter relative path to directory: ")
    if not os.path.exists(dirPath):
        print("Invalid path.")
        return
    listOfFiles = getFileList(dirPath)
    duplicateFileSizes = {}
    duplicateFileHashes = {}
    # Group files by size first, so that hashes only have to be
    # computed for files that share a size.
    for file in listOfFiles:
        fileSize = os.path.getsize(file)
        if fileSize in duplicateFileSizes:
            duplicateFileSizes[fileSize].append(file)
        else:
            duplicateFileSizes[fileSize] = [file]
    for sameSizeFiles in duplicateFileSizes.values():
        if len(sameSizeFiles) > 1:
            for path in sameSizeFiles:
                fileHash = computeFileHash(path)
                if fileHash in duplicateFileHashes:
                    duplicateFileHashes[fileHash].append(path)
                else:
                    duplicateFileHashes[fileHash] = [path]
    print("Duplicates in the directory are:")
    for files in duplicateFileHashes.values():
        # Only hash groups with more than one file are true duplicates.
        if len(files) > 1:
            print("(", end='')
            for fileName in files:
                print(fileName, end=', ')
            print(")")
    delete = input('Enter Y to delete duplicate files: ')
    if delete == 'Y' or delete == 'y':
        # Keep the first file of each group and delete the rest.
        for files in duplicateFileHashes.values():
            for fileName in files[1:]:
                os.remove(fileName)


if __name__ == '__main__':
    main()
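The same size-then-hash pass can be written more compactly with collections.defaultdict, which removes the manual dictionary-key checks. A minimal sketch, assuming Python 3.8+ for the `:=` operator; findDuplicates is a hypothetical name, not part of the committed script:

```python
import os
import hashlib
from collections import defaultdict


def findDuplicates(dirPath):
    # Bucket every file under dirPath by size.
    bySize = defaultdict(list)
    for dirpath, _, filenames in os.walk(dirPath):
        for name in filenames:
            path = os.path.join(dirpath, name)
            bySize[os.path.getsize(path)].append(path)

    # Hash only the buckets that contain more than one file.
    byHash = defaultdict(list)
    for paths in bySize.values():
        if len(paths) > 1:
            for path in paths:
                sha1 = hashlib.sha1()
                with open(path, 'rb') as f:
                    while block := f.read(1024):
                        sha1.update(block)
                byHash[sha1.hexdigest()].append(path)

    # Hash groups with more than one member are the duplicates.
    return [group for group in byHash.values() if len(group) > 1]
```

Matching SHA-1 hashes on same-sized files make false positives very unlikely; a byte-for-byte check such as filecmp.cmp would be the conclusive confirmation before deleting anything.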