mirror of
https://github.com/hastagAB/Awesome-Python-Scripts.git
synced 2024-11-23 20:11:07 +00:00
Added python script to remove duplicate files from a directory (#179)
* create script * create README.md * updated README.md * updated README.md
This commit is contained in:
parent
196815c664
commit
11501c5292
|
@ -155,6 +155,7 @@ So far, the following projects have been integrated to this repo:
|
||||||
|[Send messages to sqs in parallel](send_sqs_messages_in_parallel)|[Jinam Shah](https://github.com/jinamshah)|
|
|[Send messages to sqs in parallel](send_sqs_messages_in_parallel)|[Jinam Shah](https://github.com/jinamshah)|
|
||||||
|[Codeforces Checker](codeforcesChecker)|[Jinesh Parakh](https://github.com/jineshparakh)|
|
|[Codeforces Checker](codeforcesChecker)|[Jinesh Parakh](https://github.com/jineshparakh)|
|
||||||
|[Github repo creator](https://github.com/hastagAB/Awesome-Python-Scripts/tree/master/Git_repo_creator)|[Harish Tiwari ](https://github.com/optimist2309)
|
|[Github repo creator](https://github.com/hastagAB/Awesome-Python-Scripts/tree/master/Git_repo_creator)|[Harish Tiwari ](https://github.com/optimist2309)
|
||||||
|
|[Remove-Duplicate-Files](Remove-Duplicate-Files)|[Aayushi Varma](https://github.com/aayuv17)
|
||||||
|
|
||||||
|
|
||||||
## How to use :
|
## How to use :
|
||||||
|
|
5
Remove-Duplicate-Files/README.md
Normal file
5
Remove-Duplicate-Files/README.md
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
# Remove Duplicate Files
|
||||||
|
A Python script that finds and optionally removes duplicate files from a user-specified directory.
|
||||||
|
|
||||||
|
# Usage
|
||||||
|
Run the script removeDuplicateFiles.py from the terminal and enter the path to the directory when prompted.
|
59
Remove-Duplicate-Files/removeDuplicateFiles.py
Normal file
59
Remove-Duplicate-Files/removeDuplicateFiles.py
Normal file
|
@ -0,0 +1,59 @@
|
||||||
|
import os
|
||||||
|
import hashlib
|
||||||
|
|
||||||
|
# function to compute SHA-1 hash of a file
|
||||||
|
def computeFileHash(fileName, blockSize=65536):
    """Return the SHA-1 hex digest of a file's contents.

    The file is read in fixed-size chunks so that arbitrarily large files
    can be hashed without loading them fully into memory.

    Args:
        fileName: Path of the file to hash.
        blockSize: Number of bytes to read per chunk. Must be positive.

    Returns:
        The hexadecimal SHA-1 digest of the file as a string.

    Raises:
        ValueError: If ``blockSize`` is smaller than 1.
        OSError: If the file cannot be opened or read.
    """
    if blockSize < 1:
        # read(0) returns b'' immediately, which would silently produce the
        # digest of an empty file for every input.
        raise ValueError("blockSize must be a positive integer")

    genHash = hashlib.sha1()
    # The ``with`` statement closes the file; no explicit close() is needed.
    with open(fileName, 'rb') as fileObj:
        # iter() with a sentinel keeps calling read() until it returns b''.
        for block in iter(lambda: fileObj.read(blockSize), b''):
            genHash.update(block)
    return genHash.hexdigest()
|
||||||
|
|
||||||
|
#function to get list of files present in a directory
|
||||||
|
def getFileList(dirPath):
    """Collect the paths of all files found under a directory tree.

    Walks ``dirPath`` recursively with ``os.walk`` and joins every file
    name with the directory it was found in.

    Args:
        dirPath: Root directory to walk.

    Returns:
        A list of file paths, in the order ``os.walk`` yields them.
    """
    return [
        os.path.join(currentDir, name)
        for currentDir, _subDirs, names in os.walk(dirPath)
        for name in names
    ]
|
||||||
|
|
||||||
|
def main():
    """Interactively find and optionally delete duplicate files.

    Prompts for a directory, groups the files beneath it by size, hashes
    only the files that share a size with at least one other file, and
    prints every group of files with identical content. If the user then
    answers ``Y``/``y``, all but the first file of each group are deleted.

    Exits the interpreter (via ``sys.exit``) when the entered path is not
    an existing directory.
    """
    import sys  # local import: only needed for the early exit below

    dirPath = input("Enter relative path to directory: ")
    # A directory tree is walked, so a path to a regular file is just as
    # unusable as a path that does not exist.
    if not os.path.isdir(dirPath):
        print("Invalid path.")
        sys.exit()

    # Group files by size first, so that hashes have to be computed only
    # for files that could possibly be identical.
    filesBySize = {}
    for path in getFileList(dirPath):
        # A symlink hashes the same as its target; treating it as a
        # duplicate could delete the real file and keep a dangling link.
        if os.path.islink(path):
            continue
        try:
            fileSize = os.path.getsize(path)
        except OSError:
            print("Skipping unreadable file:", path)
            continue
        filesBySize.setdefault(fileSize, []).append(path)

    filesByHash = {}
    for sameSizeFiles in filesBySize.values():
        if len(sameSizeFiles) < 2:
            continue
        for path in sameSizeFiles:
            try:
                fileHash = computeFileHash(path)
            except OSError:
                print("Skipping unreadable file:", path)
                continue
            filesByHash.setdefault(fileHash, []).append(path)

    # Files that merely share a size end up in single-member hash groups;
    # only groups with two or more members are real duplicates.
    duplicateGroups = [
        group for group in filesByHash.values() if len(group) > 1
    ]

    print("Duplicates in the directory are:")
    for files in duplicateGroups:
        print("(", end='')
        for fileName in files:
            print(fileName, end=', ')
        print(")")

    delete = input('Enter Y to delete duplicate files: ')
    if delete in ('Y', 'y'):
        for files in duplicateGroups:
            # Keep the first file of every group and remove the rest.
            for fileName in files[1:]:
                os.remove(fileName)
|
||||||
|
# Run the interactive duplicate finder only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()
|
||||||
|
|
Loading…
Reference in New Issue
Block a user