mirror of
https://github.com/hastagAB/Awesome-Python-Scripts.git
synced 2024-12-18 00:00:14 +00:00
60 lines
2.0 KiB
Python
60 lines
2.0 KiB
Python
|
import os
|
||
|
import hashlib
|
||
|
|
||
|
# function to compute SHA-1 hash of a file
|
||
|
def computeFileHash(fileName, blockSize=65536):
    """Return the SHA-1 hex digest of the file at *fileName*.

    The file is opened in binary mode and read in chunks of *blockSize*
    bytes, so the whole file never has to be held in memory at once.

    Args:
        fileName: Path of the file to hash.
        blockSize: Number of bytes to read per chunk (default 64 KiB).
            Must be a positive integer.

    Returns:
        The hexadecimal SHA-1 digest of the file's contents as a string.

    Raises:
        ValueError: If *blockSize* is smaller than 1.
        OSError: If the file cannot be opened or read.
    """
    if blockSize < 1:
        # read(0) always returns b'' and would silently hash nothing.
        raise ValueError("blockSize must be a positive integer")

    genHash = hashlib.sha1()
    # The with-statement closes the file; no explicit close() is needed.
    with open(fileName, 'rb') as stream:
        # Two-argument iter() keeps calling read() until it returns b'' (EOF).
        for block in iter(lambda: stream.read(blockSize), b''):
            genHash.update(block)
    return genHash.hexdigest()
|
||
|
|
||
|
# function to get the list of all files under a directory (searched recursively)
|
||
|
def getFileList(dirPath):
    """Return the paths of all files found under *dirPath*.

    The directory tree is traversed with ``os.walk``; each returned entry is
    the containing directory joined with the file name.

    Args:
        dirPath: Root directory to search.

    Returns:
        A list of file paths, in the order ``os.walk`` yields them.
    """
    return [
        os.path.join(parent, name)
        for parent, _subdirs, names in os.walk(dirPath)
        for name in names
    ]
|
||
|
|
||
|
def _groupBy(paths, keyFunc):
    """Return a dict mapping ``keyFunc(path)`` to the list of paths sharing that key."""
    groups = {}
    for path in paths:
        groups.setdefault(keyFunc(path), []).append(path)
    return groups


def main():
    """Interactively find, list and optionally delete duplicate files.

    Prompts for a directory, collects every file beneath it, and reports
    groups of files whose contents are identical. Files are first grouped by
    size so that hashes are only computed for files that share a size with
    at least one other file. If the user answers ``Y``/``y`` to the final
    prompt, every file except the first of each duplicate group is removed.

    Raises:
        SystemExit: If the entered path is not an existing directory.
    """
    dirPath = input("Enter relative path to directory: ")
    # isdir (not exists): a path to a regular file cannot be scanned.
    if not os.path.isdir(dirPath):
        print("Invalid path.")
        # exit() is only injected by the site module and may be missing;
        # raising SystemExit directly is equivalent and always available.
        raise SystemExit()

    listOfFiles = getFileList(dirPath)

    # Only groups with at least two identical files are real duplicates;
    # files that merely share a size but differ in hash are dropped.
    duplicateGroups = []
    for sameSizeFiles in _groupBy(listOfFiles, os.path.getsize).values():
        if len(sameSizeFiles) < 2:
            continue
        sameHashGroups = _groupBy(sameSizeFiles, computeFileHash)
        for sameHashFiles in sameHashGroups.values():
            if len(sameHashFiles) > 1:
                duplicateGroups.append(sameHashFiles)

    print("Duplicates in the directory are:")
    for files in duplicateGroups:
        print("(", end='')
        for fileName in files:
            print(fileName, end=', ')
        print(")")

    delete = input('Enter Y to delete duplicate files: ')
    if delete in ('Y', 'y'):
        for files in duplicateGroups:
            # Keep the first file of each group and remove the rest.
            for fileName in files[1:]:
                os.remove(fileName)
|
||
|
# Run the interactive tool only when executed as a script, not on import.
if __name__ == '__main__':
    main()
|
||
|
|