Improved Code.

This commit is contained in:
Aditya Tiwari 2022-06-23 06:13:49 +05:30 committed by GitHub
parent 369c2a75eb
commit aab6433e61
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -1,50 +1,27 @@
import PyPDF2
### img2pdf #### from os import path
import os
import sys import sys
from fpdf import FPDF
from PIL import Image
import glob
def File_existance_checker(filePath):
    """Return ``filePath`` unchanged when it names an existing regular file.

    Args:
        filePath: Path to check, as typed by the user.

    Returns:
        The same ``filePath`` value that was passed in.

    Raises:
        SystemExit: With exit status 1 when ``filePath`` is not an existing
            regular file (missing path, directory, or empty string).
    """
    if path.isfile(filePath):
        return filePath
    # Diagnostics go to stderr so they do not mix with the script's output.
    print("[-] Provide a valid File", file=sys.stderr)
    sys.exit(1)
# NOTE(review): these lines come from a flattened two-column diff view, so
# indentation is lost and several lines hold one statement from each column.
# Presumably the first statement on such lines belongs to the removed
# image-folder-to-PDF script and the second to the added PDF-to-text script
# (the hunk header reads "-1,50 +1,27") -- confirm against the repository
# before running or restyling any of this.

# PDF-to-text: prompt for the path of the PDF to read.
pdf_stored_path=input("Enter the name of you pdf file (please use backslash when typing in directory path):")
# Output path: same directory, base name with every ".pdf" occurrence replaced
# by ".txt". When the name contains no ".pdf" the result points back at the
# input file itself, which is later opened in append mode.
textFile_stored_path=path.join(path.dirname(pdf_stored_path),path.basename(pdf_stored_path).replace(".pdf",".txt"))
# File_existance_checker exits the process when the path is not a regular file.
pdf_stored_path=File_existance_checker(pdf_stored_path)
# Two statements share this line: a raw_input() prompt for the image folder
# (raw_input is a Python 2 builtin) and a print() of the derived text path.
images_path = raw_input("Enter the path of the folder containing images : ") print(textFile_stored_path)
# Glob pattern for every entry in the image folder whose name contains a dot.
images =images_path+"/*.*"
# Two statements: an assert that the folder exists (asserts are skipped under
# "python -O") and the opening of the PDF in binary read mode.
assert os.path.exists(images_path), "this diretory doesn't exist, "+str(images_path) with open(pdf_stored_path,'rb') as pdf_object:
# Two statements: os.chdir() returns None, so f is bound to None; the second
# statement builds the PyPDF2 reader over the open PDF.
f = os.chdir(images_path) pdf_read=PyPDF2.PdfFileReader(pdf_object)
print("Hooray we found your directory!")
# Collect every path matched by the glob pattern.
image_list = []
for filename in glob.glob(images):
# Two statements: the loop body append, and the PDF page count.
image_list.append(filename) pdf_pages=pdf_read.numPages
# Two statements: the FPDF document (millimetre units) and the per-page loop.
pdf = FPDF( unit = 'mm') for i in range(pdf_pages):
page_object=pdf_read.getPage(i)
# Two statements: splitting each image path on a backslash only (paths that
# use "/" are left whole), and opening the text file in append mode, so
# existing content is kept. Presumably the open sits inside the page loop --
# indentation is lost, confirm.
imnames = [i.split("\\") for i in image_list] with open(textFile_stored_path,'a+') as f:
# Two statements: keep the last path component, and write the page's text.
imnames = [i[-1] for i in imnames ] f.write((page_object.extract_text()))
# Two statements: split each file name on dots, and report the output name.
imnums = [i.split('.') for i in imnames] print(f"[+] Pdf Text has been extracted and written to {path.basename(textFile_stored_path)}")
# The part before the first dot is converted with int(), so a file name that
# does not start with a number raises ValueError here.
imnums = [i[0] for i in imnums]
imnums = [int(i) for i in imnums]
# pos is passed twice to pdf.image() below (presumably as x and y -- confirm).
pos = 0
# Map each image path to its number and order the paths by that number.
images_dict = dict(zip(image_list, imnums))
sorted_images = sorted(images_dict , key = images_dict.get)
# Presumably the next three lines are the loop body (indentation lost): one
# page per image. im is not referenced again in the visible lines.
for i in list(sorted_images):
pdf.add_page()
im = Image.open(i)
# The trailing 200, 250 are presumably width and height in mm -- confirm
# against the fpdf image() signature.
pdf.image(i,pos,pos,200,250)
# Prompt for the output name (raw_input again), append ".pdf", and write it.
pdf_name = raw_input("Enter the pdf name : ")
pdf_name = pdf_name+".pdf"
pdf.output(pdf_name)