From aab6433e61fc8a0546ef39fe8dad2382ad2e325a Mon Sep 17 00:00:00 2001 From: Aditya Tiwari Date: Thu, 23 Jun 2022 06:13:49 +0530 Subject: [PATCH] Improved Code. --- images2pdf/imges2pdf.py | 65 +++++++++++++---------------------------- 1 file changed, 21 insertions(+), 44 deletions(-) diff --git a/images2pdf/imges2pdf.py b/images2pdf/imges2pdf.py index 0dd8d7b..1bf1d9e 100644 --- a/images2pdf/imges2pdf.py +++ b/images2pdf/imges2pdf.py @@ -1,50 +1,27 @@ - -### img2pdf #### -import os +import PyPDF2 +from os import path import sys -from fpdf import FPDF -from PIL import Image -import glob +def File_existance_checker(filePath): + if path.isfile(filePath): + return filePath + else: + print("[-] Provide a valid File") + sys.exit(1) +pdf_stored_path=input("Enter the name of you pdf file (please use backslash when typing in directory path):") +textFile_stored_path=path.join(path.dirname(pdf_stored_path),path.basename(pdf_stored_path).replace(".pdf",".txt")) +pdf_stored_path=File_existance_checker(pdf_stored_path) -images_path = raw_input("Enter the path of the folder containing images : ") -images =images_path+"/*.*" +print(textFile_stored_path) -assert os.path.exists(images_path), "this diretory doesn't exist, "+str(images_path) -f = os.chdir(images_path) -print("Hooray we found your directory!") - -image_list = [] -for filename in glob.glob(images): +with open(pdf_stored_path,'rb') as pdf_object: + pdf_read=PyPDF2.PdfFileReader(pdf_object) - image_list.append(filename) - -pdf = FPDF( unit = 'mm') - -imnames = [i.split("\\") for i in image_list] -imnames = [i[-1] for i in imnames ] -imnums = [i.split('.') for i in imnames] -imnums = [i[0] for i in imnums] -imnums = [int(i) for i in imnums] - - - -pos = 0 -images_dict = dict(zip(image_list, imnums)) -sorted_images = sorted(images_dict , key = images_dict.get) - -for i in list(sorted_images): - pdf.add_page() - im = Image.open(i) - pdf.image(i,pos,pos,200,250) - -pdf_name = raw_input("Enter the pdf name : ") -pdf_name = pdf_name+".pdf" -pdf.output(pdf_name) - - - - - - + pdf_pages=pdf_read.numPages + + for i in range(pdf_pages): + page_object=pdf_read.getPage(i) + with open(textFile_stored_path,'a+') as f: + f.write((page_object.extract_text())) + print(f"[+] Pdf Text has been extracted and written to {path.basename(textFile_stored_path)}")