Merge pull request #258 from thegeekyb0y/patch-2

Improved Code.
2025-05-27 19:21:41 +00:00 · 2022-07-02 23:23:47 +05:30 · 2022-07-02 23:23:47 +05:30 · 5e9d1f4985
commit 5e9d1f4985
parent 369c2a75eb aab6433e61
1 changed file with 21 additions and 44 deletions
--- a/images2pdf/imges2pdf.py
+++ b/images2pdf/imges2pdf.py
@@ -1,50 +1,27 @@
-
-### img2pdf ####
-import os 
+import PyPDF2
+from os import path
 import sys
-from fpdf import FPDF 
-from PIL import Image
-import glob

+def File_existance_checker(filePath):
+    if path.isfile(filePath):
+        return filePath
+    else:
+        print("[-] Provide a valid File")
+        sys.exit(1)
+pdf_stored_path=input("Enter the name of you pdf file (please use backslash when typing in directory path):")

+textFile_stored_path=path.join(path.dirname(pdf_stored_path),path.basename(pdf_stored_path).replace(".pdf",".txt"))
+pdf_stored_path=File_existance_checker(pdf_stored_path)

-images_path = raw_input("Enter the path of the folder containing images : ")
-images =images_path+"/*.*"
+print(textFile_stored_path)

-assert os.path.exists(images_path), "this diretory doesn't exist, "+str(images_path)
-f = os.chdir(images_path)
-print("Hooray we found your directory!")
-
-image_list = []
-for filename in glob.glob(images): 
+with open(pdf_stored_path,'rb') as pdf_object:
+    pdf_read=PyPDF2.PdfFileReader(pdf_object)
    
-    image_list.append(filename)
-
-pdf = FPDF( unit = 'mm')
-
-imnames = [i.split("\\") for i in image_list] 
-imnames = [i[-1] for i in imnames ]
-imnums = [i.split('.') for i in imnames]
-imnums = [i[0] for i in imnums]
-imnums = [int(i) for i in imnums]
-
-
-
-pos = 0 
-images_dict = dict(zip(image_list, imnums))
-sorted_images = sorted(images_dict , key = images_dict.get)
-
-for i in list(sorted_images):
-    pdf.add_page()
-    im = Image.open(i)
-    pdf.image(i,pos,pos,200,250)
-
-pdf_name = raw_input("Enter the pdf name : ")
-pdf_name = pdf_name+".pdf"
-pdf.output(pdf_name)
-
-
-
-
-
-
+    pdf_pages=pdf_read.numPages
+    
+    for i in range(pdf_pages):
+        page_object=pdf_read.getPage(i)
+        with open(textFile_stored_path,'a+') as f:
+            f.write((page_object.extract_text()))
+    print(f"[+] Pdf Text has been extracted and written to {path.basename(textFile_stored_path)}")