mirror of
https://github.com/hastagAB/Awesome-Python-Scripts.git
synced 2024-11-23 20:11:07 +00:00
commit
5e9d1f4985
|
@ -1,50 +1,27 @@
|
||||||
|
import PyPDF2
|
||||||
### img2pdf ####
|
from os import path
|
||||||
import os
|
|
||||||
import sys
|
import sys
|
||||||
from fpdf import FPDF
|
|
||||||
from PIL import Image
|
|
||||||
import glob
|
|
||||||
|
|
||||||
|
def File_existance_checker(filePath):
    """Return ``filePath`` unchanged when it names an existing regular file.

    Args:
        filePath: Path to check, as accepted by ``os.path.isfile``.

    Returns:
        The same ``filePath`` value that was passed in.

    Raises:
        SystemExit: With exit status 1, after printing an error message,
            when ``filePath`` is not an existing regular file (this
            includes directories and missing paths).
    """
    if path.isfile(filePath):
        return filePath

    # Not a regular file: report the problem and stop the script.
    print("[-] Provide a valid File")
    sys.exit(1)
|
||||||
|
pdf_stored_path=input("Enter the name of you pdf file (please use backslash when typing in directory path):")
|
||||||
|
|
||||||
|
textFile_stored_path=path.join(path.dirname(pdf_stored_path),path.basename(pdf_stored_path).replace(".pdf",".txt"))
|
||||||
|
pdf_stored_path=File_existance_checker(pdf_stored_path)
|
||||||
|
|
||||||
images_path = raw_input("Enter the path of the folder containing images : ")
|
print(textFile_stored_path)
|
||||||
images =images_path+"/*.*"
|
|
||||||
|
|
||||||
assert os.path.exists(images_path), "this diretory doesn't exist, "+str(images_path)
|
with open(pdf_stored_path,'rb') as pdf_object:
|
||||||
f = os.chdir(images_path)
|
pdf_read=PyPDF2.PdfFileReader(pdf_object)
|
||||||
print("Hooray we found your directory!")
|
|
||||||
|
|
||||||
image_list = []
|
|
||||||
for filename in glob.glob(images):
|
|
||||||
|
|
||||||
image_list.append(filename)
|
pdf_pages=pdf_read.numPages
|
||||||
|
|
||||||
pdf = FPDF( unit = 'mm')
|
for i in range(pdf_pages):
|
||||||
|
page_object=pdf_read.getPage(i)
|
||||||
imnames = [i.split("\\") for i in image_list]
|
with open(textFile_stored_path,'a+') as f:
|
||||||
imnames = [i[-1] for i in imnames ]
|
f.write((page_object.extract_text()))
|
||||||
imnums = [i.split('.') for i in imnames]
|
print(f"[+] Pdf Text has been extracted and written to {path.basename(textFile_stored_path)}")
|
||||||
imnums = [i[0] for i in imnums]
|
|
||||||
imnums = [int(i) for i in imnums]
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
pos = 0
|
|
||||||
images_dict = dict(zip(image_list, imnums))
|
|
||||||
sorted_images = sorted(images_dict , key = images_dict.get)
|
|
||||||
|
|
||||||
for i in list(sorted_images):
|
|
||||||
pdf.add_page()
|
|
||||||
im = Image.open(i)
|
|
||||||
pdf.image(i,pos,pos,200,250)
|
|
||||||
|
|
||||||
pdf_name = raw_input("Enter the pdf name : ")
|
|
||||||
pdf_name = pdf_name+".pdf"
|
|
||||||
pdf.output(pdf_name)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user