diff --git a/PDF2text/README.md b/PDF2text/README.md
new file mode 100644
index 0000000..f98f324
--- /dev/null
+++ b/PDF2text/README.md
@@ -0,0 +1,11 @@
+# Description: PDF2text
+This is a small script that extracts text from a PDF file.
+
+### Dependencies:
+1- [pdftotext](https://pypi.org/project/pdftotext/)
+
+## Usage
+Run ```python script.py```, then enter the path of the PDF file.
+
+
+
diff --git a/PDF2text/requirements.txt b/PDF2text/requirements.txt
new file mode 100644
index 0000000..f57178e
--- /dev/null
+++ b/PDF2text/requirements.txt
@@ -0,0 +1 @@
+pdftotext
diff --git a/PDF2text/script.py b/PDF2text/script.py
new file mode 100644
index 0000000..50c0223
--- /dev/null
+++ b/PDF2text/script.py
@@ -0,0 +1,19 @@
+"""Prompt for a PDF path and print the text of each page."""
+import os
+import pdftotext
+
+
+pdf_path = input("Enter the path of the pdf file : ")
+
+# Raise explicitly: an assert would be stripped under `python -O`.
+if not os.path.exists(pdf_path):
+    raise FileNotFoundError("this pdf file doesn't exist")
+
+with open(pdf_path, 'rb') as f_r:
+    pdf_pages = pdftotext.PDF(f_r)
+
+# Pages are numbered from 0, which is enumerate's default start.
+for i, page in enumerate(pdf_pages):
+    print('Page {}'.format(i))
+    print(page)
+    print('*'*100)
diff --git a/README.md b/README.md
index 4eae5a0..f8af8e5 100644
--- a/README.md
+++ b/README.md
@@ -156,10 +156,10 @@ So far, the following projects have been integrated to this repo:
 |[Codeforces Checker](codeforcesChecker)|[Jinesh Parakh](https://github.com/jineshparakh)|
 |[Github repo creator](https://github.com/hastagAB/Awesome-Python-Scripts/tree/master/Git_repo_creator)|[Harish Tiwari ](https://github.com/optimist2309)
 |[Remove-Duplicate-Files](Remove-Duplicate-Files)|[Aayushi Varma](https://github.com/aayuv17)
+|[PDF2text](PDF2text)|[QuangPH](https://github.com/quangph-1686a)
 |[Image Watermarker (batch)](Image Watermarker (batch))|[Remco Halman](https://github.com/remcohalman)
 
 
-
 ## How to use :
 
  - Clone/Download the directory and navigate to each folder. Or...