[feat]: extract_text_from_pdf

This commit is contained in:
anjali1102 2022-10-02 22:46:10 +05:30
parent 8618617f1e
commit 47f35b7358
2 changed files with 16 additions and 0 deletions

View File

@ -0,0 +1,9 @@
# Extract text from a PDF
This simple script extracts the text from every page of `sample.pdf` and prints it to the console.
## Usage
- Requires PyPDF2
- Install it with `pip3 install PyPDF2`
- Place `sample.pdf` in the current directory, then run `python script.py`

View File

@ -0,0 +1,7 @@
import PyPDF2
# Print the extracted text of every page in sample.pdf, one page per print().
# The file is opened in binary mode inside a `with` block so the handle is
# closed even if parsing or text extraction raises.
with open('sample.pdf', 'rb') as pdf_file:
    # PdfReader / .pages / extract_text() are the current PyPDF2 names; the
    # older PdfFileReader / numPages / getPage / extractText spellings were
    # deprecated in PyPDF2 2.x and removed in 3.0.
    pdf_reader = PyPDF2.PdfReader(pdf_file)
    # Text must be extracted while the file is still open: the reader pulls
    # page content from the underlying stream on demand.
    for page in pdf_reader.pages:
        print(page.extract_text())