Merge pull request #75 from anjali1102/master

[feat]: extract_text_from_pdf
This commit is contained in:
Advaita Saha 2022-10-02 23:27:27 +05:30 committed by GitHub
commit 7fd5a2b156
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 16 additions and 0 deletions

View File

@ -0,0 +1,9 @@
# extract text from pdf
This simple script extracts the text from every page of a PDF file and prints it to the console.
## Usage
- requires PyPDF2
- Use `pip3 install PyPDF2`
- Place the PDF to read in the working directory as `sample.pdf`, then run `python script.py`

View File

@ -0,0 +1,7 @@
import PyPDF2
# Open the PDF in binary mode; the `with` block guarantees the file handle
# is closed even if parsing or text extraction raises.
with open('sample.pdf', 'rb') as pdfFileObject:
    # PdfReader / .pages / .extract_text() replace the PdfFileReader /
    # numPages / getPage / extractText names that PyPDF2 3.0 removed.
    pdfReader = PyPDF2.PdfReader(pdfFileObject)
    # Print the extracted text of each page, in document order.
    for page in pdfReader.pages:
        print(page.extract_text())