diff --git a/scripts/extract_text_from_pdf/README.md b/scripts/extract_text_from_pdf/README.md
new file mode 100644
index 0000000..ad4a244
--- /dev/null
+++ b/scripts/extract_text_from_pdf/README.md
@@ -0,0 +1,9 @@
+# extract text from pdf
+
+This simple script will extract text from pdf
+
+## Usage
+
+- requires PyPDF2
+- Use `pip3 install PyPDF2`
+- Run `python script.py`
diff --git a/scripts/extract_text_from_pdf/script.py b/scripts/extract_text_from_pdf/script.py
new file mode 100644
--- /dev/null
+++ b/scripts/extract_text_from_pdf/script.py
@@ -0,0 +1,20 @@
+"""Print the text of every page of a PDF to standard output.
+
+Usage: python script.py [PDF_PATH]
+PDF_PATH defaults to 'sample.pdf' in the current working directory.
+"""
+import sys
+
+import PyPDF2
+
+# An optional first command-line argument overrides the default input file.
+pdf_path = sys.argv[1] if len(sys.argv) > 1 else 'sample.pdf'
+
+# The context manager closes the file even if reading a page raises.
+with open(pdf_path, 'rb') as pdf_file:
+    # PdfReader, .pages and extract_text() are the current PyPDF2 names;
+    # PdfFileReader, numPages, getPage() and extractText() were removed
+    # in PyPDF2 3.0 and fail there instead of returning text.
+    pdf_reader = PyPDF2.PdfReader(pdf_file)
+    for page in pdf_reader.pages:
+        print(page.extract_text())