From 957f7ab45cfe84db3a9480e132dee60c4bc6dfb0 Mon Sep 17 00:00:00 2001
From: Jeremias Moreira Gomes
Date: Sat, 24 Oct 2020 08:50:19 -0300
Subject: [PATCH] Download page as pdf (#196)

* Download page as PDF.

* Contributor name.

* Pudim page typo.
---
 Download-page-as-pdf/README.md               | 24 +++++++++++
 Download-page-as-pdf/download-page-as-pdf.py | 72 ++++++++++++++++++++++++++++++++++
 Download-page-as-pdf/requirements.txt        |  2 +
 README.md                                    |  1 +
 4 files changed, 99 insertions(+)
 create mode 100644 Download-page-as-pdf/README.md
 create mode 100644 Download-page-as-pdf/download-page-as-pdf.py
 create mode 100644 Download-page-as-pdf/requirements.txt

diff --git a/Download-page-as-pdf/README.md b/Download-page-as-pdf/README.md
new file mode 100644
index 0000000..b0ab79a
--- /dev/null
+++ b/Download-page-as-pdf/README.md
@@ -0,0 +1,24 @@
+# Download Page as PDF:
+
+Download a page as a PDF.
+
+ #### Required Modules :
+ - pyppdf
+    ```bash
+    pip3 install pyppdf
+    ```
+ - pyppeteer
+    ```bash
+    pip3 install pyppeteer
+    ```
+
+ #### Examples of use :
+ - Download a page:
+    ```bash
+    python download-page-as-pdf.py -l 'www.pudim.com.br'
+    ```
+
+ - Download a page and give a pdf name:
+    ```bash
+    python download-page-as-pdf.py -l 'http://www.pudim.com.br' -n 'pudim.pdf'
+    ```
diff --git a/Download-page-as-pdf/download-page-as-pdf.py b/Download-page-as-pdf/download-page-as-pdf.py
new file mode 100644
index 0000000..5e26c87
--- /dev/null
+++ b/Download-page-as-pdf/download-page-as-pdf.py
@@ -0,0 +1,72 @@
+#!/usr/bin/env python3
+# -*- coding: UTF-8 -*-
+"""Download a web page and save it as a PDF file using pyppdf."""
+
+import argparse
+import re
+import sys
+
+import pyppdf
+from pyppeteer.errors import PageError, TimeoutError, NetworkError
+
+# Matches a leading URL scheme such as 'http://' or 'https://'.
+SCHEME_RE = re.compile(r'^\w+://')
+
+
+def normalize_url(url):
+    """Return *url* with 'http://' prepended when it has no scheme.
+
+    pyppeteer expects the URL to include a scheme, so addresses such as
+    'www.pudim.com.br' get a default one instead of failing to load.
+    """
+    url = url.strip()
+    return url if SCHEME_RE.match(url) else 'http://' + url
+
+
+def build_pdf_name(url, name=None):
+    """Return the output file name, always ending in '.pdf'.
+
+    When *name* is not given it is derived from *url*: the scheme is
+    dropped, the text is lowercased and characters that are not allowed
+    in file names (including '/') are replaced by '-'.
+    """
+    if not name:
+        name = SCHEME_RE.sub('', url.lower()).strip('/')
+        name = re.sub(r'[\\/:*?"<>|]+', '-', name).strip('-') or 'page'
+
+    if not name.endswith('.pdf'):
+        name += '.pdf'
+    return name
+
+
+def main():
+    """Download the page given on the command line and save it as a PDF.
+
+    Returns 0 on success and 1 when the page could not be saved.
+    """
+    parser = argparse.ArgumentParser(description='Page Downloader as PDF')
+    parser.add_argument('--link', '-l', action='store', dest='link',
+                        required=True, help='Inform the link to download.')
+    parser.add_argument('--name', '-n', action='store', dest='name',
+                        required=False, help='Inform the name to save.')
+    arguments = parser.parse_args()
+
+    url = normalize_url(arguments.link)
+    name = build_pdf_name(url, arguments.name)
+    print(f'Name of the file: {name}')
+
+    try:
+        pyppdf.save_pdf(name, url)
+    except PageError:
+        print('URL could not be resolved.', file=sys.stderr)
+    except TimeoutError:
+        print('Timeout.', file=sys.stderr)
+    except NetworkError:
+        print('No access to the network.', file=sys.stderr)
+    else:
+        return 0
+    return 1
+
+
+if __name__ == '__main__':
+    sys.exit(main())
diff --git a/Download-page-as-pdf/requirements.txt b/Download-page-as-pdf/requirements.txt
new file mode 100644
index 0000000..89960d9
--- /dev/null
+++ b/Download-page-as-pdf/requirements.txt
@@ -0,0 +1,2 @@
+pyppdf==0.1.2
+pyppeteer==0.2.2
diff --git a/README.md b/README.md
index 5655761..e7e61d8 100644
--- a/README.md
+++ b/README.md
@@ -165,6 +165,7 @@ So far, the following projects have been integrated to this repo:
 |[IMDBQuerier](IMDBQuerier)|[Burak Bekci](https://github.com/Bekci)
 |[URL shortener](url_shortener)|[Sam Ebison](https://github.com/ebsa491)
 |[2048](https://github.com/hastagAB/Awesome-Python-Scripts/tree/master/2048)|[Krunal](https://github.com/gitkp11)
+|[Download Page as PDF](https://github.com/hastagAB/Awesome-Python-Scripts/tree/master/Download-page-as-pdf)|[Jeremias Gomes](https://github.com/j3r3mias)
 
 ## How to use :
 