mirror of
https://github.com/hastagAB/Awesome-Python-Scripts.git
synced 2024-11-23 20:11:07 +00:00
Merge pull request #26 from GhostofGoes/master
Add Slideshare-Downloader script to download SlideShare presentations
This commit is contained in:
commit
f2a4a5be6b
23
Slideshare-Downloader/README.md
Normal file
23
Slideshare-Downloader/README.md
Normal file
|
@ -0,0 +1,23 @@
|
|||
# Slideshare-Downloader
|
||||
Download slides from slideshows shared on SlideShare (Now LinkedIn SlideShare) as a PDF.
|
||||
|
||||
# Usage
|
||||
This script requires Python 3; it relies on `urllib.parse`, `urllib.request` and `FileExistsError`, which are not available in Python 2.7.
|
||||
|
||||
## Installation
|
||||
### Linux/Mac
|
||||
```bash
|
||||
python3 -m pip install --user -U -r requirements.txt
|
||||
python3 slideshare_downloader.py --help
|
||||
```
|
||||
|
||||
### Windows
|
||||
```powershell
|
||||
py -3 -m pip install --user -U -r requirements.txt
|
||||
py -3 slideshare_downloader.py --help
|
||||
```
|
||||
|
||||
## Running
|
||||
```bash
|
||||
python3 slideshare_downloader.py -f some_slides -u http://www.slideshare.net/codeblue_jp/igor-skochinsky-enpub
|
||||
```
|
4
Slideshare-Downloader/requirements.txt
Normal file
4
Slideshare-Downloader/requirements.txt
Normal file
|
@ -0,0 +1,4 @@
|
|||
beautifulsoup4>=4.0.0
|
||||
requests>=2.0.0
|
||||
img2pdf>=0.2.1
|
||||
docopt>=0.6.0
|
73
Slideshare-Downloader/slideshare_downloader.py
Normal file
73
Slideshare-Downloader/slideshare_downloader.py
Normal file
|
@ -0,0 +1,73 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
# Credit for base code goes to: yodiaditya
|
||||
# https://github.com/yodiaditya/slideshare-downloader/blob/master/convertpdf.py
|
||||
|
||||
"""SlideShare Downloader.
|
||||
|
||||
Usage:
|
||||
slideshare_downloader.py [options]
|
||||
|
||||
Options:
|
||||
-h, --help Show this screen
|
||||
-f <file> Specify output filename
|
||||
-u <url> URL to download
|
||||
"""
|
||||
|
||||
import img2pdf
|
||||
from docopt import docopt
|
||||
|
||||
from os import walk, mkdir, chdir, getcwd
|
||||
from os.path import join
|
||||
|
||||
from urllib.parse import urlparse
|
||||
from urllib.request import urlopen
|
||||
from bs4 import BeautifulSoup
|
||||
from requests import get
|
||||
|
||||
|
||||
class SlideShare:
    """Download the slide images of a SlideShare deck and bundle them into a PDF.

    Slide images are saved into a folder named after the page title, created
    relative to the current working directory. The PDF is written relative to
    the directory that was current when the instance was created (``TOP_DIR``).
    The working directory of the process is never changed.
    """

    def __init__(self):
        # Remember the starting directory; create_pdf resolves paths against it.
        self.TOP_DIR = getcwd()

    def get_slides(self, download_url=None, filename=None):
        """Download a slide deck and convert it into a PDF.

        Args:
            download_url: Full URL of the SlideShare page. When falsy, the
                URL is read interactively from standard input.
            filename: Name of the PDF to create. ``.pdf`` is appended unless
                the name already ends with it. When falsy, the PDF is named
                after the image directory.
        """
        if not download_url:
            download_url = input('SlideShare full URL (including "http://"): ')
        i_dir = self.download_images(download_url)
        if filename:
            pdf_name = self._pdf_name(filename)
        else:
            # Always append here: the PDF must never share the directory's name.
            pdf_name = i_dir + '.pdf'
        self.create_pdf(i_dir, pdf_name)

    @staticmethod
    def _pdf_name(name):
        """Return *name* with a ``.pdf`` extension, without doubling an existing one."""
        return name if name.lower().endswith('.pdf') else name + '.pdf'

    @staticmethod
    def _natural_key(name):
        """Sort key that orders embedded numbers numerically (slide-2 before slide-10)."""
        import re  # local import: keeps this block independent of the file's import list

        # With a capturing group, re.split puts the digit runs at odd indices.
        parts = re.split(r'([0-9]+)', name)
        return [int(part) if index % 2 else part.lower()
                for index, part in enumerate(parts)]

    @staticmethod
    def download_images(page_url):
        """Fetch every slide image referenced by *page_url*.

        The images are written into a directory derived from the page title
        (lower-cased, spaces replaced by dashes). If that directory already
        exists, nothing is downloaded and its current contents are reused.

        Args:
            page_url: Full URL of the SlideShare page.

        Returns:
            The name of the image directory, relative to the working directory.
        """
        # Close the HTTP response deterministically instead of leaking it.
        with urlopen(page_url) as page:
            html = page.read()
        soup = BeautifulSoup(html, 'html.parser')
        images = soup.find_all('img', {'class': 'slide_image'})  # Parse out the slide images
        image_dir = soup.title.string.strip(' \t\r\n').lower().replace(' ', '-')  # Get name of the slide deck
        try:
            mkdir(image_dir)  # Create the folder for our images
        except FileExistsError:
            print("The directory '%s' already exists. Assuming PDF rebuild, continuing with existing contents...\n"
                  "Delete the directory to re-download the slide images." % image_dir)
            return image_dir
        for image in images:
            full_url = image.get('data-full')
            if not full_url:
                # Tag without a full-resolution source; nothing to download.
                continue
            image_url = full_url.split('?')[0]
            # Fetch before opening the file so a failed request leaves no empty file.
            response = get(image_url)
            # Write straight into the image folder rather than chdir-ing into it.
            target = join(image_dir, urlparse(image_url).path.split('/')[-1])
            with open(target, "wb") as file:
                file.write(response.content)
        return image_dir

    def create_pdf(self, image_dir, filename):
        """Combine the files found in *image_dir* into a single PDF.

        Args:
            image_dir: Image directory name, relative to ``TOP_DIR``.
            filename: Name of the PDF to write, relative to ``TOP_DIR``. It is
                also used as the PDF title.
        """
        folder = join(self.TOP_DIR, image_dir)
        names = next(walk(folder))[2]
        # os.walk yields names in arbitrary order; sort naturally so the pages
        # follow the slide numbering embedded in the file names.
        pages = [join(folder, name) for name in sorted(names, key=self._natural_key)]
        with open(join(self.TOP_DIR, filename), "wb") as file:
            img2pdf.convert(*pages, title=filename, outputstream=file)
|
||||
|
||||
if __name__ == "__main__":
    # docopt derives the command-line interface from the module docstring.
    cli_args = docopt(__doc__)
    SlideShare().get_slides(download_url=cli_args['-u'], filename=cli_args['-f'])
|
Loading…
Reference in New Issue
Block a user