mirror of
https://github.com/hastagAB/Awesome-Python-Scripts.git
synced 2024-11-24 04:21:08 +00:00
Merge pull request #26 from GhostofGoes/master
Add Slideshare-Downloader script to download SlideShare presentations
This commit is contained in:
commit
f2a4a5be6b
23
Slideshare-Downloader/README.md
Normal file
23
Slideshare-Downloader/README.md
Normal file
|
@ -0,0 +1,23 @@
|
||||||
|
# Slideshare-Downloader
|
||||||
|
Download slides from slideshows shared on SlideShare (now LinkedIn SlideShare) as a PDF.
|
||||||
|
|
||||||
|
# Usage
|
||||||
|
This was written for Python 3 and requires it: the script imports `urllib.parse` and `urllib.request` and catches `FileExistsError`, none of which exist in Python 2.7.
|
||||||
|
|
||||||
|
## Installation
|
||||||
|
### Linux/Mac
|
||||||
|
```bash
|
||||||
|
python3 -m pip install --user -U -r requirements.txt
|
||||||
|
python3 slideshare_downloader.py --help
|
||||||
|
```
|
||||||
|
|
||||||
|
### Windows
|
||||||
|
```powershell
|
||||||
|
py -3 -m pip install --user -U -r requirements.txt
|
||||||
|
py -3 slideshare_downloader.py --help
|
||||||
|
```
|
||||||
|
|
||||||
|
## Running
|
||||||
|
```bash
|
||||||
|
slideshare_downloader.py -f some_slides -u http://www.slideshare.net/codeblue_jp/igor-skochinsky-enpub
|
||||||
|
```
|
4
Slideshare-Downloader/requirements.txt
Normal file
4
Slideshare-Downloader/requirements.txt
Normal file
|
@ -0,0 +1,4 @@
|
||||||
|
beautifulsoup4>=4.0.0
|
||||||
|
requests>=2.0.0
|
||||||
|
img2pdf>=0.2.1
|
||||||
|
docopt>=0.6.0
|
73
Slideshare-Downloader/slideshare_downloader.py
Normal file
73
Slideshare-Downloader/slideshare_downloader.py
Normal file
|
@ -0,0 +1,73 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# Credit for base code goes to: yodiaditya
|
||||||
|
# https://github.com/yodiaditya/slideshare-downloader/blob/master/convertpdf.py
|
||||||
|
|
||||||
|
"""SlideShare Downloader.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
slideshare_downloader.py [options]
|
||||||
|
|
||||||
|
Options:
|
||||||
|
-h, --help Show this screen
|
||||||
|
-f <file> Specify output filename
|
||||||
|
-u <url> URL to download
|
||||||
|
"""
|
||||||
|
|
||||||
|
import img2pdf
|
||||||
|
from docopt import docopt
|
||||||
|
|
||||||
|
from os import walk, mkdir, chdir, getcwd
|
||||||
|
from os.path import join
|
||||||
|
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
from urllib.request import urlopen
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from requests import get
|
||||||
|
|
||||||
|
|
||||||
|
class SlideShare:
    """Download the slide images of a SlideShare deck and bundle them into a PDF.

    Slide images are saved into a directory named after the page title, and
    the PDF is written next to it. Output paths handed to ``create_pdf`` are
    resolved against the working directory captured when the instance was
    created; the process working directory itself is never changed.
    """

    def __init__(self):
        # Captured once so later output paths can be built as absolute paths.
        self.TOP_DIR = getcwd()

    def get_slides(self, download_url=None, filename=None):
        """Download a slide deck and convert it into a PDF.

        Args:
            download_url: Full URL of the SlideShare page. When falsy, the
                URL is read interactively from stdin.
            filename: Name of the PDF to write. ``.pdf`` is appended unless
                the name already ends with it. When falsy, the PDF is named
                after the image directory.
        """
        if not download_url:
            download_url = input('SlideShare full URL (including "http://"): ')
        i_dir = self.download_images(download_url)

        if filename:
            # Avoid producing "slides.pdf.pdf" when the extension was given.
            if filename.lower().endswith('.pdf'):
                pdf_name = filename
            else:
                pdf_name = filename + '.pdf'
        else:
            pdf_name = i_dir + '.pdf'
        self.create_pdf(i_dir, pdf_name)

    @staticmethod
    def _dir_name_from_title(title):
        """Turn a page title into a single directory-name component."""
        name = title.strip(' \t\r\n').lower().replace(' ', '-')
        # The title comes from a remote page: path separators would either
        # make mkdir() fail or let the directory escape the working directory.
        return name.replace('/', '-').replace('\\', '-')

    @staticmethod
    def _natural_key(name):
        """Sort key that orders embedded numbers numerically (2 before 10)."""
        import re  # local import: only this helper needs it

        # Splitting on a captured digit run yields alternating text/number
        # tokens, so odd positions are always the numeric ones.
        tokens = re.split(r'(\d+)', name)
        return [int(tok) if idx % 2 else tok.lower()
                for idx, tok in enumerate(tokens)]

    @staticmethod
    def download_images(page_url):
        """Fetch every slide image referenced by the page at ``page_url``.

        The images are written into a directory derived from the page title,
        created relative to the current working directory. If that directory
        already exists nothing is downloaded and its contents are reused.

        Args:
            page_url: Full URL of the SlideShare page.

        Returns:
            The name of the directory holding the slide images.

        Raises:
            ValueError: If the page has no usable title to name the directory.
            requests.HTTPError: If an image request returns an error status.
        """
        with urlopen(page_url) as page:  # close the connection when done
            html = page.read()
        soup = BeautifulSoup(html, 'html.parser')
        images = soup.find_all('img', {'class': 'slide_image'})  # Parse out the slide images

        title = soup.title.string if soup.title is not None else None
        image_dir = SlideShare._dir_name_from_title(title) if title else ''
        if not image_dir:
            raise ValueError("Could not determine the slide deck title for %s" % page_url)

        try:
            mkdir(image_dir)  # Create the folder for our images
        except FileExistsError:
            print("The directory '%s' already exists. Assuming PDF rebuild, continuing with existing contents...\n"
                  "Delete the directory to re-download the slide images." % image_dir)
            return image_dir

        for image in images:
            full_url = image.get('data-full')
            if not full_url:
                continue  # not a full-size slide reference; nothing to fetch
            image_url = full_url.split('?')[0]
            # Download before opening the file so a failed request does not
            # leave an empty or HTML-error "image" behind for the PDF step.
            response = get(image_url)
            response.raise_for_status()
            image_name = urlparse(image_url).path.split('/')[-1]
            with open(join(image_dir, image_name), "wb") as file:
                file.write(response.content)
        return image_dir

    def create_pdf(self, image_dir, filename):
        """Combine the files in ``image_dir`` into a single PDF.

        Args:
            image_dir: Directory (relative to ``TOP_DIR``) holding the slide
                images.
            filename: Name of the PDF to write (relative to ``TOP_DIR``); it
                is also used as the PDF title.
        """
        source_dir = join(self.TOP_DIR, image_dir)
        names = next(walk(source_dir))[2]
        # walk() returns files in arbitrary order; sort naturally so the
        # pages follow the slide numbers embedded in the file names.
        pages = [join(source_dir, name)
                 for name in sorted(names, key=self._natural_key)]
        with open(join(self.TOP_DIR, filename), "wb") as file:
            img2pdf.convert(*pages, title=filename, outputstream=file)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # docopt parses the command line against the module docstring; options
    # that were not supplied come back as None and are passed through as-is.
    cli_options = docopt(__doc__)
    downloader = SlideShare()
    downloader.get_slides(download_url=cli_options['-u'], filename=cli_options['-f'])
|
Loading…
Reference in New Issue
Block a user