From 99ffa52eb67fd3d99baa418fc307a9500ee7ef69 Mon Sep 17 00:00:00 2001 From: JC Francois Date: Thu, 26 Mar 2020 19:03:21 +0100 Subject: [PATCH 01/18] Added upload of video to Mastodon instance --- twoot.py | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/twoot.py b/twoot.py index af311d8..d74bf87 100755 --- a/twoot.py +++ b/twoot.py @@ -470,24 +470,34 @@ def main(argv): # Skip to next tweet continue - # Upload photos media_ids = [] - for photo in tweet['photos']: - media = False - # Download picture + + # Upload video if there is one + if tweet['video'] is not None: try: - media = requests.get(photo) - except: # Picture cannot be downloaded for any reason + media_posted = mastodon.media_post(tweet['video']) + media_ids.append(media_posted['id']) + except (MastodonAPIError, MastodonIllegalArgumentError, TypeError): # Media cannot be uploaded (invalid format, dead link, etc.) pass - # Upload picture to Mastodon instance - if media: + else: # Only upload pic if no video was uploaded + # Upload photos + for photo in tweet['photos']: + media = False + # Download picture try: - media_posted = mastodon.media_post(media.content, mime_type=media.headers['content-type']) - media_ids.append(media_posted['id']) - except (MastodonAPIError, MastodonIllegalArgumentError, TypeError): # Media cannot be uploaded (invalid format, dead link, etc.) + media = requests.get(photo) + except: # Picture cannot be downloaded for any reason pass + # Upload picture to Mastodon instance + if media: + try: + media_posted = mastodon.media_post(media.content, mime_type=media.headers['content-type']) + media_ids.append(media_posted['id']) + except (MastodonAPIError, MastodonIllegalArgumentError, TypeError): # Media cannot be uploaded (invalid format, dead link, etc.) + pass + # Post toot try: mastodon = Mastodon( From 04c95f3ad3898b96a3cb105f2b8bb9253d9e339a Mon Sep 17 00:00:00 2001 From: JC Francois Date: Thu, 26 Mar 2020 19:58:17 +0100 Subject: [PATCH 02/18] Added command-line option to download video from tweet and upload to Mastodon --- twitterdl.py | 1 - twoot.py | 34 +++++++++++++++++++--------------- 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/twitterdl.py b/twitterdl.py index 1624ffa..f4b4316 100644 --- a/twitterdl.py +++ b/twitterdl.py @@ -1,6 +1,5 @@ import argparse - import requests import json import urllib.parse diff --git a/twoot.py b/twoot.py index d74bf87..eafd764 100755 --- a/twoot.py +++ b/twoot.py @@ -84,7 +84,7 @@ def handle_no_js(session, page, headers): return new_page -def cleanup_tweet_text(tt_iter): +def cleanup_tweet_text(tt_iter, get_vids): ''' Receives an iterator over all the elements contained in the tweet-text container. Processes them to remove Twitter-specific stuff and make them suitable for @@ -120,15 +120,17 @@ def cleanup_tweet_text(tt_iter): if tag.has_attr('data-expanded-path'): data_expanded_path = tag['data-expanded-path'] if 'video' in data_expanded_path: - # Download video from twitter and store in filesystem - tweet_uri = "https://twitter.com/" + data_expanded_path.strip("/video/1") - twitter_dl = twitterdl.TwitterDownloader(tweet_uri, target_width=500, debug=1) - try: - twitter_dl.download() - except json.JSONDecodeError: - print("ERROR: Could not get playlist") - - tweet_text += '\n\n[Video embedded in original tweet]' + if get_vids: + # Download video from twitter and store in filesystem + tweet_uri = "https://twitter.com/" + data_expanded_path.strip("/video/1") + twitter_dl = twitterdl.TwitterDownloader(tweet_uri, target_width=500, debug=1) + try: + twitter_dl.download() + except json.JSONDecodeError: + print("ERROR: Could not get playlist") + tweet_text += '\n\n[Video embedded in original tweet]' + else: + tweet_text += '\n\n[Video embedded in original tweet]' # If element is hashflag (hashtag + icon), handle as simple hashtag elif tag.name == 'span' and tag['class'][0] == 'twitter-hashflag-container': @@ -177,15 +179,15 @@ def contains_class(body_classes, some_class): def main(argv): # Build parser for command line arguments - # TODO Add option for ingestion of video content parser = argparse.ArgumentParser(description='toot tweets.') parser.add_argument('-t', metavar='', action='store', required=True) parser.add_argument('-i', metavar='', action='store', required=True) parser.add_argument('-m', metavar='', action='store', required=True) parser.add_argument('-p', metavar='', action='store', required=True) - parser.add_argument('-r', action='store_true') - parser.add_argument('-a', metavar='', action='store', type=float, default=1) - parser.add_argument('-d', metavar='', action='store', type=float, default=0) + parser.add_argument('-r', action='store_true', help='Also post replies to other tweets') + parser.add_argument('-v', action='store_true', help='Ingest twitter videos and upload to Mastodon instance') + parser.add_argument('-a', metavar='', action='store', type=float, default=1) + parser.add_argument('-d', metavar='', action='store', type=float, default=0) # Parse command line args = vars(parser.parse_args()) @@ -195,6 +197,7 @@ def main(argv): mast_account = args['m'] mast_password = args['p'] tweets_and_replies = args['r'] + get_vids = args['v'] max_age = float(args['a']) min_delay = float(args['d']) @@ -338,7 +341,7 @@ def main(argv): # extract iterator over tweet text contents tt_iter = tmt.find('div', class_='tweet-text').div.children - tweet_text = cleanup_tweet_text(tt_iter) + tweet_text = cleanup_tweet_text(tt_iter, get_vids) # Mention if the tweet is a reply-to if reply_to_username is not None: @@ -521,6 +524,7 @@ def main(argv): (twit_account, mast_instance, mast_account, tweet['tweet_id'], toot['id'])) sql.commit() + # TODO Cleanup downloaded video files if __name__ == "__main__": main(sys.argv) From 9a8cd0ef651a7b48e01f0009e1cfb7034269154c Mon Sep 17 00:00:00 2001 From: JC Francois Date: Thu, 26 Mar 2020 20:50:59 +0100 Subject: [PATCH 03/18] TODO's and FIXME's --- twoot.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/twoot.py b/twoot.py index eafd764..1c00cda 100755 --- a/twoot.py +++ b/twoot.py @@ -31,6 +31,7 @@ from pathlib import Path from mastodon import Mastodon, MastodonError, MastodonAPIError, MastodonIllegalArgumentError import twitterdl import json.decoder +import shutil # Update from https://www.whatismybrowser.com/guides/the-latest-user-agent/ @@ -123,6 +124,7 @@ def cleanup_tweet_text(tt_iter, get_vids): if get_vids: # Download video from twitter and store in filesystem tweet_uri = "https://twitter.com/" + data_expanded_path.strip("/video/1") + # FIXME Use specific directory for downloading videos (that can be easily deleted) twitter_dl = twitterdl.TwitterDownloader(tweet_uri, target_width=500, debug=1) try: twitter_dl.download() From 0231f224a37126df46cbb68930ff47a2c68cc52b Mon Sep 17 00:00:00 2001 From: jeancf Date: Fri, 27 Mar 2020 17:26:04 +0100 Subject: [PATCH 04/18] Improved naming of downloaded videos and implemented cleanup --- twoot.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/twoot.py b/twoot.py index 1c00cda..b22610a 100755 --- a/twoot.py +++ b/twoot.py @@ -85,11 +85,15 @@ def handle_no_js(session, page, headers): return new_page -def cleanup_tweet_text(tt_iter, get_vids): + +def cleanup_tweet_text(tt_iter, tweet_uri, get_vids): ''' Receives an iterator over all the elements contained in the tweet-text container. Processes them to remove Twitter-specific stuff and make them suitable for posting on Mastodon + :param tt_iter: iterator over the HTML elements in the text of the tweet + :param tweet_uri: Used to downloaded videos + :param get_vids: True to download embedded twitter videos and save them on the filesystem ''' tweet_text = '' # Iterate elements @@ -123,8 +127,6 @@ def cleanup_tweet_text(tt_iter, get_vids): if 'video' in data_expanded_path: if get_vids: # Download video from twitter and store in filesystem - tweet_uri = "https://twitter.com/" + data_expanded_path.strip("/video/1") - # FIXME Use specific directory for downloading videos (that can be easily deleted) twitter_dl = twitterdl.TwitterDownloader(tweet_uri, target_width=500, debug=1) try: twitter_dl.download() @@ -264,8 +266,9 @@ def main(argv): # Skip this tweet continue - # Extract tweet id + # Extract tweet ID and status ID tweet_id = str(status['href']).strip('?p=v') + status_id = tweet_id.split('/')[3] # Extract url of full status page full_status_url = 'https://mobile.twitter.com' + tweet_id + '?p=v' @@ -310,7 +313,7 @@ def main(argv): authenticity_token = soup.find('input', {'name': 'authenticity_token'}).get('value') form_input = {'show_media': 1, 'authenticity_token': authenticity_token, 'commit': 'Display media'} - full_status_page = session.post(full_status_url + '?p=v', data=form_input, headers=headers) + full_status_page = session.post(full_status_url, data=form_input, headers=headers) # Verify that download worked assert full_status_page.status_code == 200, \ @@ -343,7 +346,7 @@ def main(argv): # extract iterator over tweet text contents tt_iter = tmt.find('div', class_='tweet-text').div.children - tweet_text = cleanup_tweet_text(tt_iter, get_vids) + tweet_text = cleanup_tweet_text(tt_iter, full_status_url, get_vids) # Mention if the tweet is a reply-to if reply_to_username is not None: @@ -386,9 +389,7 @@ def main(argv): pass # Check if video was downloaded - sid = re.search('/([0-9]+)$', tweet_id) - status_id = sid.groups()[0] - video_path = Path('./output') / author_account / status_id + video_path = Path('./output') / twit_account / status_id video_file_list = list(video_path.glob('*.mp4')) video_file = None if len(video_file_list) != 0: @@ -526,7 +527,8 @@ def main(argv): (twit_account, mast_instance, mast_account, tweet['tweet_id'], toot['id'])) sql.commit() - # TODO Cleanup downloaded video files + # Cleanup downloaded video files + shutil.rmtree('./output/' + twit_account) if __name__ == "__main__": main(sys.argv) From 2fe06c0bbccd2d0ecd903f9ee7a8c13c87029972 Mon Sep 17 00:00:00 2001 From: jeancf Date: Fri, 27 Mar 2020 17:45:40 +0100 Subject: [PATCH 05/18] Use correct capitalization of twitter account name for deleting video directory --- twoot.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/twoot.py b/twoot.py index b22610a..ae11ddb 100755 --- a/twoot.py +++ b/twoot.py @@ -246,8 +246,10 @@ def main(argv): # Verify that we now have the correct twitter page body_classes = soup.body.get_attribute_list('class') - assert contains_class(body_classes, 'users-show-page'), \ - 'This is not the correct twitter page. Quitting' + assert contains_class(body_classes, 'users-show-page'), 'This is not the correct twitter page. Quitting' + + # Replace twit_account with version with correct capitalization + twit_account = soup.find('span', class_='screen-name').get_text() # Extract twitter timeline timeline = soup.find_all('table', class_='tweet') From dd1d54d2a4fddab63a1ff488d54038d75bc3efb2 Mon Sep 17 00:00:00 2001 From: jeancf Date: Sat, 28 Mar 2020 11:08:09 +0100 Subject: [PATCH 06/18] Check if tweet in db before ingest to speed up processing of feed --- twoot.py | 45 ++++++++++++++++++++++----------------------- 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/twoot.py b/twoot.py index ae11ddb..06d8861 100755 --- a/twoot.py +++ b/twoot.py @@ -205,6 +205,12 @@ def main(argv): max_age = float(args['a']) min_delay = float(args['d']) + # Try to open database. If it does not exist, create it + sql = sqlite3.connect('twoot.db') + db = sql.cursor() + db.execute('''CREATE TABLE IF NOT EXISTS toots (twitter_account TEXT, mastodon_instance TEXT, + mastodon_account TEXT, tweet_id TEXT, toot_id TEXT)''') + # ********************************************************** # Load twitter page of user. Process all tweets and generate # list of dictionaries ready to be posted on Mastodon @@ -256,6 +262,20 @@ def main(argv): for status in timeline: + # Extract tweet ID and status ID + tweet_id = str(status['href']).strip('?p=v') + status_id = tweet_id.split('/')[3] + + # Check in database if tweet has already been posted + db.execute('''SELECT * FROM toots WHERE twitter_account = ? AND mastodon_instance = ? AND + mastodon_account = ? AND tweet_id = ?''', + (twit_account, mast_instance, mast_account, tweet_id)) + tweet_in_db = db.fetchone() + + if tweet_in_db is not None: + # Skip to next tweet + continue + reply_to_username = None # Check if the tweet is a reply-to reply_to_div = status.find('div', class_='tweet-reply-context username') @@ -268,10 +288,6 @@ def main(argv): # Skip this tweet continue - # Extract tweet ID and status ID - tweet_id = str(status['href']).strip('?p=v') - status_id = tweet_id.split('/')[3] - # Extract url of full status page full_status_url = 'https://mobile.twitter.com' + tweet_id + '?p=v' @@ -415,16 +431,10 @@ def main(argv): # print(t) # ********************************************************** - # Iterate tweets. Check if the tweet has already been posted - # on Mastodon. If not, post it and add it to database + # Iterate tweets in list. + # post each on Mastodon and reference to it in database # ********************************************************** - # Try to open database. If it does not exist, create it - sql = sqlite3.connect('twoot.db') - db = sql.cursor() - db.execute('''CREATE TABLE IF NOT EXISTS toots (twitter_account TEXT, mastodon_instance TEXT, - mastodon_account TEXT, tweet_id TEXT, toot_id TEXT)''') - # Create Mastodon application if it does not exist yet if not os.path.isfile(mast_instance + '.secret'): try: @@ -458,17 +468,6 @@ def main(argv): # Upload tweets for tweet in reversed(tweets): - # Check in database if tweet has already been posted - # FIXME Move tests to the front of the process to avoid the unnecessary processing of already ingested tweets - db.execute('''SELECT * FROM toots WHERE twitter_account = ? AND mastodon_instance = ? AND - mastodon_account = ? AND tweet_id = ?''', - (twit_account, mast_instance, mast_account, tweet['tweet_id'])) - tweet_in_db = db.fetchone() - - if tweet_in_db is not None: - # Skip to next tweet - continue - # Check that the tweet is not too young (might be deleted) or too old age_in_hours = (time.time() - float(tweet['timestamp'])) / 3600.0 min_delay_in_hours = min_delay / 60.0 From ba3da6ab7cc24e76e2e889790d1d104691436310 Mon Sep 17 00:00:00 2001 From: jeancf Date: Sat, 28 Mar 2020 11:21:28 +0100 Subject: [PATCH 07/18] Handled exception of video download directory absent when trying to delete it --- twoot.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/twoot.py b/twoot.py index 06d8861..61e572f 100755 --- a/twoot.py +++ b/twoot.py @@ -529,7 +529,10 @@ def main(argv): sql.commit() # Cleanup downloaded video files - shutil.rmtree('./output/' + twit_account) + try: + shutil.rmtree('./output/' + twit_account) + except FileNotFoundError: # The directory does not exist + pass if __name__ == "__main__": main(sys.argv) From b1c9ec3811d1179dc4c58d0f15bc4353318b153e Mon Sep 17 00:00:00 2001 From: jeancf Date: Sat, 28 Mar 2020 11:33:38 +0100 Subject: [PATCH 08/18] Added documentation of -v option to README.md --- README.md | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 079f330..0c97936 100644 --- a/README.md +++ b/README.md @@ -20,11 +20,12 @@ of last week. * Fetch timeline of given users from twitter.com * Scrape html and formats tweets for post on mastodon * Emojis supported -* Upload images from tweet to Mastodon (videos not supported) +* Optionally upload videos from tweet to Mastodon +* Upload images from tweet to Mastodon * Specify maximum age of tweet to be considered * Specify minimum delay before considering a tweet for upload * Remember tweets already tooted to prevent double posting -* Can optionally post reply-to tweets on the mastodon account +* Optionally post reply-to tweets on the mastodon account # usage @@ -45,10 +46,16 @@ is @superduperbot@botsin.space | -i | Mastodon instance domain name | `botsin.space` | Yes | | -m | Mastodon username | `superduperbot` | Yes | | -p | Mastodon password | `my_Sup3r-S4f3*pw` | Yes | +| -v | upload videos to Mastodon | *N/A* | No | | -r | Post reply-to tweets (ignored by default) | *N/A* | No | | -a | Max. age of tweet to post (in days) | `1` | No | | -d | Min. delay before posting new tweet (in minutes) | `15` | No | +When using the `-v` switch consider: +* The copyright of the content that you want to cross-post +* The storage / transfer limitations of the Mastodon instance that you are posting to +* The upstream bandwidth that you may consume on your internet connection + Default max age is 1 day. Decimal values are OK. Default min delay is 0 minutes. From d056497f65e2fcae700dae6a72973f4ea66bc7fe Mon Sep 17 00:00:00 2001 From: jeancf Date: Sat, 28 Mar 2020 11:47:53 +0100 Subject: [PATCH 09/18] Added additional dependencies to README.md --- README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 0c97936..faa2c3b 100644 --- a/README.md +++ b/README.md @@ -64,7 +64,10 @@ Default min delay is 0 minutes. Make sure python3 is installed. -Twoot depends on beautifulsoup4 and mastodon python module: `sudo pip install beautifulsoup4 Mastodon.py` +Twoot depends on beautifulsoup4 and mastodon python module. It also +requires m3u8 and ffmpeg-python. + + pip install beautifulsoup4 Mastodon.py m3u8 ffmpeg-python In your user folder, execute `git clone https://gitlab.com/jeancf/twoot.git` to clone repo with twoot.py script. From cd482359a3643ac1ba12af4b0c00320773dc2579 Mon Sep 17 00:00:00 2001 From: jeancf Date: Sat, 28 Mar 2020 11:55:13 +0100 Subject: [PATCH 10/18] Updated .gitignore to disregard *.sh files --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index b043063..d467991 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,5 @@ .vscode/ venv/ *.secret +*.sh twoot.db From ae60d2e00261031266b57d9653c0155cd66abb11 Mon Sep 17 00:00:00 2001 From: jeancf Date: Sat, 28 Mar 2020 11:59:47 +0100 Subject: [PATCH 11/18] Updated README.md with requirement to have ffmpeg installed --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index faa2c3b..fac834a 100644 --- a/README.md +++ b/README.md @@ -67,6 +67,8 @@ Make sure python3 is installed. Twoot depends on beautifulsoup4 and mastodon python module. It also requires m3u8 and ffmpeg-python. +ffmpeg must also be installed. + pip install beautifulsoup4 Mastodon.py m3u8 ffmpeg-python In your user folder, execute `git clone https://gitlab.com/jeancf/twoot.git` From df4eaa0dd7022f19571007570f0f61debfdf76a1 Mon Sep 17 00:00:00 2001 From: jeancf Date: Sat, 28 Mar 2020 13:55:43 +0100 Subject: [PATCH 12/18] Set debug=0 on call to download to avoid mail spam --- twoot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/twoot.py b/twoot.py index 61e572f..e6e411f 100755 --- a/twoot.py +++ b/twoot.py @@ -127,7 +127,7 @@ def cleanup_tweet_text(tt_iter, tweet_uri, get_vids): if 'video' in data_expanded_path: if get_vids: # Download video from twitter and store in filesystem - twitter_dl = twitterdl.TwitterDownloader(tweet_uri, target_width=500, debug=1) + twitter_dl = twitterdl.TwitterDownloader(tweet_uri, target_width=500, debug=0) try: twitter_dl.download() except json.JSONDecodeError: From 9c56ad57c8dbe3bc6f7f56b73e7aa32f2e576bd9 Mon Sep 17 00:00:00 2001 From: jeancf Date: Sat, 28 Mar 2020 14:07:00 +0100 Subject: [PATCH 13/18] Added TODOs to improve management of locations of video download --- twoot.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/twoot.py b/twoot.py index e6e411f..cf8a5ba 100755 --- a/twoot.py +++ b/twoot.py @@ -127,6 +127,7 @@ def cleanup_tweet_text(tt_iter, tweet_uri, get_vids): if 'video' in data_expanded_path: if get_vids: # Download video from twitter and store in filesystem + # TODO set output location to ./output/twit_account twitter_dl = twitterdl.TwitterDownloader(tweet_uri, target_width=500, debug=0) try: twitter_dl.download() @@ -407,6 +408,7 @@ def main(argv): pass # Check if video was downloaded + # TODO Check subdirectories of twit_account directory for video video_path = Path('./output') / twit_account / status_id video_file_list = list(video_path.glob('*.mp4')) video_file = None From 2090d214b6464abad6033234fb41732d36246c41 Mon Sep 17 00:00:00 2001 From: jeancf Date: Sat, 28 Mar 2020 14:11:06 +0100 Subject: [PATCH 14/18] Trying to stop debug messages --- twoot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/twoot.py b/twoot.py index cf8a5ba..503f5a1 100755 --- a/twoot.py +++ b/twoot.py @@ -128,7 +128,7 @@ def cleanup_tweet_text(tt_iter, tweet_uri, get_vids): if get_vids: # Download video from twitter and store in filesystem # TODO set output location to ./output/twit_account - twitter_dl = twitterdl.TwitterDownloader(tweet_uri, target_width=500, debug=0) + twitter_dl = twitterdl.TwitterDownloader(tweet_uri, target_width=500) try: twitter_dl.download() except json.JSONDecodeError: From 6fa2019618d339b212dd7d955115c3f53bcb3f2e Mon Sep 17 00:00:00 2001 From: JC Francois Date: Sun, 29 Mar 2020 13:41:49 +0200 Subject: [PATCH 15/18] Calling twitterdl.py as subprocess --- twitterdl.py | 1 + twoot.py | 14 ++++++++------ 2 files changed, 9 insertions(+), 6 deletions(-) mode change 100644 => 100755 twitterdl.py diff --git a/twitterdl.py b/twitterdl.py old mode 100644 new mode 100755 index f4b4316..3ba32fb --- a/twitterdl.py +++ b/twitterdl.py @@ -1,3 +1,4 @@ +#! /usr/bin/env python3 import argparse import requests diff --git a/twoot.py b/twoot.py index 503f5a1..2abc50b 100755 --- a/twoot.py +++ b/twoot.py @@ -29,7 +29,7 @@ import datetime, time import re from pathlib import Path from mastodon import Mastodon, MastodonError, MastodonAPIError, MastodonIllegalArgumentError -import twitterdl +import subprocess import json.decoder import shutil @@ -128,12 +128,14 @@ def cleanup_tweet_text(tt_iter, tweet_uri, get_vids): if get_vids: # Download video from twitter and store in filesystem # TODO set output location to ./output/twit_account - twitter_dl = twitterdl.TwitterDownloader(tweet_uri, target_width=500) try: - twitter_dl.download() - except json.JSONDecodeError: - print("ERROR: Could not get playlist") - tweet_text += '\n\n[Video embedded in original tweet]' + dl_feedback = subprocess.run(["./twitterdl.py", tweet_uri, "-w 500"], capture_output=True) + if dl_feedback.returncode != 0: + # TODO Log dl_feedback.stderr + tweet_text += '\n\n[Video embedded in original tweet]' + except OSError: + print("Could not execute twitterdl.py (is it there? Is it set as executable?)") + sys.exit(-1) else: tweet_text += '\n\n[Video embedded in original tweet]' From 965317f5b2324a8f6b30f881aa127b00925d5557 Mon Sep 17 00:00:00 2001 From: JC Francois Date: Sun, 29 Mar 2020 13:57:18 +0200 Subject: [PATCH 16/18] Added details on optional dependencies to README.md --- README.md | 24 +++++++++++++----------- twoot.py | 5 +++-- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index fac834a..873f899 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ of last week. ``` twoot.py [-h] -t -i -m -p [-r] + account> -p [-r] [-v] [-a ] [-d ] ``` @@ -42,19 +42,19 @@ is @superduperbot@botsin.space |Switch |Description | Example | Req | |-------|--------------------------------------------------|--------------------|-----| -| -t | twitter account name without '@' | `SuperDuperBot` | Yes | +| -t | twitter account name without '@' | `SuperDuper` | Yes | | -i | Mastodon instance domain name | `botsin.space` | Yes | | -m | Mastodon username | `superduperbot` | Yes | | -p | Mastodon password | `my_Sup3r-S4f3*pw` | Yes | | -v | upload videos to Mastodon | *N/A* | No | | -r | Post reply-to tweets (ignored by default) | *N/A* | No | -| -a | Max. age of tweet to post (in days) | `1` | No | +| -a | Max. age of tweet to post (in days) | `5` | No | | -d | Min. delay before posting new tweet (in minutes) | `15` | No | When using the `-v` switch consider: -* The copyright of the content that you want to cross-post -* The storage / transfer limitations of the Mastodon instance that you are posting to -* The upstream bandwidth that you may consume on your internet connection +* whether the copyright of the content that you want to cross-post allows it +* the storage / transfer limitations of the Mastodon instance that you are posting to +* the upstream bandwidth that you may consume on your internet connection Default max age is 1 day. Decimal values are OK. @@ -64,13 +64,15 @@ Default min delay is 0 minutes. Make sure python3 is installed. -Twoot depends on beautifulsoup4 and mastodon python module. It also -requires m3u8 and ffmpeg-python. +Twoot depends on `beautifulsoup4` and `Mastodon.py` python modules. -ffmpeg must also be installed. - - pip install beautifulsoup4 Mastodon.py m3u8 ffmpeg-python +If you plan to use the `-v` switch to download videos, the additional depedencies are required: +* Python modules `m3u8` and `ffmpeg-python` +* [ffmpeg](https://ffmpeg.org/download.html) (check the package manager of your distribution) +``` +> pip install beautifulsoup4 Mastodon.py m3u8 ffmpeg-python +``` In your user folder, execute `git clone https://gitlab.com/jeancf/twoot.git` to clone repo with twoot.py script. diff --git a/twoot.py b/twoot.py index 2abc50b..a8e4dc4 100755 --- a/twoot.py +++ b/twoot.py @@ -126,9 +126,10 @@ def cleanup_tweet_text(tt_iter, tweet_uri, get_vids): data_expanded_path = tag['data-expanded-path'] if 'video' in data_expanded_path: if get_vids: - # Download video from twitter and store in filesystem - # TODO set output location to ./output/twit_account + # Download video from twitter and store in filesystem. Running as subprocess to avoid + # requirement to install ffmpeg and ffmpeg-python for those that do not want to post videos try: + # TODO set output location to ./output/twit_account dl_feedback = subprocess.run(["./twitterdl.py", tweet_uri, "-w 500"], capture_output=True) if dl_feedback.returncode != 0: # TODO Log dl_feedback.stderr From e32620d79b8c743d1e863557e6954af6b2b06bce Mon Sep 17 00:00:00 2001 From: JC Francois Date: Sun, 29 Mar 2020 17:16:54 +0200 Subject: [PATCH 17/18] Implemented proper naming of downloaded videos --- twoot.py | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/twoot.py b/twoot.py index a8e4dc4..ec80a7e 100755 --- a/twoot.py +++ b/twoot.py @@ -86,14 +86,16 @@ def handle_no_js(session, page, headers): return new_page -def cleanup_tweet_text(tt_iter, tweet_uri, get_vids): +def cleanup_tweet_text(tt_iter, twit_account, status_id, tweet_uri, get_vids): ''' Receives an iterator over all the elements contained in the tweet-text container. Processes them to remove Twitter-specific stuff and make them suitable for posting on Mastodon :param tt_iter: iterator over the HTML elements in the text of the tweet + :param twit_account: Used to name directory where videos are downloaded + :param status_id: Used to name directory where videos are downloaded :param tweet_uri: Used to downloaded videos - :param get_vids: True to download embedded twitter videos and save them on the filesystem + :param get_vids: True to download embedded twitter videos and save them on the filesystem ''' tweet_text = '' # Iterate elements @@ -129,8 +131,11 @@ def cleanup_tweet_text(tt_iter, tweet_uri, get_vids): # Download video from twitter and store in filesystem. Running as subprocess to avoid # requirement to install ffmpeg and ffmpeg-python for those that do not want to post videos try: - # TODO set output location to ./output/twit_account - dl_feedback = subprocess.run(["./twitterdl.py", tweet_uri, "-w 500"], capture_output=True) + # TODO set output location to ./output/twit_account/status_id + dl_feedback = subprocess.run( + ["./twitterdl.py", tweet_uri, "-ooutput/" + twit_account + "/" + status_id, "-w 500"], + capture_output=True + ) if dl_feedback.returncode != 0: # TODO Log dl_feedback.stderr tweet_text += '\n\n[Video embedded in original tweet]' @@ -368,7 +373,7 @@ def main(argv): # extract iterator over tweet text contents tt_iter = tmt.find('div', class_='tweet-text').div.children - tweet_text = cleanup_tweet_text(tt_iter, full_status_url, get_vids) + tweet_text = cleanup_tweet_text(tt_iter, twit_account, status_id, full_status_url, get_vids) # Mention if the tweet is a reply-to if reply_to_username is not None: @@ -411,12 +416,19 @@ def main(argv): pass # Check if video was downloaded - # TODO Check subdirectories of twit_account directory for video - video_path = Path('./output') / twit_account / status_id - video_file_list = list(video_path.glob('*.mp4')) video_file = None - if len(video_file_list) != 0: - video_file = video_file_list[0].absolute().as_posix() + + video_path = Path('./output') / twit_account / status_id + if video_path.exists(): + # Take the first subdirectory of video path (named after original poster of video) + video_path = [p for p in video_path.iterdir() if p.is_dir()][0] + # Take again the first subdirectory of video path (named after status id of original post where vidoe is attached) + video_path = [p for p in video_path.iterdir() if p.is_dir()][0] + # list video files + video_file_list = list(video_path.glob('*.mp4')) + if len(video_file_list) != 0: + # Extract posix path of first video file in list + video_file = video_file_list[0].absolute().as_posix() # Add dictionary with content of tweet to list tweet = { From 092f2ab3711c965af91e0ca3d963fc8dc62e8e32 Mon Sep 17 00:00:00 2001 From: JC Francois Date: Sun, 5 Apr 2020 10:37:54 +0200 Subject: [PATCH 18/18] Cleanup and README.md update for release --- README.md | 12 ++++++++++-- twitterdl.py | 22 ++++++++++++++++++++++ twoot.py | 5 +++-- 3 files changed, 35 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 873f899..585777f 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,17 @@ +# Twoot + +Twoot is a python script that extracts tweets from a twitter feed and +reposts them as toots on a Mastodon account. + I started twoot when [tootbot](https://github.com/cquest/tootbot) stopped working. Tootbot relies on rss feeds from https://twitrss.me that broke when Twitter refreshed their web UI in July 2019. Instead twoot is self contained and handles all the processing. +**UPDATE 05 APR 2020** VERSION 1.0. Twoot can now optionally download +videos from Twitter and upload them on Mastodon. + **UPDATE 17 MAR 2020** Added command line switch (`-r`) to also post reply-to tweets on the mastodon account. They will not be included by default anymore. @@ -66,9 +74,9 @@ Make sure python3 is installed. Twoot depends on `beautifulsoup4` and `Mastodon.py` python modules. -If you plan to use the `-v` switch to download videos, the additional depedencies are required: +**Only If you plan to download videos** with the `-v` switch, are the additional dependencies required: * Python modules `m3u8` and `ffmpeg-python` -* [ffmpeg](https://ffmpeg.org/download.html) (check the package manager of your distribution) +* [ffmpeg](https://ffmpeg.org/download.html) (installed with the package manager of your distribution) ``` > pip install beautifulsoup4 Mastodon.py m3u8 ffmpeg-python diff --git a/twitterdl.py b/twitterdl.py index 3ba32fb..984f6a9 100755 --- a/twitterdl.py +++ b/twitterdl.py @@ -1,5 +1,27 @@ #! /usr/bin/env python3 +""" + This file is a modification of + https://github.com/h4ckninja/twitter-video-downloader/ + The original package has an unknown license. The modified version + is released here under GPL v3. + + Copyright (C) 2019 Jean-Christophe Francois + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +""" + import argparse import requests import json diff --git a/twoot.py b/twoot.py index ec80a7e..0c65369 100755 --- a/twoot.py +++ b/twoot.py @@ -1,7 +1,7 @@ #! /usr/bin/env python3 # -*- coding: utf-8 -*- -''' +""" Copyright (C) 2019 Jean-Christophe Francois This program is free software: you can redistribute it and/or modify @@ -16,7 +16,7 @@ You should have received a copy of the GNU General Public License along with this program. If not, see . -''' +""" import sys import argparse @@ -551,5 +551,6 @@ def main(argv): except FileNotFoundError: # The directory does not exist pass + if __name__ == "__main__": main(sys.argv)