From 1781eb5653e8f384a68508f3c48534d72e41ae88 Mon Sep 17 00:00:00 2001 From: jeancf Date: Wed, 14 Oct 2020 21:51:00 +0200 Subject: [PATCH 1/5] Basic logging setup --- README.md | 2 +- twoot.py | 26 +++++++++++++++++++------- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 585777f..19f21da 100644 --- a/README.md +++ b/README.md @@ -50,7 +50,7 @@ is @superduperbot@botsin.space |Switch |Description | Example | Req | |-------|--------------------------------------------------|--------------------|-----| -| -t | twitter account name without '@' | `SuperDuper` | Yes | +| -t | twitter account name without '@' | `SuperDuper` | Yes | | -i | Mastodon instance domain name | `botsin.space` | Yes | | -m | Mastodon username | `superduperbot` | Yes | | -p | Mastodon password | `my_Sup3r-S4f3*pw` | Yes | diff --git a/twoot.py b/twoot.py index 4fee31c..b125c66 100755 --- a/twoot.py +++ b/twoot.py @@ -19,6 +19,7 @@ """ import sys +import logging import argparse import os import random @@ -42,8 +43,9 @@ USER_AGENTS = [ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; Xbox; Xbox One) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36 Edge/44.18363.8131', ] -#TODO log to file - +# Setup logging to file +logging.basicConfig(filename="twoot.log", level=logging.DEBUG) +logging.info('*********** NEW RUN ***********') def handle_no_js(session, page, headers): """ @@ -270,11 +272,12 @@ def main(argv): timeline = soup.find_all('table', class_='tweet') for status in timeline: - # Extract tweet ID and status ID tweet_id = str(status['href']).strip('?p=v') status_id = tweet_id.split('/')[3] + logging.debug('processing tweet %s', tweet_id) + # Check in database if tweet has already been posted db.execute('''SELECT * FROM toots WHERE twitter_account = ? AND mastodon_instance = ? AND mastodon_account = ? AND tweet_id = ?''', @@ -282,9 +285,12 @@ def main(argv): tweet_in_db = db.fetchone() if tweet_in_db is not None: + logging.debug("Tweet %s already in database", tweet_id) # Skip to next tweet continue + logging.debug('Tweet %s not found in database', tweet_id) + reply_to_username = None # Check if the tweet is a reply-to reply_to_div = status.find('div', class_='tweet-reply-context username') @@ -446,9 +452,11 @@ def main(argv): } tweets.append(tweet) + logging.debug('Tweet %s added to list to upload', tweet_id) + # DEBUG: Print extracted tweets - #for t in tweets: - # print(t) + for t in tweets: + print(t) # ********************************************************** # Iterate tweets in list. @@ -488,6 +496,7 @@ def main(argv): # Upload tweets for tweet in reversed(tweets): + logging.debug('Uploading Tweet %s', tweet["tweet_id"]) # Check that the tweet is not too young (might be deleted) or too old age_in_hours = (time.time() - float(tweet['timestamp'])) / 3600.0 min_delay_in_hours = min_delay / 60.0 @@ -526,6 +535,7 @@ def main(argv): pass # Post toot + logging.debug('Doing it now') try: mastodon = Mastodon( access_token=mast_account + '.secret', @@ -538,10 +548,12 @@ def main(argv): toot = mastodon.status_post(tweet['tweet_text'], media_ids=media_ids, visibility='public') except MastodonError as me: - print('ERROR: posting ' + tweet['tweet_text'] + ' to ' + mast_instance + ' Failed') - print(me) + logging.error('posting ' + tweet['tweet_text'] + ' to ' + mast_instance + ' Failed') + logging.error(me) sys.exit(1) + logging.debug('Tweet %s posted on %s', tweet_id, mast_account) + # Insert toot id into database if 'id' in toot: db.execute("INSERT INTO toots VALUES ( ? , ? , ? , ? , ? )", From 4f326ee3cda429c65ac728ad550bf2a3c5e28e88 Mon Sep 17 00:00:00 2001 From: jeancf Date: Mon, 9 Nov 2020 15:55:42 +0100 Subject: [PATCH 2/5] Added more debug messages --- twoot.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/twoot.py b/twoot.py index b125c66..c320ce1 100755 --- a/twoot.py +++ b/twoot.py @@ -279,17 +279,20 @@ def main(argv): logging.debug('processing tweet %s', tweet_id) # Check in database if tweet has already been posted - db.execute('''SELECT * FROM toots WHERE twitter_account = ? AND mastodon_instance = ? AND - mastodon_account = ? AND tweet_id = ?''', + db.execute("SELECT * FROM toots WHERE twitter_account=? AND mastodon_instance=? AND mastodon_account=? AND tweet_id=?", (twit_account, mast_instance, mast_account, tweet_id)) tweet_in_db = db.fetchone() + logging.debug("SELECT * FROM toots WHERE twitter_account='{}' AND mastodon_instance='{}' AND mastodon_account='{}' AND tweet_id='{}'" + .format(twit_account, mast_instance, mast_account, tweet_id) + ) + if tweet_in_db is not None: logging.debug("Tweet %s already in database", tweet_id) # Skip to next tweet continue - - logging.debug('Tweet %s not found in database', tweet_id) + else: + logging.debug('Tweet %s not found in database', tweet_id) reply_to_username = None # Check if the tweet is a reply-to @@ -301,6 +304,7 @@ def main(argv): reply_to_username = reply_to_div.a.get_text() else: # Skip this tweet + logging.debug("Tweet is a reply-to and we don't want that. Skipping.") continue # Extract url of full status page @@ -504,6 +508,7 @@ def main(argv): if age_in_hours < min_delay_in_hours or age_in_hours > max_age_in_hours: # Skip to next tweet + logging.debug("Tweet too young or too old, skipping") continue media_ids = [] @@ -511,9 +516,11 @@ def main(argv): # Upload video if there is one if tweet['video'] is not None: try: + logging.debug("Uploading video") media_posted = mastodon.media_post(tweet['video']) media_ids.append(media_posted['id']) except (MastodonAPIError, MastodonIllegalArgumentError, TypeError): # Media cannot be uploaded (invalid format, dead link, etc.) + logging.debug("Uploading video failed") pass else: # Only upload pic if no video was uploaded From 267d4cb551ef7a31f8444ad99c5d8476c018dad8 Mon Sep 17 00:00:00 2001 From: jeancf Date: Sun, 13 Dec 2020 10:44:07 +0100 Subject: [PATCH 3/5] TODO is done --- twoot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/twoot.py b/twoot.py index b125c66..b84f422 100755 --- a/twoot.py +++ b/twoot.py @@ -133,7 +133,7 @@ def cleanup_tweet_text(tt_iter, twit_account, status_id, tweet_uri, get_vids): # Download video from twitter and store in filesystem. Running as subprocess to avoid # requirement to install ffmpeg and ffmpeg-python for those that do not want to post videos try: - # TODO set output location to ./output/twit_account/status_id + # Set output location to ./output/twit_account/status_id dl_feedback = subprocess.run( ["./twitterdl.py", tweet_uri, "-ooutput/" + twit_account + "/" + status_id, "-w 500"], capture_output=True From b7175067c0e54952af2940091518fff02681d6a2 Mon Sep 17 00:00:00 2001 From: jeancf Date: Sun, 13 Dec 2020 18:25:27 +0100 Subject: [PATCH 4/5] Added timeout to execution of twitterdl.py --- twoot.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/twoot.py b/twoot.py index b84f422..71ae0dd 100755 --- a/twoot.py +++ b/twoot.py @@ -44,8 +44,8 @@ USER_AGENTS = [ ] # Setup logging to file -logging.basicConfig(filename="twoot.log", level=logging.DEBUG) -logging.info('*********** NEW RUN ***********') +logging.basicConfig(filename="twoot.log", level=logging.WARNING) +logging.debug('*********** NEW RUN ***********') def handle_no_js(session, page, headers): """ @@ -136,14 +136,18 @@ def cleanup_tweet_text(tt_iter, twit_account, status_id, tweet_uri, get_vids): # Set output location to ./output/twit_account/status_id dl_feedback = subprocess.run( ["./twitterdl.py", tweet_uri, "-ooutput/" + twit_account + "/" + status_id, "-w 500"], - capture_output=True + capture_output=True, + timeout=300 # let's try 5 minutes ) if dl_feedback.returncode != 0: - # TODO Log dl_feedback.stderr + logging.warning('Video in tweet ' + status_id + ' from ' + twit_account + ' failed to download') tweet_text += '\n\n[Video embedded in original tweet]' except OSError: - print("Could not execute twitterdl.py (is it there? Is it set as executable?)") + logging.error("Could not execute twitterdl.py (is it there? Is it set as executable?)") sys.exit(-1) + except subprocess.TimeoutExpired: + # Video download and encoding took too long + tweet_text += '\n\n[Video embedded in original tweet]' else: tweet_text += '\n\n[Video embedded in original tweet]' From c4bf95c1a7d3fa94cd40e7c6eb50a6a672201063 Mon Sep 17 00:00:00 2001 From: BuildTools Date: Sun, 13 Dec 2020 21:04:33 +0100 Subject: [PATCH 5/5] Commented out printing of extracted tweets --- twoot.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/twoot.py b/twoot.py index e540aed..c5e6274 100755 --- a/twoot.py +++ b/twoot.py @@ -463,8 +463,8 @@ def main(argv): logging.debug('Tweet %s added to list to upload', tweet_id) # DEBUG: Print extracted tweets - for t in tweets: - print(t) +# for t in tweets: +# print(t) # ********************************************************** # Iterate tweets in list.