Merge remote-tracking branch 'gitlab/logging'

This commit is contained in:
jeancf 2020-12-16 18:40:00 +01:00
commit 372269c2be
2 changed files with 37 additions and 14 deletions

View File

@ -50,7 +50,7 @@ is @superduperbot@botsin.space
|Switch |Description | Example | Req |
|-------|--------------------------------------------------|--------------------|-----|
| -t | twitter account name without '@' | `SuperDuper` | Yes |
| -t | twitter account name without '@' | `SuperDuper` | Yes |
| -i | Mastodon instance domain name | `botsin.space` | Yes |
| -m | Mastodon username | `superduperbot` | Yes |
| -p | Mastodon password | `my_Sup3r-S4f3*pw` | Yes |

View File

@ -19,6 +19,7 @@
"""
import sys
import logging
import argparse
import os
import random
@ -42,8 +43,9 @@ USER_AGENTS = [
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; Xbox; Xbox One) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36 Edge/44.18363.8131',
]
#TODO log to file
# Setup logging to file
logging.basicConfig(filename="twoot.log", level=logging.WARNING)
logging.debug('*********** NEW RUN ***********')
def handle_no_js(session, page, headers):
"""
@ -131,17 +133,21 @@ def cleanup_tweet_text(tt_iter, twit_account, status_id, tweet_uri, get_vids):
# Download video from twitter and store in filesystem. Running as subprocess to avoid
# requirement to install ffmpeg and ffmpeg-python for those that do not want to post videos
try:
# TODO set output location to ./output/twit_account/status_id
# Set output location to ./output/twit_account/status_id
dl_feedback = subprocess.run(
["./twitterdl.py", tweet_uri, "-ooutput/" + twit_account + "/" + status_id, "-w 500"],
capture_output=True
capture_output=True,
timeout=300 # let's try 5 minutes
)
if dl_feedback.returncode != 0:
# TODO Log dl_feedback.stderr
logging.warning('Video in tweet ' + status_id + ' from ' + twit_account + ' failed to download')
tweet_text += '\n\n[Video embedded in original tweet]'
except OSError:
print("Could not execute twitterdl.py (is it there? Is it set as executable?)")
logging.error("Could not execute twitterdl.py (is it there? Is it set as executable?)")
sys.exit(-1)
except subprocess.TimeoutExpired:
# Video download and encoding took too long
tweet_text += '\n\n[Video embedded in original tweet]'
else:
tweet_text += '\n\n[Video embedded in original tweet]'
@ -270,20 +276,27 @@ def main(argv):
timeline = soup.find_all('table', class_='tweet')
for status in timeline:
# Extract tweet ID and status ID
tweet_id = str(status['href']).strip('?p=v')
status_id = tweet_id.split('/')[3]
logging.debug('processing tweet %s', tweet_id)
# Check in database if tweet has already been posted
db.execute('''SELECT * FROM toots WHERE twitter_account = ? AND mastodon_instance = ? AND
mastodon_account = ? AND tweet_id = ?''',
db.execute("SELECT * FROM toots WHERE twitter_account=? AND mastodon_instance=? AND mastodon_account=? AND tweet_id=?",
(twit_account, mast_instance, mast_account, tweet_id))
tweet_in_db = db.fetchone()
logging.debug("SELECT * FROM toots WHERE twitter_account='{}' AND mastodon_instance='{}' AND mastodon_account='{}' AND tweet_id='{}'"
.format(twit_account, mast_instance, mast_account, tweet_id)
)
if tweet_in_db is not None:
logging.debug("Tweet %s already in database", tweet_id)
# Skip to next tweet
continue
else:
logging.debug('Tweet %s not found in database', tweet_id)
reply_to_username = None
# Check if the tweet is a reply-to
@ -295,6 +308,7 @@ def main(argv):
reply_to_username = reply_to_div.a.get_text()
else:
# Skip this tweet
logging.debug("Tweet is a reply-to and we don't want that. Skipping.")
continue
# Extract url of full status page
@ -446,9 +460,11 @@ def main(argv):
}
tweets.append(tweet)
logging.debug('Tweet %s added to list to upload', tweet_id)
# DEBUG: Print extracted tweets
#for t in tweets:
# print(t)
# for t in tweets:
# print(t)
# **********************************************************
# Iterate tweets in list.
@ -488,6 +504,7 @@ def main(argv):
# Upload tweets
for tweet in reversed(tweets):
logging.debug('Uploading Tweet %s', tweet["tweet_id"])
# Check that the tweet is not too young (might be deleted) or too old
age_in_hours = (time.time() - float(tweet['timestamp'])) / 3600.0
min_delay_in_hours = min_delay / 60.0
@ -495,6 +512,7 @@ def main(argv):
if age_in_hours < min_delay_in_hours or age_in_hours > max_age_in_hours:
# Skip to next tweet
logging.debug("Tweet too young or too old, skipping")
continue
media_ids = []
@ -502,9 +520,11 @@ def main(argv):
# Upload video if there is one
if tweet['video'] is not None:
try:
logging.debug("Uploading video")
media_posted = mastodon.media_post(tweet['video'])
media_ids.append(media_posted['id'])
except (MastodonAPIError, MastodonIllegalArgumentError, TypeError): # Media cannot be uploaded (invalid format, dead link, etc.)
logging.debug("Uploading video failed")
pass
else: # Only upload pic if no video was uploaded
@ -526,6 +546,7 @@ def main(argv):
pass
# Post toot
logging.debug('Doing it now')
try:
mastodon = Mastodon(
access_token=mast_account + '.secret',
@ -538,10 +559,12 @@ def main(argv):
toot = mastodon.status_post(tweet['tweet_text'], media_ids=media_ids, visibility='public')
except MastodonError as me:
print('ERROR: posting ' + tweet['tweet_text'] + ' to ' + mast_instance + ' Failed')
print(me)
logging.error('posting ' + tweet['tweet_text'] + ' to ' + mast_instance + ' Failed')
logging.error(me)
sys.exit(1)
logging.debug('Tweet %s posted on %s', tweet_id, mast_account)
# Insert toot id into database
if 'id' in toot:
db.execute("INSERT INTO toots VALUES ( ? , ? , ? , ? , ? )",