Merge remote-tracking branch 'gitlab/logging'

This commit is contained in:
jeancf 2020-12-16 18:40:00 +01:00
commit 372269c2be
2 changed files with 37 additions and 14 deletions

View File

@ -19,6 +19,7 @@
""" """
import sys import sys
import logging
import argparse import argparse
import os import os
import random import random
@ -42,8 +43,9 @@ USER_AGENTS = [
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; Xbox; Xbox One) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36 Edge/44.18363.8131', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; Xbox; Xbox One) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36 Edge/44.18363.8131',
] ]
#TODO log to file # Setup logging to file
logging.basicConfig(filename="twoot.log", level=logging.WARNING)
logging.debug('*********** NEW RUN ***********')
def handle_no_js(session, page, headers): def handle_no_js(session, page, headers):
""" """
@ -131,17 +133,21 @@ def cleanup_tweet_text(tt_iter, twit_account, status_id, tweet_uri, get_vids):
# Download video from twitter and store in filesystem. Running as subprocess to avoid # Download video from twitter and store in filesystem. Running as subprocess to avoid
# requirement to install ffmpeg and ffmpeg-python for those that do not want to post videos # requirement to install ffmpeg and ffmpeg-python for those that do not want to post videos
try: try:
# TODO set output location to ./output/twit_account/status_id # Set output location to ./output/twit_account/status_id
dl_feedback = subprocess.run( dl_feedback = subprocess.run(
["./twitterdl.py", tweet_uri, "-ooutput/" + twit_account + "/" + status_id, "-w 500"], ["./twitterdl.py", tweet_uri, "-ooutput/" + twit_account + "/" + status_id, "-w 500"],
capture_output=True capture_output=True,
timeout=300 # let's try 5 minutes
) )
if dl_feedback.returncode != 0: if dl_feedback.returncode != 0:
# TODO Log dl_feedback.stderr logging.warning('Video in tweet ' + status_id + ' from ' + twit_account + ' failed to download')
tweet_text += '\n\n[Video embedded in original tweet]' tweet_text += '\n\n[Video embedded in original tweet]'
except OSError: except OSError:
print("Could not execute twitterdl.py (is it there? Is it set as executable?)") logging.error("Could not execute twitterdl.py (is it there? Is it set as executable?)")
sys.exit(-1) sys.exit(-1)
except subprocess.TimeoutExpired:
# Video download and encoding took too long
tweet_text += '\n\n[Video embedded in original tweet]'
else: else:
tweet_text += '\n\n[Video embedded in original tweet]' tweet_text += '\n\n[Video embedded in original tweet]'
@ -270,20 +276,27 @@ def main(argv):
timeline = soup.find_all('table', class_='tweet') timeline = soup.find_all('table', class_='tweet')
for status in timeline: for status in timeline:
# Extract tweet ID and status ID # Extract tweet ID and status ID
tweet_id = str(status['href']).strip('?p=v') tweet_id = str(status['href']).strip('?p=v')
status_id = tweet_id.split('/')[3] status_id = tweet_id.split('/')[3]
logging.debug('processing tweet %s', tweet_id)
# Check in database if tweet has already been posted # Check in database if tweet has already been posted
db.execute('''SELECT * FROM toots WHERE twitter_account = ? AND mastodon_instance = ? AND db.execute("SELECT * FROM toots WHERE twitter_account=? AND mastodon_instance=? AND mastodon_account=? AND tweet_id=?",
mastodon_account = ? AND tweet_id = ?''',
(twit_account, mast_instance, mast_account, tweet_id)) (twit_account, mast_instance, mast_account, tweet_id))
tweet_in_db = db.fetchone() tweet_in_db = db.fetchone()
logging.debug("SELECT * FROM toots WHERE twitter_account='{}' AND mastodon_instance='{}' AND mastodon_account='{}' AND tweet_id='{}'"
.format(twit_account, mast_instance, mast_account, tweet_id)
)
if tweet_in_db is not None: if tweet_in_db is not None:
logging.debug("Tweet %s already in database", tweet_id)
# Skip to next tweet # Skip to next tweet
continue continue
else:
logging.debug('Tweet %s not found in database', tweet_id)
reply_to_username = None reply_to_username = None
# Check if the tweet is a reply-to # Check if the tweet is a reply-to
@ -295,6 +308,7 @@ def main(argv):
reply_to_username = reply_to_div.a.get_text() reply_to_username = reply_to_div.a.get_text()
else: else:
# Skip this tweet # Skip this tweet
logging.debug("Tweet is a reply-to and we don't want that. Skipping.")
continue continue
# Extract url of full status page # Extract url of full status page
@ -446,9 +460,11 @@ def main(argv):
} }
tweets.append(tweet) tweets.append(tweet)
logging.debug('Tweet %s added to list to upload', tweet_id)
# DEBUG: Print extracted tweets # DEBUG: Print extracted tweets
#for t in tweets: # for t in tweets:
# print(t) # print(t)
# ********************************************************** # **********************************************************
# Iterate tweets in list. # Iterate tweets in list.
@ -488,6 +504,7 @@ def main(argv):
# Upload tweets # Upload tweets
for tweet in reversed(tweets): for tweet in reversed(tweets):
logging.debug('Uploading Tweet %s', tweet["tweet_id"])
# Check that the tweet is not too young (might be deleted) or too old # Check that the tweet is not too young (might be deleted) or too old
age_in_hours = (time.time() - float(tweet['timestamp'])) / 3600.0 age_in_hours = (time.time() - float(tweet['timestamp'])) / 3600.0
min_delay_in_hours = min_delay / 60.0 min_delay_in_hours = min_delay / 60.0
@ -495,6 +512,7 @@ def main(argv):
if age_in_hours < min_delay_in_hours or age_in_hours > max_age_in_hours: if age_in_hours < min_delay_in_hours or age_in_hours > max_age_in_hours:
# Skip to next tweet # Skip to next tweet
logging.debug("Tweet too young or too old, skipping")
continue continue
media_ids = [] media_ids = []
@ -502,9 +520,11 @@ def main(argv):
# Upload video if there is one # Upload video if there is one
if tweet['video'] is not None: if tweet['video'] is not None:
try: try:
logging.debug("Uploading video")
media_posted = mastodon.media_post(tweet['video']) media_posted = mastodon.media_post(tweet['video'])
media_ids.append(media_posted['id']) media_ids.append(media_posted['id'])
except (MastodonAPIError, MastodonIllegalArgumentError, TypeError): # Media cannot be uploaded (invalid format, dead link, etc.) except (MastodonAPIError, MastodonIllegalArgumentError, TypeError): # Media cannot be uploaded (invalid format, dead link, etc.)
logging.debug("Uploading video failed")
pass pass
else: # Only upload pic if no video was uploaded else: # Only upload pic if no video was uploaded
@ -526,6 +546,7 @@ def main(argv):
pass pass
# Post toot # Post toot
logging.debug('Doing it now')
try: try:
mastodon = Mastodon( mastodon = Mastodon(
access_token=mast_account + '.secret', access_token=mast_account + '.secret',
@ -538,10 +559,12 @@ def main(argv):
toot = mastodon.status_post(tweet['tweet_text'], media_ids=media_ids, visibility='public') toot = mastodon.status_post(tweet['tweet_text'], media_ids=media_ids, visibility='public')
except MastodonError as me: except MastodonError as me:
print('ERROR: posting ' + tweet['tweet_text'] + ' to ' + mast_instance + ' Failed') logging.error('posting ' + tweet['tweet_text'] + ' to ' + mast_instance + ' Failed')
print(me) logging.error(me)
sys.exit(1) sys.exit(1)
logging.debug('Tweet %s posted on %s', tweet_id, mast_account)
# Insert toot id into database # Insert toot id into database
if 'id' in toot: if 'id' in toot:
db.execute("INSERT INTO toots VALUES ( ? , ? , ? , ? , ? )", db.execute("INSERT INTO toots VALUES ( ? , ? , ? , ? , ? )",