Mirror of https://gitlab.com/jeancf/twoot.git (synced 2025-01-18 23:37:05 +00:00)

Commit 0787669a3a (parent d92bcea2a7)
Moved time check to beginning of process

twoot.py | 85
@@ -31,7 +31,6 @@ import re
 from pathlib import Path
 from mastodon import Mastodon, MastodonError, MastodonAPIError, MastodonIllegalArgumentError
 import subprocess
-import json.decoder
 import shutil
 
 
@@ -48,7 +47,7 @@ logging.basicConfig(filename="twoot.log", level=logging.INFO)
 logging.info('*********** NEW RUN ***********')
 
 
-def cleanup_tweet_text(tt_iter, twit_account, status_id, tweet_uri, get_vids):
+def process_tweet_content(tt_iter, twit_account, status_id, tweet_uri, get_vids):
     '''
     Receives an iterator over all the elements contained in the tweet-text container.
     Processes them to remove Twitter-specific stuff and make them suitable for
@@ -155,6 +154,18 @@ def contains_class(body_classes, some_class):
 
     return found
 
 
+def is_time_valid(timestamp, max_age, min_delay):
+    ret = True
+    # Check that the tweet is not too young (might be deleted) or too old
+    age_in_hours = (time.time() - float(timestamp)) / 3600.0
+    min_delay_in_hours = min_delay / 60.0
+    max_age_in_hours = max_age * 24.0
+
+    if age_in_hours < min_delay_in_hours or age_in_hours > max_age_in_hours:
+        ret = False
+
+    return ret
+
 def main(argv):
 
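The new helper compares a tweet's age in hours against min_delay given in minutes and max_age given in days. A minimal standalone sketch of that conversion, condensed into a single comparison (the 30-minute and 3-hour delays and the 1-day maximum are illustrative values, not taken from the commit):

import time

def is_time_valid(timestamp, max_age, min_delay):
    # Condensed restatement of the helper added above: max_age is in days,
    # min_delay in minutes, timestamp in seconds since the epoch.
    age_in_hours = (time.time() - float(timestamp)) / 3600.0
    return min_delay / 60.0 <= age_in_hours <= max_age * 24.0

two_hours_ago = time.time() - 2 * 3600
print(is_time_valid(two_hours_ago, max_age=1, min_delay=30))   # True: older than 30 min, younger than 1 day
print(is_time_valid(two_hours_ago, max_age=1, min_delay=180))  # False: not yet 3 hours old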
@@ -237,7 +248,12 @@ def main(argv):
     # Extract twitter timeline
     timeline = soup.find_all('div', class_='timeline-item')
 
-    logging.info('Processing timeline')
+    logging.info('Processing ' + str(len(timeline)) + ' tweets found in timeline')
+
+    # **********************************************************
+    # Process each tweets and generate dictionary
+    # with data ready to be posted on Mastodon
+    # **********************************************************
     for status in timeline:
         # Extract tweet ID and status ID
         tweet_id = status.find('a', class_='tweet-link').get('href').strip('#m')
@@ -245,6 +261,15 @@ def main(argv):
 
         logging.debug('processing tweet %s', tweet_id)
 
+        # Extract time stamp
+        time_string = status.find('span', class_='tweet-date').a.get('title')
+        timestamp = datetime.datetime.strptime(time_string, '%d/%m/%Y, %H:%M:%S').timestamp()
+
+        # Check if time is within acceptable range
+        if not is_time_valid(timestamp, max_age, min_delay):
+            logging.debug("Tweet outside valid time range, skipping")
+            continue
+
         # Check in database if tweet has already been posted
         db.execute("SELECT * FROM toots WHERE twitter_account=? AND mastodon_instance=? AND mastodon_account=? AND tweet_id=?",
                    (twit_account, mast_instance, mast_account, tweet_id))
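The timestamp passed to is_time_valid() is parsed from the tweet-date element's title attribute using the '%d/%m/%Y, %H:%M:%S' format shown above. A quick illustration of that parsing (the date string is made up for the example):

import datetime

# Illustrative title string in the format the diff expects
time_string = '18/01/2025, 23:37:05'
timestamp = datetime.datetime.strptime(time_string, '%d/%m/%Y, %H:%M:%S').timestamp()
print(timestamp)  # seconds since the epoch (naive datetime, interpreted in local time)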
@@ -257,19 +282,6 @@ def main(argv):
         else:
             logging.debug('Tweet %s not found in database', tweet_id)
 
-        reply_to_username = None
-        # TODO Check if the tweet is a reply-to
-        reply_to_div = None
-        if reply_to_div is not None:
-            # Do we need to handle reply-to tweets?
-            if tweets_and_replies:
-                # TODO Capture user name being replied to
-                pass
-            else:
-                # Skip this tweet
-                logging.debug("Tweet is a reply-to and we don't want that. Skipping.")
-                continue
-
         # extract author
         author = status.find('a', class_='fullname').get('title')
 
@@ -279,22 +291,16 @@ def main(argv):
         # Extract URL of full status page (for video download)
         full_status_url = 'https://twitter.com' + tweet_id
 
-        # Extract time stamp
-        time_string = status.find('span', class_='tweet-date').a.get('title')
-        timestamp = datetime.datetime.strptime(time_string, '%d/%m/%Y, %H:%M:%S').timestamp()
+        # TODO Check if the tweet is a reply-to
+
+        # Check it the tweet is a retweet from somebody else
+        if author_account.lower() != twit_account.lower():
+            tweet_text = 'RT from ' + author + ' (@' + author_account + ')\n\n'
 
         # extract iterator over tweet text contents
         tt_iter = status.find('div', class_='tweet-content media-body').children
 
-        tweet_text = cleanup_tweet_text(tt_iter, twit_account, status_id, full_status_url, get_vids)
-
-        # Mention if the tweet is a reply-to
-        if reply_to_username is not None:
-            tweet_text = 'In reply to ' + reply_to_username + '\n\n' + tweet_text
-
-        # Check it the tweet is a retweet from somebody else
-        if author_account.lower() != twit_account.lower():
-            tweet_text = 'RT from ' + author + ' (@' + author_account + ')\n\n' + tweet_text
+        tweet_text += process_tweet_content(tt_iter, twit_account, status_id, full_status_url, get_vids)
 
         # Add footer with link to original tweet
         tweet_text += '\n\nOriginal tweet : ' + full_status_url
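With this hunk, the retweet prefix is assigned before the cleaned-up body is appended with +=, so the assembly order of tweet_text changes. A condensed sketch of that order, using hypothetical stand-in values and a stubbed-out helper (tweet_text is assumed to be initialised to an empty string earlier in main(), outside this hunk):

# Hypothetical stand-in values; in twoot these come from the scraped status element.
author = 'Jane Doe'
author_account = 'janedoe'
twit_account = 'somefeed'
full_status_url = 'https://twitter.com/janedoe/status/1'

def process_tweet_content(*_args):
    # Stub for the renamed helper; in twoot it returns the cleaned-up tweet text.
    return 'Tweet body after cleanup'

tweet_text = ''  # assumed initialisation, not visible in this hunk
if author_account.lower() != twit_account.lower():
    tweet_text = 'RT from ' + author + ' (@' + author_account + ')\n\n'
tweet_text += process_tweet_content(None, twit_account, None, full_status_url, False)
tweet_text += '\n\nOriginal tweet : ' + full_status_url
print(tweet_text)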
@@ -358,15 +364,17 @@ def main(argv):
         }
         tweets.append(tweet)
 
-        logging.debug('Tweet %s added to list to upload', tweet_id)
+        logging.debug('Tweet %s added to list of toots to upload', tweet_id)
 
+    # TODO Log summary stats: how many not in db, how many in valid timeframe
+
     # DEBUG: Print extracted tweets
-    # for t in tweets:
-    # print(t)
+    #for t in tweets:
+    #print(t)
 
     # **********************************************************
     # Iterate tweets in list.
-    # post each on Mastodon and reference to it in database
+    # post each on Mastodon and record it in database
     # **********************************************************
 
     # Create Mastodon application if it does not exist yet
@@ -396,22 +404,12 @@ def main(argv):
         )
 
     except MastodonError as me:
-        print('ERROR: Login to ' + mast_instance + ' Failed')
-        print(me)
+        logging.fatal('ERROR: Login to ' + mast_instance + ' Failed\n' + str(me))
         sys.exit(1)
 
     # Upload tweets
     for tweet in reversed(tweets):
         logging.debug('Uploading Tweet %s', tweet["tweet_id"])
-        # Check that the tweet is not too young (might be deleted) or too old
-        age_in_hours = (time.time() - float(tweet['timestamp'])) / 3600.0
-        min_delay_in_hours = min_delay / 60.0
-        max_age_in_hours = max_age * 24.0
-
-        if age_in_hours < min_delay_in_hours or age_in_hours > max_age_in_hours:
-            # Skip to next tweet
-            logging.debug("Tweet too young or too old, skipping")
-            continue
 
         media_ids = []
 
@@ -444,7 +442,6 @@ def main(argv):
                 pass
 
         # Post toot
-        logging.debug('Doing it now')
         try:
             mastodon = Mastodon(
                 access_token=mast_account + '.secret',