Compare commits

..

No commits in common. "fb8d83800e8ce829f69e82abd98fa69b5e19bcfc" and "b10a8392c88e335b77fbf9267c74665886be521b" have entirely different histories.

2 changed files with 59 additions and 65 deletions

View File

@ -9,110 +9,88 @@ mastodon_instance = ""
mastodon_user = "" mastodon_user = ""
[options] [options]
# List of nitter instances from which to pick at random to download tweets.
# Specify only the address without leading `https://` and without trailing `/`
# By default a built-in list of 2-3 known good instances is used
#
#nitter_instances = ["nitter.nl", "nitter.fdn.fr"]
# Download videos from twitter and upload them on Mastodon # Download videos from twitter and upload them on Mastodon
# Default is false # Default is false
# upload_videos = false
#upload_videos = true
# Also post the "reply-to" tweets from twitter account # Also post the "reply-to" tweets from twitter account
# Default is false # Default is false
# post_reply_to = false
#post_reply_to = true
# Do not post the retweets of other twitter accounts # Do not post the retweets of other twitter accounts
# Default is false # Default is false
# skip_retweets = false
#skip_retweets = true
# Replace redirected links in tweets with direct URLs # Replace redirected links in tweets with direct URLs
# Default is false # Default is false
# remove_link_redirections = false
#remove_link_redirections = true
# Clean up URLs in tweets to remove trackers # Clean up URLs in tweets to remove trackers
# Default is false # Default is false
# remove_trackers_from_urls = false
#remove_trackers_from_urls = true
# Footer line added at bottom of toots # Footer line added at bottom of toots
# e.g. "#twitter #bot"
# Default is "" # Default is ""
# footer = ""
#footer = "#twitter #bot"
# If specified, also display a timestamp on the "Original Tweet" line # If specified, also display a timestamp on the "Original Tweet" line
# in the given format. # in the given format e.g. "%d %b %Y %H:%M %Z"
# see https://docs.python.org/3/library/datetime.html#strftime-strptime-behavior # see https://docs.python.org/3/library/datetime.html#strftime-strptime-behavior
# Default is "" (tweet timestamp is not displayed) # Default is "" (tweet timestamp is not displayed)
# tweet_time_format = ""
#tweet_time_format = "%d %b %Y %H:%M %Z"
# Specify the timezone that the timestamp on the tweet should be displayed in # Specify the timezone that the timestamp on the tweet should be displayed in
# Use `tz_identifier` from https://en.wikipedia.org/wiki/List_of_tz_database_time_zones # Use the `tz_identifier` from https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
# example "Europe/Brussels"
# default is using the local timezone of the machine running the script # default is using the local timezone of the machine running the script
#tweet_timezone = "Europe/Brussels" tweet_timezone = ""
# Do not add reference to "Original tweet" on toots # Do not add reference to "Original tweet" on toots
# default is false # default is false
#remove_original_tweet_ref = true remove_original_tweet_ref = false
# Check if profile avatar or banner pictures were changed and update # Check if profile avatar or banner pictures were changed and update
# the Mastodon account if necessary # the Mastodon account if necessary
# Default is false # Default is false
#update_profile = true update_profile = false
# Maximum age of tweet to post (in days, decimal values accepted) # Maximum age of tweet to post (in days, decimal values accepted)
# Default is 1 # Default is 1
# tweet_max_age = 1
#tweet_max_age = 0.5
# Minimum age of tweet to post (in minutes) # Minimum age of tweet to post (in minutes)
# Default is 0 (post tweet as soon as possible) # Default is 0 (post tweet as soon as possible)
# tweet_delay = 0
#tweet_delay = 15
# How many seconds to pause between successive uploads of toots.
# Increase this value if successive tweets appear in the wrong order.
# Default is 0 (no pause)
#
# upload_pause = 5
# Maximum number of toots to post in each run # Maximum number of toots to post in each run
# Default is 0 (which means unlimited) # Default is 0 (which means unlimited)
# toot_cap = 0
#toot_cap = 2
# Replace twitter.com in links by random alternative out of this list # Replace twitter.com in links by random alternative out of this list
# List of nitter instances # List of nitter instances
# e.g. subst_twitter = ["nitter.net", ]
# Default is [] # Default is []
# subst_twitter = []
#subst_twitter = ["nitter.net", ]
# Replace youtube.com in links by random alternative out of this list # Replace youtube.com in links by random alternative out of this list
# List of Invidious or Piped instances # List of Invidious or Piped instances
# e.g. subst_youtube = ["piped.kavin.rocks", "invidious.flokinet.to", ]
# Default is [] # Default is []
# subst_youtube = []
#subst_youtube = ["piped.kavin.rocks", "invidious.flokinet.to", ]
# Replace reddit.com in links by random alternative out of this list # Replace reddit.com in links by random alternative out of this list
# List of Teddit instances # List of Teddit instances
# e.g. subst_reddit = ["teddit.net", ]
# Default is [] # Default is []
# subst_reddit = []
#subst_reddit = ["teddit.net", ]
# Verbosity of log messages # Verbosity of log messages
# One of DEBUG, INFO, WARNING, ERROR, CRITICAL, OFF # One of DEBUG, INFO, WARNING, ERROR, CRITICAL, OFF
# Default is "WARNING" # Default is "WARNING"
# log_level = "WARNING"
#log_level = "INFO"
# How many days to keep log messages for # How many days to keep log messages for
# Log messages older than log_days will be deleted # Log messages older than log_days will be deleted
# Default is 3 # Default is 3
# log_days = 3
#log_days = 1

View File

@ -41,6 +41,25 @@ MAX_REC_COUNT = 50
# How many seconds to wait before giving up on a download (except video download) # How many seconds to wait before giving up on a download (except video download)
HTTPS_REQ_TIMEOUT = 10 HTTPS_REQ_TIMEOUT = 10
NITTER_URLS = [
'nitter.lacontrevoie.fr',
# 'nitter.cutelab.space', # 404 on 12/07/2023
'nitter.weiler.rocks', # added 15/06/2023
'nitter.nl', # added 16/06/2023
# 'n.l5.ca', # Not working 11/07/2023
# 'nitter.fly.dev', # gone 11/07/2023
# 'notabird.site', # gone 11/07/2023
# 'nitter.sethforprivacy.com', # too slow, removed 16/06/2023
# 'nitter.it', # different pic naming scheme
# 'twitter.femboy.hu', # 404 on 06/05/2023
# 'nitter.grimneko.de', # 404 on 01/06/2023
# 'nitter.namazso.eu', # lots of 403 27/02/2023
# 'twitter.beparanoid.de', # moved 27/02/2023
# 'nitter.fdn.fr', # not updated, rate limited, removed 06/02/2023
# 'nitter.hu',
# 'nitter.privacydev.net', # USA, added 06/02/2023, removed 15/02/2023 too slow
]
# Update from https://www.whatismybrowser.com/guides/the-latest-user-agent/ # Update from https://www.whatismybrowser.com/guides/the-latest-user-agent/
USER_AGENTS = [ USER_AGENTS = [
'Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36', 'Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
@ -82,11 +101,6 @@ def build_config(args):
# Default options # Default options
options = { options = {
'nitter_instances': [
'nitter.lacontrevoie.fr',
'nitter.weiler.rocks', # added 15/06/2023
'nitter.nl', # added 16/06/2023
],
'upload_videos': False, 'upload_videos': False,
'post_reply_to': False, 'post_reply_to': False,
'skip_retweets': False, 'skip_retweets': False,
@ -98,7 +112,6 @@ def build_config(args):
'remove_original_tweet_ref': False, 'remove_original_tweet_ref': False,
'tweet_max_age': float(1), 'tweet_max_age': float(1),
'tweet_delay': float(0), 'tweet_delay': float(0),
'upload_pause': float(0),
'toot_cap': int(0), 'toot_cap': int(0),
'subst_twitter': [], 'subst_twitter': [],
'subst_youtube': [], 'subst_youtube': [],
@ -282,9 +295,9 @@ def get_timeline(nitter_url):
timeline = [] timeline = []
for item in list: for item in list:
classes = item['class'] classes = item['class']
if 'timeline-item' in classes: # Individual tweet if 'timeline-item' in classes:
timeline.append(item) timeline.append(item)
elif 'thread-line' in classes: # First tweet of a thread elif 'thread-line' in classes:
# Get the first item of thread # Get the first item of thread
first_item = item.find('div', class_='timeline-item') first_item = item.find('div', class_='timeline-item')
timeline.append(first_item) timeline.append(first_item)
@ -892,7 +905,6 @@ def main(argv):
log_level = logging.INFO log_level = logging.INFO
elif ll_str == "WARNING": elif ll_str == "WARNING":
log_level = logging.WARNING log_level = logging.WARNING
print('log level warning set')
elif ll_str == "ERROR": elif ll_str == "ERROR":
log_level = logging.ERROR log_level = logging.ERROR
elif ll_str == "CRITICAL": elif ll_str == "CRITICAL":
@ -928,7 +940,7 @@ def main(argv):
logging.info(' subst_twitter : ' + str(TOML['options']['subst_twitter'])) logging.info(' subst_twitter : ' + str(TOML['options']['subst_twitter']))
logging.info(' subst_youtube : ' + str(TOML['options']['subst_youtube'])) logging.info(' subst_youtube : ' + str(TOML['options']['subst_youtube']))
logging.info(' subst_reddit : ' + str(TOML['options']['subst_reddit'])) logging.info(' subst_reddit : ' + str(TOML['options']['subst_reddit']))
logging.info(' log_level : ' + TOML['options']['log_level']) logging.info(' log_level : ' + str(TOML['options']['log_level']))
logging.info(' log_days : ' + str(TOML['options']['log_days'])) logging.info(' log_days : ' + str(TOML['options']['log_days']))
# Try to open database. If it does not exist, create it # Try to open database. If it does not exist, create it
@ -942,19 +954,23 @@ def main(argv):
db.execute('''CREATE INDEX IF NOT EXIsTS profile_index ON profiles (mastodon_instance, mastodon_account)''') db.execute('''CREATE INDEX IF NOT EXIsTS profile_index ON profiles (mastodon_instance, mastodon_account)''')
# Select random nitter instance to fetch updates from # Select random nitter instance to fetch updates from
nitter_url = 'https://' + TOML['options']['nitter_instances'][random.randint(0, len(TOML['options']['nitter_instances']) - 1)] nitter_url = 'https://' + NITTER_URLS[random.randint(0, len(NITTER_URLS) - 1)]
# **********************************************************
# Load twitter page of user. Process all tweets and generate
# list of dictionaries ready to be posted on Mastodon
# **********************************************************
# To store content of all tweets from this user
tweets = []
# Load twitter page of user
soup, timeline = get_timeline(nitter_url) soup, timeline = get_timeline(nitter_url)
logging.info('Processing ' + str(len(timeline)) + ' tweets found in timeline') logging.info('Processing ' + str(len(timeline)) + ' tweets found in timeline')
# ********************************************************** # **********************************************************
# Process each tweet and generate an array of dictionaries # Process each tweet and generate a dictionary
# with data ready to be posted on Mastodon # with data ready to be posted on Mastodon
# ********************************************************** # **********************************************************
tweets = []
out_date_cnt = 0 out_date_cnt = 0
in_db_cnt = 0 in_db_cnt = 0
for status in timeline: for status in timeline:
@ -1199,9 +1215,9 @@ def main(argv):
except MastodonAPIError: except MastodonAPIError:
# Assuming this is an: # Assuming this is an:
# ERROR ('Mastodon API returned error', 422, 'Unprocessable Entity', 'Cannot attach files that have not finished processing. Try again in a moment!') # ERROR ('Mastodon API returned error', 422, 'Unprocessable Entity', 'Cannot attach files that have not finished processing. Try again in a moment!')
logging.warning('Mastodon API Error 422: Cannot attach files that have not finished processing. Waiting 30 seconds and retrying.') logging.warning('Mastodon API Error 422: Cannot attach files that have not finished processing. Waiting 60 seconds and retrying.')
# Wait 30 seconds # Wait 60 seconds
time.sleep(30) time.sleep(60)
# retry posting # retry posting
try: try:
toot = mastodon.status_post(tweet['tweet_text'], media_ids=media_ids) toot = mastodon.status_post(tweet['tweet_text'], media_ids=media_ids)
@ -1219,7 +1235,7 @@ def main(argv):
posted_cnt += 1 posted_cnt += 1
logging.debug('Tweet %s posted on %s', tweet['tweet_id'], TOML['config']['mastodon_user']) logging.debug('Tweet %s posted on %s', tweet['tweet_id'], TOML['config']['mastodon_user'])
# Test to find out if slowing down successive posting helps with ordering of threads # Test to find out if slowing down successive posting helps with ordering of threads
time.sleep(TOML['options']['upload_pause']) time.sleep(5)
# Insert toot id into database # Insert toot id into database
if 'id' in toot: if 'id' in toot: