Compare commits

...

3 Commits

Author SHA1 Message Date
jeancf
fb8d83800e add upload pause to config 2023-07-14 13:21:12 +02:00
jeancf
d6ed64d6fc Move nitter instances to config file 2023-07-14 13:12:25 +02:00
jeancf
cdc1fb03f7 Add comments 2023-07-14 13:11:20 +02:00
2 changed files with 65 additions and 59 deletions

View File

@ -9,88 +9,110 @@ mastodon_instance = ""
mastodon_user = ""
[options]
# List of nitter instances from which to pick at random to download tweets.
# Specify only the address without leading `https://` and without trailing `/`
# By default a built-in list of 2-3 known good instances is used
#
#nitter_instances = ["nitter.nl", "nitter.fdn.fr"]
# Download videos from twitter and upload them on Mastodon
# Default is false
upload_videos = false
#
#upload_videos = true
# Also post the "reply-to" tweets from twitter account
# Default is false
post_reply_to = false
#
#post_reply_to = true
# Do not post the retweets of other twitter accounts
# Default is false
skip_retweets = false
#
#skip_retweets = true
# Replace redirected links in tweets with direct URLs
# Default is false
remove_link_redirections = false
#
#remove_link_redirections = true
# Clean up URLs in tweets to remove trackers
# Default is false
remove_trackers_from_urls = false
#
#remove_trackers_from_urls = true
# Footer line added at bottom of toots
# e.g. "#twitter #bot"
# Default is ""
footer = ""
#
#footer = "#twitter #bot"
# If specified, also display a timestamp on the "Original Tweet" line
# in the given format e.g. "%d %b %Y %H:%M %Z"
# in the given format.
# see https://docs.python.org/3/library/datetime.html#strftime-strptime-behavior
# Default is "" (tweet timestamp is not displayed)
tweet_time_format = ""
#
#tweet_time_format = "%d %b %Y %H:%M %Z"
# Specify the timezone that the timestamp on the tweet should be displayed in
# Use the `tz_identifier` from https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
# example "Europe/Brussels"
# Use `tz_identifier` from https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
# default is using the local timezone of the machine running the script
tweet_timezone = ""
#tweet_timezone = "Europe/Brussels"
# Do not add reference to "Original tweet" on toots
# default is false
remove_original_tweet_ref = false
#remove_original_tweet_ref = true
# Check if profile avatar or banner pictures were changed and update
# the Mastodon account if necessary
# Default is false
update_profile = false
#update_profile = true
# Maximum age of tweet to post (in days, decimal values accepted)
# Default is 1
tweet_max_age = 1
#
#tweet_max_age = 0.5
# Minimum age of tweet to post (in minutes)
# Default is 0 (post tweet as soon as possible)
tweet_delay = 0
#
#tweet_delay = 15
# How many seconds to pause between successive uploads of toots.
# Increase this value if successive tweets appear in the wrong order.
# Default is 0 (no pause)
#
# upload_pause = 5
# Maximum number of toots to post in each run
# Default is 0 (which means unlimited)
toot_cap = 0
#
#toot_cap = 2
# Replace twitter.com in links by random alternative out of this list
# List of nitter instances
# e.g. subst_twitter = ["nitter.net", ]
# Default is []
subst_twitter = []
#
#subst_twitter = ["nitter.net", ]
# Replace youtube.com in links by random alternative out of this list
# List of Invidious or Piped instances
# e.g. subst_youtube = ["piped.kavin.rocks", "invidious.flokinet.to", ]
# Default is []
subst_youtube = []
#
#subst_youtube = ["piped.kavin.rocks", "invidious.flokinet.to", ]
# Replace reddit.com in links by random alternative out of this list
# List of Teddit instances
# e.g. subst_reddit = ["teddit.net", ]
# Default is []
subst_reddit = []
#
#subst_reddit = ["teddit.net", ]
# Verbosity of log messages
# One of DEBUG, INFO, WARNING, ERROR, CRITICAL, OFF
# Default is "WARNING"
log_level = "WARNING"
#
#log_level = "INFO"
# How many days to keep log messages for
# Log messages older than log_days will be deleted
# Default is 3
log_days = 3
#
#log_days = 1

View File

@ -41,25 +41,6 @@ MAX_REC_COUNT = 50
# How many seconds to wait before giving up on a download (except video download)
HTTPS_REQ_TIMEOUT = 10
NITTER_URLS = [
'nitter.lacontrevoie.fr',
# 'nitter.cutelab.space', # 404 on 12/07/2023
'nitter.weiler.rocks', # added 15/06/2023
'nitter.nl', # added 16/06/2023
# 'n.l5.ca', # Not working 11/07/2023
# 'nitter.fly.dev', # gone 11/07/2023
# 'notabird.site', # gone 11/07/2023
# 'nitter.sethforprivacy.com', # too slow, removed 16/06/2023
# 'nitter.it', # different pic naming scheme
# 'twitter.femboy.hu', # 404 on 06/05/2023
# 'nitter.grimneko.de', # 404 on 01/06/2023
# 'nitter.namazso.eu', # lots of 403 27/02/2023
# 'twitter.beparanoid.de', # moved 27/02/2023
# 'nitter.fdn.fr', # not updated, rate limited, removed 06/02/2023
# 'nitter.hu',
# 'nitter.privacydev.net', # USA, added 06/02/2023, removed 15/02/2023 too slow
]
# Update from https://www.whatismybrowser.com/guides/the-latest-user-agent/
USER_AGENTS = [
'Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
@ -101,6 +82,11 @@ def build_config(args):
# Default options
options = {
'nitter_instances': [
'nitter.lacontrevoie.fr',
'nitter.weiler.rocks', # added 15/06/2023
'nitter.nl', # added 16/06/2023
],
'upload_videos': False,
'post_reply_to': False,
'skip_retweets': False,
@ -112,6 +98,7 @@ def build_config(args):
'remove_original_tweet_ref': False,
'tweet_max_age': float(1),
'tweet_delay': float(0),
'upload_pause': float(0),
'toot_cap': int(0),
'subst_twitter': [],
'subst_youtube': [],
@ -295,9 +282,9 @@ def get_timeline(nitter_url):
timeline = []
for item in list:
classes = item['class']
if 'timeline-item' in classes:
if 'timeline-item' in classes: # Individual tweet
timeline.append(item)
elif 'thread-line' in classes:
elif 'thread-line' in classes: # First tweet of a thread
# Get the first item of thread
first_item = item.find('div', class_='timeline-item')
timeline.append(first_item)
@ -905,6 +892,7 @@ def main(argv):
log_level = logging.INFO
elif ll_str == "WARNING":
log_level = logging.WARNING
print('log level warning set')
elif ll_str == "ERROR":
log_level = logging.ERROR
elif ll_str == "CRITICAL":
@ -940,7 +928,7 @@ def main(argv):
logging.info(' subst_twitter : ' + str(TOML['options']['subst_twitter']))
logging.info(' subst_youtube : ' + str(TOML['options']['subst_youtube']))
logging.info(' subst_reddit : ' + str(TOML['options']['subst_reddit']))
logging.info(' log_level : ' + str(TOML['options']['log_level']))
logging.info(' log_level : ' + TOML['options']['log_level'])
logging.info(' log_days : ' + str(TOML['options']['log_days']))
# Try to open database. If it does not exist, create it
@ -954,23 +942,19 @@ def main(argv):
db.execute('''CREATE INDEX IF NOT EXIsTS profile_index ON profiles (mastodon_instance, mastodon_account)''')
# Select random nitter instance to fetch updates from
nitter_url = 'https://' + NITTER_URLS[random.randint(0, len(NITTER_URLS) - 1)]
nitter_url = 'https://' + TOML['options']['nitter_instances'][random.randint(0, len(TOML['options']['nitter_instances']) - 1)]
# **********************************************************
# Load twitter page of user. Process all tweets and generate
# list of dictionaries ready to be posted on Mastodon
# **********************************************************
# To store content of all tweets from this user
tweets = []
# Load twitter page of user
soup, timeline = get_timeline(nitter_url)
logging.info('Processing ' + str(len(timeline)) + ' tweets found in timeline')
# **********************************************************
# Process each tweet and generate dictionary
# Process each tweet and generate an array of dictionaries
# with data ready to be posted on Mastodon
# **********************************************************
tweets = []
out_date_cnt = 0
in_db_cnt = 0
for status in timeline:
@ -1215,9 +1199,9 @@ def main(argv):
except MastodonAPIError:
# Assuming this is an:
# ERROR ('Mastodon API returned error', 422, 'Unprocessable Entity', 'Cannot attach files that have not finished processing. Try again in a moment!')
logging.warning('Mastodon API Error 422: Cannot attach files that have not finished processing. Waiting 60 seconds and retrying.')
# Wait 60 seconds
time.sleep(60)
logging.warning('Mastodon API Error 422: Cannot attach files that have not finished processing. Waiting 30 seconds and retrying.')
# Wait 30 seconds
time.sleep(30)
# retry posting
try:
toot = mastodon.status_post(tweet['tweet_text'], media_ids=media_ids)
@ -1235,7 +1219,7 @@ def main(argv):
posted_cnt += 1
logging.debug('Tweet %s posted on %s', tweet['tweet_id'], TOML['config']['mastodon_user'])
# Test to find out if slowing down successive posting helps with ordering of threads
time.sleep(5)
time.sleep(TOML['options']['upload_pause'])
# Insert toot id into database
if 'id' in toot: