Compare commits

..

No commits in common. "fb8d83800e8ce829f69e82abd98fa69b5e19bcfc" and "b10a8392c88e335b77fbf9267c74665886be521b" have entirely different histories.

2 changed files with 59 additions and 65 deletions

View File

@ -9,110 +9,88 @@ mastodon_instance = ""
mastodon_user = "" mastodon_user = ""
[options] [options]
# List of nitter instances from which to pick at random to download tweets.
# Specify only the address without leading `https://` and without trailing `/`
# By default a built-in list of 2-3 known good instances is used
#
#nitter_instances = ["nitter.nl", "nitter.fdn.fr"]
# Download videos from twitter and upload them on Mastodon # Download videos from twitter and upload them on Mastodon
# Default is false # Default is false
# upload_videos = false
#upload_videos = true
# Also post the "reply-to" tweets from twitter account # Also post the "reply-to" tweets from twitter account
# Default is false # Default is false
# post_reply_to = false
#post_reply_to = true
# Do not post the retweets of other twitter accounts # Do not post the retweets of other twitter accounts
# Default is false # Default is false
# skip_retweets = false
#skip_retweets = true
# Replace redirected links in tweets with direct URLs # Replace redirected links in tweets with direct URLs
# Default is false # Default is false
# remove_link_redirections = false
#remove_link_redirections = true
# Clean up URLs in tweets to remove trackers # Clean up URLs in tweets to remove trackers
# Default is false # Default is false
# remove_trackers_from_urls = false
#remove_trackers_from_urls = true
# Footer line added at bottom of toots # Footer line added at bottom of toots
# e.g. "#twitter #bot"
# Default is "" # Default is ""
# footer = ""
#footer = "#twitter #bot"
# If specified, also display a timestamp on the "Original Tweet" line # If specified, also display a timestamp on the "Original Tweet" line
# in the given format. # in the given format e.g. "%d %b %Y %H:%M %Z"
# see https://docs.python.org/3/library/datetime.html#strftime-strptime-behavior # see https://docs.python.org/3/library/datetime.html#strftime-strptime-behavior
# Default is "" (tweet timestamp is not displayed) # Default is "" (tweet timestamp is not displayed)
# tweet_time_format = ""
#tweet_time_format = "%d %b %Y %H:%M %Z"
# Specify the timezone that the timestamp on the tweet should be displayed in # Specify the timezone that the timestamp on the tweet should be displayed in
# Use `tz_identifier` from https://en.wikipedia.org/wiki/List_of_tz_database_time_zones # Use the `tz_identifier` from https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
# example "Europe/Brussels"
# default is using the local timezone of the machine running the script # default is using the local timezone of the machine running the script
#tweet_timezone = "Europe/Brussels" tweet_timezone = ""
# Do not add reference to "Original tweet" on toots # Do not add reference to "Original tweet" on toots
# default is false # default is false
#remove_original_tweet_ref = true remove_original_tweet_ref = false
# Check if profile avatar or banner pictures were changed and update # Check if profile avatar or banner pictures were changed and update
# the Mastodon account if necessary # the Mastodon account if necessary
# Default is false # Default is false
#update_profile = true update_profile = false
# Maximum age of tweet to post (in days, decimal values accepted) # Maximum age of tweet to post (in days, decimal values accepted)
# Default is 1 # Default is 1
# tweet_max_age = 1
#tweet_max_age = 0.5
# Minimum age of tweet to post (in minutes) # Minimum age of tweet to post (in minutes)
# Default is 0 (post tweet as soon as possible) # Default is 0 (post tweet as soon as possible)
# tweet_delay = 0
#tweet_delay = 15
# How many seconds to pause between successive uploads of toots.
# Increase this value if successive tweets appear in the wrong order.
# Default is 0 (no pause)
#
# upload_pause = 5
# Maximum number of toots to post in each run # Maximum number of toots to post in each run
# Default is 0 (which means unlimited) # Default is 0 (which means unlimited)
# toot_cap = 0
#toot_cap = 2
# Replace twitter.com in links by random alternative out of this list # Replace twitter.com in links by random alternative out of this list
# List of nitter instances # List of nitter instances
# e.g. subst_twitter = ["nitter.net", ]
# Default is [] # Default is []
# subst_twitter = []
#subst_twitter = ["nitter.net", ]
# Replace youtube.com in links by random alternative out of this list # Replace youtube.com in links by random alternative out of this list
# List of Invidious or Piped instances # List of Invidious or Piped instances
# e.g. subst_youtube = ["piped.kavin.rocks", "invidious.flokinet.to", ]
# Default is [] # Default is []
# subst_youtube = []
#subst_youtube = ["piped.kavin.rocks", "invidious.flokinet.to", ]
# Replace reddit.com in links by random alternative out of this list # Replace reddit.com in links by random alternative out of this list
# List of Teddit instances # List of Teddit instances
# e.g. subst_reddit = ["teddit.net", ]
# Default is [] # Default is []
# subst_reddit = []
#subst_reddit = ["teddit.net", ]
# Verbosity of log messages # Verbosity of log messages
# One of DEBUG, INFO, WARNING, ERROR, CRITICAL, OFF # One of DEBUG, INFO, WARNING, ERROR, CRITICAL, OFF
# Default is "WARNING" # Default is "WARNING"
# log_level = "WARNING"
#log_level = "INFO"
# How many days to keep log messages for # How many days to keep log messages for
# Log messages older than log_days will be deleted # Log messages older than log_days will be deleted
# Default is 3 # Default is 3
# log_days = 3
#log_days = 1

View File

@ -41,6 +41,25 @@ MAX_REC_COUNT = 50
# How many seconds to wait before giving up on a download (except video download) # How many seconds to wait before giving up on a download (except video download)
HTTPS_REQ_TIMEOUT = 10 HTTPS_REQ_TIMEOUT = 10
NITTER_URLS = [
'nitter.lacontrevoie.fr',
# 'nitter.cutelab.space', # 404 on 12/07/2023
'nitter.weiler.rocks', # added 15/06/2023
'nitter.nl', # added 16/06/2023
# 'n.l5.ca', # Not working 11/07/2023
# 'nitter.fly.dev', # gone 11/07/2023
# 'notabird.site', # gone 11/07/2023
# 'nitter.sethforprivacy.com', # too slow, removed 16/06/2023
# 'nitter.it', # different pic naming scheme
# 'twitter.femboy.hu', # 404 on 06/05/2023
# 'nitter.grimneko.de', # 404 on 01/06/2023
# 'nitter.namazso.eu', # lots of 403 27/02/2023
# 'twitter.beparanoid.de', # moved 27/02/2023
# 'nitter.fdn.fr', # not updated, rate limited, removed 06/02/2023
# 'nitter.hu',
# 'nitter.privacydev.net', # USA, added 06/02/2023, removed 15/02/2023 too slow
]
# Update from https://www.whatismybrowser.com/guides/the-latest-user-agent/ # Update from https://www.whatismybrowser.com/guides/the-latest-user-agent/
USER_AGENTS = [ USER_AGENTS = [
'Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36', 'Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
@ -82,11 +101,6 @@ def build_config(args):
# Default options # Default options
options = { options = {
'nitter_instances': [
'nitter.lacontrevoie.fr',
'nitter.weiler.rocks', # added 15/06/2023
'nitter.nl', # added 16/06/2023
],
'upload_videos': False, 'upload_videos': False,
'post_reply_to': False, 'post_reply_to': False,
'skip_retweets': False, 'skip_retweets': False,
@ -98,7 +112,6 @@ def build_config(args):
'remove_original_tweet_ref': False, 'remove_original_tweet_ref': False,
'tweet_max_age': float(1), 'tweet_max_age': float(1),
'tweet_delay': float(0), 'tweet_delay': float(0),
'upload_pause': float(0),
'toot_cap': int(0), 'toot_cap': int(0),
'subst_twitter': [], 'subst_twitter': [],
'subst_youtube': [], 'subst_youtube': [],
@ -282,9 +295,9 @@ def get_timeline(nitter_url):
timeline = [] timeline = []
for item in list: for item in list:
classes = item['class'] classes = item['class']
if 'timeline-item' in classes: # Individual tweet if 'timeline-item' in classes:
timeline.append(item) timeline.append(item)
elif 'thread-line' in classes: # First tweet of a thread elif 'thread-line' in classes:
# Get the first item of thread # Get the first item of thread
first_item = item.find('div', class_='timeline-item') first_item = item.find('div', class_='timeline-item')
timeline.append(first_item) timeline.append(first_item)
@ -892,7 +905,6 @@ def main(argv):
log_level = logging.INFO log_level = logging.INFO
elif ll_str == "WARNING": elif ll_str == "WARNING":
log_level = logging.WARNING log_level = logging.WARNING
print('log level warning set')
elif ll_str == "ERROR": elif ll_str == "ERROR":
log_level = logging.ERROR log_level = logging.ERROR
elif ll_str == "CRITICAL": elif ll_str == "CRITICAL":
@ -928,7 +940,7 @@ def main(argv):
logging.info(' subst_twitter : ' + str(TOML['options']['subst_twitter'])) logging.info(' subst_twitter : ' + str(TOML['options']['subst_twitter']))
logging.info(' subst_youtube : ' + str(TOML['options']['subst_youtube'])) logging.info(' subst_youtube : ' + str(TOML['options']['subst_youtube']))
logging.info(' subst_reddit : ' + str(TOML['options']['subst_reddit'])) logging.info(' subst_reddit : ' + str(TOML['options']['subst_reddit']))
logging.info(' log_level : ' + TOML['options']['log_level']) logging.info(' log_level : ' + str(TOML['options']['log_level']))
logging.info(' log_days : ' + str(TOML['options']['log_days'])) logging.info(' log_days : ' + str(TOML['options']['log_days']))
# Try to open database. If it does not exist, create it # Try to open database. If it does not exist, create it
@ -942,19 +954,23 @@ def main(argv):
db.execute('''CREATE INDEX IF NOT EXIsTS profile_index ON profiles (mastodon_instance, mastodon_account)''') db.execute('''CREATE INDEX IF NOT EXIsTS profile_index ON profiles (mastodon_instance, mastodon_account)''')
# Select random nitter instance to fetch updates from # Select random nitter instance to fetch updates from
nitter_url = 'https://' + TOML['options']['nitter_instances'][random.randint(0, len(TOML['options']['nitter_instances']) - 1)] nitter_url = 'https://' + NITTER_URLS[random.randint(0, len(NITTER_URLS) - 1)]
# **********************************************************
# Load twitter page of user. Process all tweets and generate
# list of dictionaries ready to be posted on Mastodon
# **********************************************************
# To store content of all tweets from this user
tweets = []
# Load twitter page of user
soup, timeline = get_timeline(nitter_url) soup, timeline = get_timeline(nitter_url)
logging.info('Processing ' + str(len(timeline)) + ' tweets found in timeline') logging.info('Processing ' + str(len(timeline)) + ' tweets found in timeline')
# ********************************************************** # **********************************************************
# Process each tweet and generate an array of dictionaries # Process each tweet and generate a dictionary
# with data ready to be posted on Mastodon # with data ready to be posted on Mastodon
# ********************************************************** # **********************************************************
tweets = []
out_date_cnt = 0 out_date_cnt = 0
in_db_cnt = 0 in_db_cnt = 0
for status in timeline: for status in timeline:
@ -1199,9 +1215,9 @@ def main(argv):
except MastodonAPIError: except MastodonAPIError:
# Assuming this is an: # Assuming this is an:
# ERROR ('Mastodon API returned error', 422, 'Unprocessable Entity', 'Cannot attach files that have not finished processing. Try again in a moment!') # ERROR ('Mastodon API returned error', 422, 'Unprocessable Entity', 'Cannot attach files that have not finished processing. Try again in a moment!')
logging.warning('Mastodon API Error 422: Cannot attach files that have not finished processing. Waiting 30 seconds and retrying.') logging.warning('Mastodon API Error 422: Cannot attach files that have not finished processing. Waiting 60 seconds and retrying.')
# Wait 30 seconds # Wait 60 seconds
time.sleep(30) time.sleep(60)
# retry posting # retry posting
try: try:
toot = mastodon.status_post(tweet['tweet_text'], media_ids=media_ids) toot = mastodon.status_post(tweet['tweet_text'], media_ids=media_ids)
@ -1219,7 +1235,7 @@ def main(argv):
posted_cnt += 1 posted_cnt += 1
logging.debug('Tweet %s posted on %s', tweet['tweet_id'], TOML['config']['mastodon_user']) logging.debug('Tweet %s posted on %s', tweet['tweet_id'], TOML['config']['mastodon_user'])
# Test to find out if slowing down successive posting helps with ordering of threads # Test to find out if slowing down successive posting helps with ordering of threads
time.sleep(TOML['options']['upload_pause']) time.sleep(5)
# Insert toot id into database # Insert toot id into database
if 'id' in toot: if 'id' in toot: