diff --git a/CHANGELOG.md b/CHANGELOG.md index bb7e9eb..df19a2e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog +**28 JUN 2023** VERSION 4.0 + +* Added option to update avatar and banner pictures on profile if changed on Twitter +* Tweaked list of nitter instances +* Updated list of user agents + **13 MAR 2023** VERSION 3.2.2 Updated list of nitter instances **21 FEB 2023** VERSION 3.2.1 Updated user agents and list of nitter instances diff --git a/README.md b/README.md index 92f1a8e..a107969 100644 --- a/README.md +++ b/README.md @@ -3,11 +3,18 @@ Twoot is a python script that mirrors tweets from a twitter account to a Mastodon account. It is simple to set-up on a local machine, configurable and feature-rich. -**28 JUN 2023** VERSION 4.0 +**12 JUL 2023** VERSION 4.1 -* Added option to update avatar and banner pictures on profile if changed on Twitter +**Nitter has recently added a change that highlights tweets that are part of a thread. Twoot +cannot handle this modification yet; therefore TWEETS THAT ARE PART OF A THREAD ARE CURRENTLY +IGNORED.** A warning message is added to the log file instead. +An update is being worked on. Stay tuned. + +**A new dependency on the Python module `pytz` has been added**. Please run `pip install pytz` +in your environment to install it. + +* Added option to display timestamp of the original tweet in toot * Tweaked list of nitter instances -* Updated list of user agents > Previous updates can be found in CHANGELOG. @@ -28,6 +35,7 @@ It is simple to set-up on a local machine, configurable and feature-rich. * Optionally remove trackers (UTM parameters) from URLs * Optional domain substitution for Twitter, Youtube and Reddit domains (e.g. 
[Nitter](https://github.com/zedeus/nitter/wiki/Instances), [Invidious](https://redirect.invidious.io/) and [teddit](https://teddit.net/) respectively) +* Option to add timestamp of the original tweet to bottom of toot * Optional footer line to add tags at bottom of toot * Allows rate-limiting posts to Mastodon instance @@ -101,6 +109,21 @@ have changed on the twitter page. This check compares the name of files used by of the files that have been uploaded on Mastodon and if they differ both files are downloaded from twitter and uploaded on Mastodon. The check is very fast if there is no update. +### Adding timestamp of original tweet to toot + +Use `tweet_time_format` option in configuration file to specify the datetime format to display the date +at which the tweet was published next to the "Original tweet" link. Valid format specifiers are +the same as those used to format datetimes in python +(https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior). +e.g. `tweet_time_format = "(%d %b %Y %H:%M %Z)"` + +An empty or missing `tweet_time_format` disables the display of the timestamp. + +By default, dates are displayed in the local time zone of the machine running the script. To display the timestamp in another time zone, +use the `tweet_timezone` option in configuration file. Valid time zone names are those of the Olson time +zone database (https://en.wikipedia.org/wiki/Tz_database) +e.g. `tweet_timezone = "Europe/Paris"` + ### Rate control Default max age is 1 day. Decimal values are OK. diff --git a/default.toml b/default.toml index 21f589b..c45fa86 100644 --- a/default.toml +++ b/default.toml @@ -34,6 +34,18 @@ remove_trackers_from_urls = false # Default is "" footer = "" +# If specified, also display a timestamp on the "Original Tweet" line +# in the given format e.g. 
def has_class_timeline_item_but_not_thread(tag):
    """
    Temporary mitigation for the inability to parse threads: select timeline
    items while skipping tweets that are part of a thread.

    Written as a filter function for BeautifulSoup's find_all(), which calls
    it once per tag and keeps the tags for which it returns True.

    :param tag: element to test; must provide has_attr() and item access to
                its 'class' attribute (as a collection of class names)
    :return: True if the tag has the class 'timeline-item' and does not have
             the class 'thread'; False in every other case
    """
    # Tags without any class attribute cannot be timeline items
    if not tag.has_attr('class'):
        return False

    classes = tag['class']

    # Only timeline items are of interest
    if 'timeline-item' not in classes:
        return False

    # Timeline items flagged as thread members are skipped, with a trace in the log
    if 'thread' in classes:
        logging.warning('Tweet is part of a thread which are a new nitter feature that is not handled yet. Skipping')
        return False

    return True
logging.info('Processing ' + str(len(timeline)) + ' tweets found in timeline') @@ -879,21 +899,26 @@ def main(argv): in_db_cnt = 0 for status in timeline: # Extract tweet ID and status ID - tweet_id = status.find('a', class_='tweet-link').get('href').strip('#m') - status_id = tweet_id.split('/')[3] + try: + tweet_id = status.find('a', class_='tweet-link').get('href').strip('#m') + status_id = tweet_id.split('/')[3] + except Exception as e: + logging.critical('Malformed timeline downloaded from nitter instance') + logging.debug(e) + shutdown(-1) logging.debug('processing tweet %s', tweet_id) # Extract time stamp time_string = status.find('span', class_='tweet-date').a.get('title') try: - timestamp = datetime.strptime(time_string, '%d/%m/%Y, %H:%M:%S').timestamp() + timestamp = datetime.strptime(time_string, '%d/%m/%Y, %H:%M:%S') except: # Dec 21, 2021 · 12:00 PM UTC - timestamp = datetime.strptime(time_string, '%b %d, %Y · %I:%M %p %Z').timestamp() + timestamp = datetime.strptime(time_string, '%b %d, %Y · %I:%M %p %Z') # Check if time is within acceptable range - if not is_time_valid(timestamp): + if not is_time_valid(timestamp.timestamp()): out_date_cnt += 1 logging.debug("Tweet outside valid time range, skipping") continue @@ -976,9 +1001,25 @@ def main(argv): # Add footer with link to original tweet if TOML['options']['remove_original_tweet_ref'] is False: if TOML['options']['footer'] != '': - tweet_text += '\nOriginal tweet : ' + substitute_source(full_status_url) + tweet_text += '\nOriginal tweet: ' + substitute_source(full_status_url) else: - tweet_text += '\n\nOriginal tweet : ' + substitute_source(full_status_url) + tweet_text += '\n\nOriginal tweet: ' + substitute_source(full_status_url) + + # Add timestamp to the "Original Tweet" line + if TOML['options']['tweet_time_format'] != "": + timestamp_display = timestamp + # Adjust timezone + import pytz + if TOML['options']['tweet_timezone'] != "": + timezone_display = 
pytz.timezone(TOML['options']['tweet_timezone']) + else: # Use local timezone by default + timezone_display = datetime.now().astimezone().tzinfo + logging.debug("Timestamp UTC: " + str(timestamp)) + logging.debug("Timezone to use: " + str(timezone_display)) + timestamp_display = pytz.utc.localize(timestamp).astimezone(timezone_display) + logging.debug("Timestamp converted " + str(timestamp_display)) + + tweet_text += ' ' + datetime.strftime(timestamp_display, TOML['options']['tweet_time_format']) # If no media was specifically added in the tweet, try to get the first picture # with "twitter:image" meta tag in first linked page in tweet text @@ -1020,7 +1061,7 @@ def main(argv): tweet = { "author": author, "author_account": author_account, - "timestamp": timestamp, + "timestamp": timestamp.timestamp(), "tweet_id": tweet_id, "tweet_text": tweet_text, "video": video_file, @@ -1103,9 +1144,9 @@ def main(argv): except MastodonAPIError: # Assuming this is an: # ERROR ('Mastodon API returned error', 422, 'Unprocessable Entity', 'Cannot attach files that have not finished processing. Try again in a moment!') - logging.warning('Mastodon API Error 422: Cannot attach files that have not finished processing. Waiting 15 seconds and retrying.') - # Wait 15 seconds - time.sleep(15) + logging.warning('Mastodon API Error 422: Cannot attach files that have not finished processing. Waiting 60 seconds and retrying.') + # Wait 60 seconds + time.sleep(60) # retry posting try: toot = mastodon.status_post(tweet['tweet_text'], media_ids=media_ids)