From 191c23074d258874c3a8a87ccd17e5011a643318 Mon Sep 17 00:00:00 2001 From: jeancf Date: Fri, 23 Jun 2023 17:35:01 +0200 Subject: [PATCH 01/25] test timestamp --- twoot.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/twoot.py b/twoot.py index bdcb2a9..1ae4d3c 100755 --- a/twoot.py +++ b/twoot.py @@ -980,6 +980,10 @@ def main(argv): else: tweet_text += '\n\nOriginal tweet : ' + substitute_source(full_status_url) + # Check what timestamp would look like + logging.debug("TEST TIMESTAMP") + logging.debug(tweet_text + ' ' + time_string) + # If no media was specifically added in the tweet, try to get the first picture # with "twitter:image" meta tag in first linked page in tweet text if not photos: From 9e66475fe0e182fcfb71808481f49621bb832369 Mon Sep 17 00:00:00 2001 From: jeancf Date: Wed, 28 Jun 2023 12:53:23 +0200 Subject: [PATCH 02/25] Set release date --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9c492cf..92f1a8e 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ Twoot is a python script that mirrors tweets from a twitter account to a Mastodon account. It is simple to set-up on a local machine, configurable and feature-rich. -**19 JUN 2023** VERSION 4.0 +**28 JUN 2023** VERSION 4.0 * Added option to update avatar and banner pictures on profile if changed on Twitter * Tweaked list of nitter instances From 3732392dbf00aee13189f0428ab7ae825c0f98dc Mon Sep 17 00:00:00 2001 From: jeancf Date: Wed, 28 Jun 2023 13:53:26 +0200 Subject: [PATCH 03/25] Longer wait to mitigate API error 422 --- twoot.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/twoot.py b/twoot.py index 1ae4d3c..40c56bf 100755 --- a/twoot.py +++ b/twoot.py @@ -1107,9 +1107,9 @@ def main(argv): except MastodonAPIError: # Assuming this is an: # ERROR ('Mastodon API returned error', 422, 'Unprocessable Entity', 'Cannot attach files that have not finished processing. Try again in a moment!') - logging.warning('Mastodon API Error 422: Cannot attach files that have not finished processing. Waiting 15 seconds and retrying.') - # Wait 15 seconds - time.sleep(15) + logging.warning('Mastodon API Error 422: Cannot attach files that have not finished processing. Waiting 60 seconds and retrying.') + # Wait 60 seconds + time.sleep(60) # retry posting try: toot = mastodon.status_post(tweet['tweet_text'], media_ids=media_ids) From 23d897f42c7fad74fc9e106306a880028d62ca8f Mon Sep 17 00:00:00 2001 From: jeancf Date: Wed, 28 Jun 2023 14:58:02 +0200 Subject: [PATCH 04/25] Small adjustment --- twoot.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/twoot.py b/twoot.py index 40c56bf..fcc6ea3 100755 --- a/twoot.py +++ b/twoot.py @@ -976,13 +976,13 @@ def main(argv): # Add footer with link to original tweet if TOML['options']['remove_original_tweet_ref'] is False: if TOML['options']['footer'] != '': - tweet_text += '\nOriginal tweet : ' + substitute_source(full_status_url) + tweet_text += '\nOriginal tweet: ' + substitute_source(full_status_url) else: - tweet_text += '\n\nOriginal tweet : ' + substitute_source(full_status_url) + tweet_text += '\n\nOriginal tweet: ' + substitute_source(full_status_url) # Check what timestamp would look like logging.debug("TEST TIMESTAMP") - logging.debug(tweet_text + ' ' + time_string) + logging.debug(tweet_text + ' (' + time_string + ')') # If no media was specifically added in the tweet, try to get the first picture # with "twitter:image" meta tag in first linked page in tweet text From b5f6405ceb5d6e22fcf39963c6512386a1da6584 Mon Sep 17 00:00:00 2001 From: jeancf Date: Wed, 28 Jun 2023 15:19:46 +0200 Subject: [PATCH 05/25] Test further --- twoot.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/twoot.py b/twoot.py index fcc6ea3..87cd36e 100755 --- a/twoot.py +++ b/twoot.py @@ -981,8 +981,8 @@ def main(argv): tweet_text += '\n\nOriginal tweet: ' + substitute_source(full_status_url) # Check what timestamp would look like - logging.debug("TEST TIMESTAMP") - logging.debug(tweet_text + ' (' + time_string + ')') + logging.debug("TEST TIMESTAMP " + str(time_string)) + logging.debug(tweet_text + ' (' + datetime.strftime(timestamp, '%d %b %Y %H:%M %Z') + ')') # If no media was specifically added in the tweet, try to get the first picture # with "twitter:image" meta tag in first linked page in tweet text From 156df6040c6175281e455bf24390c5b3f3d1a9cf Mon Sep 17 00:00:00 2001 From: jeancf Date: Wed, 28 Jun 2023 16:29:32 +0200 Subject: [PATCH 06/25] correct logging message --- twoot.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/twoot.py b/twoot.py index 87cd36e..f2fafb3 100755 --- a/twoot.py +++ b/twoot.py @@ -796,8 +796,8 @@ def main(argv): logging.info(' tweet_delay : ' + str(TOML['options']['tweet_delay'])) logging.info(' toot_cap : ' + str(TOML['options']['toot_cap'])) logging.info(' subst_twitter : ' + str(TOML['options']['subst_twitter'])) - logging.info(' subst_twitter : ' + str(TOML['options']['subst_youtube'])) - logging.info(' subst_twitter : ' + str(TOML['options']['subst_reddit'])) + logging.info(' subst_youtube : ' + str(TOML['options']['subst_youtube'])) + logging.info(' subst_reddit : ' + str(TOML['options']['subst_reddit'])) logging.info(' log_level : ' + str(TOML['options']['log_level'])) logging.info(' log_days : ' + str(TOML['options']['log_days'])) From 93f8e493db1ed9e5c095c5639342fe1fc220d6a5 Mon Sep 17 00:00:00 2001 From: jeancf Date: Wed, 28 Jun 2023 17:46:00 +0200 Subject: [PATCH 07/25] Fixed float timestamp bug --- twoot.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/twoot.py b/twoot.py index f2fafb3..7f1d186 100755 --- a/twoot.py +++ b/twoot.py @@ -887,10 +887,10 @@ def main(argv): # Extract time stamp time_string = status.find('span', class_='tweet-date').a.get('title') try: - timestamp = datetime.strptime(time_string, '%d/%m/%Y, %H:%M:%S').timestamp() + timestamp = datetime.strptime(time_string, '%d/%m/%Y, %H:%M:%S') except: # Dec 21, 2021 · 12:00 PM UTC - timestamp = datetime.strptime(time_string, '%b %d, %Y · %I:%M %p %Z').timestamp() + timestamp = datetime.strptime(time_string, '%b %d, %Y · %I:%M %p %Z') # Check if time is within acceptable range if not is_time_valid(timestamp): @@ -1024,7 +1024,7 @@ def main(argv): tweet = { "author": author, "author_account": author_account, - "timestamp": timestamp, + "timestamp": timestamp.timestamp(), "tweet_id": tweet_id, "tweet_text": tweet_text, "video": video_file, From f1b7247b3d528a3dbffb637f9a0c9367e0b5c409 Mon Sep 17 00:00:00 2001 From: jeancf Date: Wed, 28 Jun 2023 17:57:15 +0200 Subject: [PATCH 08/25] Corrected one more float bug --- twoot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/twoot.py b/twoot.py index 7f1d186..ecb2148 100755 --- a/twoot.py +++ b/twoot.py @@ -893,7 +893,7 @@ def main(argv): timestamp = datetime.strptime(time_string, '%b %d, %Y · %I:%M %p %Z') # Check if time is within acceptable range - if not is_time_valid(timestamp): + if not is_time_valid(timestamp.timestamp()): out_date_cnt += 1 logging.debug("Tweet outside valid time range, skipping") continue From 969b6849b813fcb26ff97484d0d404eb9654e876 Mon Sep 17 00:00:00 2001 From: jeancf Date: Wed, 28 Jun 2023 18:34:42 +0200 Subject: [PATCH 09/25] Other time representation --- twoot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/twoot.py b/twoot.py index ecb2148..2a7a40f 100755 --- a/twoot.py +++ b/twoot.py @@ -982,7 +982,7 @@ def main(argv): # Check what timestamp would look like logging.debug("TEST TIMESTAMP " + str(time_string)) - logging.debug(tweet_text + ' (' + datetime.strftime(timestamp, '%d %b %Y %H:%M %Z') + ')') + logging.debug(tweet_text + ' (' + datetime.strftime(timestamp, '%x %X') + ')') # If no media was specifically added in the tweet, try to get the first picture # with "twitter:image" meta tag in first linked page in tweet text From cb798b7b170343abf56d6ec27745e05e6e605645 Mon Sep 17 00:00:00 2001 From: jeancf Date: Wed, 28 Jun 2023 21:47:48 +0200 Subject: [PATCH 10/25] Added TOML option --- default.toml | 6 ++++++ twoot.py | 10 ++++++---- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/default.toml b/default.toml index 21f589b..c01b268 100644 --- a/default.toml +++ b/default.toml @@ -34,6 +34,12 @@ remove_trackers_from_urls = false # Default is "" footer = "" +# If specified, also diplay a timestamp on the "Original Tweet" line +# in the given format e.g. "%D %b %Y %H:%M" +# see https://docs.python.org/3/library/datetime.html#strftime-strptime-behavior +# Default is "" (tweet timestamp is not displayed) +tweet_time_format = "" + # Do not add reference to "Original tweet" on toots # default is false remove_original_tweet_ref = false diff --git a/twoot.py b/twoot.py index 2a7a40f..e2b18a5 100755 --- a/twoot.py +++ b/twoot.py @@ -90,7 +90,8 @@ def build_config(args): 'skip_retweets': False, 'remove_link_redirections': False, 'remove_trackers_from_urls': False, - 'footer': '', + 'footer': "", + 'tweet_time_format': "", 'remove_original_tweet_ref': False, 'tweet_max_age': float(1), 'tweet_delay': float(0), @@ -790,6 +791,7 @@ def main(argv): logging.info(' remove_link_redirections : ' + str(TOML['options']['remove_link_redirections'])) logging.info(' remove_trackers_from_urls: ' + str(TOML['options']['remove_trackers_from_urls'])) logging.info(' footer : ' + TOML['options']['footer']) + logging.info(' tweet_time_format : ' + TOML['options']['tweet_time_format']) logging.info(' remove_original_tweet_ref: ' + str(TOML['options']['remove_original_tweet_ref'])) logging.info(' update_profile : ' + str(TOML['options']['update_profile'])) logging.info(' tweet_max_age : ' + str(TOML['options']['tweet_max_age'])) @@ -980,9 +982,9 @@ def main(argv): else: tweet_text += '\n\nOriginal tweet: ' + substitute_source(full_status_url) - # Check what timestamp would look like - logging.debug("TEST TIMESTAMP " + str(time_string)) - logging.debug(tweet_text + ' (' + datetime.strftime(timestamp, '%x %X') + ')') + # Add timestamp to the "Original Tweet" line + if TOML['options']['tweet_time_format'] != "": + tweet_text += ' (' + datetime.strftime(timestamp, TOML['options']['tweet_time_format']) + ')' # If no media was specifically added in the tweet, try to get the first picture # with "twitter:image" meta tag in first linked page in tweet text From 95ef8929dd753108a880c800c1f91148a5ac5fc4 Mon Sep 17 00:00:00 2001 From: jeancf Date: Thu, 29 Jun 2023 17:05:15 +0200 Subject: [PATCH 11/25] Do not force the use of brackets --- twoot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/twoot.py b/twoot.py index e2b18a5..0bd8e56 100755 --- a/twoot.py +++ b/twoot.py @@ -984,7 +984,7 @@ def main(argv): # Add timestamp to the "Original Tweet" line if TOML['options']['tweet_time_format'] != "": - tweet_text += ' (' + datetime.strftime(timestamp, TOML['options']['tweet_time_format']) + ')' + tweet_text += ' ' + datetime.strftime(timestamp, TOML['options']['tweet_time_format']) # If no media was specifically added in the tweet, try to get the first picture # with "twitter:image" meta tag in first linked page in tweet text From 6ad4219e93672a94ef30b20cc3261cdab8c30aa7 Mon Sep 17 00:00:00 2001 From: jeancf Date: Tue, 11 Jul 2023 11:38:37 +0200 Subject: [PATCH 12/25] Added tweet_timezone ro toml file --- default.toml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/default.toml b/default.toml index c01b268..d4f024c 100644 --- a/default.toml +++ b/default.toml @@ -40,6 +40,12 @@ footer = "" # Default is "" (tweet timestamp is not displayed) tweet_time_format = "" +# Specify the timezone that the timestamp on the tweet should be displayed in +# Use the `tz_identifier`from https://en.wikipedia.org/wiki/List_of_tz_database_time_zones +# example "Europe/Brussels" +# default is UTC +tweet_timezone = "UTC" + # Do not add reference to "Original tweet" on toots # default is false remove_original_tweet_ref = false From 3af7c29b7f16dc96565e48f78d8dd310ec7c1fd7 Mon Sep 17 00:00:00 2001 From: jeancf Date: Tue, 11 Jul 2023 11:44:22 +0200 Subject: [PATCH 13/25] changed default of tweet_timezone --- default.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/default.toml b/default.toml index d4f024c..f0e8a8c 100644 --- a/default.toml +++ b/default.toml @@ -43,8 +43,8 @@ tweet_time_format = "" # Specify the timezone that the timestamp on the tweet should be displayed in # Use the `tz_identifier`from https://en.wikipedia.org/wiki/List_of_tz_database_time_zones # example "Europe/Brussels" -# default is UTC -tweet_timezone = "UTC" +# default is using the local timezone of the machine running the script +tweet_timezone = "" # Do not add reference to "Original tweet" on toots # default is false From dc1ad8208093d288a3fc1f4e40ed8a85475411bf Mon Sep 17 00:00:00 2001 From: jeancf Date: Tue, 11 Jul 2023 13:15:43 +0200 Subject: [PATCH 14/25] Added timezone logic --- twoot.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/twoot.py b/twoot.py index 0bd8e56..e7c716b 100755 --- a/twoot.py +++ b/twoot.py @@ -92,6 +92,7 @@ def build_config(args): 'remove_trackers_from_urls': False, 'footer': "", 'tweet_time_format': "", + 'tweet_timezone': "", 'remove_original_tweet_ref': False, 'tweet_max_age': float(1), 'tweet_delay': float(0), @@ -792,6 +793,7 @@ def main(argv): logging.info(' remove_trackers_from_urls: ' + str(TOML['options']['remove_trackers_from_urls'])) logging.info(' footer : ' + TOML['options']['footer']) logging.info(' tweet_time_format : ' + TOML['options']['tweet_time_format']) + logging.info(' tweet_timezone : ' + TOML['options']['tweet_timezone']) logging.info(' remove_original_tweet_ref: ' + str(TOML['options']['remove_original_tweet_ref'])) logging.info(' update_profile : ' + str(TOML['options']['update_profile'])) logging.info(' tweet_max_age : ' + str(TOML['options']['tweet_max_age'])) @@ -984,7 +986,19 @@ def main(argv): # Add timestamp to the "Original Tweet" line if TOML['options']['tweet_time_format'] != "": - tweet_text += ' ' + datetime.strftime(timestamp, TOML['options']['tweet_time_format']) + timestamp_display = timestamp + # Adjust timezone + import pytz + if TOML['options']['tweet_timezone'] != "": + timezone_display = pytz.timezone(TOML['options']['tweet_timezone']) + else: # Use local timezone by default + timezone_display = datetime.now().astimezone().tzinfo + logging.debug("Timestamp UTC " + str(timestamp)) + logging.debug("Timezone to use" + str(timezone_display)) + timestamp_display = pytz.utc.localize(timestamp).astimezone(timezone_display) + logging.debug("Timestamp converted " + str(timestamp_display)) + + tweet_text += ' (' + datetime.strftime(timestamp_display, TOML['options']['tweet_time_format']) + ')' # If no media was specifically added in the tweet, try to get the first picture # with "twitter:image" meta tag in first linked page in tweet text From e3c8d228a7eb39ab041c57781ca8bc71dcec22fb Mon Sep 17 00:00:00 2001 From: jeancf Date: Tue, 11 Jul 2023 13:25:42 +0200 Subject: [PATCH 15/25] Cleanup nitter instances --- twoot.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/twoot.py b/twoot.py index e7c716b..da8b2e8 100755 --- a/twoot.py +++ b/twoot.py @@ -42,13 +42,15 @@ MAX_REC_COUNT = 50 HTTPS_REQ_TIMEOUT = 10 NITTER_URLS = [ + 'https://canada.unofficialbird.com/', # New 11/07/2023 + 'https://nederland.unofficialbird.com/', # New 11/07/2023 'https://nitter.lacontrevoie.fr', - 'https://n.l5.ca', 'https://nitter.cutelab.space', # USA, added 16/02/2023 'https://nitter.weiler.rocks', # added 15/06/2023 - 'https://nitter.fly.dev', # anycast, added 06/02/2023 - 'https://notabird.site', # anycast, added 06/02/2023 'https://nitter.nl', # added 16/06/2023 + # 'https://n.l5.ca', # Not working 11/07/2023 + # 'https://nitter.fly.dev', # gone 11/07/2023 + # 'https://notabird.site', # gone 11/07/2023 # 'https://nitter.sethforprivacy.com', # too slow, removed 16/06/2023 # 'https://nitter.it', # different pic naming scheme # 'https://twitter.femboy.hu', # 404 on 06/05/2023 From f4addcf5a312e27e6424608561d58c7f5325c699 Mon Sep 17 00:00:00 2001 From: jeancf Date: Tue, 11 Jul 2023 13:40:53 +0200 Subject: [PATCH 16/25] Better default in config --- default.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/default.toml b/default.toml index f0e8a8c..c45fa86 100644 --- a/default.toml +++ b/default.toml @@ -35,7 +35,7 @@ remove_trackers_from_urls = false footer = "" # If specified, also diplay a timestamp on the "Original Tweet" line -# in the given format e.g. "%D %b %Y %H:%M" +# in the given format e.g. "%d %b %Y %H:%M %Z" # see https://docs.python.org/3/library/datetime.html#strftime-strptime-behavior # Default is "" (tweet timestamp is not displayed) tweet_time_format = "" From 89b6068a04e350b6d5574e452a79f19a3fc85b15 Mon Sep 17 00:00:00 2001 From: jeancf Date: Tue, 11 Jul 2023 19:45:53 +0200 Subject: [PATCH 17/25] Handling tweet-link exception --- twoot.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/twoot.py b/twoot.py index da8b2e8..ffc5624 100755 --- a/twoot.py +++ b/twoot.py @@ -885,8 +885,13 @@ def main(argv): in_db_cnt = 0 for status in timeline: # Extract tweet ID and status ID - tweet_id = status.find('a', class_='tweet-link').get('href').strip('#m') - status_id = tweet_id.split('/')[3] + try: + tweet_id = status.find('a', class_='tweet-link').get('href').strip('#m') + status_id = tweet_id.split('/')[3] + except Exception as e: + logging.critical('Malformed timeline downloaded from nitter instance') + logging.debug(e) + shutdown(-1) logging.debug('processing tweet %s', tweet_id) @@ -995,8 +1000,8 @@ def main(argv): timezone_display = pytz.timezone(TOML['options']['tweet_timezone']) else: # Use local timezone by default timezone_display = datetime.now().astimezone().tzinfo - logging.debug("Timestamp UTC " + str(timestamp)) - logging.debug("Timezone to use" + str(timezone_display)) + logging.debug("Timestamp UTC: " + str(timestamp)) + logging.debug("Timezone to use: " + str(timezone_display)) timestamp_display = pytz.utc.localize(timestamp).astimezone(timezone_display) logging.debug("Timestamp converted " + str(timestamp_display)) From 1faf11222d24b57a1baa2436dbfe198a01d30008 Mon Sep 17 00:00:00 2001 From: jeancf Date: Tue, 11 Jul 2023 20:41:46 +0200 Subject: [PATCH 18/25] Added saving page to file --- twoot.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/twoot.py b/twoot.py index ffc5624..50467cd 100755 --- a/twoot.py +++ b/twoot.py @@ -890,7 +890,12 @@ def main(argv): status_id = tweet_id.split('/')[3] except Exception as e: logging.critical('Malformed timeline downloaded from nitter instance') + logging.critical('Check ' + TOML['config']['twitter_account'] + '.html') logging.debug(e) + # save page to file + of = open(TOML['config']['twitter_account'] + '.html', 'w') + of.write(twit_account_page.text) + of.close() shutdown(-1) logging.debug('processing tweet %s', tweet_id) From 7528afaff9e9fdd77a61814351faa1ac88d76a6e Mon Sep 17 00:00:00 2001 From: jeancf Date: Tue, 11 Jul 2023 20:58:41 +0200 Subject: [PATCH 19/25] Removed brackets from timestamp --- twoot.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/twoot.py b/twoot.py index 50467cd..508beac 100755 --- a/twoot.py +++ b/twoot.py @@ -43,7 +43,7 @@ HTTPS_REQ_TIMEOUT = 10 NITTER_URLS = [ 'https://canada.unofficialbird.com/', # New 11/07/2023 - 'https://nederland.unofficialbird.com/', # New 11/07/2023 + 'https://nederland.unofficialbird.com/', # New 11/07/2023 'https://nitter.lacontrevoie.fr', 'https://nitter.cutelab.space', # USA, added 16/02/2023 'https://nitter.weiler.rocks', # added 15/06/2023 @@ -1010,7 +1010,7 @@ def main(argv): timestamp_display = pytz.utc.localize(timestamp).astimezone(timezone_display) logging.debug("Timestamp converted " + str(timestamp_display)) - tweet_text += ' (' + datetime.strftime(timestamp_display, TOML['options']['tweet_time_format']) + ')' + tweet_text += ' ' + datetime.strftime(timestamp_display, TOML['options']['tweet_time_format']) # If no media was specifically added in the tweet, try to get the first picture # with "twitter:image" meta tag in first linked page in tweet text From f64b92d175f0aef19c65c79f69e9020d5c377a3b Mon Sep 17 00:00:00 2001 From: jeancf Date: Tue, 11 Jul 2023 21:10:39 +0200 Subject: [PATCH 20/25] Updated README and CHANGELOG --- CHANGELOG.md | 6 ++++++ README.md | 24 +++++++++++++++++++++--- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bb7e9eb..df19a2e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog +**28 JUN 2023** VERSION 4.0 + +* Added option to update avatar and banner pictures on profile if changed on Twitter +* Tweaked list of nitter instances +* Updated list of user agents + **13 MAR 2023** VERSION 3.2.2 Updated list of nitter instances **21 FEB 2023** VERSION 3.2.1 Updated user agents and list of nitter instances diff --git a/README.md b/README.md index 92f1a8e..eb97153 100644 --- a/README.md +++ b/README.md @@ -3,11 +3,13 @@ Twoot is a python script that mirrors tweets from a twitter account to a Mastodon account. It is simple to set-up on a local machine, configurable and feature-rich. -**28 JUN 2023** VERSION 4.0 +**11 JUL 2023** VERSION 4.1 -* Added option to update avatar and banner pictures on profile if changed on Twitter +**A new dependency to python module `pytz` has been added**. Please run `pip install pytz` +in your environment to install it. + +* Added option to add timestamp of the original tweet to toot * Tweaked list of nitter instances -* Updated list of user agents > Previous updates can be found in CHANGELOG. @@ -28,6 +30,7 @@ It is simple to set-up on a local machine, configurable and feature-rich. * Optionally remove trackers (UTM parameters) from URLs * Optional domain substitution for Twitter, Youtube and Reddit domains (e.g. [Nitter](https://github.com/zedeus/nitter/wiki/Instances), [Invidious](https://redirect.invidious.io/) and [teddit](https://teddit.net/) respectively) +* option to add timestamp of the original tweet to bottom of toot * Optional footer line to add tags at bottom of toot * Allows rate-limiting posts to Mastodon instance @@ -101,6 +104,21 @@ have changed on the twitter page. This check compares the name of files used by of the files that have been uploaded on Mastodon and if they differ both files are downloaded from twitter and uploaded on Mastodon. The check is very fast if there is no update. +### Adding timestamp of original tweet to toot + +Use `tweet_time_format` option in configuration file to specify the datetime format to display the date +at which the tweet was published next to the "Original tweet" link. Valid format specifiers are +the same as those used to format datetimes in python +(https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior). +e.g. `tweet_time_format = "(%d %b %Y %H:%M %Z)"` + +An empty or missing `tweet_time_format` disables the display of the timestamp. + +By default, dates are specified in UTC time zone. To convert the timestamp to another time zone, +use the `tweet_timezone` option in configuration file. Valid time zone names are those of the Olson time +zone database (https://en.wikipedia.org/wiki/Tz_database) +e.g. `tweet_timezone = "Europe/Paris"` + ### Rate control Default max age is 1 day. Decimal values are OK. From ea12cea20f27126044e284042ac65d27d137bff5 Mon Sep 17 00:00:00 2001 From: jeancf Date: Tue, 11 Jul 2023 20:41:46 +0200 Subject: [PATCH 21/25] Revert "Added saving page to file" This reverts commit 1faf11222d24b57a1baa2436dbfe198a01d30008. --- twoot.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/twoot.py b/twoot.py index 508beac..29ad2c5 100755 --- a/twoot.py +++ b/twoot.py @@ -890,12 +890,7 @@ def main(argv): status_id = tweet_id.split('/')[3] except Exception as e: logging.critical('Malformed timeline downloaded from nitter instance') - logging.critical('Check ' + TOML['config']['twitter_account'] + '.html') logging.debug(e) - # save page to file - of = open(TOML['config']['twitter_account'] + '.html', 'w') - of.write(twit_account_page.text) - of.close() shutdown(-1) logging.debug('processing tweet %s', tweet_id) From 530953f48b3f3f9745d2b59650c5b770fcef7c51 Mon Sep 17 00:00:00 2001 From: jeancf Date: Wed, 12 Jul 2023 14:51:04 +0200 Subject: [PATCH 22/25] Add Exclusion of thread tweets --- twoot.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/twoot.py b/twoot.py index 29ad2c5..3d1809f 100755 --- a/twoot.py +++ b/twoot.py @@ -72,6 +72,22 @@ USER_AGENTS = [ 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Vivaldi/6.1.3035.84', ] +""" +Temporary mitigation for unability to parse threads. Skip tweets that are part of a thread +""" +def has_class_timeline_item_but_not_thread(tag): + if tag.has_attr('class'): + classes = tag['class'] + if 'timeline-item' in classes and 'thread' not in classes: + return True + elif 'timeline-item' in classes and 'thread' in classes: + logging.warning('Tweet is part of a thread which are a new nitter feature that is not handled yet. Skipping') + return False + else: + return False + else: + return False + def build_config(args): """ @@ -873,7 +889,7 @@ def main(argv): soup = BeautifulSoup(twit_account_page.text, 'html.parser') # Extract twitter timeline - timeline = soup.find_all('div', class_='timeline-item') + timeline = soup.find_all(has_class_timeline_item_but_not_thread) logging.info('Processing ' + str(len(timeline)) + ' tweets found in timeline') From 248552ae4fe3f6a6bd7e60a1b5da99f0ecaa0a60 Mon Sep 17 00:00:00 2001 From: jeancf Date: Wed, 12 Jul 2023 14:56:49 +0200 Subject: [PATCH 23/25] Update README --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index eb97153..f2b31fb 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,12 @@ It is simple to set-up on a local machine, configurable and feature-rich. **11 JUL 2023** VERSION 4.1 +**Nitter has recently added a change that highlights tweets that are part of a thread. Twoot +cannot handle this modification yet therefore TWEETS THAT ARE PART OF A THREAD ARE CURRENTLY +IGNORED.** A warning message is added to the log file instead. + +An update is being worked on. Stay tuned. + **A new dependency to python module `pytz` has been added**. Please run `pip install pytz` in your environment to install it. From 6188f77e2b4ac4dd7c83f15f7e91eea7a6f902bc Mon Sep 17 00:00:00 2001 From: jeancf Date: Wed, 12 Jul 2023 15:01:24 +0200 Subject: [PATCH 24/25] Removed unreliable nitter instances --- twoot.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/twoot.py b/twoot.py index 3d1809f..1d202ec 100755 --- a/twoot.py +++ b/twoot.py @@ -42,8 +42,6 @@ MAX_REC_COUNT = 50 HTTPS_REQ_TIMEOUT = 10 NITTER_URLS = [ - 'https://canada.unofficialbird.com/', # New 11/07/2023 - 'https://nederland.unofficialbird.com/', # New 11/07/2023 'https://nitter.lacontrevoie.fr', 'https://nitter.cutelab.space', # USA, added 16/02/2023 'https://nitter.weiler.rocks', # added 15/06/2023 From 79f3d1b13875863c7ca0f48cd468d7bea9a68858 Mon Sep 17 00:00:00 2001 From: jeancf Date: Wed, 12 Jul 2023 15:07:00 +0200 Subject: [PATCH 25/25] Update README --- README.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index f2b31fb..a107969 100644 --- a/README.md +++ b/README.md @@ -3,18 +3,17 @@ Twoot is a python script that mirrors tweets from a twitter account to a Mastodon account. It is simple to set-up on a local machine, configurable and feature-rich. -**11 JUL 2023** VERSION 4.1 +**12 JUL 2023** VERSION 4.1 **Nitter has recently added a change that highlights tweets that are part of a thread. Twoot cannot handle this modification yet therefore TWEETS THAT ARE PART OF A THREAD ARE CURRENTLY IGNORED.** A warning message is added to the log file instead. - An update is being worked on. Stay tuned. **A new dependency to python module `pytz` has been added**. Please run `pip install pytz` in your environment to install it. -* Added option to add timestamp of the original tweet to toot +* Added option to display timestamp of the original tweet in toot * Tweaked list of nitter instances > Previous updates can be found in CHANGELOG.