Merge branch 'timestamp' as version 4.1

This commit is contained in:
jeancf 2023-07-12 18:07:38 +02:00
commit 1b80568387
4 changed files with 103 additions and 21 deletions

View File

@ -1,5 +1,11 @@
# Changelog
**28 JUN 2023** VERSION 4.0
* Added option to update avatar and banner pictures on profile if changed on Twitter
* Tweaked list of nitter instances
* Updated list of user agents
**13 MAR 2023** VERSION 3.2.2 Updated list of nitter instances
**21 FEB 2023** VERSION 3.2.1 Updated user agents and list of nitter instances

View File

@ -3,11 +3,18 @@
Twoot is a python script that mirrors tweets from a twitter account to a Mastodon account.
It is simple to set-up on a local machine, configurable and feature-rich.
**28 JUN 2023** VERSION 4.0
**12 JUL 2023** VERSION 4.1
* Added option to update avatar and banner pictures on profile if changed on Twitter
**Nitter has recently added a change that highlights tweets that are part of a thread. Twoot
cannot handle this modification yet therefore TWEETS THAT ARE PART OF A THREAD ARE CURRENTLY
IGNORED.** A warning message is added to the log file instead.
An update is being worked on. Stay tuned.
**A new dependency on the Python module `pytz` has been added**. Please run `pip install pytz`
in your environment to install it.
* Added option to display timestamp of the original tweet in toot
* Tweaked list of nitter instances
* Updated list of user agents
> Previous updates can be found in CHANGELOG.
@ -28,6 +35,7 @@ It is simple to set-up on a local machine, configurable and feature-rich.
* Optionally remove trackers (UTM parameters) from URLs
* Optional domain substitution for Twitter, Youtube and Reddit domains (e.g. [Nitter](https://github.com/zedeus/nitter/wiki/Instances),
[Invidious](https://redirect.invidious.io/) and [teddit](https://teddit.net/) respectively)
* Option to add timestamp of the original tweet to bottom of toot
* Optional footer line to add tags at bottom of toot
* Allows rate-limiting posts to Mastodon instance
@ -101,6 +109,21 @@ have changed on the twitter page. This check compares the name of files used by
of the files that have been uploaded on Mastodon and if they differ both files are downloaded from
twitter and uploaded on Mastodon. The check is very fast if there is no update.
### Adding timestamp of original tweet to toot
Use `tweet_time_format` option in configuration file to specify the datetime format to display the date
at which the tweet was published next to the "Original tweet" link. Valid format specifiers are
the same as those used to format datetimes in python
(https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior).
e.g. `tweet_time_format = "(%d %b %Y %H:%M %Z)"`
An empty or missing `tweet_time_format` disables the display of the timestamp.
By default, the timestamp is displayed in the local time zone of the machine running the script. To display it in another time zone,
use the `tweet_timezone` option in the configuration file. Valid time zone names are those of the Olson time
zone database (https://en.wikipedia.org/wiki/Tz_database)
e.g. `tweet_timezone = "Europe/Paris"`
### Rate control
Default max age is 1 day. Decimal values are OK.

View File

@ -34,6 +34,18 @@ remove_trackers_from_urls = false
# Default is ""
footer = ""
# If specified, also display a timestamp on the "Original Tweet" line
# in the given format e.g. "%d %b %Y %H:%M %Z"
# see https://docs.python.org/3/library/datetime.html#strftime-strptime-behavior
# Default is "" (tweet timestamp is not displayed)
tweet_time_format = ""
# Specify the timezone that the timestamp on the tweet should be displayed in
# Use the `tz_identifier` from https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
# example "Europe/Brussels"
# default is using the local timezone of the machine running the script
tweet_timezone = ""
# Do not add reference to "Original tweet" on toots
# default is false
remove_original_tweet_ref = false

View File

@ -43,12 +43,12 @@ HTTPS_REQ_TIMEOUT = 10
NITTER_URLS = [
'https://nitter.lacontrevoie.fr',
'https://n.l5.ca',
'https://nitter.cutelab.space', # USA, added 16/02/2023
'https://nitter.weiler.rocks', # added 15/06/2023
'https://nitter.fly.dev', # anycast, added 06/02/2023
'https://notabird.site', # anycast, added 06/02/2023
'https://nitter.nl', # added 16/06/2023
# 'https://n.l5.ca', # Not working 11/07/2023
# 'https://nitter.fly.dev', # gone 11/07/2023
# 'https://notabird.site', # gone 11/07/2023
# 'https://nitter.sethforprivacy.com', # too slow, removed 16/06/2023
# 'https://nitter.it', # different pic naming scheme
# 'https://twitter.femboy.hu', # 404 on 06/05/2023
@ -70,6 +70,22 @@ USER_AGENTS = [
'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Vivaldi/6.1.3035.84',
]
"""
Temporary mitigation for the inability to parse threads. Skip tweets that are part of a thread
"""
def has_class_timeline_item_but_not_thread(tag):
    """
    Tell whether a parsed tag is a timeline item that is not part of a thread.

    Intended to be passed as the filter function when extracting the
    timeline, so that tweets belonging to a thread are left out.

    :param tag: parsed element exposing ``has_attr()`` and item access
                to its ``class`` attribute
    :return: True if the tag carries the 'timeline-item' class without
             the 'thread' class, False in every other case
    """
    # Tags without any class attribute cannot be timeline items
    if not tag.has_attr('class'):
        return False

    css_classes = tag['class']

    # Anything that is not a timeline item is rejected silently
    if 'timeline-item' not in css_classes:
        return False

    # Timeline items flagged as part of a thread are rejected with a warning
    if 'thread' in css_classes:
        logging.warning('Tweet is part of a thread which are a new nitter feature that is not handled yet. Skipping')
        return False

    return True
def build_config(args):
"""
@ -90,7 +106,9 @@ def build_config(args):
'skip_retweets': False,
'remove_link_redirections': False,
'remove_trackers_from_urls': False,
'footer': '',
'footer': "",
'tweet_time_format': "",
'tweet_timezone': "",
'remove_original_tweet_ref': False,
'tweet_max_age': float(1),
'tweet_delay': float(0),
@ -790,14 +808,16 @@ def main(argv):
logging.info(' remove_link_redirections : ' + str(TOML['options']['remove_link_redirections']))
logging.info(' remove_trackers_from_urls: ' + str(TOML['options']['remove_trackers_from_urls']))
logging.info(' footer : ' + TOML['options']['footer'])
logging.info(' tweet_time_format : ' + TOML['options']['tweet_time_format'])
logging.info(' tweet_timezone : ' + TOML['options']['tweet_timezone'])
logging.info(' remove_original_tweet_ref: ' + str(TOML['options']['remove_original_tweet_ref']))
logging.info(' update_profile : ' + str(TOML['options']['update_profile']))
logging.info(' tweet_max_age : ' + str(TOML['options']['tweet_max_age']))
logging.info(' tweet_delay : ' + str(TOML['options']['tweet_delay']))
logging.info(' toot_cap : ' + str(TOML['options']['toot_cap']))
logging.info(' subst_twitter : ' + str(TOML['options']['subst_twitter']))
logging.info(' subst_twitter : ' + str(TOML['options']['subst_youtube']))
logging.info(' subst_twitter : ' + str(TOML['options']['subst_reddit']))
logging.info(' subst_youtube : ' + str(TOML['options']['subst_youtube']))
logging.info(' subst_reddit : ' + str(TOML['options']['subst_reddit']))
logging.info(' log_level : ' + str(TOML['options']['log_level']))
logging.info(' log_days : ' + str(TOML['options']['log_days']))
@ -867,7 +887,7 @@ def main(argv):
soup = BeautifulSoup(twit_account_page.text, 'html.parser')
# Extract twitter timeline
timeline = soup.find_all('div', class_='timeline-item')
timeline = soup.find_all(has_class_timeline_item_but_not_thread)
logging.info('Processing ' + str(len(timeline)) + ' tweets found in timeline')
@ -879,21 +899,26 @@ def main(argv):
in_db_cnt = 0
for status in timeline:
# Extract tweet ID and status ID
tweet_id = status.find('a', class_='tweet-link').get('href').strip('#m')
status_id = tweet_id.split('/')[3]
try:
tweet_id = status.find('a', class_='tweet-link').get('href').strip('#m')
status_id = tweet_id.split('/')[3]
except Exception as e:
logging.critical('Malformed timeline downloaded from nitter instance')
logging.debug(e)
shutdown(-1)
logging.debug('processing tweet %s', tweet_id)
# Extract time stamp
time_string = status.find('span', class_='tweet-date').a.get('title')
try:
timestamp = datetime.strptime(time_string, '%d/%m/%Y, %H:%M:%S').timestamp()
timestamp = datetime.strptime(time_string, '%d/%m/%Y, %H:%M:%S')
except:
# Dec 21, 2021 · 12:00 PM UTC
timestamp = datetime.strptime(time_string, '%b %d, %Y · %I:%M %p %Z').timestamp()
timestamp = datetime.strptime(time_string, '%b %d, %Y · %I:%M %p %Z')
# Check if time is within acceptable range
if not is_time_valid(timestamp):
if not is_time_valid(timestamp.timestamp()):
out_date_cnt += 1
logging.debug("Tweet outside valid time range, skipping")
continue
@ -976,9 +1001,25 @@ def main(argv):
# Add footer with link to original tweet
if TOML['options']['remove_original_tweet_ref'] is False:
if TOML['options']['footer'] != '':
tweet_text += '\nOriginal tweet : ' + substitute_source(full_status_url)
tweet_text += '\nOriginal tweet: ' + substitute_source(full_status_url)
else:
tweet_text += '\n\nOriginal tweet : ' + substitute_source(full_status_url)
tweet_text += '\n\nOriginal tweet: ' + substitute_source(full_status_url)
# Add timestamp to the "Original Tweet" line
if TOML['options']['tweet_time_format'] != "":
timestamp_display = timestamp
# Adjust timezone
import pytz
if TOML['options']['tweet_timezone'] != "":
timezone_display = pytz.timezone(TOML['options']['tweet_timezone'])
else: # Use local timezone by default
timezone_display = datetime.now().astimezone().tzinfo
logging.debug("Timestamp UTC: " + str(timestamp))
logging.debug("Timezone to use: " + str(timezone_display))
timestamp_display = pytz.utc.localize(timestamp).astimezone(timezone_display)
logging.debug("Timestamp converted " + str(timestamp_display))
tweet_text += ' ' + datetime.strftime(timestamp_display, TOML['options']['tweet_time_format'])
# If no media was specifically added in the tweet, try to get the first picture
# with "twitter:image" meta tag in first linked page in tweet text
@ -1020,7 +1061,7 @@ def main(argv):
tweet = {
"author": author,
"author_account": author_account,
"timestamp": timestamp,
"timestamp": timestamp.timestamp(),
"tweet_id": tweet_id,
"tweet_text": tweet_text,
"video": video_file,
@ -1103,9 +1144,9 @@ def main(argv):
except MastodonAPIError:
# Assuming this is an:
# ERROR ('Mastodon API returned error', 422, 'Unprocessable Entity', 'Cannot attach files that have not finished processing. Try again in a moment!')
logging.warning('Mastodon API Error 422: Cannot attach files that have not finished processing. Waiting 15 seconds and retrying.')
# Wait 15 seconds
time.sleep(15)
logging.warning('Mastodon API Error 422: Cannot attach files that have not finished processing. Waiting 60 seconds and retrying.')
# Wait 60 seconds
time.sleep(60)
# retry posting
try:
toot = mastodon.status_post(tweet['tweet_text'], media_ids=media_ids)