mirror of
https://gitlab.com/jeancf/twoot.git
synced 2024-11-27 14:01:13 +00:00
Merge branch 'timestamp' as version 4.1
This commit is contained in:
commit
1b80568387
|
@ -1,5 +1,11 @@
|
|||
# Changelog
|
||||
|
||||
**28 JUN 2023** VERSION 4.0
|
||||
|
||||
* Added option to update avatar and banner pictures on profile if changed on Twitter
|
||||
* Tweaked list of nitter instances
|
||||
* Updated list of user agents
|
||||
|
||||
**13 MAR 2023** VERSION 3.2.2 Updated list of nitter instances
|
||||
|
||||
**21 FEB 2023** VERSION 3.2.1 Updated user agents and list of nitter instances
|
||||
|
|
29
README.md
29
README.md
|
@ -3,11 +3,18 @@
|
|||
Twoot is a python script that mirrors tweets from a twitter account to a Mastodon account.
|
||||
It is simple to set-up on a local machine, configurable and feature-rich.
|
||||
|
||||
**28 JUN 2023** VERSION 4.0
|
||||
**12 JUL 2023** VERSION 4.1
|
||||
|
||||
* Added option to update avatar and banner pictures on profile if changed on Twitter
|
||||
**Nitter has recently added a change that highlights tweets that are part of a thread. Twoot
|
||||
cannot handle this modification yet; therefore, TWEETS THAT ARE PART OF A THREAD ARE CURRENTLY
|
||||
IGNORED.** A warning message is added to the log file instead.
|
||||
An update is being worked on. Stay tuned.
|
||||
|
||||
**A new dependency on the Python module `pytz` has been added**. Please run `pip install pytz`
|
||||
in your environment to install it.
|
||||
|
||||
* Added option to display timestamp of the original tweet in toot
|
||||
* Tweaked list of nitter instances
|
||||
* Updated list of user agents
|
||||
|
||||
> Previous updates can be found in CHANGELOG.
|
||||
|
||||
|
@ -28,6 +35,7 @@ It is simple to set-up on a local machine, configurable and feature-rich.
|
|||
* Optionally remove trackers (UTM parameters) from URLs
|
||||
* Optional domain substitution for Twitter, Youtube and Reddit domains (e.g. [Nitter](https://github.com/zedeus/nitter/wiki/Instances),
|
||||
[Invidious](https://redirect.invidious.io/) and [teddit](https://teddit.net/) respectively)
|
||||
* Option to add timestamp of the original tweet to bottom of toot
|
||||
* Optional footer line to add tags at bottom of toot
|
||||
* Allows rate-limiting posts to Mastodon instance
|
||||
|
||||
|
@ -101,6 +109,21 @@ have changed on the twitter page. This check compares the name of files used by
|
|||
of the files that have been uploaded on Mastodon and if they differ both files are downloaded from
|
||||
twitter and uploaded on Mastodon. The check is very fast if there is no update.
|
||||
|
||||
### Adding timestamp of original tweet to toot
|
||||
|
||||
Use the `tweet_time_format` option in the configuration file to specify the datetime format used to display the date
|
||||
at which the tweet was published next to the "Original tweet" link. Valid format specifiers are
|
||||
the same as those used to format datetimes in python
|
||||
(https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior).
|
||||
e.g. `tweet_time_format = "(%d %b %Y %H:%M %Z)"`
|
||||
|
||||
An empty or missing `tweet_time_format` disables the display of the timestamp.
|
||||
|
||||
By default, the timestamp is displayed in the local time zone of the machine running the script. To convert it to another time zone,
|
||||
use the `tweet_timezone` option in configuration file. Valid time zone names are those of the Olson time
|
||||
zone database (https://en.wikipedia.org/wiki/Tz_database)
|
||||
e.g. `tweet_timezone = "Europe/Paris"`
|
||||
|
||||
### Rate control
|
||||
|
||||
Default max age is 1 day. Decimal values are OK.
|
||||
|
|
12
default.toml
12
default.toml
|
@ -34,6 +34,18 @@ remove_trackers_from_urls = false
|
|||
# Default is ""
|
||||
footer = ""
|
||||
|
||||
# If specified, also display a timestamp on the "Original Tweet" line
|
||||
# in the given format e.g. "%d %b %Y %H:%M %Z"
|
||||
# see https://docs.python.org/3/library/datetime.html#strftime-strptime-behavior
|
||||
# Default is "" (tweet timestamp is not displayed)
|
||||
tweet_time_format = ""
|
||||
|
||||
# Specify the timezone that the timestamp on the tweet should be displayed in
|
||||
# Use the `tz_identifier` from https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
|
||||
# example "Europe/Brussels"
|
||||
# default is using the local timezone of the machine running the script
|
||||
tweet_timezone = ""
|
||||
|
||||
# Do not add reference to "Original tweet" on toots
|
||||
# default is false
|
||||
remove_original_tweet_ref = false
|
||||
|
|
77
twoot.py
77
twoot.py
|
@ -43,12 +43,12 @@ HTTPS_REQ_TIMEOUT = 10
|
|||
|
||||
NITTER_URLS = [
|
||||
'https://nitter.lacontrevoie.fr',
|
||||
'https://n.l5.ca',
|
||||
'https://nitter.cutelab.space', # USA, added 16/02/2023
|
||||
'https://nitter.weiler.rocks', # added 15/06/2023
|
||||
'https://nitter.fly.dev', # anycast, added 06/02/2023
|
||||
'https://notabird.site', # anycast, added 06/02/2023
|
||||
'https://nitter.nl', # added 16/06/2023
|
||||
# 'https://n.l5.ca', # Not working 11/07/2023
|
||||
# 'https://nitter.fly.dev', # gone 11/07/2023
|
||||
# 'https://notabird.site', # gone 11/07/2023
|
||||
# 'https://nitter.sethforprivacy.com', # too slow, removed 16/06/2023
|
||||
# 'https://nitter.it', # different pic naming scheme
|
||||
# 'https://twitter.femboy.hu', # 404 on 06/05/2023
|
||||
|
@ -70,6 +70,22 @@ USER_AGENTS = [
|
|||
'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Vivaldi/6.1.3035.84',
|
||||
]
|
||||
|
||||
"""
|
||||
Temporary mitigation for the inability to parse threads. Skip tweets that are part of a thread
|
||||
"""
|
||||
def has_class_timeline_item_but_not_thread(tag):
|
||||
if tag.has_attr('class'):
|
||||
classes = tag['class']
|
||||
if 'timeline-item' in classes and 'thread' not in classes:
|
||||
return True
|
||||
elif 'timeline-item' in classes and 'thread' in classes:
|
||||
logging.warning('Tweet is part of a thread which are a new nitter feature that is not handled yet. Skipping')
|
||||
return False
|
||||
else:
|
||||
return False
|
||||
else:
|
||||
return False
|
||||
|
||||
|
||||
def build_config(args):
|
||||
"""
|
||||
|
@ -90,7 +106,9 @@ def build_config(args):
|
|||
'skip_retweets': False,
|
||||
'remove_link_redirections': False,
|
||||
'remove_trackers_from_urls': False,
|
||||
'footer': '',
|
||||
'footer': "",
|
||||
'tweet_time_format': "",
|
||||
'tweet_timezone': "",
|
||||
'remove_original_tweet_ref': False,
|
||||
'tweet_max_age': float(1),
|
||||
'tweet_delay': float(0),
|
||||
|
@ -790,14 +808,16 @@ def main(argv):
|
|||
logging.info(' remove_link_redirections : ' + str(TOML['options']['remove_link_redirections']))
|
||||
logging.info(' remove_trackers_from_urls: ' + str(TOML['options']['remove_trackers_from_urls']))
|
||||
logging.info(' footer : ' + TOML['options']['footer'])
|
||||
logging.info(' tweet_time_format : ' + TOML['options']['tweet_time_format'])
|
||||
logging.info(' tweet_timezone : ' + TOML['options']['tweet_timezone'])
|
||||
logging.info(' remove_original_tweet_ref: ' + str(TOML['options']['remove_original_tweet_ref']))
|
||||
logging.info(' update_profile : ' + str(TOML['options']['update_profile']))
|
||||
logging.info(' tweet_max_age : ' + str(TOML['options']['tweet_max_age']))
|
||||
logging.info(' tweet_delay : ' + str(TOML['options']['tweet_delay']))
|
||||
logging.info(' toot_cap : ' + str(TOML['options']['toot_cap']))
|
||||
logging.info(' subst_twitter : ' + str(TOML['options']['subst_twitter']))
|
||||
logging.info(' subst_twitter : ' + str(TOML['options']['subst_youtube']))
|
||||
logging.info(' subst_twitter : ' + str(TOML['options']['subst_reddit']))
|
||||
logging.info(' subst_youtube : ' + str(TOML['options']['subst_youtube']))
|
||||
logging.info(' subst_reddit : ' + str(TOML['options']['subst_reddit']))
|
||||
logging.info(' log_level : ' + str(TOML['options']['log_level']))
|
||||
logging.info(' log_days : ' + str(TOML['options']['log_days']))
|
||||
|
||||
|
@ -867,7 +887,7 @@ def main(argv):
|
|||
soup = BeautifulSoup(twit_account_page.text, 'html.parser')
|
||||
|
||||
# Extract twitter timeline
|
||||
timeline = soup.find_all('div', class_='timeline-item')
|
||||
timeline = soup.find_all(has_class_timeline_item_but_not_thread)
|
||||
|
||||
logging.info('Processing ' + str(len(timeline)) + ' tweets found in timeline')
|
||||
|
||||
|
@ -879,21 +899,26 @@ def main(argv):
|
|||
in_db_cnt = 0
|
||||
for status in timeline:
|
||||
# Extract tweet ID and status ID
|
||||
tweet_id = status.find('a', class_='tweet-link').get('href').strip('#m')
|
||||
status_id = tweet_id.split('/')[3]
|
||||
try:
|
||||
tweet_id = status.find('a', class_='tweet-link').get('href').strip('#m')
|
||||
status_id = tweet_id.split('/')[3]
|
||||
except Exception as e:
|
||||
logging.critical('Malformed timeline downloaded from nitter instance')
|
||||
logging.debug(e)
|
||||
shutdown(-1)
|
||||
|
||||
logging.debug('processing tweet %s', tweet_id)
|
||||
|
||||
# Extract time stamp
|
||||
time_string = status.find('span', class_='tweet-date').a.get('title')
|
||||
try:
|
||||
timestamp = datetime.strptime(time_string, '%d/%m/%Y, %H:%M:%S').timestamp()
|
||||
timestamp = datetime.strptime(time_string, '%d/%m/%Y, %H:%M:%S')
|
||||
except:
|
||||
# Dec 21, 2021 · 12:00 PM UTC
|
||||
timestamp = datetime.strptime(time_string, '%b %d, %Y · %I:%M %p %Z').timestamp()
|
||||
timestamp = datetime.strptime(time_string, '%b %d, %Y · %I:%M %p %Z')
|
||||
|
||||
# Check if time is within acceptable range
|
||||
if not is_time_valid(timestamp):
|
||||
if not is_time_valid(timestamp.timestamp()):
|
||||
out_date_cnt += 1
|
||||
logging.debug("Tweet outside valid time range, skipping")
|
||||
continue
|
||||
|
@ -976,9 +1001,25 @@ def main(argv):
|
|||
# Add footer with link to original tweet
|
||||
if TOML['options']['remove_original_tweet_ref'] is False:
|
||||
if TOML['options']['footer'] != '':
|
||||
tweet_text += '\nOriginal tweet : ' + substitute_source(full_status_url)
|
||||
tweet_text += '\nOriginal tweet: ' + substitute_source(full_status_url)
|
||||
else:
|
||||
tweet_text += '\n\nOriginal tweet : ' + substitute_source(full_status_url)
|
||||
tweet_text += '\n\nOriginal tweet: ' + substitute_source(full_status_url)
|
||||
|
||||
# Add timestamp to the "Original Tweet" line
|
||||
if TOML['options']['tweet_time_format'] != "":
|
||||
timestamp_display = timestamp
|
||||
# Adjust timezone
|
||||
import pytz
|
||||
if TOML['options']['tweet_timezone'] != "":
|
||||
timezone_display = pytz.timezone(TOML['options']['tweet_timezone'])
|
||||
else: # Use local timezone by default
|
||||
timezone_display = datetime.now().astimezone().tzinfo
|
||||
logging.debug("Timestamp UTC: " + str(timestamp))
|
||||
logging.debug("Timezone to use: " + str(timezone_display))
|
||||
timestamp_display = pytz.utc.localize(timestamp).astimezone(timezone_display)
|
||||
logging.debug("Timestamp converted " + str(timestamp_display))
|
||||
|
||||
tweet_text += ' ' + datetime.strftime(timestamp_display, TOML['options']['tweet_time_format'])
|
||||
|
||||
# If no media was specifically added in the tweet, try to get the first picture
|
||||
# with "twitter:image" meta tag in first linked page in tweet text
|
||||
|
@ -1020,7 +1061,7 @@ def main(argv):
|
|||
tweet = {
|
||||
"author": author,
|
||||
"author_account": author_account,
|
||||
"timestamp": timestamp,
|
||||
"timestamp": timestamp.timestamp(),
|
||||
"tweet_id": tweet_id,
|
||||
"tweet_text": tweet_text,
|
||||
"video": video_file,
|
||||
|
@ -1103,9 +1144,9 @@ def main(argv):
|
|||
except MastodonAPIError:
|
||||
# Assuming this is an:
|
||||
# ERROR ('Mastodon API returned error', 422, 'Unprocessable Entity', 'Cannot attach files that have not finished processing. Try again in a moment!')
|
||||
logging.warning('Mastodon API Error 422: Cannot attach files that have not finished processing. Waiting 15 seconds and retrying.')
|
||||
# Wait 15 seconds
|
||||
time.sleep(15)
|
||||
logging.warning('Mastodon API Error 422: Cannot attach files that have not finished processing. Waiting 60 seconds and retrying.')
|
||||
# Wait 60 seconds
|
||||
time.sleep(60)
|
||||
# retry posting
|
||||
try:
|
||||
toot = mastodon.status_post(tweet['tweet_text'], media_ids=media_ids)
|
||||
|
|
Loading…
Reference in New Issue
Block a user