Merge branch 'master'

jeancf 2023-07-25 13:00:40 +02:00
commit 189c37272a
3 changed files with 52 additions and 25 deletions

CHANGELOG.md

@@ -1,5 +1,18 @@
 # Changelog
+**22 JUL 2023** VERSION 4.3.1
+Minor improvements of robustness (avoid interruption of processing):
+* Ignore timeline-item without tweet-link tag
+* Improve detection of missing video
+**17 JUL 2023** VERSION 4.3
+* Twitter threads are replicated on Mastodon: each follow-up message in a thread is posted
+  as a reply to its predecessor.
+* An issue with downloading videos has been fixed ("ERROR: Sorry, you are not authorized to see this status").
 **14 JUL 2023** VERSION 4.2
 Twoot can now handle threads. All tweets can again be uploaded on Mastodon. Tweets in a threads are
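
For the VERSION 4.3 entry above, replicating a thread means each follow-up toot is posted as a reply to the toot before it. A minimal sketch of that pattern with Mastodon.py, assuming a logged-in client; the instance URL, token file and `thread_texts` variable are illustrative placeholders, not Twoot's actual code:

```python
from mastodon import Mastodon

# Illustrative client setup; the token file and instance URL are placeholders.
mastodon = Mastodon(access_token='twoot_usercred.secret',
                    api_base_url='https://mastodon.example')

def post_thread(thread_texts):
    """Post a list of texts as a chain of replies, mirroring a Twitter thread."""
    previous_id = None
    for text in thread_texts:
        # The first toot starts the thread; each later one replies to its predecessor.
        toot = mastodon.status_post(text, in_reply_to_id=previous_id)
        previous_id = toot['id']
```

The only state carried between iterations is the id of the previous toot, which is what makes the chain render as a thread on Mastodon.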

README.md

@@ -3,11 +3,12 @@
 Twoot is a python script that mirrors tweets from a twitter account to a Mastodon account.
 It is simple to set-up on a local machine, configurable and feature-rich.
-**17 JUL 2023** VERSION 4.3
-* Twitter threads are replicated on Mastodon: each follow-up message in a thread is posted
-  as a reply to its predecessor.
-* An issue with downloading videos has been fixed ("ERROR: Sorry, you are not authorized to see this status").
+**24 JUL 2023** VERSION 4.3.2
+Minor fixes (avoid interruption of processing):
+* Re-add missing \n before url of quoted tweet
+* Avoid interruption of processing in case of empty thread timeline
 > Previous updates can be found in CHANGELOG.

twoot.py

@@ -204,9 +204,11 @@ def _get_rest_of_thread(session, headers, nitter_url, thread_url, first_item):
     # Make soup
     soup = BeautifulSoup(thread_page.text, 'html.parser')

+    list = []
     # Get all items in thread after main tweet
     after_tweet = soup.find('div', 'after-tweet')
-    list = after_tweet.find_all('div', class_='timeline-item')
+    if after_tweet is not None:
+        list = after_tweet.find_all('div', class_='timeline-item')

     # Build timeline of tuples
     previous_tweet_url = thread_url
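
The guard added in this hunk matters because BeautifulSoup's `find()` returns `None` when no matching element exists, so the old `after_tweet.find_all(...)` raised `AttributeError` on an empty thread timeline and interrupted processing. A standalone illustration of the pattern (the HTML snippet is made up):

```python
from bs4 import BeautifulSoup

# A thread page with no content after the main tweet, i.e. no 'after-tweet' div.
html = "<div class='timeline'><div class='timeline-item'>main tweet</div></div>"
soup = BeautifulSoup(html, 'html.parser')

items = []
after_tweet = soup.find('div', 'after-tweet')  # None: the div is absent
if after_tweet is not None:
    items = after_tweet.find_all('div', class_='timeline-item')
# Without the guard, after_tweet.find_all(...) would raise AttributeError here.
print(len(items))  # 0
```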
@@ -674,25 +676,31 @@ def process_attachments(nitter_url, attachments_container, status_id, author_acc
             logging.debug("downloading video from twitter")
             import youtube_dl

-            video_path = vid_container.source['src']
-            if video_path is not None:
-                video_file = urljoin(nitter_url, video_path)
-                ydl_opts = {
-                    'outtmpl': "output/" + TOML['config']['twitter_account'] + "/" + status_id + "/%(id)s.%(ext)s",
-                    # 'format': "best[width<=500]",
-                    'socket_timeout': 60,
-                    'quiet': True,
-                }
-                with youtube_dl.YoutubeDL(ydl_opts) as ydl:
-                    try:
-                        ydl.download([video_file])
-                    except Exception as e:
-                        logging.warning('Error downloading twitter video: ' + str(e))
-                        vid_in_tweet = True
-                    else:
-                        logging.debug('downloaded twitter video from attachments')
+            video_path_source = vid_container.source
+            if video_path_source is not None:
+                video_path = video_path_source['src']
+                if video_path is not None:
+                    video_file = urljoin(nitter_url, video_path)
+                    ydl_opts = {
+                        'outtmpl': "output/" + TOML['config']['twitter_account'] + "/" + status_id + "/%(id)s.%(ext)s",
+                        # 'format': "best[width<=500]",
+                        'socket_timeout': 60,
+                        'quiet': True,
+                    }
+                    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
+                        try:
+                            ydl.download([video_file])
+                        except Exception as e:
+                            logging.warning('Error downloading twitter video: ' + str(e))
+                            vid_in_tweet = True
+                        else:
+                            logging.debug('downloaded twitter video from attachments')
+                else:
+                    logging.debug("Media is unavailable")
+                    vid_in_tweet = True
             else:
+                logging.debug("Media is unavailable")
                 vid_in_tweet = True

     return pics, vid_in_tweet
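
The two nested checks added in this hunk (a video container with no `<source>` child, then a `<source>` without a usable `src`) can also be read as a single lookup that either yields the video URL or reports the media as unavailable. A sketch of that reading; `extract_video_url` is a hypothetical helper, not a function in Twoot:

```python
from urllib.parse import urljoin

def extract_video_url(nitter_url, vid_container):
    """Return the absolute video URL, or None when the media is unavailable.

    Hypothetical condensation of the two guards shown above: the <video>
    element may have no <source> child, and the <source> may lack a src.
    """
    source_tag = vid_container.source       # bs4 returns None if no <source> child exists
    if source_tag is None:
        return None
    video_path = source_tag.get('src')      # None if the attribute is missing
    if video_path is None:
        return None
    return urljoin(nitter_url, video_path)
```

A caller would treat a `None` result as "Media is unavailable" and set `vid_in_tweet` to True, as the diff above does.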
@@ -968,7 +976,12 @@ def main(argv):
     in_db_cnt = 0
     for replied_to_tweet, status in timeline:
         # Extract tweet ID and status ID
-        tweet_id = status.find('a', class_='tweet-link').get('href').strip('#m')
+        tweet_link_tag = status.find('a', class_='tweet-link')
+        if tweet_link_tag is None:
+            logging.debug("Malformed timeline item (no tweet link), skipping")
+            continue
+        tweet_id = tweet_link_tag.get('href').strip('#m')
         status_id = tweet_id.split('/')[3]

         logging.debug('processing tweet %s', tweet_id)
@@ -1041,7 +1054,7 @@ def main(argv):
         # Process quote: append link to tweet_text
         quote_div = status.find('a', class_='quote-link')
         if quote_div is not None:
-            tweet_text += substitute_source('\n\nhttps://twitter.com' + quote_div.get('href').strip('#m'))
+            tweet_text += '\n\n' + substitute_source('https://twitter.com' + quote_div.get('href').strip('#m'))

         # Process card: extract image if necessary
         card_class = status.find('a', class_='card-container')
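
This one-line change restores the blank line in front of the quoted-tweet link by appending `'\n\n'` outside the call, so the separator no longer depends on how `substitute_source()` transforms its argument. A minimal sketch of the fixed ordering; the stand-in `substitute_source` below only swaps the domain and is not Twoot's real implementation:

```python
def substitute_source(url):
    # Stand-in only: Twoot's real function rewrites link domains per its configuration.
    return url.replace('https://twitter.com', 'https://nitter.example')

quote_href = '/jack/status/20'  # made-up quoted-tweet path
tweet_text = 'Original tweet text'

# Fixed form: the blank line is appended as-is, then the URL alone is substituted.
tweet_text += '\n\n' + substitute_source('https://twitter.com' + quote_href)
print(repr(tweet_text))
```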
@@ -1056,7 +1069,7 @@ def main(argv):
                                              status_id, author_account)
             photos.extend(pics)
             if vid_in_tweet:
-                tweet_text += '\n\n[Video embedded in original tweet]'
+                tweet_text += '\n\n[Video is unavailable]'

         # Add custom footer from config file
         if TOML['options']['footer'] != '':
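
The closing context lines reference the custom footer read from the TOML configuration. A minimal sketch of that flow, assuming Python 3.11's `tomllib`; the configuration contents and the append separator are placeholders, only the key names (`config.twitter_account`, `options.footer`) come from the code shown above:

```python
import tomllib

# Placeholder configuration mirroring the keys the script dereferences.
config_text = """
[config]
twitter_account = "example_account"

[options]
footer = "#twitter #mirror"
"""
TOML = tomllib.loads(config_text)

tweet_text = "Example toot body"
if TOML['options']['footer'] != '':
    # Separator is illustrative; the actual append statement is outside this hunk.
    tweet_text += '\n\n' + TOML['options']['footer']
print(tweet_text)
```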