Merge branch 'master'

2025-05-06 19:43:57 +00:00 · 2023-07-25 13:00:40 +02:00 · 2023-07-25 13:00:40 +02:00 · 189c37272a
commit 189c37272a
parent b6396f7b51 c34707b149
3 changed files with 52 additions and 25 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -1,5 +1,18 @@
 # Changelog

+**22 JUL 2023** VERSION 4.3.1
+
+Minor improvements of robustness (avoid interruption of processing):
+
+* Ignore timeline-item without tweet-link tag
+* Improve detection of missing video
+
+**17 JUL 2023** VERSION 4.3
+
+* Twitter threads are replicated on Mastodon: each follow-up message in a thread is posted
+as a reply to its predecessor.
+* An issue with downloading videos has been fixed ("ERROR: Sorry, you are not authorized to see this status").
+
 **14 JUL 2023** VERSION 4.2

 Twoot can now handle threads. All tweets can again be uploaded on Mastodon. Tweets in a thread are
--- a/README.md
+++ b/README.md
@ -3,11 +3,12 @@
 Twoot is a Python script that mirrors tweets from a Twitter account to a Mastodon account.
 It is simple to set up on a local machine, configurable and feature-rich.

-**17 JUL 2023** VERSION 4.3
+**24 JUL 2023** VERSION 4.3.2

-* Twitter threads are replicated on Mastodon: each follow-up message in a thread is posted
-as a reply to its predecessor.
-* An issue with downloading videos has been fixed ("ERROR: Sorry, you are not authorized to see this status").
+Minor fixes (avoid interruption of processing):
+
+* Re-add missing `\n` before URL of quoted tweet
+* Avoid interruption of processing in case of empty thread timeline

 > Previous updates can be found in CHANGELOG.

--- a/twoot.py
+++ b/twoot.py
@ -204,9 +204,11 @@ def _get_rest_of_thread(session, headers, nitter_url, thread_url, first_item):
    # Make soup
    soup = BeautifulSoup(thread_page.text, 'html.parser')

+    list = []
    # Get all items in thread after main tweet
    after_tweet = soup.find('div', 'after-tweet')
-    list = after_tweet.find_all('div', class_='timeline-item')
+    if after_tweet is not None:
+        list = after_tweet.find_all('div', class_='timeline-item')

    # Build timeline of tuples
    previous_tweet_url = thread_url
@ -674,25 +676,31 @@ def process_attachments(nitter_url, attachments_container, status_id, author_acc
            logging.debug("downloading video from twitter")
            import youtube_dl

-            video_path = vid_container.source['src']
-            if video_path is not None:
-                video_file = urljoin(nitter_url, video_path)
-                ydl_opts = {
-                    'outtmpl': "output/" + TOML['config']['twitter_account'] + "/" + status_id + "/%(id)s.%(ext)s",
-                    # 'format': "best[width<=500]",
-                    'socket_timeout': 60,
-                    'quiet': True,
-                }
+            video_path_source = vid_container.source
+            if video_path_source is not None:
+                video_path = video_path_source['src']
+                if video_path is not None:
+                    video_file = urljoin(nitter_url, video_path)
+                    ydl_opts = {
+                        'outtmpl': "output/" + TOML['config']['twitter_account'] + "/" + status_id + "/%(id)s.%(ext)s",
+                        # 'format': "best[width<=500]",
+                        'socket_timeout': 60,
+                        'quiet': True,
+                    }

-                with youtube_dl.YoutubeDL(ydl_opts) as ydl:
-                    try:
-                        ydl.download([video_file])
-                    except Exception as e:
-                        logging.warning('Error downloading twitter video: ' + str(e))
-                        vid_in_tweet = True
-                    else:
-                        logging.debug('downloaded twitter video from attachments')
+                    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
+                        try:
+                            ydl.download([video_file])
+                        except Exception as e:
+                            logging.warning('Error downloading twitter video: ' + str(e))
+                            vid_in_tweet = True
+                        else:
+                            logging.debug('downloaded twitter video from attachments')
+                else:
+                    logging.debug("Media is unavailable")
+                    vid_in_tweet = True
            else:
+                logging.debug("Media is unavailable")
                vid_in_tweet = True

    return pics, vid_in_tweet
@ -968,7 +976,12 @@ def main(argv):
    in_db_cnt = 0
    for replied_to_tweet, status in timeline:
        # Extract tweet ID and status ID
-        tweet_id = status.find('a', class_='tweet-link').get('href').strip('#m')
+        tweet_link_tag = status.find('a', class_='tweet-link')
+        if tweet_link_tag is None:
+            logging.debug("Malformed timeline item (no tweet link), skipping")
+            continue
+
+        tweet_id = tweet_link_tag.get('href').strip('#m')
        status_id = tweet_id.split('/')[3]

        logging.debug('processing tweet %s', tweet_id)
@ -1041,7 +1054,7 @@ def main(argv):
        # Process quote: append link to tweet_text
        quote_div = status.find('a', class_='quote-link')
        if quote_div is not None:
-            tweet_text += substitute_source('\n\nhttps://twitter.com' + quote_div.get('href').strip('#m'))
+            tweet_text += '\n\n' + substitute_source('https://twitter.com' + quote_div.get('href').strip('#m'))

        # Process card: extract image if necessary
        card_class = status.find('a', class_='card-container')
@ -1056,7 +1069,7 @@ def main(argv):
                                                     status_id, author_account)
            photos.extend(pics)
            if vid_in_tweet:
-                tweet_text += '\n\n[Video embedded in original tweet]'
+                tweet_text += '\n\n[Video is unavailable]'

        # Add custom footer from config file
        if TOML['options']['footer'] != '':