Simplified process_media_body and process_attachments

2025-05-07 20:13:56 +00:00 · 2022-11-23 14:49:06 +01:00 · 2022-11-23 14:49:06 +01:00 · b6315f193c
commit b6315f193c
parent 294bf1fae1
1 changed file with 8 additions and 16 deletions
--- a/twoot.py
+++ b/twoot.py
@@ -173,13 +173,11 @@ def clean_url(dirty_url):
    return cleaned_url


-def process_media_body(tt_iter, remove_redir, remove_trackers):
+def process_media_body(tt_iter):
    """
    Receives an iterator over all the elements contained in the tweet-text container.
    Processes them to make them suitable for posting on Mastodon
    :param tt_iter: iterator over the HTML elements in the text of the tweet
-    :param remove_redir: bool to indicate if redirections should be removed
-    :param remove_trackers: bool to indicate if trackers should be removed
    :return:        cleaned up text of the tweet
    """
    tweet_text = ''
@@ -200,12 +198,12 @@ def process_media_body(tt_iter, remove_redir, remove_trackers):
                tweet_text += tag_text
            else:
                # This is a real link
-                if remove_redir:
+                if TOML['options']['remove_link_redirections']:
                    url = deredir_url(tag.get('href'))
                else:
                    url = tag.get('href')

-                if remove_trackers:
+                if TOML['options']['remove_trackers_from_urls']:
                    tweet_text += clean_url(url)
                else:
                    tweet_text += url
@@ -232,12 +230,11 @@ def process_card(nitter_url, card_container):
    return list


-def process_attachments(nitter_url, attachments_container, get_vids, twit_account, status_id, author_account):
+def process_attachments(nitter_url, attachments_container, status_id, author_account):
    """
    Extract images or video from attachments. Videos are downloaded on the file system.
    :param nitter_url: url of nitter mirror
    :param attachments_container: soup of 'div' tag containing attachments markup
-    :param get_vids: whether to download videos or not
    :param twit_account: name of twitter account
    :param status_id: id of tweet being processed
    :param author_account: author of tweet with video attachment
@@ -256,7 +253,7 @@ def process_attachments(nitter_url, attachments_container, get_vids, twit_accoun
    if gif_class is not None:
        gif_video_file = nitter_url + gif_class.source.get('src')

-        video_path = os.path.join('output', twit_account, status_id, author_account, status_id)
+        video_path = os.path.join('output', TOML['config']['twitter_account'], status_id, author_account, status_id)
        os.makedirs(video_path, exist_ok=True)

        # Open directory for writing file
@@ -283,12 +280,12 @@ def process_attachments(nitter_url, attachments_container, get_vids, twit_accoun
    vid_in_tweet = False
    vid_class = attachments_container.find('div', class_='video-container')
    if vid_class is not None:
-        if get_vids:
+        if TOML['options']['upload_videos']:
            import youtube_dl

            video_file = os.path.join('https://twitter.com', author_account, 'status', status_id)
            ydl_opts = {
-                'outtmpl': "output/" + twit_account + "/" + status_id + "/%(id)s.%(ext)s",
+                'outtmpl': "output/" + TOML['config']['twitter_account'] + "/" + status_id + "/%(id)s.%(ext)s",
                'format': "best[width<=500]",
                'socket_timeout': 60,
                'quiet': True,
@@ -653,10 +650,7 @@ def main(argv):
        tt_iter = status.find('div', class_='tweet-content media-body').children

        # Process text of tweet
-        tweet_text += process_media_body(tt_iter,
-                                         TOML['options']['remove_link_redirections'],
-                                         TOML['options']['remove_trackers_from_urls']
-        )
+        tweet_text += process_media_body(tt_iter)

        # Process quote: append link to tweet_text
        quote_div = status.find('a', class_='quote-link')
@ -673,8 +667,6 @@ def main(argv):
        if attachments_class is not None:
            pics, vid_in_tweet = process_attachments(nitter_url,
                                                     attachments_class,
-                                                     TOML['options']['upload_videos'],
-                                                     TOML['config']['twitter_account'], 
                                                     status_id, author_account
            )
            photos.extend(pics)