Simplified process_media_body and process_attachments

This commit is contained in:
jeancf 2022-11-23 14:49:06 +01:00
parent 294bf1fae1
commit b6315f193c

View File

@ -173,13 +173,11 @@ def clean_url(dirty_url):
return cleaned_url
def process_media_body(tt_iter, remove_redir, remove_trackers):
def process_media_body(tt_iter):
"""
Receives an iterator over all the elements contained in the tweet-text container.
Processes them to make them suitable for posting on Mastodon
:param tt_iter: iterator over the HTML elements in the text of the tweet
:param remove_redir: bool to indicate if redirections should be removed
:param remove_trackers: bool to indicate if trackers should be removed
:return: cleaned up text of the tweet
"""
tweet_text = ''
@ -200,12 +198,12 @@ def process_media_body(tt_iter, remove_redir, remove_trackers):
tweet_text += tag_text
else:
# This is a real link
if remove_redir:
if TOML['options']['remove_link_redirections']:
url = deredir_url(tag.get('href'))
else:
url = tag.get('href')
if remove_trackers:
if TOML['options']['remove_trackers_from_urls']:
tweet_text += clean_url(url)
else:
tweet_text += url
@ -232,12 +230,11 @@ def process_card(nitter_url, card_container):
return list
def process_attachments(nitter_url, attachments_container, get_vids, twit_account, status_id, author_account):
def process_attachments(nitter_url, attachments_container, status_id, author_account):
"""
Extract images or video from attachments. Videos are downloaded to the file system.
:param nitter_url: url of nitter mirror
:param attachments_container: soup of 'div' tag containing attachments markup
:param get_vids: whether to download videos or not
:param twit_account: name of twitter account
:param status_id: id of tweet being processed
:param author_account: author of tweet with video attachment
@ -256,7 +253,7 @@ def process_attachments(nitter_url, attachments_container, get_vids, twit_accoun
if gif_class is not None:
gif_video_file = nitter_url + gif_class.source.get('src')
video_path = os.path.join('output', twit_account, status_id, author_account, status_id)
video_path = os.path.join('output', TOML['config']['twitter_account'], status_id, author_account, status_id)
os.makedirs(video_path, exist_ok=True)
# Open directory for writing file
@ -283,12 +280,12 @@ def process_attachments(nitter_url, attachments_container, get_vids, twit_accoun
vid_in_tweet = False
vid_class = attachments_container.find('div', class_='video-container')
if vid_class is not None:
if get_vids:
if TOML['options']['upload_videos']:
import youtube_dl
video_file = os.path.join('https://twitter.com', author_account, 'status', status_id)
ydl_opts = {
'outtmpl': "output/" + twit_account + "/" + status_id + "/%(id)s.%(ext)s",
'outtmpl': "output/" + TOML['config']['twitter_account'] + "/" + status_id + "/%(id)s.%(ext)s",
'format': "best[width<=500]",
'socket_timeout': 60,
'quiet': True,
@ -653,10 +650,7 @@ def main(argv):
tt_iter = status.find('div', class_='tweet-content media-body').children
# Process text of tweet
tweet_text += process_media_body(tt_iter,
TOML['options']['remove_link_redirections'],
TOML['options']['remove_trackers_from_urls']
)
tweet_text += process_media_body(tt_iter)
# Process quote: append link to tweet_text
quote_div = status.find('a', class_='quote-link')
@ -673,8 +667,6 @@ def main(argv):
if attachments_class is not None:
pics, vid_in_tweet = process_attachments(nitter_url,
attachments_class,
TOML['options']['upload_videos'],
TOML['config']['twitter_account'],
status_id, author_account
)
photos.extend(pics)