mirror of
https://gitlab.com/jeancf/twoot.git
synced 2025-04-17 18:27:37 +00:00
Simplified process_media_body and process_attachments: options are now read from the global TOML config instead of being passed as parameters
This commit is contained in:
parent
294bf1fae1
commit
b6315f193c
24
twoot.py
24
twoot.py
@ -173,13 +173,11 @@ def clean_url(dirty_url):
|
|||||||
return cleaned_url
|
return cleaned_url
|
||||||
|
|
||||||
|
|
||||||
def process_media_body(tt_iter, remove_redir, remove_trackers):
|
def process_media_body(tt_iter):
|
||||||
"""
|
"""
|
||||||
Receives an iterator over all the elements contained in the tweet-text container.
|
Receives an iterator over all the elements contained in the tweet-text container.
|
||||||
Processes them to make them suitable for posting on Mastodon
|
Processes them to make them suitable for posting on Mastodon
|
||||||
:param tt_iter: iterator over the HTML elements in the text of the tweet
|
:param tt_iter: iterator over the HTML elements in the text of the tweet
|
||||||
:param remove_redir: bool to indicate if redirections should be removed
|
|
||||||
:param remove_trackers: bool to indicate if trackers should be removed
|
|
||||||
:return: cleaned up text of the tweet
|
:return: cleaned up text of the tweet
|
||||||
"""
|
"""
|
||||||
tweet_text = ''
|
tweet_text = ''
|
||||||
@ -200,12 +198,12 @@ def process_media_body(tt_iter, remove_redir, remove_trackers):
|
|||||||
tweet_text += tag_text
|
tweet_text += tag_text
|
||||||
else:
|
else:
|
||||||
# This is a real link
|
# This is a real link
|
||||||
if remove_redir:
|
if TOML['options']['remove_link_redirections']:
|
||||||
url = deredir_url(tag.get('href'))
|
url = deredir_url(tag.get('href'))
|
||||||
else:
|
else:
|
||||||
url = tag.get('href')
|
url = tag.get('href')
|
||||||
|
|
||||||
if remove_trackers:
|
if TOML['options']['remove_trackers_from_urls']:
|
||||||
tweet_text += clean_url(url)
|
tweet_text += clean_url(url)
|
||||||
else:
|
else:
|
||||||
tweet_text += url
|
tweet_text += url
|
||||||
@ -232,12 +230,11 @@ def process_card(nitter_url, card_container):
|
|||||||
return list
|
return list
|
||||||
|
|
||||||
|
|
||||||
def process_attachments(nitter_url, attachments_container, get_vids, twit_account, status_id, author_account):
|
def process_attachments(nitter_url, attachments_container, status_id, author_account):
|
||||||
"""
|
"""
|
||||||
Extract images or video from attachments. Videos are downloaded on the file system.
|
Extract images or video from attachments. Videos are downloaded on the file system.
|
||||||
:param nitter_url: url of nitter mirror
|
:param nitter_url: url of nitter mirror
|
||||||
:param attachments_container: soup of 'div' tag containing attachments markup
|
:param attachments_container: soup of 'div' tag containing attachments markup
|
||||||
:param get_vids: whether to download videos or not
|
|
||||||
:param twit_account: name of twitter account
|
:param twit_account: name of twitter account
|
||||||
:param status_id: id of tweet being processed
|
:param status_id: id of tweet being processed
|
||||||
:param author_account: author of tweet with video attachment
|
:param author_account: author of tweet with video attachment
|
||||||
@ -256,7 +253,7 @@ def process_attachments(nitter_url, attachments_container, get_vids, twit_accoun
|
|||||||
if gif_class is not None:
|
if gif_class is not None:
|
||||||
gif_video_file = nitter_url + gif_class.source.get('src')
|
gif_video_file = nitter_url + gif_class.source.get('src')
|
||||||
|
|
||||||
video_path = os.path.join('output', twit_account, status_id, author_account, status_id)
|
video_path = os.path.join('output', TOML['config']['twitter_account'], status_id, author_account, status_id)
|
||||||
os.makedirs(video_path, exist_ok=True)
|
os.makedirs(video_path, exist_ok=True)
|
||||||
|
|
||||||
# Open directory for writing file
|
# Open directory for writing file
|
||||||
@ -283,12 +280,12 @@ def process_attachments(nitter_url, attachments_container, get_vids, twit_accoun
|
|||||||
vid_in_tweet = False
|
vid_in_tweet = False
|
||||||
vid_class = attachments_container.find('div', class_='video-container')
|
vid_class = attachments_container.find('div', class_='video-container')
|
||||||
if vid_class is not None:
|
if vid_class is not None:
|
||||||
if get_vids:
|
if TOML['options']['upload_videos']:
|
||||||
import youtube_dl
|
import youtube_dl
|
||||||
|
|
||||||
video_file = os.path.join('https://twitter.com', author_account, 'status', status_id)
|
video_file = os.path.join('https://twitter.com', author_account, 'status', status_id)
|
||||||
ydl_opts = {
|
ydl_opts = {
|
||||||
'outtmpl': "output/" + twit_account + "/" + status_id + "/%(id)s.%(ext)s",
|
'outtmpl': "output/" + TOML['config']['twitter_account'] + "/" + status_id + "/%(id)s.%(ext)s",
|
||||||
'format': "best[width<=500]",
|
'format': "best[width<=500]",
|
||||||
'socket_timeout': 60,
|
'socket_timeout': 60,
|
||||||
'quiet': True,
|
'quiet': True,
|
||||||
@ -653,10 +650,7 @@ def main(argv):
|
|||||||
tt_iter = status.find('div', class_='tweet-content media-body').children
|
tt_iter = status.find('div', class_='tweet-content media-body').children
|
||||||
|
|
||||||
# Process text of tweet
|
# Process text of tweet
|
||||||
tweet_text += process_media_body(tt_iter,
|
tweet_text += process_media_body(tt_iter)
|
||||||
TOML['options']['remove_link_redirections'],
|
|
||||||
TOML['options']['remove_trackers_from_urls']
|
|
||||||
)
|
|
||||||
|
|
||||||
# Process quote: append link to tweet_text
|
# Process quote: append link to tweet_text
|
||||||
quote_div = status.find('a', class_='quote-link')
|
quote_div = status.find('a', class_='quote-link')
|
||||||
@ -673,8 +667,6 @@ def main(argv):
|
|||||||
if attachments_class is not None:
|
if attachments_class is not None:
|
||||||
pics, vid_in_tweet = process_attachments(nitter_url,
|
pics, vid_in_tweet = process_attachments(nitter_url,
|
||||||
attachments_class,
|
attachments_class,
|
||||||
TOML['options']['upload_videos'],
|
|
||||||
TOML['config']['twitter_account'],
|
|
||||||
status_id, author_account
|
status_id, author_account
|
||||||
)
|
)
|
||||||
photos.extend(pics)
|
photos.extend(pics)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user