Done but to be tested

This commit is contained in:
jeancf 2022-11-27 20:45:32 +01:00
parent 27ed4cf10e
commit 21ac2cf7dd

View File

@ -73,7 +73,6 @@ def deredir_url(url):
:param url: url to be de-redirected
:return: direct url
"""
# Check if we need to do anything
if TOML['options']['remove_link_redirections'] is False:
return url
@ -155,11 +154,10 @@ def substitute_source(orig_url):
:param orig_url: url to check for substitutes
:return: url with replaced domains
"""
parsed_url = urlparse(orig_url)
domain = parsed_url.netloc
logging.debug("Checking domain " + domain)
logging.debug("Checking domain %s for substitution ", domain)
# Handle twitter
twitter_subst = TOML["options"]["substitution"]["twitter"]
@ -199,7 +197,6 @@ def clean_url(orig_url):
>>> clean_url('https://example.com/video/this-aerial-ropeway?utm_source=Twitter&utm_medium=video&utm_campaign=organic&utm_content=Nov13&a=aaa&b=1#mkt_tok=tik&mkt_tik=tok')
'https://example.com/video/this-aerial-ropeway?a=aaa&b=1#mkt_tik=tok'
"""
# Check if we have to do anything
if TOML['options']['remove_trackers_from_urls'] is False:
return orig_url
@ -227,6 +224,7 @@ def process_media_body(tt_iter):
:param tt_iter: iterator over the HTML elements in the text of the tweet
:return: cleaned up text of the tweet
"""
tweet_text = ''
# Iterate elements
for tag in tt_iter:
@ -289,7 +287,7 @@ def process_attachments(nitter_url, attachments_container, status_id, author_acc
for image in images:
pics.append(nitter_url + image.get('href'))
logging.debug('collected ' + str(len(pics)) + ' images from attachments')
logging.debug('collected ' + str(len(pics)) + ' image(s) from attachments')
# Download nitter video (converted animated GIF)
gif_class = attachments_container.find('video', class_='gif')
@ -700,7 +698,7 @@ def main(argv):
# Process quote: append link to tweet_text
quote_div = status.find('a', class_='quote-link')
if quote_div is not None:
tweet_text += '\n\nhttps://twitter.com' + quote_div.get('href').strip('#m')
tweet_text += substitute_source('\n\nhttps://twitter.com' + quote_div.get('href').strip('#m'))
# Process card : extract image if necessary
card_class = status.find('a', class_='card-container')
@ -719,7 +717,7 @@ def main(argv):
tweet_text += '\n\n[Video embedded in original tweet]'
# Add footer with link to original tweet
tweet_text += '\n\nOriginal tweet : ' + full_status_url
tweet_text += '\n\nOriginal tweet : ' + substitute_source(full_status_url)
# If no media was specifically added in the tweet, try to get the first picture
# with "twitter:image" meta tag in first linked page in tweet text
@ -770,7 +768,6 @@ def main(argv):
tweets.append(tweet)
logging.debug('Tweet %s added to list of toots to upload', tweet_id)
logging.debug('TEXT:\n' + tweet["tweet_text"])
# Log summary stats
logging.info(str(out_date_cnt) + ' tweets outside of valid time range')