From 27ed4cf10e119cde514a521015827b2f3eb32d14 Mon Sep 17 00:00:00 2001 From: jeancf Date: Sun, 27 Nov 2022 18:30:39 +0100 Subject: [PATCH] Refactored process_media_body() --- twoot.py | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/twoot.py b/twoot.py index 261fb47..4f15177 100755 --- a/twoot.py +++ b/twoot.py @@ -74,6 +74,10 @@ def deredir_url(url): :return: direct url """ + # Check if we need to do anything + if TOML['options']['remove_link_redirections'] is False: + return url + # Get a copy of the default headers that requests would use headers = requests.utils.default_headers() @@ -187,7 +191,7 @@ def substitute_source(orig_url): return dest_url -def clean_url(dirty_url): +def clean_url(orig_url): """ Given a URL, return it with the UTM parameters removed from query and fragment :param dirty_url: url to be cleaned @@ -196,20 +200,24 @@ def clean_url(dirty_url): 'https://example.com/video/this-aerial-ropeway?a=aaa&b=1#mkt_tik=tok' """ - url_parsed = urlparse(dirty_url) + # Check if we have to do anything + if TOML['options']['remove_trackers_from_urls'] is False: + return orig_url - cleaned_url = urlunparse([ + url_parsed = urlparse(orig_url) + + dest_url = urlunparse([ url_parsed.scheme, - _substitute_source(url_parsed.netloc), + url_parsed.netloc, url_parsed.path, url_parsed.params, _remove_trackers_query(url_parsed.query), _remove_trackers_fragment(url_parsed.fragment) ]) - if cleaned_url != dirty_url: - logging.debug('Cleaned URL from: ' + dirty_url + ' to: ' + cleaned_url) + if dest_url != orig_url: + logging.debug('Cleaned URL from: ' + orig_url + ' to: ' + dest_url) - return cleaned_url + return dest_url def process_media_body(tt_iter): @@ -237,15 +245,11 @@ def process_media_body(tt_iter): tweet_text += tag_text else: # This is a real link - if TOML['options']['remove_link_redirections']: - url = deredir_url(tag.get('href')) - else: - url = tag.get('href') - - if
TOML['options']['remove_trackers_from_urls']: - tweet_text += clean_url(url) - else: - tweet_text += url + url = deredir_url(tag.get('href')) + url = substitute_source(url) + url = clean_url(url) + + tweet_text += url else: logging.warning("No handler for tag in twitter text: " + tag.prettify())