mirror of
https://gitlab.com/jeancf/twoot.git
synced 2024-11-24 12:31:12 +00:00
Refactored process_media_body()
This commit is contained in:
parent
8aa9bb9ae0
commit
27ed4cf10e
36
twoot.py
36
twoot.py
|
@ -74,6 +74,10 @@ def deredir_url(url):
|
|||
:return: direct url
|
||||
"""
|
||||
|
||||
# Check if we need to do anything
|
||||
if TOML['options']['remove_link_redirections'] is False:
|
||||
return url
|
||||
|
||||
# Get a copy of the default headers that requests would use
|
||||
headers = requests.utils.default_headers()
|
||||
|
||||
|
@ -187,7 +191,7 @@ def substitute_source(orig_url):
|
|||
|
||||
return dest_url
|
||||
|
||||
def clean_url(dirty_url):
|
||||
def clean_url(orig_url):
|
||||
"""
|
||||
Given a URL, return it with the UTM parameters removed from query and fragment
|
||||
:param orig_url: url to be cleaned
|
||||
|
@ -196,20 +200,24 @@ def clean_url(dirty_url):
|
|||
'https://example.com/video/this-aerial-ropeway?a=aaa&b=1#mkt_tik=tok'
|
||||
"""
|
||||
|
||||
url_parsed = urlparse(dirty_url)
|
||||
# Check if we have to do anything
|
||||
if TOML['options']['remove_trackers_from_urls'] is False:
|
||||
return orig_url
|
||||
|
||||
cleaned_url = urlunparse([
|
||||
url_parsed = urlparse(orig_url)
|
||||
|
||||
dest_url = urlunparse([
|
||||
url_parsed.scheme,
|
||||
_substitute_source(url_parsed.netloc),
|
||||
url_parsed.netloc,
|
||||
url_parsed.path,
|
||||
url_parsed.params,
|
||||
_remove_trackers_query(url_parsed.query),
|
||||
_remove_trackers_fragment(url_parsed.fragment)
|
||||
])
|
||||
if cleaned_url != dirty_url:
|
||||
logging.debug('Cleaned URL from: ' + dirty_url + ' to: ' + cleaned_url)
|
||||
if dest_url != orig_url:
|
||||
logging.debug('Cleaned URL from: ' + orig_url + ' to: ' + dest_url)
|
||||
|
||||
return cleaned_url
|
||||
return dest_url
|
||||
|
||||
|
||||
def process_media_body(tt_iter):
|
||||
|
@ -237,15 +245,11 @@ def process_media_body(tt_iter):
|
|||
tweet_text += tag_text
|
||||
else:
|
||||
# This is a real link
|
||||
if TOML['options']['remove_link_redirections']:
|
||||
url = deredir_url(tag.get('href'))
|
||||
else:
|
||||
url = tag.get('href')
|
||||
|
||||
if TOML['options']['remove_trackers_from_urls']:
|
||||
tweet_text += clean_url(url)
|
||||
else:
|
||||
tweet_text += url
|
||||
url = deredir_url(tag.get('href'))
|
||||
url = substitute_source(url)
|
||||
url = clean_url(url)
|
||||
|
||||
tweet_text += url
|
||||
else:
|
||||
logging.warning("No handler for tag in twitter text: " + tag.prettify())
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user