mirror of
https://gitlab.com/jeancf/twoot.git
synced 2024-11-24 12:31:12 +00:00
Refactored process_media_body()
This commit is contained in:
parent
8aa9bb9ae0
commit
27ed4cf10e
36
twoot.py
36
twoot.py
|
@ -74,6 +74,10 @@ def deredir_url(url):
|
||||||
:return: direct url
|
:return: direct url
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
# Check if we need to do anything
|
||||||
|
if TOML['options']['remove_link_redirections'] is False:
|
||||||
|
return url
|
||||||
|
|
||||||
# Get a copy of the default headers that requests would use
|
# Get a copy of the default headers that requests would use
|
||||||
headers = requests.utils.default_headers()
|
headers = requests.utils.default_headers()
|
||||||
|
|
||||||
|
@ -187,7 +191,7 @@ def substitute_source(orig_url):
|
||||||
|
|
||||||
return dest_url
|
return dest_url
|
||||||
|
|
||||||
def clean_url(dirty_url):
|
def clean_url(orig_url):
|
||||||
"""
|
"""
|
||||||
Given a URL, return it with the UTM parameters removed from query and fragment
|
Given a URL, return it with the UTM parameters removed from query and fragment
|
||||||
:param dirty_url: url to be cleaned
|
:param orig_url: url to be cleaned
|
||||||
|
@ -196,20 +200,24 @@ def clean_url(dirty_url):
|
||||||
'https://example.com/video/this-aerial-ropeway?a=aaa&b=1#mkt_tik=tok'
|
'https://example.com/video/this-aerial-ropeway?a=aaa&b=1#mkt_tik=tok'
|
||||||
"""
|
"""
|
||||||
|
|
||||||
url_parsed = urlparse(dirty_url)
|
# Check if we have to do anything
|
||||||
|
if TOML['options']['remove_trackers_from_urls'] is False:
|
||||||
|
return orig_url
|
||||||
|
|
||||||
cleaned_url = urlunparse([
|
url_parsed = urlparse(orig_url)
|
||||||
|
|
||||||
|
dest_url = urlunparse([
|
||||||
url_parsed.scheme,
|
url_parsed.scheme,
|
||||||
_substitute_source(url_parsed.netloc),
|
url_parsed.netloc,
|
||||||
url_parsed.path,
|
url_parsed.path,
|
||||||
url_parsed.params,
|
url_parsed.params,
|
||||||
_remove_trackers_query(url_parsed.query),
|
_remove_trackers_query(url_parsed.query),
|
||||||
_remove_trackers_fragment(url_parsed.fragment)
|
_remove_trackers_fragment(url_parsed.fragment)
|
||||||
])
|
])
|
||||||
if cleaned_url != dirty_url:
|
if dest_url != orig_url:
|
||||||
logging.debug('Cleaned URL from: ' + dirty_url + ' to: ' + cleaned_url)
|
logging.debug('Cleaned URL from: ' + orig_url + ' to: ' + dest_url)
|
||||||
|
|
||||||
return cleaned_url
|
return dest_url
|
||||||
|
|
||||||
|
|
||||||
def process_media_body(tt_iter):
|
def process_media_body(tt_iter):
|
||||||
|
@ -237,15 +245,11 @@ def process_media_body(tt_iter):
|
||||||
tweet_text += tag_text
|
tweet_text += tag_text
|
||||||
else:
|
else:
|
||||||
# This is a real link
|
# This is a real link
|
||||||
if TOML['options']['remove_link_redirections']:
|
url = deredir_url(tag.get('href'))
|
||||||
url = deredir_url(tag.get('href'))
|
url = substitute_source(url)
|
||||||
else:
|
url = clean_url(url)
|
||||||
url = tag.get('href')
|
|
||||||
|
tweet_text += url
|
||||||
if TOML['options']['remove_trackers_from_urls']:
|
|
||||||
tweet_text += clean_url(url)
|
|
||||||
else:
|
|
||||||
tweet_text += url
|
|
||||||
else:
|
else:
|
||||||
logging.warning("No handler for tag in twitter text: " + tag.prettify())
|
logging.warning("No handler for tag in twitter text: " + tag.prettify())
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user