substitute_source() completed

This commit is contained in:
jeancf 2022-11-27 18:12:49 +01:00
parent 318354669e
commit f92d025e5c

View File

@ -146,13 +146,15 @@ def _remove_trackers_fragment(fragment_str):
return fragment_str
def _substitute_source(domain):
def substitute_source(orig_url):
"""
private function
param domain: Domain name to substitute
:return: domain or substitute if applicable
param orig_url: url to check for substitutes
:return: url with replaced domains
"""
parsed_url = urlparse(orig_url)
domain = parsed_url.netloc
logging.debug("Checking domain " + domain)
# Handle twitter
@ -173,7 +175,17 @@ def _substitute_source(domain):
domain = reddit_subst[random.randint(0, len(reddit_subst) - 1)]
logging.debug("Replaced reddit.com by " + domain)
return domain
dest_url = urlunparse([
parsed_url.scheme,
_substitute_source(parsed_url.netloc),
parsed_url.path,
parsed_url.params,
_remove_trackers_query(parsed_url.query),
_remove_trackers_fragment(parsed_url.fragment)
])
return dest_url
def clean_url(dirty_url):
"""