substitute_source() completed

This commit is contained in:
jeancf 2022-11-27 18:12:49 +01:00
parent 318354669e
commit f92d025e5c

View File

@ -146,13 +146,15 @@ def _remove_trackers_fragment(fragment_str):
return fragment_str return fragment_str
def _substitute_source(domain): def substitute_source(orig_url):
""" """
private function param orig_url: url to check for substitutes
param domain: Domain name to substitute :return: url with replaced domains
:return: domain or substitute if applicable
""" """
parsed_url = urlparse(orig_url)
domain = parsed_url.netloc
logging.debug("Checking domain " + domain) logging.debug("Checking domain " + domain)
# Handle twitter # Handle twitter
@ -173,7 +175,17 @@ def _substitute_source(domain):
domain = reddit_subst[random.randint(0, len(reddit_subst) - 1)] domain = reddit_subst[random.randint(0, len(reddit_subst) - 1)]
logging.debug("Replaced reddit.com by " + domain) logging.debug("Replaced reddit.com by " + domain)
return domain dest_url = urlunparse([
parsed_url.scheme,
_substitute_source(parsed_url.netloc),
parsed_url.path,
parsed_url.params,
_remove_trackers_query(parsed_url.query),
_remove_trackers_fragment(parsed_url.fragment)
])
return dest_url
def clean_url(dirty_url): def clean_url(dirty_url):
""" """