From f92d025e5c919b67577ce284e93ccd668b009afe Mon Sep 17 00:00:00 2001 From: jeancf Date: Sun, 27 Nov 2022 18:12:49 +0100 Subject: [PATCH] substitute_source() completed --- twoot.py | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/twoot.py b/twoot.py index 0f16cb6..1ef62ba 100755 --- a/twoot.py +++ b/twoot.py @@ -146,13 +146,15 @@ def _remove_trackers_fragment(fragment_str): return fragment_str -def _substitute_source(domain): +def substitute_source(orig_url): """ - private function - param domain: Domain name to substitute - :return: domain or sustitute if applicable + param orig_url: url to check for substitutes + :return: url with replaced domains """ + parsed_url = urlparse(orig_url) + domain = parsed_url.netloc + logging.debug("Checking domain " + domain) # Handle twitter @@ -173,7 +175,17 @@ def _substitute_source(domain): domain = reddit_subst[random.randint(0, len(reddit_subst) - 1)] logging.debug("Replaced reddit.com by " + domain) - return domain + dest_url = urlunparse([ + parsed_url.scheme, + _substitute_source(parsed_url.netloc), + parsed_url.path, + parsed_url.params, + _remove_trackers_query(parsed_url.query), + _remove_trackers_fragment(parsed_url.fragment) + ]) + + + return dest_url def clean_url(dirty_url): """