mirror of
https://gitlab.com/jeancf/twoot.git
synced 2024-11-24 12:31:12 +00:00
substitute_source() completed
This commit is contained in:
parent
318354669e
commit
f92d025e5c
22
twoot.py
22
twoot.py
|
@ -146,13 +146,15 @@ def _remove_trackers_fragment(fragment_str):
|
|||
return fragment_str
|
||||
|
||||
|
||||
def _substitute_source(domain):
|
||||
def substitute_source(orig_url):
|
||||
"""
|
||||
private function
|
||||
param domain: Domain name to substitute
|
||||
:return: domain or sustitute if applicable
|
||||
param orig_url: url to check for substitutes
|
||||
:return: url with replaced domains
|
||||
"""
|
||||
|
||||
parsed_url = urlparse(orig_url)
|
||||
domain = parsed_url.netloc
|
||||
|
||||
logging.debug("Checking domain " + domain)
|
||||
|
||||
# Handle twitter
|
||||
|
@ -173,7 +175,17 @@ def _substitute_source(domain):
|
|||
domain = reddit_subst[random.randint(0, len(reddit_subst) - 1)]
|
||||
logging.debug("Replaced reddit.com by " + domain)
|
||||
|
||||
return domain
|
||||
dest_url = urlunparse([
|
||||
parsed_url.scheme,
|
||||
_substitute_source(parsed_url.netloc),
|
||||
parsed_url.path,
|
||||
parsed_url.params,
|
||||
_remove_trackers_query(parsed_url.query),
|
||||
_remove_trackers_fragment(parsed_url.fragment)
|
||||
])
|
||||
|
||||
|
||||
return dest_url
|
||||
|
||||
def clean_url(dirty_url):
|
||||
"""
|
||||
|
|
Loading…
Reference in New Issue
Block a user