From 40d14c4d5db49deb08a70376cb7bc01d159b4776 Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Tue, 22 Nov 2022 11:05:16 +0100
Subject: [PATCH 01/13] Added de-redirection of URL in tweet

---
 twoot.py | 36 ++++++++++++++++++++++++++++++++----
 1 file changed, 32 insertions(+), 4 deletions(-)

diff --git a/twoot.py b/twoot.py
index f01e710..cd15e73 100755
--- a/twoot.py
+++ b/twoot.py
@@ -66,6 +66,33 @@ USER_AGENTS = [
     'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 Vivaldi/5.4.2753.51',
 ]
 
+def deredir_url(url):
+    """
+    Given a URL, return the URL that the page really downloads from
+    :param url: url to be de-redirected
+    :return: direct url
+    """
+
+    # Get a copy of the default headers that requests would use
+    headers = requests.utils.default_headers()
+
+    # Update default headers with randomly selected user agent
+    headers.update(
+        {
+            'User-Agent': USER_AGENTS[random.randint(0, len(USER_AGENTS) - 1)],
+        }
+    )
+
+    ret = None
+    try:
+        # Download the page
+        ret = requests.get(url, headers, timeout=5)
+    except:
+        # If anything goes wrong keep the URL intact
+        return url
+
+    # Return the URL that the page was downloaded from
+    return ret.url
 
 def _remove_trackers_query(query_str):
     """
@@ -158,11 +185,12 @@ def process_media_body(tt_iter, remove_trackers):
                 # Only keep hashtag text
                 tweet_text += tag_text
             else:
-                # This is a real link, keep url
+                # This is a real link
+                url = deredir_url(tag.get('href'))
                 if remove_trackers:
-                    tweet_text += clean_url(tag.get('href'))
+                    tweet_text += clean_url(url)
                 else:
-                    tweet_text += tag.get('href')
+                    tweet_text += url
         else:
             logging.warning("No handler for tag in twitter text: " + tag.prettify())
 
@@ -426,7 +454,7 @@ def main(argv):
     if tweets_and_replies:
         url += '/with_replies'
 
-    # Download twitter page of user.
+    # Download twitter page of user
     try:
         twit_account_page = session.get(url, headers=headers, timeout=HTTPS_REQ_TIMEOUT)
     except requests.exceptions.ConnectionError:

From 68e4918b02804e5d2a782d0e2b17d6ac0742a650 Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Tue, 22 Nov 2022 11:08:29 +0100
Subject: [PATCH 02/13] Added debug message

---
 twoot.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/twoot.py b/twoot.py
index cd15e73..43bc435 100755
--- a/twoot.py
+++ b/twoot.py
@@ -91,6 +91,9 @@ def deredir_url(url):
         # If anything goes wrong keep the URL intact
         return url
 
+    if ret.url != url:
+        logging.debug("Removed redirection from: " + url + " to: " + ret.url)
+
     # Return the URL that the page was downloaded from
     return ret.url
 

From e11102f4a6018b8bda94d12f62332d8d87525ee7 Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Tue, 22 Nov 2022 11:33:45 +0100
Subject: [PATCH 03/13] User agent removed

---
 twoot.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/twoot.py b/twoot.py
index 43bc435..b987679 100755
--- a/twoot.py
+++ b/twoot.py
@@ -86,7 +86,7 @@ def deredir_url(url):
     ret = None
     try:
         # Download the page
-        ret = requests.get(url, headers, timeout=5)
+        ret = requests.get(url, timeout=5)
     except:
         # If anything goes wrong keep the URL intact
         return url

From 9625c2128bc866cc450347fc892310731e68fef0 Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Tue, 22 Nov 2022 11:38:49 +0100
Subject: [PATCH 04/13] modified get request in deredir_url()

---
 twoot.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/twoot.py b/twoot.py
index b987679..8fb3d5e 100755
--- a/twoot.py
+++ b/twoot.py
@@ -86,7 +86,7 @@ def deredir_url(url):
     ret = None
     try:
         # Download the page
-        ret = requests.get(url, timeout=5)
+        ret = requests.get(url, headers=headers, timeout=5)
     except:
         # If anything goes wrong keep the URL intact
         return url
@@ -97,6 +97,7 @@ def deredir_url(url):
     # Return the URL that the page was downloaded from
     return ret.url
 
+
 def _remove_trackers_query(query_str):
     """
     private function

From 9b5a76db60aeb8bb9af67f33e94439896ceca8df Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Tue, 22 Nov 2022 12:50:34 +0100
Subject: [PATCH 05/13] updated README.md

---
 README.md | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/README.md b/README.md
index d117fda..17700d3 100644
--- a/README.md
+++ b/README.md
@@ -3,11 +3,7 @@
 Twoot is a python script that mirrors tweets from a twitter account to a Mastodon account.
 It is simple to set-up on a local machine, configurable and feature-rich.
 
-**UPDATE 22 NOV 2022** VERSION 2.4 Added command-line option (`-u`) to
-remove tracking parameters from URLs included in tweets. A tracking URL is a
-normal URL with additional parameters attached to it. These parameters are used
-by marketing companies to identify the source of a click and the effectiveness
-of a communication campaign.
+**UPDATE XX NOV 2022** VERSION 2.5 Added command-line option (`-l`)
 
 > Previous updates can be found in CHANGELOG.
 
@@ -25,7 +21,7 @@ of a communication campaign.
 * Optionally ignore retweets
 * Allows rate-limiting posts to Mastodon instance
 
-## usage
+## Usage
 
 ```
 twoot.py [-h] -t <twitter account> -i <mastodon instance> -m <mastodon account>
@@ -33,7 +29,7 @@ twoot.py [-h] -t <twitter account> -i <mastodon instance> -m <mastodon account>
                 [-d <min delay (in mins)>] [-c <max # of toots to post>]
 ```
 
-## arguments
+## Arguments
 
 Assuming that the Twitter handle is @SuperDuperBot and the Mastodon account
 is @superduperbot@botsin.space
@@ -42,16 +38,24 @@ is @superduperbot@botsin.space
 |-------|--------------------------------------------------|--------------------|-----|
 | -t    | twitter account name without '@'                 | `SuperDuper`       | Yes |
 | -i    | Mastodon instance domain name                    | `botsin.space`     | Yes |
-| -m    | Mastodon username                                | `superduperbot`    | Yes |
+| -m    | Mastodon username                                | `sd@example.com`   | Yes |
 | -p    | Mastodon password                                | `my_Sup3r-S4f3*pw` | Yes |
 | -v    | upload videos to Mastodon                        | *N/A*              | No  |
 | -r    | Post reply-to tweets (ignored by default)        | *N/A*              | No  |
 | -s    | Skip retweets (posted by default)                | *N/A*              | No  |
+| -l    | Remove link redirection                          | *N/A*              | No  |
 | -u    | Remove trackers from URLs                        | *N/A*              | No  |
 | -a    | Max. age of tweet to post (in days)              | `5`                | No  |
 | -d    | Min. age before posting new tweet (in minutes)   | `15`               | No  |
 | -c    | Max number of toots allowed to post (cap)        | `1`                | No  |
 
+## Notes
+
+`-l` will follow every link included in the tweet and replace them with the url that the
+resource is directly downloaded from (if applicable). e.g. bit.ly/xxyyyzz -> example.com
+Every link visit can take up to 5 sec (timeout) therefore this option will slow down
+tweet processing.
+
 When using the `-v` switch consider:
 
 * whether the copyright of the content that you want to cross-post allows it
@@ -64,7 +68,8 @@ Default min delay is 0 minutes.
 
 No limitation is applied to the number of toots uploaded if `-c` is not specified.
 
-## installation
+
+## Installation
 
 Make sure python3 is installed.
 

From 0d1be42dcc473ac6d6f3cac6c96829bdcbd50270 Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Tue, 22 Nov 2022 22:01:27 +0100
Subject: [PATCH 06/13] Added code to remove trackers from fragments

---
 twoot.py | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/twoot.py b/twoot.py
index 8fb3d5e..15b72ee 100755
--- a/twoot.py
+++ b/twoot.py
@@ -110,13 +110,18 @@ def _remove_trackers_query(query_str):
     # tag by TikTok
     # tags by Snapchat
     # tags by Facebook
-    params_to_remove = [
-        "utm_source", "utm_medium", "utm_campaign", "utm_term", "utm_content",
+    params_to_remove = {
+        "gclid", "_ga", "gclsrc", "dclid",
+        "utm_source", "utm_medium", "utm_campaign", "utm_term", "utm_content", "utm_cid", "utm_reader", "utm_name", "utm_referrer", "utm_social", "utm_social-type",
         "mkt_tok",
         "campaign_name", "ad_set_name", "campaign_id", "ad_set_id",
-        "media", "interest_group_name",
-        "xtor"
-    ]
+        "fbclid", "campaign_name", "ad_set_name", "ad_set_id", "media", "interest_group_name", "ad_set_id",
+        "igshid",
+        "cvid", "oicd", "msclkid",
+        "soc_src", "soc_trk",
+        "_openstat", "yclid",
+        "xtor", "xtref", "adid",
+    }
     query_to_clean = dict(parse_qsl(query_str, keep_blank_values=True))
     query_cleaned = [(k, v) for k, v in query_to_clean.items() if not k in params_to_remove]
     return urlencode(query_cleaned, doseq=True)
@@ -129,12 +134,15 @@ def _remove_trackers_fragment(fragment_str):
     :param query_str: fragment to be cleaned
     :return: cleaned fragment
     """
- 
-    # Not implemented
-    # Unclear what, if anything, can be done
-    # Need better understanding of fragment-based tracking
-    # https://builtvisible.com/one-weird-trick-to-avoid-utm-parameters/
 
+    params_to_remove = {
+        "Echobox",
+    }
+    
+    if '=' in fragment_str:
+        fragment_str = fragment_str.split('&')
+        query_cleaned = [i for i in fragment_str if i.split('=')[0] not in params_to_remove]
+        fragment_str = '&'.join(query_cleaned)
     return fragment_str
 
 

From 7e7fa4620ffcfd89bdca727595711a6029e90a34 Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Wed, 23 Nov 2022 09:59:06 +0100
Subject: [PATCH 07/13] Implemented -l command-line option

---
 twoot.py | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/twoot.py b/twoot.py
index 15b72ee..f98d9cb 100755
--- a/twoot.py
+++ b/twoot.py
@@ -66,6 +66,7 @@ USER_AGENTS = [
     'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 Vivaldi/5.4.2753.51',
 ]
 
+
 def deredir_url(url):
     """
     Given a URL, return the URL that the page really downloads from
@@ -123,7 +124,7 @@ def _remove_trackers_query(query_str):
         "xtor", "xtref", "adid",
     }
     query_to_clean = dict(parse_qsl(query_str, keep_blank_values=True))
-    query_cleaned = [(k, v) for k, v in query_to_clean.items() if not k in params_to_remove]
+    query_cleaned = [(k, v) for k, v in query_to_clean.items() if k not in params_to_remove]
     return urlencode(query_cleaned, doseq=True)
 
 
@@ -138,7 +139,7 @@ def _remove_trackers_fragment(fragment_str):
     params_to_remove = {
         "Echobox",
     }
-    
+
     if '=' in fragment_str:
         fragment_str = fragment_str.split('&')
         query_cleaned = [i for i in fragment_str if i.split('=')[0] not in params_to_remove]
@@ -172,7 +173,7 @@ def clean_url(dirty_url):
     return cleaned_url
 
 
-def process_media_body(tt_iter, remove_trackers):
+def process_media_body(tt_iter, remove_redir, remove_trackers):
     """
     Receives an iterator over all the elements contained in the tweet-text container.
     Processes them to make them suitable for posting on Mastodon
@@ -198,7 +199,11 @@ def process_media_body(tt_iter, remove_trackers):
                 tweet_text += tag_text
             else:
                 # This is a real link
-                url = deredir_url(tag.get('href'))
+                if remove_redir:
+                    url = deredir_url(tag.get('href'))
+                else:
+                    url = tag.get('href')
+
                 if remove_trackers:
                     tweet_text += clean_url(url)
                 else:
@@ -382,6 +387,7 @@ def main(argv):
     parser.add_argument('-p', metavar='<mastodon password>', action='store', required=True)
     parser.add_argument('-r', action='store_true', help='Also post replies to other tweets')
     parser.add_argument('-s', action='store_true', help='Suppress retweets')
+    parser.add_argument('-l', action='store_true', help='Remove link redirection')
     parser.add_argument('-u', action='store_true', help='Remove trackers from URLs')
     parser.add_argument('-v', action='store_true', help='Ingest twitter videos and upload to Mastodon instance')
     parser.add_argument('-a', metavar='<max age (in days)>', action='store', type=float, default=1)
@@ -397,6 +403,7 @@ def main(argv):
     mast_password = args['p']
     tweets_and_replies = args['r']
     suppress_retweets = args['s']
+    remove_redir = args['l']
     remove_trackers = args['u']
     get_vids = args['v']
     max_age = float(args['a'])
@@ -423,6 +430,7 @@ def main(argv):
     logging.info('    -m ' + mast_account)
     logging.info('    -r ' + str(tweets_and_replies))
     logging.info('    -s ' + str(suppress_retweets))
+    logging.info('    -l ' + str(remove_redir))
     logging.info('    -u ' + str(remove_trackers))
     logging.info('    -v ' + str(get_vids))
     logging.info('    -a ' + str(max_age))
@@ -579,7 +587,7 @@ def main(argv):
         tt_iter = status.find('div', class_='tweet-content media-body').children
 
         # Process text of tweet
-        tweet_text += process_media_body(tt_iter, remove_trackers)
+        tweet_text += process_media_body(tt_iter, remove_redir, remove_trackers)
 
         # Process quote: append link to tweet_text
         quote_div = status.find('a', class_='quote-link')

From 3930acc93ffca633893bc8a9b3cf260ef51e7be7 Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Wed, 23 Nov 2022 09:59:45 +0100
Subject: [PATCH 08/13] Updated README

---
 README.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 17700d3..304f428 100644
--- a/README.md
+++ b/README.md
@@ -3,7 +3,9 @@
 Twoot is a python script that mirrors tweets from a twitter account to a Mastodon account.
 It is simple to set-up on a local machine, configurable and feature-rich.
 
-**UPDATE XX NOV 2022** VERSION 2.5 Added command-line option (`-l`)
+**UPDATE XX NOV 2022** VERSION 2.5 Added command-line option (`-l`) to remove redirection
+from links included in tweets. Obfuscated links are replaced by the URL that the resource
+is directly downloaded from.
 
 > Previous updates can be found in CHANGELOG.
 

From f0b5ee98d2d1b1316d7a3d76bc7429fb07229497 Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Wed, 23 Nov 2022 10:50:41 +0100
Subject: [PATCH 09/13] Added missing parameter in docstring

---
 test.py  | 99 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 twoot.py |  1 +
 2 files changed, 100 insertions(+)
 create mode 100755 test.py

diff --git a/test.py b/test.py
new file mode 100755
index 0000000..5b4a630
--- /dev/null
+++ b/test.py
@@ -0,0 +1,99 @@
+#! /usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+from urllib.parse import urlparse, parse_qsl, urlencode, urlunparse
+import requests
+
+def deredir_url(url):
+    """
+    Given a URL, return the URL that the page really downloads from
+    :param url: url to be de-redirected
+    :return: direct url
+    """
+
+    ret = None
+    try:
+        # Download the page
+        ret = requests.get(url, timeout=5)
+    except:
+        # If anything goes wrong keep the URL intact
+        return url
+
+    # Return the URL that the page was downloaded from
+    return ret.url
+
+def _remove_tracker_params(query_str):
+    """
+    private function
+    Given a query string from a URL, strip out the known trackers
+    :param query_str: query to be cleaned
+    :return: query cleaned
+    """
+    # Avalaible URL tracking parameters :
+    # UTM tags by Google Ads, M$ Ads, ...
+    # tag by TikTok
+    # tags by Snapchat
+    # tags by Facebook
+    params_to_remove = [
+        "utm_source", "utm_medium", "utm_campaign", "utm_term", "utm_content",
+        "mkt_tok",
+        "campaign_name", "ad_set_name", "campaign_id", "ad_set_id",
+        "media", "interest_group_name",
+        "xtor"
+    ]
+    query_to_clean = dict(parse_qsl(query_str, keep_blank_values=True))
+    query_cleaned = [(k, v) for k, v in query_to_clean.items() if not k in params_to_remove]
+    return urlencode(query_cleaned, safe='#', doseq=True)
+
+
+def _remove_trackers_fragment(fragment_str):
+    """
+    private function
+    Given a fragment string from a URL, strip out the known trackers
+    :param query_str: fragment to be cleaned
+    :return: cleaned fragment
+    """
+ 
+    # Not implemented
+    # Unclear what, if anything, can be done
+    # Need better understanding of fragment-based tracking
+    # https://builtvisible.com/one-weird-trick-to-avoid-utm-parameters/
+
+    return fragment_str
+
+
+def clean_url(dirty_url):
+    """
+    Given a URL, return it with the UTM parameters removed from query and fragment
+    :param dirty_url: url to be cleaned
+    :return: url cleaned
+    >>> clean_url('https://example.com/video/this-aerial-ropeway?utm_source=Twitter&utm_medium=video&utm_campaign=organic&utm_content=Nov13&a=aaa&b=1#mkt_tok=tik&mkt_tik=tok')
+    'https://example.com/video/this-aerial-ropeway?a=aaa&b=1#mkt_tik=tok'
+    """
+
+    url_parsed = urlparse(dirty_url, allow_fragments=False)
+
+    cleaned_url = urlunparse([
+        url_parsed.scheme,
+        url_parsed.netloc,
+        url_parsed.path,
+        url_parsed.params,
+        _remove_tracker_params(url_parsed.query),
+        _remove_trackers_fragment(url_parsed.fragment)
+    ])
+
+    return cleaned_url
+
+def main():
+    # url = 'https://example.com/video/this-aerial-ropeway?utm_source=Twitter&utm_medium=video&utm_campaign=organic&utm_content=Nov13&a=aaa&b=1#mkt_tok=tik&mkt_tik=tok'
+    # url = "https://docs.helix-editor.com/keymap.html#movement"
+    # url = "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7071508/#sec1-nutrients-12-00530title"
+    # url = "https://uscode.house.gov/view.xhtml?req=granuleid:USC-prelim-title42-section12208&num=0&edition=prelim"
+    url = "https://shorturl.at/qwP38"
+    print('Orig: ' + url)
+    direct_url = deredir_url(url)
+    print('dir : ' + direct_url)
+    print('to  : ' + clean_url(direct_url))
+
+if __name__=="__main__":
+    main()
diff --git a/twoot.py b/twoot.py
index f98d9cb..03892a5 100755
--- a/twoot.py
+++ b/twoot.py
@@ -178,6 +178,7 @@ def process_media_body(tt_iter, remove_redir, remove_trackers):
     Receives an iterator over all the elements contained in the tweet-text container.
     Processes them to make them suitable for posting on Mastodon
     :param tt_iter: iterator over the HTML elements in the text of the tweet
+    :param remove_redir: bool to indicate if redirections should be removed
     :param remove_trackers: bool to indicate if trackers should be removed
     :return:        cleaned up text of the tweet
     """

From 89dc01a97e04e4f61599cc3a83056b8df983f3f0 Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Wed, 23 Nov 2022 11:50:19 +0100
Subject: [PATCH 10/13] Removed wrong file

---
 test.py | 99 ---------------------------------------------------------
 1 file changed, 99 deletions(-)
 delete mode 100755 test.py

diff --git a/test.py b/test.py
deleted file mode 100755
index 5b4a630..0000000
--- a/test.py
+++ /dev/null
@@ -1,99 +0,0 @@
-#! /usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-from urllib.parse import urlparse, parse_qsl, urlencode, urlunparse
-import requests
-
-def deredir_url(url):
-    """
-    Given a URL, return the URL that the page really downloads from
-    :param url: url to be de-redirected
-    :return: direct url
-    """
-
-    ret = None
-    try:
-        # Download the page
-        ret = requests.get(url, timeout=5)
-    except:
-        # If anything goes wrong keep the URL intact
-        return url
-
-    # Return the URL that the page was downloaded from
-    return ret.url
-
-def _remove_tracker_params(query_str):
-    """
-    private function
-    Given a query string from a URL, strip out the known trackers
-    :param query_str: query to be cleaned
-    :return: query cleaned
-    """
-    # Avalaible URL tracking parameters :
-    # UTM tags by Google Ads, M$ Ads, ...
-    # tag by TikTok
-    # tags by Snapchat
-    # tags by Facebook
-    params_to_remove = [
-        "utm_source", "utm_medium", "utm_campaign", "utm_term", "utm_content",
-        "mkt_tok",
-        "campaign_name", "ad_set_name", "campaign_id", "ad_set_id",
-        "media", "interest_group_name",
-        "xtor"
-    ]
-    query_to_clean = dict(parse_qsl(query_str, keep_blank_values=True))
-    query_cleaned = [(k, v) for k, v in query_to_clean.items() if not k in params_to_remove]
-    return urlencode(query_cleaned, safe='#', doseq=True)
-
-
-def _remove_trackers_fragment(fragment_str):
-    """
-    private function
-    Given a fragment string from a URL, strip out the known trackers
-    :param query_str: fragment to be cleaned
-    :return: cleaned fragment
-    """
- 
-    # Not implemented
-    # Unclear what, if anything, can be done
-    # Need better understanding of fragment-based tracking
-    # https://builtvisible.com/one-weird-trick-to-avoid-utm-parameters/
-
-    return fragment_str
-
-
-def clean_url(dirty_url):
-    """
-    Given a URL, return it with the UTM parameters removed from query and fragment
-    :param dirty_url: url to be cleaned
-    :return: url cleaned
-    >>> clean_url('https://example.com/video/this-aerial-ropeway?utm_source=Twitter&utm_medium=video&utm_campaign=organic&utm_content=Nov13&a=aaa&b=1#mkt_tok=tik&mkt_tik=tok')
-    'https://example.com/video/this-aerial-ropeway?a=aaa&b=1#mkt_tik=tok'
-    """
-
-    url_parsed = urlparse(dirty_url, allow_fragments=False)
-
-    cleaned_url = urlunparse([
-        url_parsed.scheme,
-        url_parsed.netloc,
-        url_parsed.path,
-        url_parsed.params,
-        _remove_tracker_params(url_parsed.query),
-        _remove_trackers_fragment(url_parsed.fragment)
-    ])
-
-    return cleaned_url
-
-def main():
-    # url = 'https://example.com/video/this-aerial-ropeway?utm_source=Twitter&utm_medium=video&utm_campaign=organic&utm_content=Nov13&a=aaa&b=1#mkt_tok=tik&mkt_tik=tok'
-    # url = "https://docs.helix-editor.com/keymap.html#movement"
-    # url = "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7071508/#sec1-nutrients-12-00530title"
-    # url = "https://uscode.house.gov/view.xhtml?req=granuleid:USC-prelim-title42-section12208&num=0&edition=prelim"
-    url = "https://shorturl.at/qwP38"
-    print('Orig: ' + url)
-    direct_url = deredir_url(url)
-    print('dir : ' + direct_url)
-    print('to  : ' + clean_url(direct_url))
-
-if __name__=="__main__":
-    main()

From 50e961b70f04befdf8d80783bb94763259974d25 Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Wed, 23 Nov 2022 11:51:32 +0100
Subject: [PATCH 11/13] Removed wrong file

---
 default.toml | 37 -------------------------------------
 1 file changed, 37 deletions(-)
 delete mode 100644 default.toml

diff --git a/default.toml b/default.toml
deleted file mode 100644
index 2b7d373..0000000
--- a/default.toml
+++ /dev/null
@@ -1,37 +0,0 @@
-[config]
-# twitter account name without '@'
-twitter_account = "SuperDuper"
-
-# Domain name of Mastodon instance
-mastodon_instance = "botsin.space"
-
-# Mastodon username
-mastodon_user = "superduperbot"
-
-[options]
-# Download videos from twitter and upload them on Mastodon
-upload_videos = false
-
-# Also post the "reply-to" tweets from twitter account
-post_reply_to = false
-
-# Do not post the retweets of other twitter accounts
-skip_retweets = false
-
-# Clean up URLs in tweets to remove trackers  (UNIMPLEMENTED)
-remove_trackers_from_URL = false
-
-# Rewrite URLs to use invidious instance instead of youtube  (UNIMPLEMENTED)
-substitute_invidious = false
-
-# Rewrite URLs to use nitter instance instead of twitter  (UNIMPLEMENTED)
-substitute_nitter = false
-
-# Maximum age of tweet to post (in days, decimal values accepted)
-tweet_max_age = 1
-
-# Minimum age of tweet before posting (in minutes)
-tweet_delay = 15
-
-# Maximum number of toots to post in this run
-twoot_cap = 1

From 2c4d6bd7e00c621f707b099e471b13f448666128 Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Wed, 23 Nov 2022 21:46:05 +0100
Subject: [PATCH 12/13] Last updates before release

---
 CHANGELOG.md | 10 ++++++++--
 README.md    |  9 +++++----
 twoot.py     |  2 +-
 3 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c7bf795..0e1c48e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,8 +1,14 @@
-**XX NOV 2022** VERSION 2.4 Added command-line option (`-u`) to
+**23 NOV 2022** VERSION 2.5 Added command-line option (`-l`) to remove
+redirection from links included in tweets. Obfuscated links are replaced
+by the URL that the resource is directly downloaded from. Also improved
+tracker removal by cleaning URL fragments as well (contrib: mathdatech,
+thanks!).
+
+**22 NOV 2022** VERSION 2.4 Added command-line option (`-u`) to
 remove tracking parameters from URLs included in tweets. A tracking URL
 is a normal URL with parameters attached to it. These parameters are used
 by marketing companies to identify the source of a click and the effectiveness
-of a communication campaign.
+of a communication campaign (contrib: mathdatech, thanks!).
 
 **15 NOV 2022** VERSION 2.3 Added command-line option (`-s`) to
 skip retweets. With this option, retweets will be ignored and not posted
diff --git a/README.md b/README.md
index 304f428..6c5a027 100644
--- a/README.md
+++ b/README.md
@@ -1,11 +1,12 @@
 # Twoot
 
-Twoot is a python script that mirrors tweets from a twitter account to a Mastodon account.
-It is simple to set-up on a local machine, configurable and feature-rich.
+**Twoot is a python script that mirrors tweets from a twitter account to a Mastodon account.
+It is simple to set-up on a local machine, configurable and feature-rich.**
 
-**UPDATE XX NOV 2022** VERSION 2.5 Added command-line option (`-l`) to remove redirection
+**UPDATE 23 NOV 2022** VERSION 2.5 Added command-line option (`-l`) to remove redirection
 from links included in tweets. Obfuscated links are replaced by the URL that the resource
-is directly downloaded from.
+is directly downloaded from. Also improved tracker removal by cleaning URL fragments as well
+(contrib: mathdatech, thanks!).
 
 > Previous updates can be found in CHANGELOG.
 
diff --git a/twoot.py b/twoot.py
index 03892a5..618af8e 100755
--- a/twoot.py
+++ b/twoot.py
@@ -40,7 +40,7 @@ MAX_REC_COUNT = 50
 
 # Set the desired verbosity of logging
 # One of logging.DEBUG, logging.INFO, logging.WARNING, logging.ERROR, logging.CRITICAL
-LOGGING_LEVEL = logging.DEBUG
+LOGGING_LEVEL = logging.INFO
 
 # How many seconds to wait before giving up on a download (except video download)
 HTTPS_REQ_TIMEOUT = 10

From 91ffbde963f795d32413d04ca59fd8c0a48e1976 Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Wed, 23 Nov 2022 21:51:57 +0100
Subject: [PATCH 13/13] Last minute thought

---
 README.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/README.md b/README.md
index 6c5a027..6844689 100644
--- a/README.md
+++ b/README.md
@@ -59,6 +59,9 @@ resource is directly dowmnloaded from (if applicable). e.g. bit.ly/xxyyyzz -> ex
 Every link visit can take up to 5 sec (timeout) therefore this option will slow down
 tweet processing.
 
+If you are interested in tracker removal (`-u`) you should also select redirection removal (`-l`)
+as trackers are often hidden behind the redirection of a short URL.
+
 When using the `-v` switch consider:
 
 * whether the copyright of the content that you want to cross-post allows it