From 9fc76b9981c3da109dad21383bbf4b657a9d0ba3 Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Wed, 16 Dec 2020 18:47:27 +0100
Subject: [PATCH 01/42] Updated user agents

---
 twoot.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/twoot.py b/twoot.py
index c5e6274..ecd468e 100755
--- a/twoot.py
+++ b/twoot.py
@@ -37,10 +37,10 @@ import shutil
 
 # Update from https://www.whatismybrowser.com/guides/the-latest-user-agent/
 USER_AGENTS = [
-    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36',
-    'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:54.0) Gecko/20100101 Firefox/73.0',
-    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13 Safari/605.1.15',
-    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; Xbox; Xbox One) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36 Edge/44.18363.8131',
+    'Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36',
+    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:84.0) Gecko/20100101 Firefox/84.0',
+    'Mozilla/5.0 (Macintosh; Intel Mac OS X 11_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0.1 Safari/605.1.15',
+    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36 Edg/87.0.664.60',
     ]
 
 # Setup logging to file

From 894c13d551376330af43da647c5b7036686f4435 Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Wed, 16 Dec 2020 19:43:17 +0100
Subject: [PATCH 02/42] Download page from nitter.net

---
 twoot.py | 62 ++++++++++++--------------------------------------------
 1 file changed, 13 insertions(+), 49 deletions(-)

diff --git a/twoot.py b/twoot.py
index ecd468e..6ec825a 100755
--- a/twoot.py
+++ b/twoot.py
@@ -44,48 +44,8 @@ USER_AGENTS = [
     ]
 
 # Setup logging to file
-logging.basicConfig(filename="twoot.log", level=logging.WARNING)
-logging.debug('*********** NEW RUN ***********')
-
-def handle_no_js(session, page, headers):
-    """
-    Check if page is a "No Javascript" page instead of the content that we wanted
-    If it is, submit the form on the page as POST request to get the correct page and return it
-    :param session: current requests session
-    :param page: Response object to check
-    :param headers: HTTP headers used in initial request
-    :return: correct page (Response object)
-    """
-    # DEBUG: Save page to file
-    #of = open('no_js_page.html', 'w')
-    #of.write(page.text)
-    #of.close()
-
-    # Set default return value
-    new_page = page
-
-    # Make soup
-    soup = BeautifulSoup(page.text, 'html.parser')
-
-    if soup.form.p is not None:
-        if 'JavaScript is disabled' in str(soup.form.p.string):
-            # Submit POST form response with cookies
-            headers.update(
-                {
-                    'Content-Type': 'application/x-www-form-urlencoded',
-                    'Referer': page.request.url,
-                }
-            )
-
-            action = soup.form.get('action')
-
-            # Submit the form
-            new_page = session.post(action, headers=headers, cookies=page.cookies)
-
-            # Verify that download worked
-            assert (new_page.status_code == 200), 'The twitter page did not download correctly. Aborting'
-
-    return new_page
+logging.basicConfig(filename="twoot.log", level=logging.INFO)
+logging.info('*********** NEW RUN ***********')
 
 
 def cleanup_tweet_text(tt_iter, twit_account, status_id, tweet_uri, get_vids):
@@ -195,6 +155,7 @@ def contains_class(body_classes, some_class):
 
     return found
 
+
 def main(argv):
 
     # Build parser for command line arguments
@@ -220,6 +181,8 @@ def main(argv):
     max_age = float(args['a'])
     min_delay = float(args['d'])
 
+    logging.info('Updating ' + twit_account + ' on ' + mast_instance)
+
     # Try to open database. If it does not exist, create it
     sql = sqlite3.connect('twoot.db')
     db = sql.cursor()
@@ -246,21 +209,22 @@ def main(argv):
         }
     )
 
-    url = 'https://mobile.twitter.com/' + twit_account
-    # Download twitter page of user. We should get a 'no javascript' landing page and some cookies
+    url = 'https://nitter.net/' + twit_account
+    # Download twitter page of user.
     twit_account_page = session.get(url, headers=headers)
 
     # Verify that download worked
     assert twit_account_page.status_code == 200,\
         'The twitter page did not download correctly. Aborting'
 
-    # If we got a No Javascript page, download the correct page
-    twit_account_page = handle_no_js(session, twit_account_page, headers)
+    logging.info('Page downloaded successfully')
 
     # DEBUG: Save page to file
-    #of = open(twit_account + '.html', 'w')
-    #of.write(twit_account_page.text)
-    #of.close()
+    of = open(twit_account + '.html', 'w')
+    of.write(twit_account_page.text)
+    of.close()
+
+    exit(0)
 
     # Make soup
     soup = BeautifulSoup(twit_account_page.text, 'html.parser')

From e2841535f64637c5af96e9f7b7ad312ab20c01a8 Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Wed, 16 Dec 2020 20:42:44 +0100
Subject: [PATCH 03/42] Extracted twit_account

---
 twoot.py | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/twoot.py b/twoot.py
index 6ec825a..bf41671 100755
--- a/twoot.py
+++ b/twoot.py
@@ -215,7 +215,7 @@ def main(argv):
 
     # Verify that download worked
     assert twit_account_page.status_code == 200,\
-        'The twitter page did not download correctly. Aborting'
+        'The nitter page did not download correctly. Aborting'
 
     logging.info('Page downloaded successfully')
 
@@ -224,17 +224,14 @@ def main(argv):
     of.write(twit_account_page.text)
     of.close()
 
-    exit(0)
-
     # Make soup
     soup = BeautifulSoup(twit_account_page.text, 'html.parser')
 
-    # Verify that we now have the correct twitter page
-    body_classes = soup.body.get_attribute_list('class')
-    assert contains_class(body_classes, 'users-show-page'), 'This is not the correct twitter page. Quitting'
-
     # Replace twit_account with version with correct capitalization
-    twit_account = soup.find('span', class_='screen-name').get_text()
+    ta = soup.find('meta', property='og:title').get('content')
+    twit_account = re.search('\(@(.+)\)', ta).group(1)
+    print(twit_account)
+    exit(0)
 
     # Extract twitter timeline
     timeline = soup.find_all('table', class_='tweet')

From 910b7a8b13651a43cbbe10ac1bfac4b74cad1501 Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Wed, 16 Dec 2020 20:48:00 +0100
Subject: [PATCH 04/42] Keep twit_account unchanged when og:title has no (@name) match

---
 twoot.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/twoot.py b/twoot.py
index bf41671..7a26af0 100755
--- a/twoot.py
+++ b/twoot.py
@@ -229,7 +229,10 @@ def main(argv):
 
     # Replace twit_account with version with correct capitalization
     ta = soup.find('meta', property='og:title').get('content')
-    twit_account = re.search('\(@(.+)\)', ta).group(1)
+    ta_match = re.search('\(@(.+)\)', ta)
+    if ta_match is not None:
+        twit_account = ta_match.group(1)
+
     print(twit_account)
     exit(0)
 

From c25e36b498958d5fc14567574b480dfbdf74b6b4 Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Wed, 16 Dec 2020 20:55:26 +0100
Subject: [PATCH 05/42] Extracted timeline

---
 twoot.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/twoot.py b/twoot.py
index 7a26af0..57d2641 100755
--- a/twoot.py
+++ b/twoot.py
@@ -233,11 +233,10 @@ def main(argv):
     if ta_match is not None:
         twit_account = ta_match.group(1)
 
-    print(twit_account)
-    exit(0)
-
     # Extract twitter timeline
-    timeline = soup.find_all('table', class_='tweet')
+    timeline = soup.find_all('div', class_='timeline-item')
+    print(len(timeline))
+    exit(0)
 
     for status in timeline:
         # Extract tweet ID and status ID

From 7cc076053febf993ddbda8a81e9e1603d41467b9 Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Wed, 16 Dec 2020 21:55:13 +0100
Subject: [PATCH 06/42] Extracted tweet_id and status_id

---
 twoot.py | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

diff --git a/twoot.py b/twoot.py
index 57d2641..d1420f0 100755
--- a/twoot.py
+++ b/twoot.py
@@ -235,12 +235,11 @@ def main(argv):
 
     # Extract twitter timeline
     timeline = soup.find_all('div', class_='timeline-item')
-    print(len(timeline))
-    exit(0)
 
+    logging.info('Processing timeline')
     for status in timeline:
         # Extract tweet ID and status ID
-        tweet_id = str(status['href']).strip('?p=v')
+        tweet_id = status.find('a', class_='tweet-link').get('href').strip('#m')
         status_id = tweet_id.split('/')[3]
 
         logging.debug('processing tweet %s', tweet_id)
@@ -250,10 +249,6 @@ def main(argv):
                    (twit_account, mast_instance, mast_account, tweet_id))
         tweet_in_db = db.fetchone()
 
-        logging.debug("SELECT * FROM toots WHERE twitter_account='{}' AND mastodon_instance='{}' AND mastodon_account='{}' AND tweet_id='{}'"
-                      .format(twit_account, mast_instance, mast_account, tweet_id)
-                      )
-
         if tweet_in_db is not None:
             logging.debug("Tweet %s already in database", tweet_id)
             # Skip to next tweet
@@ -262,13 +257,12 @@ def main(argv):
             logging.debug('Tweet %s not found in database', tweet_id)
 
         reply_to_username = None
-        # Check if the tweet is a reply-to
-        reply_to_div = status.find('div', class_='tweet-reply-context username')
+        # TODO  Check if the tweet is a reply-to
+        reply_to_div = None
         if reply_to_div is not None:
             # Do we need to handle reply-to tweets?
             if tweets_and_replies:
-                # Capture user name being replied to
-                reply_to_username = reply_to_div.a.get_text()
+                # TODO  Capture user name being replied to
             else:
                 # Skip this tweet
                 logging.debug("Tweet is a reply-to and we don't want that. Skipping.")

From e87599d40b538e20b084cdc1411d631c896288f5 Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Wed, 16 Dec 2020 21:57:03 +0100
Subject: [PATCH 07/42] Removed downloading of full status page of the tweet

---
 twoot.py | 26 --------------------------
 1 file changed, 26 deletions(-)

diff --git a/twoot.py b/twoot.py
index d1420f0..816ffe6 100755
--- a/twoot.py
+++ b/twoot.py
@@ -268,32 +268,6 @@ def main(argv):
                 logging.debug("Tweet is a reply-to and we don't want that. Skipping.")
                 continue
 
-        # Extract url of full status page
-        full_status_url = 'https://mobile.twitter.com' + tweet_id + '?p=v'
-
-        # fetch full status page
-        full_status_page = session.get(full_status_url, headers=headers)
-
-        # Verify that download worked
-        assert full_status_page.status_code == 200, \
-            'The twitter page did not download correctly. Aborting'
-
-        # If we got a No Javascript page, download the correct page
-        full_status_page = handle_no_js(session, full_status_page, headers)
-
-        # DEBUG: Save page to file
-        #of = open('full_status_page.html', 'w')
-        #of.write(full_status_page.text)
-        #of.close()
-
-        # Make soup
-        soup = BeautifulSoup(full_status_page.text, 'html.parser')
-
-        # Verify that we now have the correct twitter page
-        body_classes = soup.body.get_attribute_list('class')
-        assert contains_class(body_classes, 'tweets-show-page'), \
-            'This is not the correct twitter page. Quitting'
-
         # Check if tweet contains pic censored as "Sensitive material"
         if soup.find('div', class_='accept-data') is not None:
             # If it does, submit form to obtain uncensored tweet

From 4e6a97d765c7fbe980241647aa6f4e3ca206eb9f Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Wed, 16 Dec 2020 21:58:24 +0100
Subject: [PATCH 08/42] Removed downloading of status page with uncensored pics

---
 twoot.py | 34 ----------------------------------
 1 file changed, 34 deletions(-)

diff --git a/twoot.py b/twoot.py
index 816ffe6..e5b66d9 100755
--- a/twoot.py
+++ b/twoot.py
@@ -268,40 +268,6 @@ def main(argv):
                 logging.debug("Tweet is a reply-to and we don't want that. Skipping.")
                 continue
 
-        # Check if tweet contains pic censored as "Sensitive material"
-        if soup.find('div', class_='accept-data') is not None:
-            # If it does, submit form to obtain uncensored tweet
-            # Submit POST form response with cookies
-            headers.update(
-                {
-                    'Origin': 'https://mobile.twitter.com',
-                    'Host': 'mobile.twitter.com',
-                    'Content-Type': 'application/x-www-form-urlencoded',
-                    'Referer': full_status_url,
-                }
-            )
-
-            # Data payload for POST request
-            authenticity_token = soup.find('input', {'name': 'authenticity_token'}).get('value')
-            form_input = {'show_media': 1, 'authenticity_token': authenticity_token, 'commit': 'Display media'}
-
-            full_status_page = session.post(full_status_url, data=form_input, headers=headers)
-
-            # Verify that download worked
-            assert full_status_page.status_code == 200, \
-                'The twitter page did not download correctly. Aborting'
-
-            # DEBUG: Save page to file
-            #of = open('full_status_page_uncensored.html', 'w')
-            #of.write(full_status_page.text)
-            #of.close()
-
-            # Remake soup
-            soup = BeautifulSoup(full_status_page.text, 'html.parser')
-
-        # Isolate table main-tweet
-        tmt = soup.find('table', class_='main-tweet')
-
         # Extract avatar
         author_logo_url = tmt.find('td', class_='avatar').a.img['src']
 

From 19d988dfcbd0e7ad7a382fb0447e8b3dad619b73 Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Wed, 16 Dec 2020 22:03:09 +0100
Subject: [PATCH 09/42] Removed extracting avatar

---
 twoot.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/twoot.py b/twoot.py
index e5b66d9..5a310cd 100755
--- a/twoot.py
+++ b/twoot.py
@@ -268,9 +268,6 @@ def main(argv):
                 logging.debug("Tweet is a reply-to and we don't want that. Skipping.")
                 continue
 
-        # Extract avatar
-        author_logo_url = tmt.find('td', class_='avatar').a.img['src']
-
         # extract author
         author = tmt.find('div', class_='fullname').a.strong.get_text()
 

From e6e24cbfd5e7b1e77f6312a88cd20d06b92f9905 Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Wed, 16 Dec 2020 22:15:27 +0100
Subject: [PATCH 10/42] Extracted author, author_account, time_string,
 timestamp

---
 twoot.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/twoot.py b/twoot.py
index 5a310cd..d312626 100755
--- a/twoot.py
+++ b/twoot.py
@@ -263,20 +263,21 @@ def main(argv):
             # Do we need to handle reply-to tweets?
             if tweets_and_replies:
                 # TODO  Capture user name being replied to
+                pass
             else:
                 # Skip this tweet
                 logging.debug("Tweet is a reply-to and we don't want that. Skipping.")
                 continue
 
         # extract author
-        author = tmt.find('div', class_='fullname').a.strong.get_text()
+        author = status.find('a', class_='fullname').get('title')
 
         # Extract user name
-        author_account = str(tmt.find('span', class_='username').span.next_sibling).strip('\n ')
+        author_account = status.find('a', class_='username').get('title').lstrip('@')
 
         # Extract time stamp
-        time_string = tmt.find('div', class_='metadata').a.get_text()
-        timestamp = datetime.datetime.strptime(time_string, '%I:%M %p - %d %b %Y').timestamp()
+        time_string = status.find('span', class_='tweet-date').a.get('title')
+        timestamp = datetime.datetime.strptime(time_string, '%d/%m/%Y, %H:%M:%S').timestamp()
 
         # extract iterator over tweet text contents
         tt_iter = tmt.find('div', class_='tweet-text').div.children

From 857a7f9b9e735d17139c8eb2bd0358dfe2fc1cec Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Wed, 16 Dec 2020 22:46:01 +0100
Subject: [PATCH 11/42] Extracted full_status_url

---
 twoot.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/twoot.py b/twoot.py
index d312626..9b8f54d 100755
--- a/twoot.py
+++ b/twoot.py
@@ -275,12 +275,15 @@ def main(argv):
         # Extract user name
         author_account = status.find('a', class_='username').get('title').lstrip('@')
 
+        # Extract URL of full status page (for video download)
+        full_status_url = 'https://twitter.com' + tweet_id
+
         # Extract time stamp
         time_string = status.find('span', class_='tweet-date').a.get('title')
         timestamp = datetime.datetime.strptime(time_string, '%d/%m/%Y, %H:%M:%S').timestamp()
 
         # extract iterator over tweet text contents
-        tt_iter = tmt.find('div', class_='tweet-text').div.children
+        tt_iter = status.find('div', class_='tweet-content media-body').children
 
         tweet_text = cleanup_tweet_text(tt_iter, twit_account, status_id, full_status_url, get_vids)
 
@@ -293,12 +296,12 @@ def main(argv):
             tweet_text = 'RT from ' + author + ' (@' + author_account + ')\n\n' + tweet_text
 
         # Add footer with link to original tweet
-        tweet_text += '\n\nOriginal tweet : https://twitter.com' + tweet_id
+        tweet_text += '\n\nOriginal tweet : ' + full_status_url
 
         photos = []  # The no_js version of twitter only shows one photo
 
         # Check if there are photos attached
-        media = tmt.find('div', class_='media')
+        media = status.find('div', class_='media')
         if media:
             # Extract photo url and add it to list
             pic = str(media.img['src']).strip(':small')

From 3a2c8093a39caaee4535fc96b9ecfec57e5c1694 Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Thu, 17 Dec 2020 10:15:46 +0100
Subject: [PATCH 12/42] Improved logging in cleanup_tweet_text

---
 twoot.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/twoot.py b/twoot.py
index 9b8f54d..0293685 100755
--- a/twoot.py
+++ b/twoot.py
@@ -104,9 +104,9 @@ def cleanup_tweet_text(tt_iter, twit_account, status_id, tweet_uri, get_vids):
                                         tweet_text += '\n\n[Video embedded in original tweet]'
                                 except OSError:
                                     logging.error("Could not execute twitterdl.py (is it there? Is it set as executable?)")
-                                    sys.exit(-1)
                                 except subprocess.TimeoutExpired:
                                     # Video download and encoding took too long
+                                    logging.error("twitterdl.py execution timed out")
                                     tweet_text += '\n\n[Video embedded in original tweet]'
                             else:
                                 tweet_text += '\n\n[Video embedded in original tweet]'
@@ -137,7 +137,7 @@ def cleanup_tweet_text(tt_iter, twit_account, status_id, tweet_uri, get_vids):
             pass
 
         else:
-            print("*** WARNING: No handler for tag in twitter text: " + tag.prettify())
+            logging.warning("No handler for tag in twitter text: " + tag.prettify())
 
     return tweet_text
 
@@ -349,7 +349,6 @@ def main(argv):
         tweet = {
             "author": author,
             "author_account": author_account,
-            "author_logo_url": author_logo_url,
             "timestamp": timestamp,
             "tweet_id": tweet_id,
             "tweet_text": tweet_text,

From d92bcea2a7e9d1bfa5e23d6ea88cda9f4fab1428 Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Thu, 17 Dec 2020 10:44:30 +0100
Subject: [PATCH 13/42] Added cookie to preserve twitter and youtube addresses

---
 twoot.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/twoot.py b/twoot.py
index 0293685..35e8503 100755
--- a/twoot.py
+++ b/twoot.py
@@ -206,6 +206,7 @@ def main(argv):
     headers.update(
         {
             'User-Agent': USER_AGENTS[random.randint(0, len(USER_AGENTS)-1)],
+            'Cookie': 'replaceTwitter=; replaceYouTube=',
         }
     )
 

From 0787669a3a3557e7f9cce39a1382e663df0529b4 Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Thu, 17 Dec 2020 17:31:43 +0100
Subject: [PATCH 14/42] Moved time check to beginning of process

---
 twoot.py | 85 +++++++++++++++++++++++++++-----------------------------
 1 file changed, 41 insertions(+), 44 deletions(-)

diff --git a/twoot.py b/twoot.py
index 35e8503..ee418c9 100755
--- a/twoot.py
+++ b/twoot.py
@@ -31,7 +31,6 @@ import re
 from pathlib import Path
 from mastodon import Mastodon, MastodonError, MastodonAPIError, MastodonIllegalArgumentError
 import subprocess
-import json.decoder
 import shutil
 
 
@@ -48,7 +47,7 @@ logging.basicConfig(filename="twoot.log", level=logging.INFO)
 logging.info('*********** NEW RUN ***********')
 
 
-def cleanup_tweet_text(tt_iter, twit_account, status_id, tweet_uri, get_vids):
+def process_tweet_content(tt_iter, twit_account, status_id, tweet_uri, get_vids):
     '''
     Receives an iterator over all the elements contained in the tweet-text container.
     Processes them to remove Twitter-specific stuff and make them suitable for
@@ -155,6 +154,18 @@ def contains_class(body_classes, some_class):
 
     return found
 
+def is_time_valid(timestamp, max_age, min_delay):
+    ret = True
+    # Check that the tweet is not too young (might be deleted) or too old
+    age_in_hours = (time.time() - float(timestamp)) / 3600.0
+    min_delay_in_hours = min_delay / 60.0
+    max_age_in_hours = max_age * 24.0
+
+    if age_in_hours < min_delay_in_hours or age_in_hours > max_age_in_hours:
+        ret = False
+
+    return ret
+
 
 def main(argv):
 
@@ -237,7 +248,12 @@ def main(argv):
     # Extract twitter timeline
     timeline = soup.find_all('div', class_='timeline-item')
 
-    logging.info('Processing timeline')
+    logging.info('Processing ' + len(timeline) + ' tweets found in timeline')
+
+    # **********************************************************
+    # Process each tweets and generate dictionary
+    # with data ready to be posted on Mastodon
+    # **********************************************************
     for status in timeline:
         # Extract tweet ID and status ID
         tweet_id = status.find('a', class_='tweet-link').get('href').strip('#m')
@@ -245,6 +261,15 @@ def main(argv):
 
         logging.debug('processing tweet %s', tweet_id)
 
+        # Extract time stamp
+        time_string = status.find('span', class_='tweet-date').a.get('title')
+        timestamp = datetime.datetime.strptime(time_string, '%d/%m/%Y, %H:%M:%S').timestamp()
+
+        # Check if time is within acceptable range
+        if not is_time_valid(timestamp, max_age, min_delay):
+            logging.debug("Tweet outside valid time range, skipping")
+            continue
+
         # Check in database if tweet has already been posted
         db.execute("SELECT * FROM toots WHERE twitter_account=? AND mastodon_instance=? AND mastodon_account=? AND tweet_id=?",
                    (twit_account, mast_instance, mast_account, tweet_id))
@@ -257,19 +282,6 @@ def main(argv):
         else:
             logging.debug('Tweet %s not found in database', tweet_id)
 
-        reply_to_username = None
-        # TODO  Check if the tweet is a reply-to
-        reply_to_div = None
-        if reply_to_div is not None:
-            # Do we need to handle reply-to tweets?
-            if tweets_and_replies:
-                # TODO  Capture user name being replied to
-                pass
-            else:
-                # Skip this tweet
-                logging.debug("Tweet is a reply-to and we don't want that. Skipping.")
-                continue
-
         # extract author
         author = status.find('a', class_='fullname').get('title')
 
@@ -279,22 +291,16 @@ def main(argv):
         # Extract URL of full status page (for video download)
         full_status_url = 'https://twitter.com' + tweet_id
 
-        # Extract time stamp
-        time_string = status.find('span', class_='tweet-date').a.get('title')
-        timestamp = datetime.datetime.strptime(time_string, '%d/%m/%Y, %H:%M:%S').timestamp()
+        # TODO  Check if the tweet is a reply-to
+
+        # Check it the tweet is a retweet from somebody else
+        if author_account.lower() != twit_account.lower():
+            tweet_text = 'RT from ' + author + ' (@' + author_account + ')\n\n'
 
         # extract iterator over tweet text contents
         tt_iter = status.find('div', class_='tweet-content media-body').children
 
-        tweet_text = cleanup_tweet_text(tt_iter, twit_account, status_id, full_status_url, get_vids)
-
-        # Mention if the tweet is a reply-to
-        if reply_to_username is not None:
-            tweet_text = 'In reply to ' + reply_to_username + '\n\n' + tweet_text
-
-        # Check it the tweet is a retweet from somebody else
-        if author_account.lower() != twit_account.lower():
-            tweet_text = 'RT from ' + author + ' (@' + author_account + ')\n\n' + tweet_text
+        tweet_text += process_tweet_content(tt_iter, twit_account, status_id, full_status_url, get_vids)
 
         # Add footer with link to original tweet
         tweet_text += '\n\nOriginal tweet : ' + full_status_url
@@ -358,15 +364,17 @@ def main(argv):
         }
         tweets.append(tweet)
 
-        logging.debug('Tweet %s added to list to upload', tweet_id)
+        logging.debug('Tweet %s added to list of toots to upload', tweet_id)
+
+    # TODO  Log summary stats: how many not in db, how many in valid timeframe
 
     # DEBUG: Print extracted tweets
-#    for t in tweets:
-#         print(t)
+    #for t in tweets:
+    #print(t)
 
     # **********************************************************
     # Iterate tweets in list.
-    # post each on Mastodon and reference to it in database
+    # post each on Mastodon and record it in database
     # **********************************************************
 
     # Create Mastodon application if it does not exist yet
@@ -396,22 +404,12 @@ def main(argv):
         )
 
     except MastodonError as me:
-        print('ERROR: Login to ' + mast_instance + ' Failed')
-        print(me)
+        logging.fatal('ERROR: Login to ' + mast_instance + ' Failed\n' + me)
         sys.exit(1)
 
     # Upload tweets
     for tweet in reversed(tweets):
         logging.debug('Uploading Tweet %s', tweet["tweet_id"])
-        # Check that the tweet is not too young (might be deleted) or too old
-        age_in_hours = (time.time() - float(tweet['timestamp'])) / 3600.0
-        min_delay_in_hours = min_delay / 60.0
-        max_age_in_hours = max_age * 24.0
-
-        if age_in_hours < min_delay_in_hours or age_in_hours > max_age_in_hours:
-            # Skip to next tweet
-            logging.debug("Tweet too young or too old, skipping")
-            continue
 
         media_ids = []
 
@@ -444,7 +442,6 @@ def main(argv):
                         pass
 
         # Post toot
-        logging.debug('Doing it now')
         try:
             mastodon = Mastodon(
                 access_token=mast_account + '.secret',

From 557ef6deb9c74298e7044f11d7a605f11c456ff5 Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Thu, 17 Dec 2020 17:50:10 +0100
Subject: [PATCH 15/42] Handling reply-to

---
 twoot.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/twoot.py b/twoot.py
index ee418c9..294ea21 100755
--- a/twoot.py
+++ b/twoot.py
@@ -222,6 +222,10 @@ def main(argv):
     )
 
     url = 'https://nitter.net/' + twit_account
+    # Use different page if we need to handle replies
+    if tweets_and_replies:
+        url += '/with_replies'
+
     # Download twitter page of user.
     twit_account_page = session.get(url, headers=headers)
 
@@ -248,7 +252,7 @@ def main(argv):
     # Extract twitter timeline
     timeline = soup.find_all('div', class_='timeline-item')
 
-    logging.info('Processing ' + len(timeline) + ' tweets found in timeline')
+    logging.info('Processing ' + str(len(timeline)) + ' tweets found in timeline')
 
     # **********************************************************
     # Process each tweets and generate dictionary
@@ -292,6 +296,8 @@ def main(argv):
         full_status_url = 'https://twitter.com' + tweet_id
 
         # TODO  Check if the tweet is a reply-to
+        # <div class="replying-to">Replying to <a href="/tomwarren">@tomwarren</a></div>
+        being_replied_to = status.find('div', class_='replying-to').a.get_text()
 
         # Check it the tweet is a retweet from somebody else
         if author_account.lower() != twit_account.lower():

From fbec4004f904b885af5bee1e73045e1e97bad48f Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Thu, 17 Dec 2020 17:56:12 +0100
Subject: [PATCH 16/42] Handled reply-to

---
 twoot.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/twoot.py b/twoot.py
index 294ea21..225ebc0 100755
--- a/twoot.py
+++ b/twoot.py
@@ -295,9 +295,14 @@ def main(argv):
         # Extract URL of full status page (for video download)
         full_status_url = 'https://twitter.com' + tweet_id
 
+        # Initialize tweet text
+        tweet_text = ''
+
         # TODO  Check if the tweet is a reply-to
         # <div class="replying-to">Replying to <a href="/tomwarren">@tomwarren</a></div>
-        being_replied_to = status.find('div', class_='replying-to').a.get_text()
+        replying_to_class = status.find('div', class_='replying-to')
+        if replying_to_class is not None:
+            tweet_text += 'Replying to ' + replying_to_class.a.get_text()
 
         # Check it the tweet is a retweet from somebody else
         if author_account.lower() != twit_account.lower():

From 992f91537f3ec38f82394d68ab96bbd683649911 Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Thu, 17 Dec 2020 18:59:02 +0100
Subject: [PATCH 17/42] Replaced reply-to TODO with descriptive comment

---
 twoot.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/twoot.py b/twoot.py
index 225ebc0..bf4b8cc 100755
--- a/twoot.py
+++ b/twoot.py
@@ -298,7 +298,7 @@ def main(argv):
         # Initialize tweet text
         tweet_text = ''
 
-        # TODO  Check if the tweet is a reply-to
+        # Add prefix if the tweet is a reply-to
         # <div class="replying-to">Replying to <a href="/tomwarren">@tomwarren</a></div>
         replying_to_class = status.find('div', class_='replying-to')
         if replying_to_class is not None:

From 711ec9677ad404161290735cfc35b620ca41d35d Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Thu, 17 Dec 2020 21:44:32 +0100
Subject: [PATCH 18/42] Renamed process_tweet_content to process_media_body, added TODOs

---
 twoot.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/twoot.py b/twoot.py
index bf4b8cc..83dcf1d 100755
--- a/twoot.py
+++ b/twoot.py
@@ -47,7 +47,7 @@ logging.basicConfig(filename="twoot.log", level=logging.INFO)
 logging.info('*********** NEW RUN ***********')
 
 
-def process_tweet_content(tt_iter, twit_account, status_id, tweet_uri, get_vids):
+def process_media_body(tt_iter, twit_account, status_id, tweet_uri, get_vids):
     '''
     Receives an iterator over all the elements contained in the tweet-text container.
     Processes them to remove Twitter-specific stuff and make them suitable for
@@ -299,7 +299,6 @@ def main(argv):
         tweet_text = ''
 
         # Add prefix if the tweet is a reply-to
-        # <div class="replying-to">Replying to <a href="/tomwarren">@tomwarren</a></div>
         replying_to_class = status.find('div', class_='replying-to')
         if replying_to_class is not None:
             tweet_text += 'Replying to ' + replying_to_class.a.get_text()
@@ -311,7 +310,13 @@ def main(argv):
         # extract iterator over tweet text contents
         tt_iter = status.find('div', class_='tweet-content media-body').children
 
-        tweet_text += process_tweet_content(tt_iter, twit_account, status_id, full_status_url, get_vids)
+        tweet_text += process_media_body(tt_iter, twit_account, status_id, full_status_url, get_vids)
+
+        # TODO  Process quote: append link to tweet_text
+
+        # TODO  Process card : extract image or youtube link
+
+        # TODO  Process attachment: capture image or .mp4 url or download twitter video
 
         # Add footer with link to original tweet
         tweet_text += '\n\nOriginal tweet : ' + full_status_url

From 80799142828a477939c103e1a98e2fcf9a93b34c Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Thu, 17 Dec 2020 22:08:43 +0100
Subject: [PATCH 19/42] Reworked process_media_body

---
 twoot.py | 90 +++++++++-----------------------------------------------
 1 file changed, 14 insertions(+), 76 deletions(-)

diff --git a/twoot.py b/twoot.py
index 83dcf1d..fed7e62 100755
--- a/twoot.py
+++ b/twoot.py
@@ -47,16 +47,12 @@ logging.basicConfig(filename="twoot.log", level=logging.INFO)
 logging.info('*********** NEW RUN ***********')
 
 
-def process_media_body(tt_iter, twit_account, status_id, tweet_uri, get_vids):
+def process_media_body(tt_iter):
     '''
     Receives an iterator over all the elements contained in the tweet-text container.
-    Processes them to remove Twitter-specific stuff and make them suitable for
-    posting on Mastodon
+    Processes them to make them suitable for posting on Mastodon
     :param tt_iter: iterator over the HTML elements in the text of the tweet
-    :param twit_account: Used to name directory where videos are downloaded
-    :param status_id: Used to name directory where videos are downloaded
-    :param tweet_uri: Used to downloaded videos
-    :param get_vids: True to download embedded twitter videos and save them on the filesystem
+    :return:        cleaned up text of the tweet
     '''
     tweet_text = ''
     # Iterate elements
@@ -66,75 +62,17 @@ def process_media_body(tt_iter, twit_account, status_id, tweet_uri, get_vids):
             tweet_text += tag.string
 
         # If it is an 'a' html tag
-        elif tag.name == 'a' and tag.has_attr('class'):
-            # If element is a #hashtag, only keep text
-            for tc in tag['class']:
-                if tc == 'twitter-hashtag':
-                    tweet_text += tag.get_text()
-
-                # If element is a mention of @someuser, only keep text
-                elif tc == 'twitter-atreply':
-                    tweet_text += tag.get_text()
-
-                # If element is an external link
-                elif tc == 'twitter_external_link':
-                    # If element is a simple link
-                    if tag.has_attr('data-expanded-url'):
-                        # Add a sometimes missing space before url
-                        if not tweet_text.endswith(' ') and not tweet_text.endswith('\n'):
-                            tweet_text += ' '
-                        # Add full url
-                        tweet_text += tag['data-expanded-url']
-                    if tag.has_attr('data-expanded-path'):
-                        data_expanded_path = tag['data-expanded-path']
-                        if 'video' in data_expanded_path:
-                            if get_vids:
-                                # Download video from twitter and store in filesystem. Running as subprocess to avoid
-                                # requirement to install ffmpeg and ffmpeg-python for those that do not want to post videos
-                                try:
-                                    # Set output location to ./output/twit_account/status_id
-                                    dl_feedback = subprocess.run(
-                                        ["./twitterdl.py", tweet_uri, "-ooutput/" + twit_account + "/" + status_id, "-w 500"],
-                                        capture_output=True,
-                                        timeout=300  # let's try 5 minutes
-                                    )
-                                    if dl_feedback.returncode != 0:
-                                        logging.warning('Video in tweet ' + status_id + ' from ' + twit_account + ' failed to download')
-                                        tweet_text += '\n\n[Video embedded in original tweet]'
-                                except OSError:
-                                    logging.error("Could not execute twitterdl.py (is it there? Is it set as executable?)")
-                                except subprocess.TimeoutExpired:
-                                    # Video download and encoding took too long
-                                    logging.error("twitterdl.py execution timed out")
-                                    tweet_text += '\n\n[Video embedded in original tweet]'
-                            else:
-                                tweet_text += '\n\n[Video embedded in original tweet]'
-
-        # If element is hashflag (hashtag + icon), handle as simple hashtag
-        elif tag.name == 'span' and tag['class'][0] == 'twitter-hashflag-container':
-            tweet_text += tag.a.get_text()
-
-        # If tag is an image
-        elif tag.name == 'img':
-            # If it is of class 'Emoji'
-            for tc in tag['class']:
-                if tc == 'Emoji':
-                    # Get url of Emoji
-                    src = tag["src"]
-                    # Use regex to extract unicode characters from file name
-                    uni_str = re.search('/([0-9A-Fa-f\-]+?).png$', src).group(1)
-                    # build the list of hex unicode characters separated by '-' in the file name
-                    uni_list = uni_str.split('-')
-                    # Extract individual unicode chars and add them to the tweet
-                    for uni_char in uni_list:
-                        # convert string to hex value of unicode character
-                        tweet_text += chr(int(uni_char, 16))
-
-        # elif tag is a geographical point of interest
-        elif tag.name == 'span' and tag['class'][0] == 'tweet-poi-geo-text':
-            # Not sure what to do
-            pass
-
+        elif tag.name == 'a':
+            tag_text = tag.get_text()
+            if tag_text.starts_with('@'):
+                # Only keep user name
+                tweet_text += tag_text
+            elif tag_text.starts_with('#'):
+                # Only keep hashtag text
+                tweet_text += tag_text
+            else:
+                # This is a real link, keep url
+                tweet_text += tag.get('href')
         else:
             logging.warning("No handler for tag in twitter text: " + tag.prettify())
 

From 14c24fe847964976472c7d6826b69703339c358f Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Thu, 17 Dec 2020 22:59:21 +0100
Subject: [PATCH 20/42] started process_attachments()

---
 twoot.py | 36 +++++++++++++++++++++++++++++++-----
 1 file changed, 31 insertions(+), 5 deletions(-)

diff --git a/twoot.py b/twoot.py
index fed7e62..7b9c634 100755
--- a/twoot.py
+++ b/twoot.py
@@ -79,6 +79,22 @@ def process_media_body(tt_iter):
     return tweet_text
 
 
+def process_card(card_container):
+    '''
+    Extract image from card in case mastodon does not do it
+    :param card_container: soup of 'a' tag containing card markup
+    :return: list with url of image
+    '''
+    list = []
+    link = card_container.get('href')
+
+    # Dailymotion
+    if link.contains('dailymotion.com'):
+        image_url = 'twitter.com' + card_container.div.div.img.get('src')
+        list.append(image_url)
+
+    return list
+
 def contains_class(body_classes, some_class):
     '''
     :param body_classes: list of classes to search
@@ -233,8 +249,9 @@ def main(argv):
         # Extract URL of full status page (for video download)
         full_status_url = 'https://twitter.com' + tweet_id
 
-        # Initialize tweet text
+        # Initialize containers
         tweet_text = ''
+        photos = []
 
         # Add prefix if the tweet is a reply-to
         replying_to_class = status.find('div', class_='replying-to')
@@ -248,18 +265,27 @@ def main(argv):
         # extract iterator over tweet text contents
         tt_iter = status.find('div', class_='tweet-content media-body').children
 
-        tweet_text += process_media_body(tt_iter, twit_account, status_id, full_status_url, get_vids)
+        # Process text of tweet
+        tweet_text += process_media_body(tt_iter)
 
-        # TODO  Process quote: append link to tweet_text
+        # Process quote: append link to tweet_text
+        quote_div = status.find('div', class_='quote-link')
+        if quote_div is not None:
+            tweet_text += '\n twitter.com' + quote_div.get('href').strip('#m')
 
-        # TODO  Process card : extract image or youtube link
+        # Process card : extract image if necessary
+        card_class = status.find('a', class_='card-container')
+        if card_class is not None:
+            photos.extend(process_card(card_class))
 
         # TODO  Process attachment: capture image or .mp4 url or download twitter video
+        attachments_class = status.find('a', class_='attachments')
+        if card_class is not None:
+            photos.extend(process_attachments(attachments_class))
 
         # Add footer with link to original tweet
         tweet_text += '\n\nOriginal tweet : ' + full_status_url
 
-        photos = []  # The no_js version of twitter only shows one photo
 
         # Check if there are photos attached
         media = status.find('div', class_='media')

From b4a596eff26ccb36140939e07e43893141f19e96 Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Fri, 18 Dec 2020 11:45:43 +0100
Subject: [PATCH 21/42] Downloaded pics attachments

---
 twoot.py | 42 +++++++++++++++++++++++++++---------------
 1 file changed, 27 insertions(+), 15 deletions(-)

diff --git a/twoot.py b/twoot.py
index 7b9c634..3bc7897 100755
--- a/twoot.py
+++ b/twoot.py
@@ -48,12 +48,12 @@ logging.info('*********** NEW RUN ***********')
 
 
 def process_media_body(tt_iter):
-    '''
+    """
     Receives an iterator over all the elements contained in the tweet-text container.
     Processes them to make them suitable for posting on Mastodon
     :param tt_iter: iterator over the HTML elements in the text of the tweet
     :return:        cleaned up text of the tweet
-    '''
+    """
     tweet_text = ''
     # Iterate elements
     for tag in tt_iter:
@@ -80,11 +80,11 @@ def process_media_body(tt_iter):
 
 
 def process_card(card_container):
-    '''
+    """
     Extract image from card in case mastodon does not do it
     :param card_container: soup of 'a' tag containing card markup
     :return: list with url of image
-    '''
+    """
     list = []
     link = card_container.get('href')
 
@@ -95,12 +95,32 @@ def process_card(card_container):
 
     return list
 
+
+def process_attachments(attachments_container):
+    """
+    Extract images or video from attachments. Videos are downloaded on the file system.
+    :param card_container: soup of 'div' tag containing attachments markup
+    :return: list with url of images
+    """
+    # Collect url of images
+    pics = []
+    images = attachments_container.find_all('a', class_='still-image')
+    for image in images:
+        pics.append(image.get('href'))
+
+    # TODO Download nitter video (converted animated GIF)
+
+    # TODO Download twitter video
+
+    return pics
+
+
 def contains_class(body_classes, some_class):
-    '''
+    """
     :param body_classes: list of classes to search
     :param some_class: class that we are interested in
     :return: True if found, false otherwise
-    '''
+    """
     found = False
     for body_class in body_classes:
         if body_class == some_class:
@@ -280,20 +300,12 @@ def main(argv):
 
         # TODO  Process attachment: capture image or .mp4 url or download twitter video
         attachments_class = status.find('a', class_='attachments')
-        if card_class is not None:
+        if attachments_class is not None:
             photos.extend(process_attachments(attachments_class))
 
         # Add footer with link to original tweet
         tweet_text += '\n\nOriginal tweet : ' + full_status_url
 
-
-        # Check if there are photos attached
-        media = status.find('div', class_='media')
-        if media:
-            # Extract photo url and add it to list
-            pic = str(media.img['src']).strip(':small')
-            photos.append(pic)
-
         # If no media was specifically added in the tweet, try to get the first picture
         # with "twitter:image" meta tag in first linked page in tweet text
         if not photos:

From efa84f85d3dbc1b996f66b06c32b0d75d9849713 Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Fri, 18 Dec 2020 13:26:26 +0100
Subject: [PATCH 22/42] Download nitter video

---
 twoot.py | 29 ++++++++++++++++++++++++++---
 1 file changed, 26 insertions(+), 3 deletions(-)

diff --git a/twoot.py b/twoot.py
index 3bc7897..21815a9 100755
--- a/twoot.py
+++ b/twoot.py
@@ -96,10 +96,13 @@ def process_card(card_container):
     return list
 
 
-def process_attachments(attachments_container):
+def process_attachments(attachments_container, twit_account, tweet_id, author_account):
     """
     Extract images or video from attachments. Videos are downloaded on the file system.
     :param card_container: soup of 'div' tag containing attachments markup
+    :param twit_account: name of twitter account
+    :param tweet_id: id of tweet being processed
+    :param author_account: author of tweet with video attachment
     :return: list with url of images
     """
     # Collect url of images
@@ -108,10 +111,30 @@ def process_attachments(attachments_container):
     for image in images:
         pics.append(image.get('href'))
 
-    # TODO Download nitter video (converted animated GIF)
+    # Download nitter video (converted animated GIF)
+    gif_class = attachments_container.find('video', class_='gif')
+    if gif_class is not None:
+        gif_video_file = 'https://nitter.com' + gif_class.source.get('src')
+
+    video_path = os.path.join('./output', twit_account, tweet_id, author_account, tweet_id)
+    os.makedirs(video_path, 0o777, exist_ok=True)
+
+    # Open directory for writing file
+    vp = os.open(video_path,  os.O_WRONLY)
+    os.fchdir(vp)
+    r = requests.get(gif_video_file, stream=True)
+
+    # Download chunks and write them to file
+    with open('gif_video.mp4', 'wb') as f:
+        for chunk in r.iter_content(chunk_size=16*1024):
+            f.write(chunk)
+
+    # Close directory
+    os.close(vp)
 
     # TODO Download twitter video
 
+
     return pics
 
 
@@ -301,7 +324,7 @@ def main(argv):
         # TODO  Process attachment: capture image or .mp4 url or download twitter video
         attachments_class = status.find('a', class_='attachments')
         if attachments_class is not None:
-            photos.extend(process_attachments(attachments_class))
+            photos.extend(process_attachments(attachments_class, twit_account, tweet_id, author_account))
 
         # Add footer with link to original tweet
         tweet_text += '\n\nOriginal tweet : ' + full_status_url

From 551c47d576488bfbda4dd3b14d826b7b7b558759 Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Fri, 18 Dec 2020 14:28:17 +0100
Subject: [PATCH 23/42] Implemented process attachment

---
 twoot.py | 52 ++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 36 insertions(+), 16 deletions(-)

diff --git a/twoot.py b/twoot.py
index 21815a9..5ac682f 100755
--- a/twoot.py
+++ b/twoot.py
@@ -96,10 +96,11 @@ def process_card(card_container):
     return list
 
 
-def process_attachments(attachments_container, twit_account, tweet_id, author_account):
+def process_attachments(attachments_container, get_vids, twit_account, tweet_id, author_account):
     """
     Extract images or video from attachments. Videos are downloaded on the file system.
     :param card_container: soup of 'div' tag containing attachments markup
+    :param get_vids: whether to download vids or not
     :param twit_account: name of twitter account
     :param tweet_id: id of tweet being processed
     :param author_account: author of tweet with video attachment
@@ -116,24 +117,43 @@ def process_attachments(attachments_container, twit_account, tweet_id, author_ac
     if gif_class is not None:
         gif_video_file = 'https://nitter.com' + gif_class.source.get('src')
 
-    video_path = os.path.join('./output', twit_account, tweet_id, author_account, tweet_id)
-    os.makedirs(video_path, 0o777, exist_ok=True)
+        video_path = os.path.join('./output', twit_account, tweet_id, author_account, tweet_id)
+        os.makedirs(video_path, 0o777, exist_ok=True)
 
-    # Open directory for writing file
-    vp = os.open(video_path,  os.O_WRONLY)
-    os.fchdir(vp)
-    r = requests.get(gif_video_file, stream=True)
+        # Open directory for writing file
+        vp = os.open(video_path,  os.O_WRONLY)
+        os.fchdir(vp)
+        r = requests.get(gif_video_file, stream=True)
 
-    # Download chunks and write them to file
-    with open('gif_video.mp4', 'wb') as f:
-        for chunk in r.iter_content(chunk_size=16*1024):
-            f.write(chunk)
+        # Download chunks and write them to file
+        with open('gif_video.mp4', 'wb') as f:
+            for chunk in r.iter_content(chunk_size=16*1024):
+                f.write(chunk)
 
-    # Close directory
-    os.close(vp)
-
-    # TODO Download twitter video
+        # Close directory
+        os.close(vp)
 
+    # Download twitter video
+    vid_class = attachments_container.find('div', class_='video-container')
+    if vid_class is not None:
+        video_file = 'https://twitter.com' + vid_class.video.get('data-url')
+        if get_vids:
+            # Download video from twitter and store in filesystem. Running as subprocess to avoid
+            # requirement to install ffmpeg and ffmpeg-python for those that do not want to post videos
+            try:
+                # Set output location to ./output/twit_account/status_id
+                dl_feedback = subprocess.run(
+                    ["./twitterdl.py", tweet_uri, "-ooutput/" + twit_account + "/" + status_id, "-w 500"],
+                    capture_output=True,
+                )
+                if dl_feedback.returncode != 0:
+                    logging.warning('Video in tweet ' + tweet_id + ' from ' + twit_account + ' failed to download')
+                    tweet_text += '\n\n[Video embedded in original tweet]'
+            except OSError:
+                logging.fatal("Could not execute twitterdl.py (is it there? Is it set as executable?)")
+                sys.exit(-1)
+        else:
+            tweet_text += '\n\n[Video embedded in original tweet]'
 
     return pics
 
@@ -324,7 +344,7 @@ def main(argv):
         # TODO  Process attachment: capture image or .mp4 url or download twitter video
         attachments_class = status.find('a', class_='attachments')
         if attachments_class is not None:
-            photos.extend(process_attachments(attachments_class, twit_account, tweet_id, author_account))
+            photos.extend(process_attachments(attachments_class, get_vids, twit_account, tweet_id, author_account))
 
         # Add footer with link to original tweet
         tweet_text += '\n\nOriginal tweet : ' + full_status_url

From f229976861d782743fccca6ae441500362b32b3a Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Fri, 18 Dec 2020 14:39:13 +0100
Subject: [PATCH 24/42] Improved logging. "OMG, it's full of bugs!"

---
 twoot.py | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/twoot.py b/twoot.py
index 5ac682f..b2fc4c2 100755
--- a/twoot.py
+++ b/twoot.py
@@ -43,7 +43,7 @@ USER_AGENTS = [
     ]
 
 # Setup logging to file
-logging.basicConfig(filename="twoot.log", level=logging.INFO)
+logging.basicConfig(filename="twoot.log", level=logging.DEBUG)
 logging.info('*********** NEW RUN ***********')
 
 
@@ -92,6 +92,7 @@ def process_card(card_container):
     if link.contains('dailymotion.com'):
         image_url = 'twitter.com' + card_container.div.div.img.get('src')
         list.append(image_url)
+        logging.debug('Extracted still image of dailymotion video from card')
 
     return list
 
@@ -111,6 +112,7 @@ def process_attachments(attachments_container, get_vids, twit_account, tweet_id,
     images = attachments_container.find_all('a', class_='still-image')
     for image in images:
         pics.append(image.get('href'))
+        logging.debug('collected ' + str(len(pics)) + ' images from attachments')
 
     # Download nitter video (converted animated GIF)
     gif_class = attachments_container.find('video', class_='gif')
@@ -130,6 +132,8 @@ def process_attachments(attachments_container, get_vids, twit_account, tweet_id,
             for chunk in r.iter_content(chunk_size=16*1024):
                 f.write(chunk)
 
+        logging.debug('downloaded video of GIF animation from attachments')
+
         # Close directory
         os.close(vp)
 
@@ -149,6 +153,9 @@ def process_attachments(attachments_container, get_vids, twit_account, tweet_id,
                 if dl_feedback.returncode != 0:
                     logging.warning('Video in tweet ' + tweet_id + ' from ' + twit_account + ' failed to download')
                     tweet_text += '\n\n[Video embedded in original tweet]'
+                else:
+                    logging.debug('downloaded twitter video from attachments')
+
             except OSError:
                 logging.fatal("Could not execute twitterdl.py (is it there? Is it set as executable?)")
                 sys.exit(-1)
@@ -247,10 +254,11 @@ def main(argv):
     twit_account_page = session.get(url, headers=headers)
 
     # Verify that download worked
-    assert twit_account_page.status_code == 200,\
-        'The nitter page did not download correctly. Aborting'
+    if twit_account_page.status_code != 200:
+        logging.fatal('The Nitter page did not download correctly. Aborting')
+        exit(-1)
 
-    logging.info('Page downloaded successfully')
+    logging.info('Nitter page downloaded successfully')
 
     # DEBUG: Save page to file
     of = open(twit_account + '.html', 'w')
@@ -341,7 +349,7 @@ def main(argv):
         if card_class is not None:
             photos.extend(process_card(card_class))
 
-        # TODO  Process attachment: capture image or .mp4 url or download twitter video
+        # Process attachment: capture image or .mp4 url or download twitter video
         attachments_class = status.find('a', class_='attachments')
         if attachments_class is not None:
             photos.extend(process_attachments(attachments_class, get_vids, twit_account, tweet_id, author_account))

From 3a88438ec2d8f286cc83b142d5e6beff1611c182 Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Fri, 18 Dec 2020 14:57:22 +0100
Subject: [PATCH 25/42] Some easy bugs squashed

---
 twoot.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/twoot.py b/twoot.py
index b2fc4c2..ec5c53c 100755
--- a/twoot.py
+++ b/twoot.py
@@ -64,10 +64,10 @@ def process_media_body(tt_iter):
         # If it is an 'a' html tag
         elif tag.name == 'a':
             tag_text = tag.get_text()
-            if tag_text.starts_with('@'):
+            if tag_text.startswith('@'):
                 # Only keep user name
                 tweet_text += tag_text
-            elif tag_text.starts_with('#'):
+            elif tag_text.startswith('#'):
                 # Only keep hashtag text
                 tweet_text += tag_text
             else:
@@ -89,7 +89,7 @@ def process_card(card_container):
     link = card_container.get('href')
 
     # Dailymotion
-    if link.contains('dailymotion.com'):
+    if link.find('dailymotion.com') >= 0:
         image_url = 'twitter.com' + card_container.div.div.img.get('src')
         list.append(image_url)
         logging.debug('Extracted still image of dailymotion video from card')
@@ -112,7 +112,8 @@ def process_attachments(attachments_container, get_vids, twit_account, tweet_id,
     images = attachments_container.find_all('a', class_='still-image')
     for image in images:
         pics.append(image.get('href'))
-        logging.debug('collected ' + str(len(pics)) + ' images from attachments')
+
+    logging.debug('collected ' + str(len(pics)) + ' images from attachments')
 
     # Download nitter video (converted animated GIF)
     gif_class = attachments_container.find('video', class_='gif')
@@ -350,7 +351,7 @@ def main(argv):
             photos.extend(process_card(card_class))
 
         # Process attachment: capture image or .mp4 url or download twitter video
-        attachments_class = status.find('a', class_='attachments')
+        attachments_class = status.find('div', class_='attachments')
         if attachments_class is not None:
             photos.extend(process_attachments(attachments_class, get_vids, twit_account, tweet_id, author_account))
 

From 822215fefeda36e88066cdd99a22afd3a7d413f9 Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Fri, 18 Dec 2020 17:06:09 +0100
Subject: [PATCH 26/42] download more images. Improved logging

---
 twoot.py | 29 +++++++++++++++++------------
 1 file changed, 17 insertions(+), 12 deletions(-)

diff --git a/twoot.py b/twoot.py
index ec5c53c..d2ff2dd 100755
--- a/twoot.py
+++ b/twoot.py
@@ -42,10 +42,6 @@ USER_AGENTS = [
     'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36 Edg/87.0.664.60',
     ]
 
-# Setup logging to file
-logging.basicConfig(filename="twoot.log", level=logging.DEBUG)
-logging.info('*********** NEW RUN ***********')
-
 
 def process_media_body(tt_iter):
     """
@@ -88,11 +84,10 @@ def process_card(card_container):
     list = []
     link = card_container.get('href')
 
-    # Dailymotion
-    if link.find('dailymotion.com') >= 0:
-        image_url = 'twitter.com' + card_container.div.div.img.get('src')
-        list.append(image_url)
-        logging.debug('Extracted still image of dailymotion video from card')
+    # Do not extract image for youtube links
+    image_url = 'twitter.com' + card_container.div.div.img.get('src')
+    list.append(image_url)
+    logging.debug('Extracted still image of dailymotion video from card')
 
     return list
 
@@ -217,7 +212,17 @@ def main(argv):
     max_age = float(args['a'])
     min_delay = float(args['d'])
 
-    logging.info('Updating ' + twit_account + ' on ' + mast_instance)
+    # Setup logging to file
+    os.remove(twit_account + '.log')
+    logging.basicConfig(filename=twit_account + '.log', level=logging.DEBUG)
+    logging.info('Running with the following parameters:')
+    logging.info('    -t ' + twit_account)
+    logging.info('    -i ' + mast_instance)
+    logging.info('    -m ' + mast_account)
+    logging.info('    -r ' + tweets_and_replies)
+    logging.info('    -v ' + get_vids)
+    logging.info('    -a ' + max_age)
+    logging.info('    -d ' + min_delay)
 
     # Try to open database. If it does not exist, create it
     sql = sqlite3.connect('twoot.db')
@@ -431,7 +436,7 @@ def main(argv):
             )
 
         except MastodonError as me:
-            print('failed to create app on ' + mast_instance)
+            logging.fatal('failed to create app on ' + mast_instance + '\n' + str(me))
             sys.exit(1)
 
     # Log in to Mastodon instance
@@ -448,7 +453,7 @@ def main(argv):
         )
 
     except MastodonError as me:
-        logging.fatal('ERROR: Login to ' + mast_instance + ' Failed\n' + me)
+        logging.fatal('ERROR: Login to ' + mast_instance + ' Failed\n')
         sys.exit(1)
 
     # Upload tweets

From 67bf87213d43258a3e12bf76a1bfceaa459c3b49 Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Fri, 18 Dec 2020 17:21:41 +0100
Subject: [PATCH 27/42] Correct url in image downloads

---
 twoot.py | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/twoot.py b/twoot.py
index d2ff2dd..085e68a 100755
--- a/twoot.py
+++ b/twoot.py
@@ -85,9 +85,9 @@ def process_card(card_container):
     link = card_container.get('href')
 
     # Do not extract image for youtube links
-    image_url = 'twitter.com' + card_container.div.div.img.get('src')
+    image_url = 'https://nitter.net' + card_container.div.div.img.get('src')
     list.append(image_url)
-    logging.debug('Extracted still image of dailymotion video from card')
+    logging.debug('Extracted image from card')
 
     return list
 
@@ -106,7 +106,7 @@ def process_attachments(attachments_container, get_vids, twit_account, tweet_id,
     pics = []
     images = attachments_container.find_all('a', class_='still-image')
     for image in images:
-        pics.append(image.get('href'))
+        pics.append('https://nitter.net' + image.get('href'))
 
     logging.debug('collected ' + str(len(pics)) + ' images from attachments')
 
@@ -212,17 +212,22 @@ def main(argv):
     max_age = float(args['a'])
     min_delay = float(args['d'])
 
+    # Remove previous log file
+    try:
+        os.remove(twit_account + '.log')
+    except FileNotFoundError:
+        pass
+
     # Setup logging to file
-    os.remove(twit_account + '.log')
     logging.basicConfig(filename=twit_account + '.log', level=logging.DEBUG)
     logging.info('Running with the following parameters:')
     logging.info('    -t ' + twit_account)
     logging.info('    -i ' + mast_instance)
     logging.info('    -m ' + mast_account)
-    logging.info('    -r ' + tweets_and_replies)
-    logging.info('    -v ' + get_vids)
-    logging.info('    -a ' + max_age)
-    logging.info('    -d ' + min_delay)
+    logging.info('    -r ' + str(tweets_and_replies))
+    logging.info('    -v ' + str(get_vids))
+    logging.info('    -a ' + str(max_age))
+    logging.info('    -d ' + str(min_delay))
 
     # Try to open database. If it does not exist, create it
     sql = sqlite3.connect('twoot.db')

From a0ce29f4c5ff1239244004d9df5ed370ad4c2868 Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Fri, 18 Dec 2020 17:35:50 +0100
Subject: [PATCH 28/42] Fine tuning

---
 twoot.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/twoot.py b/twoot.py
index 085e68a..cf0f485 100755
--- a/twoot.py
+++ b/twoot.py
@@ -338,7 +338,7 @@ def main(argv):
         # Add prefix if the tweet is a reply-to
         replying_to_class = status.find('div', class_='replying-to')
         if replying_to_class is not None:
-            tweet_text += 'Replying to ' + replying_to_class.a.get_text()
+            tweet_text += 'Replying to ' + replying_to_class.a.get_text() + '\n\n'
 
         # Check it the tweet is a retweet from somebody else
         if author_account.lower() != twit_account.lower():
@@ -456,6 +456,7 @@ def main(argv):
             password=mast_password,
             to_file=mast_account + ".secret"
         )
+        logging.info('Logging in to ' + mast_instance)
 
     except MastodonError as me:
         logging.fatal('ERROR: Login to ' + mast_instance + ' Failed\n')

From 62ba2f505ec0f067e0e44dbcdad9b1ffdbcecc55 Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Fri, 18 Dec 2020 17:55:12 +0100
Subject: [PATCH 29/42] Issues with video download

---
 twoot.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/twoot.py b/twoot.py
index cf0f485..b0e343d 100755
--- a/twoot.py
+++ b/twoot.py
@@ -92,13 +92,13 @@ def process_card(card_container):
     return list
 
 
-def process_attachments(attachments_container, get_vids, twit_account, tweet_id, author_account):
+def process_attachments(attachments_container, get_vids, twit_account, status_id, author_account):
     """
     Extract images or video from attachments. Videos are downloaded on the file system.
     :param card_container: soup of 'div' tag containing attachments markup
     :param get_vids: whether to download vids or not
     :param twit_account: name of twitter account
-    :param tweet_id: id of tweet being processed
+    :param status_id: id of tweet being processed
     :param author_account: author of tweet with video attachment
     :return: list with url of images
     """
@@ -115,12 +115,12 @@ def process_attachments(attachments_container, get_vids, twit_account, tweet_id,
     if gif_class is not None:
         gif_video_file = 'https://nitter.com' + gif_class.source.get('src')
 
-        video_path = os.path.join('./output', twit_account, tweet_id, author_account, tweet_id)
-        os.makedirs(video_path, 0o777, exist_ok=True)
+        video_path = os.path.join('output', twit_account, status_id, author_account, status_id)
+        os.makedirs(video_path, exist_ok=True)
 
         # Open directory for writing file
-        vp = os.open(video_path,  os.O_WRONLY)
-        os.fchdir(vp)
+        vp = os.open(video_path, os.O_WRONLY)
+        os.chdir(vp)
         r = requests.get(gif_video_file, stream=True)
 
         # Download chunks and write them to file
@@ -252,7 +252,7 @@ def main(argv):
     headers.update(
         {
             'User-Agent': USER_AGENTS[random.randint(0, len(USER_AGENTS)-1)],
-            'Cookie': 'replaceTwitter=; replaceYouTube=',
+            'Cookie': 'replaceTwitter=; replaceYouTube=; hlsPlayback=on; proxyVideos=',
         }
     )
 
@@ -363,7 +363,7 @@ def main(argv):
         # Process attachment: capture image or .mp4 url or download twitter video
         attachments_class = status.find('div', class_='attachments')
         if attachments_class is not None:
-            photos.extend(process_attachments(attachments_class, get_vids, twit_account, tweet_id, author_account))
+            photos.extend(process_attachments(attachments_class, get_vids, twit_account, status_id, author_account))
 
         # Add footer with link to original tweet
         tweet_text += '\n\nOriginal tweet : ' + full_status_url

From 986d902ccd364287310f6780246970cbe907c689 Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Fri, 18 Dec 2020 21:06:05 +0100
Subject: [PATCH 30/42] Fixed video download url

---
 twoot.py | 38 +++++++++++++++++++++-----------------
 1 file changed, 21 insertions(+), 17 deletions(-)

diff --git a/twoot.py b/twoot.py
index b0e343d..ac009c5 100755
--- a/twoot.py
+++ b/twoot.py
@@ -113,42 +113,43 @@ def process_attachments(attachments_container, get_vids, twit_account, status_id
     # Download nitter video (converted animated GIF)
     gif_class = attachments_container.find('video', class_='gif')
     if gif_class is not None:
-        gif_video_file = 'https://nitter.com' + gif_class.source.get('src')
+        gif_video_file = 'https://nitter.net' + gif_class.source.get('src')
 
         video_path = os.path.join('output', twit_account, status_id, author_account, status_id)
         os.makedirs(video_path, exist_ok=True)
 
         # Open directory for writing file
-        vp = os.open(video_path, os.O_WRONLY)
-        os.chdir(vp)
-        r = requests.get(gif_video_file, stream=True)
-
-        # Download chunks and write them to file
-        with open('gif_video.mp4', 'wb') as f:
-            for chunk in r.iter_content(chunk_size=16*1024):
-                f.write(chunk)
+        orig_dir = os.getcwd()
+        os.chdir(video_path)
+        with requests.get(gif_video_file, stream=True) as r:
+            r.raise_for_status()
+            # Download chunks and write them to file
+            with open('gif_video.mp4', 'wb') as f:
+                for chunk in r.iter_content(chunk_size=16*1024):
+                    f.write(chunk)
 
         logging.debug('downloaded video of GIF animation from attachments')
 
         # Close directory
-        os.close(vp)
+        os.chdir(orig_dir)
 
     # Download twitter video
+    vid_in_tweet = False
     vid_class = attachments_container.find('div', class_='video-container')
     if vid_class is not None:
-        video_file = 'https://twitter.com' + vid_class.video.get('data-url')
+        video_file = os.path.join('https://twitter.com', author_account, 'status', status_id)
         if get_vids:
             # Download video from twitter and store in filesystem. Running as subprocess to avoid
             # requirement to install ffmpeg and ffmpeg-python for those that do not want to post videos
             try:
                 # Set output location to ./output/twit_account/status_id
                 dl_feedback = subprocess.run(
-                    ["./twitterdl.py", tweet_uri, "-ooutput/" + twit_account + "/" + status_id, "-w 500"],
+                    ["./twitterdl.py", video_file, "-ooutput/" + twit_account + "/" + status_id, "-w 500"],
                     capture_output=True,
                 )
                 if dl_feedback.returncode != 0:
-                    logging.warning('Video in tweet ' + tweet_id + ' from ' + twit_account + ' failed to download')
-                    tweet_text += '\n\n[Video embedded in original tweet]'
+                    logging.warning('Video in tweet ' + status_id + ' from ' + twit_account + ' failed to download')
+                    vid_in_tweet = True
                 else:
                     logging.debug('downloaded twitter video from attachments')
 
@@ -156,9 +157,9 @@ def process_attachments(attachments_container, get_vids, twit_account, status_id
                 logging.fatal("Could not execute twitterdl.py (is it there? Is it set as executable?)")
                 sys.exit(-1)
         else:
-            tweet_text += '\n\n[Video embedded in original tweet]'
+            vid_in_tweet = True
 
-    return pics
+    return pics, vid_in_tweet
 
 
 def contains_class(body_classes, some_class):
@@ -363,7 +364,10 @@ def main(argv):
         # Process attachment: capture image or .mp4 url or download twitter video
         attachments_class = status.find('div', class_='attachments')
         if attachments_class is not None:
-            photos.extend(process_attachments(attachments_class, get_vids, twit_account, status_id, author_account))
+            pics, vid_in_tweet = process_attachments(attachments_class, get_vids, twit_account, status_id, author_account)
+            photos.extend(pics)
+            if vid_in_tweet:
+                tweet_text += '\n\n[Video embedded in original tweet]'
 
         # Add footer with link to original tweet
         tweet_text += '\n\nOriginal tweet : ' + full_status_url

From 33342cdfb727f25d105784ddc6f0e3f2904219d5 Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Fri, 18 Dec 2020 21:32:26 +0100
Subject: [PATCH 31/42] Cards can have no pic

---
 twoot.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/twoot.py b/twoot.py
index ac009c5..8b0ccfa 100755
--- a/twoot.py
+++ b/twoot.py
@@ -82,12 +82,12 @@ def process_card(card_container):
     :return: list with url of image
     """
     list = []
-    link = card_container.get('href')
 
-    # Do not extract image for youtube links
-    image_url = 'https://nitter.net' + card_container.div.div.img.get('src')
-    list.append(image_url)
-    logging.debug('Extracted image from card')
+    img = card_container.div.div.img
+    if img is not None:
+        image_url = 'https://nitter.net' + img.get('src')
+        list.append(image_url)
+        logging.debug('Extracted image from card')
 
     return list
 

From 1525955c5266223f2f8c30581d31980de4ac6a22 Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Fri, 18 Dec 2020 22:09:34 +0100
Subject: [PATCH 32/42] Added info log messages

---
 twoot.py | 24 +++++++++++++++++-------
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/twoot.py b/twoot.py
index 8b0ccfa..2476fdd 100755
--- a/twoot.py
+++ b/twoot.py
@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 
 """
-    Copyright (C) 2019  Jean-Christophe Francois
+    Copyright (C) 2020  Jean-Christophe Francois
 
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -220,7 +220,7 @@ def main(argv):
         pass
 
     # Setup logging to file
-    logging.basicConfig(filename=twit_account + '.log', level=logging.DEBUG)
+    logging.basicConfig(filename=twit_account + '.log', level=logging.INFO)
     logging.info('Running with the following parameters:')
     logging.info('    -t ' + twit_account)
     logging.info('    -i ' + mast_instance)
@@ -273,9 +273,9 @@ def main(argv):
     logging.info('Nitter page downloaded successfully')
 
     # DEBUG: Save page to file
-    of = open(twit_account + '.html', 'w')
-    of.write(twit_account_page.text)
-    of.close()
+    #of = open(twit_account + '.html', 'w')
+    #of.write(twit_account_page.text)
+    #of.close()
 
     # Make soup
     soup = BeautifulSoup(twit_account_page.text, 'html.parser')
@@ -295,6 +295,8 @@ def main(argv):
     # Process each tweets and generate dictionary
     # with data ready to be posted on Mastodon
     # **********************************************************
+    out_date_cnt = 0
+    in_db_cnt = 0
     for status in timeline:
         # Extract tweet ID and status ID
         tweet_id = status.find('a', class_='tweet-link').get('href').strip('#m')
@@ -308,6 +310,7 @@ def main(argv):
 
         # Check if time is within acceptable range
         if not is_time_valid(timestamp, max_age, min_delay):
+            out_date_cnt += 1
             logging.debug("Tweet outside valid time range, skipping")
             continue
 
@@ -317,6 +320,7 @@ def main(argv):
         tweet_in_db = db.fetchone()
 
         if tweet_in_db is not None:
+            in_db_cnt += 1
             logging.debug("Tweet %s already in database", tweet_id)
             # Skip to next tweet
             continue
@@ -425,6 +429,8 @@ def main(argv):
         logging.debug('Tweet %s added to list of toots to upload', tweet_id)
 
     # TODO  Log summary stats: how many not in db, how many in valid timeframe
+    logging.info(str(out_date_cnt) + ' tweets outside of valid time range')
+    logging.info(str(in_db_cnt) + ' tweets already in database')
 
     # DEBUG: Print extracted tweets
     #for t in tweets:
@@ -467,6 +473,7 @@ def main(argv):
         sys.exit(1)
 
     # Upload tweets
+    posted_cnt = 0
     for tweet in reversed(tweets):
         logging.debug('Uploading Tweet %s', tweet["tweet_id"])
 
@@ -515,9 +522,10 @@ def main(argv):
         except MastodonError as me:
             logging.error('posting ' + tweet['tweet_text'] + ' to ' + mast_instance + ' Failed')
             logging.error(me)
-            sys.exit(1)
 
-        logging.debug('Tweet %s posted on %s', tweet_id, mast_account)
+        else:
+            posted_cnt += 1
+            logging.debug('Tweet %s posted on %s', tweet['tweet_id'], mast_account)
 
         # Insert toot id into database
         if 'id' in toot:
@@ -525,6 +533,8 @@ def main(argv):
                        (twit_account, mast_instance, mast_account, tweet['tweet_id'], toot['id']))
             sql.commit()
 
+    logging.info(str(posted_cnt) + ' Tweets posted to Mastodon')
+
     # Cleanup downloaded video files
     try:
         shutil.rmtree('./output/' + twit_account)

From 60f7054fac98904c7c9b1f8fba9d51898b2461a6 Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Fri, 18 Dec 2020 22:16:27 +0100
Subject: [PATCH 33/42] Separate logging for exceptions

---
 twoot.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/twoot.py b/twoot.py
index 2476fdd..1ef7edd 100755
--- a/twoot.py
+++ b/twoot.py
@@ -451,7 +451,8 @@ def main(argv):
             )
 
         except MastodonError as me:
-            logging.fatal('failed to create app on ' + mast_instance + '\n' + str(me))
+            logging.fatal('failed to create app on ' + mast_instance)
+            logging.fatal(me)
             sys.exit(1)
 
     # Log in to Mastodon instance
@@ -470,6 +471,7 @@ def main(argv):
 
     except MastodonError as me:
         logging.fatal('ERROR: Login to ' + mast_instance + ' Failed\n')
+        logging.fatal(me)
         sys.exit(1)
 
     # Upload tweets

From 066f737a617a41bbee3a0ecacdd8431f3614e18a Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Fri, 18 Dec 2020 22:41:57 +0100
Subject: [PATCH 34/42] quote is an 'a' tag

---
 twoot.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/twoot.py b/twoot.py
index 1ef7edd..d4c441f 100755
--- a/twoot.py
+++ b/twoot.py
@@ -356,9 +356,9 @@ def main(argv):
         tweet_text += process_media_body(tt_iter)
 
         # Process quote: append link to tweet_text
-        quote_div = status.find('div', class_='quote-link')
+        quote_div = status.find('a', class_='quote-link')
         if quote_div is not None:
-            tweet_text += '\n twitter.com' + quote_div.get('href').strip('#m')
+            tweet_text += '\n\nhttps://twitter.com' + quote_div.get('href').strip('#m')
 
         # Process card : extract image if necessary
         card_class = status.find('a', class_='card-container')

From bb52e54c0da683d70a78722454c40acbe2c5ad3f Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Fri, 18 Dec 2020 22:43:50 +0100
Subject: [PATCH 35/42] Logging set to debug

---
 twoot.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/twoot.py b/twoot.py
index d4c441f..ad587fd 100755
--- a/twoot.py
+++ b/twoot.py
@@ -220,7 +220,7 @@ def main(argv):
         pass
 
     # Setup logging to file
-    logging.basicConfig(filename=twit_account + '.log', level=logging.INFO)
+    logging.basicConfig(filename=twit_account + '.log', level=logging.DEBUG)
     logging.info('Running with the following parameters:')
     logging.info('    -t ' + twit_account)
     logging.info('    -i ' + mast_instance)

From 43d63b1e5a18282a4a42b676206f164424d76aa5 Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Sat, 19 Dec 2020 09:21:39 +0100
Subject: [PATCH 36/42] Added logging run time

---
 twoot.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/twoot.py b/twoot.py
index ad587fd..c43c9f3 100755
--- a/twoot.py
+++ b/twoot.py
@@ -26,7 +26,8 @@ import random
 import requests
 from bs4 import BeautifulSoup, element
 import sqlite3
-import datetime, time
+import datetime
+import time
 import re
 from pathlib import Path
 from mastodon import Mastodon, MastodonError, MastodonAPIError, MastodonIllegalArgumentError
@@ -189,6 +190,8 @@ def is_time_valid(timestamp, max_age, min_delay):
 
 
 def main(argv):
+    # Start stopwatch
+    start_time = time.time()
 
     # Build parser for command line arguments
     parser = argparse.ArgumentParser(description='toot tweets.')
@@ -536,6 +539,7 @@ def main(argv):
             sql.commit()
 
     logging.info(str(posted_cnt) + ' Tweets posted to Mastodon')
+    logging.info('Run time : ' + str(time.time() - start_time) + ' seconds')
 
     # Cleanup downloaded video files
     try:

From dc6c16ae16d62f450dfead58406f3c6160f9eab8 Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Sat, 19 Dec 2020 10:09:03 +0100
Subject: [PATCH 37/42] Keep logs for now

---
 twoot.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/twoot.py b/twoot.py
index c43c9f3..dbc32f2 100755
--- a/twoot.py
+++ b/twoot.py
@@ -217,10 +217,10 @@ def main(argv):
     min_delay = float(args['d'])
 
     # Remove previous log file
-    try:
-        os.remove(twit_account + '.log')
-    except FileNotFoundError:
-        pass
+    #try:
+    #    os.remove(twit_account + '.log')
+    #except FileNotFoundError:
+    #    pass
 
     # Setup logging to file
     logging.basicConfig(filename=twit_account + '.log', level=logging.DEBUG)
@@ -539,7 +539,6 @@ def main(argv):
             sql.commit()
 
     logging.info(str(posted_cnt) + ' Tweets posted to Mastodon')
-    logging.info('Run time : ' + str(time.time() - start_time) + ' seconds')
 
     # Cleanup downloaded video files
     try:
@@ -547,6 +546,8 @@ def main(argv):
     except FileNotFoundError:  # The directory does not exist
         pass
 
+    logging.info('Run time : ' + str(time.time() - start_time) + ' seconds')
+    logging.info('_____________________________________________________________')
 
 if __name__ == "__main__":
     main(sys.argv)

From 3c7693fe66bbf9c3b3e1b849bc54e4a7888bd9de Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Sat, 19 Dec 2020 10:30:19 +0100
Subject: [PATCH 38/42] Updated README

Improved decimal format in log
---
 README.md | 14 +++++++++-----
 twoot.py  |  3 ++-
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index 19f21da..674c28f 100644
--- a/README.md
+++ b/README.md
@@ -3,11 +3,10 @@
 Twoot is a python script that extracts tweets from a twitter feed and
 reposts them as toots on a Mastodon account.
 
-I started twoot when [tootbot](https://github.com/cquest/tootbot)
-stopped working. Tootbot relies on rss feeds from https://twitrss.me
-that broke when Twitter refreshed their web UI in July 2019.
-
-Instead twoot is self contained and handles all the processing.  
+**UPDATE 19 DEC 2020** VERSION 2.0. Twitter's *no-javascript* version
+has been retired. Twoot has been rewritten to get content from
+[nitter.net](https://nitter.net), which is a JavaScript-free mirror of
+Twitter. As a bonus (or a curse?) twoot now also supports animated GIFs.
 
 **UPDATE 05 APR 2020** VERSION 1.0. Twoot can now optionally download
 videos from Twitter and upload them on Mastodon.
@@ -91,3 +90,8 @@ ago:
 ```
 1-59/15 * * * * /path/to/twoot.py -t SuperDuperBot -i botsin.space -m superduperbot -p my_Sup3r-S4f3*pw -a 5 -d 15
 ```
+
+# Background
+I started twoot when [tootbot](https://github.com/cquest/tootbot)
+stopped working. Tootbot relies on rss feeds from https://twitrss.me
+that broke when Twitter refreshed their web UI in July 2019.
\ No newline at end of file
diff --git a/twoot.py b/twoot.py
index dbc32f2..bf9157d 100755
--- a/twoot.py
+++ b/twoot.py
@@ -546,8 +546,9 @@ def main(argv):
     except FileNotFoundError:  # The directory does not exist
         pass
 
-    logging.info('Run time : ' + str(time.time() - start_time) + ' seconds')
+    logging.info('Run time : %2.1f seconds', str(time.time() - start_time))
     logging.info('_____________________________________________________________')
 
+
 if __name__ == "__main__":
     main(sys.argv)

From 5df11dbe4b6b47006e4ad3b7dcf4579cef98687a Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Sat, 19 Dec 2020 10:36:59 +0100
Subject: [PATCH 39/42] Fixed last logging syntax

---
 twoot.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/twoot.py b/twoot.py
index bf9157d..305ac23 100755
--- a/twoot.py
+++ b/twoot.py
@@ -546,7 +546,7 @@ def main(argv):
     except FileNotFoundError:  # The directory does not exist
         pass
 
-    logging.info('Run time : %2.1f seconds', str(time.time() - start_time))
+    logging.info('Run time : %2.1f seconds' % (time.time() - start_time))
     logging.info('_____________________________________________________________')
 
 

From 40185ef8173ba0a30bee4b070d106a1b61ee5edd Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Sat, 19 Dec 2020 10:48:46 +0100
Subject: [PATCH 40/42] Improved last logging syntax

---
 twoot.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/twoot.py b/twoot.py
index 305ac23..70de32c 100755
--- a/twoot.py
+++ b/twoot.py
@@ -547,7 +547,7 @@ def main(argv):
         pass
 
     logging.info('Run time : %2.1f seconds' % (time.time() - start_time))
-    logging.info('_____________________________________________________________')
+    logging.info('_____________________________________________________________________________________')
 
 
 if __name__ == "__main__":

From 1d40071b27abdc4f7bff343cbb0bf9b0d7c45a78 Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Sat, 19 Dec 2020 10:53:11 +0100
Subject: [PATCH 41/42] Added log of twitter:image download

---
 twoot.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/twoot.py b/twoot.py
index 70de32c..5829043 100755
--- a/twoot.py
+++ b/twoot.py
@@ -401,6 +401,8 @@ def main(argv):
                             requests.exceptions.TooManyRedirects,
                             requests.exceptions.MissingSchema):
                         pass
+                    else:
+                        logging.debug("downloaded twitter:image from linked page")
 
         # Check if video was downloaded
         video_file = None

From a9109884a4d3fb6b2c9b6073bb73cdbb44732dbb Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Sat, 19 Dec 2020 10:59:23 +0100
Subject: [PATCH 42/42] More debug messages

---
 twoot.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/twoot.py b/twoot.py
index 5829043..5a6d215 100755
--- a/twoot.py
+++ b/twoot.py
@@ -489,7 +489,7 @@ def main(argv):
         # Upload video if there is one
         if tweet['video'] is not None:
             try:
-                logging.debug("Uploading video")
+                logging.debug("Uploading video to Mastodon")
                 media_posted = mastodon.media_post(tweet['video'])
                 media_ids.append(media_posted['id'])
             except (MastodonAPIError, MastodonIllegalArgumentError, TypeError):  # Media cannot be uploaded (invalid format, dead link, etc.)
@@ -502,6 +502,7 @@ def main(argv):
                 media = False
                 # Download picture
                 try:
+                    logging.debug('downloading picture')
                     media = requests.get(photo)
                 except:  # Picture cannot be downloaded for any reason
                     pass
@@ -509,6 +510,7 @@ def main(argv):
                 # Upload picture to Mastodon instance
                 if media:
                     try:
+                        logging.debug('uploading picture to Mastodon')
                         media_posted = mastodon.media_post(media.content, mime_type=media.headers['content-type'])
                         media_ids.append(media_posted['id'])
                     except (MastodonAPIError, MastodonIllegalArgumentError, TypeError):  # Media cannot be uploaded (invalid format, dead link, etc.)