From 99ffa52eb67fd3d99baa418fc307a9500ee7ef69 Mon Sep 17 00:00:00 2001
From: JC Francois <jc.francois@gmail.com>
Date: Thu, 26 Mar 2020 19:03:21 +0100
Subject: [PATCH 01/18] Added upload of video to Mastodon instance

---
 twoot.py | 32 +++++++++++++++++++++-----------
 1 file changed, 21 insertions(+), 11 deletions(-)

diff --git a/twoot.py b/twoot.py
index af311d8..d74bf87 100755
--- a/twoot.py
+++ b/twoot.py
@@ -470,24 +470,34 @@ def main(argv):
             # Skip to next tweet
             continue
 
-        # Upload photos
         media_ids = []
-        for photo in tweet['photos']:
-            media = False
-            # Download picture
+
+        # Upload video if there is one
+        if tweet['video'] is not None:
             try:
-                media = requests.get(photo)
-            except:  # Picture cannot be downloaded for any reason
+                media_posted = mastodon.media_post(tweet['video'])
+                media_ids.append(media_posted['id'])
+            except (MastodonAPIError, MastodonIllegalArgumentError, TypeError):  # Media cannot be uploaded (invalid format, dead link, etc.)
                 pass
 
-            # Upload picture to Mastodon instance
-            if media:
+        else:  # Only upload pic if no video was uploaded
+            # Upload photos
+            for photo in tweet['photos']:
+                media = False
+                # Download picture
                 try:
-                    media_posted = mastodon.media_post(media.content, mime_type=media.headers['content-type'])
-                    media_ids.append(media_posted['id'])
-                except (MastodonAPIError, MastodonIllegalArgumentError, TypeError):  # Media cannot be uploaded (invalid format, dead link, etc.)
+                    media = requests.get(photo)
+                except:  # Picture cannot be downloaded for any reason
                     pass
 
+                # Upload picture to Mastodon instance
+                if media:
+                    try:
+                        media_posted = mastodon.media_post(media.content, mime_type=media.headers['content-type'])
+                        media_ids.append(media_posted['id'])
+                    except (MastodonAPIError, MastodonIllegalArgumentError, TypeError):  # Media cannot be uploaded (invalid format, dead link, etc.)
+                        pass
+
         # Post toot
         try:
             mastodon = Mastodon(

From 04c95f3ad3898b96a3cb105f2b8bb9253d9e339a Mon Sep 17 00:00:00 2001
From: JC Francois <jc.francois@gmail.com>
Date: Thu, 26 Mar 2020 19:58:17 +0100
Subject: [PATCH 02/18] Added command-line option to download video from tweet
 and upload to Mastodon

---
 twitterdl.py |  1 -
 twoot.py     | 34 +++++++++++++++++++---------------
 2 files changed, 19 insertions(+), 16 deletions(-)

diff --git a/twitterdl.py b/twitterdl.py
index 1624ffa..f4b4316 100644
--- a/twitterdl.py
+++ b/twitterdl.py
@@ -1,6 +1,5 @@
 
 import argparse
-
 import requests
 import json
 import urllib.parse
diff --git a/twoot.py b/twoot.py
index d74bf87..eafd764 100755
--- a/twoot.py
+++ b/twoot.py
@@ -84,7 +84,7 @@ def handle_no_js(session, page, headers):
 
     return new_page
 
-def cleanup_tweet_text(tt_iter):
+def cleanup_tweet_text(tt_iter, get_vids):
     '''
     Receives an iterator over all the elements contained in the tweet-text container.
     Processes them to remove Twitter-specific stuff and make them suitable for
@@ -120,15 +120,17 @@ def cleanup_tweet_text(tt_iter):
                     if tag.has_attr('data-expanded-path'):
                         data_expanded_path = tag['data-expanded-path']
                         if 'video' in data_expanded_path:
-                            # Download video from twitter and store in filesystem
-                            tweet_uri = "https://twitter.com/" + data_expanded_path.strip("/video/1")
-                            twitter_dl = twitterdl.TwitterDownloader(tweet_uri, target_width=500, debug=1)
-                            try:
-                                twitter_dl.download()
-                            except json.JSONDecodeError:
-                                print("ERROR: Could not get playlist")
-
-                            tweet_text += '\n\n[Video embedded in original tweet]'
+                            if get_vids:
+                                # Download video from twitter and store in filesystem
+                                tweet_uri = "https://twitter.com/" + data_expanded_path.strip("/video/1")
+                                twitter_dl = twitterdl.TwitterDownloader(tweet_uri, target_width=500, debug=1)
+                                try:
+                                    twitter_dl.download()
+                                except json.JSONDecodeError:
+                                    print("ERROR: Could not get playlist")
+                                    tweet_text += '\n\n[Video embedded in original tweet]'
+                            else:
+                                tweet_text += '\n\n[Video embedded in original tweet]'
 
         # If element is hashflag (hashtag + icon), handle as simple hashtag
         elif tag.name == 'span' and tag['class'][0] == 'twitter-hashflag-container':
@@ -177,15 +179,15 @@ def contains_class(body_classes, some_class):
 def main(argv):
 
     # Build parser for command line arguments
-    # TODO  Add option for ingestion of video content
     parser = argparse.ArgumentParser(description='toot tweets.')
     parser.add_argument('-t', metavar='<twitter account>', action='store', required=True)
     parser.add_argument('-i', metavar='<mastodon instance>', action='store', required=True)
     parser.add_argument('-m', metavar='<mastodon account>', action='store', required=True)
     parser.add_argument('-p', metavar='<mastodon password>', action='store', required=True)
-    parser.add_argument('-r', action='store_true')
-    parser.add_argument('-a', metavar='<max age in days>', action='store', type=float, default=1)
-    parser.add_argument('-d', metavar='<min delay in mins>', action='store', type=float, default=0)
+    parser.add_argument('-r', action='store_true', help='Also post replies to other tweets')
+    parser.add_argument('-v', action='store_true', help='Ingest twitter videos and upload to Mastodon instance')
+    parser.add_argument('-a', metavar='<max age (in days)>', action='store', type=float, default=1)
+    parser.add_argument('-d', metavar='<min delay (in mins)>', action='store', type=float, default=0)
 
     # Parse command line
     args = vars(parser.parse_args())
@@ -195,6 +197,7 @@ def main(argv):
     mast_account = args['m']
     mast_password = args['p']
     tweets_and_replies = args['r']
+    get_vids = args['v']
     max_age = float(args['a'])
     min_delay = float(args['d'])
 
@@ -338,7 +341,7 @@ def main(argv):
         # extract iterator over tweet text contents
         tt_iter = tmt.find('div', class_='tweet-text').div.children
 
-        tweet_text = cleanup_tweet_text(tt_iter)
+        tweet_text = cleanup_tweet_text(tt_iter, get_vids)
 
         # Mention if the tweet is a reply-to
         if reply_to_username is not None:
@@ -521,6 +524,7 @@ def main(argv):
                        (twit_account, mast_instance, mast_account, tweet['tweet_id'], toot['id']))
             sql.commit()
 
+    # TODO  Cleanup downloaded video files
 
 if __name__ == "__main__":
     main(sys.argv)

From 9a8cd0ef651a7b48e01f0009e1cfb7034269154c Mon Sep 17 00:00:00 2001
From: JC Francois <jc.francois@gmail.com>
Date: Thu, 26 Mar 2020 20:50:59 +0100
Subject: [PATCH 03/18] TODO's and FIXME's

---
 twoot.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/twoot.py b/twoot.py
index eafd764..1c00cda 100755
--- a/twoot.py
+++ b/twoot.py
@@ -31,6 +31,7 @@ from pathlib import Path
 from mastodon import Mastodon, MastodonError, MastodonAPIError, MastodonIllegalArgumentError
 import twitterdl
 import json.decoder
+import shutil
 
 
 # Update from https://www.whatismybrowser.com/guides/the-latest-user-agent/
@@ -123,6 +124,7 @@ def cleanup_tweet_text(tt_iter, get_vids):
                             if get_vids:
                                 # Download video from twitter and store in filesystem
                                 tweet_uri = "https://twitter.com/" + data_expanded_path.strip("/video/1")
+                                # FIXME  Use specific directory for downloading videos (that can be easily deleted)
                                 twitter_dl = twitterdl.TwitterDownloader(tweet_uri, target_width=500, debug=1)
                                 try:
                                     twitter_dl.download()

From 0231f224a37126df46cbb68930ff47a2c68cc52b Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Fri, 27 Mar 2020 17:26:04 +0100
Subject: [PATCH 04/18] Improved naming of downloaded videos and implemented
 cleanup

---
 twoot.py | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/twoot.py b/twoot.py
index 1c00cda..b22610a 100755
--- a/twoot.py
+++ b/twoot.py
@@ -85,11 +85,15 @@ def handle_no_js(session, page, headers):
 
     return new_page
 
-def cleanup_tweet_text(tt_iter, get_vids):
+
+def cleanup_tweet_text(tt_iter, tweet_uri, get_vids):
     '''
     Receives an iterator over all the elements contained in the tweet-text container.
     Processes them to remove Twitter-specific stuff and make them suitable for
     posting on Mastodon
+    :param tt_iter: iterator over the HTML elements in the text of the tweet
+    :param tweet_uri: Used to downloaded videos
+    :param get_vids:  True to download embedded twitter videos and save them on the filesystem
     '''
     tweet_text = ''
     # Iterate elements
@@ -123,8 +127,6 @@ def cleanup_tweet_text(tt_iter, get_vids):
                         if 'video' in data_expanded_path:
                             if get_vids:
                                 # Download video from twitter and store in filesystem
-                                tweet_uri = "https://twitter.com/" + data_expanded_path.strip("/video/1")
-                                # FIXME  Use specific directory for downloading videos (that can be easily deleted)
                                 twitter_dl = twitterdl.TwitterDownloader(tweet_uri, target_width=500, debug=1)
                                 try:
                                     twitter_dl.download()
@@ -264,8 +266,9 @@ def main(argv):
                 # Skip this tweet
                 continue
 
-        # Extract tweet id
+        # Extract tweet ID and status ID
         tweet_id = str(status['href']).strip('?p=v')
+        status_id = tweet_id.split('/')[3]
 
         # Extract url of full status page
         full_status_url = 'https://mobile.twitter.com' + tweet_id + '?p=v'
@@ -310,7 +313,7 @@ def main(argv):
             authenticity_token = soup.find('input', {'name': 'authenticity_token'}).get('value')
             form_input = {'show_media': 1, 'authenticity_token': authenticity_token, 'commit': 'Display media'}
 
-            full_status_page = session.post(full_status_url + '?p=v', data=form_input, headers=headers)
+            full_status_page = session.post(full_status_url, data=form_input, headers=headers)
 
             # Verify that download worked
             assert full_status_page.status_code == 200, \
@@ -343,7 +346,7 @@ def main(argv):
         # extract iterator over tweet text contents
         tt_iter = tmt.find('div', class_='tweet-text').div.children
 
-        tweet_text = cleanup_tweet_text(tt_iter, get_vids)
+        tweet_text = cleanup_tweet_text(tt_iter, full_status_url, get_vids)
 
         # Mention if the tweet is a reply-to
         if reply_to_username is not None:
@@ -386,9 +389,7 @@ def main(argv):
                     pass
 
         # Check if video was downloaded
-        sid = re.search('/([0-9]+)$', tweet_id)
-        status_id = sid.groups()[0]
-        video_path = Path('./output') / author_account / status_id
+        video_path = Path('./output') / twit_account / status_id
         video_file_list = list(video_path.glob('*.mp4'))
         video_file = None
         if len(video_file_list) != 0:
@@ -526,7 +527,8 @@ def main(argv):
                        (twit_account, mast_instance, mast_account, tweet['tweet_id'], toot['id']))
             sql.commit()
 
-    # TODO  Cleanup downloaded video files
+    # Cleanup downloaded video files
+    shutil.rmtree('./output/' + twit_account)
 
 if __name__ == "__main__":
     main(sys.argv)

From 2fe06c0bbccd2d0ecd903f9ee7a8c13c87029972 Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Fri, 27 Mar 2020 17:45:40 +0100
Subject: [PATCH 05/18] Use correct capitalization of twitter account name for
 deleting video directory

---
 twoot.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/twoot.py b/twoot.py
index b22610a..ae11ddb 100755
--- a/twoot.py
+++ b/twoot.py
@@ -246,8 +246,10 @@ def main(argv):
 
     # Verify that we now have the correct twitter page
     body_classes = soup.body.get_attribute_list('class')
-    assert contains_class(body_classes, 'users-show-page'), \
-        'This is not the correct twitter page. Quitting'
+    assert contains_class(body_classes, 'users-show-page'), 'This is not the correct twitter page. Quitting'
+
+    # Replace twit_account with version with correct capitalization
+    twit_account = soup.find('span', class_='screen-name').get_text()
 
     # Extract twitter timeline
     timeline = soup.find_all('table', class_='tweet')

From dd1d54d2a4fddab63a1ff488d54038d75bc3efb2 Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Sat, 28 Mar 2020 11:08:09 +0100
Subject: [PATCH 06/18] Check if tweet in db before ingest to speed up
 processing of feed

---
 twoot.py | 45 ++++++++++++++++++++++-----------------------
 1 file changed, 22 insertions(+), 23 deletions(-)

diff --git a/twoot.py b/twoot.py
index ae11ddb..06d8861 100755
--- a/twoot.py
+++ b/twoot.py
@@ -205,6 +205,12 @@ def main(argv):
     max_age = float(args['a'])
     min_delay = float(args['d'])
 
+    # Try to open database. If it does not exist, create it
+    sql = sqlite3.connect('twoot.db')
+    db = sql.cursor()
+    db.execute('''CREATE TABLE IF NOT EXISTS toots (twitter_account TEXT, mastodon_instance TEXT,
+               mastodon_account TEXT, tweet_id TEXT, toot_id TEXT)''')
+
     # **********************************************************
     # Load twitter page of user. Process all tweets and generate
     # list of dictionaries ready to be posted on Mastodon
@@ -256,6 +262,20 @@ def main(argv):
 
     for status in timeline:
 
+        # Extract tweet ID and status ID
+        tweet_id = str(status['href']).strip('?p=v')
+        status_id = tweet_id.split('/')[3]
+
+        # Check in database if tweet has already been posted
+        db.execute('''SELECT * FROM toots WHERE twitter_account = ? AND mastodon_instance  = ? AND
+                   mastodon_account = ? AND tweet_id = ?''',
+                   (twit_account, mast_instance, mast_account, tweet_id))
+        tweet_in_db = db.fetchone()
+
+        if tweet_in_db is not None:
+            # Skip to next tweet
+            continue
+
         reply_to_username = None
         # Check if the tweet is a reply-to
         reply_to_div = status.find('div', class_='tweet-reply-context username')
@@ -268,10 +288,6 @@ def main(argv):
                 # Skip this tweet
                 continue
 
-        # Extract tweet ID and status ID
-        tweet_id = str(status['href']).strip('?p=v')
-        status_id = tweet_id.split('/')[3]
-
         # Extract url of full status page
         full_status_url = 'https://mobile.twitter.com' + tweet_id + '?p=v'
 
@@ -415,16 +431,10 @@ def main(argv):
     #     print(t)
 
     # **********************************************************
-    # Iterate tweets. Check if the tweet has already been posted
-    # on Mastodon. If not, post it and add it to database
+    # Iterate tweets in list.
+    # post each on Mastodon and reference to it in database
     # **********************************************************
 
-    # Try to open database. If it does not exist, create it
-    sql = sqlite3.connect('twoot.db')
-    db = sql.cursor()
-    db.execute('''CREATE TABLE IF NOT EXISTS toots (twitter_account TEXT, mastodon_instance TEXT,
-               mastodon_account TEXT, tweet_id TEXT, toot_id TEXT)''')
-
     # Create Mastodon application if it does not exist yet
     if not os.path.isfile(mast_instance + '.secret'):
         try:
@@ -458,17 +468,6 @@ def main(argv):
 
     # Upload tweets
     for tweet in reversed(tweets):
-        # Check in database if tweet has already been posted
-        # FIXME  Move tests to the front of the process to avoid the unnecessary processing of already ingested tweets
-        db.execute('''SELECT * FROM toots WHERE twitter_account = ? AND mastodon_instance  = ? AND
-                   mastodon_account = ? AND tweet_id = ?''',
-                   (twit_account, mast_instance, mast_account, tweet['tweet_id']))
-        tweet_in_db = db.fetchone()
-
-        if tweet_in_db is not None:
-            # Skip to next tweet
-            continue
-
         # Check that the tweet is not too young (might be deleted) or too old
         age_in_hours = (time.time() - float(tweet['timestamp'])) / 3600.0
         min_delay_in_hours = min_delay / 60.0

From ba3da6ab7cc24e76e2e889790d1d104691436310 Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Sat, 28 Mar 2020 11:21:28 +0100
Subject: [PATCH 07/18] Handled exception of video download directory absent
 when trying to delete it

---
 twoot.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/twoot.py b/twoot.py
index 06d8861..61e572f 100755
--- a/twoot.py
+++ b/twoot.py
@@ -529,7 +529,10 @@ def main(argv):
             sql.commit()
 
     # Cleanup downloaded video files
-    shutil.rmtree('./output/' + twit_account)
+    try:
+        shutil.rmtree('./output/' + twit_account)
+    except FileNotFoundError:  # The directory does not exist
+        pass
 
 if __name__ == "__main__":
     main(sys.argv)

From b1c9ec3811d1179dc4c58d0f15bc4353318b153e Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Sat, 28 Mar 2020 11:33:38 +0100
Subject: [PATCH 08/18] Added documentation of -v option to README.md

---
 README.md | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 079f330..0c97936 100644
--- a/README.md
+++ b/README.md
@@ -20,11 +20,12 @@ of last week.
 * Fetch timeline of given users from twitter.com
 * Scrape html and formats tweets for post on mastodon
 * Emojis supported
-* Upload images from tweet to Mastodon (videos not supported)
+* Optionally upload videos from tweet to Mastodon
+* Upload images from tweet to Mastodon
 * Specify maximum age of tweet to be considered
 * Specify minimum delay before considering a tweet for upload
 * Remember tweets already tooted to prevent double posting
-* Can optionally post reply-to tweets on the mastodon account
+* Optionally post reply-to tweets on the mastodon account
 
 # usage
 
@@ -45,10 +46,16 @@ is @superduperbot@botsin.space
 | -i    | Mastodon instance domain name                    | `botsin.space`     | Yes |
 | -m    | Mastodon username                                | `superduperbot`    | Yes |
 | -p    | Mastodon password                                | `my_Sup3r-S4f3*pw` | Yes |
+| -v    | upload videos to Mastodon                        | *N/A*              | No  |
 | -r    | Post reply-to tweets (ignored by default)        | *N/A*              | No  |
 | -a    | Max. age of tweet to post (in days)              | `1`                | No  |
 | -d    | Min. delay before posting new tweet (in minutes) | `15`               | No  |
 
+When using the `-v` switch consider:
+* The copyright of the content that you want to cross-post
+* The storage / transfer limitations of the Mastodon instance that you are posting to
+* The upstream bandwidth that you may consume on your internet connection
+
 Default max age is 1 day. Decimal values are OK.
 
 Default min delay is 0 minutes.

From d056497f65e2fcae700dae6a72973f4ea66bc7fe Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Sat, 28 Mar 2020 11:47:53 +0100
Subject: [PATCH 09/18] Added additional dependencies to README.md

---
 README.md | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 0c97936..faa2c3b 100644
--- a/README.md
+++ b/README.md
@@ -64,7 +64,10 @@ Default min delay is 0 minutes.
 
 Make sure python3 is installed.
 
-Twoot depends on beautifulsoup4 and mastodon python module: `sudo pip install beautifulsoup4 Mastodon.py`
+Twoot depends on beautifulsoup4 and mastodon python module. It also
+requires m3u8 and ffmpeg-python.
+
+    pip install beautifulsoup4 Mastodon.py m3u8 ffmpeg-python
 
 In your user folder, execute `git clone https://gitlab.com/jeancf/twoot.git`
 to clone repo with twoot.py script.

From cd482359a3643ac1ba12af4b0c00320773dc2579 Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Sat, 28 Mar 2020 11:55:13 +0100
Subject: [PATCH 10/18] Updated .gitignore to disregard *.sh files

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index b043063..d467991 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,4 +2,5 @@
 .vscode/
 venv/
 *.secret
+*.sh
 twoot.db

From ae60d2e00261031266b57d9653c0155cd66abb11 Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Sat, 28 Mar 2020 11:59:47 +0100
Subject: [PATCH 11/18] Updated README.md with requirement to have ffmpeg
 installed

---
 README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README.md b/README.md
index faa2c3b..fac834a 100644
--- a/README.md
+++ b/README.md
@@ -67,6 +67,8 @@ Make sure python3 is installed.
 Twoot depends on beautifulsoup4 and mastodon python module. It also
 requires m3u8 and ffmpeg-python.
 
+ffmpeg must also be installed.
+
     pip install beautifulsoup4 Mastodon.py m3u8 ffmpeg-python
 
 In your user folder, execute `git clone https://gitlab.com/jeancf/twoot.git`

From df4eaa0dd7022f19571007570f0f61debfdf76a1 Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Sat, 28 Mar 2020 13:55:43 +0100
Subject: [PATCH 12/18] Set debug=0 on call to download to avoid mail spam

---
 twoot.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/twoot.py b/twoot.py
index 61e572f..e6e411f 100755
--- a/twoot.py
+++ b/twoot.py
@@ -127,7 +127,7 @@ def cleanup_tweet_text(tt_iter, tweet_uri, get_vids):
                         if 'video' in data_expanded_path:
                             if get_vids:
                                 # Download video from twitter and store in filesystem
-                                twitter_dl = twitterdl.TwitterDownloader(tweet_uri, target_width=500, debug=1)
+                                twitter_dl = twitterdl.TwitterDownloader(tweet_uri, target_width=500, debug=0)
                                 try:
                                     twitter_dl.download()
                                 except json.JSONDecodeError:

From 9c56ad57c8dbe3bc6f7f56b73e7aa32f2e576bd9 Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Sat, 28 Mar 2020 14:07:00 +0100
Subject: [PATCH 13/18] Added TODOs to improve management of locations of video
 download

---
 twoot.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/twoot.py b/twoot.py
index e6e411f..cf8a5ba 100755
--- a/twoot.py
+++ b/twoot.py
@@ -127,6 +127,7 @@ def cleanup_tweet_text(tt_iter, tweet_uri, get_vids):
                         if 'video' in data_expanded_path:
                             if get_vids:
                                 # Download video from twitter and store in filesystem
+                                # TODO  set output location to ./output/twit_account
                                 twitter_dl = twitterdl.TwitterDownloader(tweet_uri, target_width=500, debug=0)
                                 try:
                                     twitter_dl.download()
@@ -407,6 +408,7 @@ def main(argv):
                     pass
 
         # Check if video was downloaded
+        # TODO  Check subdirectories of twit_account directory for video
         video_path = Path('./output') / twit_account / status_id
         video_file_list = list(video_path.glob('*.mp4'))
         video_file = None

From 2090d214b6464abad6033234fb41732d36246c41 Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Sat, 28 Mar 2020 14:11:06 +0100
Subject: [PATCH 14/18] Trying to stop debug messages

---
 twoot.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/twoot.py b/twoot.py
index cf8a5ba..503f5a1 100755
--- a/twoot.py
+++ b/twoot.py
@@ -128,7 +128,7 @@ def cleanup_tweet_text(tt_iter, tweet_uri, get_vids):
                             if get_vids:
                                 # Download video from twitter and store in filesystem
                                 # TODO  set output location to ./output/twit_account
-                                twitter_dl = twitterdl.TwitterDownloader(tweet_uri, target_width=500, debug=0)
+                                twitter_dl = twitterdl.TwitterDownloader(tweet_uri, target_width=500)
                                 try:
                                     twitter_dl.download()
                                 except json.JSONDecodeError:

From 6fa2019618d339b212dd7d955115c3f53bcb3f2e Mon Sep 17 00:00:00 2001
From: JC Francois <jc.francois@gmail.com>
Date: Sun, 29 Mar 2020 13:41:49 +0200
Subject: [PATCH 15/18] Calling twitterdl.py as subprocess

---
 twitterdl.py |  1 +
 twoot.py     | 14 ++++++++------
 2 files changed, 9 insertions(+), 6 deletions(-)
 mode change 100644 => 100755 twitterdl.py

diff --git a/twitterdl.py b/twitterdl.py
old mode 100644
new mode 100755
index f4b4316..3ba32fb
--- a/twitterdl.py
+++ b/twitterdl.py
@@ -1,3 +1,4 @@
+#! /usr/bin/env python3
 
 import argparse
 import requests
diff --git a/twoot.py b/twoot.py
index 503f5a1..2abc50b 100755
--- a/twoot.py
+++ b/twoot.py
@@ -29,7 +29,7 @@ import datetime, time
 import re
 from pathlib import Path
 from mastodon import Mastodon, MastodonError, MastodonAPIError, MastodonIllegalArgumentError
-import twitterdl
+import subprocess
 import json.decoder
 import shutil
 
@@ -128,12 +128,14 @@ def cleanup_tweet_text(tt_iter, tweet_uri, get_vids):
                             if get_vids:
                                 # Download video from twitter and store in filesystem
                                 # TODO  set output location to ./output/twit_account
-                                twitter_dl = twitterdl.TwitterDownloader(tweet_uri, target_width=500)
                                 try:
-                                    twitter_dl.download()
-                                except json.JSONDecodeError:
-                                    print("ERROR: Could not get playlist")
-                                    tweet_text += '\n\n[Video embedded in original tweet]'
+                                    dl_feedback = subprocess.run(["./twitterdl.py", tweet_uri, "-w 500"], capture_output=True)
+                                    if dl_feedback.returncode != 0:
+                                        # TODO  Log dl_feedback.stderr
+                                        tweet_text += '\n\n[Video embedded in original tweet]'
+                                except OSError:
+                                    print("Could not execute twitterdl.py (is it there? Is it set as executable?)")
+                                    sys.exit(-1)
                             else:
                                 tweet_text += '\n\n[Video embedded in original tweet]'
 

From 965317f5b2324a8f6b30f881aa127b00925d5557 Mon Sep 17 00:00:00 2001
From: JC Francois <jc.francois@gmail.com>
Date: Sun, 29 Mar 2020 13:57:18 +0200
Subject: [PATCH 16/18] Added details on optional dependencies to README.md

---
 README.md | 24 +++++++++++++-----------
 twoot.py  |  5 +++--
 2 files changed, 16 insertions(+), 13 deletions(-)

diff --git a/README.md b/README.md
index fac834a..873f899 100644
--- a/README.md
+++ b/README.md
@@ -31,7 +31,7 @@ of last week.
 
 ```
 twoot.py [-h] -t <twitter account> -i <mastodon instance> -m <mastodon
-                account> -p <mastodon password> [-r]
+                account> -p <mastodon password> [-r] [-v]
                 [-a <max age in days>] [-d <min delay in mins>]
 ```
 
@@ -42,19 +42,19 @@ is @superduperbot@botsin.space
 
 |Switch |Description                                       | Example            | Req |
 |-------|--------------------------------------------------|--------------------|-----|
-| -t    | twitter account name without '@'                 | `SuperDuperBot`    | Yes |
+| -t    | twitter account name without '@'                 | `SuperDuper`    | Yes |
 | -i    | Mastodon instance domain name                    | `botsin.space`     | Yes |
 | -m    | Mastodon username                                | `superduperbot`    | Yes |
 | -p    | Mastodon password                                | `my_Sup3r-S4f3*pw` | Yes |
 | -v    | upload videos to Mastodon                        | *N/A*              | No  |
 | -r    | Post reply-to tweets (ignored by default)        | *N/A*              | No  |
-| -a    | Max. age of tweet to post (in days)              | `1`                | No  |
+| -a    | Max. age of tweet to post (in days)              | `5`                | No  |
 | -d    | Min. delay before posting new tweet (in minutes) | `15`               | No  |
 
 When using the `-v` switch consider:
-* The copyright of the content that you want to cross-post
-* The storage / transfer limitations of the Mastodon instance that you are posting to
-* The upstream bandwidth that you may consume on your internet connection
+* whether the copyright of the content that you want to cross-post allows it
+* the storage / transfer limitations of the Mastodon instance that you are posting to
+* the upstream bandwidth that you may consume on your internet connection
 
 Default max age is 1 day. Decimal values are OK.
 
@@ -64,13 +64,15 @@ Default min delay is 0 minutes.
 
 Make sure python3 is installed.
 
-Twoot depends on beautifulsoup4 and mastodon python module. It also
-requires m3u8 and ffmpeg-python.
+Twoot depends on `beautifulsoup4` and `Mastodon.py` python modules.
 
-ffmpeg must also be installed.
-
-    pip install beautifulsoup4 Mastodon.py m3u8 ffmpeg-python
+If you plan to use the `-v` switch to download videos, the following additional dependencies are required:
+* Python modules `m3u8` and `ffmpeg-python`
+* [ffmpeg](https://ffmpeg.org/download.html) (check the package manager of your distribution) 
 
+```
+> pip install beautifulsoup4 Mastodon.py m3u8 ffmpeg-python
+```
 In your user folder, execute `git clone https://gitlab.com/jeancf/twoot.git`
 to clone repo with twoot.py script.
 
diff --git a/twoot.py b/twoot.py
index 2abc50b..a8e4dc4 100755
--- a/twoot.py
+++ b/twoot.py
@@ -126,9 +126,10 @@ def cleanup_tweet_text(tt_iter, tweet_uri, get_vids):
                         data_expanded_path = tag['data-expanded-path']
                         if 'video' in data_expanded_path:
                             if get_vids:
-                                # Download video from twitter and store in filesystem
-                                # TODO  set output location to ./output/twit_account
+                                # Download video from twitter and store in filesystem. Running as subprocess to avoid
+                                # requirement to install ffmpeg and ffmpeg-python for those that do not want to post videos
                                 try:
+                                    # TODO  set output location to ./output/twit_account
                                     dl_feedback = subprocess.run(["./twitterdl.py", tweet_uri, "-w 500"], capture_output=True)
                                     if dl_feedback.returncode != 0:
                                         # TODO  Log dl_feedback.stderr

From e32620d79b8c743d1e863557e6954af6b2b06bce Mon Sep 17 00:00:00 2001
From: JC Francois <jc.francois@gmail.com>
Date: Sun, 29 Mar 2020 17:16:54 +0200
Subject: [PATCH 17/18] Implemented proper naming of downloaded videos

---
 twoot.py | 32 ++++++++++++++++++++++----------
 1 file changed, 22 insertions(+), 10 deletions(-)

diff --git a/twoot.py b/twoot.py
index a8e4dc4..ec80a7e 100755
--- a/twoot.py
+++ b/twoot.py
@@ -86,14 +86,16 @@ def handle_no_js(session, page, headers):
     return new_page
 
 
-def cleanup_tweet_text(tt_iter, tweet_uri, get_vids):
+def cleanup_tweet_text(tt_iter, twit_account, status_id, tweet_uri, get_vids):
     '''
     Receives an iterator over all the elements contained in the tweet-text container.
     Processes them to remove Twitter-specific stuff and make them suitable for
     posting on Mastodon
     :param tt_iter: iterator over the HTML elements in the text of the tweet
+    :param twit_account: Used to name directory where videos are downloaded
+    :param status_id: Used to name directory where videos are downloaded
     :param tweet_uri: Used to downloaded videos
-    :param get_vids:  True to download embedded twitter videos and save them on the filesystem
+    :param get_vids: True to download embedded twitter videos and save them on the filesystem
     '''
     tweet_text = ''
     # Iterate elements
@@ -129,8 +131,11 @@ def cleanup_tweet_text(tt_iter, tweet_uri, get_vids):
                                 # Download video from twitter and store in filesystem. Running as subprocess to avoid
                                 # requirement to install ffmpeg and ffmpeg-python for those that do not want to post videos
                                 try:
-                                    # TODO  set output location to ./output/twit_account
-                                    dl_feedback = subprocess.run(["./twitterdl.py", tweet_uri, "-w 500"], capture_output=True)
+                                    # Store the downloaded video under ./output/twit_account/status_id
+                                    dl_feedback = subprocess.run(
+                                        ["./twitterdl.py", tweet_uri, "-ooutput/" + twit_account + "/" + status_id, "-w 500"],
+                                        capture_output=True
+                                    )
                                     if dl_feedback.returncode != 0:
                                         # TODO  Log dl_feedback.stderr
                                         tweet_text += '\n\n[Video embedded in original tweet]'
@@ -368,7 +373,7 @@ def main(argv):
         # extract iterator over tweet text contents
         tt_iter = tmt.find('div', class_='tweet-text').div.children
 
-        tweet_text = cleanup_tweet_text(tt_iter, full_status_url, get_vids)
+        tweet_text = cleanup_tweet_text(tt_iter, twit_account, status_id, full_status_url, get_vids)
 
         # Mention if the tweet is a reply-to
         if reply_to_username is not None:
@@ -411,12 +416,19 @@ def main(argv):
                     pass
 
         # Check if video was downloaded
-        # TODO  Check subdirectories of twit_account directory for video
-        video_path = Path('./output') / twit_account / status_id
-        video_file_list = list(video_path.glob('*.mp4'))
         video_file = None
-        if len(video_file_list) != 0:
-            video_file = video_file_list[0].absolute().as_posix()
+
+        video_path = Path('./output') / twit_account / status_id
+        if video_path.exists():
+            # Take the first subdirectory of video path (named after original poster of video)
+            video_path = [p for p in video_path.iterdir() if p.is_dir()][0]
+            # Take again the first subdirectory of video path (named after status id of original post where video is attached)
+            video_path = [p for p in video_path.iterdir() if p.is_dir()][0]
+            # list video files
+            video_file_list = list(video_path.glob('*.mp4'))
+            if len(video_file_list) != 0:
+                # Extract posix path of first video file in list
+                video_file = video_file_list[0].absolute().as_posix()
 
         # Add dictionary with content of tweet to list
         tweet = {

From 092f2ab3711c965af91e0ca3d963fc8dc62e8e32 Mon Sep 17 00:00:00 2001
From: JC Francois <jc.francois@gmail.com>
Date: Sun, 5 Apr 2020 10:37:54 +0200
Subject: [PATCH 18/18] Cleanup and README.md update for release

---
 README.md    | 12 ++++++++++--
 twitterdl.py | 22 ++++++++++++++++++++++
 twoot.py     |  5 +++--
 3 files changed, 35 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 873f899..585777f 100644
--- a/README.md
+++ b/README.md
@@ -1,9 +1,17 @@
+# Twoot
+
+Twoot is a python script that extracts tweets from a twitter feed and
+reposts them as toots on a Mastodon account.
+
 I started twoot when [tootbot](https://github.com/cquest/tootbot)
 stopped working. Tootbot relies on rss feeds from https://twitrss.me
 that broke when Twitter refreshed their web UI in July 2019.
 
 Instead twoot is self contained and handles all the processing.  
 
+**UPDATE 05 APR 2020** VERSION 1.0. Twoot can now optionally download
+videos from Twitter and upload them on Mastodon.
+
 **UPDATE 17 MAR 2020** Added command line switch (`-r`) to also post
 reply-to tweets on the mastodon account. They will not be included by
 default anymore.
@@ -66,9 +74,9 @@ Make sure python3 is installed.
 
 Twoot depends on `beautifulsoup4` and `Mastodon.py` python modules.
 
-If you plan to use the `-v` switch to download videos, the additional depedencies are required:
+**Only if you plan to download videos** with the `-v` switch are the additional dependencies required:
 * Python modules `m3u8` and `ffmpeg-python`
-* [ffmpeg](https://ffmpeg.org/download.html) (check the package manager of your distribution) 
+* [ffmpeg](https://ffmpeg.org/download.html) (installed with the package manager of your distribution) 
 
 ```
 > pip install beautifulsoup4 Mastodon.py m3u8 ffmpeg-python
diff --git a/twitterdl.py b/twitterdl.py
index 3ba32fb..984f6a9 100755
--- a/twitterdl.py
+++ b/twitterdl.py
@@ -1,5 +1,27 @@
 #! /usr/bin/env python3
 
+"""
+    This file is a modification of
+    https://github.com/h4ckninja/twitter-video-downloader/
+    The original package has an unknown license. The modified version
+    is released here under GPL v3.
+
+    Copyright (C) 2019  Jean-Christophe Francois
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+"""
+
 import argparse
 import requests
 import json
diff --git a/twoot.py b/twoot.py
index ec80a7e..0c65369 100755
--- a/twoot.py
+++ b/twoot.py
@@ -1,7 +1,7 @@
 #! /usr/bin/env python3
 # -*- coding: utf-8 -*-
 
-'''
+"""
     Copyright (C) 2019  Jean-Christophe Francois
 
     This program is free software: you can redistribute it and/or modify
@@ -16,7 +16,7 @@
 
     You should have received a copy of the GNU General Public License
     along with this program.  If not, see <http://www.gnu.org/licenses/>.
-'''
+"""
 
 import sys
 import argparse
@@ -551,5 +551,6 @@ def main(argv):
     except FileNotFoundError:  # The directory does not exist
         pass
 
+
 if __name__ == "__main__":
     main(sys.argv)