From 0231f224a37126df46cbb68930ff47a2c68cc52b Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Fri, 27 Mar 2020 17:26:04 +0100
Subject: [PATCH] Improved naming of downloaded videos and implemented cleanup

---
 twoot.py | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/twoot.py b/twoot.py
index 1c00cda..b22610a 100755
--- a/twoot.py
+++ b/twoot.py
@@ -85,11 +85,15 @@ def handle_no_js(session, page, headers):
 
     return new_page
 
-def cleanup_tweet_text(tt_iter, get_vids):
+
+def cleanup_tweet_text(tt_iter, tweet_uri, get_vids):
     '''
     Receives an iterator over all the elements contained in the tweet-text container.
     Processes them to remove Twitter-specific stuff and make them suitable for
     posting on Mastodon
+    :param tt_iter: iterator over the HTML elements in the text of the tweet
+    :param tweet_uri: URI of the tweet, used to download its embedded videos
+    :param get_vids:  True to download embedded twitter videos and save them on the filesystem
     '''
     tweet_text = ''
     # Iterate elements
@@ -123,8 +127,6 @@ def cleanup_tweet_text(tt_iter, get_vids):
                         if 'video' in data_expanded_path:
                             if get_vids:
                                 # Download video from twitter and store in filesystem
-                                tweet_uri = "https://twitter.com/" + data_expanded_path.strip("/video/1")
-                                # FIXME  Use specific directory for downloading videos (that can be easily deleted)
                                 twitter_dl = twitterdl.TwitterDownloader(tweet_uri, target_width=500, debug=1)
                                 try:
                                     twitter_dl.download()
@@ -264,8 +266,9 @@ def main(argv):
                 # Skip this tweet
                 continue
 
-        # Extract tweet id
+        # Extract tweet ID and status ID
         tweet_id = str(status['href']).strip('?p=v')
+        status_id = tweet_id.split('/')[3]
 
         # Extract url of full status page
         full_status_url = 'https://mobile.twitter.com' + tweet_id + '?p=v'
@@ -310,7 +313,7 @@ def main(argv):
             authenticity_token = soup.find('input', {'name': 'authenticity_token'}).get('value')
             form_input = {'show_media': 1, 'authenticity_token': authenticity_token, 'commit': 'Display media'}
 
-            full_status_page = session.post(full_status_url + '?p=v', data=form_input, headers=headers)
+            full_status_page = session.post(full_status_url, data=form_input, headers=headers)
 
             # Verify that download worked
             assert full_status_page.status_code == 200, \
@@ -343,7 +346,7 @@ def main(argv):
         # extract iterator over tweet text contents
         tt_iter = tmt.find('div', class_='tweet-text').div.children
 
-        tweet_text = cleanup_tweet_text(tt_iter, get_vids)
+        tweet_text = cleanup_tweet_text(tt_iter, full_status_url, get_vids)
 
         # Mention if the tweet is a reply-to
         if reply_to_username is not None:
@@ -386,9 +389,7 @@ def main(argv):
                     pass
 
         # Check if video was downloaded
-        sid = re.search('/([0-9]+)$', tweet_id)
-        status_id = sid.groups()[0]
-        video_path = Path('./output') / author_account / status_id
+        video_path = Path('./output') / twit_account / status_id
         video_file_list = list(video_path.glob('*.mp4'))
         video_file = None
         if len(video_file_list) != 0:
@@ -526,7 +527,8 @@ def main(argv):
                        (twit_account, mast_instance, mast_account, tweet['tweet_id'], toot['id']))
             sql.commit()
 
-    # TODO  Cleanup downloaded video files
+    # Cleanup downloaded video files
+    shutil.rmtree('./output/' + twit_account)
 
 if __name__ == "__main__":
     main(sys.argv)