From b7685616626f3a9e2a84e3b88ecc426e0b754683 Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Thu, 26 Mar 2020 14:50:03 +0100
Subject: [PATCH] Added video file path to dictionary with content of tweet

---
 twoot.py | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/twoot.py b/twoot.py
index d1c7e89..98c9205 100755
--- a/twoot.py
+++ b/twoot.py
@@ -27,11 +27,12 @@ from bs4 import BeautifulSoup, element
 import sqlite3
 import datetime, time
 import re
+from pathlib import Path
 from mastodon import Mastodon, MastodonError, MastodonAPIError, MastodonIllegalArgumentError
-
 import twitterdl
 import json.decoder
 
+
 # Update from https://www.whatismybrowser.com/guides/the-latest-user-agent/
 USER_AGENTS = [
     'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36',
@@ -119,7 +120,7 @@ def cleanup_tweet_text(tt_iter):
                     if tag.has_attr('data-expanded-path'):
                         data_expanded_path = tag['data-expanded-path']
                         if 'video' in data_expanded_path:
-                            # TODO  Optionally download video from twitter and upload to mastodon
+                            # Download video from twitter and store in filesystem
                             tweet_uri = "https://twitter.com/" + data_expanded_path.strip("/video/1")
                             twitter_dl = twitterdl.TwitterDownloader(tweet_uri, target_width=500, debug=1)
                             try:
@@ -378,6 +379,15 @@ def main(argv):
                         requests.exceptions.TooManyRedirects):
                     pass
 
+        # Check if a video was downloaded: look for an .mp4 file in ./output/<author_account>/<status_id>/
+        sid = re.search('/([0-9]+)$', tweet_id)
+        status_id = sid.groups()[0]
+        video_path = Path('./output') / author_account / status_id
+        video_file_list = list(video_path.glob('*.mp4'))
+        video_file = None
+        if len(video_file_list) != 0:
+            video_file = video_file_list[0].absolute().as_posix()
+
         # Add dictionary with content of tweet to list
         tweet = {
             "author": author,
@@ -386,6 +396,7 @@ def main(argv):
             "timestamp": timestamp,
             "tweet_id": tweet_id,
             "tweet_text": tweet_text,
+            "video": video_file,
             "photos": photos,
         }
         tweets.append(tweet)