From b7685616626f3a9e2a84e3b88ecc426e0b754683 Mon Sep 17 00:00:00 2001 From: jeancf Date: Thu, 26 Mar 2020 14:50:03 +0100 Subject: [PATCH] Added video file path to dictionary with content of tweet --- twoot.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/twoot.py b/twoot.py index d1c7e89..98c9205 100755 --- a/twoot.py +++ b/twoot.py @@ -27,11 +27,12 @@ from bs4 import BeautifulSoup, element import sqlite3 import datetime, time import re +from pathlib import Path from mastodon import Mastodon, MastodonError, MastodonAPIError, MastodonIllegalArgumentError - import twitterdl import json.decoder + # Update from https://www.whatismybrowser.com/guides/the-latest-user-agent/ USER_AGENTS = [ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36', @@ -119,7 +120,7 @@ def cleanup_tweet_text(tt_iter): if tag.has_attr('data-expanded-path'): data_expanded_path = tag['data-expanded-path'] if 'video' in data_expanded_path: - # TODO Optionally download video from twitter and upload to mastodon + # Download video from twitter and store in filesystem tweet_uri = "https://twitter.com/" + data_expanded_path.strip("/video/1") twitter_dl = twitterdl.TwitterDownloader(tweet_uri, target_width=500, debug=1) try: @@ -378,6 +379,15 @@ def main(argv): requests.exceptions.TooManyRedirects): pass + # Check if video was downloaded + sid = re.search('/([0-9]+)$', tweet_id) + status_id = sid.groups()[0] + video_path = Path('./output') / author_account / status_id + video_file_list = list(video_path.glob('*.mp4')) + video_file = None + if len(video_file_list) != 0: + video_file = video_file_list[0].absolute().as_posix() + # Add dictionary with content of tweet to list tweet = { "author": author, @@ -386,6 +396,7 @@ def main(argv): "timestamp": timestamp, "tweet_id": tweet_id, "tweet_text": tweet_text, + "video": video_file, "photos": photos, } tweets.append(tweet)