Added video file path to dictionary with content of tweet

This commit is contained in:
jeancf 2020-03-26 14:50:03 +01:00
parent ab7c68dff3
commit b768561662

View File

@@ -27,11 +27,12 @@ from bs4 import BeautifulSoup, element
import sqlite3 import sqlite3
import datetime, time import datetime, time
import re import re
from pathlib import Path
from mastodon import Mastodon, MastodonError, MastodonAPIError, MastodonIllegalArgumentError from mastodon import Mastodon, MastodonError, MastodonAPIError, MastodonIllegalArgumentError
import twitterdl import twitterdl
import json.decoder import json.decoder
# Update from https://www.whatismybrowser.com/guides/the-latest-user-agent/ # Update from https://www.whatismybrowser.com/guides/the-latest-user-agent/
USER_AGENTS = [ USER_AGENTS = [
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36',
@@ -119,7 +120,7 @@ def cleanup_tweet_text(tt_iter):
if tag.has_attr('data-expanded-path'): if tag.has_attr('data-expanded-path'):
data_expanded_path = tag['data-expanded-path'] data_expanded_path = tag['data-expanded-path']
if 'video' in data_expanded_path: if 'video' in data_expanded_path:
# TODO Optionally download video from twitter and upload to mastodon # Download video from twitter and store in filesystem
tweet_uri = "https://twitter.com/" + data_expanded_path.strip("/video/1") tweet_uri = "https://twitter.com/" + data_expanded_path.strip("/video/1")
twitter_dl = twitterdl.TwitterDownloader(tweet_uri, target_width=500, debug=1) twitter_dl = twitterdl.TwitterDownloader(tweet_uri, target_width=500, debug=1)
try: try:
@@ -378,6 +379,15 @@ def main(argv):
requests.exceptions.TooManyRedirects): requests.exceptions.TooManyRedirects):
pass pass
# Check if video was downloaded
sid = re.search('/([0-9]+)$', tweet_id)
status_id = sid.groups()[0]
video_path = Path('./output') / author_account / status_id
video_file_list = list(video_path.glob('*.mp4'))
video_file = None
if len(video_file_list) != 0:
video_file = video_file_list[0].absolute().as_posix()
# Add dictionary with content of tweet to list # Add dictionary with content of tweet to list
tweet = { tweet = {
"author": author, "author": author,
@@ -386,6 +396,7 @@ def main(argv):
"timestamp": timestamp, "timestamp": timestamp,
"tweet_id": tweet_id, "tweet_id": tweet_id,
"tweet_text": tweet_text, "tweet_text": tweet_text,
"video": video_file,
"photos": photos, "photos": photos,
} }
tweets.append(tweet) tweets.append(tweet)