Replaced twitterdl.py with youtube-dl

2025-05-07 03:53:56 +00:00 · 2022-11-03 16:53:17 +01:00 · 2022-11-03 16:53:17 +01:00 · 10a329fdb1
commit 10a329fdb1
parent b386eae528
2 changed files with 4 additions and 252 deletions
--- a/twitterdl.py
+++ b/twitterdl.py
@@ -1,243 +0,0 @@
-#! /usr/bin/env python3
-
-"""
-    This file is a modification of
-    https://github.com/h4ckninja/twitter-video-downloader/
-	The original package has an unknown license. The modified version
-	is released here under GPL v3.
-
-    Copyright (C) 2019  Jean-Christophe Francois
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-"""
-
-import argparse
-import requests
-import json
-import urllib.parse
-import m3u8
-from pathlib import Path
-import re
-import ffmpeg
-import shutil
-import copy
-
-class TwitterDownloader:
-	"""
-	tw-dl offers the ability to download videos from Twitter feeds.
-
-	**Disclaimer** I wrote this to recover a video for which the original was lost. Consider copyright before downloading
-	content you do not own.
-	"""
-	video_player_prefix = 'https://twitter.com/i/videos/tweet/'
-	video_api = 'https://api.twitter.com/1.1/videos/tweet/config/'
-	tweet_data = {}
-
-	def __init__(self, tweet_url, output_dir='./output', target_width=0, debug=0):
-		self.tweet_url = tweet_url
-		self.output_dir = output_dir
-		self.target_width = int(target_width)
-		self.debug = debug
-
-		if debug > 2:
-			self.debug = 2
-
-		"""
-		We split on ? to clean up the URL. Sharing tweets, for example, 
-		will add ? with data about which device shared it.
-		The rest is just getting the user and ID to work with.
-		"""
-		self.tweet_data['tweet_url'] = tweet_url.split('?', 1)[0]
-		self.tweet_data['user'] = self.tweet_data['tweet_url'].split('/')[3]
-		self.tweet_data['id'] = self.tweet_data['tweet_url'].split('/')[5]
-
-		output_path = Path(output_dir)
-		storage_dir = output_path / self.tweet_data['user'] / self.tweet_data['id']
-		Path.mkdir(storage_dir, parents=True, exist_ok=True)
-		self.storage = str(storage_dir)
-
-		self.requests = requests.Session()
-
-	def download(self):
-		self.__debug('Tweet URL', self.tweet_data['tweet_url'])
-
-		# Get the bearer token
-		token = self.__get_bearer_token()
-
-		# Get the M3u8 file - this is where rate limiting has been happening
-		video_host, playlist = self.__get_playlist(token)
-
-		if playlist.is_variant:
-			if self.target_width == 0:
-				print('[+] Multiple resolutions found. Slurping all resolutions.')
-			else:
-				print('[+] Multiple resolutions found. Selecting the one closest to target width of ' + str(self.target_width))
-				playlist = self.__filter_playlist(playlist)
-
-			for plist in playlist.playlists:
-				resolution = str(plist.stream_info.resolution[0]) + 'x' + str(plist.stream_info.resolution[1])
-				resolution_file = Path(self.storage) / Path(resolution + '.mp4')
-
-				print('[+] Downloading ' + resolution)
-
-				playlist_url = video_host + plist.uri
-
-				ts_m3u8_response = self.requests.get(playlist_url, headers = {'Authorization': None})
-				ts_m3u8_parse = m3u8.loads(ts_m3u8_response.text)
-
-				ts_list = []
-				ts_full_file_list = []
-
-				for ts_uri in ts_m3u8_parse.segments.uri:
-					# ts_list.append(video_host + ts_uri)
-
-					ts_file = requests.get(video_host + ts_uri)
-					fname = ts_uri.split('/')[-1]
-					ts_path = Path(self.storage) / Path(fname)
-					ts_list.append(ts_path)
-
-					ts_path.write_bytes(ts_file.content)
-
-				ts_full_file = Path(self.storage) / Path(resolution + '.ts')
-				ts_full_file = str(ts_full_file)
-				ts_full_file_list.append(ts_full_file)
-
-				# Shamelessly taken from https://stackoverflow.com/questions/13613336/python-concatenate-text-files/27077437#27077437
-				with open(str(ts_full_file), 'wb') as wfd:
-					for f in ts_list:
-						with open(f, 'rb') as fd:
-							shutil.copyfileobj(fd, wfd, 1024 * 1024 * 10)
-
-				for ts in ts_full_file_list:
-					print('\t[*] Doing the magic ...')
-					ffmpeg\
-						.input(ts)\
-						.output(str(resolution_file), acodec='copy', vcodec='libx264', format='mp4', loglevel='error')\
-						.overwrite_output()\
-						.run()
-
-				print('\t[+] Doing cleanup')
-
-				for ts in ts_list:
-					p = Path(ts)
-					p.unlink()
-
-				for ts in ts_full_file_list:
-					p = Path(ts)
-					p.unlink()
-
-		else:
-			print('[-] Sorry, single resolution video download is not yet implemented. Please submit a bug report with the link to the tweet.')
-
-
-	def __get_bearer_token(self):
-		video_player_url = self.video_player_prefix + self.tweet_data['id']
-		video_player_response = self.requests.get(video_player_url).text
-		self.__debug('Video Player Body', '', video_player_response)
-
-		js_file_url = re.findall('src="(.*js)', video_player_response)[0]
-		js_file_response = self.requests.get(js_file_url).text
-		self.__debug('JS File Body', '', js_file_response)
-
-		bearer_token_pattern = re.compile('Bearer ([a-zA-Z0-9%-])+')
-		bearer_token = bearer_token_pattern.search(js_file_response)
-		bearer_token = bearer_token.group(0)
-		self.requests.headers.update({'Authorization': bearer_token})
-		self.__debug('Bearer Token', bearer_token)
-		self.__get_guest_token()
-
-		return bearer_token
-
-
-	def __get_playlist(self, token):
-		player_config_req = self.requests.get(self.video_api + self.tweet_data['id'] + '.json')
-
-		player_config = json.loads(player_config_req.text)
-
-		if 'errors' not in player_config:
-			self.__debug('Player Config JSON', '', json.dumps(player_config))
-			m3u8_url = player_config['track']['playbackUrl']
-
-		else:
-			self.__debug('Player Config JSON - Error', json.dumps(player_config['errors']))
-			print('[-] Rate limit exceeded. Could not recover. Try again later.')
-			sys.exit(1)
-
-		# Get m3u8
-		m3u8_response = self.requests.get(m3u8_url)
-		self.__debug('M3U8 Response', '', m3u8_response.text)
-
-		m3u8_url_parse = urllib.parse.urlparse(m3u8_url)
-		video_host = m3u8_url_parse.scheme + '://' + m3u8_url_parse.hostname
-
-		m3u8_parse = m3u8.loads(m3u8_response.text)
-
-		return [video_host, m3u8_parse]
-
-
-	"""
-	Thanks to @devkarim for this fix: https://github.com/h4ckninja/twitter-video-downloader/issues/2#issuecomment-538773026
-	"""
-	def __get_guest_token(self):
-		res = self.requests.post("https://api.twitter.com/1.1/guest/activate.json")
-		res_json = json.loads(res.text)
-		self.requests.headers.update({'x-guest-token': res_json.get('guest_token')})
-
-	def __filter_playlist(self, playlist):
-		# Make a copy of the playlist object and reset 'playlists' member
-		new_playlist = copy.deepcopy(playlist)
-		new_playlist.playlists = []
-
-		# Arbitrary high number that any resolution will beat
-		min_dist_2_target = 100000
-
-		for instance in playlist.playlists:
-			# Calculate how far the width of considered resolution is from our target
-			dist_2_target = abs(instance.stream_info.resolution[0] - self.target_width)
-			if dist_2_target < min_dist_2_target:
-				min_dist_2_target = dist_2_target
-				# Replace the only item of new_playlist with this one
-				new_playlist.playlists = []
-				new_playlist.playlists.append(instance)
-
-		return new_playlist
-
-	def __debug(self, msg_prefix, msg_body, msg_body_full = ''):
-		if self.debug == 0:
-			return
-
-		if self.debug == 1:
-			print('[Debug] ' + '[' + msg_prefix + ']' + ' ' + msg_body)
-
-		if self.debug == 2:
-			print('[Debug+] ' + '[' + msg_prefix + ']' + ' ' + msg_body + ' - ' + msg_body_full)
-
-
-if __name__ == '__main__':
-	import sys
-
-	if sys.version_info[0] == 2:
-		print('Python3 is required.')
-		sys.exit(1)
-
-	parser = argparse.ArgumentParser()
-	parser.add_argument('tweet_url', help='The video URL on Twitter (https://twitter.com/<user>/status/<id>).')
-	parser.add_argument('-o', '--output', dest='output', default='./output', help='The directory to output to. The structure will be: <output>/<user>/<id>.')
-	parser.add_argument('-d', '--debug', default=0, action='count', dest='debug', help='Debug. Add more to print out response bodies (maximum 2).')
-	parser.add_argument('-w', '--target_width', dest='target_width', default=0, help='In pixels. Download only the video resolution closest to this value')
-
-	args = parser.parse_args()
-
-	twitter_dl = TwitterDownloader(args.tweet_url, output_dir=args.output, target_width=args.target_width, debug=args.debug)
-	twitter_dl.download()
--- a/twoot.py
+++ b/twoot.py
@@ -39,7 +39,7 @@ MAX_REC_COUNT = 50

 # Set the desired verbosity of logging
 # One of logging.DEBUG, logging.INFO, logging.WARNING, logging.ERROR, logging.CRITICAL
-LOGGING_LEVEL = logging.INFO
+LOGGING_LEVEL = logging.DEBUG

 NITTER_URLS = [
    'https://nitter.42l.fr',
@@ -143,7 +143,7 @@ def process_attachments(nitter_url, attachments_container, get_vids, twit_accoun
            r.raise_for_status()
            # Download chunks and write them to file
            with open('gif_video.mp4', 'wb') as f:
-                for chunk in r.iter_content(chunk_size=16*1024):
+                for chunk in r.iter_content(chunk_size=16 * 1024):
                    f.write(chunk)

        logging.debug('downloaded video of GIF animation from attachments')
@@ -155,15 +155,14 @@ def process_attachments(nitter_url, attachments_container, get_vids, twit_accoun
    vid_in_tweet = False
    vid_class = attachments_container.find('div', class_='video-container')
    if vid_class is not None:
-        video_file = os.path.join('https://twitter.com', author_account, 'status', status_id)
        if get_vids:
            # Download video from twitter and store in filesystem. Running as subprocess to avoid
            # requirement to install ffmpeg and ffmpeg-python for those that do not want to post videos
            try:
+                video_file = os.path.join('https://twitter.com', author_account, 'status', status_id)
                # Set output location to ./output/twit_account/status_id
                dl_feedback = subprocess.run(
-                    ["./twitterdl.py", video_file, "-ooutput/" + twit_account + "/" + status_id, "-w 500"],
-                    capture_output=True,
+                    ["youtube-dl", video_file, "-ooutput/" + twit_account + "/" + status_id + "/%(id)s.%(ext)s", "-f best[width<=500]"],
                    timeout=300
                )
                if dl_feedback.returncode != 0:
@@ -494,10 +493,6 @@ def main(argv):

        video_path = Path('./output') / twit_account / status_id
        if video_path.exists():
-            # Take the first subdirectory of video path (named after original poster of video)
-            video_path = [p for p in video_path.iterdir() if p.is_dir()][0]
-            # Take again the first subdirectory of video path (named after status id of original post where video is attached)
-            video_path = [p for p in video_path.iterdir() if p.is_dir()][0]
            # list video files
            video_file_list = list(video_path.glob('*.mp4'))
            if len(video_file_list) != 0: