From 9c2438382e49a7b7482c8af5b6b1841e7a90fe9b Mon Sep 17 00:00:00 2001 From: jeancf Date: Wed, 2 Nov 2022 18:38:23 +0100 Subject: [PATCH 1/2] Added timeout to get request --- twoot.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/twoot.py b/twoot.py index 88f268f..745f47e 100755 --- a/twoot.py +++ b/twoot.py @@ -342,10 +342,13 @@ def main(argv): # Download twitter page of user. try: - twit_account_page = session.get(url, headers=headers) + twit_account_page = session.get(url, headers=headers, timeout=5) except requests.exceptions.ConnectionError: logging.fatal('Host did not respond when trying to download ' + url) exit(-1) + except requests.exceptions.Timeout: + logging.fatal(nitter_url + ' took too long to respond') + exit(-1) # Verify that download worked if twit_account_page.status_code != 200: From 11b88e729a23341e68987635eac42f085bb591ca Mon Sep 17 00:00:00 2001 From: jeancf Date: Sun, 6 Nov 2022 11:50:08 +0100 Subject: [PATCH 2/2] Added timeout to all downloads --- twoot.py | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/twoot.py b/twoot.py index 745f47e..620ac06 100755 --- a/twoot.py +++ b/twoot.py @@ -41,6 +41,9 @@ MAX_REC_COUNT = 50 # One of logging.DEBUG, logging.INFO, logging.WARNING, logging.ERROR, logging.CRITICAL LOGGING_LEVEL = logging.INFO +# How many seconds to wait before giving up on a download (except video download) +HTTPS_REQ_TIMEOUT = 10 + NITTER_URLS = [ 'https://nitter.42l.fr', 'https://nitter.pussthecat.org', @@ -139,14 +142,20 @@ def process_attachments(nitter_url, attachments_container, get_vids, twit_accoun # Open directory for writing file orig_dir = os.getcwd() os.chdir(video_path) - with requests.get(gif_video_file, stream=True) as r: - r.raise_for_status() - # Download chunks and write them to file - with open('gif_video.mp4', 'wb') as f: - for chunk in r.iter_content(chunk_size=16*1024): - f.write(chunk) + with requests.get(gif_video_file, stream=True, timeout=HTTPS_REQ_TIMEOUT) as r: + try: + # Raise exception if response code is not 200 + r.raise_for_status() + # Download chunks and write them to file + with open('gif_video.mp4', 'wb') as f: + for chunk in r.iter_content(chunk_size=16 * 1024): + f.write(chunk) + + logging.debug('Downloaded video of GIF animation from attachments') + except: # Don't do anything if video can't be found or downloaded + logging.debug('Could not download video of GIF animation from attachments') + pass - logging.debug('downloaded video of GIF animation from attachments') # Close directory os.chdir(orig_dir) @@ -342,7 +351,7 @@ def main(argv): # Download twitter page of user. try: - twit_account_page = session.get(url, headers=headers, timeout=5) + twit_account_page = session.get(url, headers=headers, timeout=HTTPS_REQ_TIMEOUT) except requests.exceptions.ConnectionError: logging.fatal('Host did not respond when trying to download ' + url) exit(-1) @@ -475,7 +484,7 @@ def main(argv): link_url = m.group(0) if link_url.endswith(".html"): # Only process a web page try: - r = requests.get(link_url, timeout=10) + r = requests.get(link_url, timeout=HTTPS_REQ_TIMEOUT) if r.status_code == 200: # Matches the first instance of either twitter:image or twitter:image:src meta tag match = re.search(r'', r.text) @@ -567,7 +576,7 @@ def main(argv): # Download picture try: logging.debug('downloading picture') - media = requests.get(photo) + media = requests.get(photo, timeout=HTTPS_REQ_TIMEOUT) except: # Picture cannot be downloaded for any reason pass