mirror of
https://gitlab.com/jeancf/twoot.git
synced 2025-02-17 13:58:11 +00:00
Merge branch timeout into vid_dl
This commit is contained in:
commit
506c4a05b7
32
twoot.py
32
twoot.py
|
@ -41,6 +41,9 @@ MAX_REC_COUNT = 50
|
||||||
# One of logging.DEBUG, logging.INFO, logging.WARNING, logging.ERROR, logging.CRITICAL
|
# One of logging.DEBUG, logging.INFO, logging.WARNING, logging.ERROR, logging.CRITICAL
|
||||||
LOGGING_LEVEL = logging.DEBUG
|
LOGGING_LEVEL = logging.DEBUG
|
||||||
|
|
||||||
|
# How many seconds to wait for a server to respond before giving up on a download (except video download)
|
||||||
|
HTTPS_REQ_TIMEOUT = 10
|
||||||
|
|
||||||
NITTER_URLS = [
|
NITTER_URLS = [
|
||||||
'https://nitter.42l.fr',
|
'https://nitter.42l.fr',
|
||||||
'https://nitter.pussthecat.org',
|
'https://nitter.pussthecat.org',
|
||||||
|
@ -139,14 +142,20 @@ def process_attachments(nitter_url, attachments_container, get_vids, twit_accoun
|
||||||
# Open directory for writing file
|
# Open directory for writing file
|
||||||
orig_dir = os.getcwd()
|
orig_dir = os.getcwd()
|
||||||
os.chdir(video_path)
|
os.chdir(video_path)
|
||||||
with requests.get(gif_video_file, stream=True) as r:
|
with requests.get(gif_video_file, stream=True, timeout=HTTPS_REQ_TIMEOUT) as r:
|
||||||
r.raise_for_status()
|
try:
|
||||||
# Download chunks and write them to file
|
# Raise an exception if the response status is an HTTP error (4xx or 5xx)
|
||||||
with open('gif_video.mp4', 'wb') as f:
|
r.raise_for_status()
|
||||||
for chunk in r.iter_content(chunk_size=16 * 1024):
|
# Download chunks and write them to file
|
||||||
f.write(chunk)
|
with open('gif_video.mp4', 'wb') as f:
|
||||||
|
for chunk in r.iter_content(chunk_size=16 * 1024):
|
||||||
|
f.write(chunk)
|
||||||
|
|
||||||
|
logging.debug('Downloaded video of GIF animation from attachments')
|
||||||
|
except: # Don't do anything if video can't be found or downloaded
|
||||||
|
logging.debug('Could not download video of GIF animation from attachments')
|
||||||
|
pass
|
||||||
|
|
||||||
logging.debug('downloaded video of GIF animation from attachments')
|
|
||||||
|
|
||||||
# Close directory
|
# Close directory
|
||||||
os.chdir(orig_dir)
|
os.chdir(orig_dir)
|
||||||
|
@ -340,10 +349,13 @@ def main(argv):
|
||||||
|
|
||||||
# Download twitter page of user.
|
# Download twitter page of user.
|
||||||
try:
|
try:
|
||||||
twit_account_page = session.get(url, headers=headers)
|
twit_account_page = session.get(url, headers=headers, timeout=HTTPS_REQ_TIMEOUT)
|
||||||
except requests.exceptions.ConnectionError:
|
except requests.exceptions.ConnectionError:
|
||||||
logging.fatal('Host did not respond when trying to download ' + url)
|
logging.fatal('Host did not respond when trying to download ' + url)
|
||||||
exit(-1)
|
exit(-1)
|
||||||
|
except requests.exceptions.Timeout:
|
||||||
|
logging.fatal(nitter_url + ' took too long to respond')
|
||||||
|
exit(-1)
|
||||||
|
|
||||||
# Verify that download worked
|
# Verify that download worked
|
||||||
if twit_account_page.status_code != 200:
|
if twit_account_page.status_code != 200:
|
||||||
|
@ -470,7 +482,7 @@ def main(argv):
|
||||||
link_url = m.group(0)
|
link_url = m.group(0)
|
||||||
if link_url.endswith(".html"): # Only process a web page
|
if link_url.endswith(".html"): # Only process a web page
|
||||||
try:
|
try:
|
||||||
r = requests.get(link_url, timeout=10)
|
r = requests.get(link_url, timeout=HTTPS_REQ_TIMEOUT)
|
||||||
if r.status_code == 200:
|
if r.status_code == 200:
|
||||||
# Matches the first instance of either twitter:image or twitter:image:src meta tag
|
# Matches the first instance of either twitter:image or twitter:image:src meta tag
|
||||||
match = re.search(r'<meta name="twitter:image(?:|:src)" content="(.+?)".*?>', r.text)
|
match = re.search(r'<meta name="twitter:image(?:|:src)" content="(.+?)".*?>', r.text)
|
||||||
|
@ -558,7 +570,7 @@ def main(argv):
|
||||||
# Download picture
|
# Download picture
|
||||||
try:
|
try:
|
||||||
logging.debug('downloading picture')
|
logging.debug('downloading picture')
|
||||||
media = requests.get(photo)
|
media = requests.get(photo, timeout=HTTPS_REQ_TIMEOUT)
|
||||||
except: # Picture cannot be downloaded for any reason
|
except: # Picture cannot be downloaded for any reason
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user