Stop trying to regex-search for a string in a linked picture file

This commit is contained in:
jeancf 2020-09-10 13:09:51 +02:00
parent 3500853dc8
commit 67fdbba510

View File

@@ -401,21 +401,22 @@ def main(argv):
m = re.search(r"http[^ \n\xa0]*", tweet_text) m = re.search(r"http[^ \n\xa0]*", tweet_text)
if m is not None: if m is not None:
link_url = m.group(0) link_url = m.group(0)
try: if link_url.endswith(".html"): # Only process a web page
r = requests.get(link_url, timeout=10) try:
if r.status_code == 200: r = requests.get(link_url, timeout=10)
# Matches the first instance of either twitter:image or twitter:image:src meta tag if r.status_code == 200:
match = re.search(r'<meta name="twitter:image(?:|:src)" content="(.+?)".*?>', r.text) # Matches the first instance of either twitter:image or twitter:image:src meta tag
if match is not None: match = re.search(r'<meta name="twitter:image(?:|:src)" content="(.+?)".*?>', r.text)
url = match.group(1).replace('&amp;', '&') # Remove HTML-safe encoding from URL if any if match is not None:
photos.append(url) url = match.group(1).replace('&amp;', '&') # Remove HTML-safe encoding from URL if any
# Give up if anything goes wrong photos.append(url)
except (requests.exceptions.ConnectionError, # Give up if anything goes wrong
requests.exceptions.Timeout, except (requests.exceptions.ConnectionError,
requests.exceptions.ContentDecodingError, requests.exceptions.Timeout,
requests.exceptions.TooManyRedirects, requests.exceptions.ContentDecodingError,
requests.exceptions.MissingSchema): requests.exceptions.TooManyRedirects,
pass requests.exceptions.MissingSchema):
pass
# Check if video was downloaded # Check if video was downloaded
video_file = None video_file = None