Added tolerance for ConnectionError when visiting the linked page to extract a picture

This commit is contained in:
JC Francois 2019-08-31 20:30:40 +02:00
parent 1b1faf2d59
commit 1ec03c7d81

View File

@@ -219,12 +219,15 @@ def main(argv):
m = re.search(r"http[^ \n\xa0]*", tweet_text) m = re.search(r"http[^ \n\xa0]*", tweet_text)
if m is not None: if m is not None:
link_url = m.group(0) link_url = m.group(0)
r = requests.get(link_url) try:
if r.status_code == 200: r = requests.get(link_url)
# Matches the first instance of either twitter:image or twitter:image:src meta tag if r.status_code == 200:
match = re.search(r'<meta name="twitter:image(?:|:src)" content="(.+?)".*?>', r.text) # Matches the first instance of either twitter:image or twitter:image:src meta tag
if match is not None: match = re.search(r'<meta name="twitter:image(?:|:src)" content="(.+?)".*?>', r.text)
photos.append(match.group(1)) if match is not None:
photos.append(match.group(1))
except ConnectionError:
pass
# Add dictionary with content of tweet to list # Add dictionary with content of tweet to list
tweet = { tweet = {