From 5b23c66b6b0a10fe5c2e142830612988f0bb6904 Mon Sep 17 00:00:00 2001
From: jeancf
Date: Fri, 16 Aug 2019 15:27:55 +0200
Subject: [PATCH] Added option to scrape linked page if no pic is provided in tweet

---
 twoot.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/twoot.py b/twoot.py
index fe6d789..6723fe9 100755
--- a/twoot.py
+++ b/twoot.py
@@ -213,6 +213,19 @@ def main(argv):
         if len(videos) != 0:
             tweet_text += '\n\n[Embedded video in original tweet]'
 
+        # If no media was specifically added in the tweet, try to get the first picture
+        # with "twitter:image" meta tag in first linked page in tweet text
+        if not photos:
+            m = re.search(r"http[^ \n\xa0]*", tweet_text)
+            if m is not None:
+                link_url = m.group(0)
+                r = requests.get(link_url)
+                if r.status_code == 200:
+                    # Matches the first instance of either twitter:image or twitter:image:src meta tag
+                    match = re.search(r'<meta name="twitter:image(?:|:src)" content="(.+?)".*?>', r.text)
+                    if match is not None:
+                        photos.append(match.group(1))
+
         # Add dictionary with content of tweet to list
         tweet = {
             "author": author,