Added option to scrape linked page if no pic is provided in tweet

2024-11-27 22:11:11 +00:00 · 2019-08-16 15:27:55 +02:00 · 2019-08-16 15:27:55 +02:00 · 5b23c66b6b
commit 5b23c66b6b
parent 8059b062ac
1 changed files with 13 additions and 0 deletions
--- a/twoot.py
+++ b/twoot.py
@@ -213,6 +213,19 @@ def main(argv):
            if len(videos) != 0:
                tweet_text += '\n\n[Embedded video in original tweet]'
        # If the tweet has no photos attached, fall back to the first picture advertised
        # by a "twitter:image" meta tag on the first page linked in the tweet text
        if not photos:
            m = re.search(r"http[^ \n\xa0]*", tweet_text)
            if m is not None:
                link_url = m.group(0)
                r = requests.get(link_url)
                if r.status_code == 200:
                    # Matches the first instance of either twitter:image or twitter:image:src meta tag
                    match = re.search(r'<meta name="twitter:image(?:|:src)" content="(.+?)".*?>', r.text)
                    if match is not None:
                        photos.append(match.group(1))
        # Add dictionary with content of tweet to list
        tweet = {
            "author": author,