From 5b23c66b6b0a10fe5c2e142830612988f0bb6904 Mon Sep 17 00:00:00 2001
From: jeancf <jc@noirextreme.com>
Date: Fri, 16 Aug 2019 15:27:55 +0200
Subject: [PATCH] Added option to scrape linked page if no pic is provided in
 tweet

---
 twoot.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)
diff --git a/twoot.py b/twoot.py
index fe6d789..6723fe9 100755
--- a/twoot.py
+++ b/twoot.py
@@ -213,6 +213,19 @@ def main(argv):
             if len(videos) != 0:
                 tweet_text += '\n\n[Embedded video in original tweet]'
 
+        # If the tweet itself carries no photos, fall back to the picture referenced by the
+        # "twitter:image" meta tag of the first page linked in the tweet text
+        if not photos:
+            m = re.search(r"http[^ \n\xa0]*", tweet_text)
+            if m is not None:
+                link_url = m.group(0)
+                r = requests.get(link_url)
+                if r.status_code == 200:
+                    # Matches the first instance of either twitter:image or twitter:image:src meta tag
+                    match = re.search(r'<meta name="twitter:image(?:|:src)" content="(.+?)".*?>', r.text)
+                    if match is not None:
+                        photos.append(match.group(1))
+
         # Add dictionary with content of tweet to list
         tweet = {
             "author": author,