mirror of
https://gitlab.com/jeancf/twoot.git
synced 2024-11-23 20:11:11 +00:00
Added option to scrape the linked page for a picture if none is provided in the tweet
This commit is contained in:
parent
8059b062ac
commit
5b23c66b6b
13
twoot.py
13
twoot.py
|
@ -213,6 +213,19 @@ def main(argv):
|
||||||
if len(videos) != 0:
|
if len(videos) != 0:
|
||||||
tweet_text += '\n\n[Embedded video in original tweet]'
|
tweet_text += '\n\n[Embedded video in original tweet]'
|
||||||
|
|
||||||
|
# If no media was specifically added in the tweet, try to get the first picture
|
||||||
|
# with "twitter:image" meta tag in first linked page in tweet text
|
||||||
|
if not photos:
|
||||||
|
m = re.search(r"http[^ \n\xa0]*", tweet_text)
|
||||||
|
if m is not None:
|
||||||
|
link_url = m.group(0)
|
||||||
|
r = requests.get(link_url)
|
||||||
|
if r.status_code == 200:
|
||||||
|
# Matches the first instance of either twitter:image or twitter:image:src meta tag
|
||||||
|
match = re.search(r'<meta name="twitter:image(?:|:src)" content="(.+?)".*?>', r.text)
|
||||||
|
if match is not None:
|
||||||
|
photos.append(match.group(1))
|
||||||
|
|
||||||
# Add dictionary with content of tweet to list
|
# Add dictionary with content of tweet to list
|
||||||
tweet = {
|
tweet = {
|
||||||
"author": author,
|
"author": author,
|
||||||
|
|
Loading…
Reference in New Issue
Block a user