mirror of
https://gitlab.com/jeancf/twoot.git
synced 2024-11-27 14:01:13 +00:00
Extract addresses of avatar and banner images
This commit is contained in:
parent
54c59fa676
commit
b8bd0a12f5
20
twoot.py
20
twoot.py
|
@@ -29,7 +29,7 @@ import sqlite3
|
|||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
from urllib.parse import urlparse, parse_qsl, urlencode, urlunparse, urljoin
|
||||
from urllib.parse import urlparse, parse_qsl, urlencode, urlunparse, urljoin, unquote
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup, element
|
||||
|
@@ -699,6 +699,8 @@ def main(argv):
|
|||
mastodon_account TEXT, tweet_id TEXT, toot_id TEXT)''')
|
||||
db.execute('''CREATE INDEX IF NOT EXISTS main_index ON toots (twitter_account,
|
||||
mastodon_instance, mastodon_account, tweet_id)''')
|
||||
db.execute('''CREATE TABLE IF NOT EXISTS profiles (mastodon_account TEXT, avatar text, banner, text)''')
|
||||
db.execute('''CREATE INDEX IF NOT EXIsTS profile_index ON profiles (mastodon_account)''')
|
||||
|
||||
# Select random nitter instance to fetch updates from
|
||||
nitter_url = NITTER_URLS[random.randint(0, len(NITTER_URLS) - 1)]
|
||||
|
@@ -748,13 +750,23 @@ def main(argv):
|
|||
logging.debug('Nitter page downloaded successfully from ' + url)
|
||||
|
||||
# DEBUG: Save page to file
|
||||
# of = open(toml['config']['twitter_account'] + '.html', 'w')
|
||||
# of.write(twit_account_page.text)
|
||||
# of.close()
|
||||
of = open(TOML['config']['twitter_account'] + '.html', 'w')
|
||||
of.write(twit_account_page.text)
|
||||
of.close()
|
||||
|
||||
# Make soup
|
||||
soup = BeautifulSoup(twit_account_page.text, 'html.parser')
|
||||
|
||||
# Extract avatar picture address
|
||||
avatar = 'https://' + unquote(soup.find('div', class_='profile-card-info').findChild('a').findChild('img').get('src').removeprefix('/pic/'))
|
||||
|
||||
# Extract banner picture address
|
||||
banner = unquote(soup.find('div', class_='profile-banner').findChild('a').findChild('img').get('src').removeprefix('/pic/'))
|
||||
|
||||
print(avatar)
|
||||
print(banner)
|
||||
exit(0)
|
||||
|
||||
# Extract twitter timeline
|
||||
timeline = soup.find_all('div', class_='timeline-item')
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user