diff --git a/twoot.py b/twoot.py
index 78454d0..04d219c 100755
--- a/twoot.py
+++ b/twoot.py
@@ -29,7 +29,7 @@ import sqlite3
 import sys
 import time
 from pathlib import Path
-from urllib.parse import urlparse, parse_qsl, urlencode, urlunparse, urljoin
+from urllib.parse import urlparse, parse_qsl, urlencode, urlunparse, urljoin, unquote
 
 import requests
 from bs4 import BeautifulSoup, element
@@ -699,6 +699,8 @@ def main(argv):
                            mastodon_account TEXT, tweet_id TEXT, toot_id TEXT)''')
     db.execute('''CREATE INDEX IF NOT EXISTS main_index ON toots (twitter_account, mastodon_instance, mastodon_account, tweet_id)''')
+    db.execute('''CREATE TABLE IF NOT EXISTS profiles (mastodon_account TEXT, avatar TEXT, banner TEXT)''')
+    db.execute('''CREATE INDEX IF NOT EXISTS profile_index ON profiles (mastodon_account)''')
 
     # Select random nitter instance to fetch updates from
     nitter_url = NITTER_URLS[random.randint(0, len(NITTER_URLS) - 1)]
 
@@ -748,13 +750,19 @@ def main(argv):
     logging.debug('Nitter page downloaded successfully from ' + url)
 
     # DEBUG: Save page to file
-    # of = open(toml['config']['twitter_account'] + '.html', 'w')
+    # of = open(TOML['config']['twitter_account'] + '.html', 'w')
     # of.write(twit_account_page.text)
     # of.close()
 
     # Make soup
     soup = BeautifulSoup(twit_account_page.text, 'html.parser')
 
+    # Extract avatar picture address
+    avatar = 'https://' + unquote(soup.find('div', class_='profile-card-info').findChild('a').findChild('img').get('src').removeprefix('/pic/'))
+
+    # Extract banner picture address
+    banner = 'https://' + unquote(soup.find('div', class_='profile-banner').findChild('a').findChild('img').get('src').removeprefix('/pic/'))
+
     # Extract twitter timeline
     timeline = soup.find_all('div', class_='timeline-item')
 