Created get_timeline function

This commit is contained in:
jeancf 2023-07-12 22:02:06 +02:00
parent 15663af09d
commit b842f6d471

View File

@ -192,6 +192,52 @@ def build_config(args):
exit(-1)
def get_timeline(url):
# Initiate session
session = requests.Session()
# Get a copy of the default headers that requests would use
headers = requests.utils.default_headers()
# Update default headers with randomly selected user agent
headers.update(
{
'User-Agent': USER_AGENTS[random.randint(0, len(USER_AGENTS) - 1)],
'Cookie': 'replaceTwitter=; replaceYouTube=; hlsPlayback=on; proxyVideos=',
}
)
# Download twitter page of user
try:
twit_account_page = session.get(url, headers=headers, timeout=HTTPS_REQ_TIMEOUT)
except requests.exceptions.ConnectionError:
logging.fatal('Host did not respond when trying to download ' + url)
shutdown(-1)
except requests.exceptions.Timeout:
logging.fatal(url + ' took too long to respond')
shutdown(-1)
# Verify that download worked
if twit_account_page.status_code != 200:
logging.fatal('The Nitter page did not download correctly from ' + url + ' (' + str(
twit_account_page.status_code) + '). Aborting')
shutdown(-1)
logging.debug('Nitter page downloaded successfully from ' + url)
# DEBUG: Save page to file
# of = open(TOML['config']['twitter_account'] + '.html', 'w')
# of.write(twit_account_page.text)
# of.close()
# Make soup
soup = BeautifulSoup(twit_account_page.text, 'html.parser')
# Extract twitter timeline
timeline = soup.find_all(has_class_timeline_item_but_not_thread)
return soup, timeline
def update_profile(nitter_url, soup, sql, mast_password):
"""
Update profile on Mastodon
@ -841,53 +887,12 @@ def main(argv):
# To store content of all tweets from this user
tweets = []
# Initiate session
session = requests.Session()
# Get a copy of the default headers that requests would use
headers = requests.utils.default_headers()
# Update default headers with randomly selected user agent
headers.update(
{
'User-Agent': USER_AGENTS[random.randint(0, len(USER_AGENTS) - 1)],
'Cookie': 'replaceTwitter=; replaceYouTube=; hlsPlayback=on; proxyVideos=',
}
)
url = nitter_url + '/' + TOML['config']['twitter_account']
# Use different page if we need to handle replies
if TOML['options']['post_reply_to']:
url += '/with_replies'
# Download twitter page of user
try:
twit_account_page = session.get(url, headers=headers, timeout=HTTPS_REQ_TIMEOUT)
except requests.exceptions.ConnectionError:
logging.fatal('Host did not respond when trying to download ' + url)
shutdown(-1)
except requests.exceptions.Timeout:
logging.fatal(nitter_url + ' took too long to respond')
shutdown(-1)
# Verify that download worked
if twit_account_page.status_code != 200:
logging.fatal('The Nitter page did not download correctly from ' + url + ' (' + str(
twit_account_page.status_code) + '). Aborting')
shutdown(-1)
logging.debug('Nitter page downloaded successfully from ' + url)
# DEBUG: Save page to file
# of = open(TOML['config']['twitter_account'] + '.html', 'w')
# of.write(twit_account_page.text)
# of.close()
# Make soup
soup = BeautifulSoup(twit_account_page.text, 'html.parser')
# Extract twitter timeline
timeline = soup.find_all(has_class_timeline_item_but_not_thread)
soup, timeline = get_timeline(url)
logging.info('Processing ' + str(len(timeline)) + ' tweets found in timeline')