diff --git a/twoot.py b/twoot.py
index c6827b0..ba1601e 100755
--- a/twoot.py
+++ b/twoot.py
@@ -192,6 +192,52 @@ def build_config(args):
         exit(-1)
 
 
+def get_timeline(url):
+    # Initiate session
+    session = requests.Session()
+
+    # Get a copy of the default headers that requests would use
+    headers = requests.utils.default_headers()
+
+    # Update default headers with randomly selected user agent
+    headers.update(
+        {
+            'User-Agent': USER_AGENTS[random.randint(0, len(USER_AGENTS) - 1)],
+            'Cookie': 'replaceTwitter=; replaceYouTube=; hlsPlayback=on; proxyVideos=',
+        }
+    )
+
+    # Download twitter page of user
+    try:
+        twit_account_page = session.get(url, headers=headers, timeout=HTTPS_REQ_TIMEOUT)
+    except requests.exceptions.ConnectionError:
+        logging.fatal('Host did not respond when trying to download ' + url)
+        shutdown(-1)
+    except requests.exceptions.Timeout:
+        logging.fatal(url + ' took too long to respond')
+        shutdown(-1)
+
+    # Verify that download worked
+    if twit_account_page.status_code != 200:
+        logging.fatal('The Nitter page did not download correctly from ' + url + ' (' + str(
+            twit_account_page.status_code) + '). Aborting')
+        shutdown(-1)
+
+    logging.debug('Nitter page downloaded successfully from ' + url)
+
+    # DEBUG: Save page to file
+    # of = open(TOML['config']['twitter_account'] + '.html', 'w')
+    # of.write(twit_account_page.text)
+    # of.close()
+
+    # Make soup
+    soup = BeautifulSoup(twit_account_page.text, 'html.parser')
+
+    # Extract twitter timeline
+    timeline = soup.find_all(has_class_timeline_item_but_not_thread)
+    return soup, timeline
+
+
 def update_profile(nitter_url, soup, sql, mast_password):
     """
     Update profile on Mastodon
@@ -841,53 +887,12 @@ def main(argv):
     # To store content of all tweets from this user
     tweets = []
 
-    # Initiate session
-    session = requests.Session()
-
-    # Get a copy of the default headers that requests would use
-    headers = requests.utils.default_headers()
-
-    # Update default headers with randomly selected user agent
-    headers.update(
-        {
-            'User-Agent': USER_AGENTS[random.randint(0, len(USER_AGENTS) - 1)],
-            'Cookie': 'replaceTwitter=; replaceYouTube=; hlsPlayback=on; proxyVideos=',
-        }
-    )
-
     url = nitter_url + '/' + TOML['config']['twitter_account']
 
     # Use different page if we need to handle replies
     if TOML['options']['post_reply_to']:
         url += '/with_replies'
 
-    # Download twitter page of user
-    try:
-        twit_account_page = session.get(url, headers=headers, timeout=HTTPS_REQ_TIMEOUT)
-    except requests.exceptions.ConnectionError:
-        logging.fatal('Host did not respond when trying to download ' + url)
-        shutdown(-1)
-    except requests.exceptions.Timeout:
-        logging.fatal(nitter_url + ' took too long to respond')
-        shutdown(-1)
-
-    # Verify that download worked
-    if twit_account_page.status_code != 200:
-        logging.fatal('The Nitter page did not download correctly from ' + url + ' (' + str(
-            twit_account_page.status_code) + '). Aborting')
-        shutdown(-1)
-
-    logging.debug('Nitter page downloaded successfully from ' + url)
-
-    # DEBUG: Save page to file
-    # of = open(TOML['config']['twitter_account'] + '.html', 'w')
-    # of.write(twit_account_page.text)
-    # of.close()
-
-    # Make soup
-    soup = BeautifulSoup(twit_account_page.text, 'html.parser')
-
-    # Extract twitter timeline
-    timeline = soup.find_all(has_class_timeline_item_but_not_thread)
+    soup, timeline = get_timeline(url)
 
     logging.info('Processing ' + str(len(timeline)) + ' tweets found in timeline')