From cdbb1bb8f21095267b27afb43932139c20cb8636 Mon Sep 17 00:00:00 2001 From: jeancf Date: Thu, 13 Jul 2023 11:53:07 +0200 Subject: [PATCH] Fine tune thread download --- twoot.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/twoot.py b/twoot.py index 14a7503..0fe6404 100755 --- a/twoot.py +++ b/twoot.py @@ -227,8 +227,10 @@ def _get_rest_of_thread(session, headers, url): # Make soup soup = BeautifulSoup(thread_page.text, 'html.parser') - # Get all items in thread - timeline = soup.find_all('div', class_='timeline-item') + # Get all items in thread after main tweet + after_tweet = soup.find('div', 'after-tweet') + + timeline = after_tweet.find_all('div', class_='timeline-item') return timeline """ @@ -972,13 +974,8 @@ def main(argv): in_db_cnt = 0 for status in timeline: # Extract tweet ID and status ID - try: - tweet_id = status.find('a', class_='tweet-link').get('href').strip('#m') - status_id = tweet_id.split('/')[3] - except Exception as e: - logging.critical('Malformed timeline downloaded from nitter instance') - logging.debug(e) - shutdown(-1) + tweet_id = status.find('a', class_='tweet-link').get('href').strip('#m') + status_id = tweet_id.split('/')[3] logging.debug('processing tweet %s', tweet_id)