Fine tune thread download

This commit is contained in:
jeancf 2023-07-13 11:53:07 +02:00
parent 5939484160
commit cdbb1bb8f2

View File

@@ -227,8 +227,10 @@ def _get_rest_of_thread(session, headers, url):
# Make soup
soup = BeautifulSoup(thread_page.text, 'html.parser')
# Get all items in thread
timeline = soup.find_all('div', class_='timeline-item')
# Get all items in thread after main tweet
after_tweet = soup.find('div', 'after-tweet')
timeline = after_tweet.find_all('div', class_='timeline-item')
return timeline
"""
@@ -972,13 +974,8 @@ def main(argv):
in_db_cnt = 0
for status in timeline:
# Extract tweet ID and status ID
try:
tweet_id = status.find('a', class_='tweet-link').get('href').strip('#m')
status_id = tweet_id.split('/')[3]
except Exception as e:
logging.critical('Malformed timeline downloaded from nitter instance')
logging.debug(e)
shutdown(-1)
tweet_id = status.find('a', class_='tweet-link').get('href').strip('#m')
status_id = tweet_id.split('/')[3]
logging.debug('processing tweet %s', tweet_id)