mirror of
https://gitlab.com/jeancf/twoot.git
synced 2024-11-23 20:11:11 +00:00
Fine tune thread download
This commit is contained in:
parent
5939484160
commit
cdbb1bb8f2
15
twoot.py
15
twoot.py
|
@@ -227,8 +227,10 @@ def _get_rest_of_thread(session, headers, url):
|
|||
# Make soup
|
||||
soup = BeautifulSoup(thread_page.text, 'html.parser')
|
||||
|
||||
# Get all items in thread
|
||||
timeline = soup.find_all('div', class_='timeline-item')
|
||||
# Get all items in thread after main tweet
|
||||
after_tweet = soup.find('div', 'after-tweet')
|
||||
|
||||
timeline = after_tweet.find_all('div', class_='timeline-item')
|
||||
return timeline
|
||||
|
||||
"""
|
||||
|
@@ -972,13 +974,8 @@ def main(argv):
|
|||
in_db_cnt = 0
|
||||
for status in timeline:
|
||||
# Extract tweet ID and status ID
|
||||
try:
|
||||
tweet_id = status.find('a', class_='tweet-link').get('href').strip('#m')
|
||||
status_id = tweet_id.split('/')[3]
|
||||
except Exception as e:
|
||||
logging.critical('Malformed timeline downloaded from nitter instance')
|
||||
logging.debug(e)
|
||||
shutdown(-1)
|
||||
tweet_id = status.find('a', class_='tweet-link').get('href').strip('#m')
|
||||
status_id = tweet_id.split('/')[3]
|
||||
|
||||
logging.debug('processing tweet %s', tweet_id)
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user