mirror of
https://gitlab.com/jeancf/twoot.git
synced 2025-04-26 14:43:34 +00:00
Fine tune thread download
This commit is contained in:
parent
5939484160
commit
cdbb1bb8f2
15
twoot.py
15
twoot.py
@ -227,8 +227,10 @@ def _get_rest_of_thread(session, headers, url):
|
|||||||
# Make soup
|
# Make soup
|
||||||
soup = BeautifulSoup(thread_page.text, 'html.parser')
|
soup = BeautifulSoup(thread_page.text, 'html.parser')
|
||||||
|
|
||||||
# Get all items in thread
|
# Get all items in thread after main tweet
|
||||||
timeline = soup.find_all('div', class_='timeline-item')
|
after_tweet = soup.find('div', 'after-tweet')
|
||||||
|
|
||||||
|
timeline = after_tweet.find_all('div', class_='timeline-item')
|
||||||
return timeline
|
return timeline
|
||||||
|
|
||||||
"""
|
"""
|
||||||
@ -972,13 +974,8 @@ def main(argv):
|
|||||||
in_db_cnt = 0
|
in_db_cnt = 0
|
||||||
for status in timeline:
|
for status in timeline:
|
||||||
# Extract tweet ID and status ID
|
# Extract tweet ID and status ID
|
||||||
try:
|
tweet_id = status.find('a', class_='tweet-link').get('href').strip('#m')
|
||||||
tweet_id = status.find('a', class_='tweet-link').get('href').strip('#m')
|
status_id = tweet_id.split('/')[3]
|
||||||
status_id = tweet_id.split('/')[3]
|
|
||||||
except Exception as e:
|
|
||||||
logging.critical('Malformed timeline downloaded from nitter instance')
|
|
||||||
logging.debug(e)
|
|
||||||
shutdown(-1)
|
|
||||||
|
|
||||||
logging.debug('processing tweet %s', tweet_id)
|
logging.debug('processing tweet %s', tweet_id)
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user