mirror of
https://gitlab.com/jeancf/twoot.git
synced 2024-11-23 20:11:11 +00:00
get_timeline and get_rest_of_thread return tuple
This commit is contained in:
parent
9bdcccf713
commit
3fab787738
30
twoot.py
30
twoot.py
|
@ -171,7 +171,7 @@ Only used by `get_timeline()`.
|
|||
:param headers: HTTP headers to use
|
||||
:param nitter_url: url of the nitter instance to use
|
||||
:param thread_url: url of the first tweet in thread
|
||||
:return: List of tweets from the thread
|
||||
:return: list of tuples with url of tweet replied-to (or None) and content of tweet
|
||||
"""
|
||||
def _get_rest_of_thread(session, headers, nitter_url, thread_url):
|
||||
logging.debug("Downloading tweets in thread from separate page")
|
||||
|
@ -203,14 +203,25 @@ def _get_rest_of_thread(session, headers, nitter_url, thread_url):
|
|||
|
||||
# Get all items in thread after main tweet
|
||||
after_tweet = soup.find('div', 'after-tweet')
|
||||
list = after_tweet.find_all('div', class_='timeline-item')
|
||||
|
||||
# Build timeline of tuples
|
||||
timeline = []
|
||||
previous_tweet_url = thread_url
|
||||
for item in list:
|
||||
timeline.append((previous_tweet_url, item))
|
||||
# Get the url of the tweet
|
||||
previous_tweet_url = item.find('a', class_='tweet-link')
|
||||
if previous_tweet_url is None:
|
||||
logging.error('Thread tweet is missing link tag')
|
||||
|
||||
timeline = after_tweet.find_all('div', class_='timeline-item')
|
||||
return timeline
|
||||
|
||||
|
||||
"""
|
||||
Download page with full thread of tweets. Only used by `get_timeline()`.
|
||||
:param url: url of the thread page to download
|
||||
:return: List of tweets from the thread
|
||||
Download timeline of twitter account
|
||||
:param url: url of the account page to download
|
||||
:return: list of tuples with url of tweet replied-to (or None) and content of tweet
|
||||
"""
|
||||
def get_timeline(nitter_url):
|
||||
# Define url to use
|
||||
|
@ -270,16 +281,19 @@ def get_timeline(nitter_url):
|
|||
for item in list:
|
||||
classes = item['class']
|
||||
if 'timeline-item' in classes: # Individual tweet
|
||||
timeline.append(item)
|
||||
timeline.append((None, item))
|
||||
elif 'thread-line' in classes: # First tweet of a thread
|
||||
# Get the first item of thread
|
||||
first_item = item.find('div', class_='timeline-item')
|
||||
timeline.append(first_item)
|
||||
|
||||
# Get the rest of the items of the thread
|
||||
# Get the url of the tweet
|
||||
thread_link_tag = item.find('a', class_='tweet-link')
|
||||
if thread_link_tag is not None:
|
||||
thread_url = thread_link_tag.get('href')
|
||||
|
||||
timeline.append((thread_url, first_item))
|
||||
|
||||
# Get the rest of the items of the thread
|
||||
timeline.extend(_get_rest_of_thread(session, headers, nitter_url, thread_url))
|
||||
else:
|
||||
# Ignore other classes
|
||||
|
|
Loading…
Reference in New Issue
Block a user