Modify _get_rest_of_thread()

This commit is contained in:
jeancf 2023-07-22 13:43:48 +02:00
parent 4d596c3f01
commit a9dae52887

View File

@ -173,13 +173,12 @@ somebody else
def item_is_own_tweet(item):
    """Tell whether a timeline item was authored by the configured account.

    The author name is read from the ``title`` attribute of the element
    matching ``.tweet-body > div > .tweet-header .username`` inside *item*,
    stripped of leading '@' characters, and compared with
    ``TOML['config']['twitter_account']``.

    :param item: parsed HTML element exposing ``select_one()``
                 (presumably a BeautifulSoup Tag -- confirm against caller)
    :return: True if the username equals the configured account,
             False otherwise (including when no username can be found)
    """
    account = TOML['config']['twitter_account']

    # <a> with class username that has an ancestor of class tweet-header,
    # itself located under a <div> child of the element of class tweet-body
    username_tag = item.select_one(".tweet-body > div > .tweet-header .username")
    if username_tag is not None:
        # get() returns None when the title attribute is absent; fall back to
        # an empty string so that a malformed tag is treated as "not own tweet"
        # instead of raising AttributeError
        username = (username_tag.get('title') or '').lstrip('@')
        if username == account:
            return True

    # Use a %s placeholder: logging formats lazily with the extra arguments,
    # and a message without placeholder triggers a formatting error
    logging.debug("item is not authored by %s", account)
    return False
"""
@ -223,21 +222,32 @@ def _get_rest_of_thread(session, headers, nitter_url, thread_url, first_item):
# Make soup
soup = BeautifulSoup(thread_page.text, 'html.parser')
list = []
# Get all items in thread after main tweet
after_tweet = soup.find('div', 'after-tweet')
list = after_tweet.find_all('div', class_='timeline-item')
if after_tweet is None:
list = after_tweet.find_all('div', class_='timeline-item')
# Get all the replies from tweet account in the replies section below thread
if TOML['config']['post_reply_to']:
previous_tweet_url = None
replies = soup.find('div', id='r')
if replies is not None:
list.extend(replies.find_all('div', class_='timeline_item'))
# Build timeline of tuples
previous_tweet_url = thread_url
for item in list:
# Add item to the list
if item_is_own_tweet(item):
timeline.append((previous_tweet_url, item))
# Get the url of the tweet
tweet_link_tag = item.find('a', class_='tweet-link')
if tweet_link_tag is not None:
previous_tweet_url = tweet_link_tag.get('href').strip('#m')
else:
previous_tweet_url = None
logging.error('Thread tweet is missing link tag')
# Get the url of the tweet
tweet_link_tag = item.find('a', class_='tweet-link')
if tweet_link_tag is not None:
previous_tweet_url = tweet_link_tag.get('href').strip('#m')
else:
previous_tweet_url = None
logging.error('Thread tweet is missing link tag')
# return timeline in reverse chronological order
timeline.reverse()
@ -319,6 +329,8 @@ def get_timeline(nitter_url):
thread_link_tag = item.find('a', class_='tweet-link')
if thread_link_tag is not None:
thread_url = thread_link_tag.get('href').strip('#m')
else:
thread_url = None
# Get the rest of the items of the thread
timeline.extend(_get_rest_of_thread(session, headers, nitter_url, thread_url, first_item))