Compare commits

a9dae52887..d460d2feac

2 Commits

Author   SHA1         Message                         Date
jeancf   d460d2feac   Correct some bugs               2023-07-23 11:06:36 +02:00
                      (This is running but duplicate replies are collected.)
jeancf   a9dae52887   Modify _get_rest_of_thread()    2023-07-22 13:43:48 +02:00


@@ -173,13 +173,12 @@ somebody else
 def item_is_own_tweet(item):
     # <a> with class username that has an ancestor of class tweet-header which has a
     # parent of class tweet-body
-    username_tag = item.select_one(".tweet-body > .tweet-header .username")
-    print("tweet username_tag: ", str(username_tag))
+    username_tag = item.select_one(".tweet-body > div > .tweet-header .username")
     if username_tag is not None:
         username = username_tag.get('title').lstrip('@')
         print(username)
         if (username == TOML['config']['twitter_account']):
             return True
     logging.debug("item is not authored by " + TOML['config']['twitter_account'])
     return False
 """
@@ -223,13 +222,25 @@ def _get_rest_of_thread(session, headers, nitter_url, thread_url, first_item):
 
     # Make soup
     soup = BeautifulSoup(thread_page.text, 'html.parser')
 
     list = []
     # Get all items in thread after main tweet
     after_tweet = soup.find('div', 'after-tweet')
     if after_tweet is not None:
         list = after_tweet.find_all('div', class_='timeline-item')
 
+    # Get all the replies from tweet account in the replies section below thread
+    if TOML['options']['post_reply_to']:
+        previous_tweet_url = None
+        replies = soup.find('div', id='r')
+        if replies is not None:
+            list.extend(replies.find_all('div', class_='timeline-item'))
+
+    # Build timeline of tuples
+    timeline = []
+    previous_tweet_url = thread_url
     for item in list:
+        # Add item to the list
+        if item_is_own_tweet(item):
+            timeline.append((previous_tweet_url, item))
         # Get the url of the tweet
         tweet_link_tag = item.find('a', class_='tweet-link')
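
The new block collects items from the after-tweet container and, when post_reply_to is enabled, also from the replies section (div with id "r"), then pairs each own tweet with previous_tweet_url in a (url, item) tuple. The commit message notes that duplicate replies are collected; a minimal standalone sketch of how that can happen, using assumed, simplified nitter markup (the same reply can appear in both containers):

    from bs4 import BeautifulSoup

    html = """
    <div class="after-tweet">
      <div class="timeline-item"><a class="tweet-link" href="/jeancf/status/1#m"></a></div>
    </div>
    <div id="r">
      <div class="timeline-item"><a class="tweet-link" href="/jeancf/status/1#m"></a></div>
    </div>
    """
    soup = BeautifulSoup(html, 'html.parser')

    items = []
    # Same collection logic as the diff: thread items first, then replies
    after_tweet = soup.find('div', 'after-tweet')
    if after_tweet is not None:
        items = after_tweet.find_all('div', class_='timeline-item')
    replies = soup.find('div', id='r')
    if replies is not None:
        items.extend(replies.find_all('div', class_='timeline-item'))

    links = [i.find('a', class_='tweet-link').get('href') for i in items]
    print(links)  # ['/jeancf/status/1#m', '/jeancf/status/1#m'] -- collected twice

One possible direction (not part of this diff) would be to skip any item whose tweet-link href has already been seen.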
@@ -319,6 +330,8 @@ def get_timeline(nitter_url):
         thread_link_tag = item.find('a', class_='tweet-link')
         if thread_link_tag is not None:
             thread_url = thread_link_tag.get('href').strip('#m')
+        else:
+            thread_url = None
 
         # Get the rest of the items of the thread
         timeline.extend(_get_rest_of_thread(session, headers, nitter_url, thread_url, first_item))
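
The added else branch resets thread_url for items that have no tweet-link anchor; without it, thread_url would be undefined on the first iteration or silently reuse the previous item's URL. A small standalone sketch of the guard, with hypothetical input items:

    from bs4 import BeautifulSoup

    items = [
        BeautifulSoup('<div><a class="tweet-link" href="/jeancf/status/1#m"></a></div>', 'html.parser'),
        BeautifulSoup('<div></div>', 'html.parser'),  # item without a tweet-link
    ]

    for item in items:
        thread_link_tag = item.find('a', class_='tweet-link')
        if thread_link_tag is not None:
            thread_url = thread_link_tag.get('href').strip('#m')
        else:
            thread_url = None  # the fix: do not carry over the previous URL
        print(thread_url)  # '/jeancf/status/1', then None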