Compare commits

a9dae52887..d460d2feac

2 Commits

Author   SHA1         Message                         Date
jeancf   d460d2feac   Correct some bugs               2023-07-23 11:06:36 +02:00
                      (This is running but duplicate replies are collected.)
jeancf   a9dae52887   Modify _get_rest_of_thread()    2023-07-22 13:43:48 +02:00


@@ -173,13 +173,12 @@ somebody else
 def item_is_own_tweet(item):
     # <a> with class username that has an ancestor of class tweet-header which has a
     # parent of class tweet-body
-    username_tag = item.select_one(".tweet-body > .tweet-header .username")
-    print("tweet username_tag: ", str(username_tag))
+    username_tag = item.select_one(".tweet-body > div > .tweet-header .username")
     if username_tag is not None:
         username = username_tag.get('title').lstrip('@')
         print(username)
         if (username == TOML['config']['twitter_account']):
             return True
     logging.debug("item is not authored by " + TOML['config']['twitter_account'])
     return False
 """
@@ -223,13 +222,25 @@ def _get_rest_of_thread(session, headers, nitter_url, thread_url, first_item):
 
     # Make soup
     soup = BeautifulSoup(thread_page.text, 'html.parser')
 
     list = []
     # Get all items in thread after main tweet
     after_tweet = soup.find('div', 'after-tweet')
     if after_tweet is not None:
         list = after_tweet.find_all('div', class_='timeline-item')
 
+    # Get all the replies from tweet account in the replies section below thread
+    if TOML['options']['post_reply_to']:
+        previous_tweet_url = None
+        replies = soup.find('div', id='r')
+        if replies is not None:
+            list.extend(replies.find_all('div', class_='timeline-item'))
+
+    # Build timeline of tuples
+    timeline = []
+    previous_tweet_url = thread_url
     for item in list:
+        # Add item to the list
+        if item_is_own_tweet(item):
+            timeline.append((previous_tweet_url, item))
         # Get the url of the tweet
         tweet_link_tag = item.find('a', class_='tweet-link')
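
The new block collects items from the after-tweet container and, when post_reply_to is enabled, also from the replies section (div with id "r"), then pairs each own tweet with previous_tweet_url in a (url, item) tuple. The commit message notes that duplicate replies are collected; a minimal standalone sketch of how that can happen, using assumed, simplified nitter markup (the same reply can appear in both containers):

    from bs4 import BeautifulSoup

    html = """
    <div class="after-tweet">
      <div class="timeline-item"><a class="tweet-link" href="/jeancf/status/1#m"></a></div>
    </div>
    <div id="r">
      <div class="timeline-item"><a class="tweet-link" href="/jeancf/status/1#m"></a></div>
    </div>
    """
    soup = BeautifulSoup(html, 'html.parser')

    items = []
    # Same collection logic as the diff: thread items first, then replies
    after_tweet = soup.find('div', 'after-tweet')
    if after_tweet is not None:
        items = after_tweet.find_all('div', class_='timeline-item')
    replies = soup.find('div', id='r')
    if replies is not None:
        items.extend(replies.find_all('div', class_='timeline-item'))

    links = [i.find('a', class_='tweet-link').get('href') for i in items]
    print(links)  # ['/jeancf/status/1#m', '/jeancf/status/1#m'] -- collected twice

One possible direction (not part of this diff) would be to skip any item whose tweet-link href has already been seen.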
@@ -319,6 +330,8 @@ def get_timeline(nitter_url):
         thread_link_tag = item.find('a', class_='tweet-link')
         if thread_link_tag is not None:
             thread_url = thread_link_tag.get('href').strip('#m')
+        else:
+            thread_url = None
 
         # Get the rest of the items of the thread
         timeline.extend(_get_rest_of_thread(session, headers, nitter_url, thread_url, first_item))
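
The added else branch resets thread_url for items that have no tweet-link anchor; without it, thread_url would be undefined on the first iteration or silently reuse the previous item's URL. A small standalone sketch of the guard, with hypothetical input items:

    from bs4 import BeautifulSoup

    items = [
        BeautifulSoup('<div><a class="tweet-link" href="/jeancf/status/1#m"></a></div>', 'html.parser'),
        BeautifulSoup('<div></div>', 'html.parser'),  # item without a tweet-link
    ]

    for item in items:
        thread_link_tag = item.find('a', class_='tweet-link')
        if thread_link_tag is not None:
            thread_url = thread_link_tag.get('href').strip('#m')
        else:
            thread_url = None  # the fix: do not carry over the previous URL
        print(thread_url)  # '/jeancf/status/1', then None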