Implement item_is_own_tweet

2025-05-30 04:20:16 +00:00 · 2023-07-22 13:18:31 +02:00 · 2023-07-22 13:18:31 +02:00 · 4d596c3f01
commit 4d596c3f01
parent b09ee35f5c
1 changed files with 25 additions and 3 deletions
--- a/twoot.py
+++ b/twoot.py
@ -164,6 +164,24 @@ def build_config(args):
        exit(-1)


+"""
+Verifies that the item is a tweet from the Twitter account and not a reply from
+somebody else
+:param item: BeautifulSoup representation of the timeline-item (None is accepted)
+:return: True if the item is from the Twitter account, False otherwise
+"""
+def item_is_own_tweet(item):
+    if item is None:  # Callers pass the result of find(), which may be None
+        return False
+    # <a> with class username that has an ancestor of class tweet-header which has a
+    # parent of class tweet-body
+    username_tag = item.select_one(".tweet-body > .tweet-header .username")
+    logging.debug("tweet username_tag: %s", username_tag)
+    if username_tag is None:
+        return False
+    username = (username_tag.get('title') or '').lstrip('@')  # title may be absent
+    return username == TOML['config']['twitter_account']
+
 """
 Dowload page with full thread of tweets and extract all replied to tweet reference by url.
 Only used by `get_timeline()`.
@ -175,7 +193,8 @@ Only used by `get_timeline()`.
 """
 def _get_rest_of_thread(session, headers, nitter_url, thread_url, first_item):
    # Add first item to timeline
-    timeline = [(None, first_item)]
+    if first_item is not None:
+        timeline = [(None, first_item)]

    logging.debug("Downloading tweets in thread from separate page")
    # Download page with thread
@ -288,10 +307,13 @@ def get_timeline(nitter_url):
    for item in list:
        classes = item['class']
        if 'timeline-item' in classes:  # Individual tweet
-            timeline.append((None, item))
+            if item_is_own_tweet(item):
+                timeline.append((None, item))
        elif 'thread-line' in classes:  # First tweet of a thread
            # Get the first item of thread
            first_item = item.find('div', class_='timeline-item')
+            if not item_is_own_tweet(first_item):
+                first_item = None

            # Get the url of the tweet
            thread_link_tag = item.find('a', class_='tweet-link')
@ -979,7 +1001,7 @@ def main(argv):
        if tweet_link_tag is None:
            logging.debug("Malformed timeline item (no tweet link), skipping")
            continue
-        
+
        tweet_id = tweet_link_tag.get('href').strip('#m')
        status_id = tweet_id.split('/')[3]