2025-02-25 01:18:41 +00:00
1 changed files with 13 additions and 29 deletions
--- a/twoot.py
+++ b/twoot.py
@ -169,14 +169,12 @@ Dowload page with full thread of tweets and extract all replied to tweet referen
 Only used by `get_timeline()`.
 :param session: Existing HTTP session with Nitter instance
 :param headers: HTTP headers to use
-:param nitter url: url of the nitter instance to use
+:param url: url of the thread page to download
-:param thread_url: url of the first tweet in thread
+:return: List of tweets from the thread
 :return: list of tuples with url of tweet replied-to (or None) and content of tweet
 """
-def _get_rest_of_thread(session, headers, nitter_url, thread_url):
+def _get_rest_of_thread(session, headers, url):
    logging.debug("Downloading tweets in thread from separate page")
    # Download page with thread
    url = nitter_url + thread_url
    try:
        thread_page = session.get(url, headers=headers, timeout=HTTPS_REQ_TIMEOUT)
    except requests.exceptions.ConnectionError:
@ -203,25 +201,14 @@ def _get_rest_of_thread(session, headers, nitter_url, thread_url):
    # Get all items in thread after main tweet
    after_tweet = soup.find('div', 'after-tweet')
    list = after_tweet.find_all('div', class_='timeline-item')
    # Build timeline of tuples
    timeline = []
    previous_tweet_url = thread_url
    for item in list:
            timeline.append((previous_tweet_url, item))
            # Get the url of the tweet
            previous_tweet_url = item.find('a', class_='tweet-link')
            if previous_tweet_url is  None:
                logging.error('Thread tweet is missing link tag')
    timeline = after_tweet.find_all('div', class_='timeline-item')
    return timeline
 """
-Download timeline of twitter account
+Download page with full thread of tweets. Only used by `get_timeline()`.
-:param url: url of the account page to download
+:param url: url of the thread page to download
-:return: list of tuples with url of tweet replied-to (or None) and content of tweet
+:return: List of tweets from the thread
 """
 def get_timeline(nitter_url):
    # Define url to use
@ -281,20 +268,17 @@ def get_timeline(nitter_url):
    for item in list:
        classes = item['class']
        if 'timeline-item' in classes:  # Individual tweet
-            timeline.append((None, item))
+            timeline.append(item)
        elif 'thread-line' in classes:  # First tweet of a thread
            # Get the first item of thread
            first_item = item.find('div', class_='timeline-item')
            timeline.append(first_item)
-            # Get the url of the tweet
+            # Get the rest of the items of the thread
            thread_link_tag = item.find('a', class_='tweet-link')
            if thread_link_tag is not None:
                thread_url = thread_link_tag.get('href')
-
+            timeline.extend(_get_rest_of_thread(session, headers, nitter_url + thread_url))
            timeline.append((thread_url, first_item))
            # Get the rest of the items of the thread
            timeline.extend(_get_rest_of_thread(session, headers, nitter_url, thread_url))
        else:
            # Ignore other classes
            continue
@ -893,6 +877,7 @@ def main(argv):
        log_level = logging.INFO
    elif ll_str == "WARNING":
        log_level = logging.WARNING
        print('log level warning set')
    elif ll_str == "ERROR":
        log_level = logging.ERROR
    elif ll_str == "CRITICAL":
@ -959,7 +944,7 @@ def main(argv):
    tweets = []
    out_date_cnt = 0
    in_db_cnt = 0
-    for reply_to, status in timeline:
+    for status in timeline:
        # Extract tweet ID and status ID
        tweet_id = status.find('a', class_='tweet-link').get('href').strip('#m')
        status_id = tweet_id.split('/')[3]
@ -1122,7 +1107,6 @@ def main(argv):
            "tweet_text": tweet_text,
            "video": video_file,
            "photos": photos,
            "reply-to": reply_to,
        }
        tweets.append(tweet)