diff --git a/twoot.py b/twoot.py index 5492aef..ad1608b 100755 --- a/twoot.py +++ b/twoot.py @@ -173,13 +173,12 @@ somebody else def item_is_own_tweet(item): # with class username that has an ancestor of class tweet-header which has a # parent of class tweet-body - username_tag = item.select_one(".tweet-body > .tweet-header .username") - print("tweet username_tag: ", str(username_tag)) + username_tag = item.select_one(".tweet-body > div > .tweet-header .username") if username_tag is not None: username = username_tag.get('title').lstrip('@') - print(username) if (username == TOML['config']['twitter_account']): return True + logging.debug("item is not authored by ", TOML['config']['twitter_account']) return False """ @@ -223,21 +222,32 @@ def _get_rest_of_thread(session, headers, nitter_url, thread_url, first_item): # Make soup soup = BeautifulSoup(thread_page.text, 'html.parser') + list = [] # Get all items in thread after main tweet after_tweet = soup.find('div', 'after-tweet') - list = after_tweet.find_all('div', class_='timeline-item') + if after_tweet is None: + list = after_tweet.find_all('div', class_='timeline-item') + + # Get all the replies from tweet account in the replies section below thread + if TOML['config']['post_reply_to']: + previous_tweet_url = None + replies = soup.find('div', id='r') + if replies is not None: + list.extend(replies.find_all('div', class_='timeline_item')) # Build timeline of tuples previous_tweet_url = thread_url for item in list: + # Add item to the list + if item_is_own_tweet(item): timeline.append((previous_tweet_url, item)) - # Get the url of the tweet - tweet_link_tag = item.find('a', class_='tweet-link') - if tweet_link_tag is not None: - previous_tweet_url = tweet_link_tag.get('href').strip('#m') - else: - previous_tweet_url = None - logging.error('Thread tweet is missing link tag') + # Get the url of the tweet + tweet_link_tag = item.find('a', class_='tweet-link') + if tweet_link_tag is not None: + previous_tweet_url = tweet_link_tag.get('href').strip('#m') + else: + previous_tweet_url = None + logging.error('Thread tweet is missing link tag') # return timeline in reverse chronological order timeline.reverse() @@ -319,6 +329,8 @@ def get_timeline(nitter_url): thread_link_tag = item.find('a', class_='tweet-link') if thread_link_tag is not None: thread_url = thread_link_tag.get('href').strip('#m') + else: + thread_url = None # Get the rest of the items of the thread timeline.extend(_get_rest_of_thread(session, headers, nitter_url, thread_url, first_item))