def item_is_own_tweet(item):
    """
    Verify that a timeline item is a tweet from the configured Twitter account
    and not a reply from somebody else.

    :param item: BeautifulSoup representation of the timeline-item, or None
    :return: True if the item is from the Twitter account, False otherwise
    """
    # Callers pass the result of a find() lookup, which is None when no
    # timeline-item was found; treat that as "not our tweet" instead of crashing
    if item is None:
        return False

    # The author is the tag with class username that has an ancestor of class
    # tweet-header which has a parent of class tweet-body
    username_tag = item.select_one(".tweet-body > .tweet-header .username")
    if username_tag is None:
        return False

    # The handle is read from the title attribute; get() returns None when the
    # attribute is missing, so guard before stripping the leading '@'
    title = username_tag.get('title')
    if title is None:
        return False

    username = title.lstrip('@')
    logging.debug("Timeline item author: %s", username)

    # Twitter handles are case-insensitive, so compare without regard to case
    return username.lower() == TOML['config']['twitter_account'].lower()
""" def _get_rest_of_thread(session, headers, nitter_url, thread_url, first_item): # Add first item to timeline - timeline = [(None, first_item)] + if first_item is not None: + timeline = [(None, first_item)] logging.debug("Downloading tweets in thread from separate page") # Download page with thread @@ -288,10 +307,13 @@ def get_timeline(nitter_url): for item in list: classes = item['class'] if 'timeline-item' in classes: # Individual tweet - timeline.append((None, item)) + if item_is_own_tweet(item): + timeline.append((None, item)) elif 'thread-line' in classes: # First tweet of a thread # Get the first item of thread first_item = item.find('div', class_='timeline-item') + if not item_is_own_tweet(first_item): + first_item = None # Get the url of the tweet thread_link_tag = item.find('a', class_='tweet-link') @@ -979,7 +1001,7 @@ def main(argv): if tweet_link_tag is None: logging.debug("Malformed timeline item (no tweet link), skipping") continue - + tweet_id = tweet_link_tag.get('href').strip('#m') status_id = tweet_id.split('/')[3]