Implement item_is_own_tweet

This commit is contained in:
jeancf 2023-07-22 13:18:31 +02:00
parent b09ee35f5c
commit 4d596c3f01

View File

@ -164,6 +164,24 @@ def build_config(args):
exit(-1) exit(-1)
"""
Verifies that the item is a tweet from the Twitter account and not a reply from
somebody else
:param item: BeautifulSoup representation of the timeline-item (None is accepted
             and treated as "not an own tweet")
:return: True if the item is from the Twitter account, False otherwise
"""
def item_is_own_tweet(item):
    # Callers may pass the result of a `find()` that matched nothing
    if item is None:
        return False

    # <a> with class username that has an ancestor of class tweet-header which has a
    # parent of class tweet-body
    username_tag = item.select_one(".tweet-body > .tweet-header .username")
    if username_tag is None:
        return False

    # The title attribute is expected to hold the handle prefixed with '@';
    # guard against it being absent so a malformed item does not raise
    title = username_tag.get('title')
    if not title:
        logging.debug("Username tag has no title attribute, cannot verify author")
        return False

    username = title.strip().lstrip('@')
    logging.debug("Tweet author: %s", username)

    # Twitter handles are case-insensitive, so compare without regard to case
    return username.casefold() == TOML['config']['twitter_account'].casefold()
""" """
Download page with full thread of tweets and extract all replied to tweet reference by url. Download page with full thread of tweets and extract all replied to tweet reference by url.
Only used by `get_timeline()`. Only used by `get_timeline()`.
@ -175,7 +193,8 @@ Only used by `get_timeline()`.
""" """
def _get_rest_of_thread(session, headers, nitter_url, thread_url, first_item): def _get_rest_of_thread(session, headers, nitter_url, thread_url, first_item):
# Add first item to timeline # Add first item to timeline
timeline = [(None, first_item)] if first_item is not None:
timeline = [(None, first_item)]
logging.debug("Downloading tweets in thread from separate page") logging.debug("Downloading tweets in thread from separate page")
# Download page with thread # Download page with thread
@ -288,10 +307,13 @@ def get_timeline(nitter_url):
for item in list: for item in list:
classes = item['class'] classes = item['class']
if 'timeline-item' in classes: # Individual tweet if 'timeline-item' in classes: # Individual tweet
timeline.append((None, item)) if item_is_own_tweet(item):
timeline.append((None, item))
elif 'thread-line' in classes: # First tweet of a thread elif 'thread-line' in classes: # First tweet of a thread
# Get the first item of thread # Get the first item of thread
first_item = item.find('div', class_='timeline-item') first_item = item.find('div', class_='timeline-item')
if not item_is_own_tweet(first_item):
first_item = None
# Get the url of the tweet # Get the url of the tweet
thread_link_tag = item.find('a', class_='tweet-link') thread_link_tag = item.find('a', class_='tweet-link')
@ -979,7 +1001,7 @@ def main(argv):
if tweet_link_tag is None: if tweet_link_tag is None:
logging.debug("Malformed timeline item (no tweet link), skipping") logging.debug("Malformed timeline item (no tweet link), skipping")
continue continue
tweet_id = tweet_link_tag.get('href').strip('#m') tweet_id = tweet_link_tag.get('href').strip('#m')
status_id = tweet_id.split('/')[3] status_id = tweet_id.split('/')[3]