def has_class_timeline_item_but_not_thread(tag):
    """
    Tell whether a tag is a timeline item that is not part of a thread.

    Temporary mitigation for the inability to parse threads: tweets that are
    part of a thread are skipped. Meant to be used as a filter function for
    BeautifulSoup, i.e. soup.find_all(has_class_timeline_item_but_not_thread)

    :param tag: element to test; it must provide has_attr() and give access
                to its 'class' attribute with tag['class']
    :return: True if the classes of the tag include 'timeline-item' but not
             'thread', False in every other case. A warning is logged when
             a timeline item is rejected because it belongs to a thread.
    """
    if not tag.has_attr('class'):
        return False

    classes = tag['class']
    if isinstance(classes, str):
        # When the parser does not split multi-valued attributes, the value
        # is one whitespace-separated string. Split it so that the tests
        # below compare whole class names instead of matching substrings.
        classes = classes.split()

    if 'timeline-item' not in classes:
        return False

    if 'thread' in classes:
        logging.warning('Tweet is part of a thread which are a new nitter feature that is not handled yet. Skipping')
        return False

    return True