mirror of
https://gitlab.com/jeancf/twoot.git
synced 2024-11-23 20:11:11 +00:00
Add Exclusion of thread tweets
This commit is contained in:
parent
ea12cea20f
commit
530953f48b
18
twoot.py
18
twoot.py
|
@ -72,6 +72,22 @@ USER_AGENTS = [
|
|||
'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Vivaldi/6.1.3035.84',
|
||||
]
|
||||
|
||||
"""
|
||||
Temporary mitigation for the inability to parse threads. Skip tweets that are part of a thread
|
||||
"""
|
||||
def has_class_timeline_item_but_not_thread(tag):
    """
    Filter function for BeautifulSoup's find_all(): keep timeline items
    that are not flagged as belonging to a thread.

    :param tag: tag object exposing has_attr() and item access to its attributes
    :return: True if the tag's classes include 'timeline-item' but not 'thread',
             False in every other case
    """
    # A tag without any class attribute can never be a timeline item
    if not tag.has_attr('class'):
        return False

    css_classes = tag['class']

    # Not a timeline item at all: reject silently
    if 'timeline-item' not in css_classes:
        return False

    # Timeline item that belongs to a thread: reject, but tell the user why
    if 'thread' in css_classes:
        logging.warning('Tweet is part of a thread which are a new nitter feature that is not handled yet. Skipping')
        return False

    return True
|
||||
|
||||
|
||||
def build_config(args):
|
||||
"""
|
||||
|
@ -873,7 +889,7 @@ def main(argv):
|
|||
soup = BeautifulSoup(twit_account_page.text, 'html.parser')
|
||||
|
||||
# Extract twitter timeline
|
||||
timeline = soup.find_all('div', class_='timeline-item')
|
||||
timeline = soup.find_all(has_class_timeline_item_but_not_thread)
|
||||
|
||||
logging.info('Processing ' + str(len(timeline)) + ' tweets found in timeline')
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user