mirror of
https://gitlab.com/jeancf/twoot.git
synced 2024-11-27 14:01:13 +00:00
Add Exclusion of thread tweets
This commit is contained in:
parent
ea12cea20f
commit
530953f48b
18
twoot.py
18
twoot.py
|
@ -72,6 +72,22 @@ USER_AGENTS = [
|
||||||
'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Vivaldi/6.1.3035.84',
|
'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Vivaldi/6.1.3035.84',
|
||||||
]
|
]
|
||||||
|
|
||||||
|
"""
|
||||||
|
Temporary mitigation for the inability to parse threads. Skip tweets that are part of a thread.
|
||||||
|
"""
|
||||||
|
def has_class_timeline_item_but_not_thread(tag):
    """Return True when *tag* is a timeline item that is not part of a thread.

    Used as a filter function passed to ``soup.find_all()``. Tweets that
    belong to a thread are skipped with a warning.

    :param tag: element to test; must support ``tag.has_attr('class')`` and
                ``tag['class']`` (the collection of the element's classes)
    :return: True if the classes contain 'timeline-item' but not 'thread',
             False in every other case
    """
    # An element without a class attribute cannot be a timeline item
    if not tag.has_attr('class'):
        return False

    classes = tag['class']

    # Not a tweet container at all: reject silently
    if 'timeline-item' not in classes:
        return False

    # Tweet container that belongs to a thread: reject, but tell the user why
    if 'thread' in classes:
        logging.warning('Tweet is part of a thread which are a new nitter feature that is not handled yet. Skipping')
        return False

    return True
|
||||||
|
|
||||||
|
|
||||||
def build_config(args):
|
def build_config(args):
|
||||||
"""
|
"""
|
||||||
|
@ -873,7 +889,7 @@ def main(argv):
|
||||||
soup = BeautifulSoup(twit_account_page.text, 'html.parser')
|
soup = BeautifulSoup(twit_account_page.text, 'html.parser')
|
||||||
|
|
||||||
# Extract twitter timeline
|
# Extract twitter timeline
|
||||||
timeline = soup.find_all('div', class_='timeline-item')
|
timeline = soup.find_all(has_class_timeline_item_but_not_thread)
|
||||||
|
|
||||||
logging.info('Processing ' + str(len(timeline)) + ' tweets found in timeline')
|
logging.info('Processing ' + str(len(timeline)) + ' tweets found in timeline')
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user