mirror of
https://gitlab.com/jeancf/twoot.git
synced 2025-01-31 05:33:45 +00:00
Extracted author, author_account, time_string, timestamp
This commit is contained in:
parent
19d988dfcb
commit
e6e24cbfd5
9
twoot.py
9
twoot.py
|
@@ -263,20 +263,21 @@ def main(argv):
|
||||||
# Do we need to handle reply-to tweets?
|
# Do we need to handle reply-to tweets?
|
||||||
if tweets_and_replies:
|
if tweets_and_replies:
|
||||||
# TODO Capture user name being replied to
|
# TODO Capture user name being replied to
|
||||||
|
pass
|
||||||
else:
|
else:
|
||||||
# Skip this tweet
|
# Skip this tweet
|
||||||
logging.debug("Tweet is a reply-to and we don't want that. Skipping.")
|
logging.debug("Tweet is a reply-to and we don't want that. Skipping.")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# extract author
|
# extract author
|
||||||
author = tmt.find('div', class_='fullname').a.strong.get_text()
|
author = status.find('a', class_='fullname').get('title')
|
||||||
|
|
||||||
# Extract user name
|
# Extract user name
|
||||||
author_account = str(tmt.find('span', class_='username').span.next_sibling).strip('\n ')
|
author_account = status.find('a', class_='username').get('title').lstrip('@')
|
||||||
|
|
||||||
# Extract time stamp
|
# Extract time stamp
|
||||||
time_string = tmt.find('div', class_='metadata').a.get_text()
|
time_string = status.find('span', class_='tweet-date').a.get('title')
|
||||||
timestamp = datetime.datetime.strptime(time_string, '%I:%M %p - %d %b %Y').timestamp()
|
timestamp = datetime.datetime.strptime(time_string, '%d/%m/%Y, %H:%M:%S').timestamp()
|
||||||
|
|
||||||
# extract iterator over tweet text contents
|
# extract iterator over tweet text contents
|
||||||
tt_iter = tmt.find('div', class_='tweet-text').div.children
|
tt_iter = tmt.find('div', class_='tweet-text').div.children
|
||||||
|
|
Loading…
Reference in New Issue
Block a user