mirror of
https://gitlab.com/jeancf/twoot.git
synced 2025-01-18 23:37:05 +00:00
Text and links are fixed
This commit is contained in:
parent
9dbf40bb5d
commit
296d124c35
30
twoot.py
30
twoot.py
|
@ -66,11 +66,17 @@ def cleanup_tweet_text(tt_iter):
|
||||||
|
|
||||||
# If element is an external link
|
# If element is an external link
|
||||||
elif tc == 'twitter_external_link':
|
elif tc == 'twitter_external_link':
|
||||||
# Add a sometimes missing space before url
|
# If element is a simple link
|
||||||
if not tweet_text.endswith(' ') and not tweet_text.endswith('\n'):
|
if tag.has_attr('data-expanded-url'):
|
||||||
tweet_text += ' '
|
# Add a sometimes missing space before url
|
||||||
# Add full url
|
if not tweet_text.endswith(' ') and not tweet_text.endswith('\n'):
|
||||||
tweet_text += tag['data-expanded-url']
|
tweet_text += ' '
|
||||||
|
# Add full url
|
||||||
|
tweet_text += tag['data-expanded-url']
|
||||||
|
# If element is a picture
|
||||||
|
elif tag.has_attr('data-url'):
|
||||||
|
# TODO handle photo
|
||||||
|
pass
|
||||||
|
|
||||||
# If element is hashflag (hashtag + icon), handle as simple hashtag
|
# If element is hashflag (hashtag + icon), handle as simple hashtag
|
||||||
elif tag.name == 'span' and tag['class'][0] == 'twitter-hashflag-container':
|
elif tag.name == 'span' and tag['class'][0] == 'twitter-hashflag-container':
|
||||||
|
@ -151,7 +157,7 @@ def main(argv):
|
||||||
|
|
||||||
# Verify that this is the no_js page that we expected
|
# Verify that this is the no_js page that we expected
|
||||||
soup = BeautifulSoup(r1.text, 'html.parser')
|
soup = BeautifulSoup(r1.text, 'html.parser')
|
||||||
assert (str(soup.form.p.string).find('JavaScript is disabled') != -1),\
|
assert 'JavaScript is disabled' in str(soup.form.p.string),\
|
||||||
'this is not the no_js page we expected. Quitting'
|
'this is not the no_js page we expected. Quitting'
|
||||||
|
|
||||||
# Submit POST form response with cookies
|
# Submit POST form response with cookies
|
||||||
|
@ -175,13 +181,16 @@ def main(argv):
|
||||||
|
|
||||||
# Verify that we now have the correct twitter page
|
# Verify that we now have the correct twitter page
|
||||||
soup = BeautifulSoup(response.text, 'html.parser')
|
soup = BeautifulSoup(response.text, 'html.parser')
|
||||||
assert (str(soup.head.title.string).find(twit_account) != -1),\
|
assert twit_account.lower() in str(soup.head.title.string).lower(),\
|
||||||
'This is not the correct twitter page. Quitting'
|
'This is not the correct twitter page. Quitting'
|
||||||
|
|
||||||
# Extract twitter timeline
|
# Extract twitter timeline
|
||||||
results = soup.find_all('table', class_='tweet')
|
results = soup.find_all('table', class_='tweet')
|
||||||
|
|
||||||
for result in results:
|
for result in results:
|
||||||
|
# Extract tweet id
|
||||||
|
tweet_id = str(result['href']).strip('?p=v')
|
||||||
|
|
||||||
# Isolate tweet header
|
# Isolate tweet header
|
||||||
sih = result.find('tr', class_='tweet-header')
|
sih = result.find('tr', class_='tweet-header')
|
||||||
|
|
||||||
|
@ -201,9 +210,6 @@ def main(argv):
|
||||||
# Isolate tweet text container
|
# Isolate tweet text container
|
||||||
ttc = result.find('tr', class_='tweet-container')
|
ttc = result.find('tr', class_='tweet-container')
|
||||||
|
|
||||||
# Extract tweet id
|
|
||||||
tweet_id = ttc.find('div', class_='tweet-text')['data-id']
|
|
||||||
|
|
||||||
# extract iterator over tweet text contents
|
# extract iterator over tweet text contents
|
||||||
tt_iter = ttc.find('div', class_='dir-ltr').children
|
tt_iter = ttc.find('div', class_='dir-ltr').children
|
||||||
|
|
||||||
|
@ -211,10 +217,10 @@ def main(argv):
|
||||||
|
|
||||||
# Check if the tweet is a retweet from somebody else
|
# Check if the tweet is a retweet from somebody else
|
||||||
if author_account.lower() != twit_account.lower():
|
if author_account.lower() != twit_account.lower():
|
||||||
tweet_text = 'RT from ' + author + ' @' + author_account + '\n\n' + tweet_text
|
tweet_text = 'RT from ' + author + '(@' + author_account + '\n\n)' + tweet_text
|
||||||
|
|
||||||
# Add footer with link to original tweet
|
# Add footer with link to original tweet
|
||||||
tweet_text += '\n\nOriginal tweet : https://twitter.com' + tweet_id
|
tweet_text += '\n\nOriginal tweet : https://twitter.com/' + tweet_id
|
||||||
|
|
||||||
# Isolate attached media container
|
# Isolate attached media container
|
||||||
amoc = result.find('div', class_='AdaptiveMediaOuterContainer')
|
amoc = result.find('div', class_='AdaptiveMediaOuterContainer')
|
||||||
|
|
Loading…
Reference in New Issue
Block a user