mirror of
https://gitlab.com/jeancf/twoot.git
synced 2025-03-30 17:36:49 +00:00
Done but to be tested
This commit is contained in:
parent
27ed4cf10e
commit
21ac2cf7dd
13
twoot.py
13
twoot.py
@ -73,7 +73,6 @@ def deredir_url(url):
|
|||||||
:param url: url to be de-redirected
|
:param url: url to be de-redirected
|
||||||
:return: direct url
|
:return: direct url
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Check if we need to do anything
|
# Check if we need to do anything
|
||||||
if TOML['options']['remove_link_redirections'] is False:
|
if TOML['options']['remove_link_redirections'] is False:
|
||||||
return url
|
return url
|
||||||
@ -155,11 +154,10 @@ def substitute_source(orig_url):
|
|||||||
:param orig_url: url to check for substitutes
|
:param orig_url: url to check for substitutes
|
||||||
:return: url with replaced domains
|
:return: url with replaced domains
|
||||||
"""
|
"""
|
||||||
|
|
||||||
parsed_url = urlparse(orig_url)
|
parsed_url = urlparse(orig_url)
|
||||||
domain = parsed_url.netloc
|
domain = parsed_url.netloc
|
||||||
|
|
||||||
logging.debug("Checking domain " + domain)
|
logging.debug("Checking domain %s for substitution ", domain)
|
||||||
|
|
||||||
# Handle twitter
|
# Handle twitter
|
||||||
twitter_subst = TOML["options"]["substitution"]["twitter"]
|
twitter_subst = TOML["options"]["substitution"]["twitter"]
|
||||||
@ -199,7 +197,6 @@ def clean_url(orig_url):
|
|||||||
>>> clean_url('https://example.com/video/this-aerial-ropeway?utm_source=Twitter&utm_medium=video&utm_campaign=organic&utm_content=Nov13&a=aaa&b=1#mkt_tok=tik&mkt_tik=tok')
|
>>> clean_url('https://example.com/video/this-aerial-ropeway?utm_source=Twitter&utm_medium=video&utm_campaign=organic&utm_content=Nov13&a=aaa&b=1#mkt_tok=tik&mkt_tik=tok')
|
||||||
'https://example.com/video/this-aerial-ropeway?a=aaa&b=1#mkt_tik=tok'
|
'https://example.com/video/this-aerial-ropeway?a=aaa&b=1#mkt_tik=tok'
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Check if we have to do anything
|
# Check if we have to do anything
|
||||||
if TOML['options']['remove_trackers_from_urls'] is False:
|
if TOML['options']['remove_trackers_from_urls'] is False:
|
||||||
return orig_url
|
return orig_url
|
||||||
@ -227,6 +224,7 @@ def process_media_body(tt_iter):
|
|||||||
:param tt_iter: iterator over the HTML elements in the text of the tweet
|
:param tt_iter: iterator over the HTML elements in the text of the tweet
|
||||||
:return: cleaned up text of the tweet
|
:return: cleaned up text of the tweet
|
||||||
"""
|
"""
|
||||||
|
|
||||||
tweet_text = ''
|
tweet_text = ''
|
||||||
# Iterate elements
|
# Iterate elements
|
||||||
for tag in tt_iter:
|
for tag in tt_iter:
|
||||||
@ -289,7 +287,7 @@ def process_attachments(nitter_url, attachments_container, status_id, author_acc
|
|||||||
for image in images:
|
for image in images:
|
||||||
pics.append(nitter_url + image.get('href'))
|
pics.append(nitter_url + image.get('href'))
|
||||||
|
|
||||||
logging.debug('collected ' + str(len(pics)) + ' images from attachments')
|
logging.debug('collected ' + str(len(pics)) + ' image(s) from attachments')
|
||||||
|
|
||||||
# Download nitter video (converted animated GIF)
|
# Download nitter video (converted animated GIF)
|
||||||
gif_class = attachments_container.find('video', class_='gif')
|
gif_class = attachments_container.find('video', class_='gif')
|
||||||
@ -700,7 +698,7 @@ def main(argv):
|
|||||||
# Process quote: append link to tweet_text
|
# Process quote: append link to tweet_text
|
||||||
quote_div = status.find('a', class_='quote-link')
|
quote_div = status.find('a', class_='quote-link')
|
||||||
if quote_div is not None:
|
if quote_div is not None:
|
||||||
tweet_text += '\n\nhttps://twitter.com' + quote_div.get('href').strip('#m')
|
tweet_text += substitute_source('\n\nhttps://twitter.com' + quote_div.get('href').strip('#m'))
|
||||||
|
|
||||||
# Process card : extract image if necessary
|
# Process card : extract image if necessary
|
||||||
card_class = status.find('a', class_='card-container')
|
card_class = status.find('a', class_='card-container')
|
||||||
@ -719,7 +717,7 @@ def main(argv):
|
|||||||
tweet_text += '\n\n[Video embedded in original tweet]'
|
tweet_text += '\n\n[Video embedded in original tweet]'
|
||||||
|
|
||||||
# Add footer with link to original tweet
|
# Add footer with link to original tweet
|
||||||
tweet_text += '\n\nOriginal tweet : ' + full_status_url
|
tweet_text += '\n\nOriginal tweet : ' + substitute_source(full_status_url)
|
||||||
|
|
||||||
# If no media was specifically added in the tweet, try to get the first picture
|
# If no media was specifically added in the tweet, try to get the first picture
|
||||||
# with "twitter:image" meta tag in first linked page in tweet text
|
# with "twitter:image" meta tag in first linked page in tweet text
|
||||||
@ -770,7 +768,6 @@ def main(argv):
|
|||||||
tweets.append(tweet)
|
tweets.append(tweet)
|
||||||
|
|
||||||
logging.debug('Tweet %s added to list of toots to upload', tweet_id)
|
logging.debug('Tweet %s added to list of toots to upload', tweet_id)
|
||||||
logging.debug('TEXT:\n' + tweet["tweet_text"])
|
|
||||||
|
|
||||||
# Log summary stats
|
# Log summary stats
|
||||||
logging.info(str(out_date_cnt) + ' tweets outside of valid time range')
|
logging.info(str(out_date_cnt) + ' tweets outside of valid time range')
|
||||||
|
Loading…
x
Reference in New Issue
Block a user