diff --git a/twoot.py b/twoot.py index 4f15177..bbb2b44 100755 --- a/twoot.py +++ b/twoot.py @@ -73,7 +73,6 @@ def deredir_url(url): :param url: url to be de-redirected :return: direct url """ - # Check if we need to do anyting if TOML['options']['remove_link_redirections'] is False: return url @@ -155,11 +154,10 @@ def substitute_source(orig_url): param orig_url: url to check for substitutes :return: url with replaced domains """ - parsed_url = urlparse(orig_url) domain = parsed_url.netloc - logging.debug("Checking domain " + domain) + logging.debug("Checking domain %s for substitution ", domain) # Handle twitter twitter_subst = TOML["options"]["substitution"]["twitter"] @@ -199,7 +197,6 @@ def clean_url(orig_url): >>> clean_url('https://example.com/video/this-aerial-ropeway?utm_source=Twitter&utm_medium=video&utm_campaign=organic&utm_content=Nov13&a=aaa&b=1#mkt_tok=tik&mkt_tik=tok') 'https://example.com/video/this-aerial-ropeway?a=aaa&b=1#mkt_tik=tok' """ - # Check if we have to do anything if TOML['options']['remove_trackers_from_urls'] is False: return orig_url @@ -227,6 +224,7 @@ def process_media_body(tt_iter): :param tt_iter: iterator over the HTML elements in the text of the tweet :return: cleaned up text of the tweet """ + tweet_text = '' # Iterate elements for tag in tt_iter: @@ -289,7 +287,7 @@ def process_attachments(nitter_url, attachments_container, status_id, author_acc for image in images: pics.append(nitter_url + image.get('href')) - logging.debug('collected ' + str(len(pics)) + ' images from attachments') + logging.debug('collected ' + str(len(pics)) + ' image(s) from attachments') # Download nitter video (converted animated GIF) gif_class = attachments_container.find('video', class_='gif') @@ -700,7 +698,7 @@ def main(argv): # Process quote: append link to tweet_text quote_div = status.find('a', class_='quote-link') if quote_div is not None: - tweet_text += '\n\nhttps://twitter.com' + quote_div.get('href').strip('#m') + tweet_text += substitute_source('\n\nhttps://twitter.com' + quote_div.get('href').strip('#m')) # Process card : extract image if necessary card_class = status.find('a', class_='card-container') @@ -719,7 +717,7 @@ def main(argv): tweet_text += '\n\n[Video embedded in original tweet]' # Add footer with link to original tweet - tweet_text += '\n\nOriginal tweet : ' + full_status_url + tweet_text += '\n\nOriginal tweet : ' + substitute_source(full_status_url) # If no media was specifically added in the tweet, try to get the first picture # with "twitter:image" meta tag in first linked page in tweet text @@ -770,7 +768,6 @@ def main(argv): tweets.append(tweet) logging.debug('Tweet %s added to list of toots to upload', tweet_id) - logging.debug('TEXT:\n' + tweet["tweet_text"]) # Log summary stats logging.info(str(out_date_cnt) + ' tweets outside of valid time range')