Improved formatting

This commit is contained in:
jeancf 2023-06-12 17:43:08 +02:00
parent 725b7b5dc9
commit 54c59fa676

View File

@ -25,7 +25,6 @@ import os
import shutil import shutil
import random import random
import re import re
import shutil
import sqlite3 import sqlite3
import sys import sys
import time import time
@ -43,20 +42,20 @@ MAX_REC_COUNT = 50
HTTPS_REQ_TIMEOUT = 10 HTTPS_REQ_TIMEOUT = 10
NITTER_URLS = [ NITTER_URLS = [
'https://nitter.lacontrevoie.fr', # rate limited 'https://nitter.lacontrevoie.fr', # rate limited
'https://n.l5.ca', 'https://n.l5.ca',
'https://nitter.it', # added 27/02/2023 'https://nitter.it', # added 27/02/2023
'https://nitter.sethforprivacy.com', # added on 01/06/2023 'https://nitter.sethforprivacy.com', # added on 01/06/2023
'https://nitter.cutelab.space', # USA, added 16/02/2023 'https://nitter.cutelab.space', # USA, added 16/02/2023
'https://nitter.fly.dev', # anycast, added 06/02/2023 'https://nitter.fly.dev', # anycast, added 06/02/2023
'https://notabird.site', # anycast, added 06/02/2023 'https://notabird.site', # anycast, added 06/02/2023
# 'https://twitter.femboy.hu', # 404 on 06/05/2023 # 'https://twitter.femboy.hu', # 404 on 06/05/2023
# 'https://nitter.grimneko.de', # 404 on 01/06/2023 # 'https://nitter.grimneko.de', # 404 on 01/06/2023
# 'https://nitter.namazso.eu', # lots of 403 27/02/2023 # 'https://nitter.namazso.eu', # lots of 403 27/02/2023
# 'https://twitter.beparanoid.de', # moved 27/02/2023 # 'https://twitter.beparanoid.de', # moved 27/02/2023
# 'https://nitter.fdn.fr', # not updated, rate limited, removed 06/02/2023 # 'https://nitter.fdn.fr', # not updated, rate limited, removed 06/02/2023
# 'https://nitter.hu', # 'https://nitter.hu',
# 'https://nitter.privacydev.net', # USA, added 06/02/2023, removed 15/02/2023 too slow # 'https://nitter.privacydev.net', # USA, added 06/02/2023, removed 15/02/2023 too slow
] ]
# Update from https://www.whatismybrowser.com/guides/the-latest-user-agent/ # Update from https://www.whatismybrowser.com/guides/the-latest-user-agent/
@ -103,12 +102,12 @@ def build_config(args):
} }
# Create default config object # Create default config object
TOML = {'config': {},'options': options} TOML = {'config': {}, 'options': options}
# Load config file if it was provided # Load config file if it was provided
toml_file = args['f'] toml_file = args['f']
if toml_file is not None: if toml_file is not None:
try: # Included in python from version 3.11 try: # Included in python from version 3.11
import tomllib import tomllib
except ModuleNotFoundError: except ModuleNotFoundError:
# for python < 3.11, tomli module must be installed # for python < 3.11, tomli module must be installed
@ -267,21 +266,21 @@ def substitute_source(orig_url):
# Handle twitter # Handle twitter
twitter_subst = TOML["options"]["subst_twitter"] twitter_subst = TOML["options"]["subst_twitter"]
# Do not substitute if subdomain is present (e.g. i.twitter.com) # Do not substitute if subdomain is present (e.g. i.twitter.com)
if (domain == 'twitter.com' or domain == 'www.twitter.com') and twitter_subst != []: if (domain == 'twitter.com' or domain == 'www.twitter.com') and twitter_subst != []:
domain = twitter_subst[random.randint(0, len(twitter_subst) - 1)] domain = twitter_subst[random.randint(0, len(twitter_subst) - 1)]
logging.debug("Replaced twitter.com by " + domain) logging.debug("Replaced twitter.com by " + domain)
# Handle youtube # Handle youtube
youtube_subst = TOML["options"]["subst_youtube"] youtube_subst = TOML["options"]["subst_youtube"]
# Do not substitute if subdomain is present (e.g. i.youtube.com) # Do not substitute if subdomain is present (e.g. i.youtube.com)
if (domain == 'youtube.com' or domain == 'wwww.youtube.com') and youtube_subst != []: if (domain == 'youtube.com' or domain == 'wwww.youtube.com') and youtube_subst != []:
domain = youtube_subst[random.randint(0, len(youtube_subst) - 1)] domain = youtube_subst[random.randint(0, len(youtube_subst) - 1)]
logging.debug("Replaced youtube.com by " + domain) logging.debug("Replaced youtube.com by " + domain)
# Handle reddit # Handle reddit
reddit_subst = TOML["options"]["subst_reddit"] reddit_subst = TOML["options"]["subst_reddit"]
# Do not substitute if subdomain is present (e.g. i.reddit.com) # Do not substitute if subdomain is present (e.g. i.reddit.com)
if (domain == 'reddit.com' or domain == 'www.reddit.com') and reddit_subst != []: if (domain == 'reddit.com' or domain == 'www.reddit.com') and reddit_subst != []:
domain = reddit_subst[random.randint(0, len(reddit_subst) - 1)] domain = reddit_subst[random.randint(0, len(reddit_subst) - 1)]
logging.debug("Replaced reddit.com by " + domain) logging.debug("Replaced reddit.com by " + domain)
@ -294,9 +293,9 @@ def substitute_source(orig_url):
parsed_url.fragment parsed_url.fragment
]) ])
return dest_url return dest_url
def clean_url(orig_url): def clean_url(orig_url):
""" """
Given a URL, return it with the UTM parameters removed from query and fragment Given a URL, return it with the UTM parameters removed from query and fragment
@ -529,14 +528,13 @@ def login(password):
logging.warning('You successfully logged in using a password and an access token \ logging.warning('You successfully logged in using a password and an access token \
has been saved. The password can therefore be omitted from the \ has been saved. The password can therefore be omitted from the \
command-line in future invocations') command-line in future invocations')
else: # No password provided, login with token else: # No password provided, login with token
# Using token in existing .secret file # Using token in existing .secret file
if os.path.isfile(TOML['config']['mastodon_user'] + '.secret'): if os.path.isfile(TOML['config']['mastodon_user'] + '.secret'):
try: try:
mastodon = Mastodon( mastodon = Mastodon(
access_token=TOML['config']['mastodon_user'] + '.secret', access_token=TOML['config']['mastodon_user'] + '.secret',
api_base_url='https://' + TOML['config']['mastodon_instance'] api_base_url='https://' + TOML['config']['mastodon_instance'])
)
except MastodonError as me: except MastodonError as me:
logging.fatal('Login to ' + TOML['config']['mastodon_instance'] + ' Failed\n') logging.fatal('Login to ' + TOML['config']['mastodon_instance'] + ' Failed\n')
logging.fatal(me) logging.fatal(me)
@ -757,12 +755,6 @@ def main(argv):
# Make soup # Make soup
soup = BeautifulSoup(twit_account_page.text, 'html.parser') soup = BeautifulSoup(twit_account_page.text, 'html.parser')
# Replace twitter_account with version with correct capitalization
# ta = soup.find('meta', property='og:title').get('content')
# ta_match = re.search(r'\(@(.+)\)', ta)
# if ta_match is not None:
# TOML['config']['twitter_account'] = ta_match.group(1)
# Extract twitter timeline # Extract twitter timeline
timeline = soup.find_all('div', class_='timeline-item') timeline = soup.find_all('div', class_='timeline-item')
@ -861,8 +853,7 @@ def main(argv):
if attachments_class is not None: if attachments_class is not None:
pics, vid_in_tweet = process_attachments(nitter_url, pics, vid_in_tweet = process_attachments(nitter_url,
attachments_class, attachments_class,
status_id, author_account status_id, author_account)
)
photos.extend(pics) photos.extend(pics)
if vid_in_tweet: if vid_in_tweet:
tweet_text += '\n\n[Video embedded in original tweet]' tweet_text += '\n\n[Video embedded in original tweet]'
@ -872,7 +863,7 @@ def main(argv):
tweet_text += '\n\n' + TOML['options']['footer'] tweet_text += '\n\n' + TOML['options']['footer']
# Add footer with link to original tweet # Add footer with link to original tweet
if TOML['options']['remove_original_tweet_ref'] == False: if TOML['options']['remove_original_tweet_ref'] is False:
if TOML['options']['footer'] != '': if TOML['options']['footer'] != '':
tweet_text += '\nOriginal tweet : ' + substitute_source(full_status_url) tweet_text += '\nOriginal tweet : ' + substitute_source(full_status_url)
else: else:
@ -1054,5 +1045,6 @@ def main(argv):
terminate(0) terminate(0)
if __name__ == "__main__": if __name__ == "__main__":
main(sys.argv) main(sys.argv)