Compare commits

..

33 Commits

Author SHA1 Message Date
jeancf
7ebc2927a7 Keep log files 2022-11-23 16:28:37 +01:00
jeancf
db8d99fc4e Updated .gitignore 2022-11-23 15:19:33 +01:00
jeancf
d79da68b02 Simplified login signature 2022-11-23 15:02:31 +01:00
jeancf
10616d6c88 Simplified is_time_valid() 2022-11-23 14:55:43 +01:00
jeancf
b6315f193c Simplified process_media_body and _attachments 2022-11-23 14:49:06 +01:00
jeancf
294bf1fae1 Changed config struct to global 2022-11-23 14:31:17 +01:00
jeancf
82951bfbd3 Added missing params to info 2022-11-23 11:42:56 +01:00
jeancf
4a73a6252e Updated logging info 2022-11-23 11:25:19 +01:00
jeancf
85c5c2ef48 Updated config file 2022-11-23 11:17:53 +01:00
jeancf
0b58df16e2 Merge branch 'clean_url' into cfg_file
# Conflicts:
#	twoot.py
2022-11-23 11:00:06 +01:00
jeancf
f0b5ee98d2 Added missing parameter in docstring 2022-11-23 10:50:41 +01:00
jeancf
3930acc93f Updated README 2022-11-23 09:59:45 +01:00
jeancf
7e7fa4620f Implemented -l command-line option 2022-11-23 09:59:06 +01:00
jeancf
0d1be42dcc Added code to remove trackers from fragments 2022-11-22 22:01:27 +01:00
jeancf
9b5a76db60 updated README.md 2022-11-22 12:50:34 +01:00
jeancf
9625c2128b modified get request in deredir_url() 2022-11-22 11:38:49 +01:00
jeancf
e11102f4a6 User agent removed 2022-11-22 11:33:45 +01:00
jeancf
68e4918b02 Added debug message 2022-11-22 11:08:29 +01:00
jeancf
40d14c4d5d Added de-redirection of URL in tweet 2022-11-22 11:05:16 +01:00
jeancf
8930d5329f Updated README for release 2022-11-22 10:14:42 +01:00
jeancf
6860c53b11 Trying additional instance 2022-11-22 10:11:02 +01:00
jeancf
19eae4f210 Removed unreliable nitter instance 2022-11-22 09:56:56 +01:00
jeancf
f88414bb35 Added _remove_tracker_fragment() 2022-11-19 13:12:41 +01:00
jeancf
94294c6792 Updated command-line description 2022-11-18 14:16:04 +01:00
jeancf
2d0d1bc688 Updated README and CHANGELOG 2022-11-18 14:10:19 +01:00
jeancf
e6e6a77d3e Looking for better nitter instances 2022-11-18 13:59:34 +01:00
jeancf
6308fdc348 Reduced debug logging to essential in clean_url() 2022-11-18 13:56:22 +01:00
jeancf
37a4419ea6 Added missing parameter to process_media_body() 2022-11-18 13:32:16 +01:00
jeancf
9b1f4c9cee Swapped another nitter instance 2022-11-18 13:04:30 +01:00
jeancf
203e90dcd4 Added debug messager to clean_url() 2022-11-18 12:57:44 +01:00
jeancf
2a736de0c7 Replaced poor performing nitter instances 2022-11-18 12:17:34 +01:00
BuildTools
e2eff0445c Changed mode of twoot.py 2022-11-18 12:07:02 +01:00
jeancf
26b0619880 added command-line option 2022-11-18 11:55:06 +01:00
5 changed files with 196 additions and 97 deletions

3
.gitignore vendored
View File

@ -7,4 +7,5 @@ venv/
*.png *.png
*.xcf *.xcf
twoot.db twoot.db
__pycache__ *.toml
!default.toml

View File

@ -1,3 +1,9 @@
**XX NOV 2022** VERSION 2.4 Added command-line option (`-u`) to
remove tracking parameters from URLs included in tweets. A tracking URL
is a normal URL with parameters attached to it. These parameters are used
by marketing companies to identify the source of a click and the effectiveness
of a communication campaign.
**15 NOV 2022** VERSION 2.3 Added command-line option (`-s`) to **15 NOV 2022** VERSION 2.3 Added command-line option (`-s`) to
skip retweets. With this option, retweets will be ignored and not posted skip retweets. With this option, retweets will be ignored and not posted
on Mastodon. on Mastodon.

View File

@ -1,11 +1,11 @@
# Twoot # Twoot
Twoot is a python script that extracts tweets from a twitter feed and Twoot is a python script that mirrors tweets from a twitter account to a Mastodon account.
reposts them as toots on a Mastodon account. It is simple to set-up on a local machine, configurable and feature-rich.
**UPDATE 15 NOV 2022** VERSION 2.3 Added command-line option (`-s`) to **UPDATE XX NOV 2022** VERSION 2.5 Added command-line option (`-l`) to remove redirection
skip retweets. With this option, retweets will be ignored and not posted from links included in tweets. Obfuscated links are replaced by the URL that the resource
on Mastodon. is directly downloaded from.
> Previous updates can be found in CHANGELOG. > Previous updates can be found in CHANGELOG.
@ -23,15 +23,15 @@ on Mastodon.
* Optionally ignore retweets * Optionally ignore retweets
* Allows rate-limiting posts to Mastodon instance * Allows rate-limiting posts to Mastodon instance
## usage ## Usage
``` ```
twoot.py [-h] -t <twitter account> -i <mastodon instance> -m <mastodon account> twoot.py [-h] -t <twitter account> -i <mastodon instance> -m <mastodon account>
-p <mastodon password> [-r] [-s] [-v] [-a <max age in days)>] -p <mastodon password> [-r] [-s] [-u] [-v] [-a <max age in days)>]
[-d <min delay (in mins)>] [-c <max # of toots to post>] [-d <min delay (in mins)>] [-c <max # of toots to post>]
``` ```
## arguments ## Arguments
Assuming that the Twitter handle is @SuperDuperBot and the Mastodon account Assuming that the Twitter handle is @SuperDuperBot and the Mastodon account
is @superduperbot@botsin.space is @superduperbot@botsin.space
@ -40,15 +40,24 @@ is @superduperbot@botsin.space
|-------|--------------------------------------------------|--------------------|-----| |-------|--------------------------------------------------|--------------------|-----|
| -t | twitter account name without '@' | `SuperDuper` | Yes | | -t | twitter account name without '@' | `SuperDuper` | Yes |
| -i | Mastodon instance domain name | `botsin.space` | Yes | | -i | Mastodon instance domain name | `botsin.space` | Yes |
| -m | Mastodon username | `superduperbot` | Yes | | -m | Mastodon username | `sd@example.com` | Yes |
| -p | Mastodon password | `my_Sup3r-S4f3*pw` | Yes | | -p | Mastodon password | `my_Sup3r-S4f3*pw` | Yes |
| -v | upload videos to Mastodon | *N/A* | No | | -v | upload videos to Mastodon | *N/A* | No |
| -r | Post reply-to tweets (ignored by default) | *N/A* | No | | -r | Post reply-to tweets (ignored by default) | *N/A* | No |
| -s | Skip retweets (posted by default) | *N/A* | No | | -s | Skip retweets (posted by default) | *N/A* | No |
| -l | Remove link redirections | *N/A* | No |
| -u | Remove trackers from URLs | *N/A* | No |
| -a | Max. age of tweet to post (in days) | `5` | No | | -a | Max. age of tweet to post (in days) | `5` | No |
| -d | Min. age before posting new tweet (in minutes) | `15` | No | | -d | Min. age before posting new tweet (in minutes) | `15` | No |
| -c | Max number of toots allowed to post (cap) | `1` | No | | -c | Max number of toots allowed to post (cap) | `1` | No |
## Notes
`-l` will follow every link included in the tweet and replace it with the url that the
resource is directly downloaded from (if applicable). e.g. bit.ly/xxyyyzz -> example.com
Every link visit can take up to 5 sec (timeout) therefore this option will slow down
tweet processing.
When using the `-v` switch consider: When using the `-v` switch consider:
* whether the copyright of the content that you want to cross-post allows it * whether the copyright of the content that you want to cross-post allows it
@ -61,7 +70,8 @@ Default min delay is 0 minutes.
No limitation is applied to the number of toots uploaded if `-c` is not specified. No limitation is applied to the number of toots uploaded if `-c` is not specified.
## installation
## Installation
Make sure python3 is installed. Make sure python3 is installed.
@ -104,5 +114,5 @@ Twoot is known to be used for the following feeds (older first):
## Background ## Background
I started twoot when [tootbot](https://github.com/cquest/tootbot) I started twoot when [tootbot](https://github.com/cquest/tootbot)
stopped working. Tootbot relies on rss feeds from https://twitrss.me stopped working. Tootbot relied on RSS feeds from https://twitrss.me
that broke when Twitter refreshed their web UI in July 2019. that broke when Twitter refreshed their web UI in July 2019.

View File

@ -21,6 +21,10 @@ post_reply_to = false
# Default is false # Default is false
skip_retweets = false skip_retweets = false
# Replace redirected links in tweets with direct URLs
# Default is false
remove_link_redirections = false
# Clean up URLs in tweets to remove trackers # Clean up URLs in tweets to remove trackers
# Default is false # Default is false
remove_trackers_from_urls = false remove_trackers_from_urls = false

248
twoot.py
View File

@ -46,13 +46,13 @@ LOGGING_LEVEL = logging.DEBUG
HTTPS_REQ_TIMEOUT = 10 HTTPS_REQ_TIMEOUT = 10
NITTER_URLS = [ NITTER_URLS = [
'https://nitter.42l.fr', 'https://nitter.lacontrevoie.fr',
'https://nitter.pussthecat.org', 'https://nitter.pussthecat.org',
'https://nitter.fdn.fr', 'https://nitter.fdn.fr',
'https://nitter.eu', 'https://nitter.eu',
'https://nitter.namazso.eu', 'https://nitter.namazso.eu',
'https://nitter.moomoo.me', 'https://n.l5.ca',
'https://n.ramle.be', 'https://nitter.bus-hit.me',
] ]
# Update from https://www.whatismybrowser.com/guides/the-latest-user-agent/ # Update from https://www.whatismybrowser.com/guides/the-latest-user-agent/
@ -67,7 +67,39 @@ USER_AGENTS = [
] ]
def _remove_tracker_params(query_str): def deredir_url(url):
"""
Given a URL, return the URL that the page really downloads from
:param url: url to be de-redirected
:return: direct url
"""
# Get a copy of the default headers that requests would use
headers = requests.utils.default_headers()
# Update default headers with randomly selected user agent
headers.update(
{
'User-Agent': USER_AGENTS[random.randint(0, len(USER_AGENTS) - 1)],
}
)
ret = None
try:
# Download the page
ret = requests.get(url, headers=headers, timeout=5)
except:
# If anything goes wrong keep the URL intact
return url
if ret.url != url:
logging.debug("Removed redirection from: " + url + " to: " + ret.url)
# Return the URL that the page was downloaded from
return ret.url
def _remove_trackers_query(query_str):
""" """
private function private function
Given a query string from a URL, strip out the known trackers Given a query string from a URL, strip out the known trackers
@ -79,25 +111,49 @@ def _remove_tracker_params(query_str):
# tag by TikTok # tag by TikTok
# tags by Snapchat # tags by Snapchat
# tags by Facebook # tags by Facebook
params_to_remove = [ params_to_remove = {
"utm_source", "utm_medium", "utm_campaign", "utm_term", "utm_content", "gclid", "_ga", "gclsrc", "dclid",
"utm_source", "utm_medium", "utm_campaign", "utm_term", "utm_content", "utm_cid", "utm_reader", "utm_name", "utm_referrer", "utm_social", "utm_social-type",
"mkt_tok", "mkt_tok",
"campaign_name", "ad_set_name", "campaign_id", "ad_set_id", "campaign_name", "ad_set_name", "campaign_id", "ad_set_id",
"media", "interest_group_name", "fbclid", "campaign_name", "ad_set_name", "ad_set_id", "media", "interest_group_name", "ad_set_id"
"xtor" "igshid",
] "cvid", "oicd", "msclkid",
"soc_src", "soc_trk",
"_openstat", "yclid",
"xtor", "xtref", "adid",
}
query_to_clean = dict(parse_qsl(query_str, keep_blank_values=True)) query_to_clean = dict(parse_qsl(query_str, keep_blank_values=True))
query_cleaned = [(k, v) for k, v in query_to_clean.items() if not k in params_to_remove] query_cleaned = [(k, v) for k, v in query_to_clean.items() if k not in params_to_remove]
return urlencode(query_cleaned, doseq=True) return urlencode(query_cleaned, doseq=True)
def _remove_trackers_fragment(fragment_str):
"""
private function
Given a fragment string from a URL, strip out the known trackers
:param query_str: fragment to be cleaned
:return: cleaned fragment
"""
params_to_remove = {
"Echobox",
}
if '=' in fragment_str:
fragment_str = fragment_str.split('&')
query_cleaned = [i for i in fragment_str if i.split('=')[0] not in params_to_remove]
fragment_str = '&'.join(query_cleaned)
return fragment_str
def clean_url(dirty_url): def clean_url(dirty_url):
""" """
Given a URL, return it with the UTM parameters removed from query and fragment Given a URL, return it with the UTM parameters removed from query and fragment
:param dirty_url: url to be cleaned :param dirty_url: url to be cleaned
:return: url cleaned :return: url cleaned
>>> clean_url('https://exemple.com/video/this-aerial-ropeway?utm_source=Twitter&utm_medium=video&utm_campaign=organic&utm_content=Nov13&a=aaa&b=1#mkt_tok=tik&mkt_tik=tok') >>> clean_url('https://example.com/video/this-aerial-ropeway?utm_source=Twitter&utm_medium=video&utm_campaign=organic&utm_content=Nov13&a=aaa&b=1#mkt_tok=tik&mkt_tik=tok')
'https://exemple.com/video/this-aerial-ropeway?a=aaa&b=1#mkt_tik=tok' 'https://example.com/video/this-aerial-ropeway?a=aaa&b=1#mkt_tik=tok'
""" """
url_parsed = urlparse(dirty_url) url_parsed = urlparse(dirty_url)
@ -107,10 +163,13 @@ def clean_url(dirty_url):
url_parsed.netloc, url_parsed.netloc,
url_parsed.path, url_parsed.path,
url_parsed.params, url_parsed.params,
_remove_tracker_params(url_parsed.query), _remove_trackers_query(url_parsed.query),
_remove_tracker_params(url_parsed.fragment) _remove_trackers_fragment(url_parsed.fragment)
]) ])
if cleaned_url != dirty_url:
logging.debug('Cleaned URL from: ' + dirty_url + ' to: ' + cleaned_url)
return cleaned_url return cleaned_url
@ -138,8 +197,16 @@ def process_media_body(tt_iter):
# Only keep hashtag text # Only keep hashtag text
tweet_text += tag_text tweet_text += tag_text
else: else:
# This is a real link, keep url # This is a real link
tweet_text += clean_url(tag.get('href')) if TOML['options']['remove_link_redirections']:
url = deredir_url(tag.get('href'))
else:
url = tag.get('href')
if TOML['options']['remove_trackers_from_urls']:
tweet_text += clean_url(url)
else:
tweet_text += url
else: else:
logging.warning("No handler for tag in twitter text: " + tag.prettify()) logging.warning("No handler for tag in twitter text: " + tag.prettify())
@ -163,12 +230,11 @@ def process_card(nitter_url, card_container):
return list return list
def process_attachments(nitter_url, attachments_container, get_vids, twit_account, status_id, author_account): def process_attachments(nitter_url, attachments_container, status_id, author_account):
""" """
Extract images or video from attachments. Videos are downloaded on the file system. Extract images or video from attachments. Videos are downloaded on the file system.
:param nitter_url: url of nitter mirror :param nitter_url: url of nitter mirror
:param attachments_container: soup of 'div' tag containing attachments markup :param attachments_container: soup of 'div' tag containing attachments markup
:param get_vids: whether to download videos or not
:param twit_account: name of twitter account :param twit_account: name of twitter account
:param status_id: id of tweet being processed :param status_id: id of tweet being processed
:param author_account: author of tweet with video attachment :param author_account: author of tweet with video attachment
@ -187,7 +253,7 @@ def process_attachments(nitter_url, attachments_container, get_vids, twit_accoun
if gif_class is not None: if gif_class is not None:
gif_video_file = nitter_url + gif_class.source.get('src') gif_video_file = nitter_url + gif_class.source.get('src')
video_path = os.path.join('output', twit_account, status_id, author_account, status_id) video_path = os.path.join('output', TOML['config']['twitter_account'], status_id, author_account, status_id)
os.makedirs(video_path, exist_ok=True) os.makedirs(video_path, exist_ok=True)
# Open directory for writing file # Open directory for writing file
@ -214,12 +280,12 @@ def process_attachments(nitter_url, attachments_container, get_vids, twit_accoun
vid_in_tweet = False vid_in_tweet = False
vid_class = attachments_container.find('div', class_='video-container') vid_class = attachments_container.find('div', class_='video-container')
if vid_class is not None: if vid_class is not None:
if get_vids: if TOML['options']['upload_videos']:
import youtube_dl import youtube_dl
video_file = os.path.join('https://twitter.com', author_account, 'status', status_id) video_file = os.path.join('https://twitter.com', author_account, 'status', status_id)
ydl_opts = { ydl_opts = {
'outtmpl': "output/" + twit_account + "/" + status_id + "/%(id)s.%(ext)s", 'outtmpl': "output/" + TOML['config']['twitter_account'] + "/" + status_id + "/%(id)s.%(ext)s",
'format': "best[width<=500]", 'format': "best[width<=500]",
'socket_timeout': 60, 'socket_timeout': 60,
'quiet': True, 'quiet': True,
@ -251,12 +317,12 @@ def contains_class(body_classes, some_class):
return found return found
def is_time_valid(timestamp, max_age, min_delay): def is_time_valid(timestamp):
ret = True ret = True
# Check that the tweet is not too young (might be deleted) or too old # Check that the tweet is not too young (might be deleted) or too old
age_in_hours = (time.time() - float(timestamp)) / 3600.0 age_in_hours = (time.time() - float(timestamp)) / 3600.0
min_delay_in_hours = min_delay / 60.0 min_delay_in_hours = TOML['options']['tweet_delay'] / 60.0
max_age_in_hours = max_age * 24.0 max_age_in_hours = TOML['options']['tweet_max_age'] * 24.0
if age_in_hours < min_delay_in_hours or age_in_hours > max_age_in_hours: if age_in_hours < min_delay_in_hours or age_in_hours > max_age_in_hours:
ret = False ret = False
@ -264,7 +330,9 @@ def is_time_valid(timestamp, max_age, min_delay):
return ret return ret
def login(instance, account, password): def login(password):
instance = TOML['config']['mastodon_instance']
# Create Mastodon application if it does not exist yet # Create Mastodon application if it does not exist yet
if not os.path.isfile(instance + '.secret'): if not os.path.isfile(instance + '.secret'):
try: try:
@ -287,9 +355,9 @@ def login(instance, account, password):
) )
mastodon.log_in( mastodon.log_in(
username=account, username=TOML['options']['twitter_account'],
password=password, password=password,
to_file=account + ".secret" to_file=TOML['options']['twitter_account'] + ".secret"
) )
logging.info('Logging in to ' + instance) logging.info('Logging in to ' + instance)
@ -319,15 +387,16 @@ def main(argv):
parser.add_argument('-m', metavar='<mastodon account>', action='store') parser.add_argument('-m', metavar='<mastodon account>', action='store')
parser.add_argument('-p', metavar='<mastodon password>', action='store') parser.add_argument('-p', metavar='<mastodon password>', action='store')
parser.add_argument('-r', action='store_true', help='Also post replies to other tweets') parser.add_argument('-r', action='store_true', help='Also post replies to other tweets')
parser.add_argument('-s', action='store_true', help='Skip retweets') parser.add_argument('-s', action='store_true', help='Suppress retweets')
parser.add_argument('-l', action='store_true', help='Remove link redirection')
parser.add_argument('-u', action='store_true', help='Remove trackers from URLs') parser.add_argument('-u', action='store_true', help='Remove trackers from URLs')
parser.add_argument('-v', action='store_true', help='Ingest twitter videos and upload to Mastodon instance') parser.add_argument('-v', action='store_true', help='Ingest twitter videos and upload to Mastodon instance')
parser.add_argument('-a', metavar='<max age (in days)>', action='store', type=float) parser.add_argument('-a', metavar='<max age (in days)>', action='store', type=float)
parser.add_argument('-d', metavar='<min delay (in mins)>', action='store', type=float) parser.add_argument('-d', metavar='<min delay (in mins)>', action='store', type=float)
parser.add_argument('-c', metavar='<max # of toots to post>', action='store', type=int) parser.add_argument('-c', metavar='<max # of toots to post>', action='store', type=int)
# Parse command line # Create global struct containing configuration
args = vars(parser.parse_args()) global TOML
# We build the configuration by layering for each parameter: # We build the configuration by layering for each parameter:
# 1. A default value # 1. A default value
@ -339,15 +408,18 @@ def main(argv):
'upload_videos': False, 'upload_videos': False,
'post_reply_to': False, 'post_reply_to': False,
'skip_retweets': False, 'skip_retweets': False,
'remove_link_redirections': False,
'remove_trackers_from_urls': False, 'remove_trackers_from_urls': False,
'tweet_max_age': float(1), 'tweet_max_age': float(1),
'tweet_delay': float(0), 'tweet_delay': float(0),
'toot_cap': int(0), 'toot_cap': int(0),
} }
# Default empty toml # Default toml
# toml = {'config': {}, 'options': options} TOML = {'config': {}, 'options': options}
toml = {}
# Parse command line
args = vars(parser.parse_args())
# Load config file if it was provided # Load config file if it was provided
toml_file = args['f'] toml_file = args['f']
@ -355,7 +427,7 @@ def main(argv):
import tomli import tomli
try: try:
with open(toml_file, 'rb') as config_file: with open(toml_file, 'rb') as config_file:
toml = tomli.load(config_file) TOML = tomli.load(config_file)
except FileNotFoundError: except FileNotFoundError:
print('config file not found') print('config file not found')
exit(-1) exit(-1)
@ -363,37 +435,39 @@ def main(argv):
print('Malformed config file') print('Malformed config file')
exit(-1) exit(-1)
# Override config file parameter values with command-line values if provided # Override config parameters with command-line values if provided
if args['t'] is not None: if args['t'] is not None:
toml['config']['twitter_account'] = args['t'] TOML['config']['twitter_account'] = args['t']
if args['i'] is not None: if args['i'] is not None:
toml['config']['mastodon_instance'] = args['i'] TOML['config']['mastodon_instance'] = args['i']
if args['m'] is not None: if args['m'] is not None:
toml['config']['mastodon_user'] = args['m'] TOML['config']['mastodon_user'] = args['m']
if args['v'] is True: if args['v'] is True:
toml['options']['upload_videos'] = args['v'] TOML['options']['upload_videos'] = args['v']
if args['r'] is True: if args['r'] is True:
toml['options']['post_reply_to'] = args['r'] TOML['options']['post_reply_to'] = args['r']
if args['s'] is True: if args['s'] is True:
toml['options']['skip_retweets'] = args['s'] TOML['options']['skip_retweets'] = args['s']
if args['l'] is True:
TOML['options']['remove_link_redirections'] = args['l']
if args['u'] is True: if args['u'] is True:
toml['options']['remove_trackers_from_urls'] = args['u'] TOML['options']['remove_trackers_from_urls'] = args['u']
if args['a'] is not None: if args['a'] is not None:
toml['options']['tweet_max_age'] = float(args['a']) TOML['options']['tweet_max_age'] = float(args['a'])
if args['d'] is not None: if args['d'] is not None:
toml['options']['tweet_delay'] = float(args['d']) TOML['options']['tweet_delay'] = float(args['d'])
if args['c'] is not None: if args['c'] is not None:
toml['options']['toot_cap'] = int(args['c']) TOML['options']['toot_cap'] = int(args['c'])
mast_password = args['p'] mast_password = args['p']
# Verify that we have a minimum config to run # Verify that we have a minimum config to run
if 'twitter_account' not in toml['config'].keys(): if 'twitter_account' not in TOML['config'].keys():
print('CRITICAL: Missing Twitter account') print('CRITICAL: Missing Twitter account')
exit(-1) exit(-1)
if 'mastodon_instance' not in toml['config'].keys(): if 'mastodon_instance' not in TOML['config'].keys():
print('CRITICAL: Missing Mastodon instance') print('CRITICAL: Missing Mastodon instance')
exit(-1) exit(-1)
if 'mastodon_user' not in toml['config'].keys(): if 'mastodon_user' not in TOML['config'].keys():
print('CRITICAL: Missing Mastodon user') print('CRITICAL: Missing Mastodon user')
exit(-1) exit(-1)
if mast_password is None: if mast_password is None:
@ -401,30 +475,32 @@ def main(argv):
exit(-1) exit(-1)
# Remove previous log file # Remove previous log file
try: # try:
os.remove(toml['config']['twitter_account'] + '.log') # os.remove(TOML['config']['twitter_account'] + '.log')
except FileNotFoundError: # except FileNotFoundError:
pass # pass
# Setup logging to file # Setup logging to file
logging.basicConfig( logging.basicConfig(
filename=toml['config']['twitter_account'] + '.log', filename=TOML['config']['twitter_account'] + '.log',
level=LOGGING_LEVEL, level=LOGGING_LEVEL,
format='%(asctime)s %(levelname)-8s %(message)s', format='%(asctime)s %(levelname)-8s %(message)s',
datefmt='%Y-%m-%d %H:%M:%S', datefmt='%Y-%m-%d %H:%M:%S',
) )
logging.info('Running with the following parameters:') logging.info('Running with the following configuration:')
logging.info(' -f ' + str(toml_file)) logging.info(' Config file : ' + str(toml_file))
logging.info(' -t ' + toml['config']['twitter_account']) logging.info(' twitter_account : ' + TOML['config']['twitter_account'])
logging.info(' -i ' + toml['config']['mastodon_instance']) logging.info(' mastodon_instance : ' + TOML['config']['mastodon_instance'])
logging.info(' -m ' + toml['config']['mastodon_user']) logging.info(' mastodon_user : ' + TOML['config']['mastodon_user'])
logging.info(' -r ' + str(toml['options']['post_reply_to'])) logging.info(' post_reply_to : ' + str(TOML['options']['post_reply_to']))
logging.info(' -s ' + str(toml['options']['skip_retweets'])) logging.info(' skip_retweets : ' + str(TOML['options']['skip_retweets']))
logging.info(' -v ' + str(toml['options']['upload_videos'])) logging.info(' remove_link_redirections : ' + str(TOML['options']['remove_link_redirections']))
logging.info(' -a ' + str(toml['options']['tweet_max_age'])) logging.info(' remove_trackers_from_urls: ' + str(TOML['options']['remove_trackers_from_urls']))
logging.info(' -d ' + str(toml['options']['tweet_delay'])) logging.info(' upload_videos : ' + str(TOML['options']['upload_videos']))
logging.info(' -c ' + str(toml['options']['toot_cap'])) logging.info(' tweet_max_age : ' + str(TOML['options']['tweet_max_age']))
logging.info(' tweet_delay : ' + str(TOML['options']['tweet_delay']))
logging.info(' toot_cap : ' + str(TOML['options']['toot_cap']))
# Try to open database. If it does not exist, create it # Try to open database. If it does not exist, create it
sql = sqlite3.connect('twoot.db') sql = sqlite3.connect('twoot.db')
@ -458,12 +534,12 @@ def main(argv):
} }
) )
url = nitter_url + '/' + toml['config']['twitter_account'] url = nitter_url + '/' + TOML['config']['twitter_account']
# Use different page if we need to handle replies # Use different page if we need to handle replies
if toml['options']['post_reply_to']: if TOML['options']['post_reply_to']:
url += '/with_replies' url += '/with_replies'
# Download twitter page of user. # Download twitter page of user
try: try:
twit_account_page = session.get(url, headers=headers, timeout=HTTPS_REQ_TIMEOUT) twit_account_page = session.get(url, headers=headers, timeout=HTTPS_REQ_TIMEOUT)
except requests.exceptions.ConnectionError: except requests.exceptions.ConnectionError:
@ -493,7 +569,7 @@ def main(argv):
ta = soup.find('meta', property='og:title').get('content') ta = soup.find('meta', property='og:title').get('content')
ta_match = re.search(r'\(@(.+)\)', ta) ta_match = re.search(r'\(@(.+)\)', ta)
if ta_match is not None: if ta_match is not None:
toml['config']['twitter_account'] = ta_match.group(1) TOML['config']['twitter_account'] = ta_match.group(1)
# Extract twitter timeline # Extract twitter timeline
timeline = soup.find_all('div', class_='timeline-item') timeline = soup.find_all('div', class_='timeline-item')
@ -522,13 +598,13 @@ def main(argv):
timestamp = datetime.datetime.strptime(time_string, '%b %d, %Y · %I:%M %p %Z').timestamp() timestamp = datetime.datetime.strptime(time_string, '%b %d, %Y · %I:%M %p %Z').timestamp()
# Check if time is within acceptable range # Check if time is within acceptable range
if not is_time_valid(timestamp, toml['options']['tweet_max_age'], toml['options']['tweet_delay']): if not is_time_valid(timestamp):
out_date_cnt += 1 out_date_cnt += 1
logging.debug("Tweet outside valid time range, skipping") logging.debug("Tweet outside valid time range, skipping")
continue continue
# Check if retweets must be skipped # Check if retweets must be skipped
if toml['options']['skip_retweets']: if TOML['options']['skip_retweets']:
# Check if this tweet is a retweet # Check if this tweet is a retweet
if len(status.select("div.tweet-body > div > div.retweet-header")) != 0: if len(status.select("div.tweet-body > div > div.retweet-header")) != 0:
logging.debug("Retweet ignored per command-line configuration") logging.debug("Retweet ignored per command-line configuration")
@ -537,7 +613,7 @@ def main(argv):
# Check in database if tweet has already been posted # Check in database if tweet has already been posted
db.execute( db.execute(
"SELECT * FROM toots WHERE twitter_account=? AND mastodon_instance=? AND mastodon_account=? AND tweet_id=?", "SELECT * FROM toots WHERE twitter_account=? AND mastodon_instance=? AND mastodon_account=? AND tweet_id=?",
(toml['config']['twitter_account'], toml['config']['mastodon_instance'], toml['config']['mastodon_user'], tweet_id)) (TOML['config']['twitter_account'], TOML['config']['mastodon_instance'], TOML['config']['mastodon_user'], tweet_id))
tweet_in_db = db.fetchone() tweet_in_db = db.fetchone()
if tweet_in_db is not None: if tweet_in_db is not None:
@ -591,8 +667,10 @@ def main(argv):
# Process attachment: capture image or .mp4 url or download twitter video # Process attachment: capture image or .mp4 url or download twitter video
attachments_class = status.find('div', class_='attachments') attachments_class = status.find('div', class_='attachments')
if attachments_class is not None: if attachments_class is not None:
pics, vid_in_tweet = process_attachments(nitter_url, attachments_class, toml['options']['upload_videos'], toml['config']['twitter_account'], status_id, pics, vid_in_tweet = process_attachments(nitter_url,
author_account) attachments_class,
status_id, author_account
)
photos.extend(pics) photos.extend(pics)
if vid_in_tweet: if vid_in_tweet:
tweet_text += '\n\n[Video embedded in original tweet]' tweet_text += '\n\n[Video embedded in original tweet]'
@ -628,7 +706,7 @@ def main(argv):
# Check if video was downloaded # Check if video was downloaded
video_file = None video_file = None
video_path = Path('./output') / toml['config']['twitter_account'] / status_id video_path = Path('./output') / TOML['config']['twitter_account'] / status_id
if video_path.exists(): if video_path.exists():
# list video files # list video files
video_file_list = list(video_path.glob('*.mp4')) video_file_list = list(video_path.glob('*.mp4'))
@ -661,7 +739,7 @@ def main(argv):
# Login to account on Mastodon instance # Login to account on Mastodon instance
mastodon = None mastodon = None
if len(tweets) != 0: if len(tweets) != 0:
mastodon = login(toml['config']['mastodon_instance'], toml['config']['mastodon_user'], mast_password) mastodon = login(mast_password)
# ********************************************************** # **********************************************************
# Iterate tweets in list. # Iterate tweets in list.
@ -671,8 +749,8 @@ def main(argv):
posted_cnt = 0 posted_cnt = 0
for tweet in reversed(tweets): for tweet in reversed(tweets):
# Check if we have reached the cap on the number of toots to post # Check if we have reached the cap on the number of toots to post
if toml['options']['toot_cap'] != 0 and posted_cnt >= toml['options']['toot_cap']: if TOML['options']['toot_cap'] != 0 and posted_cnt >= TOML['options']['toot_cap']:
logging.info('%d toots not posted due to configured cap', len(tweets) - toml['options']['toot_cap']) logging.info('%d toots not posted due to configured cap', len(tweets) - TOML['options']['toot_cap'])
break break
logging.debug('Uploading Tweet %s', tweet["tweet_id"]) logging.debug('Uploading Tweet %s', tweet["tweet_id"])
@ -715,8 +793,8 @@ def main(argv):
toot = {} toot = {}
try: try:
mastodon = Mastodon( mastodon = Mastodon(
access_token=toml['config']['mastodon_user'] + '.secret', access_token=TOML['config']['mastodon_user'] + '.secret',
api_base_url='https://' + toml['config']['mastodon_instance'] api_base_url='https://' + TOML['config']['mastodon_instance']
) )
if len(media_ids) == 0: if len(media_ids) == 0:
@ -725,31 +803,31 @@ def main(argv):
toot = mastodon.status_post(tweet['tweet_text'], media_ids=media_ids, visibility='public') toot = mastodon.status_post(tweet['tweet_text'], media_ids=media_ids, visibility='public')
except MastodonError as me: except MastodonError as me:
logging.error('posting ' + tweet['tweet_text'] + ' to ' + toml['config']['mastodon_instance'] + ' Failed') logging.error('posting ' + tweet['tweet_text'] + ' to ' + TOML['config']['mastodon_instance'] + ' Failed')
logging.error(me) logging.error(me)
else: else:
posted_cnt += 1 posted_cnt += 1
logging.debug('Tweet %s posted on %s', tweet['tweet_id'], toml['config']['mastodon_user']) logging.debug('Tweet %s posted on %s', tweet['tweet_id'], TOML['config']['mastodon_user'])
# Insert toot id into database # Insert toot id into database
if 'id' in toot: if 'id' in toot:
db.execute("INSERT INTO toots VALUES ( ? , ? , ? , ? , ? )", db.execute("INSERT INTO toots VALUES ( ? , ? , ? , ? , ? )",
(toml['config']['twitter_account'], toml['config']['mastodon_instance'], toml['config']['mastodon_user'], tweet['tweet_id'], toot['id'])) (TOML['config']['twitter_account'], TOML['config']['mastodon_instance'], TOML['config']['mastodon_user'], tweet['tweet_id'], toot['id']))
sql.commit() sql.commit()
logging.info(str(posted_cnt) + ' tweets posted to Mastodon') logging.info(str(posted_cnt) + ' tweets posted to Mastodon')
# Cleanup downloaded video files # Cleanup downloaded video files
try: try:
shutil.rmtree('./output/' + toml['config']['twitter_account']) shutil.rmtree('./output/' + TOML['config']['twitter_account'])
except FileNotFoundError: # The directory does not exist except FileNotFoundError: # The directory does not exist
pass pass
# Evaluate excess records in database # Evaluate excess records in database
excess_count = 0 excess_count = 0
db.execute('SELECT count(*) FROM toots WHERE twitter_account=?', (toml['config']['twitter_account'],)) db.execute('SELECT count(*) FROM toots WHERE twitter_account=?', (TOML['config']['twitter_account'],))
db_count = db.fetchone() db_count = db.fetchone()
if db_count is not None: if db_count is not None:
excess_count = db_count[0] - MAX_REC_COUNT excess_count = db_count[0] - MAX_REC_COUNT
@ -765,7 +843,7 @@ def main(argv):
LIMIT ? LIMIT ?
) )
DELETE from toots DELETE from toots
WHERE tweet_id IN excess''', (toml['config']['twitter_account'], excess_count)) WHERE tweet_id IN excess''', (TOML['config']['twitter_account'], excess_count))
sql.commit() sql.commit()
logging.info('Deleted ' + str(excess_count) + ' old records from database.') logging.info('Deleted ' + str(excess_count) + ' old records from database.')