Mirror of https://gitlab.com/jeancf/twoot.git (synced 2024-11-30 15:31:11 +00:00)
Compare commits: 14d698f057 ... 7ebc2927a7 (33 commits)

Commits in this range:

7ebc2927a7, db8d99fc4e, d79da68b02, 10616d6c88, b6315f193c, 294bf1fae1,
82951bfbd3, 4a73a6252e, 85c5c2ef48, 0b58df16e2, f0b5ee98d2, 3930acc93f,
7e7fa4620f, 0d1be42dcc, 9b5a76db60, 9625c2128b, e11102f4a6, 68e4918b02,
40d14c4d5d, 8930d5329f, 6860c53b11, 19eae4f210, f88414bb35, 94294c6792,
2d0d1bc688, e6e6a77d3e, 6308fdc348, 37a4419ea6, 9b1f4c9cee, 203e90dcd4,
2a736de0c7, e2eff0445c, 26b0619880
**.gitignore** (vendored, 3 changes)

```diff
@@ -7,4 +7,5 @@ venv/
 *.png
 *.xcf
 twoot.db
 __pycache__
+*.toml
+!default.toml
```
```diff
@@ -1,3 +1,9 @@
+**XX NOV 2022** VERSION 2.4 Added command-line option (`-u`) to
+remove tracking parameters from URLs included in tweets. A tracking URL
+is a normal URL with parameters attached to it. These parameters are used
+by marketing companies to identify the source of a click and the effectiveness
+of a communication campaign.
+
 **15 NOV 2022** VERSION 2.3 Added command-line option (`-s`) to
 skip retweets. With this option, retweets will be ignored and not posted
 on Mastodon.
```
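For illustration, this is roughly what removing tracking parameters means in practice. The sketch below uses only the standard library; the helper name `strip_tracking_params` and the short tracker list are placeholders, while the real list and logic live in the `_remove_trackers_query()` hunk of twoot.py further down this page.

```python
from urllib.parse import urlparse, urlunparse, parse_qsl, urlencode

# Placeholder list for illustration; twoot.py defines the full set of trackers.
TRACKERS = {"utm_source", "utm_medium", "utm_campaign", "utm_term", "utm_content", "mkt_tok"}

def strip_tracking_params(url):
    parts = urlparse(url)
    # Keep only query parameters that are not known trackers
    kept = [(k, v) for k, v in parse_qsl(parts.query, keep_blank_values=True) if k not in TRACKERS]
    return urlunparse(parts._replace(query=urlencode(kept, doseq=True)))

print(strip_tracking_params("https://example.com/page?utm_source=Twitter&id=42"))
# -> https://example.com/page?id=42
```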
**README.md** (32 changes)

```diff
@@ -1,11 +1,11 @@
 # Twoot
 
-Twoot is a python script that extracts tweets from a twitter feed and
-reposts them as toots on a Mastodon account.
+Twoot is a python script that mirrors tweets from a twitter account to a Mastodon account.
+It is simple to set-up on a local machine, configurable and feature-rich.
 
-**UPDATE 15 NOV 2022** VERSION 2.3 Added command-line option (`-s`) to
-skip retweets. With this option, retweets will be ignored and not posted
-on Mastodon.
+**UPDATE XX NOV 2022** VERSION 2.5 Added command-line option (`-l`) to remove redirection
+from links included in tweets. Obfuscated links are replaced by the URL that the resource
+is directly downloaded from.
 
 > Previous updates can be found in CHANGELOG.
```

````diff
@@ -23,15 +23,15 @@ on Mastodon.
 * Optionally ignore retweets
 * Allows rate-limiting posts to Mastodon instance
 
-## usage
+## Usage
 
 ```
 twoot.py [-h] -t <twitter account> -i <mastodon instance> -m <mastodon account>
-         -p <mastodon password> [-r] [-s] [-v] [-a <max age in days)>]
+         -p <mastodon password> [-r] [-s] [-u] [-v] [-a <max age in days)>]
          [-d <min delay (in mins)>] [-c <max # of toots to post>]
 ```
 
-## arguments
+## Arguments
 
 Assuming that the Twitter handle is @SuperDuperBot and the Mastodon account
 is @superduperbot@botsin.space
````
```diff
@@ -40,15 +40,24 @@ is @superduperbot@botsin.space
 |-------|--------------------------------------------------|--------------------|-----|
 | -t | twitter account name without '@' | `SuperDuper` | Yes |
 | -i | Mastodon instance domain name | `botsin.space` | Yes |
-| -m | Mastodon username | `superduperbot` | Yes |
+| -m | Mastodon username | `sd@example.com` | Yes |
 | -p | Mastodon password | `my_Sup3r-S4f3*pw` | Yes |
 | -v | upload videos to Mastodon | *N/A* | No |
 | -r | Post reply-to tweets (ignored by default) | *N/A* | No |
 | -s | Skip retweets (posted by default) | *N/A* | No |
+| -l | Remove link redirections | *N/A* | No |
+| -u | Remove trackers from URLs | *N/A* | No |
 | -a | Max. age of tweet to post (in days) | `5` | No |
 | -d | Min. age before posting new tweet (in minutes) | `15` | No |
 | -c | Max number of toots allowed to post (cap) | `1` | No |
 
+## Notes
+
+`-l` will follow every link included in the tweet and replace them with the url that the
+resource is directly downloaded from (if applicable). e.g. bit.ly/xxyyyzz -> example.com
+Every link visit can take up to 5 sec (timeout) therefore this option will slow down
+tweet processing.
+
 When using the `-v` switch consider:
 
 * whether the copyright of the content that you want to cross-post allows it
```
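The `-l` behaviour described above can be illustrated with a small sketch based on `requests`, which is also what the new `deredir_url()` function in the twoot.py diff below relies on. The helper name and the shortener URL are placeholders, and the 5-second timeout matches the note in the README hunk.

```python
import requests

def resolve_redirections(url, timeout=5):
    """Return the final URL a link ultimately points to, or the original URL on any error."""
    try:
        # requests follows HTTP redirects by default; .url is the address actually fetched
        response = requests.get(url, timeout=timeout)
        return response.url
    except requests.RequestException:
        return url

# Example with a placeholder shortener address taken from the README text
print(resolve_redirections("https://bit.ly/xxyyyzz"))
```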
```diff
@@ -61,7 +70,8 @@ Default min delay is 0 minutes.
 
 No limitation is applied to the number of toots uploaded if `-c` is not specified.
 
-## installation
+## Installation
 
 Make sure python3 is installed.
 
```

```diff
@@ -104,5 +114,5 @@ Twoot is known to be used for the following feeds (older first):
 ## Background
 
 I started twoot when [tootbot](https://github.com/cquest/tootbot)
-stopped working. Tootbot relies on rss feeds from https://twitrss.me
+stopped working. Tootbot relied on RSS feeds from https://twitrss.me
 that broke when Twitter refreshed their web UI in July 2019.
```
```diff
@@ -21,6 +21,10 @@ post_reply_to = false
 # Default is false
 skip_retweets = false
 
+# Replace redirected links in tweets with direct URLs
+# Default is false
+remove_link_redirections = false
+
 # Clean up URLs in tweets to remove trackers
 # Default is false
 remove_trackers_from_urls = false
```
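For context, a minimal sketch of reading these options with `tomli`, the parser twoot.py imports later in this diff. The file name `default.toml` and the assumption that the keys sit in the `options` table (the table twoot.py reads them from) are taken from the rest of this compare; the printed values are the defaults shown above.

```python
import tomli  # TOML parser used by twoot.py (tomllib in Python 3.11+)

with open("default.toml", "rb") as config_file:
    config = tomli.load(config_file)

# Both options introduced in this range default to false in the shipped file
print(config["options"]["remove_link_redirections"])   # False
print(config["options"]["remove_trackers_from_urls"])  # False
```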
**twoot.py** (248 changes)

```diff
@@ -46,13 +46,13 @@ LOGGING_LEVEL = logging.DEBUG
 HTTPS_REQ_TIMEOUT = 10
 
 NITTER_URLS = [
-    'https://nitter.42l.fr',
+    'https://nitter.lacontrevoie.fr',
     'https://nitter.pussthecat.org',
     'https://nitter.fdn.fr',
     'https://nitter.eu',
     'https://nitter.namazso.eu',
-    'https://nitter.moomoo.me',
-    'https://n.ramle.be',
+    'https://n.l5.ca',
+    'https://nitter.bus-hit.me',
 ]
 
 # Update from https://www.whatismybrowser.com/guides/the-latest-user-agent/
```

```diff
@@ -67,7 +67,39 @@ USER_AGENTS = [
 ]
 
 
-def _remove_tracker_params(query_str):
+def deredir_url(url):
+    """
+    Given a URL, return the URL that the page really downloads from
+    :param url: url to be de-redirected
+    :return: direct url
+    """
+
+    # Get a copy of the default headers that requests would use
+    headers = requests.utils.default_headers()
+
+    # Update default headers with randomly selected user agent
+    headers.update(
+        {
+            'User-Agent': USER_AGENTS[random.randint(0, len(USER_AGENTS) - 1)],
+        }
+    )
+
+    ret = None
+    try:
+        # Download the page
+        ret = requests.get(url, headers=headers, timeout=5)
+    except:
+        # If anything goes wrong keep the URL intact
+        return url
+
+    if ret.url != url:
+        logging.debug("Removed redirection from: " + url + " to: " + ret.url)
+
+    # Return the URL that the page was downloaded from
+    return ret.url
+
+
+def _remove_trackers_query(query_str):
     """
     private function
     Given a query string from a URL, strip out the known trackers
```
```diff
@@ -79,25 +111,49 @@ def _remove_tracker_params(query_str):
     # tag by TikTok
     # tags by Snapchat
     # tags by Facebook
-    params_to_remove = [
-        "utm_source", "utm_medium", "utm_campaign", "utm_term", "utm_content",
+    params_to_remove = {
+        "gclid", "_ga", "gclsrc", "dclid",
+        "utm_source", "utm_medium", "utm_campaign", "utm_term", "utm_content", "utm_cid", "utm_reader", "utm_name", "utm_referrer", "utm_social", "utm_social-type",
         "mkt_tok",
-        "campaign_name", "ad_set_name", "campaign_id", "ad_set_id",
-        "media", "interest_group_name",
-        "xtor"
-    ]
+        "fbclid", "campaign_name", "ad_set_name", "ad_set_id", "media", "interest_group_name", "ad_set_id"
+        "igshid",
+        "cvid", "oicd", "msclkid",
+        "soc_src", "soc_trk",
+        "_openstat", "yclid",
+        "xtor", "xtref", "adid",
+    }
     query_to_clean = dict(parse_qsl(query_str, keep_blank_values=True))
-    query_cleaned = [(k, v) for k, v in query_to_clean.items() if not k in params_to_remove]
+    query_cleaned = [(k, v) for k, v in query_to_clean.items() if k not in params_to_remove]
     return urlencode(query_cleaned, doseq=True)
 
 
+def _remove_trackers_fragment(fragment_str):
+    """
+    private function
+    Given a fragment string from a URL, strip out the known trackers
+    :param query_str: fragment to be cleaned
+    :return: cleaned fragment
+    """
+
+    params_to_remove = {
+        "Echobox",
+    }
+
+    if '=' in fragment_str:
+        fragment_str = fragment_str.split('&')
+        query_cleaned = [i for i in fragment_str if i.split('=')[0] not in params_to_remove]
+        fragment_str = '&'.join(query_cleaned)
+    return fragment_str
+
+
 def clean_url(dirty_url):
     """
     Given a URL, return it with the UTM parameters removed from query and fragment
     :param dirty_url: url to be cleaned
     :return: url cleaned
-    >>> clean_url('https://exemple.com/video/this-aerial-ropeway?utm_source=Twitter&utm_medium=video&utm_campaign=organic&utm_content=Nov13&a=aaa&b=1#mkt_tok=tik&mkt_tik=tok')
-    'https://exemple.com/video/this-aerial-ropeway?a=aaa&b=1#mkt_tik=tok'
+    >>> clean_url('https://example.com/video/this-aerial-ropeway?utm_source=Twitter&utm_medium=video&utm_campaign=organic&utm_content=Nov13&a=aaa&b=1#mkt_tok=tik&mkt_tik=tok')
+    'https://example.com/video/this-aerial-ropeway?a=aaa&b=1#mkt_tik=tok'
     """
 
     url_parsed = urlparse(dirty_url)
```
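One detail worth flagging in the hunk above: the added line ending in `"interest_group_name", "ad_set_id"` has no trailing comma, so Python concatenates it with the `"igshid"` literal on the next line and the set ends up containing `"ad_set_idigshid"` rather than `"igshid"` (the same line also lists `"ad_set_id"` twice). A corrected version of those entries might look like the sketch below; this is an editorial suggestion, not part of the commits shown.

```python
# Corrected sketch: the trailing comma keeps "igshid" as its own entry,
# and the duplicate "ad_set_id" is dropped.
params_to_remove = {
    # ... other trackers unchanged ...
    "fbclid", "campaign_name", "ad_set_name", "ad_set_id", "media", "interest_group_name",
    "igshid",
    # ... other trackers unchanged ...
}
```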
```diff
@@ -107,10 +163,13 @@ def clean_url(dirty_url):
         url_parsed.netloc,
         url_parsed.path,
         url_parsed.params,
-        _remove_tracker_params(url_parsed.query),
-        _remove_tracker_params(url_parsed.fragment)
+        _remove_trackers_query(url_parsed.query),
+        _remove_trackers_fragment(url_parsed.fragment)
     ])
 
+    if cleaned_url != dirty_url:
+        logging.debug('Cleaned URL from: ' + dirty_url + ' to: ' + cleaned_url)
+
     return cleaned_url
 
 
```
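For reference, `urlparse()` splits a URL into the six components that `urlunparse()` reassembles in the hunk above, and the query and the fragment are passed to separate cleaning helpers. A quick illustration with a made-up URL:

```python
from urllib.parse import urlparse

parts = urlparse("https://example.com/video/clip?utm_source=Twitter&a=aaa#mkt_tok=tik&mkt_tik=tok")
print(parts.scheme)    # 'https'
print(parts.netloc)    # 'example.com'
print(parts.path)      # '/video/clip'
print(parts.params)    # ''  (rarely used path parameters)
print(parts.query)     # 'utm_source=Twitter&a=aaa'
print(parts.fragment)  # 'mkt_tok=tik&mkt_tik=tok'
```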
```diff
@@ -138,8 +197,16 @@ def process_media_body(tt_iter):
             # Only keep hashtag text
             tweet_text += tag_text
         else:
-            # This is a real link, keep url
-            tweet_text += clean_url(tag.get('href'))
+            # This is a real link
+            if TOML['options']['remove_link_redirections']:
+                url = deredir_url(tag.get('href'))
+            else:
+                url = tag.get('href')
+
+            if TOML['options']['remove_trackers_from_urls']:
+                tweet_text += clean_url(url)
+            else:
+                tweet_text += url
     else:
         logging.warning("No handler for tag in twitter text: " + tag.prettify())
```

```diff
@@ -163,12 +230,11 @@ def process_card(nitter_url, card_container):
     return list
 
 
-def process_attachments(nitter_url, attachments_container, get_vids, twit_account, status_id, author_account):
+def process_attachments(nitter_url, attachments_container, status_id, author_account):
     """
     Extract images or video from attachments. Videos are downloaded on the file system.
     :param nitter_url: url of nitter mirror
     :param attachments_container: soup of 'div' tag containing attachments markup
-    :param get_vids: whether to download videos or not
-    :param twit_account: name of twitter account
     :param status_id: id of tweet being processed
     :param author_account: author of tweet with video attachment
```

```diff
@@ -187,7 +253,7 @@ def process_attachments(nitter_url, attachments_container, get_vids, twit_account, status_id, author_account):
     if gif_class is not None:
         gif_video_file = nitter_url + gif_class.source.get('src')
 
-        video_path = os.path.join('output', twit_account, status_id, author_account, status_id)
+        video_path = os.path.join('output', TOML['config']['twitter_account'], status_id, author_account, status_id)
         os.makedirs(video_path, exist_ok=True)
 
         # Open directory for writing file
```

```diff
@@ -214,12 +280,12 @@ def process_attachments(nitter_url, attachments_container, get_vids, twit_account, status_id, author_account):
     vid_in_tweet = False
     vid_class = attachments_container.find('div', class_='video-container')
     if vid_class is not None:
-        if get_vids:
+        if TOML['options']['upload_videos']:
             import youtube_dl
 
             video_file = os.path.join('https://twitter.com', author_account, 'status', status_id)
             ydl_opts = {
-                'outtmpl': "output/" + twit_account + "/" + status_id + "/%(id)s.%(ext)s",
+                'outtmpl': "output/" + TOML['config']['twitter_account'] + "/" + status_id + "/%(id)s.%(ext)s",
                 'format': "best[width<=500]",
                 'socket_timeout': 60,
                 'quiet': True,
```

```diff
@@ -251,12 +317,12 @@ def contains_class(body_classes, some_class):
     return found
 
 
-def is_time_valid(timestamp, max_age, min_delay):
+def is_time_valid(timestamp):
     ret = True
     # Check that the tweet is not too young (might be deleted) or too old
     age_in_hours = (time.time() - float(timestamp)) / 3600.0
-    min_delay_in_hours = min_delay / 60.0
-    max_age_in_hours = max_age * 24.0
+    min_delay_in_hours = TOML['options']['tweet_delay'] / 60.0
+    max_age_in_hours = TOML['options']['tweet_max_age'] * 24.0
 
     if age_in_hours < min_delay_in_hours or age_in_hours > max_age_in_hours:
         ret = False
```
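A quick numeric illustration of the window check above, using the defaults that appear later in this diff (`tweet_delay = 0` minutes, `tweet_max_age = 1` day); the timestamp value is made up.

```python
import time

tweet_delay = 0.0    # minutes (default in this diff)
tweet_max_age = 1.0  # days (default in this diff)

timestamp = time.time() - 2 * 3600           # a tweet posted 2 hours ago (made-up value)
age_in_hours = (time.time() - timestamp) / 3600.0

min_delay_in_hours = tweet_delay / 60.0      # 0.0
max_age_in_hours = tweet_max_age * 24.0      # 24.0

# 0.0 <= 2.0 <= 24.0, so this tweet falls inside the valid window and will be considered
print(min_delay_in_hours <= age_in_hours <= max_age_in_hours)  # True
```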
```diff
@@ -264,7 +330,9 @@ def is_time_valid(timestamp, max_age, min_delay):
     return ret
 
 
-def login(instance, account, password):
+def login(password):
+
+    instance = TOML['config']['mastodon_instance']
     # Create Mastodon application if it does not exist yet
     if not os.path.isfile(instance + '.secret'):
         try:
```

```diff
@@ -287,9 +355,9 @@ def login(instance, account, password):
         )
 
         mastodon.log_in(
-            username=account,
+            username=TOML['options']['twitter_account'],
             password=password,
-            to_file=account + ".secret"
+            to_file=TOML['options']['twitter_account'] + ".secret"
         )
         logging.info('Logging in to ' + instance)
```
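One apparent inconsistency in the hunk above: the Mastodon username is read from `TOML['options']['twitter_account']`, while everywhere else in this diff `twitter_account` sits under `TOML['config']` and the account used for posting is `TOML['config']['mastodon_user']`. The stand-in below only illustrates where those values live in the structure the rest of the script uses; the account names are the examples from the README.

```python
# Minimal stand-in for the configuration structure used elsewhere in this diff
TOML = {
    'config': {
        'twitter_account': 'SuperDuper',
        'mastodon_instance': 'botsin.space',
        'mastodon_user': 'superduperbot',
    },
    'options': {},
}

# The hunk above reads TOML['options']['twitter_account'], which does not exist in this layout;
# the value the rest of the script posts with is:
print(TOML['config']['mastodon_user'])  # 'superduperbot'
```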
```diff
@@ -319,15 +387,16 @@ def main(argv):
     parser.add_argument('-m', metavar='<mastodon account>', action='store')
     parser.add_argument('-p', metavar='<mastodon password>', action='store')
     parser.add_argument('-r', action='store_true', help='Also post replies to other tweets')
-    parser.add_argument('-s', action='store_true', help='Skip retweets')
+    parser.add_argument('-s', action='store_true', help='Suppress retweets')
+    parser.add_argument('-l', action='store_true', help='Remove link redirection')
     parser.add_argument('-u', action='store_true', help='Remove trackers from URLs')
     parser.add_argument('-v', action='store_true', help='Ingest twitter videos and upload to Mastodon instance')
     parser.add_argument('-a', metavar='<max age (in days)>', action='store', type=float)
     parser.add_argument('-d', metavar='<min delay (in mins)>', action='store', type=float)
     parser.add_argument('-c', metavar='<max # of toots to post>', action='store', type=int)
 
-    # Parse command line
-    args = vars(parser.parse_args())
+    # Create global struct containing configuration
+    global TOML
 
     # We build the configuration by layering for each parameter:
     # 1. A default value
```

```diff
@@ -339,15 +408,18 @@ def main(argv):
         'upload_videos': False,
         'post_reply_to': False,
         'skip_retweets': False,
+        'remove_link_redirections': False,
         'remove_trackers_from_urls': False,
         'tweet_max_age': float(1),
         'tweet_delay': float(0),
         'toot_cap': int(0),
     }
 
-    # Default empty toml
-    # toml = {'config': {}, 'options': options}
-    toml = {}
+    # Default toml
+    TOML = {'config': {}, 'options': options}
+
+    # Parse command line
+    args = vars(parser.parse_args())
 
     # Load config file if it was provided
     toml_file = args['f']
```
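The two hunks above set up the layering described in the comments (starting from a default value for each parameter). The sketch below illustrates that precedence with plain dictionaries and made-up values; it is a generic illustration, not the script's exact code.

```python
# Minimal sketch of layered configuration: defaults, then config file, then command line
defaults = {'upload_videos': False, 'toot_cap': 0}

config_file_values = {'toot_cap': 4}      # what a TOML file might provide
cli_values = {'upload_videos': True}      # what command-line flags might provide

options = {**defaults, **config_file_values, **cli_values}
print(options)  # {'upload_videos': True, 'toot_cap': 4}
```

In the hunks that follow, the config file is loaded with `TOML = tomli.load(...)` and individual command-line values are then written into `TOML` one key at a time.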
```diff
@@ -355,7 +427,7 @@ def main(argv):
         import tomli
         try:
             with open(toml_file, 'rb') as config_file:
-                toml = tomli.load(config_file)
+                TOML = tomli.load(config_file)
         except FileNotFoundError:
             print('config file not found')
             exit(-1)
```

```diff
@@ -363,37 +435,39 @@ def main(argv):
             print('Malformed config file')
             exit(-1)
 
-    # Override config file parameter values with command-line values if provided
+    # Override config parameters with command-line values if provided
     if args['t'] is not None:
-        toml['config']['twitter_account'] = args['t']
+        TOML['config']['twitter_account'] = args['t']
     if args['i'] is not None:
-        toml['config']['mastodon_instance'] = args['i']
+        TOML['config']['mastodon_instance'] = args['i']
     if args['m'] is not None:
-        toml['config']['mastodon_user'] = args['m']
+        TOML['config']['mastodon_user'] = args['m']
     if args['v'] is True:
-        toml['options']['upload_videos'] = args['v']
+        TOML['options']['upload_videos'] = args['v']
     if args['r'] is True:
-        toml['options']['post_reply_to'] = args['r']
+        TOML['options']['post_reply_to'] = args['r']
     if args['s'] is True:
-        toml['options']['skip_retweets'] = args['s']
+        TOML['options']['skip_retweets'] = args['s']
+    if args['l'] is True:
+        TOML['options']['remove_link_redirections'] = args['l']
     if args['u'] is True:
-        toml['options']['remove_trackers_from_urls'] = args['u']
+        TOML['options']['remove_trackers_from_urls'] = args['u']
     if args['a'] is not None:
-        toml['options']['tweet_max_age'] = float(args['a'])
+        TOML['options']['tweet_max_age'] = float(args['a'])
     if args['d'] is not None:
-        toml['options']['tweet_delay'] = float(args['d'])
+        TOML['options']['tweet_delay'] = float(args['d'])
     if args['c'] is not None:
-        toml['options']['toot_cap'] = int(args['c'])
+        TOML['options']['toot_cap'] = int(args['c'])
     mast_password = args['p']
 
     # Verify that we have a minimum config to run
-    if 'twitter_account' not in toml['config'].keys():
+    if 'twitter_account' not in TOML['config'].keys():
         print('CRITICAL: Missing Twitter account')
         exit(-1)
-    if 'mastodon_instance' not in toml['config'].keys():
+    if 'mastodon_instance' not in TOML['config'].keys():
         print('CRITICAL: Missing Mastodon instance')
         exit(-1)
-    if 'mastodon_user' not in toml['config'].keys():
+    if 'mastodon_user' not in TOML['config'].keys():
         print('CRITICAL: Missing Mastodon user')
         exit(-1)
     if mast_password is None:
```
```diff
@@ -401,30 +475,32 @@ def main(argv):
         exit(-1)
 
-    # Remove previous log file
-    try:
-        os.remove(toml['config']['twitter_account'] + '.log')
-    except FileNotFoundError:
-        pass
+    # try:
+    #     os.remove(TOML['config']['twitter_account'] + '.log')
+    # except FileNotFoundError:
+    #     pass
 
     # Setup logging to file
     logging.basicConfig(
-        filename=toml['config']['twitter_account'] + '.log',
+        filename=TOML['config']['twitter_account'] + '.log',
         level=LOGGING_LEVEL,
         format='%(asctime)s %(levelname)-8s %(message)s',
         datefmt='%Y-%m-%d %H:%M:%S',
     )
 
-    logging.info('Running with the following parameters:')
-    logging.info(' -f ' + str(toml_file))
-    logging.info(' -t ' + toml['config']['twitter_account'])
-    logging.info(' -i ' + toml['config']['mastodon_instance'])
-    logging.info(' -m ' + toml['config']['mastodon_user'])
-    logging.info(' -r ' + str(toml['options']['post_reply_to']))
-    logging.info(' -s ' + str(toml['options']['skip_retweets']))
-    logging.info(' -v ' + str(toml['options']['upload_videos']))
-    logging.info(' -a ' + str(toml['options']['tweet_max_age']))
-    logging.info(' -d ' + str(toml['options']['tweet_delay']))
-    logging.info(' -c ' + str(toml['options']['toot_cap']))
+    logging.info('Running with the following configuration:')
+    logging.info(' Config file : ' + str(toml_file))
+    logging.info(' twitter_account : ' + TOML['config']['twitter_account'])
+    logging.info(' mastodon_instance : ' + TOML['config']['mastodon_instance'])
+    logging.info(' mastodon_user : ' + TOML['config']['mastodon_user'])
+    logging.info(' post_reply_to : ' + str(TOML['options']['post_reply_to']))
+    logging.info(' skip_retweets : ' + str(TOML['options']['skip_retweets']))
+    logging.info(' remove_link_redirections : ' + str(TOML['options']['remove_link_redirections']))
+    logging.info(' remove_trackers_from_urls: ' + str(TOML['options']['remove_trackers_from_urls']))
+    logging.info(' upload_videos : ' + str(TOML['options']['upload_videos']))
+    logging.info(' tweet_max_age : ' + str(TOML['options']['tweet_max_age']))
+    logging.info(' tweet_delay : ' + str(TOML['options']['tweet_delay']))
+    logging.info(' toot_cap : ' + str(TOML['options']['toot_cap']))
 
     # Try to open database. If it does not exist, create it
     sql = sqlite3.connect('twoot.db')
```
```diff
@@ -458,12 +534,12 @@ def main(argv):
         }
     )
 
-    url = nitter_url + '/' + toml['config']['twitter_account']
+    url = nitter_url + '/' + TOML['config']['twitter_account']
     # Use different page if we need to handle replies
-    if toml['options']['post_reply_to']:
+    if TOML['options']['post_reply_to']:
         url += '/with_replies'
 
-    # Download twitter page of user.
+    # Download twitter page of user
     try:
         twit_account_page = session.get(url, headers=headers, timeout=HTTPS_REQ_TIMEOUT)
     except requests.exceptions.ConnectionError:
```

```diff
@@ -493,7 +569,7 @@ def main(argv):
     ta = soup.find('meta', property='og:title').get('content')
     ta_match = re.search(r'\(@(.+)\)', ta)
     if ta_match is not None:
-        toml['config']['twitter_account'] = ta_match.group(1)
+        TOML['config']['twitter_account'] = ta_match.group(1)
 
     # Extract twitter timeline
     timeline = soup.find_all('div', class_='timeline-item')
```

```diff
@@ -522,13 +598,13 @@ def main(argv):
         timestamp = datetime.datetime.strptime(time_string, '%b %d, %Y · %I:%M %p %Z').timestamp()
 
         # Check if time is within acceptable range
-        if not is_time_valid(timestamp, toml['options']['tweet_max_age'], toml['options']['tweet_delay']):
+        if not is_time_valid(timestamp):
             out_date_cnt += 1
             logging.debug("Tweet outside valid time range, skipping")
             continue
 
         # Check if retweets must be skipped
-        if toml['options']['skip_retweets']:
+        if TOML['options']['skip_retweets']:
             # Check if this tweet is a retweet
             if len(status.select("div.tweet-body > div > div.retweet-header")) != 0:
                 logging.debug("Retweet ignored per command-line configuration")
```

```diff
@@ -537,7 +613,7 @@ def main(argv):
         # Check in database if tweet has already been posted
         db.execute(
             "SELECT * FROM toots WHERE twitter_account=? AND mastodon_instance=? AND mastodon_account=? AND tweet_id=?",
-            (toml['config']['twitter_account'], toml['config']['mastodon_instance'], toml['config']['mastodon_user'], tweet_id))
+            (TOML['config']['twitter_account'], TOML['config']['mastodon_instance'], TOML['config']['mastodon_user'], tweet_id))
         tweet_in_db = db.fetchone()
 
         if tweet_in_db is not None:
```

```diff
@@ -591,8 +667,10 @@ def main(argv):
         # Process attachment: capture image or .mp4 url or download twitter video
         attachments_class = status.find('div', class_='attachments')
         if attachments_class is not None:
-            pics, vid_in_tweet = process_attachments(nitter_url, attachments_class, toml['options']['upload_videos'], toml['config']['twitter_account'], status_id,
-                                                     author_account)
+            pics, vid_in_tweet = process_attachments(nitter_url,
+                                                     attachments_class,
+                                                     status_id, author_account
+                                                     )
             photos.extend(pics)
             if vid_in_tweet:
                 tweet_text += '\n\n[Video embedded in original tweet]'
```
```diff
@@ -628,7 +706,7 @@ def main(argv):
         # Check if video was downloaded
         video_file = None
 
-        video_path = Path('./output') / toml['config']['twitter_account'] / status_id
+        video_path = Path('./output') / TOML['config']['twitter_account'] / status_id
         if video_path.exists():
             # list video files
             video_file_list = list(video_path.glob('*.mp4'))
```

```diff
@@ -661,7 +739,7 @@ def main(argv):
     # Login to account on maston instance
     mastodon = None
     if len(tweets) != 0:
-        mastodon = login(toml['config']['mastodon_instance'], toml['config']['mastodon_user'], mast_password)
+        mastodon = login(mast_password)
 
     # **********************************************************
     # Iterate tweets in list.
```

```diff
@@ -671,8 +749,8 @@ def main(argv):
     posted_cnt = 0
     for tweet in reversed(tweets):
         # Check if we have reached the cap on the number of toots to post
-        if toml['options']['toot_cap'] != 0 and posted_cnt >= toml['options']['toot_cap']:
-            logging.info('%d toots not posted due to configured cap', len(tweets) - toml['options']['toot_cap'])
+        if TOML['options']['toot_cap'] != 0 and posted_cnt >= TOML['options']['toot_cap']:
+            logging.info('%d toots not posted due to configured cap', len(tweets) - TOML['options']['toot_cap'])
             break
 
         logging.debug('Uploading Tweet %s', tweet["tweet_id"])
```

```diff
@@ -715,8 +793,8 @@ def main(argv):
         toot = {}
         try:
             mastodon = Mastodon(
-                access_token=toml['config']['mastodon_user'] + '.secret',
-                api_base_url='https://' + toml['config']['mastodon_instance']
+                access_token=TOML['config']['mastodon_user'] + '.secret',
+                api_base_url='https://' + TOML['config']['mastodon_instance']
             )
 
             if len(media_ids) == 0:
```

```diff
@@ -725,31 +803,31 @@ def main(argv):
                 toot = mastodon.status_post(tweet['tweet_text'], media_ids=media_ids, visibility='public')
 
         except MastodonError as me:
-            logging.error('posting ' + tweet['tweet_text'] + ' to ' + toml['config']['mastodon_instance'] + ' Failed')
+            logging.error('posting ' + tweet['tweet_text'] + ' to ' + TOML['config']['mastodon_instance'] + ' Failed')
             logging.error(me)
 
         else:
             posted_cnt += 1
-            logging.debug('Tweet %s posted on %s', tweet['tweet_id'], toml['config']['mastodon_user'])
+            logging.debug('Tweet %s posted on %s', tweet['tweet_id'], TOML['config']['mastodon_user'])
 
             # Insert toot id into database
             if 'id' in toot:
                 db.execute("INSERT INTO toots VALUES ( ? , ? , ? , ? , ? )",
-                           (toml['config']['twitter_account'], toml['config']['mastodon_instance'], toml['config']['mastodon_user'], tweet['tweet_id'], toot['id']))
+                           (TOML['config']['twitter_account'], TOML['config']['mastodon_instance'], TOML['config']['mastodon_user'], tweet['tweet_id'], toot['id']))
                 sql.commit()
 
     logging.info(str(posted_cnt) + ' tweets posted to Mastodon')
 
     # Cleanup downloaded video files
     try:
-        shutil.rmtree('./output/' + toml['config']['twitter_account'])
+        shutil.rmtree('./output/' + TOML['config']['twitter_account'])
     except FileNotFoundError:  # The directory does not exist
         pass
 
     # Evaluate excess records in database
     excess_count = 0
 
-    db.execute('SELECT count(*) FROM toots WHERE twitter_account=?', (toml['config']['twitter_account'],))
+    db.execute('SELECT count(*) FROM toots WHERE twitter_account=?', (TOML['config']['twitter_account'],))
     db_count = db.fetchone()
     if db_count is not None:
         excess_count = db_count[0] - MAX_REC_COUNT
```

```diff
@@ -765,7 +843,7 @@ def main(argv):
                   LIMIT ?
                 )
                 DELETE from toots
-                WHERE tweet_id IN excess''', (toml['config']['twitter_account'], excess_count))
+                WHERE tweet_id IN excess''', (TOML['config']['twitter_account'], excess_count))
         sql.commit()
 
         logging.info('Deleted ' + str(excess_count) + ' old records from database.')
```