Merge remote-tracking branch 'gitlab/master' into cleandb

# Conflicts:
#	twoot.py
This commit is contained in:
jeancf 2022-09-15 20:35:27 +02:00
commit cfd1232f35
2 changed files with 33 additions and 15 deletions

View File

@ -3,6 +3,10 @@
Twoot is a python script that extracts tweets from a twitter feed and Twoot is a python script that extracts tweets from a twitter feed and
reposts them as toots on a Mastodon account. reposts them as toots on a Mastodon account.
**UPDATE 14 SEP 2022** Added information about the status of throttling
applied by the Mastodon instance in the debug log. Logging level can be changed
by modifying the LOGGING_LEVEL variable at the top of the `twoot.py` file.
**UPDATE 22 AUG 2022** Fixed bug that would incorrectly mark a new tweet **UPDATE 22 AUG 2022** Fixed bug that would incorrectly mark a new tweet
as a "reply to" if it quoted a tweet that is a reply-to. as a "reply to" if it quoted a tweet that is a reply-to.
@ -28,7 +32,7 @@ from tweets considered as "sensitive content"
mobile twitter page without JavaScript after the breaking change mobile twitter page without JavaScript after the breaking change
of last week. of last week.
# Features ## Features
* Fetch timeline of given users from twitter.com * Fetch timeline of given users from twitter.com
* Scrape html and format tweets for posting on mastodon * Scrape html and format tweets for posting on mastodon
@ -40,7 +44,7 @@ of last week.
* Remember tweets already tooted to prevent double posting * Remember tweets already tooted to prevent double posting
* Optionally post reply-to tweets on the mastodon account * Optionally post reply-to tweets on the mastodon account
# usage ## usage
``` ```
twoot.py [-h] -t <twitter account> -i <mastodon instance> -m <mastodon account> twoot.py [-h] -t <twitter account> -i <mastodon instance> -m <mastodon account>
@ -48,7 +52,7 @@ twoot.py [-h] -t <twitter account> -i <mastodon instance> -m <mastodon account>
[-d <min delay (in mins)>] [-c <max # of toots to post>] [-d <min delay (in mins)>] [-c <max # of toots to post>]
``` ```
# arguments ## arguments
Assuming that the Twitter handle is @SuperDuperBot and the Mastodon account Assuming that the Twitter handle is @SuperDuperBot and the Mastodon account
is @superduperbot@botsin.space is @superduperbot@botsin.space
@ -74,19 +78,21 @@ Default max age is 1 day. Decimal values are OK.
Default min delay is 0 minutes. Default min delay is 0 minutes.
# installation ## installation
Make sure python3 is installed. Make sure python3 is installed.
Twoot depends on `beautifulsoup4` and `Mastodon.py` python modules. Twoot depends on `beautifulsoup4` and `Mastodon.py` python modules.
**Only if you plan to download videos** with the `-v` switch are the following additional dependencies required: **Only if you plan to download videos** with the `-v` switch are the following additional dependencies required:
* Python modules `m3u8` and `ffmpeg-python` * Python modules `m3u8` and `ffmpeg-python`
* [ffmpeg](https://ffmpeg.org/download.html) (installed with the package manager of your distribution) * [ffmpeg](https://ffmpeg.org/download.html) (installed with the package manager of your distribution)
```sh
pip install beautifulsoup4 Mastodon.py m3u8 ffmpeg-python
``` ```
> pip install beautifulsoup4 Mastodon.py m3u8 ffmpeg-python
```
In your user folder, execute `git clone https://gitlab.com/jeancf/twoot.git` In your user folder, execute `git clone https://gitlab.com/jeancf/twoot.git`
to clone repo with twoot.py script. to clone repo with twoot.py script.
@ -98,7 +104,8 @@ ago:
1-59/15 * * * * /path/to/twoot.py -t SuperDuperBot -i botsin.space -m superduperbot -p my_Sup3r-S4f3*pw -a 5 -d 15 1-59/15 * * * * /path/to/twoot.py -t SuperDuperBot -i botsin.space -m superduperbot -p my_Sup3r-S4f3*pw -a 5 -d 15
``` ```
# Background ## Background
I started twoot when [tootbot](https://github.com/cquest/tootbot) I started twoot when [tootbot](https://github.com/cquest/tootbot)
stopped working. Tootbot relies on rss feeds from https://twitrss.me stopped working. Tootbot relies on rss feeds from https://twitrss.me
that broke when Twitter refreshed their web UI in July 2019. that broke when Twitter refreshed their web UI in July 2019.

View File

@ -37,6 +37,10 @@ import shutil
# Number of records to keep in db table for each twitter account # Number of records to keep in db table for each twitter account
MAX_REC_COUNT = 50 MAX_REC_COUNT = 50
# Set the desired verbosity of logging
# One of logging.DEBUG, logging.INFO, logging.WARNING, logging.ERROR, logging.CRITICAL
LOGGING_LEVEL = logging.INFO
NITTER_URLS = [ NITTER_URLS = [
'https://nitter.42l.fr', 'https://nitter.42l.fr',
'https://nitter.pussthecat.org', 'https://nitter.pussthecat.org',
@ -44,7 +48,7 @@ NITTER_URLS = [
'https://nitter.eu', 'https://nitter.eu',
'https://nitter.namazso.eu', 'https://nitter.namazso.eu',
'https://n.actionsack.com', 'https://n.actionsack.com',
'https://nittereu.moomoo.me', 'https://nitter.moomoo.me',
'https://n.ramle.be', 'https://n.ramle.be',
] ]
@ -203,6 +207,7 @@ def is_time_valid(timestamp, max_age, min_delay):
return ret return ret
def login(instance, account, password): def login(instance, account, password):
# Create Mastodon application if it does not exist yet # Create Mastodon application if it does not exist yet
if not os.path.isfile(instance + '.secret'): if not os.path.isfile(instance + '.secret'):
@ -233,10 +238,16 @@ def login(instance, account, password):
logging.info('Logging in to ' + instance) logging.info('Logging in to ' + instance)
except MastodonError as me: except MastodonError as me:
logging.fatal('ERROR: Login to ' + instance + ' Failed\n') logging.fatal('ERROR: Login to ' + instance + ' Failed')
logging.fatal(me) logging.fatal(me)
sys.exit(-1) sys.exit(-1)
# Check ratelimit status
logging.debug('Ratelimit allowed requests: ' + str(mastodon.ratelimit_limit))
logging.debug('Ratelimit remaining requests: ' + str(mastodon.ratelimit_remaining))
logging.debug('Ratelimit reset time: ' + time.asctime(time.localtime(mastodon.ratelimit_reset)))
logging.debug('Ratelimit last call: ' + time.asctime(time.localtime(mastodon.ratelimit_lastcall)))
return mastodon return mastodon
@ -278,7 +289,7 @@ def main(argv):
# Setup logging to file # Setup logging to file
logging.basicConfig( logging.basicConfig(
filename=twit_account + '.log', filename=twit_account + '.log',
level=logging.WARN, level=LOGGING_LEVEL,
format='%(asctime)s %(levelname)-8s %(message)s', format='%(asctime)s %(levelname)-8s %(message)s',
datefmt='%Y-%m-%d %H:%M:%S', datefmt='%Y-%m-%d %H:%M:%S',
) )
@ -302,7 +313,7 @@ def main(argv):
mastodon_instance, mastodon_account, tweet_id)''') mastodon_instance, mastodon_account, tweet_id)''')
# Select random nitter instance to fetch updates from # Select random nitter instance to fetch updates from
nitter_url = NITTER_URLS[random.randint(0, len(NITTER_URLS)-1)] nitter_url = NITTER_URLS[random.randint(0, len(NITTER_URLS) - 1)]
# ********************************************************** # **********************************************************
# Load twitter page of user. Process all tweets and generate # Load twitter page of user. Process all tweets and generate
@ -345,9 +356,9 @@ def main(argv):
logging.info('Nitter page downloaded successfully from ' + url) logging.info('Nitter page downloaded successfully from ' + url)
# DEBUG: Save page to file # DEBUG: Save page to file
#of = open(twit_account + '.html', 'w') # of = open(twit_account + '.html', 'w')
#of.write(twit_account_page.text) # of.write(twit_account_page.text)
#of.close() # of.close()
# Make soup # Make soup
soup = BeautifulSoup(twit_account_page.text, 'html.parser') soup = BeautifulSoup(twit_account_page.text, 'html.parser')
@ -418,7 +429,7 @@ def main(argv):
# Add prefix if the tweet is a reply-to # Add prefix if the tweet is a reply-to
# Only consider item of class 'replying-to' that is a direct child # Only consider item of class 'replying-to' that is a direct child
# of class 'tweet-body' in status. Others can be in a quoted tweet. # of class 'tweet-body' in status. Others can be in a quoted tweet.
replying_to_class = status.select("div.tweet-body > div.replying-to") replying_to_class = status.select("div.tweet-body > div.replying-to")
if len(replying_to_class) != 0: if len(replying_to_class) != 0:
tweet_text += 'Replying to ' + replying_to_class[0].a.get_text() + '\n\n' tweet_text += 'Replying to ' + replying_to_class[0].a.get_text() + '\n\n'