mirror of
https://gitlab.com/jeancf/twoot.git
synced 2024-11-27 14:01:13 +00:00
Merge remote-tracking branch 'gitlab/master' into cleandb
# Conflicts: # twoot.py
This commit is contained in:
commit
cfd1232f35
21
README.md
21
README.md
|
@ -3,6 +3,10 @@
|
||||||
Twoot is a python script that extracts tweets from a twitter feed and
|
Twoot is a python script that extracts tweets from a twitter feed and
|
||||||
reposts them as toots on a Mastodon account.
|
reposts them as toots on a Mastodon account.
|
||||||
|
|
||||||
|
**UPDATE 14 SEP 2022** Added information about the status of throttling
|
||||||
|
applied by the Mastodon instance in the debug log. Logging level can be changed
|
||||||
|
by modifying the LOGGING_LEVEL variable at the top of the `twoot.py` file.
|
||||||
|
|
||||||
**UPDATE 22 AUG 2022** Fixed bug that would incorrectly mark a new tweet
|
**UPDATE 22 AUG 2022** Fixed bug that would incorrectly mark a new tweet
|
||||||
as a "reply to" if it quoted a tweet that is a reply-to.
|
as a "reply to" if it quoted a tweet that is a reply-to.
|
||||||
|
|
||||||
|
@ -28,7 +32,7 @@ from tweets considered as "sensitive content"
|
||||||
mobile twitter page without JavaScript after the breaking change
|
mobile twitter page without JavaScript after the breaking change
|
||||||
of last week.
|
of last week.
|
||||||
|
|
||||||
# Features
|
## Features
|
||||||
|
|
||||||
* Fetch timeline of given users from twitter.com
|
* Fetch timeline of given users from twitter.com
|
||||||
* Scrape HTML and format tweets for posting on Mastodon
|
* Scrape HTML and format tweets for posting on Mastodon
|
||||||
|
@ -40,7 +44,7 @@ of last week.
|
||||||
* Remember tweets already tooted to prevent double posting
|
* Remember tweets already tooted to prevent double posting
|
||||||
* Optionally post reply-to tweets on the mastodon account
|
* Optionally post reply-to tweets on the mastodon account
|
||||||
|
|
||||||
# usage
|
## usage
|
||||||
|
|
||||||
```
|
```
|
||||||
twoot.py [-h] -t <twitter account> -i <mastodon instance> -m <mastodon account>
|
twoot.py [-h] -t <twitter account> -i <mastodon instance> -m <mastodon account>
|
||||||
|
@ -48,7 +52,7 @@ twoot.py [-h] -t <twitter account> -i <mastodon instance> -m <mastodon account>
|
||||||
[-d <min delay (in mins)>] [-c <max # of toots to post>]
|
[-d <min delay (in mins)>] [-c <max # of toots to post>]
|
||||||
```
|
```
|
||||||
|
|
||||||
# arguments
|
## arguments
|
||||||
|
|
||||||
Assuming that the Twitter handle is @SuperDuperBot and the Mastodon account
|
Assuming that the Twitter handle is @SuperDuperBot and the Mastodon account
|
||||||
is @superduperbot@botsin.space
|
is @superduperbot@botsin.space
|
||||||
|
@ -74,19 +78,21 @@ Default max age is 1 day. Decimal values are OK.
|
||||||
|
|
||||||
Default min delay is 0 minutes.
|
Default min delay is 0 minutes.
|
||||||
|
|
||||||
# installation
|
## installation
|
||||||
|
|
||||||
Make sure python3 is installed.
|
Make sure python3 is installed.
|
||||||
|
|
||||||
Twoot depends on `beautifulsoup4` and `Mastodon.py` python modules.
|
Twoot depends on `beautifulsoup4` and `Mastodon.py` python modules.
|
||||||
|
|
||||||
**Only if you plan to download videos** with the `-v` switch are the following additional dependencies required:
|
**Only if you plan to download videos** with the `-v` switch are the following additional dependencies required:
|
||||||
|
|
||||||
* Python modules `m3u8` and `ffmpeg-python`
|
* Python modules `m3u8` and `ffmpeg-python`
|
||||||
* [ffmpeg](https://ffmpeg.org/download.html) (installed with the package manager of your distribution)
|
* [ffmpeg](https://ffmpeg.org/download.html) (installed with the package manager of your distribution)
|
||||||
|
|
||||||
|
```sh
|
||||||
|
pip install beautifulsoup4 Mastodon.py m3u8 ffmpeg-python
|
||||||
```
|
```
|
||||||
> pip install beautifulsoup4 Mastodon.py m3u8 ffmpeg-python
|
|
||||||
```
|
|
||||||
In your user folder, execute `git clone https://gitlab.com/jeancf/twoot.git`
|
In your user folder, execute `git clone https://gitlab.com/jeancf/twoot.git`
|
||||||
to clone repo with twoot.py script.
|
to clone repo with twoot.py script.
|
||||||
|
|
||||||
|
@ -98,7 +104,8 @@ ago:
|
||||||
1-59/15 * * * * /path/to/twoot.py -t SuperDuperBot -i botsin.space -m superduperbot -p my_Sup3r-S4f3*pw -a 5 -d 15
|
1-59/15 * * * * /path/to/twoot.py -t SuperDuperBot -i botsin.space -m superduperbot -p my_Sup3r-S4f3*pw -a 5 -d 15
|
||||||
```
|
```
|
||||||
|
|
||||||
# Background
|
## Background
|
||||||
|
|
||||||
I started twoot when [tootbot](https://github.com/cquest/tootbot)
|
I started twoot when [tootbot](https://github.com/cquest/tootbot)
|
||||||
stopped working. Tootbot relies on rss feeds from https://twitrss.me
|
stopped working. Tootbot relies on rss feeds from https://twitrss.me
|
||||||
that broke when Twitter refreshed their web UI in July 2019.
|
that broke when Twitter refreshed their web UI in July 2019.
|
||||||
|
|
27
twoot.py
27
twoot.py
|
@ -37,6 +37,10 @@ import shutil
|
||||||
# Number of records to keep in db table for each twitter account
|
# Number of records to keep in db table for each twitter account
|
||||||
MAX_REC_COUNT = 50
|
MAX_REC_COUNT = 50
|
||||||
|
|
||||||
|
# Set the desired verbosity of logging
|
||||||
|
# One of logging.DEBUG, logging.INFO, logging.WARNING, logging.ERROR, logging.CRITICAL
|
||||||
|
LOGGING_LEVEL = logging.INFO
|
||||||
|
|
||||||
NITTER_URLS = [
|
NITTER_URLS = [
|
||||||
'https://nitter.42l.fr',
|
'https://nitter.42l.fr',
|
||||||
'https://nitter.pussthecat.org',
|
'https://nitter.pussthecat.org',
|
||||||
|
@ -44,7 +48,7 @@ NITTER_URLS = [
|
||||||
'https://nitter.eu',
|
'https://nitter.eu',
|
||||||
'https://nitter.namazso.eu',
|
'https://nitter.namazso.eu',
|
||||||
'https://n.actionsack.com',
|
'https://n.actionsack.com',
|
||||||
'https://nittereu.moomoo.me',
|
'https://nitter.moomoo.me',
|
||||||
'https://n.ramle.be',
|
'https://n.ramle.be',
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -203,6 +207,7 @@ def is_time_valid(timestamp, max_age, min_delay):
|
||||||
|
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
|
|
||||||
def login(instance, account, password):
|
def login(instance, account, password):
|
||||||
# Create Mastodon application if it does not exist yet
|
# Create Mastodon application if it does not exist yet
|
||||||
if not os.path.isfile(instance + '.secret'):
|
if not os.path.isfile(instance + '.secret'):
|
||||||
|
@ -233,10 +238,16 @@ def login(instance, account, password):
|
||||||
logging.info('Logging in to ' + instance)
|
logging.info('Logging in to ' + instance)
|
||||||
|
|
||||||
except MastodonError as me:
|
except MastodonError as me:
|
||||||
logging.fatal('ERROR: Login to ' + instance + ' Failed\n')
|
logging.fatal('ERROR: Login to ' + instance + ' Failed')
|
||||||
logging.fatal(me)
|
logging.fatal(me)
|
||||||
sys.exit(-1)
|
sys.exit(-1)
|
||||||
|
|
||||||
|
# Check ratelimit status
|
||||||
|
logging.debug('Ratelimit allowed requests: ' + str(mastodon.ratelimit_limit))
|
||||||
|
logging.debug('Ratelimit remaining requests: ' + str(mastodon.ratelimit_remaining))
|
||||||
|
logging.debug('Ratelimit reset time: ' + time.asctime(time.localtime(mastodon.ratelimit_reset)))
|
||||||
|
logging.debug('Ratelimit last call: ' + time.asctime(time.localtime(mastodon.ratelimit_lastcall)))
|
||||||
|
|
||||||
return mastodon
|
return mastodon
|
||||||
|
|
||||||
|
|
||||||
|
@ -278,7 +289,7 @@ def main(argv):
|
||||||
# Setup logging to file
|
# Setup logging to file
|
||||||
logging.basicConfig(
|
logging.basicConfig(
|
||||||
filename=twit_account + '.log',
|
filename=twit_account + '.log',
|
||||||
level=logging.WARN,
|
level=LOGGING_LEVEL,
|
||||||
format='%(asctime)s %(levelname)-8s %(message)s',
|
format='%(asctime)s %(levelname)-8s %(message)s',
|
||||||
datefmt='%Y-%m-%d %H:%M:%S',
|
datefmt='%Y-%m-%d %H:%M:%S',
|
||||||
)
|
)
|
||||||
|
@ -302,7 +313,7 @@ def main(argv):
|
||||||
mastodon_instance, mastodon_account, tweet_id)''')
|
mastodon_instance, mastodon_account, tweet_id)''')
|
||||||
|
|
||||||
# Select random nitter instance to fetch updates from
|
# Select random nitter instance to fetch updates from
|
||||||
nitter_url = NITTER_URLS[random.randint(0, len(NITTER_URLS)-1)]
|
nitter_url = NITTER_URLS[random.randint(0, len(NITTER_URLS) - 1)]
|
||||||
|
|
||||||
# **********************************************************
|
# **********************************************************
|
||||||
# Load twitter page of user. Process all tweets and generate
|
# Load twitter page of user. Process all tweets and generate
|
||||||
|
@ -345,9 +356,9 @@ def main(argv):
|
||||||
logging.info('Nitter page downloaded successfully from ' + url)
|
logging.info('Nitter page downloaded successfully from ' + url)
|
||||||
|
|
||||||
# DEBUG: Save page to file
|
# DEBUG: Save page to file
|
||||||
#of = open(twit_account + '.html', 'w')
|
# of = open(twit_account + '.html', 'w')
|
||||||
#of.write(twit_account_page.text)
|
# of.write(twit_account_page.text)
|
||||||
#of.close()
|
# of.close()
|
||||||
|
|
||||||
# Make soup
|
# Make soup
|
||||||
soup = BeautifulSoup(twit_account_page.text, 'html.parser')
|
soup = BeautifulSoup(twit_account_page.text, 'html.parser')
|
||||||
|
@ -418,7 +429,7 @@ def main(argv):
|
||||||
|
|
||||||
# Add prefix if the tweet is a reply-to
|
# Add prefix if the tweet is a reply-to
|
||||||
# Only consider item of class 'replying-to' that is a direct child
|
# Only consider item of class 'replying-to' that is a direct child
|
||||||
# of class 'tweet-body' in status. Others can be in a quoted tweet.
|
# of class 'tweet-body' in status. Others can be in a quoted tweet.
|
||||||
replying_to_class = status.select("div.tweet-body > div.replying-to")
|
replying_to_class = status.select("div.tweet-body > div.replying-to")
|
||||||
if len(replying_to_class) != 0:
|
if len(replying_to_class) != 0:
|
||||||
tweet_text += 'Replying to ' + replying_to_class[0].a.get_text() + '\n\n'
|
tweet_text += 'Replying to ' + replying_to_class[0].a.get_text() + '\n\n'
|
||||||
|
|
Loading…
Reference in New Issue
Block a user