tootbot/tootbot.py

283 lines
9.8 KiB
Python
Raw Normal View History

2017-05-28 07:41:05 +00:00
import os.path
import sys
2019-04-03 15:44:07 +00:00
import re
2022-11-05 08:48:13 +00:00
import html
import time
2022-11-05 08:48:13 +00:00
2019-04-03 15:44:07 +00:00
import sqlite3
from datetime import datetime, timedelta
2022-11-05 08:44:34 +00:00
import json
import subprocess
2019-04-03 15:44:07 +00:00
2017-05-28 07:41:05 +00:00
import feedparser
from mastodon import Mastodon
import requests
# Bail out early unless the three mandatory arguments (twitter account,
# mastodon login, mastodon password) were all supplied on the command line.
if len(sys.argv) <= 3:
    print("Usage: python3 tootbot.py twitter_account mastodon_login mastodon_passwd mastodon_instance [max_days [footer_tags [delay]]]") # noqa
    sys.exit(1)
# sqlite db to store processed tweets (and corresponding toots ids)
sql = sqlite3.connect('tootbot.db')
db = sql.cursor()
# One row per toot actually posted, keyed on the tweet/entry id together with
# the (twitter source, mastodon login, instance) triple, so several mirrors
# can share the same database file without colliding.
db.execute('''CREATE TABLE IF NOT EXISTS tweets (tweet text, toot text,
           twitter text, mastodon text, instance text)''')
# Optional positional arguments, each with its fallback value.
instance = sys.argv[4] if len(sys.argv) > 4 else 'amicale.net'   # mastodon instance
days = int(sys.argv[5]) if len(sys.argv) > 5 else 1              # max age, in days
tags = sys.argv[6] if len(sys.argv) > 6 else None                # footer hashtags
delay = int(sys.argv[7]) if len(sys.argv) > 7 else 0             # min age, in days

# Mandatory positional arguments (presence checked above).
source = sys.argv[1]    # twitter account or RSS/Atom feed URL
mastodon = sys.argv[2]  # mastodon login
passwd = sys.argv[3]    # mastodon password
# Create application if it does not exist; the client credentials are cached
# in '<instance>.secret' so registration happens only once per instance.
if not os.path.isfile(instance+'.secret'):
    if Mastodon.create_app(
        'tootbot',
        api_base_url='https://'+instance,
        to_file=instance+'.secret'
    ):
        print('tootbot app created on instance '+instance)
    else:
        print('failed to create app on instance '+instance)
        sys.exit(1)

try:
    mastodon_api = Mastodon(
        client_id=instance+'.secret',
        api_base_url='https://'+instance
    )
    # Log in with the user credentials; the access token is cached in
    # '<login>.secret' for later runs.
    mastodon_api.log_in(
        username=mastodon,
        password=passwd,
        scopes=['read', 'write'],
        to_file=mastodon+".secret"
    )
except Exception as e:
    # Narrowed from a bare `except:` (which also swallowed SystemExit and
    # KeyboardInterrupt) and surface the actual cause before exiting.
    print("ERROR: First Login Failed!")
    print(e)
    sys.exit(1)
print(source)
print("---------------------------")

if source[:4] == 'http':
    # ---- RSS/Atom branch: mirror the entries of a feed --------------------
    d = feedparser.parse(source)
    twitter = None
    print(len(d.entries))
    # Oldest first so toots appear on the timeline in chronological order.
    for t in reversed(d.entries):
        # check if this entry has been processed
        # (fix: the original tested `id in t`, i.e. membership of the
        # *builtin function* id, instead of the key 'id')
        if 'id' in t:
            id = t.id
        else:
            id = t.title
        db.execute('SELECT * FROM tweets WHERE tweet = ? AND twitter = ? and mastodon = ? and instance = ?', (id, source, mastodon, instance))  # noqa
        last = db.fetchone()
        # Age of the entry, from its (naive) published timestamp.
        dt = t.published_parsed
        age = datetime.now() - datetime(dt.tm_year, dt.tm_mon, dt.tm_mday,
                                        dt.tm_hour, dt.tm_min, dt.tm_sec)
        # process only unprocessed entries newer than `days`, older than `delay`
        if last is None and age < timedelta(days=days) and age > timedelta(days=delay):
            c = t.title
            # Prefix re-shared items with a RT link to the real author.
            if twitter and t.author.lower() != ('(@%s)' % twitter).lower():
                c = ("RT https://twitter.com/%s\n" % t.author[2:-1]) + c
            toot_media = []
            # get the pictures...
            if 'summary' in t:
                for p in re.finditer(r"https://pbs.twimg.com/[^ \xa0\"]*", t.summary):
                    media = requests.get(p.group(0))
                    media_posted = mastodon_api.media_post(
                        media.content, mime_type=media.headers.get('content-type'))
                    toot_media.append(media_posted['id'])
            if 'links' in t:
                for l in t.links:
                    if l.type in ('image/jpg', 'image/png'):
                        media = requests.get(l.url)
                        media_posted = mastodon_api.media_post(
                            media.content, mime_type=media.headers.get('content-type'))
                        toot_media.append(media_posted['id'])
            # replace the first short link by the URL it redirects to
            m = re.search(r"http[^ \xa0]*", c)
            if m is not None:
                l = m.group(0)
                r = requests.get(l, allow_redirects=False)
                if r.status_code in {301, 302}:
                    c = c.replace(l, r.headers.get('Location'))
            # remove ellipsis
            c = c.replace('\xa0', ' ')
            if 'authors' in t:
                c = c + '\nSource: ' + t.authors[0].name
            c = c + '\n\n' + t.link
            if tags:
                c = c + '\n' + tags
            # NOTE(review): toot_media is always a list here, so this test is
            # always true; kept for symmetry with the twint branch below.
            if toot_media is not None:
                toot = mastodon_api.status_post(c,
                                                in_reply_to_id=None,
                                                media_ids=toot_media,
                                                sensitive=False,
                                                visibility='public',
                                                spoiler_text=None)
                if "id" in toot:
                    db.execute("INSERT INTO tweets VALUES ( ? , ? , ? , ? , ? )",
                               (id, toot["id"], source, mastodon, instance))
                    sql.commit()
else:
    # ---- twint branch: scrape the account's timeline ----------------------
    try:
        os.mkdir(source)
    except OSError:
        # Directory probably exists already; if the real cause was something
        # else, twint/jq will fail visibly just below.
        pass
    os.chdir(source)
    # WARNING: `source` comes straight from the command line and is
    # interpolated into a shell command — run only with trusted input.
    subprocess.run('rm -f tweets.*json; twint -u %s -tl --limit 10 --json -o tweets.sjson; jq -s . tweets.sjson > tweets.json' %
                   (source,), shell=True, capture_output=True)
    with open('tweets.json', 'r') as f:
        d = json.load(f)
    twitter = source

    print(len(d))
    # Oldest first so toots appear on the timeline in chronological order.
    for t in reversed(d):
        c = html.unescape(t['tweet'])
        # do not toot twitter replies
        if 'reply_to' in t and len(t['reply_to'])>0:
            print('Reply:',c)
            continue
        # do not toot twitter quoted RT
        if 'quote_url' in t and t['quote_url'] != '':
            print('Quoted:', c)
            continue
        # check if this tweet has been processed
        id = t['id']
        db.execute('SELECT * FROM tweets WHERE tweet = ? AND twitter = ? and mastodon = ? and instance = ?', (id, source, mastodon, instance))  # noqa
        last = db.fetchone()
        # process only unprocessed tweets
        if last:
            continue
        # Skip empty tweets (the original indexed c[-1], which raised
        # IndexError on an empty string and was otherwise always false).
        if not c:
            continue

        toot_media = []
        # Prefix retweets with a RT link to the original author.
        if twitter and t['username'].lower() != twitter.lower():
            c = ("RT https://twitter.com/%s\n" % t['username']) + c
        # get the pictures...
        for p in re.finditer(r"https://pbs.twimg.com/[^ \xa0\"]*", t['tweet']):
            media = requests.get(p.group(0))
            media_posted = mastodon_api.media_post(
                media.content, mime_type=media.headers.get('content-type'))
            toot_media.append(media_posted['id'])
        if 'photos' in t:
            for url in t['photos']:
                print('photo', url)
                media = requests.get(url)
                print("received")
                media_posted = mastodon_api.media_post(
                    media.content, mime_type=media.headers.get('content-type'))
                print("posted")
                toot_media.append(media_posted['id'])

        # replace short links by original URL
        links = re.findall(r"http[^ \xa0]*", c)
        for l in links:
            r = requests.get(l, allow_redirects=False)
            if r.status_code in {301, 302}:
                # Links to twitter photos are dropped (the media itself was
                # already uploaded above); other links are expanded in place.
                m = re.search(r'twitter.com/.*/photo/', r.headers.get('Location'))
                if m is None:
                    c = c.replace(l, r.headers.get('Location'))
                else:
                    c = c.replace(l, '')
                # Video links: download with yt-dlp and attach the file.
                m = re.search(r'(twitter.com/.*/video/|youtube.com)', r.headers.get('Location'))
                if m is None:
                    c = c.replace(l, r.headers.get('Location'))
                else:
                    print('lien:',l)
                    c = c.replace(l, '')
                    video = r.headers.get('Location')
                    print('video:', video)
                    subprocess.run('rm -f out.mp4; yt-dlp -N 8 -o out.mp4 --recode-video mp4 %s' %
                                   (video,), shell=True, capture_output=False)
                    print("received")
                    # Best effort: a failed download/upload must not abort
                    # the whole run.
                    try:
                        with open("out.mp4", "rb") as file:
                            video_data = file.read()
                        media_posted = mastodon_api.media_post(video_data, mime_type='video/mp4')
                        c = c.replace(video, '')
                        print("posted")
                        toot_media.append(media_posted['id'])
                    except Exception:
                        pass

        # remove pic.twitter.com links
        m = re.search(r"pic.twitter.com[^ \xa0]*", c)
        if m is not None:
            l = m.group(0)
            c = c.replace(l, ' ')
        # remove ellipsis
        c = c.replace('\xa0', ' ')
        # replace links to twitter by nitter ones
        c = c.replace('/twitter.com/', '/nitter.net/')
        # remove utm_? tracking
        c = re.sub(r'\?utm.*$', '', c)

        if tags:
            c = c + '\n' + tags

        # NOTE(review): toot_media is always a list, so this is always true.
        if toot_media is not None:
            try:
                toot = mastodon_api.status_post(c,
                                                in_reply_to_id=None,
                                                media_ids=toot_media,
                                                sensitive=False,
                                                visibility='unlisted',
                                                spoiler_text=None)
            except Exception:
                # Presumably rate-limited — wait a bit and try once more.
                print("10s delay")
                time.sleep(10)
                toot = mastodon_api.status_post(c,
                                                in_reply_to_id=None,
                                                media_ids=toot_media,
                                                sensitive=False,
                                                visibility='unlisted',
                                                spoiler_text=None)

        # Remember the tweet so it is never posted twice.
        if "id" in toot:
            db.execute("INSERT INTO tweets VALUES ( ? , ? , ? , ? , ? )", (id, toot["id"], source, mastodon, instance))
            sql.commit()
        print("---------------------------")

print()