mirror of
https://github.com/cquest/tootbot.git
synced 2024-11-27 14:01:07 +00:00
retrieve tweets with twint
This commit is contained in:
parent
66c5298507
commit
9c03e5c262
124
tootbot.py
124
tootbot.py
|
@ -140,76 +140,86 @@ if source[:4] == 'http':
|
|||
sql.commit()
|
||||
|
||||
else:
|
||||
d = feedparser.parse('http://twitrss.me/twitter_user_to_rss/?user='+source)
|
||||
subprocess.run('rm -f tweets.sjson; twint -u %s -tl --limit 10 --json -o tweets.sjson; jq -s . tweets.sjson > tweets.json' %
|
||||
(source,), shell=True, capture_output=True)
|
||||
d = json.load(open('tweets.json','r'))
|
||||
twitter = source
|
||||
|
||||
for t in reversed(d.entries):
|
||||
for t in reversed(d):
|
||||
c = html.unescape(t['tweet'])
|
||||
# do not toot twitter replies
|
||||
if 'reply_to' in t and len(t['reply_to'])>0:
|
||||
print('Reply:',c)
|
||||
continue
|
||||
# do not toot twitter quoted RT
|
||||
if 'quote_url' in t and t['quote_url'] != '':
|
||||
print('Quoted:', c)
|
||||
continue
|
||||
|
||||
# check if this tweet has been processed
|
||||
if id in t:
|
||||
id = t.id
|
||||
else:
|
||||
id = t.title
|
||||
id = t['id']
|
||||
db.execute('SELECT * FROM tweets WHERE tweet = ? AND twitter = ? and mastodon = ? and instance = ?', (id, source, mastodon, instance)) # noqa
|
||||
last = db.fetchone()
|
||||
dt = t.published_parsed
|
||||
age = datetime.now()-datetime(dt.tm_year, dt.tm_mon, dt.tm_mday,
|
||||
dt.tm_hour, dt.tm_min, dt.tm_sec)
|
||||
# process only unprocessed tweets less than 1 day old, after delay
|
||||
if last is None and age < timedelta(days=days) and age > timedelta(days=delay):
|
||||
if mastodon_api is None:
|
||||
# Create application if it does not exist
|
||||
if not os.path.isfile(instance+'.secret'):
|
||||
if Mastodon.create_app(
|
||||
'tootbot',
|
||||
api_base_url='https://'+instance,
|
||||
to_file=instance+'.secret'
|
||||
):
|
||||
print('tootbot app created on instance '+instance)
|
||||
else:
|
||||
print('failed to create app on instance '+instance)
|
||||
sys.exit(1)
|
||||
|
||||
try:
|
||||
mastodon_api = Mastodon(
|
||||
client_id=instance+'.secret',
|
||||
api_base_url='https://'+instance
|
||||
)
|
||||
mastodon_api.log_in(
|
||||
username=mastodon,
|
||||
password=passwd,
|
||||
scopes=['read', 'write'],
|
||||
to_file=mastodon+".secret"
|
||||
)
|
||||
except:
|
||||
print("ERROR: First Login Failed!")
|
||||
sys.exit(1)
|
||||
# process only unprocessed tweets
|
||||
if last:
|
||||
continue
|
||||
|
||||
if c[-1] == "…":
|
||||
continue
|
||||
|
||||
c = t.title
|
||||
if twitter and t.author.lower() != ('(@%s)' % twitter).lower():
|
||||
c = ("RT https://twitter.com/%s\n" % t.author[2:-1]) + c
|
||||
toot_media = []
|
||||
if twitter and t['username'].lower() != twitter.lower():
|
||||
c = ("RT https://twitter.com/%s\n" % t['username']) + c
|
||||
# get the pictures...
|
||||
if 'summary' in t:
|
||||
for p in re.finditer(r"https://pbs.twimg.com/[^ \xa0\"]*", t.summary):
|
||||
for p in re.finditer(r"https://pbs.twimg.com/[^ \xa0\"]*", t['tweet']):
|
||||
media = requests.get(p.group(0))
|
||||
media_posted = mastodon_api.media_post(media.content, mime_type=media.headers.get('content-type'))
|
||||
toot_media.append(media_posted['id'])
|
||||
|
||||
if 'links' in t:
|
||||
for l in t.links:
|
||||
if l.type in ('image/jpg', 'image/png'):
|
||||
media = requests.get(l.url)
|
||||
media_posted = mastodon_api.media_post(
|
||||
media.content, mime_type=media.headers.get('content-type'))
|
||||
toot_media.append(media_posted['id'])
|
||||
|
||||
if 'photos' in t:
|
||||
for url in t['photos']:
|
||||
print('photo', url)
|
||||
media = requests.get(url)
|
||||
print("received")
|
||||
media_posted = mastodon_api.media_post(
|
||||
media.content, mime_type=media.headers.get('content-type'))
|
||||
print("posted")
|
||||
toot_media.append(media_posted['id'])
|
||||
|
||||
# replace short links by original URL
|
||||
m = re.search(r"http[^ \xa0]*", c)
|
||||
if m is not None:
|
||||
l = m.group(0)
|
||||
links = re.findall(r"http[^ \xa0]*", c)
|
||||
for l in links:
|
||||
r = requests.get(l, allow_redirects=False)
|
||||
if r.status_code in {301, 302}:
|
||||
m = re.search(r'twitter.com/.*/photo/', r.headers.get('Location'))
|
||||
if m is None:
|
||||
c = c.replace(l, r.headers.get('Location'))
|
||||
else:
|
||||
c = c.replace(l, '')
|
||||
|
||||
m = re.search(r'(twitter.com/.*/video/|youtube.com)', r.headers.get('Location'))
|
||||
if m is None:
|
||||
c = c.replace(l, r.headers.get('Location'))
|
||||
else:
|
||||
print('lien:',l)
|
||||
c = c.replace(l, '')
|
||||
video = r.headers.get('Location')
|
||||
print('video:', video)
|
||||
subprocess.run('rm -f out.webm; yt-dlp -N 8 -o out.webm --recode-video webm %s' %
|
||||
(video,), shell=True, capture_output=False)
|
||||
print("received")
|
||||
try:
|
||||
file = open("out.webm", "rb")
|
||||
video_data = file.read()
|
||||
file.close()
|
||||
media_posted = mastodon_api.media_post(video_data, mime_type='video/webm')
|
||||
c = c.replace(video, '')
|
||||
print("posted")
|
||||
toot_media.append(media_posted['id'])
|
||||
except:
|
||||
pass
|
||||
|
||||
# remove pic.twitter.com links
|
||||
m = re.search(r"pic.twitter.com[^ \xa0]*", c)
|
||||
|
@ -220,10 +230,7 @@ for t in reversed(d.entries):
|
|||
# remove ellipsis
|
||||
c = c.replace('\xa0…', ' ')
|
||||
|
||||
if twitter is None:
|
||||
if 'authors' in t:
|
||||
c = c + '\nSource: '+ t.authors[0].name
|
||||
c = c + '\n\n' + t.link
|
||||
c = c.replace(' ', '\n').replace('. ', '.\n')
|
||||
|
||||
if tags:
|
||||
c = c + '\n' + tags
|
||||
|
@ -235,7 +242,8 @@ for t in reversed(d.entries):
|
|||
sensitive=False,
|
||||
visibility='public',
|
||||
spoiler_text=None)
|
||||
#break
|
||||
if "id" in toot:
|
||||
db.execute("INSERT INTO tweets VALUES ( ? , ? , ? , ? , ? )",
|
||||
(id, toot["id"], source, mastodon, instance))
|
||||
db.execute("INSERT INTO tweets VALUES ( ? , ? , ? , ? , ? )", (id, toot["id"], source, mastodon, instance))
|
||||
sql.commit()
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user