factorize (un)redirect processing and make it multi-level

This commit is contained in:
cquest 2022-12-19 09:38:57 +01:00
parent 46be184d41
commit d068a9f1cb

View File

@ -14,6 +14,20 @@ import feedparser
from mastodon import Mastodon from mastodon import Mastodon
import requests import requests
def unredir(redir, max_redirects=10, timeout=10, session=None):
    """Follow a chain of HTTP redirects by hand and return the final URL.

    Each hop is requested with ``allow_redirects=False`` so that every
    intermediate URL can be inspected (and printed) before moving on.

    Args:
        redir: URL to resolve.
        max_redirects: Maximum number of hops to follow. Protects against
            redirect cycles, which would otherwise loop forever.
        timeout: Per-request timeout in seconds, passed to ``get()`` so a
            stalled server cannot hang the caller indefinitely.
        session: Optional object exposing a requests-compatible ``get()``
            (for example a ``requests.Session`` to reuse connections across
            hops). Defaults to the ``requests`` module.

    Returns:
        The last URL reached. This is ``redir`` itself when the first
        response is not a redirect, and the last URL obtained when the hop
        limit is hit or a redirect response carries no ``Location`` header.

    Raises:
        Whatever ``get()`` raises (connection errors, timeouts, ...); callers
        that want best-effort behaviour must catch these themselves.
    """
    # Function-scope import so the block does not depend on the file's
    # top-level import list.
    from urllib.parse import urljoin

    http = requests if session is None else session
    # Every status code that means "look at Location", not just 301/302.
    redirect_codes = {301, 302, 303, 307, 308}

    r = http.get(redir, allow_redirects=False, timeout=timeout)
    hops = 0
    while r.status_code in redirect_codes and hops < max_redirects:
        location = r.headers.get('Location')
        if not location:
            # Redirect status without a target: nothing more to follow.
            break
        # Location may be relative ("/path"); resolve it against the URL
        # that was just requested.
        redir = urljoin(redir, location)
        hops += 1
        print('redir', redir)
        if '//ow.ly/' in redir or '//bit.ly/' in redir:
            # ow.ly is only fetched over plain http here.
            redir = redir.replace('https://ow.ly/', 'http://ow.ly/')
            shortener = http.get(redir, allow_redirects=False, timeout=timeout)
            location = shortener.headers.get('Location')
            if location:
                # Only move on when the shortener actually points somewhere;
                # otherwise keep the shortener URL instead of ending on None.
                redir = urljoin(redir, location)
                hops += 1
                print('redir+', redir)
        r = http.get(redir, allow_redirects=False, timeout=timeout)
    return redir
if len(sys.argv) < 4: if len(sys.argv) < 4:
print("Usage: python3 tootbot.py twitter_account mastodon_login mastodon_passwd mastodon_instance [max_days [footer_tags [delay]]]") # noqa print("Usage: python3 tootbot.py twitter_account mastodon_login mastodon_passwd mastodon_instance [max_days [footer_tags [delay]]]") # noqa
sys.exit(1) sys.exit(1)
@ -150,9 +164,8 @@ if source[:4] == 'http':
if m is not None: if m is not None:
l = m.group(0) l = m.group(0)
try: try:
r = requests.get(l, allow_redirects=False) redir = unredir(l)
if r.status_code in {301, 302}: c = c.replace(l, redir)
c = c.replace(l, r.headers.get('Location'))
except: except:
print('Cannot resolve link redirect: ' + l) print('Cannot resolve link redirect: ' + l)
@ -250,35 +263,34 @@ else:
# replace short links by original URL # replace short links by original URL
links = re.findall(r"http[^ \xa0]*", c) links = re.findall(r"http[^ \xa0]*", c)
for l in links: for l in links:
r = requests.get(l, allow_redirects=False) redir = unredir(l)
if r.status_code in {301, 302}: m = re.search(r'twitter.com/.*/photo/', redir)
m = re.search(r'twitter.com/.*/photo/', r.headers.get('Location')) if m is None:
if m is None: c = c.replace(l, redir)
c = c.replace(l, r.headers.get('Location')) else:
else: c = c.replace(l, '')
c = c.replace(l, '')
m = re.search(r'(twitter.com/.*/video/|youtube.com)', r.headers.get('Location')) m = re.search(r'(twitter.com/.*/video/|youtube.com)', redir)
if m is None: if m is None:
c = c.replace(l, r.headers.get('Location')) c = c.replace(l, redir)
else: else:
print('lien:',l) print('lien:',l)
c = c.replace(l, '') c = c.replace(l, '')
video = r.headers.get('Location') video = redir
print('video:', video) print('video:', video)
subprocess.run('rm -f out.mp4; yt-dlp -N 8 -o out.mp4 --recode-video mp4 %s' % subprocess.run('rm -f out.mp4; yt-dlp -N 8 -o out.mp4 --recode-video mp4 %s' %
(video,), shell=True, capture_output=False) (video,), shell=True, capture_output=False)
print("received") print("received")
try: try:
file = open("out.mp4", "rb") file = open("out.mp4", "rb")
video_data = file.read() video_data = file.read()
file.close() file.close()
media_posted = mastodon_api.media_post(video_data, mime_type='video/mp4') media_posted = mastodon_api.media_post(video_data, mime_type='video/mp4')
c = c.replace(video, '') c = c.replace(video, '')
print("posted") print("posted")
toot_media.append(media_posted['id']) toot_media.append(media_posted['id'])
except: except:
pass pass
# remove pic.twitter.com links # remove pic.twitter.com links
m = re.search(r"pic.twitter.com[^ \xa0]*", c) m = re.search(r"pic.twitter.com[^ \xa0]*", c)