factorize (un)redirect processing and make it multi-level

This commit is contained in:
cquest 2022-12-19 09:38:57 +01:00
parent 46be184d41
commit d068a9f1cb

View File

@ -14,6 +14,20 @@ import feedparser
from mastodon import Mastodon from mastodon import Mastodon
import requests import requests
def unredir(redir, max_hops=10, timeout=10):
    """Resolve a (possibly shortened) URL by following its redirects by hand.

    Redirects are followed one hop at a time with ``allow_redirects=False``
    so that every intermediate URL can be inspected; links pointing at the
    ow.ly / bit.ly shorteners get an extra, dedicated hop.

    Args:
        redir: The URL to resolve.
        max_hops: Upper bound on the number of passes through the
            redirect-following loop, so a redirect cycle cannot spin forever.
        timeout: Timeout in seconds handed to every ``requests.get`` call.

    Returns:
        The last URL reached. This is ``redir`` itself when the first
        response is not a redirect or carries no ``Location`` header.

    Raises:
        requests.exceptions.RequestException: on network failure or timeout;
            callers that want best-effort behaviour must catch it.
    """
    # Local import: keeps the function self-contained without touching the
    # file's top-level import block.
    from urllib.parse import urljoin

    redirect_codes = {301, 302, 303, 307, 308}

    def fetch(url):
        # One hop only; the timeout keeps a stalled server from hanging the bot.
        return requests.get(url, allow_redirects=False, timeout=timeout)

    r = fetch(redir)
    for _ in range(max_hops):
        location = r.headers.get('Location')
        if r.status_code not in redirect_codes or not location:
            # Not a redirect (or a malformed one): redir is the final URL.
            break
        # Location may be relative; resolve it against the URL just fetched.
        redir = urljoin(redir, location)
        print('redir', redir)
        if '//ow.ly/' in redir or '//bit.ly/' in redir:
            redir = redir.replace('https://ow.ly/', 'http://ow.ly/')  # only http
            r = fetch(redir)
            location = r.headers.get('Location')
            if not location:
                # The shortener did not redirect: keep its URL rather than
                # continuing with None.
                break
            redir = urljoin(redir, location)
            print('redir+', redir)
        r = fetch(redir)
    return redir
if len(sys.argv) < 4: if len(sys.argv) < 4:
print("Usage: python3 tootbot.py twitter_account mastodon_login mastodon_passwd mastodon_instance [max_days [footer_tags [delay]]]") # noqa print("Usage: python3 tootbot.py twitter_account mastodon_login mastodon_passwd mastodon_instance [max_days [footer_tags [delay]]]") # noqa
sys.exit(1) sys.exit(1)
@ -150,9 +164,8 @@ if source[:4] == 'http':
if m is not None: if m is not None:
l = m.group(0) l = m.group(0)
try: try:
r = requests.get(l, allow_redirects=False) redir = unredir(l)
if r.status_code in {301, 302}: c = c.replace(l, redir)
c = c.replace(l, r.headers.get('Location'))
except: except:
print('Cannot resolve link redirect: ' + l) print('Cannot resolve link redirect: ' + l)
@ -250,21 +263,20 @@ else:
# replace short links by original URL # replace short links by original URL
links = re.findall(r"http[^ \xa0]*", c) links = re.findall(r"http[^ \xa0]*", c)
for l in links: for l in links:
r = requests.get(l, allow_redirects=False) redir = unredir(l)
if r.status_code in {301, 302}: m = re.search(r'twitter.com/.*/photo/', redir)
m = re.search(r'twitter.com/.*/photo/', r.headers.get('Location'))
if m is None: if m is None:
c = c.replace(l, r.headers.get('Location')) c = c.replace(l, redir)
else: else:
c = c.replace(l, '') c = c.replace(l, '')
m = re.search(r'(twitter.com/.*/video/|youtube.com)', r.headers.get('Location')) m = re.search(r'(twitter.com/.*/video/|youtube.com)', redir)
if m is None: if m is None:
c = c.replace(l, r.headers.get('Location')) c = c.replace(l, redir)
else: else:
print('lien:',l) print('lien:',l)
c = c.replace(l, '') c = c.replace(l, '')
video = r.headers.get('Location') video = redir
print('video:', video) print('video:', video)
subprocess.run('rm -f out.mp4; yt-dlp -N 8 -o out.mp4 --recode-video mp4 %s' % subprocess.run('rm -f out.mp4; yt-dlp -N 8 -o out.mp4 --recode-video mp4 %s' %
(video,), shell=True, capture_output=False) (video,), shell=True, capture_output=False)