Factor out (un)redirect processing and make it multi-level

2025-05-31 20:37:00 +00:00 · 2022-12-19 09:38:57 +01:00 · 2022-12-19 09:38:57 +01:00 · d068a9f1cb
commit d068a9f1cb
parent 46be184d41
1 changed files with 43 additions and 31 deletions
--- a/tootbot.py
+++ b/tootbot.py
@@ -14,6 +14,20 @@ import feedparser
 from mastodon import Mastodon
 import requests
 def unredir(redir, max_hops=10):
    """Follow HTTP redirects by hand and return the last URL reached.

    Each hop is requested with ``allow_redirects=False`` so that the
    intermediate ``Location`` values can be inspected and printed.

    Args:
        redir: URL to resolve (typically a shortened link).
        max_hops: Upper bound on the number of loop iterations, so that a
            redirect cycle cannot make the function spin forever.

    Returns:
        The last URL obtained. This is ``redir`` unchanged when the first
        response is not a redirect, and the most recent usable URL when a
        redirect response carries no ``Location`` header.

    Raises:
        requests.RequestException: propagated from ``requests.get``.
    """
    # Local import keeps the helper self-contained; the module's import
    # block is not visible from here.
    from urllib.parse import urljoin

    redirect_codes = {301, 302, 303, 307, 308}
    r = requests.get(redir, allow_redirects=False)
    hops = 0
    while r.status_code in redirect_codes and hops < max_hops:
        location = r.headers.get('Location')
        if not location:
            # Redirect status without a target: nothing more to follow.
            break
        # Location may be relative; resolve it against the URL just fetched.
        redir = urljoin(redir, location)
        print('redir', redir)
        if '//ow.ly/' in redir or '//bit.ly/' in redir:
            redir = redir.replace('https://ow.ly/', 'http://ow.ly/') # only http
            # Shorteners get one extra hop straight away.
            location = requests.get(redir, allow_redirects=False).headers.get('Location')
            if not location:
                # Keep the shortener URL rather than requesting None.
                break
            redir = urljoin(redir, location)
            print('redir+', redir)
        r = requests.get(redir, allow_redirects=False)
        hops += 1
    return redir
 # Abort with a usage message when fewer than three arguments follow the
 # script name (sys.argv[0] counts towards the length).
 # NOTE(review): the usage text lists four mandatory arguments, yet this
 # check only enforces three -- confirm that mastodon_instance is optional.
 if len(sys.argv) < 4:
    print("Usage: python3 tootbot.py twitter_account mastodon_login mastodon_passwd mastodon_instance [max_days [footer_tags [delay]]]")  # noqa
    sys.exit(1)
@@ -150,9 +164,8 @@ if source[:4] == 'http':
            if m is not None:
                l = m.group(0)
                try:
-                    r = requests.get(l, allow_redirects=False)
+                    redir = unredir(l)
-                    if r.status_code in {301, 302}:
+                    c = c.replace(l, redir)
-                        c = c.replace(l, r.headers.get('Location'))
                except:
                    print('Cannot resolve link redirect: ' + l)
@@ -250,35 +263,34 @@ else:
        # replace short links by original URL
        links = re.findall(r"http[^ \xa0]*", c)
        for l in links:
-            r = requests.get(l, allow_redirects=False)
+            redir = unredir(l)
-            if r.status_code in {301, 302}:
+            m = re.search(r'twitter.com/.*/photo/', redir)
-                m = re.search(r'twitter.com/.*/photo/', r.headers.get('Location'))
+            if m is None:
-                if m is None:
+                c = c.replace(l, redir)
-                    c = c.replace(l, r.headers.get('Location'))
+            else:
-                else:
+                c = c.replace(l, '')
-                    c = c.replace(l, '')
-                m = re.search(r'(twitter.com/.*/video/|youtube.com)', r.headers.get('Location'))
+            m = re.search(r'(twitter.com/.*/video/|youtube.com)', redir)
-                if m is None:
+            if m is None:
-                    c = c.replace(l, r.headers.get('Location'))
+                c = c.replace(l, redir)
-                else:
+            else:
-                    print('lien:',l)
+                print('lien:',l)
-                    c = c.replace(l, '')
+                c = c.replace(l, '')
-                    video = r.headers.get('Location')
+                video = redir
-                    print('video:', video)
+                print('video:', video)
-                    subprocess.run('rm -f out.mp4; yt-dlp -N 8 -o out.mp4 --recode-video mp4 %s' %
+                subprocess.run('rm -f out.mp4; yt-dlp -N 8 -o out.mp4 --recode-video mp4 %s' %
-                                (video,), shell=True, capture_output=False)
+                            (video,), shell=True, capture_output=False)
-                    print("received")
+                print("received")
-                    try:
+                try:
-                        file = open("out.mp4", "rb")
+                    file = open("out.mp4", "rb")
-                        video_data = file.read()
+                    video_data = file.read()
-                        file.close()
+                    file.close()
-                        media_posted = mastodon_api.media_post(video_data, mime_type='video/mp4')
+                    media_posted = mastodon_api.media_post(video_data, mime_type='video/mp4')
-                        c = c.replace(video, '')
+                    c = c.replace(video, '')
-                        print("posted")
+                    print("posted")
-                        toot_media.append(media_posted['id'])
+                    toot_media.append(media_posted['id'])
-                    except:
+                except:
-                        pass
+                    pass
        # remove pic.twitter.com links
        m = re.search(r"pic.twitter.com[^ \xa0]*", c)