mirror of
https://gitlab.com/jeancf/twoot.git
synced 2025-02-17 13:58:11 +00:00
Updated config file
This commit is contained in:
parent
0b58df16e2
commit
85c5c2ef48
|
@ -45,7 +45,7 @@ is @superduperbot@botsin.space
|
||||||
| -v | upload videos to Mastodon | *N/A* | No |
|
| -v | upload videos to Mastodon | *N/A* | No |
|
||||||
| -r | Post reply-to tweets (ignored by default) | *N/A* | No |
|
| -r | Post reply-to tweets (ignored by default) | *N/A* | No |
|
||||||
| -s | Skip retweets (posted by default) | *N/A* | No |
|
| -s | Skip retweets (posted by default) | *N/A* | No |
|
||||||
| -l | Remove link redirection | *N/A* | No |
|
| -l | Remove link redirections | *N/A* | No |
|
||||||
| -u | Remove trackers from URLs | *N/A* | No |
|
| -u | Remove trackers from URLs | *N/A* | No |
|
||||||
| -a | Max. age of tweet to post (in days) | `5` | No |
|
| -a | Max. age of tweet to post (in days) | `5` | No |
|
||||||
| -d | Min. age before posting new tweet (in minutes) | `15` | No |
|
| -d | Min. age before posting new tweet (in minutes) | `15` | No |
|
||||||
|
|
|
@ -21,6 +21,10 @@ post_reply_to = false
|
||||||
# Default is false
|
# Default is false
|
||||||
skip_retweets = false
|
skip_retweets = false
|
||||||
|
|
||||||
|
# Replace redirected links in tweets with direct URLs
|
||||||
|
# Default is false
|
||||||
|
remove_link_redirections = false
|
||||||
|
|
||||||
# Clean up URLs in tweets to remove trackers
|
# Clean up URLs in tweets to remove trackers
|
||||||
# Default is false
|
# Default is false
|
||||||
remove_trackers_from_urls = false
|
remove_trackers_from_urls = false
|
||||||
|
|
99
test.py
99
test.py
|
@ -1,99 +0,0 @@
|
||||||
#! /usr/bin/env python3
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
from urllib.parse import urlparse, parse_qsl, urlencode, urlunparse
|
|
||||||
import requests
|
|
||||||
|
|
||||||
def deredir_url(url):
    """
    Given a URL, return the URL that the page really downloads from

    This is a best-effort lookup: if the request fails for any reason,
    the URL is returned unchanged.

    :param url: url to be de-redirected
    :return: direct url, or the original url if the page could not be fetched
    """
    try:
        # Download the page (give up after 5 seconds)
        response = requests.get(url, timeout=5)
    except Exception:
        # If anything goes wrong keep the URL intact.
        # `Exception` rather than a bare `except` so that KeyboardInterrupt
        # and SystemExit still propagate to the caller.
        return url

    # Return the URL that the page was downloaded from
    return response.url
|
|
||||||
|
|
||||||
def _remove_tracker_params(query_str):
|
|
||||||
"""
|
|
||||||
private function
|
|
||||||
Given a query string from a URL, strip out the known trackers
|
|
||||||
:param query_str: query to be cleaned
|
|
||||||
:return: query cleaned
|
|
||||||
"""
|
|
||||||
# Avalaible URL tracking parameters :
|
|
||||||
# UTM tags by Google Ads, M$ Ads, ...
|
|
||||||
# tag by TikTok
|
|
||||||
# tags by Snapchat
|
|
||||||
# tags by Facebook
|
|
||||||
params_to_remove = [
|
|
||||||
"utm_source", "utm_medium", "utm_campaign", "utm_term", "utm_content",
|
|
||||||
"mkt_tok",
|
|
||||||
"campaign_name", "ad_set_name", "campaign_id", "ad_set_id",
|
|
||||||
"media", "interest_group_name",
|
|
||||||
"xtor"
|
|
||||||
]
|
|
||||||
query_to_clean = dict(parse_qsl(query_str, keep_blank_values=True))
|
|
||||||
query_cleaned = [(k, v) for k, v in query_to_clean.items() if not k in params_to_remove]
|
|
||||||
return urlencode(query_cleaned, safe='#', doseq=True)
|
|
||||||
|
|
||||||
|
|
||||||
def _remove_trackers_fragment(fragment_str):
|
|
||||||
"""
|
|
||||||
private function
|
|
||||||
Given a fragment string from a URL, strip out the known trackers
|
|
||||||
:param query_str: fragment to be cleaned
|
|
||||||
:return: cleaned fragment
|
|
||||||
"""
|
|
||||||
|
|
||||||
# Not implemented
|
|
||||||
# Unclear what, if anything, can be done
|
|
||||||
# Need better understanding of fragment-based tracking
|
|
||||||
# https://builtvisible.com/one-weird-trick-to-avoid-utm-parameters/
|
|
||||||
|
|
||||||
return fragment_str
|
|
||||||
|
|
||||||
|
|
||||||
def clean_url(dirty_url):
    """
    Given a URL, return it with the known tracking parameters removed

    The query string is cleaned by _remove_tracker_params() and the fragment
    is handed to _remove_trackers_fragment(); every other component of the
    URL is kept as is.

    :param dirty_url: url to be cleaned
    :return: url cleaned

    >>> clean_url('https://example.com/video/this-aerial-ropeway?utm_source=Twitter&utm_medium=video&utm_campaign=organic&utm_content=Nov13&a=aaa&b=1#section')
    'https://example.com/video/this-aerial-ropeway?a=aaa&b=1#section'
    """

    # Let urlparse split off the fragment (the default). With
    # allow_fragments=False the fragment stays glued to the query (or path),
    # so it would be percent-encoded along with the last query value and
    # url_parsed.fragment would always be empty.
    url_parsed = urlparse(dirty_url)

    cleaned_url = urlunparse([
        url_parsed.scheme,
        url_parsed.netloc,
        url_parsed.path,
        url_parsed.params,
        _remove_tracker_params(url_parsed.query),
        _remove_trackers_fragment(url_parsed.fragment),
    ])

    return cleaned_url
|
|
||||||
|
|
||||||
def main():
    """
    Manual smoke test: print a sample URL, the URL it resolves to once
    redirections are followed, and that resolved URL with trackers removed.
    """
    # Other sample URLs that can be swapped in:
    #   'https://example.com/video/this-aerial-ropeway?utm_source=Twitter&utm_medium=video&utm_campaign=organic&utm_content=Nov13&a=aaa&b=1#mkt_tok=tik&mkt_tik=tok'
    #   "https://docs.helix-editor.com/keymap.html#movement"
    #   "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7071508/#sec1-nutrients-12-00530title"
    #   "https://uscode.house.gov/view.xhtml?req=granuleid:USC-prelim-title42-section12208&num=0&edition=prelim"
    sample_url = "https://shorturl.at/qwP38"
    print('Orig: ' + sample_url)

    # Follow the redirection(s) first, then clean the final address
    resolved_url = deredir_url(sample_url)
    print('dir : ' + resolved_url)

    tidy_url = clean_url(resolved_url)
    print('to : ' + tidy_url)


if __name__ == "__main__":
    main()
|
|
19
twoot.py
19
twoot.py
|
@ -409,6 +409,7 @@ def main(argv):
|
||||||
'upload_videos': False,
|
'upload_videos': False,
|
||||||
'post_reply_to': False,
|
'post_reply_to': False,
|
||||||
'skip_retweets': False,
|
'skip_retweets': False,
|
||||||
|
'remove_link_redirections': False,
|
||||||
'remove_trackers_from_urls': False,
|
'remove_trackers_from_urls': False,
|
||||||
'tweet_max_age': float(1),
|
'tweet_max_age': float(1),
|
||||||
'tweet_delay': float(0),
|
'tweet_delay': float(0),
|
||||||
|
@ -416,8 +417,7 @@ def main(argv):
|
||||||
}
|
}
|
||||||
|
|
||||||
# Default empty toml
|
# Default empty toml
|
||||||
# toml = {'config': {}, 'options': options}
|
toml = {'config': {}, 'options': options}
|
||||||
toml = {}
|
|
||||||
|
|
||||||
# Load config file if it was provided
|
# Load config file if it was provided
|
||||||
toml_file = args['f']
|
toml_file = args['f']
|
||||||
|
@ -446,6 +446,8 @@ def main(argv):
|
||||||
toml['options']['post_reply_to'] = args['r']
|
toml['options']['post_reply_to'] = args['r']
|
||||||
if args['s'] is True:
|
if args['s'] is True:
|
||||||
toml['options']['skip_retweets'] = args['s']
|
toml['options']['skip_retweets'] = args['s']
|
||||||
|
if args['l'] is True:
|
||||||
|
toml['options']['remove_link_redirections'] = args['l']
|
||||||
if args['u'] is True:
|
if args['u'] is True:
|
||||||
toml['options']['remove_trackers_from_urls'] = args['u']
|
toml['options']['remove_trackers_from_urls'] = args['u']
|
||||||
if args['a'] is not None:
|
if args['a'] is not None:
|
||||||
|
@ -646,7 +648,10 @@ def main(argv):
|
||||||
tt_iter = status.find('div', class_='tweet-content media-body').children
|
tt_iter = status.find('div', class_='tweet-content media-body').children
|
||||||
|
|
||||||
# Process text of tweet
|
# Process text of tweet
|
||||||
tweet_text += process_media_body(tt_iter, remove_redir, remove_trackers)
|
tweet_text += process_media_body(tt_iter,
|
||||||
|
toml['options']['remove_link_redirections'],
|
||||||
|
toml['options']['remove_trackers_from_urls']
|
||||||
|
)
|
||||||
|
|
||||||
# Process quote: append link to tweet_text
|
# Process quote: append link to tweet_text
|
||||||
quote_div = status.find('a', class_='quote-link')
|
quote_div = status.find('a', class_='quote-link')
|
||||||
|
@ -661,8 +666,12 @@ def main(argv):
|
||||||
# Process attachment: capture image or .mp4 url or download twitter video
|
# Process attachment: capture image or .mp4 url or download twitter video
|
||||||
attachments_class = status.find('div', class_='attachments')
|
attachments_class = status.find('div', class_='attachments')
|
||||||
if attachments_class is not None:
|
if attachments_class is not None:
|
||||||
pics, vid_in_tweet = process_attachments(nitter_url, attachments_class, toml['options']['upload_videos'], toml['config']['twitter_account'], status_id,
|
pics, vid_in_tweet = process_attachments(nitter_url,
|
||||||
author_account)
|
attachments_class,
|
||||||
|
toml['options']['upload_videos'],
|
||||||
|
toml['config']['twitter_account'],
|
||||||
|
status_id, author_account
|
||||||
|
)
|
||||||
photos.extend(pics)
|
photos.extend(pics)
|
||||||
if vid_in_tweet:
|
if vid_in_tweet:
|
||||||
tweet_text += '\n\n[Video embedded in original tweet]'
|
tweet_text += '\n\n[Video embedded in original tweet]'
|
||||||
|
|
Loading…
Reference in New Issue
Block a user