Mirror of https://gitlab.com/jeancf/twoot.git (synced 2025-04-17 02:07:37 +00:00)
Implemented command line parsing

parent 9b8b748b5a
commit 32f3eccc70

twoot.py: 378 changed lines (Normal file → Executable file)
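This change drops the hard-coded account, credential and timing constants and takes them from the command line instead. Going by the parser added in the diff below, an invocation would look something like the following (the values are illustrative placeholders modelled on the old constants):

    python twoot.py -t hackaday -i mastodon.host -m <mastodon account> -p <password> -a 5 -d 0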
@@ -19,6 +19,7 @@
 '''
 
 import sys
+import argparse
 import os
 import random
 import requests
@@ -29,14 +30,6 @@ import re
 from mastodon import Mastodon, MastodonError
 
 
-#TODO manage command line
-TWIT_ACCOUNT = 'hackaday'
-MAST_ACCOUNT = 'twoot@noirextreme.com'
-MAST_PASSWORD = 'AcX/ZK5Ml6fRVDFi'
-MAST_INSTANCE = 'mastodon.host'
-MAX_AGE = 5 # in days
-MIN_DELAY = 0 # in minutes
-
 USER_AGENTS = [
     'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.87 Safari/537.36',
     'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:54.0) Gecko/20100101 Firefox/68.0',
@@ -50,6 +43,7 @@ USER_AGENTS = [
 
 #TODO log to file
 
+
 def cleanup_tweet_text(tt_iter):
     '''
     Receives an iterator over all the elements contained in the tweet-text container
@@ -109,200 +103,224 @@ def cleanup_tweet_text(tt_iter):
     return tweet_text
 
 
-# **********************************************************
-# Load twitter page of user. Process all tweets and generate
-# list of dictionaries ready to be posted on Mastodon
-# **********************************************************
-# To store content of all tweets from this user
-tweets = []
-
-# Get a copy of the default headers that requests would use
-headers = requests.utils.default_headers()
-
-# Update default headers with randomly selected user agent
-headers.update(
-    {
-        'User-Agent': USER_AGENTS[random.randint(0, len(USER_AGENTS)-1)],
-    }
-)
-
-# Download twitter page of user
-response = requests.get('https://twitter.com/' + TWIT_ACCOUNT, headers=headers)
-
-# DEBUG: Save page to file
-of = open('twitter.html', 'w')
-of.write(response.text)
-of.close()
-
-# Verify that download worked
-if response.status_code != 200:
-    print("Could not download twitter timeline. Aborting.")
-    exit(-1)
-
-# Build tree of html elements for processing
-soup = BeautifulSoup(response.text, 'html.parser')
-
-# Extract twitter timeline
-results = soup.find_all('div', class_='content')
-
-for result in results:
-    # Isolate tweet header
-    sih = result.find('div', class_='stream-item-header')
-
-    # extract author
-    author = sih.find('strong', class_='fullname').get_text()
-
-    # Extract author's logo
-    author_logo_url = sih.find('img', class_='avatar')['src']
-
-    # Extract time stamp
-    timestamp = sih.find('a', class_='tweet-timestamp').find('span', class_='_timestamp')['data-time']
-
-    # Extract tweet id
-    tweet_id = sih.find('a', class_='tweet-timestamp')['href']
-
-    # Extract user name
-    author_account = re.search('^/(.+?)/', tweet_id).group(1)
-
-    # Isolate tweet text container
-    ttc = result.find('div', class_='js-tweet-text-container')
-
-    # extract iterator over tweet text contents
-    tt_iter = ttc.find('p', class_='tweet-text').children
-
-    tweet_text = cleanup_tweet_text(tt_iter)
-
-    # Check it the tweet is a retweet from somebody else
-    if author_account.lower() != TWIT_ACCOUNT.lower():
-        tweet_text = 'RT from ' + author + ' @' + author_account + '\n\n' + tweet_text
-
-    # Add footer with link to original tweet
-    tweet_text += '\n\nOriginal tweet : https://twitter.com' + tweet_id
-
-    # Isolate attached media container
-    amoc = result.find('div', class_='AdaptiveMediaOuterContainer')
-
-    photos = []
-    if amoc:
-        # Extract photos
-        photo_conts = amoc.find_all('div', class_='AdaptiveMedia-photoContainer')
-        for p in photo_conts:
-            photos.append(p['data-image-url'])
-
-        # Mention presence in videos in tweet
-        videos = amoc.find_all('div', class_='AdaptiveMedia-videoContainer')
-        if len(videos) != 0:
-            tweet_text += '\n\n[Embedded video in original tweet]'
-
-    # Add dictionary with content of tweet to list
-    tweet = {
-        "author": author,
-        "author_account": author_account,
-        "author_logo_url": author_logo_url,
-        "timestamp": timestamp,
-        "tweet_id": tweet_id,
-        "tweet_text": tweet_text,
-        "photos": photos,
-    }
-    tweets.append(tweet)
-
-# DEBUG: Print extracted tweets
-for t in tweets:
-    print(t)
-
-
-# **********************************************************
-# Iterate tweets. Check if the tweet has already been posted
-# on Mastodon. If not, post it and add it to database
-# **********************************************************
-
-# Try to open database. If it does not exist, create it
-sql = sqlite3.connect('twoot.db')
-db = sql.cursor()
-db.execute('''CREATE TABLE IF NOT EXISTS toots (twitter_account TEXT, mastodon_instance TEXT,
-           mastodon_account TEXT, tweet_id TEXT, toot_id TEXT)''')
-
-# Create Mastodon application if it does not exist yet
-if not os.path.isfile(MAST_INSTANCE + '.secret'):
-    try:
-        Mastodon.create_app(
-            'twoot',
-            api_base_url='https://' + MAST_INSTANCE,
-            to_file=MAST_INSTANCE + '.secret'
-        )
-
-    except MastodonError as me:
-        print('failed to create app on ' + MAST_INSTANCE)
-        sys.exit(1)
-
-# Log in to Mastodon instance
-try:
-    mastodon = Mastodon(
-        client_id=MAST_INSTANCE + '.secret',
-        api_base_url='https://' + MAST_INSTANCE
-    )
-
-    mastodon.log_in(
-        username=MAST_ACCOUNT,
-        password=MAST_PASSWORD,
-        to_file=MAST_ACCOUNT + ".secret"
-    )
-
-except MastodonError as me:
-    print('ERROR: Login to ' + MAST_INSTANCE + ' Failed')
-    print(me)
-    sys.exit(1)
-
-# Upload tweets
-for tweet in reversed(tweets):
-    # Check in database if tweet has already been posted
-    db.execute('''SELECT * FROM toots WHERE twitter_account = ? AND mastodon_instance = ? AND
-               mastodon_account = ? AND tweet_id = ?''',
-               (TWIT_ACCOUNT, MAST_INSTANCE, MAST_ACCOUNT, tweet['tweet_id']))
-    tweet_in_db = db.fetchone()
-
-    if tweet_in_db is not None:
-        # Skip to next tweet
-        continue
-
-    # Check that the tweet is not too young (might be deleted) or too old
-    age_in_hours = (time.time() - float(tweet['timestamp'])) / 3600.0
-    min_delay_in_hours = float(MIN_DELAY) / 60.0
-    max_age_in_hours = float(MAX_AGE) * 24.0
-
-    if age_in_hours < min_delay_in_hours or age_in_hours > max_age_in_hours:
-        # Skip to next tweet
-        continue
-
-    # Upload photos
-    media_ids = []
-    for photo in tweet['photos']:
-        # Download picture
-        media = requests.get(photo)
-
-        # Upload picture to Mastodon instance
-        media_posted = mastodon.media_post(media.content, mime_type=media.headers.get('content-type'))
-        media_ids.append(media_posted['id'])
-
-    # Post toot
-    try:
-        mastodon = Mastodon(
-            access_token=MAST_ACCOUNT + '.secret',
-            api_base_url='https://' + MAST_INSTANCE
-        )
-
-        if len(media_ids) == 0:
-            toot = mastodon.status_post(tweet['tweet_text'], visibility='public')
-        else:
-            toot = mastodon.status_post(tweet['tweet_text'], media_ids=media_ids, visibility='public')
-
-    except MastodonError as me:
-        print('ERROR: posting ' + tweet['tweet_text'] + ' to ' + MAST_INSTANCE + ' Failed')
-        print(me)
-        sys.exit(1)
-
-    # Insert toot id into database
-    if 'id' in toot:
-        db.execute("INSERT INTO toots VALUES ( ? , ? , ? , ? , ? )",
-                   (TWIT_ACCOUNT, MAST_INSTANCE, MAST_ACCOUNT, tweet['tweet_id'], toot['id']))
-        sql.commit()
+def main(argv):
+
+    # Build parser for command line arguments
+    parser = argparse.ArgumentParser(description='toot tweets.')
+    parser.add_argument('-t', metavar='<twitter account>', action='store', required=True)
+    parser.add_argument('-i', metavar='<mastodon instance>', action='store', required=True)
+    parser.add_argument('-m', metavar='<mastodon account>', action='store', required=True)
+    parser.add_argument('-p', metavar='<mastodon password>', action='store', required=True)
+    parser.add_argument('-a', metavar='<max age in days>', action='store', type=float, default=1)
+    parser.add_argument('-d', metavar='<min delay in mins>', action='store', type=float, default=0)
+
+    # Parse command line
+    args = vars(parser.parse_args())
+
+    twit_account = args['t']
+    mast_instance = args['i']
+    mast_account = args['m']
+    mast_password = args['p']
+    max_age = float(args['a'])
+    min_delay = float(args['d'])
+
+    # **********************************************************
+    # Load twitter page of user. Process all tweets and generate
+    # list of dictionaries ready to be posted on Mastodon
+    # **********************************************************
+    # To store content of all tweets from this user
+    tweets = []
+
+    # Get a copy of the default headers that requests would use
+    headers = requests.utils.default_headers()
+
+    # Update default headers with randomly selected user agent
+    headers.update(
+        {
+            'User-Agent': USER_AGENTS[random.randint(0, len(USER_AGENTS)-1)],
+        }
+    )
+
+    # Download twitter page of user
+    response = requests.get('https://twitter.com/' + twit_account, headers=headers)
+
+    # DEBUG: Save page to file
+    of = open('twitter.html', 'w')
+    of.write(response.text)
+    of.close()
+
+    # Verify that download worked
+    if response.status_code != 200:
+        print("Could not download twitter timeline. Aborting.")
+        exit(-1)
+
+    # Build tree of html elements for processing
+    soup = BeautifulSoup(response.text, 'html.parser')
+
+    # Extract twitter timeline
+    results = soup.find_all('div', class_='content')
+
+    for result in results:
+        # Isolate tweet header
+        sih = result.find('div', class_='stream-item-header')
+
+        # extract author
+        author = sih.find('strong', class_='fullname').get_text()
+
+        # Extract author's logo
+        author_logo_url = sih.find('img', class_='avatar')['src']
+
+        # Extract time stamp
+        timestamp = sih.find('a', class_='tweet-timestamp').find('span', class_='_timestamp')['data-time']
+
+        # Extract tweet id
+        tweet_id = sih.find('a', class_='tweet-timestamp')['href']
+
+        # Extract user name
+        author_account = re.search('^/(.+?)/', tweet_id).group(1)
+
+        # Isolate tweet text container
+        ttc = result.find('div', class_='js-tweet-text-container')
+
+        # extract iterator over tweet text contents
+        tt_iter = ttc.find('p', class_='tweet-text').children
+
+        tweet_text = cleanup_tweet_text(tt_iter)
+
+        # Check it the tweet is a retweet from somebody else
+        if author_account.lower() != twit_account.lower():
+            tweet_text = 'RT from ' + author + ' @' + author_account + '\n\n' + tweet_text
+
+        # Add footer with link to original tweet
+        tweet_text += '\n\nOriginal tweet : https://twitter.com' + tweet_id
+
+        # Isolate attached media container
+        amoc = result.find('div', class_='AdaptiveMediaOuterContainer')
+
+        photos = []
+        if amoc:
+            # Extract photos
+            photo_conts = amoc.find_all('div', class_='AdaptiveMedia-photoContainer')
+            for p in photo_conts:
+                photos.append(p['data-image-url'])
+
+            # Mention presence in videos in tweet
+            videos = amoc.find_all('div', class_='AdaptiveMedia-videoContainer')
+            if len(videos) != 0:
+                tweet_text += '\n\n[Embedded video in original tweet]'
+
+        # Add dictionary with content of tweet to list
+        tweet = {
+            "author": author,
+            "author_account": author_account,
+            "author_logo_url": author_logo_url,
+            "timestamp": timestamp,
+            "tweet_id": tweet_id,
+            "tweet_text": tweet_text,
+            "photos": photos,
+        }
+        tweets.append(tweet)
+
+    # DEBUG: Print extracted tweets
+    for t in tweets:
+        print(t)
+
+    # **********************************************************
+    # Iterate tweets. Check if the tweet has already been posted
+    # on Mastodon. If not, post it and add it to database
+    # **********************************************************
+
+    # Try to open database. If it does not exist, create it
+    sql = sqlite3.connect('twoot.db')
+    db = sql.cursor()
+    db.execute('''CREATE TABLE IF NOT EXISTS toots (twitter_account TEXT, mastodon_instance TEXT,
+               mastodon_account TEXT, tweet_id TEXT, toot_id TEXT)''')
+
+    # Create Mastodon application if it does not exist yet
+    if not os.path.isfile(mast_instance + '.secret'):
+        try:
+            Mastodon.create_app(
+                'twoot',
+                api_base_url='https://' + mast_instance,
+                to_file=mast_instance + '.secret'
+            )
+
+        except MastodonError as me:
+            print('failed to create app on ' + mast_instance)
+            sys.exit(1)
+
+    # Log in to Mastodon instance
+    try:
+        mastodon = Mastodon(
+            client_id=mast_instance + '.secret',
+            api_base_url='https://' + mast_instance
+        )
+
+        mastodon.log_in(
+            username=mast_account,
+            password=mast_password,
+            to_file=mast_account + ".secret"
+        )
+
+    except MastodonError as me:
+        print('ERROR: Login to ' + mast_instance + ' Failed')
+        print(me)
+        sys.exit(1)
+
+    # Upload tweets
+    for tweet in reversed(tweets):
+        # Check in database if tweet has already been posted
+        db.execute('''SELECT * FROM toots WHERE twitter_account = ? AND mastodon_instance = ? AND
+                   mastodon_account = ? AND tweet_id = ?''',
+                   (twit_account, mast_instance, mast_account, tweet['tweet_id']))
+        tweet_in_db = db.fetchone()
+
+        if tweet_in_db is not None:
+            # Skip to next tweet
+            continue
+
+        # Check that the tweet is not too young (might be deleted) or too old
+        age_in_hours = (time.time() - float(tweet['timestamp'])) / 3600.0
+        min_delay_in_hours = min_delay / 60.0
+        max_age_in_hours = max_age * 24.0
+
+        if age_in_hours < min_delay_in_hours or age_in_hours > max_age_in_hours:
+            # Skip to next tweet
+            continue
+
+        # Upload photos
+        media_ids = []
+        for photo in tweet['photos']:
+            # Download picture
+            media = requests.get(photo)
+
+            # Upload picture to Mastodon instance
+            media_posted = mastodon.media_post(media.content, mime_type=media.headers.get('content-type'))
+            media_ids.append(media_posted['id'])
+
+        # Post toot
+        try:
+            mastodon = Mastodon(
+                access_token=mast_account + '.secret',
+                api_base_url='https://' + mast_instance
+            )
+
+            if len(media_ids) == 0:
+                toot = mastodon.status_post(tweet['tweet_text'], visibility='public')
+            else:
+                toot = mastodon.status_post(tweet['tweet_text'], media_ids=media_ids, visibility='public')
+
+        except MastodonError as me:
+            print('ERROR: posting ' + tweet['tweet_text'] + ' to ' + mast_instance + ' Failed')
+            print(me)
+            sys.exit(1)
+
+        # Insert toot id into database
+        if 'id' in toot:
+            db.execute("INSERT INTO toots VALUES ( ? , ? , ? , ? , ? )",
+                       (twit_account, mast_instance, mast_account, tweet['tweet_id'], toot['id']))
+            sql.commit()
+
+
+if __name__ == "__main__":
+    main(sys.argv)
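For readers unfamiliar with the pattern, the parsing logic added above reduces to the following standalone sketch (only two of the six options are reproduced here, and the sample values are made up):

    import argparse

    parser = argparse.ArgumentParser(description='toot tweets.')
    parser.add_argument('-t', metavar='<twitter account>', action='store', required=True)
    parser.add_argument('-a', metavar='<max age in days>', action='store', type=float, default=1)

    # vars() turns the argparse.Namespace into a plain dict keyed by option letter,
    # which is why the script reads values as args['t'], args['a'], etc.
    args = vars(parser.parse_args(['-t', 'hackaday', '-a', '5']))
    print(args)  # {'t': 'hackaday', 'a': 5.0}

Note that -a and -d are declared with type=float but integer defaults; argparse only applies type to values coming from the command line, which is presumably why main() still wraps the lookups in float(...).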