Mirror of https://gitlab.com/jeancf/twoot.git (synced 2025-04-17 02:07:37 +00:00)
Implemented command line parsing

parent 9b8b748b5a
commit 32f3eccc70

twoot.py: 378 changed lines (Normal file → Executable file)
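This change drops the hard-coded account, credential and timing constants and takes them from the command line instead. Going by the parser added in the diff below, an invocation would look something like the following (the values are illustrative placeholders modelled on the old constants):

    python twoot.py -t hackaday -i mastodon.host -m <mastodon account> -p <password> -a 5 -d 0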
@@ -19,6 +19,7 @@
 '''
 
 import sys
+import argparse
 import os
 import random
 import requests
@@ -29,14 +30,6 @@ import re
 from mastodon import Mastodon, MastodonError
 
 
-#TODO manage command line
-TWIT_ACCOUNT = 'hackaday'
-MAST_ACCOUNT = 'twoot@noirextreme.com'
-MAST_PASSWORD = 'AcX/ZK5Ml6fRVDFi'
-MAST_INSTANCE = 'mastodon.host'
-MAX_AGE = 5 # in days
-MIN_DELAY = 0 # in minutes
-
 USER_AGENTS = [
     'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.87 Safari/537.36',
     'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:54.0) Gecko/20100101 Firefox/68.0',
@@ -50,6 +43,7 @@ USER_AGENTS = [
 
 #TODO log to file
 
+
 def cleanup_tweet_text(tt_iter):
     '''
     Receives an iterator over all the elements contained in the tweet-text container
@@ -109,200 +103,224 @@ def cleanup_tweet_text(tt_iter):
     return tweet_text
 
 
-# **********************************************************
-# Load twitter page of user. Process all tweets and generate
-# list of dictionaries ready to be posted on Mastodon
-# **********************************************************
-# To store content of all tweets from this user
-tweets = []
-
-# Get a copy of the default headers that requests would use
-headers = requests.utils.default_headers()
-
-# Update default headers with randomly selected user agent
-headers.update(
-    {
-        'User-Agent': USER_AGENTS[random.randint(0, len(USER_AGENTS)-1)],
-    }
-)
-
-# Download twitter page of user
-response = requests.get('https://twitter.com/' + TWIT_ACCOUNT, headers=headers)
-
-# DEBUG: Save page to file
-of = open('twitter.html', 'w')
-of.write(response.text)
-of.close()
-
-# Verify that download worked
-if response.status_code != 200:
-    print("Could not download twitter timeline. Aborting.")
-    exit(-1)
-
-# Build tree of html elements for processing
-soup = BeautifulSoup(response.text, 'html.parser')
-
-# Extract twitter timeline
-results = soup.find_all('div', class_='content')
-
-for result in results:
-    # Isolate tweet header
-    sih = result.find('div', class_='stream-item-header')
-
-    # extract author
-    author = sih.find('strong', class_='fullname').get_text()
-
-    # Extract author's logo
-    author_logo_url = sih.find('img', class_='avatar')['src']
-
-    # Extract time stamp
-    timestamp = sih.find('a', class_='tweet-timestamp').find('span', class_='_timestamp')['data-time']
-
-    # Extract tweet id
-    tweet_id = sih.find('a', class_='tweet-timestamp')['href']
-
-    # Extract user name
-    author_account = re.search('^/(.+?)/', tweet_id).group(1)
-
-    # Isolate tweet text container
-    ttc = result.find('div', class_='js-tweet-text-container')
-
-    # extract iterator over tweet text contents
-    tt_iter = ttc.find('p', class_='tweet-text').children
-
-    tweet_text = cleanup_tweet_text(tt_iter)
-
-    # Check it the tweet is a retweet from somebody else
-    if author_account.lower() != TWIT_ACCOUNT.lower():
-        tweet_text = 'RT from ' + author + ' @' + author_account + '\n\n' + tweet_text
-
-    # Add footer with link to original tweet
-    tweet_text += '\n\nOriginal tweet : https://twitter.com' + tweet_id
-
-    # Isolate attached media container
-    amoc = result.find('div', class_='AdaptiveMediaOuterContainer')
-
-    photos = []
-    if amoc:
-        # Extract photos
-        photo_conts = amoc.find_all('div', class_='AdaptiveMedia-photoContainer')
-        for p in photo_conts:
-            photos.append(p['data-image-url'])
-
-        # Mention presence in videos in tweet
-        videos = amoc.find_all('div', class_='AdaptiveMedia-videoContainer')
-        if len(videos) != 0:
-            tweet_text += '\n\n[Embedded video in original tweet]'
-
-    # Add dictionary with content of tweet to list
-    tweet = {
-        "author": author,
-        "author_account": author_account,
-        "author_logo_url": author_logo_url,
-        "timestamp": timestamp,
-        "tweet_id": tweet_id,
-        "tweet_text": tweet_text,
-        "photos": photos,
-    }
-    tweets.append(tweet)
-
-# DEBUG: Print extracted tweets
-for t in tweets:
-    print(t)
-
-
-# **********************************************************
-# Iterate tweets. Check if the tweet has already been posted
-# on Mastodon. If not, post it and add it to database
-# **********************************************************
-
-# Try to open database. If it does not exist, create it
-sql = sqlite3.connect('twoot.db')
-db = sql.cursor()
-db.execute('''CREATE TABLE IF NOT EXISTS toots (twitter_account TEXT, mastodon_instance TEXT,
-           mastodon_account TEXT, tweet_id TEXT, toot_id TEXT)''')
-
-# Create Mastodon application if it does not exist yet
-if not os.path.isfile(MAST_INSTANCE + '.secret'):
-    try:
-        Mastodon.create_app(
-            'twoot',
-            api_base_url='https://' + MAST_INSTANCE,
-            to_file=MAST_INSTANCE + '.secret'
-        )
-
-    except MastodonError as me:
-        print('failed to create app on ' + MAST_INSTANCE)
-        sys.exit(1)
-
-# Log in to Mastodon instance
-try:
-    mastodon = Mastodon(
-        client_id=MAST_INSTANCE + '.secret',
-        api_base_url='https://' + MAST_INSTANCE
-    )
-
-    mastodon.log_in(
-        username=MAST_ACCOUNT,
-        password=MAST_PASSWORD,
-        to_file=MAST_ACCOUNT + ".secret"
-    )
-
-except MastodonError as me:
-    print('ERROR: Login to ' + MAST_INSTANCE + ' Failed')
-    print(me)
-    sys.exit(1)
-
-# Upload tweets
-for tweet in reversed(tweets):
-    # Check in database if tweet has already been posted
-    db.execute('''SELECT * FROM toots WHERE twitter_account = ? AND mastodon_instance = ? AND
-               mastodon_account = ? AND tweet_id = ?''',
-               (TWIT_ACCOUNT, MAST_INSTANCE, MAST_ACCOUNT, tweet['tweet_id']))
-    tweet_in_db = db.fetchone()
-
-    if tweet_in_db is not None:
-        # Skip to next tweet
-        continue
-
-    # Check that the tweet is not too young (might be deleted) or too old
-    age_in_hours = (time.time() - float(tweet['timestamp'])) / 3600.0
-    min_delay_in_hours = float(MIN_DELAY) / 60.0
-    max_age_in_hours = float(MAX_AGE) * 24.0
-
-    if age_in_hours < min_delay_in_hours or age_in_hours > max_age_in_hours:
-        # Skip to next tweet
-        continue
-
-    # Upload photos
-    media_ids = []
-    for photo in tweet['photos']:
-        # Download picture
-        media = requests.get(photo)
-
-        # Upload picture to Mastodon instance
-        media_posted = mastodon.media_post(media.content, mime_type=media.headers.get('content-type'))
-        media_ids.append(media_posted['id'])
-
-    # Post toot
-    try:
-        mastodon = Mastodon(
-            access_token=MAST_ACCOUNT + '.secret',
-            api_base_url='https://' + MAST_INSTANCE
-        )
-
-        if len(media_ids) == 0:
-            toot = mastodon.status_post(tweet['tweet_text'], visibility='public')
-        else:
-            toot = mastodon.status_post(tweet['tweet_text'], media_ids=media_ids, visibility='public')
-
-    except MastodonError as me:
-        print('ERROR: posting ' + tweet['tweet_text'] + ' to ' + MAST_INSTANCE + ' Failed')
-        print(me)
-        sys.exit(1)
-
-    # Insert toot id into database
-    if 'id' in toot:
-        db.execute("INSERT INTO toots VALUES ( ? , ? , ? , ? , ? )",
-                   (TWIT_ACCOUNT, MAST_INSTANCE, MAST_ACCOUNT, tweet['tweet_id'], toot['id']))
-        sql.commit()
+def main(argv):
+
+    # Build parser for command line arguments
+    parser = argparse.ArgumentParser(description='toot tweets.')
+    parser.add_argument('-t', metavar='<twitter account>', action='store', required=True)
+    parser.add_argument('-i', metavar='<mastodon instance>', action='store', required=True)
+    parser.add_argument('-m', metavar='<mastodon account>', action='store', required=True)
+    parser.add_argument('-p', metavar='<mastodon password>', action='store', required=True)
+    parser.add_argument('-a', metavar='<max age in days>', action='store', type=float, default=1)
+    parser.add_argument('-d', metavar='<min delay in mins>', action='store', type=float, default=0)
+
+    # Parse command line
+    args = vars(parser.parse_args())
+
+    twit_account = args['t']
+    mast_instance = args['i']
+    mast_account = args['m']
+    mast_password = args['p']
+    max_age = float(args['a'])
+    min_delay = float(args['d'])
+
+    # **********************************************************
+    # Load twitter page of user. Process all tweets and generate
+    # list of dictionaries ready to be posted on Mastodon
+    # **********************************************************
+    # To store content of all tweets from this user
+    tweets = []
+
+    # Get a copy of the default headers that requests would use
+    headers = requests.utils.default_headers()
+
+    # Update default headers with randomly selected user agent
+    headers.update(
+        {
+            'User-Agent': USER_AGENTS[random.randint(0, len(USER_AGENTS)-1)],
+        }
+    )
+
+    # Download twitter page of user
+    response = requests.get('https://twitter.com/' + twit_account, headers=headers)
+
+    # DEBUG: Save page to file
+    of = open('twitter.html', 'w')
+    of.write(response.text)
+    of.close()
+
+    # Verify that download worked
+    if response.status_code != 200:
+        print("Could not download twitter timeline. Aborting.")
+        exit(-1)
+
+    # Build tree of html elements for processing
+    soup = BeautifulSoup(response.text, 'html.parser')
+
+    # Extract twitter timeline
+    results = soup.find_all('div', class_='content')
+
+    for result in results:
+        # Isolate tweet header
+        sih = result.find('div', class_='stream-item-header')
+
+        # extract author
+        author = sih.find('strong', class_='fullname').get_text()
+
+        # Extract author's logo
+        author_logo_url = sih.find('img', class_='avatar')['src']
+
+        # Extract time stamp
+        timestamp = sih.find('a', class_='tweet-timestamp').find('span', class_='_timestamp')['data-time']
+
+        # Extract tweet id
+        tweet_id = sih.find('a', class_='tweet-timestamp')['href']
+
+        # Extract user name
+        author_account = re.search('^/(.+?)/', tweet_id).group(1)
+
+        # Isolate tweet text container
+        ttc = result.find('div', class_='js-tweet-text-container')
+
+        # extract iterator over tweet text contents
+        tt_iter = ttc.find('p', class_='tweet-text').children
+
+        tweet_text = cleanup_tweet_text(tt_iter)
+
+        # Check it the tweet is a retweet from somebody else
+        if author_account.lower() != twit_account.lower():
+            tweet_text = 'RT from ' + author + ' @' + author_account + '\n\n' + tweet_text
+
+        # Add footer with link to original tweet
+        tweet_text += '\n\nOriginal tweet : https://twitter.com' + tweet_id
+
+        # Isolate attached media container
+        amoc = result.find('div', class_='AdaptiveMediaOuterContainer')
+
+        photos = []
+        if amoc:
+            # Extract photos
+            photo_conts = amoc.find_all('div', class_='AdaptiveMedia-photoContainer')
+            for p in photo_conts:
+                photos.append(p['data-image-url'])
+
+            # Mention presence in videos in tweet
+            videos = amoc.find_all('div', class_='AdaptiveMedia-videoContainer')
+            if len(videos) != 0:
+                tweet_text += '\n\n[Embedded video in original tweet]'
+
+        # Add dictionary with content of tweet to list
+        tweet = {
+            "author": author,
+            "author_account": author_account,
+            "author_logo_url": author_logo_url,
+            "timestamp": timestamp,
+            "tweet_id": tweet_id,
+            "tweet_text": tweet_text,
+            "photos": photos,
+        }
+        tweets.append(tweet)
+
+    # DEBUG: Print extracted tweets
+    for t in tweets:
+        print(t)
+
+    # **********************************************************
+    # Iterate tweets. Check if the tweet has already been posted
+    # on Mastodon. If not, post it and add it to database
+    # **********************************************************
+
+    # Try to open database. If it does not exist, create it
+    sql = sqlite3.connect('twoot.db')
+    db = sql.cursor()
+    db.execute('''CREATE TABLE IF NOT EXISTS toots (twitter_account TEXT, mastodon_instance TEXT,
+               mastodon_account TEXT, tweet_id TEXT, toot_id TEXT)''')
+
+    # Create Mastodon application if it does not exist yet
+    if not os.path.isfile(mast_instance + '.secret'):
+        try:
+            Mastodon.create_app(
+                'twoot',
+                api_base_url='https://' + mast_instance,
+                to_file=mast_instance + '.secret'
+            )
+
+        except MastodonError as me:
+            print('failed to create app on ' + mast_instance)
+            sys.exit(1)
+
+    # Log in to Mastodon instance
+    try:
+        mastodon = Mastodon(
+            client_id=mast_instance + '.secret',
+            api_base_url='https://' + mast_instance
+        )
+
+        mastodon.log_in(
+            username=mast_account,
+            password=mast_password,
+            to_file=mast_account + ".secret"
+        )
+
+    except MastodonError as me:
+        print('ERROR: Login to ' + mast_instance + ' Failed')
+        print(me)
+        sys.exit(1)
+
+    # Upload tweets
+    for tweet in reversed(tweets):
+        # Check in database if tweet has already been posted
+        db.execute('''SELECT * FROM toots WHERE twitter_account = ? AND mastodon_instance = ? AND
+                   mastodon_account = ? AND tweet_id = ?''',
+                   (twit_account, mast_instance, mast_account, tweet['tweet_id']))
+        tweet_in_db = db.fetchone()
+
+        if tweet_in_db is not None:
+            # Skip to next tweet
+            continue
+
+        # Check that the tweet is not too young (might be deleted) or too old
+        age_in_hours = (time.time() - float(tweet['timestamp'])) / 3600.0
+        min_delay_in_hours = min_delay / 60.0
+        max_age_in_hours = max_age * 24.0
+
+        if age_in_hours < min_delay_in_hours or age_in_hours > max_age_in_hours:
+            # Skip to next tweet
+            continue
+
+        # Upload photos
+        media_ids = []
+        for photo in tweet['photos']:
+            # Download picture
+            media = requests.get(photo)
+
+            # Upload picture to Mastodon instance
+            media_posted = mastodon.media_post(media.content, mime_type=media.headers.get('content-type'))
+            media_ids.append(media_posted['id'])
+
+        # Post toot
+        try:
+            mastodon = Mastodon(
+                access_token=mast_account + '.secret',
+                api_base_url='https://' + mast_instance
+            )
+
+            if len(media_ids) == 0:
+                toot = mastodon.status_post(tweet['tweet_text'], visibility='public')
+            else:
+                toot = mastodon.status_post(tweet['tweet_text'], media_ids=media_ids, visibility='public')
+
+        except MastodonError as me:
+            print('ERROR: posting ' + tweet['tweet_text'] + ' to ' + mast_instance + ' Failed')
+            print(me)
+            sys.exit(1)
+
+        # Insert toot id into database
+        if 'id' in toot:
+            db.execute("INSERT INTO toots VALUES ( ? , ? , ? , ? , ? )",
+                       (twit_account, mast_instance, mast_account, tweet['tweet_id'], toot['id']))
+            sql.commit()
+
+
+if __name__ == "__main__":
+    main(sys.argv)
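For readers unfamiliar with the pattern, the parsing logic added above reduces to the following standalone sketch (only two of the six options are reproduced here, and the sample values are made up):

    import argparse

    parser = argparse.ArgumentParser(description='toot tweets.')
    parser.add_argument('-t', metavar='<twitter account>', action='store', required=True)
    parser.add_argument('-a', metavar='<max age in days>', action='store', type=float, default=1)

    # vars() turns the argparse.Namespace into a plain dict keyed by option letter,
    # which is why the script reads values as args['t'], args['a'], etc.
    args = vars(parser.parse_args(['-t', 'hackaday', '-a', '5']))
    print(args)  # {'t': 'hackaday', 'a': 5.0}

Note that -a and -d are declared with type=float but integer defaults; argparse only applies type to values coming from the command line, which is presumably why main() still wraps the lookups in float(...).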