mirror of
https://gitlab.com/jeancf/twoot.git
synced 2025-01-31 05:33:45 +00:00
Check if tweet in db before ingest to speed up processing of feed
This commit is contained in:
parent
2fe06c0bbc
commit
dd1d54d2a4
45
twoot.py
45
twoot.py
|
@ -205,6 +205,12 @@ def main(argv):
|
||||||
max_age = float(args['a'])
|
max_age = float(args['a'])
|
||||||
min_delay = float(args['d'])
|
min_delay = float(args['d'])
|
||||||
|
|
||||||
|
# Try to open database. If it does not exist, create it
|
||||||
|
sql = sqlite3.connect('twoot.db')
|
||||||
|
db = sql.cursor()
|
||||||
|
db.execute('''CREATE TABLE IF NOT EXISTS toots (twitter_account TEXT, mastodon_instance TEXT,
|
||||||
|
mastodon_account TEXT, tweet_id TEXT, toot_id TEXT)''')
|
||||||
|
|
||||||
# **********************************************************
|
# **********************************************************
|
||||||
# Load twitter page of user. Process all tweets and generate
|
# Load twitter page of user. Process all tweets and generate
|
||||||
# list of dictionaries ready to be posted on Mastodon
|
# list of dictionaries ready to be posted on Mastodon
|
||||||
|
@ -256,6 +262,20 @@ def main(argv):
|
||||||
|
|
||||||
for status in timeline:
|
for status in timeline:
|
||||||
|
|
||||||
|
# Extract tweet ID and status ID
|
||||||
|
tweet_id = str(status['href']).strip('?p=v')
|
||||||
|
status_id = tweet_id.split('/')[3]
|
||||||
|
|
||||||
|
# Check in database if tweet has already been posted
|
||||||
|
db.execute('''SELECT * FROM toots WHERE twitter_account = ? AND mastodon_instance = ? AND
|
||||||
|
mastodon_account = ? AND tweet_id = ?''',
|
||||||
|
(twit_account, mast_instance, mast_account, tweet_id))
|
||||||
|
tweet_in_db = db.fetchone()
|
||||||
|
|
||||||
|
if tweet_in_db is not None:
|
||||||
|
# Skip to next tweet
|
||||||
|
continue
|
||||||
|
|
||||||
reply_to_username = None
|
reply_to_username = None
|
||||||
# Check if the tweet is a reply-to
|
# Check if the tweet is a reply-to
|
||||||
reply_to_div = status.find('div', class_='tweet-reply-context username')
|
reply_to_div = status.find('div', class_='tweet-reply-context username')
|
||||||
|
@ -268,10 +288,6 @@ def main(argv):
|
||||||
# Skip this tweet
|
# Skip this tweet
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Extract tweet ID and status ID
|
|
||||||
tweet_id = str(status['href']).strip('?p=v')
|
|
||||||
status_id = tweet_id.split('/')[3]
|
|
||||||
|
|
||||||
# Extract url of full status page
|
# Extract url of full status page
|
||||||
full_status_url = 'https://mobile.twitter.com' + tweet_id + '?p=v'
|
full_status_url = 'https://mobile.twitter.com' + tweet_id + '?p=v'
|
||||||
|
|
||||||
|
@ -415,16 +431,10 @@ def main(argv):
|
||||||
# print(t)
|
# print(t)
|
||||||
|
|
||||||
# **********************************************************
|
# **********************************************************
|
||||||
# Iterate tweets. Check if the tweet has already been posted
|
# Iterate tweets in list.
|
||||||
# on Mastodon. If not, post it and add it to database
|
# post each on Mastodon and reference to it in database
|
||||||
# **********************************************************
|
# **********************************************************
|
||||||
|
|
||||||
# Try to open database. If it does not exist, create it
|
|
||||||
sql = sqlite3.connect('twoot.db')
|
|
||||||
db = sql.cursor()
|
|
||||||
db.execute('''CREATE TABLE IF NOT EXISTS toots (twitter_account TEXT, mastodon_instance TEXT,
|
|
||||||
mastodon_account TEXT, tweet_id TEXT, toot_id TEXT)''')
|
|
||||||
|
|
||||||
# Create Mastodon application if it does not exist yet
|
# Create Mastodon application if it does not exist yet
|
||||||
if not os.path.isfile(mast_instance + '.secret'):
|
if not os.path.isfile(mast_instance + '.secret'):
|
||||||
try:
|
try:
|
||||||
|
@ -458,17 +468,6 @@ def main(argv):
|
||||||
|
|
||||||
# Upload tweets
|
# Upload tweets
|
||||||
for tweet in reversed(tweets):
|
for tweet in reversed(tweets):
|
||||||
# Check in database if tweet has already been posted
|
|
||||||
# FIXME Move tests to the front of the process to avoid the unnecessary processing of already ingested tweets
|
|
||||||
db.execute('''SELECT * FROM toots WHERE twitter_account = ? AND mastodon_instance = ? AND
|
|
||||||
mastodon_account = ? AND tweet_id = ?''',
|
|
||||||
(twit_account, mast_instance, mast_account, tweet['tweet_id']))
|
|
||||||
tweet_in_db = db.fetchone()
|
|
||||||
|
|
||||||
if tweet_in_db is not None:
|
|
||||||
# Skip to next tweet
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Check that the tweet is not too young (might be deleted) or too old
|
# Check that the tweet is not too young (might be deleted) or too old
|
||||||
age_in_hours = (time.time() - float(tweet['timestamp'])) / 3600.0
|
age_in_hours = (time.time() - float(tweet['timestamp'])) / 3600.0
|
||||||
min_delay_in_hours = min_delay / 60.0
|
min_delay_in_hours = min_delay / 60.0
|
||||||
|
|
Loading…
Reference in New Issue
Block a user