Mirror of https://gitlab.com/jeancf/twoot.git (synced 2024-11-24 04:21:13 +00:00).
Commit message: "Created get_timeline function".
This commit is contained in:
parent
15663af09d
commit
b842f6d471
89
twoot.py
89
twoot.py
|
@ -192,6 +192,52 @@ def build_config(args):
|
||||||
exit(-1)
|
exit(-1)
|
||||||
|
|
||||||
|
|
||||||
|
def get_timeline(url):
    """
    Download the Nitter page at `url` and extract the tweet timeline.

    :param url: full URL of the Nitter user page to download
    :return: tuple (soup, timeline) where `soup` is the parsed
             BeautifulSoup document and `timeline` is the list of
             timeline-item tags (excluding thread items).
    Calls shutdown(-1) on connection failure, timeout, or non-200 status.
    """
    # Initiate session
    session = requests.Session()

    # Get a copy of the default headers that requests would use
    headers = requests.utils.default_headers()

    # Update default headers with randomly selected user agent.
    # random.choice is the idiomatic way to pick a random element
    # (replaces USER_AGENTS[random.randint(0, len(USER_AGENTS) - 1)]).
    headers.update(
        {
            'User-Agent': random.choice(USER_AGENTS),
            'Cookie': 'replaceTwitter=; replaceYouTube=; hlsPlayback=on; proxyVideos=',
        }
    )

    # Download twitter page of user
    try:
        twit_account_page = session.get(url, headers=headers, timeout=HTTPS_REQ_TIMEOUT)
    except requests.exceptions.ConnectionError:
        logging.fatal('Host did not respond when trying to download ' + url)
        shutdown(-1)
    except requests.exceptions.Timeout:
        logging.fatal(url + ' took too long to respond')
        shutdown(-1)

    # Verify that download worked
    if twit_account_page.status_code != 200:
        logging.fatal('The Nitter page did not download correctly from ' + url + ' (' + str(
            twit_account_page.status_code) + '). Aborting')
        shutdown(-1)

    logging.debug('Nitter page downloaded successfully from ' + url)

    # DEBUG: Save page to file
    # of = open(TOML['config']['twitter_account'] + '.html', 'w')
    # of.write(twit_account_page.text)
    # of.close()

    # Make soup
    soup = BeautifulSoup(twit_account_page.text, 'html.parser')

    # Extract twitter timeline (filter drops thread items)
    timeline = soup.find_all(has_class_timeline_item_but_not_thread)

    return soup, timeline
||||||
def update_profile(nitter_url, soup, sql, mast_password):
|
def update_profile(nitter_url, soup, sql, mast_password):
|
||||||
"""
|
"""
|
||||||
Update profile on Mastodon
|
Update profile on Mastodon
|
||||||
|
@ -841,53 +887,12 @@ def main(argv):
|
||||||
# To store content of all tweets from this user
|
# To store content of all tweets from this user
|
||||||
tweets = []
|
tweets = []
|
||||||
|
|
||||||
# Initiate session
|
|
||||||
session = requests.Session()
|
|
||||||
|
|
||||||
# Get a copy of the default headers that requests would use
|
|
||||||
headers = requests.utils.default_headers()
|
|
||||||
|
|
||||||
# Update default headers with randomly selected user agent
|
|
||||||
headers.update(
|
|
||||||
{
|
|
||||||
'User-Agent': USER_AGENTS[random.randint(0, len(USER_AGENTS) - 1)],
|
|
||||||
'Cookie': 'replaceTwitter=; replaceYouTube=; hlsPlayback=on; proxyVideos=',
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
url = nitter_url + '/' + TOML['config']['twitter_account']
|
url = nitter_url + '/' + TOML['config']['twitter_account']
|
||||||
# Use different page if we need to handle replies
|
# Use different page if we need to handle replies
|
||||||
if TOML['options']['post_reply_to']:
|
if TOML['options']['post_reply_to']:
|
||||||
url += '/with_replies'
|
url += '/with_replies'
|
||||||
|
|
||||||
# Download twitter page of user
|
soup, timeline = get_timeline(url)
|
||||||
try:
|
|
||||||
twit_account_page = session.get(url, headers=headers, timeout=HTTPS_REQ_TIMEOUT)
|
|
||||||
except requests.exceptions.ConnectionError:
|
|
||||||
logging.fatal('Host did not respond when trying to download ' + url)
|
|
||||||
shutdown(-1)
|
|
||||||
except requests.exceptions.Timeout:
|
|
||||||
logging.fatal(nitter_url + ' took too long to respond')
|
|
||||||
shutdown(-1)
|
|
||||||
|
|
||||||
# Verify that download worked
|
|
||||||
if twit_account_page.status_code != 200:
|
|
||||||
logging.fatal('The Nitter page did not download correctly from ' + url + ' (' + str(
|
|
||||||
twit_account_page.status_code) + '). Aborting')
|
|
||||||
shutdown(-1)
|
|
||||||
|
|
||||||
logging.debug('Nitter page downloaded successfully from ' + url)
|
|
||||||
|
|
||||||
# DEBUG: Save page to file
|
|
||||||
# of = open(TOML['config']['twitter_account'] + '.html', 'w')
|
|
||||||
# of.write(twit_account_page.text)
|
|
||||||
# of.close()
|
|
||||||
|
|
||||||
# Make soup
|
|
||||||
soup = BeautifulSoup(twit_account_page.text, 'html.parser')
|
|
||||||
|
|
||||||
# Extract twitter timeline
|
|
||||||
timeline = soup.find_all(has_class_timeline_item_but_not_thread)
|
|
||||||
|
|
||||||
logging.info('Processing ' + str(len(timeline)) + ' tweets found in timeline')
|
logging.info('Processing ' + str(len(timeline)) + ' tweets found in timeline')
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user