Created get_timeline function

2025-05-06 03:23:57 +00:00 · 2023-07-12 22:02:06 +02:00 · 2023-07-12 22:02:06 +02:00 · b842f6d471
commit b842f6d471
parent 15663af09d
1 changed file with 47 additions and 42 deletions
--- a/twoot.py
+++ b/twoot.py
@ -192,6 +192,52 @@ def build_config(args):
        exit(-1)


+def get_timeline(url):
+    # Fetch the page at `url` and return (soup, timeline); a new requests session is opened for each call
+    session = requests.Session()
+
+    # Start from a copy of the default headers that requests would use
+    headers = requests.utils.default_headers()
+
+    # Override the User-Agent with a random entry from USER_AGENTS and send a fixed Cookie header
+    headers.update(
+        {
+            'User-Agent': USER_AGENTS[random.randint(0, len(USER_AGENTS) - 1)],
+            'Cookie': 'replaceTwitter=; replaceYouTube=; hlsPlayback=on; proxyVideos=',
+        }
+    )
+
+    # Download the page; connection errors and timeouts are logged as fatal and handed to shutdown(-1)
+    try:
+        twit_account_page = session.get(url, headers=headers, timeout=HTTPS_REQ_TIMEOUT)
+    except requests.exceptions.ConnectionError:
+        logging.fatal('Host did not respond when trying to download ' + url)
+        shutdown(-1)
+    except requests.exceptions.Timeout:
+        logging.fatal(url + ' took too long to respond')
+        shutdown(-1)
+
+    # Any HTTP status other than 200 is treated as a failed download and also ends in shutdown(-1)
+    if twit_account_page.status_code != 200:
+        logging.fatal('The Nitter page did not download correctly from ' + url + ' (' + str(
+            twit_account_page.status_code) + '). Aborting')
+        shutdown(-1)
+
+    logging.debug('Nitter page downloaded successfully from ' + url)
+
+    # DEBUG: Save page to file
+    # of = open(TOML['config']['twitter_account'] + '.html', 'w')
+    # of.write(twit_account_page.text)
+    # of.close()
+
+    # Parse the downloaded HTML with BeautifulSoup using the 'html.parser' backend
+    soup = BeautifulSoup(twit_account_page.text, 'html.parser')
+
+    # Keep only the elements accepted by the has_class_timeline_item_but_not_thread filter
+    timeline = soup.find_all(has_class_timeline_item_but_not_thread)
+    return soup, timeline
+
+
 def update_profile(nitter_url, soup, sql, mast_password):
    """
    Update profile on Mastodon
@ -841,53 +887,12 @@ def main(argv):
    # To store content of all tweets from this user
    tweets = []

-    # Initiate session
-    session = requests.Session()
-
-    # Get a copy of the default headers that requests would use
-    headers = requests.utils.default_headers()
-
-    # Update default headers with randomly selected user agent
-    headers.update(
-        {
-            'User-Agent': USER_AGENTS[random.randint(0, len(USER_AGENTS) - 1)],
-            'Cookie': 'replaceTwitter=; replaceYouTube=; hlsPlayback=on; proxyVideos=',
-        }
-    )
-
    url = nitter_url + '/' + TOML['config']['twitter_account']
    # Use different page if we need to handle replies
    if TOML['options']['post_reply_to']:
        url += '/with_replies'

-    # Download twitter page of user
-    try:
-        twit_account_page = session.get(url, headers=headers, timeout=HTTPS_REQ_TIMEOUT)
-    except requests.exceptions.ConnectionError:
-        logging.fatal('Host did not respond when trying to download ' + url)
-        shutdown(-1)
-    except requests.exceptions.Timeout:
-        logging.fatal(nitter_url + ' took too long to respond')
-        shutdown(-1)
-
-    # Verify that download worked
-    if twit_account_page.status_code != 200:
-        logging.fatal('The Nitter page did not download correctly from ' + url + ' (' + str(
-            twit_account_page.status_code) + '). Aborting')
-        shutdown(-1)
-
-    logging.debug('Nitter page downloaded successfully from ' + url)
-
-    # DEBUG: Save page to file
-    # of = open(TOML['config']['twitter_account'] + '.html', 'w')
-    # of.write(twit_account_page.text)
-    # of.close()
-
-    # Make soup
-    soup = BeautifulSoup(twit_account_page.text, 'html.parser')
-
-    # Extract twitter timeline
-    timeline = soup.find_all(has_class_timeline_item_but_not_thread)
+    soup, timeline = get_timeline(url)

    logging.info('Processing ' + str(len(timeline)) + ' tweets found in timeline')