Compare commits

..

No commits in common. "497d9f3a2069e8dedfabd7498ef81e6e9a473872" and "f585878d0fa7538f571859114344aa1992836be9" have entirely different histories.

View File

@ -169,14 +169,12 @@ Download page with full thread of tweets and extract all replied to tweet referen
Only used by `get_timeline()`. Only used by `get_timeline()`.
:param session: Existing HTTP session with Nitter instance :param session: Existing HTTP session with Nitter instance
:param headers: HTTP headers to use :param headers: HTTP headers to use
:param nitter_url: url of the nitter instance to use :param url: url of the thread page to download
:param thread_url: url of the first tweet in thread :return: List of tweets from the thread
:return: list of tuples with url of tweet replied-to (or None) and content of tweet
""" """
def _get_rest_of_thread(session, headers, nitter_url, thread_url): def _get_rest_of_thread(session, headers, url):
logging.debug("Downloading tweets in thread from separate page") logging.debug("Downloading tweets in thread from separate page")
# Download page with thread # Download page with thread
url = nitter_url + thread_url
try: try:
thread_page = session.get(url, headers=headers, timeout=HTTPS_REQ_TIMEOUT) thread_page = session.get(url, headers=headers, timeout=HTTPS_REQ_TIMEOUT)
except requests.exceptions.ConnectionError: except requests.exceptions.ConnectionError:
@ -203,25 +201,14 @@ def _get_rest_of_thread(session, headers, nitter_url, thread_url):
# Get all items in thread after main tweet # Get all items in thread after main tweet
after_tweet = soup.find('div', 'after-tweet') after_tweet = soup.find('div', 'after-tweet')
list = after_tweet.find_all('div', class_='timeline-item')
# Build timeline of tuples
timeline = []
previous_tweet_url = thread_url
for item in list:
timeline.append((previous_tweet_url, item))
# Get the url of the tweet
previous_tweet_url = item.find('a', class_='tweet-link')
if previous_tweet_url is None:
logging.error('Thread tweet is missing link tag')
timeline = after_tweet.find_all('div', class_='timeline-item')
return timeline return timeline
""" """
Download timeline of twitter account Download page with full thread of tweets. Only used by `get_timeline()`.
:param url: url of the account page to download :param url: url of the thread page to download
:return: list of tuples with url of tweet replied-to (or None) and content of tweet :return: List of tweets from the thread
""" """
def get_timeline(nitter_url): def get_timeline(nitter_url):
# Define url to use # Define url to use
@ -281,20 +268,17 @@ def get_timeline(nitter_url):
for item in list: for item in list:
classes = item['class'] classes = item['class']
if 'timeline-item' in classes: # Individual tweet if 'timeline-item' in classes: # Individual tweet
timeline.append((None, item)) timeline.append(item)
elif 'thread-line' in classes: # First tweet of a thread elif 'thread-line' in classes: # First tweet of a thread
# Get the first item of thread # Get the first item of thread
first_item = item.find('div', class_='timeline-item') first_item = item.find('div', class_='timeline-item')
timeline.append(first_item)
# Get the url of the tweet # Get the rest of the items of the thread
thread_link_tag = item.find('a', class_='tweet-link') thread_link_tag = item.find('a', class_='tweet-link')
if thread_link_tag is not None: if thread_link_tag is not None:
thread_url = thread_link_tag.get('href') thread_url = thread_link_tag.get('href')
timeline.extend(_get_rest_of_thread(session, headers, nitter_url + thread_url))
timeline.append((thread_url, first_item))
# Get the rest of the items of the thread
timeline.extend(_get_rest_of_thread(session, headers, nitter_url, thread_url))
else: else:
# Ignore other classes # Ignore other classes
continue continue
@ -893,6 +877,7 @@ def main(argv):
log_level = logging.INFO log_level = logging.INFO
elif ll_str == "WARNING": elif ll_str == "WARNING":
log_level = logging.WARNING log_level = logging.WARNING
print('log level warning set')
elif ll_str == "ERROR": elif ll_str == "ERROR":
log_level = logging.ERROR log_level = logging.ERROR
elif ll_str == "CRITICAL": elif ll_str == "CRITICAL":
@ -959,7 +944,7 @@ def main(argv):
tweets = [] tweets = []
out_date_cnt = 0 out_date_cnt = 0
in_db_cnt = 0 in_db_cnt = 0
for reply_to, status in timeline: for status in timeline:
# Extract tweet ID and status ID # Extract tweet ID and status ID
tweet_id = status.find('a', class_='tweet-link').get('href').strip('#m') tweet_id = status.find('a', class_='tweet-link').get('href').strip('#m')
status_id = tweet_id.split('/')[3] status_id = tweet_id.split('/')[3]
@ -1122,7 +1107,6 @@ def main(argv):
"tweet_text": tweet_text, "tweet_text": tweet_text,
"video": video_file, "video": video_file,
"photos": photos, "photos": photos,
"reply-to": reply_to,
} }
tweets.append(tweet) tweets.append(tweet)