From 945803f65d79d0277c663a0e043228ed10996a92 Mon Sep 17 00:00:00 2001 From: Caeden Perelli-Harris Date: Fri, 18 Aug 2023 13:19:25 +0100 Subject: [PATCH] Unmark fetch anime and play as BROKEN and fix type errors (#8988) * updating DIRECTORY.md * type(fetch-anime-and-play): Fix type errors and re-enable * updating DIRECTORY.md --------- Co-authored-by: github-actions <${GITHUB_ACTOR}@users.noreply.github.com> --- DIRECTORY.md | 1 + ...play.py.BROKEN => fetch_anime_and_play.py} | 71 ++++++++++--------- 2 files changed, 38 insertions(+), 34 deletions(-) rename web_programming/{fetch_anime_and_play.py.BROKEN => fetch_anime_and_play.py} (70%) diff --git a/DIRECTORY.md b/DIRECTORY.md index 1ff093d88..6af4ead56 100644 --- a/DIRECTORY.md +++ b/DIRECTORY.md @@ -1213,6 +1213,7 @@ * [Daily Horoscope](web_programming/daily_horoscope.py) * [Download Images From Google Query](web_programming/download_images_from_google_query.py) * [Emails From Url](web_programming/emails_from_url.py) + * [Fetch Anime And Play](web_programming/fetch_anime_and_play.py) * [Fetch Bbc News](web_programming/fetch_bbc_news.py) * [Fetch Github Info](web_programming/fetch_github_info.py) * [Fetch Jobs](web_programming/fetch_jobs.py) diff --git a/web_programming/fetch_anime_and_play.py.BROKEN b/web_programming/fetch_anime_and_play.py similarity index 70% rename from web_programming/fetch_anime_and_play.py.BROKEN rename to web_programming/fetch_anime_and_play.py index 3bd4f704d..366807785 100644 --- a/web_programming/fetch_anime_and_play.py.BROKEN +++ b/web_programming/fetch_anime_and_play.py @@ -1,7 +1,5 @@ -from xml.dom import NotFoundErr - import requests -from bs4 import BeautifulSoup, NavigableString +from bs4 import BeautifulSoup, NavigableString, Tag from fake_useragent import UserAgent BASE_URL = "https://ww1.gogoanime2.org" @@ -41,25 +39,23 @@ def search_scraper(anime_name: str) -> list: # get list of anime anime_ul = soup.find("ul", {"class": "items"}) + if anime_ul is None or isinstance(anime_ul, NavigableString): + msg = f"Could not find and anime with name {anime_name}" + raise ValueError(msg) anime_li = anime_ul.children # for each anime, insert to list. the name and url. anime_list = [] for anime in anime_li: - if not isinstance(anime, NavigableString): - try: - anime_url, anime_title = ( - anime.find("a")["href"], - anime.find("a")["title"], - ) - anime_list.append( - { - "title": anime_title, - "url": anime_url, - } - ) - except (NotFoundErr, KeyError): - pass + if isinstance(anime, Tag): + anime_url = anime.find("a") + if anime_url is None or isinstance(anime_url, NavigableString): + continue + anime_title = anime.find("a") + if anime_title is None or isinstance(anime_title, NavigableString): + continue + + anime_list.append({"title": anime_title["title"], "url": anime_url["href"]}) return anime_list @@ -93,22 +89,24 @@ def search_anime_episode_list(episode_endpoint: str) -> list: # With this id. get the episode list. episode_page_ul = soup.find("ul", {"id": "episode_related"}) + if episode_page_ul is None or isinstance(episode_page_ul, NavigableString): + msg = f"Could not find any anime eposiodes with name {anime_name}" + raise ValueError(msg) episode_page_li = episode_page_ul.children episode_list = [] for episode in episode_page_li: - try: - if not isinstance(episode, NavigableString): - episode_list.append( - { - "title": episode.find("div", {"class": "name"}).text.replace( - " ", "" - ), - "url": episode.find("a")["href"], - } - ) - except (KeyError, NotFoundErr): - pass + if isinstance(episode, Tag): + url = episode.find("a") + if url is None or isinstance(url, NavigableString): + continue + title = episode.find("div", {"class": "name"}) + if title is None or isinstance(title, NavigableString): + continue + + episode_list.append( + {"title": title.text.replace(" ", ""), "url": url["href"]} + ) return episode_list @@ -140,11 +138,16 @@ def get_anime_episode(episode_endpoint: str) -> list: soup = BeautifulSoup(response.text, "html.parser") - try: - episode_url = soup.find("iframe", {"id": "playerframe"})["src"] - download_url = episode_url.replace("/embed/", "/playlist/") + ".m3u8" - except (KeyError, NotFoundErr) as e: - raise e + url = soup.find("iframe", {"id": "playerframe"}) + if url is None or isinstance(url, NavigableString): + msg = f"Could not find url and download url from {episode_endpoint}" + raise RuntimeError(msg) + + episode_url = url["src"] + if not isinstance(episode_url, str): + msg = f"Could not find url and download url from {episode_endpoint}" + raise RuntimeError(msg) + download_url = episode_url.replace("/embed/", "/playlist/") + ".m3u8" return [f"{BASE_URL}{episode_url}", f"{BASE_URL}{download_url}"]