From 37385883aaa140a505488d23fa37be4049768f2b Mon Sep 17 00:00:00 2001 From: Appledora Date: Sat, 16 Oct 2021 07:32:33 +0600 Subject: [PATCH] Improved readability of web_programming/get_imdbtop.py and added documentations with doctests (#4855) * improved readability of the existing method by reformatting, adding documentations with doctests. * improved readability of the existing method by reformatting, adding documentations with doctests. * fixed typo in test * added doctest to parse dictionary method * added doctest to parse dictionary method * Changed return type, removed print() from method and implemented doctests as suggested * Fixed doctest error, removed print() from method, created new script as suggested * Update get_imdbtop.py * Fix typo discovered by codespell * return () Co-authored-by: Christian Clauss --- web_programming/get_imdbtop.py | 57 +++++++++++++++++++++++++++------- 1 file changed, 45 insertions(+), 12 deletions(-) diff --git a/web_programming/get_imdbtop.py b/web_programming/get_imdbtop.py index 669e7f898..5f7105f83 100644 --- a/web_programming/get_imdbtop.py +++ b/web_programming/get_imdbtop.py @@ -1,20 +1,53 @@ +import bs4 import requests -from bs4 import BeautifulSoup -def imdb_top(imdb_top_n): +def get_movie_data_from_soup(soup: bs4.element.ResultSet) -> dict[str, str]: + return { + "name": soup.h3.a.text, + "genre": soup.find("span", class_="genre").text.strip(), + "rating": soup.strong.text, + "page_link": f"https://www.imdb.com{soup.a.get('href')}", + } + + +def get_imdb_top_movies(num_movies: int = 5) -> tuple: + """Get the top num_movies most highly rated movies from IMDB and + return a tuple of dicts describing each movie's name, genre, rating, and URL. + + Args: + num_movies: The number of movies to get. Defaults to 5. + + Returns: + A list of tuples containing information about the top n movies. + + >>> len(get_imdb_top_movies(5)) + 5 + >>> len(get_imdb_top_movies(-3)) + 0 + >>> len(get_imdb_top_movies(4.99999)) + 4 + """ + num_movies = int(float(num_movies)) + if num_movies < 1: + return () base_url = ( - f"https://www.imdb.com/search/title?title_type=" - f"feature&sort=num_votes,desc&count={imdb_top_n}" + "https://www.imdb.com/search/title?title_type=" + f"feature&sort=num_votes,desc&count={num_movies}" + ) + source = bs4.BeautifulSoup(requests.get(base_url).content, "html.parser") + return tuple( + get_movie_data_from_soup(movie) + for movie in source.find_all("div", class_="lister-item mode-advanced") ) - source = BeautifulSoup(requests.get(base_url).content, "html.parser") - for m in source.findAll("div", class_="lister-item mode-advanced"): - print("\n" + m.h3.a.text) # movie's name - print(m.find("span", attrs={"class": "genre"}).text) # genre - print(m.strong.text) # movie's rating - print(f"https://www.imdb.com{m.a.get('href')}") # movie's page link - print("*" * 40) if __name__ == "__main__": - imdb_top(input("How many movies would you like to see? ")) + import json + + num_movies = int(input("How many movies would you like to see? ")) + print( + ", ".join( + json.dumps(movie, indent=4) for movie in get_imdb_top_movies(num_movies) + ) + )