diff --git a/web_programming/get_imdbtop.py b/web_programming/get_imdbtop.py index 669e7f898..5f7105f83 100644 --- a/web_programming/get_imdbtop.py +++ b/web_programming/get_imdbtop.py @@ -1,20 +1,53 @@ +import bs4 import requests -from bs4 import BeautifulSoup -def imdb_top(imdb_top_n): +def get_movie_data_from_soup(soup: bs4.element.ResultSet) -> dict[str, str]: + return { + "name": soup.h3.a.text, + "genre": soup.find("span", class_="genre").text.strip(), + "rating": soup.strong.text, + "page_link": f"https://www.imdb.com{soup.a.get('href')}", + } + + +def get_imdb_top_movies(num_movies: int = 5) -> tuple: + """Get the top num_movies most highly rated movies from IMDB and + return a tuple of dicts describing each movie's name, genre, rating, and URL. + + Args: + num_movies: The number of movies to get. Defaults to 5. + + Returns: + A list of tuples containing information about the top n movies. + + >>> len(get_imdb_top_movies(5)) + 5 + >>> len(get_imdb_top_movies(-3)) + 0 + >>> len(get_imdb_top_movies(4.99999)) + 4 + """ + num_movies = int(float(num_movies)) + if num_movies < 1: + return () base_url = ( - f"https://www.imdb.com/search/title?title_type=" - f"feature&sort=num_votes,desc&count={imdb_top_n}" + "https://www.imdb.com/search/title?title_type=" + f"feature&sort=num_votes,desc&count={num_movies}" + ) + source = bs4.BeautifulSoup(requests.get(base_url).content, "html.parser") + return tuple( + get_movie_data_from_soup(movie) + for movie in source.find_all("div", class_="lister-item mode-advanced") ) - source = BeautifulSoup(requests.get(base_url).content, "html.parser") - for m in source.findAll("div", class_="lister-item mode-advanced"): - print("\n" + m.h3.a.text) # movie's name - print(m.find("span", attrs={"class": "genre"}).text) # genre - print(m.strong.text) # movie's rating - print(f"https://www.imdb.com{m.a.get('href')}") # movie's page link - print("*" * 40) if __name__ == "__main__": - imdb_top(input("How many movies would you like to see? ")) + import json + + num_movies = int(input("How many movies would you like to see? ")) + print( + ", ".join( + json.dumps(movie, indent=4) for movie in get_imdb_top_movies(num_movies) + ) + )