Python/web_programming/get_imdbtop.py
Appledora 37385883aa
Improved readability of web_programming/get_imdbtop.py and added documentations with doctests ()
* improved readability of the existing method by reformatting, adding documentations with doctests.

* improved readability of the existing method by reformatting, adding documentations with doctests.

* fixed typo in test

* added doctest to parse dictionary method

* added doctest to parse dictionary method

* Changed return type, removed print() from method and implemented doctests as suggested

* Fixed doctest error, removed print() from method, created new script as suggested

* Update get_imdbtop.py

* Fix typo discovered by codespell

* return ()

Co-authored-by: Christian Clauss <cclauss@me.com>
2021-10-16 03:32:33 +02:00

54 lines
1.5 KiB
Python

import bs4
import requests
def get_movie_data_from_soup(soup: "bs4.element.Tag") -> dict[str, str]:
    """Extract one movie's details from a single search-result element.

    Args:
        soup: The element wrapping one movie entry (one item of the result
            set produced by ``find_all``), not the whole result set.

    Returns:
        A dict with the keys ``name`` (text of the link inside the first
        ``<h3>``), ``genre`` (stripped text of the ``<span class="genre">``),
        ``rating`` (text of the first ``<strong>``) and ``page_link`` (the
        first link's ``href`` prefixed with ``https://www.imdb.com``).
        ``genre`` and ``rating`` are empty strings when the entry has no
        such element.
    """
    # Tag lookups yield None when the element is absent; fall back to ""
    # so one incomplete entry does not abort the whole scrape.
    genre_tag = soup.find("span", class_="genre")
    rating_tag = soup.strong
    return {
        "name": soup.h3.a.text,
        "genre": genre_tag.text.strip() if genre_tag is not None else "",
        "rating": rating_tag.text if rating_tag is not None else "",
        "page_link": f"https://www.imdb.com{soup.a.get('href')}",
    }
def get_imdb_top_movies(num_movies: int = 5) -> tuple:
    """Get the num_movies most-voted feature films from IMDb's title search.

    The search is sorted by number of votes (descending), not by rating.
    Each result is returned as a dict describing the movie's name, genre,
    rating, and URL.

    Args:
        num_movies: The number of movies to get. Defaults to 5. The value is
            passed through ``float()`` and then truncated to an ``int``, so
            ``4.99999`` requests 4 movies.

    Returns:
        A tuple of dicts, one per movie found on the result page. An empty
        tuple is returned without any network access when num_movies
        truncates to less than 1.

    Raises:
        requests.exceptions.RequestException: If the request fails or does
            not complete within the timeout.

    The doctests below need network access to imdb.com.

    >>> len(get_imdb_top_movies(5))
    5
    >>> len(get_imdb_top_movies(-3))
    0
    >>> len(get_imdb_top_movies(4.99999))
    4
    """
    num_movies = int(float(num_movies))
    if num_movies < 1:
        return ()
    base_url = (
        "https://www.imdb.com/search/title?title_type="
        f"feature&sort=num_votes,desc&count={num_movies}"
    )
    # Without a timeout, requests waits forever on an unresponsive server.
    response = requests.get(base_url, timeout=10)
    source = bs4.BeautifulSoup(response.content, "html.parser")
    return tuple(
        get_movie_data_from_soup(movie)
        for movie in source.find_all("div", class_="lister-item mode-advanced")
    )
if __name__ == "__main__":
    import json

    # Ask how many movies to fetch, then show each one as an indented JSON
    # object, with the objects separated by ", ".
    requested_count = int(input("How many movies would you like to see? "))
    rendered_movies = [
        json.dumps(entry, indent=4) for entry in get_imdb_top_movies(requested_count)
    ]
    print(", ".join(rendered_movies))