Improved readability of web_programming/get_imdbtop.py and added documentations with doctests (#4855)

* improved readability of the existing method by reformatting, adding documentations with doctests.
* improved readability of the existing method by reformatting, adding documentations with doctests.
* fixed typo in test
* added doctest to parse dictionary method
* added doctest to parse dictionary method
* Changed return type, removed print() from method and implemented doctests as suggested
* Fixed doctest error, removed print() from method, created new script as suggested
* Update get_imdbtop.py
* Fix typo discovered by codespell
* return ()

Co-authored-by: Christian Clauss <cclauss@me.com>
2025-05-22 00:43:22 +00:00 · 2021-10-16 07:32:33 +06:00 · 2021-10-16 07:32:33 +06:00 · 37385883aa
commit 37385883aa
parent 152261765a
1 changed files with 45 additions and 12 deletions
--- a/web_programming/get_imdbtop.py
+++ b/web_programming/get_imdbtop.py
@@ -1,20 +1,53 @@
+import bs4
 import requests
-from bs4 import BeautifulSoup


-def imdb_top(imdb_top_n):
+def get_movie_data_from_soup(soup: bs4.element.ResultSet) -> dict[str, str]:
+    return {
+        "name": soup.h3.a.text,
+        "genre": soup.find("span", class_="genre").text.strip(),
+        "rating": soup.strong.text,
+        "page_link": f"https://www.imdb.com{soup.a.get('href')}",
+    }
+
+
+def get_imdb_top_movies(num_movies: int = 5) -> tuple:
+    """Get the top num_movies most highly rated movies from IMDB and
+    return a tuple of dicts describing each movie's name, genre, rating, and URL.
+
+    Args:
+        num_movies: The number of movies to get. Defaults to 5.
+
+    Returns:
+        A tuple of dicts containing information about the top n movies.
+
+    >>> len(get_imdb_top_movies(5))
+    5
+    >>> len(get_imdb_top_movies(-3))
+    0
+    >>> len(get_imdb_top_movies(4.99999))
+    4
+    """
+    num_movies = int(float(num_movies))
+    if num_movies < 1:
+        return ()
    base_url = (
-        f"https://www.imdb.com/search/title?title_type="
-        f"feature&sort=num_votes,desc&count={imdb_top_n}"
+        "https://www.imdb.com/search/title?title_type="
+        f"feature&sort=num_votes,desc&count={num_movies}"
+    )
+    source = bs4.BeautifulSoup(requests.get(base_url).content, "html.parser")
+    return tuple(
+        get_movie_data_from_soup(movie)
+        for movie in source.find_all("div", class_="lister-item mode-advanced")
    )
-    source = BeautifulSoup(requests.get(base_url).content, "html.parser")
-    for m in source.findAll("div", class_="lister-item mode-advanced"):
-        print("\n" + m.h3.a.text)  # movie's name
-        print(m.find("span", attrs={"class": "genre"}).text)  # genre
-        print(m.strong.text)  # movie's rating
-        print(f"https://www.imdb.com{m.a.get('href')}")  # movie's page link
-        print("*" * 40)


 if __name__ == "__main__":
-    imdb_top(input("How many movies would you like to see? "))
+    import json
+
+    num_movies = int(input("How many movies would you like to see? "))
+    print(
+        ", ".join(
+            json.dumps(movie, indent=4) for movie in get_imdb_top_movies(num_movies)
+        )
+    )