mirror of
https://github.com/TheAlgorithms/Python.git
synced 2025-01-18 16:27:02 +00:00
Improved readability of web_programming/get_imdbtop.py and added documentations with doctests (#4855)
* improved readability of the existing method by reformatting, adding documentation with doctests.
* improved readability of the existing method by reformatting, adding documentation with doctests.
* fixed typo in test
* added doctest to parse dictionary method
* added doctest to parse dictionary method
* Changed return type, removed print() from method and implemented doctests as suggested
* Fixed doctest error, removed print() from method, created new script as suggested
* Update get_imdbtop.py
* Fix typo discovered by codespell
* return ()

Co-authored-by: Christian Clauss <cclauss@me.com>
This commit is contained in:
parent
152261765a
commit
37385883aa
|
@ -1,20 +1,53 @@
|
|||
import bs4
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
|
||||
def imdb_top(imdb_top_n):
|
||||
def get_movie_data_from_soup(soup: "bs4.element.Tag") -> dict[str, str]:
    """Extract one movie's details from its search-result element.

    Args:
        soup: A single result element of the IMDb search page, i.e. one item
            of a ``find_all`` result rather than the whole result set.

    Returns:
        A dict with the keys "name", "genre", "rating" and "page_link".
        "genre" and "rating" are empty strings when the element has no
        genre ``<span>`` or no ``<strong>`` tag.

    Raises:
        AttributeError: If the element lacks the ``<h3><a>`` title link.
    """
    # find() and tag-attribute access return None when nothing matches, so
    # guard the optional fields instead of failing on ``None.text``.
    genre_tag = soup.find("span", class_="genre")
    rating_tag = soup.strong
    return {
        "name": soup.h3.a.text,
        "genre": genre_tag.text.strip() if genre_tag is not None else "",
        "rating": rating_tag.text if rating_tag is not None else "",
        "page_link": f"https://www.imdb.com{soup.a.get('href')}",
    }
|
||||
|
||||
|
||||
def get_imdb_top_movies(num_movies: int = 5) -> tuple[dict[str, str], ...]:
    """Get the most-voted feature films from IMDb's title search.

    The search URL sorts by number of votes in descending order, so the
    result is ordered by vote count rather than by rating value.

    Args:
        num_movies: The number of movies to get. Defaults to 5. The value is
            converted with ``int(float(...))``, so fractions are truncated.

    Returns:
        A tuple of dicts, one per movie, each holding the movie's name,
        genre, rating, and page link. The tuple is empty when num_movies
        is less than 1.

    >>> len(get_imdb_top_movies(5))
    5
    >>> len(get_imdb_top_movies(-3))
    0
    >>> len(get_imdb_top_movies(4.99999))
    4
    """
    num_movies = int(float(num_movies))
    if num_movies < 1:
        # Nothing requested: answer without touching the network.
        return ()
    base_url = (
        "https://www.imdb.com/search/title?title_type="
        f"feature&sort=num_votes,desc&count={num_movies}"
    )
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.get(base_url, timeout=10)
    source = bs4.BeautifulSoup(response.content, "html.parser")
    return tuple(
        get_movie_data_from_soup(movie)
        for movie in source.find_all("div", class_="lister-item mode-advanced")
    )
|
||||
source = BeautifulSoup(requests.get(base_url).content, "html.parser")
|
||||
for m in source.findAll("div", class_="lister-item mode-advanced"):
|
||||
print("\n" + m.h3.a.text) # movie's name
|
||||
print(m.find("span", attrs={"class": "genre"}).text) # genre
|
||||
print(m.strong.text) # movie's rating
|
||||
print(f"https://www.imdb.com{m.a.get('href')}") # movie's page link
|
||||
print("*" * 40)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    import json

    # Validate the prompt answer up front so a non-numeric reply produces a
    # short message instead of a ValueError traceback.
    try:
        num_movies = int(input("How many movies would you like to see? "))
    except ValueError:
        raise SystemExit("Please enter a whole number of movies.") from None

    # Each movie dict is rendered as its own indented JSON object.
    print(
        ", ".join(
            json.dumps(movie, indent=4) for movie in get_imdb_top_movies(num_movies)
        )
    )
|
||||
|
|
Loading…
Reference in New Issue
Block a user