"""Scrape the IMDb top-rated movies chart and save it as a CSV file.

Provenance: web_programming/get_imdb_top_250_movies_csv.py, created by commit
51b769095f9b5671147b71c412024009eba03220
("Create get_imdb_top_250_movies_csv.py (#1659)", nishithshowri006,
Mon, 6 Jan 2020 02:58:36 +0530, co-authored by Christian Clauss).
"""
import csv
from typing import Dict

import requests
from bs4 import BeautifulSoup

# Chart page that is scraped when the caller does not supply a URL.
DEFAULT_CHART_URL = "https://www.imdb.com/chart/top/?ref_=nv_mv_250"

# Upper bound (seconds) on the HTTP request; without it requests waits forever.
REQUEST_TIMEOUT_SECONDS = 10


def get_imdb_top_250_movies(url: str = "") -> Dict[str, float]:
    """Return a mapping of movie title to IMDb rating scraped from *url*.

    Args:
        url: Page to scrape. An empty string (the default) selects
            ``DEFAULT_CHART_URL``.

    Returns:
        A dict whose keys are the anchor text of each ``td.titleColumn`` cell
        and whose values are the ``<strong>`` text of the matching
        ``td.ratingColumn.imdbRating`` cell converted to ``float``. Cells are
        paired positionally; if the two lists differ in length the surplus is
        dropped, and a repeated title keeps only its last rating.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    url = url or DEFAULT_CHART_URL
    response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
    # Fail loudly on 4xx/5xx instead of parsing an error page into an empty dict.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")
    titles = soup.find_all("td", class_="titleColumn")
    ratings = soup.find_all("td", class_="ratingColumn imdbRating")
    return {
        title.a.text: float(rating.strong.text)
        for title, rating in zip(titles, ratings)
    }


def write_movies(filename: str = "IMDb_Top_250_Movies.csv", url: str = "") -> None:
    """Fetch the chart and write it to *filename* as a two-column CSV.

    Args:
        filename: Destination path; an existing file is overwritten.
        url: Forwarded to ``get_imdb_top_250_movies``; empty means its default.

    The file starts with the header row ``Movie title, IMDb rating`` followed
    by one row per scraped movie.
    """
    movies = get_imdb_top_250_movies(url)
    # newline="" lets the csv module control line endings; the explicit UTF-8
    # encoding keeps non-ASCII titles writable regardless of the OS locale.
    with open(filename, "w", newline="", encoding="utf-8") as out_file:
        writer = csv.writer(out_file)
        writer.writerow(["Movie title", "IMDb rating"])
        writer.writerows(movies.items())


if __name__ == "__main__":
    write_movies()