mirror of
https://github.com/hastagAB/Awesome-Python-Scripts.git
synced 2024-12-18 16:20:14 +00:00
bd9a89afd0
Co-authored-by: Ayush Bhardwaj <classicayush@gmail.com>
57 lines
1.7 KiB
Python
57 lines
1.7 KiB
Python
from bs4 import BeautifulSoup
|
|
from selenium import webdriver
|
|
import time
|
|
from selenium.webdriver.common.keys import Keys
|
|
from film_content_parser import obtain_film_object
|
|
from parser_config import check_film_object, watched_included
|
|
from html_creator import create_html_file
|
|
|
|
def get_watched_films(file_path):
    """Read the names of already-watched films from a text file.

    The file is expected to hold one film name per line. Empty lines are
    skipped; names are otherwise returned exactly as written.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A list with the non-empty lines of the file, in file order.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
    """
    # The context manager guarantees the file handle is closed, even if
    # reading fails part-way.
    with open(file_path, 'r') as watched_films_txt:
        watched_names = watched_films_txt.read().split('\n')
    return [name for name in watched_names if name != '']
|
|
|
|
# Names of already-watched films. Left as None when watched_included()
# reports true, in which case the watched-films file is not read at all.
watched_films = (
    None if watched_included() else get_watched_films('watched_films.txt')
)

# Seconds to pause after requesting the page so the browser can load it.
TIME_FACTOR = 2

# URL of the IMDb list whose films are scraped.
list_url = "https://www.imdb.com/list/ls025718406/?ref_=tt_rls_2"
|
|
|
|
# Fetch the rendered list page with a real browser, then filter its films and
# write the ones that pass the configured checks into a new HTML file.
print("Opening a webdriver")
driver = webdriver.Chrome()
try:
    driver.get(list_url)

    print("Waiting the website to be loaded")
    # Fixed pause that gives the browser time to load the page before its
    # source is read.
    time.sleep(TIME_FACTOR)

    page_html = driver.page_source
finally:
    # Always end the browser session, even when loading the page failed, so
    # no browser/driver process is left behind. The driver is not needed
    # once the page source has been captured.
    driver.quit()

# page_source is already a decoded string, so it is handed to the parser
# as-is; no manual re-encoding is needed.
soup = BeautifulSoup(page_html, 'lxml')

# Obtain all films: one "lister-item" div per list entry.
film_contents = soup.find_all("div", class_="lister-item mode-detail")

# Title of the list; passed to create_html_file together with the films.
list_header = soup.find("h1", class_='header list-name').text

wanted_films = []

print("Parsing and querying films")
for film_entry in film_contents:
    # Each entry holds a poster image and a separate content div.
    img_source = film_entry.find(
        'div', class_='lister-item-image ribbonize'
    ).find('img')
    film_details = film_entry.find('div', class_='lister-item-content')

    current_film = obtain_film_object(film_details, img_source)
    # Keep only the films accepted by the configured checks.
    if check_film_object(current_film, watched_films):
        wanted_films.append(current_film)

create_html_file(wanted_films, list_header)
print("New html created with the name ", list_header)
|