Awesome-Python-Scripts/IMDBQuerier/main.py

from bs4 import BeautifulSoup
from selenium import webdriver
import time
from selenium.webdriver.common.keys import Keys
from film_content_parser import obtain_film_object
from parser_config import check_film_object, watched_included
from html_creator import create_html_file

def get_watched_films(file_path):
    """Read film names from a text file that lists one name per line.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A list with the non-empty lines of the file, in file order.
        An empty file yields an empty list.

    Raises:
        OSError: If the file cannot be opened (for example, it is missing).
    """
    # The context manager guarantees the handle is closed, even when
    # reading fails part-way through.
    with open(file_path, 'r') as watched_films_txt:
        watched_names = watched_films_txt.read().split('\n')
    # Blank lines (including the one after a trailing newline) are dropped.
    return [name for name in watched_names if name != '']

# Script flow: optionally load the watched-film list, fetch the IMDb list page
# with a Chrome webdriver, parse every list entry into a film object, keep the
# ones accepted by check_film_object, and write them out as an HTML file.

# The watched list is only read when watched_included() returns a falsy
# value; otherwise None is handed to check_film_object.
watched_films = None
if not watched_included():
    watched_films = get_watched_films('watched_films.txt')

# Time to wait for web page to be loaded.
TIME_FACTOR = 2

# Give the URL of the imdb list.
list_url = "https://www.imdb.com/list/ls025718406/?ref_=tt_rls_2"

print("Opening a webdriver")
driver = webdriver.Chrome()

try:
    driver.get(list_url)

    print("Waiting the website to be loaded")
    # Wait browser to load the page.
    time.sleep(TIME_FACTOR)

    content = driver.page_source.encode('utf-16').strip()
finally:
    # Everything below works on the captured markup only, so the browser is
    # released here. Doing it in `finally` ensures the browser session is
    # ended even when loading or parsing fails.
    driver.quit()

soup = BeautifulSoup(content, 'lxml')

# Obtain all films
film_contents = soup.find_all("div", class_="lister-item mode-detail")

wanted_films = []

# Title of the list; passed to create_html_file and echoed at the end.
list_header = soup.find("h1", class_='header list-name').text

print("Parsing and querying films")
for all_content in film_contents:
    # Poster <img> tag and the details container of the current list entry.
    img_source = all_content.find('div', class_='lister-item-image ribbonize').find('img')
    film_content = all_content.find('div', class_='lister-item-content')
    current_film = obtain_film_object(film_content, img_source)
    if check_film_object(current_film, watched_films):
        wanted_films.append(current_film)

create_html_file(wanted_films, list_header)
print("New html created with the name ",list_header )
IMDB list querier project by Burak Bekci (#187) Co-authored-by: Ayush Bhardwaj <classicayush@gmail.com> 2020-10-14 11:46:08 +00:00			`from bs4 import BeautifulSoup`
			`from selenium import webdriver`
			`import time`
			`from selenium.webdriver.common.keys import Keys`
			`from film_content_parser import obtain_film_object`
			`from parser_config import check_film_object, watched_included`
			`from html_creator import create_html_file`

			`def get_watched_films(file_path):`
			`watched_films_txt = open(file_path, 'r')`
			`if watched_films_txt:`
			`watched_names = watched_films_txt.read().split('\n')`
			`return [names for names in watched_names if names != '']`
			`return None`

			`watched_films = None`
			`if not watched_included():`
			`watched_films = get_watched_films('watched_films.txt')`

			`# Time to wait for web page to be loaded.`
			`TIME_FACTOR = 2`

			`# Give the URL of the imdb list.`
			`list_url = "https://www.imdb.com/list/ls025718406/?ref_=tt_rls_2"`

			`print("Opening a webdriver")`
			`driver = webdriver.Chrome()`

			`driver.get(list_url)`

			`print("Waiting the website to be loaded")`
			`# Wait browser to load the page.`
			`time.sleep(TIME_FACTOR)`

			`content = driver.page_source.encode('utf-16').strip()`
			`soup = BeautifulSoup(content, 'lxml')`

			`# Obtain all films`
			`film_contents = soup.find_all("div", class_="lister-item mode-detail")`

			`wanted_films = []`

			`list_header = soup.find("h1", class_='header list-name').text`

			`print("Parsing and querying films")`
			`for all_content in film_contents:`
			`img_source = all_content.find('div', class_='lister-item-image ribbonize').find('img')`
			`content = all_content.find('div', class_='lister-item-content')`
			`current_film = obtain_film_object(content, img_source)`
			`if check_film_object(current_film, watched_films):`
			`wanted_films.append(current_film)`

			`create_html_file(wanted_films, list_header)`
			`print("New html created with the name ",list_header )`

			`driver.close()`