diff --git a/requirements.txt b/requirements.txt
index 4f6ff321c..824f534a2 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,6 @@
+beautifulsoup4
 black
+fake_useragent
 flake8
 matplotlib
 mypy
diff --git a/web_programming/crawl_google_results.py b/web_programming/crawl_google_results.py
new file mode 100644
index 000000000..c31ec1526
--- /dev/null
+++ b/web_programming/crawl_google_results.py
@@ -0,0 +1,36 @@
+"""Open the first few Google results for a command-line query in a web browser.
+
+Usage: python crawl_google_results.py <search terms>
+"""
+import sys
+import webbrowser
+
+from bs4 import BeautifulSoup
+from fake_useragent import UserAgent
+import requests
+
+print("Googling.....")
+url = "https://www.google.com/search"
+# Send the query through ``params`` so requests percent-encodes it; pasting raw
+# terms into the URL breaks on characters such as "&", "#" or "+".
+# The header name must be "User-Agent": a "UserAgent" key is just an unknown
+# header, so the randomised agent string never replaced the requests default.
+res = requests.get(
+    url,
+    params={"q": " ".join(sys.argv[1:])},
+    headers={"User-Agent": UserAgent().random},
+    timeout=10,  # without a timeout a stalled connection can hang forever
+)
+# raise_for_status() stays disabled, as before: the response body is written
+# to disk below regardless of its status code.
+with open("project1a.html", "wb") as out_file:  # only for knowing the class
+    for data in res.iter_content(10000):
+        out_file.write(data)
+soup = BeautifulSoup(res.text, "html.parser")
+links = soup.select(".eZt8xd")[:5]  # keep at most the first five matches
+
+print(len(links))
+for link in links:
+    href = link.get("href")
+    if href:  # a match without an href would otherwise open ".../None"
+        webbrowser.open(f"http://google.com{href}")