mirror of
https://github.com/TheAlgorithms/Python.git
synced 2024-11-24 05:21:09 +00:00
43f99e56c9
* Python program that scrapes 3 sites at a time; pass the search terms on the command line, e.g. python3 project1.py (man) * Update project1.py * noqa: F401 and reformat with black * Rename project1.py to web_programming/crawl_google_results.py * Add beautifulsoup4 to requirements.txt * Add fake_useragent to requirements.txt * Update crawl_google_results.py * headers={"UserAgent": UserAgent().random} * html.parser, not lxml * link, not links
21 lines
627 B
Python
21 lines
627 B
Python
import sys
|
|
import webbrowser
|
|
|
|
from bs4 import BeautifulSoup
|
|
from fake_useragent import UserAgent
|
|
import requests
|
|
|
|
# Google a phrase given on the command line and open the first few matching
# result links in the default web browser.
#
# Usage: python3 crawl_google_results.py <search terms...>

SEARCH_URL = "https://www.google.com/search"
# CSS class selected on the results page. Google's markup is not a stable API,
# so this presumably needs re-checking against the dumped HTML from time to time.
RESULT_SELECTOR = ".eZt8xd"
MAX_RESULTS = 5
REQUEST_TIMEOUT = 10  # seconds; without a timeout requests may block forever
DEBUG_DUMP_PATH = "project1a.html"


def build_query(words: list) -> str:
    """Join the command-line words into a single search query string."""
    return " ".join(words)


def result_url(href: str) -> str:
    """Return the absolute URL for a site-relative ``href`` taken from a result."""
    return f"http://google.com{href}"


def main() -> None:
    """Search Google for the command-line arguments and open the top results.

    The raw response is also written to ``DEBUG_DUMP_PATH`` so the page markup
    can be inspected when the result selector needs updating.

    Raises:
        SystemExit: if no search terms were supplied on the command line.
    """
    query = build_query(sys.argv[1:])
    if not query:
        sys.exit(f"Usage: {sys.argv[0]} <search terms...>")

    print("Googling.....")
    # Passing the query via ``params`` lets requests URL-encode it, so terms
    # containing characters such as '&', '#' or '+' are sent intact.
    res = requests.get(
        SEARCH_URL,
        params={"q": query},
        # The HTTP header is spelled "User-Agent"; "UserAgent" is just an
        # unknown custom header and leaves the default agent in place.
        headers={"User-Agent": UserAgent().random},
        timeout=REQUEST_TIMEOUT,
    )
    # NOTE: the status code is not checked (the original script had
    # raise_for_status() disabled), so the page is dumped whatever the status.

    with open(DEBUG_DUMP_PATH, "wb") as out_file:  # only for knowing the class
        out_file.write(res.content)

    soup = BeautifulSoup(res.text, "html.parser")
    links = soup.select(RESULT_SELECTOR)[:MAX_RESULTS]

    print(len(links))
    for link in links:
        href = link.get("href")
        if href:  # skip elements without an href instead of opening ".../None"
            webbrowser.open(result_url(href))


if __name__ == "__main__":
    main()