Merge pull request #120 from mclmza/master

Added crawl Google search
Advaita Saha 2022-10-04 20:41:34 +05:30 committed by GitHub
commit 518feb41a2
2 changed files with 33 additions and 0 deletions

@@ -0,0 +1,9 @@
# Crawl Google results
This is a simple script that collects the search results Google returns for a query and opens the top five in your browser.
## Usage
* Three packages are required: `requests`, `beautifulsoup4` (imported as `bs4`) and `fake_useragent`.
* Install them with `pip install requests beautifulsoup4 fake_useragent`.
* The script also writes the raw response to `project1a.html` so the result markup can be inspected.
* Run `python main.py "search query"` (an example session follows below).
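For example, a session might look like this (the query and the result count are illustrative):

```
$ pip install requests beautifulsoup4 fake_useragent
$ python main.py "python web scraping"
Googling...
Opening 5 results
```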

@@ -0,0 +1,24 @@
import sys
import webbrowser

import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent

if __name__ == "__main__":
    print("Googling...")
    # Build the search URL from the command-line arguments.
    url = "https://www.google.com/search?q=" + "+".join(sys.argv[1:])
    # A random User-Agent header makes the request look like a normal browser.
    res = requests.get(url, headers={"User-Agent": UserAgent().random})
    res.raise_for_status()
    # Save the raw response so the result markup (and its CSS classes)
    # can be inspected by hand.
    with open("project1a.html", "wb") as out_file:
        for data in res.iter_content(10000):
            out_file.write(data)
    soup = BeautifulSoup(res.text, "html.parser")
    # ".eZt8xd" is an obfuscated class Google currently uses for result
    # links; it may change without notice.
    links = soup.select(".eZt8xd")[:5]
    print(f"Opening {len(links)} results")
    for link in links:
        href = link.get("href")
        if link.text == "Maps":
            # The Maps link is already an absolute URL.
            webbrowser.open(href)
        else:
            # Other result links are relative; prefix the Google domain.
            webbrowser.open(f"https://google.com{href}")