mirror of
https://github.com/metafy-social/python-scripts.git
synced 2024-11-24 04:21:12 +00:00
commit
518feb41a2
9
scripts/Crawl Google Results/README.md
Normal file
9
scripts/Crawl Google Results/README.md
Normal file
|
@ -0,0 +1,9 @@
|
||||||
|
# Crawl Google results
|
||||||
|
This is a simple script that lets you collect results provided by Google.
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
* 3 packages required requests, BeautifulSoup and fake_useragent
|
||||||
|
* Use `pip install requests`, `pip install bs4` and `pip install fake_useragent`
|
||||||
|
* Add path to your csv file and output excel file WITH EXTENSION `.csv` and `.xlsx`
|
||||||
|
* Run `python main.py "query search"`
|
24
scripts/Crawl Google Results/main.py
Normal file
24
scripts/Crawl Google Results/main.py
Normal file
|
@ -0,0 +1,24 @@
|
||||||
|
import sys
|
||||||
|
import webbrowser
|
||||||
|
|
||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from fake_useragent import UserAgent
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    print("Googling.....")

    # Build the Google search URL from every command-line argument so a
    # multi-word query (`python main.py "query search"`) works as one search.
    url = "https://www.google.com/search?q=" + " ".join(sys.argv[1:])

    # BUG FIX: the HTTP request header is named "User-Agent" (with a hyphen).
    # The previous key "UserAgent" is not a real header, so the randomized UA
    # from fake_useragent was silently ignored and requests' default
    # "python-requests/..." UA was sent — which Google often answers with a
    # bot-detection page.
    res = requests.get(url, headers={"User-Agent": UserAgent().random})
    # Fail fast on HTTP errors (4xx/5xx) instead of parsing an error page.
    res.raise_for_status()

    # Dump the raw response for manual inspection — this is how the ".eZt8xd"
    # CSS class used below was discovered.
    with open("project1a.html", "wb") as out_file:  # only for knowing the class
        for chunk in res.iter_content(10000):
            out_file.write(chunk)

    soup = BeautifulSoup(res.text, "html.parser")

    # ".eZt8xd" is a Google-generated class on result links; NOTE(review):
    # this selector is brittle and will break when Google changes its markup.
    links = list(soup.select(".eZt8xd"))[:5]

    print(len(links))
    for link in links:
        if link.text == "Maps":
            # Presumably the "Maps" entry carries an absolute URL — TODO confirm.
            webbrowser.open(link.get("href"))
        else:
            # Other results carry a site-relative href; prefix the Google domain.
            webbrowser.open(f"http://google.com{link.get('href')}")
|
Loading…
Reference in New Issue
Block a user