mirror of
https://github.com/metafy-social/python-scripts.git
synced 2024-11-23 20:11:10 +00:00
commit
518feb41a2
9
scripts/Crawl Google Results/README.md
Normal file
9
scripts/Crawl Google Results/README.md
Normal file
|
@ -0,0 +1,9 @@
|
|||
# Crawl Google results
|
||||
This is a simple script that lets you collect results provided by Google.
|
||||
|
||||
## Usage
|
||||
|
||||
* 3 packages are required: requests, BeautifulSoup and fake_useragent
|
||||
* Use `pip install requests`, `pip install bs4` and `pip install fake_useragent`
|
||||
* Add the path to your CSV file and the output Excel file WITH EXTENSIONS `.csv` and `.xlsx`
|
||||
* Run `python main.py "query search"`
|
24
scripts/Crawl Google Results/main.py
Normal file
24
scripts/Crawl Google Results/main.py
Normal file
|
@ -0,0 +1,24 @@
|
|||
import sys
|
||||
import webbrowser
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from fake_useragent import UserAgent
|
||||
|
||||
if __name__ == "__main__":
    # Search Google for the query given on the command line, save the raw
    # result page for inspection, and open the top five result links in the
    # default web browser.
    import urllib.parse

    print("Googling.....")
    # FIX: percent-encode the query so characters like '&', '#', '+' and
    # non-ASCII text survive inside the URL (previously joined in raw).
    query = urllib.parse.quote_plus(" ".join(sys.argv[1:]))
    url = "https://www.google.com/search?q=" + query
    # FIX: the header key must be "User-Agent" (with a hyphen); the original
    # "UserAgent" key was ignored, so the randomized UA was never sent and
    # requests' default UA went out instead.
    res = requests.get(url, headers={"User-Agent": UserAgent().random})
    # Fail loudly on an HTTP error page instead of silently parsing it.
    res.raise_for_status()
    # Dump the raw HTML — only for discovering the result-link CSS class.
    with open("project1a.html", "wb") as out_file:
        for chunk in res.iter_content(10000):
            out_file.write(chunk)
    soup = BeautifulSoup(res.text, "html.parser")
    # ".eZt8xd" is a Google-generated class on result links — fragile and
    # may break when Google changes its markup. Keep the first five hits.
    links = list(soup.select(".eZt8xd"))[:5]

    print(len(links))
    for link in links:
        if link.text == "Maps":
            # Maps links are already absolute URLs.
            webbrowser.open(link.get("href"))
        else:
            # Other hrefs are relative; prefix the canonical HTTPS origin
            # (the original "http://google.com" forced a redirect chain).
            webbrowser.open(f"https://www.google.com{link.get('href')}")
|
Loading…
Reference in New Issue
Block a user