mirror of
https://github.com/metafy-social/python-scripts.git
synced 2024-12-18 00:00:17 +00:00
Added crawl Google search
This commit is contained in:
parent
5e42fdc82d
commit
a12b9931ca
9
scripts/Crawl Google Results/README.md
Normal file
9
scripts/Crawl Google Results/README.md
Normal file
|
@ -0,0 +1,9 @@
|
||||||
|
# Crawl Google results
|
||||||
|
This is a simple script that lets you collect results provided by Google.
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
* 3 packages are required: `requests`, `BeautifulSoup` (installed as `bs4`) and `fake_useragent`
|
||||||
|
* Use `pip install requests`, `pip install bs4` and `pip install fake_useragent`
|
||||||
|
* Add path to your csv file and output excel file WITH EXTENSION `.csv` and `.xlsx`
|
||||||
|
* Run `python main.py "query search"`
|
24
scripts/Crawl Google Results/main.py
Normal file
24
scripts/Crawl Google Results/main.py
Normal file
|
@ -0,0 +1,24 @@
|
||||||
|
import sys
|
||||||
|
import webbrowser
|
||||||
|
|
||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from fake_useragent import UserAgent
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Script entry point: search Google for the command-line arguments and
    # open the first few matching result links in the default web browser.
    #
    # Usage: python main.py "query search"

    # All arguments are joined so the query may be passed quoted or unquoted.
    query = " ".join(sys.argv[1:])
    if not query:
        sys.exit('Usage: python main.py "query search"')

    print("Googling.....")

    # Passing the query through `params` lets requests URL-encode it, so
    # characters such as "&", "#" or "+" cannot corrupt the query string.
    # The header must be spelled "User-Agent"; a "UserAgent" key is just an
    # unknown header and the random user agent would never take effect.
    res = requests.get(
        "https://www.google.com/search",
        params={"q": query},
        headers={"User-Agent": UserAgent().random},
        timeout=10,  # without a timeout a stalled connection hangs forever
    )
    # NOTE(review): the HTTP status is intentionally not checked here (the
    # original left raise_for_status() disabled); an error page simply
    # yields zero matching links below.

    # Dump the raw response so the result-link CSS class can be inspected.
    with open("project1a.html", "wb") as out_file:  # only for knowing the class
        out_file.write(res.content)

    soup = BeautifulSoup(res.text, "html.parser")
    # NOTE(review): ".eZt8xd" is tied to Google's current markup and may
    # need updating when the page layout changes.
    links = soup.select(".eZt8xd")[:5]

    print(len(links))
    for link in links:
        href = link.get("href")
        if not href:
            # An anchor without a target would otherwise open a bogus URL.
            continue
        if link.text == "Maps":
            # The "Maps" entry is opened with its href exactly as given.
            webbrowser.open(href)
        else:
            # Other hrefs are appended to the Google host before opening.
            webbrowser.open(f"http://google.com{href}")
|
Loading…
Reference in New Issue
Block a user