mirror of
https://github.com/metafy-social/python-scripts.git
synced 2024-11-23 20:11:10 +00:00
Added crawl Google search
This commit is contained in:
parent
5e42fdc82d
commit
a12b9931ca
9
scripts/Crawl Google Results/README.md
Normal file
9
scripts/Crawl Google Results/README.md
Normal file
|
@ -0,0 +1,9 @@
|
|||
# Crawl Google results
|
||||
This is a simple script that lets you collect results provided by Google.
|
||||
|
||||
## Usage
|
||||
|
||||
* Three packages are required: requests, BeautifulSoup and fake_useragent
|
||||
* Use `pip install requests`, `pip install bs4` and `pip install fake_useragent`
|
||||
* Add the paths to your csv file and output excel file WITH the EXTENSIONS `.csv` and `.xlsx`
|
||||
* Run `python main.py "query search"`
|
24
scripts/Crawl Google Results/main.py
Normal file
24
scripts/Crawl Google Results/main.py
Normal file
|
@ -0,0 +1,24 @@
|
|||
import sys
|
||||
import webbrowser
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from fake_useragent import UserAgent
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: search Google for the words given on the command
    # line and open the first few matching result links in the web browser.

    # Imported locally so this block stays self-contained.
    from urllib.parse import urljoin

    query = " ".join(sys.argv[1:])
    if not query:
        # Nothing to search for; show how the script is meant to be called.
        sys.exit('Usage: python main.py "query search"')

    print("Googling.....")
    res = requests.get(
        "https://www.google.com/search",
        # Let requests URL-encode the query so characters such as "&", "#"
        # or "+" in the search terms do not corrupt the URL.
        params={"q": query},
        # The HTTP header is spelled "User-Agent"; the previous "UserAgent"
        # key was sent as an unrelated header and had no effect.
        headers={"User-Agent": UserAgent().random},
        # Never wait forever on a stalled connection.
        timeout=10,
    )
    if not res.ok:
        # An error page contains no result links; report it instead of
        # silently printing 0 and opening nothing.
        sys.exit(f"Google returned HTTP {res.status_code}")

    with open("project1a.html", "wb") as out_file:  # only for knowing the class
        out_file.write(res.content)

    soup = BeautifulSoup(res.text, "html.parser")
    # Keep at most the first five elements carrying the result-link class.
    links = soup.select(".eZt8xd")[:5]

    print(len(links))
    for link in links:
        href = link.get("href")
        if not href:
            # Element without a target; nothing to open.
            continue
        # urljoin leaves absolute links untouched and resolves relative ones
        # against the Google host, so no special-casing by link text is needed.
        webbrowser.open(urljoin("http://google.com", href))
|
Loading…
Reference in New Issue
Block a user