mirror of
https://github.com/hastagAB/Awesome-Python-Scripts.git
synced 2025-01-31 05:33:41 +00:00
Added automated web scraper script with README and requirements.
This commit is contained in:
parent
e9e1cde1a6
commit
4b5cb86d29
31
Automated-Web-Scraper/README.md
Normal file
31
Automated-Web-Scraper/README.md
Normal file
|
@ -0,0 +1,31 @@
|
||||||
|
# Automated Web Scraper
|
||||||
|
|
||||||
|
This Python script automates the process of web scraping using the `requests` and `BeautifulSoup` libraries.
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
1. Modify the script (`automated-web-scraper.py`) to set the URL you want to scrape and specify the data you want to extract.
|
||||||
|
|
||||||
|
2. Run the script using Python: `python automated-web-scraper.py`.
|
||||||
|
|
||||||
|
## Requirements
|
||||||
|
|
||||||
|
- Python 3.x
|
||||||
|
- `requests` library
|
||||||
|
- `beautifulsoup4` library (Beautiful Soup, imported as `bs4`)
|
||||||
|
|
||||||
|
## Installation
|
||||||
|
|
||||||
|
1. Clone this repository or download the script (`automated-web-scraper.py`).
|
||||||
|
|
||||||
|
2. Install the required libraries using the `requirements.txt` file: `pip install -r requirements.txt`.
|
||||||
|
|
||||||
|
3. Modify the script and run it to perform automated web scraping.
|
||||||
|
|
||||||
|
## Author
|
||||||
|
|
||||||
|
sonicdashh
|
||||||
|
|
||||||
|
## License
|
||||||
|
|
||||||
|
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
16
Automated-Web-Scraper/automated-web-scraper.py
Normal file
16
Automated-Web-Scraper/automated-web-scraper.py
Normal file
|
@ -0,0 +1,16 @@
|
||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
# Specify the URL to scrape
url = "https://example.com"

# Send an HTTP request. requests applies no timeout by default, so an
# unresponsive server would block the script forever; bound the wait.
response = requests.get(url, timeout=10)

# Stop on 4xx/5xx responses instead of silently scraping an error page.
response.raise_for_status()

# Parse the HTML content
soup = BeautifulSoup(response.text, "html.parser")

# Extract and process data (e.g., extract all headings)
headings = soup.find_all(["h1", "h2", "h3", "h4", "h5", "h6"])
for heading in headings:
    # Print each heading's text with surrounding whitespace removed.
    print(heading.text.strip())
|
2
Automated-Web-Scraper/requirements.txt
Normal file
2
Automated-Web-Scraper/requirements.txt
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
requests==2.26.0
|
||||||
|
beautifulsoup4==4.10.0
|
Loading…
Reference in New Issue
Block a user