"""Automated web scraper: fetch a page and print the text of every heading.

Usage: edit DEFAULT_URL (or call fetch_headings() with another URL) and run
the script with Python 3.x. Requires the `requests` and `beautifulsoup4`
packages (see requirements.txt).
"""

import requests
from bs4 import BeautifulSoup

# Default page to scrape; change this to your target site.
DEFAULT_URL = "https://example.com"


def fetch_headings(url: str = DEFAULT_URL, timeout: float = 10.0) -> list[str]:
    """Return the stripped text of every h1-h6 element found at *url*.

    Args:
        url: Page to fetch. Defaults to DEFAULT_URL.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of heading strings in document order.

    Raises:
        requests.RequestException: On connection errors or timeout.
        requests.HTTPError: On a non-2xx HTTP response.
    """
    # An explicit timeout prevents the request from hanging forever
    # on an unresponsive host (requests has no default timeout).
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of silently parsing an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")
    headings = soup.find_all(["h1", "h2", "h3", "h4", "h5", "h6"])
    return [heading.text.strip() for heading in headings]


def main() -> None:
    """Script entry point: print each heading on its own line."""
    for heading in fetch_headings():
        print(heading)


# Guard module-level side effects so the scraper can be imported
# (e.g. for reuse or testing) without firing an HTTP request.
if __name__ == "__main__":
    main()