From 4b5cb86d2979824a92382b59c04ece0e99723c11 Mon Sep 17 00:00:00 2001
From: sonicdashh <147204439+sonicdashh@users.noreply.github.com>
Date: Sat, 7 Oct 2023 05:27:05 +0000
Subject: [PATCH] Added automated web scraper script with README and
 requirements.

---
 Automated-Web-Scraper/README.md        | 31 +++++++++++++++++++
 .../automated-web-scraper.py           | 19 +++++++++++
 Automated-Web-Scraper/requirements.txt |  2 ++
 3 files changed, 52 insertions(+)
 create mode 100644 Automated-Web-Scraper/README.md
 create mode 100644 Automated-Web-Scraper/automated-web-scraper.py
 create mode 100644 Automated-Web-Scraper/requirements.txt

diff --git a/Automated-Web-Scraper/README.md b/Automated-Web-Scraper/README.md
new file mode 100644
index 0000000..0bf998c
--- /dev/null
+++ b/Automated-Web-Scraper/README.md
@@ -0,0 +1,31 @@
+# Automated Web Scraper
+
+This Python script automates the process of web scraping using the `requests` and `BeautifulSoup` libraries.
+
+## Usage
+
+1. Modify the script (`automated-web-scraper.py`) to set the URL you want to scrape and specify the data you want to extract.
+
+2. Run the script: `python automated-web-scraper.py`
+
+## Requirements
+
+- Python 3.x
+- `requests` library
+- `beautifulsoup4` library
+
+## Installation
+
+1. Clone this repository or download the script (`automated-web-scraper.py`).
+
+2. Install the required libraries: `pip install -r requirements.txt`
+
+3. Modify the script and run it to perform automated web scraping.
+
+## Author
+
+sonicdashh
+
+## License
+
+This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
\ No newline at end of file
diff --git a/Automated-Web-Scraper/automated-web-scraper.py b/Automated-Web-Scraper/automated-web-scraper.py
new file mode 100644
index 0000000..334f34c
--- /dev/null
+++ b/Automated-Web-Scraper/automated-web-scraper.py
@@ -0,0 +1,19 @@
+import requests
+from bs4 import BeautifulSoup
+
+# Specify the URL to scrape
+url = "https://example.com"
+
+# Send an HTTP request (the timeout keeps the script from hanging indefinitely)
+response = requests.get(url, timeout=10)
+
+# Fail early on HTTP error responses instead of silently parsing an error page
+response.raise_for_status()
+
+# Parse the HTML content
+soup = BeautifulSoup(response.text, "html.parser")
+
+# Extract and process data (e.g., extract all headings)
+headings = soup.find_all(["h1", "h2", "h3", "h4", "h5", "h6"])
+for heading in headings:
+    print(heading.text.strip())
diff --git a/Automated-Web-Scraper/requirements.txt b/Automated-Web-Scraper/requirements.txt
new file mode 100644
index 0000000..42d911b
--- /dev/null
+++ b/Automated-Web-Scraper/requirements.txt
@@ -0,0 +1,2 @@
+requests==2.26.0
+beautifulsoup4==4.10.0
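
As a usage illustration (not part of the committed files), here is a minimal sketch of how the script might be adapted to "specify the data you want to extract," per the README. The target URL, the choice of hyperlinks as the extracted data, and the `links.csv` output filename are assumptions for this example only.

```python
import csv
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Illustrative values -- adjust to the site you are scraping
url = "https://example.com"   # assumed target, as in the committed script
output_path = "links.csv"     # hypothetical output file

response = requests.get(url, timeout=10)
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")

# Collect every hyperlink, resolving relative URLs against the page URL
rows = [
    (a.get_text(strip=True), urljoin(url, a["href"]))
    for a in soup.find_all("a", href=True)
]

# Write the (text, url) pairs to a CSV file
with open(output_path, "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(["text", "url"])
    writer.writerows(rows)
```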