diff --git a/scripts/Email Extractor/README.md b/scripts/Email Extractor/README.md
new file mode 100644
index 0000000..68230b9
--- /dev/null
+++ b/scripts/Email Extractor/README.md
@@ -0,0 +1,21 @@
+# Email Extractor with Python
+
+This script takes a website as input, crawls the pages linked from its front page, and collects all the email addresses it finds into a CSV file.
+
+### Setup
+
+- Install the requirements (see below)
+- Run the script with `python email_extractor.py`
+- Enter the website to collect emails from when prompted
+
+### Requirements
+
+```
+pip install -r requirements.txt
+```
+
+### Usage
+
+```
+python email_extractor.py
+```
diff --git a/scripts/Email Extractor/email_extractor.py b/scripts/Email Extractor/email_extractor.py
new file mode 100644
index 0000000..550752b
--- /dev/null
+++ b/scripts/Email Extractor/email_extractor.py
@@ -0,0 +1,39 @@
+import requests
+from bs4 import BeautifulSoup
+from urllib.parse import urljoin
+from email_scraper import scrape_emails
+import pandas as pd
+
+
+# Ask for the site to crawl and normalise it to a full URL.
+urlid = input("Enter Website url (i.e.: example.com): ")
+url = "https://" + urlid + "/"
+
+# Fetch the front page and parse it for links.
+reqs = requests.get(url, timeout=10)
+soup = BeautifulSoup(reqs.text, 'html.parser')
+
+# Collect the href of every <a> tag, skipping anchors without one.
+urls = [link.get('href') for link in soup.find_all('a') if link.get('href')]
+
+emails = set()
+for href in urls:
+    # Absolute links are fetched as-is; relative links are joined to the base URL.
+    page_url = href if href.startswith("https://") else urljoin(url, href)
+    try:
+        page = requests.get(page_url, timeout=10)
+    except requests.RequestException:
+        continue
+    # scrape_emails returns the addresses found in the page text.
+    emails.update(scrape_emails(page.text))
+
+# Write one email address per row to email.csv.
+df = pd.DataFrame(sorted(emails), columns=["Email"])
+df.to_csv('email.csv', index=False)
+
+# When running inside Google Colab, also offer the file as a download.
+try:
+    from google.colab import files
+    files.download("email.csv")
+except ImportError:
+    pass
diff --git a/scripts/Email Extractor/requirements.txt b/scripts/Email Extractor/requirements.txt
new file mode 100644
index 0000000..424180a
--- /dev/null
+++ b/scripts/Email Extractor/requirements.txt
@@ -0,0 +1,4 @@
+requests
+beautifulsoup4
+email_scraper
+pandas
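
As a quick sanity check (not part of the diff above), the sketch below shows how `scrape_emails` from the `email_scraper` package can be tried on a small HTML string before crawling a real site. The HTML snippet and addresses are made-up examples; the only assumption is that `scrape_emails` accepts a string and returns the addresses it finds, as the script's falsy check on its result implies.

```python
# Minimal sketch: verify that email_scraper picks addresses out of raw HTML.
# The snippet and addresses below are placeholder examples.
from email_scraper import scrape_emails

html = "<p>Contact us at info@example.com or sales@example.com</p>"
found = scrape_emails(html)
print(found)  # expected: a collection containing both example addresses
```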