diff --git a/scripts/Email Extractor/README.md b/scripts/Email Extractor/README.md
new file mode 100644
index 0000000..68230b9
--- /dev/null
+++ b/scripts/Email Extractor/README.md
@@ -0,0 +1,16 @@
+# Email Extractor with Python
+
+This script takes a website as input, crawls the pages linked from its landing page, and collects every email address it finds into a CSV file.
+
+
+### Setup
+ - Install the requirements (see below)
+ - Run the script with `python email_extractor.py`
+ - Enter the website to collect emails from
+
+
+### Requirements
+```pip install -r requirements.txt```
+
+### Usage
+```python email_extractor.py```
\ No newline at end of file
diff --git a/scripts/Email Extractor/email_extractor.py b/scripts/Email Extractor/email_extractor.py
new file mode 100644
index 0000000..0c23cba
--- /dev/null
+++ b/scripts/Email Extractor/email_extractor.py
@@ -0,0 +1,38 @@
+import urllib.error
+import urllib.request
+
+import pandas as pd
+import requests
+from bs4 import BeautifulSoup
+from email_scraper import scrape_emails
+
+urlid = input("Enter website URL (e.g. example.com): ")
+url = "https://" + urlid + "/"
+reqs = requests.get(url)
+soup = BeautifulSoup(reqs.text, 'html.parser')
+
+# Collect the href of every anchor tag on the landing page.
+urls = []
+for link in soup.find_all('a'):
+    href = link.get('href')
+    if href:
+        urls.append(href)
+
+# Fetch each linked page and scrape it for email addresses.
+emails = []
+for href in urls:
+    # Absolute links are fetched as-is; relative links are resolved
+    # against the site root.
+    target = href if href.startswith("https://") else url + href
+    try:
+        fp = urllib.request.urlopen(target)
+        mystr = fp.read().decode("utf8")
+        fp.close()
+    except (urllib.error.URLError, ValueError):
+        continue  # skip links that cannot be fetched or decoded
+    found = scrape_emails(mystr)
+    if found:
+        emails.extend(found)
+
+# Write the unique addresses to a CSV file.
+df = pd.DataFrame(sorted(set(emails)), columns=["Email"])
+df.to_csv('email.csv', index=False)
diff --git a/scripts/Email Extractor/requirements.txt b/scripts/Email Extractor/requirements.txt
new file mode 100644
index 0000000..424180a
--- /dev/null
+++ b/scripts/Email Extractor/requirements.txt
@@ -0,0 +1,4 @@
+requests
+bs4
+email_scraper
+pandas
\ No newline at end of file
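For context, the extraction itself is delegated to the `email_scraper` package's `scrape_emails` helper, which takes a string and returns the set of email addresses found in it. A minimal standalone sketch of that step, assuming the package is installed; the URL shown is a placeholder:

```python
# Minimal sketch of the extraction step used by email_extractor.py.
# "https://example.com/" is a placeholder; scrape_emails returns a set
# (possibly empty) of the addresses found in the given text.
import requests
from email_scraper import scrape_emails

page = requests.get("https://example.com/").text
print(scrape_emails(page))
```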
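The link-resolution step in the script simply prefixes the site root onto anything that is not already an `https://` URL. A more robust alternative (not what the script does) would be `urllib.parse.urljoin`, which also handles absolute paths and `../`-style links; a small sketch with placeholder URLs:

```python
# Sketch of link resolution with urljoin; an alternative to the
# string-prefix approach used in email_extractor.py.
from urllib.parse import urljoin

base = "https://example.com/"          # placeholder site root
print(urljoin(base, "contact.html"))   # https://example.com/contact.html
print(urljoin(base, "/about/"))        # https://example.com/about/
print(urljoin(base, "https://other.example/page"))  # absolute URLs kept as-is
```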