# -*- coding: utf-8 -*-
"""Untitled0.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1BuQhjlIL_OYu39gpE2NQNZx9KJ_kPy3o

Collect e-mail addresses from the pages linked on a website's landing page.

The script asks for a site (e.g. ``example.com``), downloads its landing
page, follows every ``<a href>`` found there, runs ``scrape_emails`` over
each linked page and writes the addresses to ``email.csv`` (one per row,
column ``Email``).  Inside Colab the CSV is also offered as a download.

Dependencies are installed from a shell (or a ``!``-prefixed notebook
cell), not from Python code::

    pip install requests beautifulsoup4 email-scraper pandas

A single page can be inspected with ``print(fetch_text(page_url))``.
"""

import http.client
import urllib.request
from urllib.parse import urldefrag, urljoin

# Seconds to wait on any single HTTP request before giving up on it.
REQUEST_TIMEOUT = 15

# Only these schemes can be fetched; mailto:, tel:, javascript: etc. are skipped.
_CRAWLABLE_SCHEMES = ("http://", "https://")


def build_base_url(site):
    """Return the landing-page URL for the user-supplied *site*.

    ``example.com`` becomes ``https://example.com/``.  Input that already
    carries an ``http://`` or ``https://`` scheme, or a trailing slash, is
    accepted as-is instead of being prefixed a second time.

    Raises:
        ValueError: if *site* is empty or only whitespace.
    """
    site = site.strip()
    if not site:
        raise ValueError("website url must not be empty")
    if not site.startswith(_CRAWLABLE_SCHEMES):
        site = "https://" + site
    return site if site.endswith("/") else site + "/"


def resolve_link(base_url, href):
    """Turn an ``href`` found on *base_url* into an absolute, fetchable URL.

    Relative links are resolved against *base_url*; absolute links are kept
    unchanged (blindly prepending the base would produce
    ``https://site/https://...``).  The ``#fragment`` is dropped because it
    addresses the same document.

    Returns:
        The absolute URL, or ``None`` when *href* is missing/empty or uses
        a scheme other than http(s).
    """
    if not href:
        return None
    absolute, _fragment = urldefrag(urljoin(base_url, href.strip()))
    if not absolute.startswith(_CRAWLABLE_SCHEMES):
        return None
    return absolute


def flatten_emails(page_results):
    """Merge per-page scrape results into one sorted list of unique addresses.

    Args:
        page_results: iterable of collections of address strings, one
            collection per scraped page; empty collections are allowed.
    """
    return sorted({address for found in page_results for address in found})


def fetch_text(page_url, timeout=REQUEST_TIMEOUT):
    """Download *page_url* and return its body decoded to ``str``.

    The charset announced in the response headers is used when present,
    falling back to UTF-8; undecodable bytes are replaced so that one odd
    page cannot abort the run.  The connection is closed even on error.
    """
    with urllib.request.urlopen(page_url, timeout=timeout) as reply:
        charset = reply.headers.get_content_charset() or "utf-8"
        return reply.read().decode(charset, errors="replace")


def collect_page_links(base_url, timeout=REQUEST_TIMEOUT):
    """Return the unique fetchable links on the page at *base_url*, in page order.

    Raises:
        requests.HTTPError: if the landing page answers with an error status.
    """
    # Third-party imports stay local so the pure helpers above remain
    # importable when the scraping stack is not installed.
    import requests
    from bs4 import BeautifulSoup

    reply = requests.get(base_url, timeout=timeout)
    reply.raise_for_status()
    soup = BeautifulSoup(reply.text, "html.parser")

    candidates = (
        resolve_link(base_url, anchor.get("href"))
        for anchor in soup.find_all("a")
    )
    # dict.fromkeys de-duplicates while keeping first-seen order, so a link
    # repeated in header and footer is only downloaded once.
    return list(dict.fromkeys(link for link in candidates if link))


def scrape_site(base_url):
    """Scrape e-mail addresses from every page linked on *base_url*.

    Pages that cannot be downloaded are reported and skipped rather than
    aborting the whole crawl.

    Returns:
        Sorted list of the unique addresses found.
    """
    from email_scraper import scrape_emails

    page_results = []
    for link in collect_page_links(base_url):
        try:
            html = fetch_text(link)
        except (OSError, http.client.HTTPException, ValueError) as exc:
            print(f"Skipping {link}: {exc}")
            continue
        page_results.append(scrape_emails(html))
    return flatten_emails(page_results)


def export_emails(addresses, csv_path="email.csv"):
    """Write *addresses* to *csv_path* and, inside Colab, trigger a download."""
    import pandas as pd

    pd.DataFrame(list(addresses), columns=["Email"]).to_csv(csv_path, index=False)

    try:
        from google.colab import files
    except ImportError:
        # Outside Colab there is no browser download; the file stays on disk.
        print(f"Saved {csv_path}")
        return
    files.download(csv_path)


def main():
    """Prompt for a website, crawl it and export the addresses found."""
    site = input("Enter Website url (i.e.: example.com): ")
    export_emails(scrape_site(build_base_url(site)))


if __name__ == "__main__":
    main()