mirror of
https://github.com/metafy-social/python-scripts.git
synced 2025-02-25 17:38:42 +00:00
Merge pull request #285 from drk1rd/master
Tracking webpages [ISSUE#280]
This commit is contained in:
commit
ac5446da60
5
scripts/track_webpage_changes/README.md
Normal file
5
scripts/track_webpage_changes/README.md
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
Tracking any change in a webpage using Python.
|
||||||
|
|
||||||
|
- Enter the full URL, including the scheme (for example, `https://example.com`).
|
||||||
|
- The program checks the site periodically, so input an interval in seconds.
|
||||||
|
- Watch the terminal: after every check the script prints a timestamped status line saying whether the page changed.
|
40
scripts/track_webpage_changes/main.py
Normal file
40
scripts/track_webpage_changes/main.py
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
import difflib
|
||||||
|
import time
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
url = str(input("url: "))
|
||||||
|
interval = int(input("interval(s): "))
|
||||||
|
headers = {
|
||||||
|
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
|
||||||
|
|
||||||
|
PrevVersion = ""
|
||||||
|
FirstRun = True
|
||||||
|
while True:
|
||||||
|
response = requests.get(url, headers=headers)
|
||||||
|
soup = BeautifulSoup(response.text, "lxml")
|
||||||
|
for script in soup(["script", "style"]):
|
||||||
|
script.extract()
|
||||||
|
soup = soup.get_text()
|
||||||
|
|
||||||
|
if PrevVersion != soup:
|
||||||
|
if FirstRun == True:
|
||||||
|
PrevVersion = soup
|
||||||
|
FirstRun = False
|
||||||
|
print("Started Monitoring " + url + " " + str(datetime.now()))
|
||||||
|
else:
|
||||||
|
print("Changes detected at: " + str(datetime.now()))
|
||||||
|
OldPage = PrevVersion.splitlines()
|
||||||
|
NewPage = soup.splitlines()
|
||||||
|
d = difflib.Differ()
|
||||||
|
diff = d.compare(OldPage, NewPage)
|
||||||
|
out_text = "\n".join([ll.rstrip() for ll in '\n'.join(diff).splitlines() if ll.strip()])
|
||||||
|
#print(out_text)
|
||||||
|
OldPage = NewPage
|
||||||
|
# print ('\n'.join(diff))
|
||||||
|
PrevVersion = soup
|
||||||
|
else:
|
||||||
|
print("No Changes Detected " + str(datetime.now()))
|
||||||
|
time.sleep(interval)
|
||||||
|
continue
|
3
scripts/track_webpage_changes/requirements.txt
Normal file
3
scripts/track_webpage_changes/requirements.txt
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
bs4==0.0.1
|
||||||
|
lxml==4.9.1
|
||||||
|
requests==2.28.1
|
Loading…
x
Reference in New Issue
Block a user