From be779b9f0225dc865f0aee5d6fe4664a9b53715a Mon Sep 17 00:00:00 2001
From: drk1rd <58465650+drk1rd@users.noreply.github.com>
Date: Sun, 9 Oct 2022 23:36:57 +0530
Subject: [PATCH 1/3] Create main.py

---
 scripts/track_webpage_changes/main.py | 75 +++++++++++++++++++++++++++
 1 file changed, 75 insertions(+)
 create mode 100644 scripts/track_webpage_changes/main.py

diff --git a/scripts/track_webpage_changes/main.py b/scripts/track_webpage_changes/main.py
new file mode 100644
index 0000000..c637ea7
--- /dev/null
+++ b/scripts/track_webpage_changes/main.py
@@ -0,0 +1,75 @@
+"""Poll a web page at a fixed interval and report changes in its visible text."""
+
+import difflib
+import time
+from datetime import datetime
+
+import requests
+from bs4 import BeautifulSoup
+
+HEADERS = {
+    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
+# requests waits indefinitely unless a timeout is given, which would stall
+# the monitor on a hung connection.
+REQUEST_TIMEOUT = 30
+
+
+def fetch_text(url):
+    """Return the text of *url* with <script> and <style> content removed.
+
+    Raises requests.RequestException on connection errors, timeouts and
+    HTTP error statuses.
+    """
+    response = requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
+    response.raise_for_status()
+    soup = BeautifulSoup(response.text, "lxml")
+    for tag in soup(["script", "style"]):
+        tag.decompose()
+    return soup.get_text()
+
+
+def describe_changes(old_text, new_text):
+    """Return a unified diff between the non-blank lines of two page texts."""
+    old_lines = [ln.rstrip() for ln in old_text.splitlines() if ln.strip()]
+    new_lines = [ln.rstrip() for ln in new_text.splitlines() if ln.strip()]
+    diff = difflib.unified_diff(
+        old_lines, new_lines, fromfile="previous", tofile="current", lineterm="")
+    # The diff is empty when only whitespace or blank lines changed.
+    return "\n".join(diff) or "(only whitespace changed)"
+
+
+def monitor(url, interval):
+    """Check *url* every *interval* seconds and print what changed."""
+    previous = None  # None until the first successful fetch
+    while True:
+        try:
+            current = fetch_text(url)
+        except requests.RequestException as err:
+            # A transient failure must not end the monitoring session.
+            print("Request failed at " + str(datetime.now()) + ": " + str(err))
+        else:
+            if previous is None:
+                print("Started Monitoring " + url + " " + str(datetime.now()))
+            elif current != previous:
+                print("Changes detected at: " + str(datetime.now()))
+                print(describe_changes(previous, current))
+            else:
+                print("No Changes Detected " + str(datetime.now()))
+            previous = current
+        time.sleep(interval)
+
+
+def main():
+    """Prompt for the URL and polling interval, then monitor until Ctrl+C."""
+    url = input("url: ").strip()
+    interval = int(input("interval(s): "))
+    if interval <= 0:
+        raise SystemExit("interval must be a positive number of seconds")
+    try:
+        monitor(url, interval)
+    except KeyboardInterrupt:
+        print("Stopped Monitoring " + url)
+
+
+if __name__ == "__main__":
+    main()

From 83462542564758d911c35ffa851b15d2f7b1b24d Mon Sep 17 00:00:00 2001
From: drk1rd <58465650+drk1rd@users.noreply.github.com>
Date: Sun, 9 Oct 2022 23:38:52 +0530
Subject: [PATCH 2/3] Create requirements.txt

---
scripts/track_webpage_changes/requirements.txt | 3 +++
 1 file changed, 3 insertions(+)
 create mode 100644 scripts/track_webpage_changes/requirements.txt

diff --git a/scripts/track_webpage_changes/requirements.txt b/scripts/track_webpage_changes/requirements.txt
new file mode 100644
index 0000000..1265253
--- /dev/null
+++ b/scripts/track_webpage_changes/requirements.txt
@@ -0,0 +1,3 @@
+bs4==0.0.1
+lxml==4.9.1
+requests==2.28.1

From 1e00b37c3c2f8ddc25cd13a4839920d7634addfb Mon Sep 17 00:00:00 2001
From: drk1rd <58465650+drk1rd@users.noreply.github.com>
Date: Sun, 9 Oct 2022 23:44:32 +0530
Subject: [PATCH 3/3] Create README.md

---
 scripts/track_webpage_changes/README.md | 5 +++++
 1 file changed, 5 insertions(+)
 create mode 100644 scripts/track_webpage_changes/README.md

diff --git a/scripts/track_webpage_changes/README.md b/scripts/track_webpage_changes/README.md
new file mode 100644
index 0000000..78ff5e0
--- /dev/null
+++ b/scripts/track_webpage_changes/README.md
@@ -0,0 +1,5 @@
+Tracking any change in a webpage using Python.
+
+- Enter the URL in full, including the scheme (e.g. `https://example.com`).
+- The program checks the site periodically, so enter the polling interval in seconds.
+- Watch the terminal: the result of each check is printed with a timestamp.