From 13a00ca66708f257f0e1d5664147c159c02d519c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Gl=C3=BCpker?= Date: Sat, 25 May 2024 13:08:53 +0200 Subject: Build diff for changes --- urldelta.py | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) (limited to 'urldelta.py') diff --git a/urldelta.py b/urldelta.py index 569e201..5584966 100755 --- a/urldelta.py +++ b/urldelta.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import datetime +import difflib import sqlite3 import time import requests @@ -19,6 +20,28 @@ def extract_text(content) -> str: return soup.get_text(separator="\n", strip=True) +def compare_to_html( + text_old: str, + text_new: str, +) -> str: + if not text_old or not text_new: + return "N/A" + + output = ["<pre>"]
+    for line in difflib.Differ().compare(text_old.splitlines(), text_new.splitlines()):
+        if line.startswith("+"):
+            output.append(f"🟢 {line.strip()[2:]}")
+        elif line.startswith("-"):
+            output.append(f"🔴 {line.strip()[2:]}")
+        elif line.startswith("?"):
+            output.append(f"🔵 {line.strip()[2:]}")
+        else:
+            output.append(f"🔵 {line.strip()}")
+    output.append("</pre>") + + return "\n".join(output) + + + def get_page_delta(url): conn = sqlite3.connect(os.path.join(os.path.dirname(__file__), "database", "website_data.db")) cursor = conn.cursor() @@ -87,16 +110,17 @@ def get_page_delta(url): ) conn.commit() - cursor.execute("SELECT headers, content, fetch_date FROM deltas WHERE website_id = ?", (id,)) + cursor.execute("SELECT extracted_old, extracted_new, fetch_date FROM deltas WHERE website_id = ?", (id,)) updates = [] for update in cursor.fetchall(): + extracted_old, extracted_new, fetch_date = update updates.append( RSSItem( title=f"Change on {url}", url=url, content=f"Headers: {update[0]}\n\nContent: {extract_text(update[1])}".replace("\n", "<br>"), - date=datetime.datetime.fromtimestamp(update[2], tz=datetime.UTC), + content=compare_to_html(extracted_old, extracted_new), + date=datetime.datetime.fromtimestamp(fetch_date, tz=datetime.UTC), enclosures=[], guid=update[2], ) -- cgit v1.2.3