diff options
Diffstat (limited to 'urldelta.py')
-rwxr-xr-x | urldelta.py | 30 |
1 files changed, 27 insertions, 3 deletions
diff --git a/urldelta.py b/urldelta.py
index 569e201..5584966 100755
--- a/urldelta.py
+++ b/urldelta.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 
 import datetime
+import difflib
 import sqlite3
 import time
 import requests
@@ -19,6 +20,28 @@ def extract_text(content) -> str:
     return soup.get_text(separator="\n", strip=True)
 
 
+def compare_to_html(
+    text_old: str,
+    text_new: str,
+) -> str:
+    if not text_old or not text_new:
+        return "N/A"
+
+    output = ["<pre>"]
+    for line in difflib.Differ().compare(text_old.splitlines(), text_new.splitlines()):
+        if line.startswith("+"):
+            output.append(f"🟢 {line.strip()[2:]}")
+        elif line.startswith("-"):
+            output.append(f"🔴 {line.strip()[2:]}")
+        elif line.startswith("?"):
+            output.append(f"🔵 {line.strip()[2:]}")
+        else:
+            output.append(f"<small>🔵 {line.strip()}</small>")
+    output.append("</pre>")
+
+    return "\n".join(output)
+
+
 def get_page_delta(url):
     conn = sqlite3.connect(os.path.join(os.path.dirname(__file__), "database", "website_data.db"))
     cursor = conn.cursor()
@@ -87,16 +110,17 @@ def get_page_delta(url):
     )
     conn.commit()
 
-    cursor.execute("SELECT headers, content, fetch_date FROM deltas WHERE website_id = ?", (id,))
+    cursor.execute("SELECT extracted_old, extracted_new, fetch_date FROM deltas WHERE website_id = ?", (id,))
 
     updates = []
     for update in cursor.fetchall():
+        extracted_old, extracted_new, fetch_date = update
         updates.append(
             RSSItem(
                 title=f"Change on {url}",
                 url=url,
-                content=f"Headers: {update[0]}\n\nContent: {extract_text(update[1])}".replace("\n", "<br>"),
-                date=datetime.datetime.fromtimestamp(update[2], tz=datetime.UTC),
+                content=compare_to_html(extracted_old, extracted_new),
+                date=datetime.datetime.fromtimestamp(fetch_date, tz=datetime.UTC),
                 enclosures=[],
                 guid=update[2],
             )