summary refs log tree commit diff
path: root/urldelta.py
diff options
context:
space:
mode:
author André Glüpker <git@wgmd.de> 2024-05-25 13:08:53 +0200
committer André Glüpker <git@wgmd.de> 2024-05-25 13:08:53 +0200
commit 13a00ca66708f257f0e1d5664147c159c02d519c (patch)
tree 785f90056a972f0cb0f5c3a1081bde278c8e25dd /urldelta.py
parent 02375f5bed06741831f235fb53f41fc324ef3c23 (diff)
download rss-feeds-13a00ca66708f257f0e1d5664147c159c02d519c.tar.gz
rss-feeds-13a00ca66708f257f0e1d5664147c159c02d519c.tar.bz2
rss-feeds-13a00ca66708f257f0e1d5664147c159c02d519c.zip
Build diff for changesmain
Diffstat (limited to 'urldelta.py')
-rwxr-xr-x urldelta.py 30
1 files changed, 27 insertions, 3 deletions
diff --git a/urldelta.py b/urldelta.py
index 569e201..5584966 100755
--- a/urldelta.py
+++ b/urldelta.py
@@ -1,6 +1,7 @@
#!/usr/bin/env python3
import datetime
+import difflib
import sqlite3
import time
import requests
@@ -19,6 +20,28 @@ def extract_text(content) -> str:
return soup.get_text(separator="\n", strip=True)
def compare_to_html(
    text_old: str,
    text_new: str,
) -> str:
    """Render a line-by-line diff of two texts as an HTML ``<pre>`` block.

    The texts are split into lines and compared with ``difflib.Differ``.
    Each resulting line is prefixed with a marker:

    * 🟢 for lines only present in ``text_new``
    * 🔴 for lines only present in ``text_old``
    * 🔵 for ``difflib``'s ``?`` hint lines
    * 🔵 inside ``<small>`` for unchanged lines

    Args:
        text_old: Previous text.
        text_new: Current text.

    Returns:
        The HTML fragment, or the string ``"N/A"`` when either text is
        empty or ``None`` (no comparison is attempted in that case).
    """
    # Imported locally so this function does not depend on the module's
    # import block being extended.
    from html import escape

    if not text_old or not text_new:
        return "N/A"

    output = ["<pre>"]
    for line in difflib.Differ().compare(text_old.splitlines(), text_new.splitlines()):
        stripped = line.strip()
        # The compared text is arbitrary content; escape it so characters
        # such as "<" or "&" cannot break or inject markup in the fragment.
        if line.startswith("+"):
            # Drop the two-character "+ " code emitted by Differ.
            output.append(f"🟢 {escape(stripped[2:], quote=False)}")
        elif line.startswith("-"):
            output.append(f"🔴 {escape(stripped[2:], quote=False)}")
        elif line.startswith("?"):
            output.append(f"🔵 {escape(stripped[2:], quote=False)}")
        else:
            # Unchanged line: strip() already removed the "  " code.
            output.append(f"<small>🔵 {escape(stripped, quote=False)}</small>")
    output.append("</pre>")

    return "\n".join(output)
+
+
def get_page_delta(url):
conn = sqlite3.connect(os.path.join(os.path.dirname(__file__), "database", "website_data.db"))
cursor = conn.cursor()
@@ -87,16 +110,17 @@ def get_page_delta(url):
)
conn.commit()
- cursor.execute("SELECT headers, content, fetch_date FROM deltas WHERE website_id = ?", (id,))
+ cursor.execute("SELECT extracted_old, extracted_new, fetch_date FROM deltas WHERE website_id = ?", (id,))
updates = []
for update in cursor.fetchall():
+ extracted_old, extracted_new, fetch_date = update
updates.append(
RSSItem(
title=f"Change on {url}",
url=url,
- content=f"Headers: {update[0]}\n\nContent: {extract_text(update[1])}".replace("\n", "<br>"),
- date=datetime.datetime.fromtimestamp(update[2], tz=datetime.UTC),
+ content=compare_to_html(extracted_old, extracted_new),
+ date=datetime.datetime.fromtimestamp(fetch_date, tz=datetime.UTC),
enclosures=[],
guid=update[2],
)