diff options
| author | André Glüpker <git@wgmd.de> | 2024-05-25 13:08:53 +0200 | 
|---|---|---|
| committer | André Glüpker <git@wgmd.de> | 2024-05-25 13:08:53 +0200 | 
| commit | 13a00ca66708f257f0e1d5664147c159c02d519c (patch) | |
| tree | 785f90056a972f0cb0f5c3a1081bde278c8e25dd | |
| parent | 02375f5bed06741831f235fb53f41fc324ef3c23 (diff) | |
| download | rss-feeds-main.tar.gz rss-feeds-main.tar.bz2 rss-feeds-main.zip | |
Build diff for changes (main)
| -rwxr-xr-x | urldelta.py | 30 | 
1 files changed, 27 insertions, 3 deletions
```diff
diff --git a/urldelta.py b/urldelta.py
index 569e201..5584966 100755
--- a/urldelta.py
+++ b/urldelta.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 
 import datetime
+import difflib
 import sqlite3
 import time
 import requests
@@ -19,6 +20,28 @@ def extract_text(content) -> str:
     return soup.get_text(separator="\n", strip=True)
 
 
+def compare_to_html(
+    text_old: str,
+    text_new: str,
+) -> str:
+    if not text_old or not text_new:
+        return "N/A"
+
+    output = ["<pre>"]
+    for line in difflib.Differ().compare(text_old.splitlines(), text_new.splitlines()):
+        if line.startswith("+"):
+            output.append(f"🟢 {line.strip()[2:]}")
+        elif line.startswith("-"):
+            output.append(f"🔴 {line.strip()[2:]}")
+        elif line.startswith("?"):
+            output.append(f"🔵 {line.strip()[2:]}")
+        else:
+            output.append(f"<small>🔵 {line.strip()}</small>")
+    output.append("</pre>")
+
+    return "\n".join(output)
+
+
 def get_page_delta(url):
     conn = sqlite3.connect(os.path.join(os.path.dirname(__file__), "database", "website_data.db"))
     cursor = conn.cursor()
@@ -87,16 +110,17 @@ def get_page_delta(url):
             )
             conn.commit()
 
-    cursor.execute("SELECT headers, content, fetch_date FROM deltas WHERE website_id = ?", (id,))
+    cursor.execute("SELECT extracted_old, extracted_new, fetch_date FROM deltas WHERE website_id = ?", (id,))
 
     updates = []
     for update in cursor.fetchall():
+        extracted_old, extracted_new, fetch_date = update
         updates.append(
             RSSItem(
                 title=f"Change on {url}",
                 url=url,
-                content=f"Headers: {update[0]}\n\nContent: {extract_text(update[1])}".replace("\n", "<br>"),
-                date=datetime.datetime.fromtimestamp(update[2], tz=datetime.UTC),
+                content=compare_to_html(extracted_old, extracted_new),
+                date=datetime.datetime.fromtimestamp(fetch_date, tz=datetime.UTC),
                 enclosures=[],
                 guid=update[2],
             )
```
