#!/usr/bin/env python3
import datetime
import difflib
import os
import sqlite3
import time

import requests
from bs4 import BeautifulSoup

from rss_types import RSSItem, RSSFeed


def extract_text(content) -> str:
    """Return the visible text of an HTML document, dropping scripts and styles."""
    soup = BeautifulSoup(content, features="html.parser")
    for script in soup(["script", "style"]):
        script.extract()
    return soup.get_text(separator="\n", strip=True)


def compare_to_html(text_old: str, text_new: str) -> str:
    """Render a line-by-line diff of the two extracted texts as an HTML <pre> block."""
    if not text_old or not text_new:
        return "N/A"
    output = ["<pre>"]
    for line in difflib.Differ().compare(text_old.splitlines(), text_new.splitlines()):
        if line.startswith("+"):
            output.append(f"🟢 {line.strip()[2:]}")  # added line
        elif line.startswith("-"):
            output.append(f"🔴 {line.strip()[2:]}")  # removed line
        elif line.startswith("?"):
            output.append(f"🔵 {line.strip()[2:]}")  # Differ hint line
        else:
            output.append(f"🔵 {line.strip()}")  # unchanged line
    output.append("</pre>")
    return "\n".join(output)


def get_page_delta(url: str) -> RSSFeed:
    db_dir = os.path.join(os.path.dirname(__file__), "database")
    os.makedirs(db_dir, exist_ok=True)  # sqlite3 does not create missing directories
    conn = sqlite3.connect(os.path.join(db_dir, "website_data.db"))
    cursor = conn.cursor()

    # Initialize database, if needed
    cursor.execute(
        """
        CREATE TABLE IF NOT EXISTS websites (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            url TEXT,
            date_added INTEGER,
            last_fetched INTEGER
        );
        """
    )
    cursor.execute(
        """
        CREATE TABLE IF NOT EXISTS deltas (
            website_id INTEGER,
            headers TEXT,
            content TEXT,
            fetch_date INTEGER
        );
        """
    )
    conn.commit()

    # Migrate older databases: add the extracted-text columns if they are missing.
    cursor.execute("PRAGMA table_info(deltas)")
    existing_cols = [row[1] for row in cursor.fetchall()]
    if "extracted_old" not in existing_cols:
        cursor.execute("ALTER TABLE deltas ADD COLUMN extracted_old TEXT")
        cursor.execute("ALTER TABLE deltas ADD COLUMN extracted_new TEXT")
        conn.commit()

    # Check if the current website is known; get its latest state if so.
    cursor.execute("SELECT id, last_fetched FROM websites WHERE url = ?", (url,))
    website_id = last_fetched = last_content = None
    data = cursor.fetchone()
    if data:
        website_id, last_fetched = data
        cursor.execute(
            "SELECT content FROM deltas WHERE website_id = ? ORDER BY fetch_date DESC LIMIT 1",
            (website_id,),
        )
        row = cursor.fetchone()
        if row:
            last_content = row[0]
    else:
        cursor.execute(
            "INSERT INTO websites (url, date_added, last_fetched) VALUES (?, ?, ?)",
            (url, int(time.time()), int(time.time())),
        )
        conn.commit()
        website_id = cursor.lastrowid

    # Re-fetch at most once per hour; store a delta only when the extracted text changed.
    if not last_fetched or int(time.time()) - last_fetched > 3600:
        response = requests.get(url, timeout=20)
        response.raise_for_status()  # don't record error pages as content changes
        cursor.execute(
            "UPDATE websites SET last_fetched = ? WHERE id = ?",
            (int(time.time()), website_id),
        )
        extracted_new = extract_text(response.content)
        # On the first fetch there is no previous content to extract.
        extracted_old = extract_text(last_content) if last_content else ""
        if extracted_new != extracted_old:
            cursor.execute(
                "INSERT INTO deltas (website_id, headers, content, fetch_date, extracted_old, extracted_new)"
                " VALUES (?, ?, ?, ?, ?, ?)",
                (website_id, str(response.headers), response.content, int(time.time()), extracted_old, extracted_new),
            )
        conn.commit()

    cursor.execute(
        "SELECT extracted_old, extracted_new, fetch_date FROM deltas WHERE website_id = ? ORDER BY fetch_date DESC",
        (website_id,),
    )
    updates = []
    for extracted_old, extracted_new, fetch_date in cursor.fetchall():
        updates.append(
            RSSItem(
                title=f"Change on {url}",
                url=url,
                content=compare_to_html(extracted_old, extracted_new),
                date=datetime.datetime.fromtimestamp(fetch_date, tz=datetime.UTC),
                enclosures=[],
                guid=fetch_date,  # the fetch timestamp doubles as a per-feed GUID
            )
        )
    conn.close()

    return RSSFeed(
        title=f"Updates for {url}",
        url=url,
        description=f"Detected changes on page {url}",
        content=updates,
    )