#!/usr/bin/env python3
"""
Home Assistant observer

Modes:
  collect  - run every 30 minutes; stores a compact JSON snapshot locally
  analyze  - run at 05:00; sends the last snapshots to AI and publishes a funny local web page

Configuration is via environment variables. See .env.example.
"""

from __future__ import annotations

import argparse
import html
import json
import os
import re
import subprocess
import sys
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Any

import requests


# Runtime configuration, read once from the environment at import time.
# The int(...) conversions raise ValueError on import if a value is not numeric.

# Base URL of the Home Assistant instance; trailing slashes are stripped so
# API paths (which start with "/") can be appended directly.
HA_URL = os.environ.get("HA_URL", "").rstrip("/")
# Bearer token sent with every Home Assistant API request.
HA_TOKEN = os.environ.get("HA_TOKEN", "")
# Directory where collected snapshot JSON files are stored.
DATA_DIR = Path(os.environ.get("DATA_DIR", "./data"))
# Directory where the Markdown analysis reports are written.
REPORT_DIR = Path(os.environ.get("REPORT_DIR", "./reports"))
# Root directory of the generated web pages (index.html and articles/).
WEB_DIR = Path(os.environ.get("WEB_DIR", "./web"))
# Optional file whose contents are appended to the LLM prompt as owner instructions.
PROMPT_FILE = Path(os.environ.get("PROMPT_FILE", "./llm_instructions.md"))
# Hours of Home Assistant history requested for each snapshot.
HISTORY_HOURS = int(os.environ.get("HISTORY_HOURS", "24"))
# Number of trailing history entries considered per entity.
MAX_HISTORY_PER_ENTITY = int(os.environ.get("MAX_HISTORY_PER_ENTITY", "20"))
# Snapshot files modified within this many hours are included in an analysis.
ANALYZE_SNAPSHOT_HOURS = int(os.environ.get("ANALYZE_SNAPSHOT_HOURS", "24"))
# Previous reports modified within this many days are passed to the LLM as context.
ARTICLE_CONTEXT_DAYS = int(os.environ.get("ARTICLE_CONTEXT_DAYS", "7"))
# Snapshot files older than this many days are deleted after each collect run.
KEEP_SNAPSHOT_DAYS = int(os.environ.get("KEEP_SNAPSHOT_DAYS", "14"))

# LLM_MODE: none | pi | ollama | openai
# The value is lower-cased, so the setting is case-insensitive.
LLM_MODE = os.environ.get("LLM_MODE", "none").lower()
# Ollama server base URL and model name (used when LLM_MODE=ollama).
OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://localhost:11434").rstrip("/")
OLLAMA_MODEL = os.environ.get("OLLAMA_MODEL", "llama3.1")
# OpenAI credentials and model name (used when LLM_MODE=openai).
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
OPENAI_MODEL = os.environ.get("OPENAI_MODEL", "gpt-4o-mini")
# Executable, optional model override and timeout in seconds for the `pi` CLI
# (used when LLM_MODE=pi).
PI_BIN = os.environ.get("PI_BIN", "pi")
PI_MODEL = os.environ.get("PI_MODEL", "")
PI_TIMEOUT = int(os.environ.get("PI_TIMEOUT", "600"))

# Entity domains worth recording; override with a comma-separated
# RELEVANT_DOMAINS environment variable. Blank entries are ignored.
RELEVANT_DOMAINS = {
    domain.strip()
    for domain in os.environ.get(
        "RELEVANT_DOMAINS",
        "sensor,binary_sensor,person,device_tracker,climate,light,switch,lock,cover,alarm_control_panel,media_player,calendar,weather",
    ).split(",")
    if domain.strip()
}
# Individual entity ids to skip even when their domain is relevant
# (comma-separated EXCLUDED_ENTITIES environment variable).
EXCLUDED_ENTITIES = {
    entity.strip()
    for entity in os.environ.get("EXCLUDED_ENTITIES", "").split(",")
    if entity.strip()
}

# Attribute keys that compact_attributes() keeps when a state is stored in a
# snapshot; every other attribute is dropped.
ALLOWED_ATTRIBUTES = {
    "friendly_name",
    "unit_of_measurement",
    "device_class",
    "state_class",
    "current_temperature",
    "temperature",
    "humidity",
    "battery_level",
    "brightness",
    "gps_accuracy",
    "source_type",
    "assumed_state",
}


class ConfigError(RuntimeError):
    """Raised when a required configuration value is missing."""


def require_config(for_ai: bool = False) -> None:
    """Validate that the mandatory settings are present.

    Args:
        for_ai: When true, additionally require OPENAI_API_KEY if
            LLM_MODE is "openai".

    Raises:
        ConfigError: If HA_URL or HA_TOKEN is empty, or the OpenAI key is
            needed but missing.
    """
    # Checked in this order so the first missing setting is the one reported.
    for value, label in ((HA_URL, "HA_URL"), (HA_TOKEN, "HA_TOKEN")):
        if not value:
            raise ConfigError(f"{label} is not set")

    openai_key_missing = LLM_MODE == "openai" and not OPENAI_API_KEY
    if for_ai and openai_key_missing:
        raise ConfigError("LLM_MODE=openai but OPENAI_API_KEY is not set")


def ha_get(path: str, params: dict[str, str] | None = None) -> Any:
    """Perform an authenticated GET against the Home Assistant API.

    Args:
        path: API path appended verbatim to HA_URL (e.g. "/api/states").
        params: Optional query-string parameters.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: On a non-success status; the message carries up
            to 500 characters of the response body for diagnosis.
    """
    url = f"{HA_URL}{path}"
    auth_headers = {
        "Authorization": f"Bearer {HA_TOKEN}",
        "Content-Type": "application/json",
    }
    response = requests.get(url, headers=auth_headers, params=params, timeout=60)
    try:
        response.raise_for_status()
    except requests.HTTPError as exc:
        # Re-raise with a body excerpt: the status line alone rarely explains
        # why Home Assistant rejected the request.
        body_excerpt = response.text.strip()[:500]
        raise requests.HTTPError(f"{exc}; response={body_excerpt}", response=response) from exc
    return response.json()


def is_relevant_entity(entity_id: str) -> bool:
    """Return True if the entity is not excluded and its domain is relevant.

    The domain is the part of ``entity_id`` before the first ".".
    """
    if entity_id in EXCLUDED_ENTITIES:
        return False
    domain, _, _ = entity_id.partition(".")
    return domain in RELEVANT_DOMAINS


def compact_attributes(attrs: dict[str, Any]) -> dict[str, Any]:
    """Return a copy of ``attrs`` restricted to the keys in ALLOWED_ATTRIBUTES."""
    return {key: attrs[key] for key in attrs if key in ALLOWED_ATTRIBUTES}


def get_states() -> list[dict[str, Any]]:
    """Fetch the current states and keep only the relevant, usable ones.

    Entities that are irrelevant (see is_relevant_entity) or whose state is
    "unknown", "unavailable" or missing are skipped.

    Returns:
        Compact state records sorted by entity id.
    """
    unusable_states = {"unknown", "unavailable", None}
    collected: list[dict[str, Any]] = []
    for raw in ha_get("/api/states"):
        entity_id = raw.get("entity_id", "")
        state = raw.get("state")
        if is_relevant_entity(entity_id) and state not in unusable_states:
            record = {
                "entity_id": entity_id,
                "state": state,
                "attributes": compact_attributes(raw.get("attributes", {})),
                "last_changed": raw.get("last_changed"),
                "last_updated": raw.get("last_updated"),
            }
            collected.append(record)
    collected.sort(key=lambda record: record["entity_id"])
    return collected


def get_history(hours: int, entity_ids: list[str]) -> list[dict[str, Any]]:
    """Collect recent state changes for the given entities.

    Args:
        hours: How far back from now the history window starts.
        entity_ids: Entities to query; requested in chunks of 50.

    Returns:
        One record per entity whose retained history contains more than one
        distinct state, sorted by entity id. Each record holds the entity id
        and a list of ``{"state", "last_changed"}`` entries.
    """
    end = datetime.now(timezone.utc)
    start = end - timedelta(hours=hours)
    changes: list[dict[str, Any]] = []

    # A slice of [-0:] would return the whole list, so a limit of zero (or
    # less) is handled explicitly as "keep nothing".
    keep_last = MAX_HISTORY_PER_ENTITY if MAX_HISTORY_PER_ENTITY > 0 else 0

    # Recent Home Assistant versions/configurations require filter_entity_id for
    # the history endpoint. Query in chunks to avoid an overlong URL.
    chunk_size = 50
    for i in range(0, len(entity_ids), chunk_size):
        chunk = entity_ids[i : i + chunk_size]
        data = ha_get(
            f"/api/history/period/{start.isoformat(timespec='seconds')}",
            params={
                "filter_entity_id": ",".join(chunk),
                # Without end_time the API only covers one day after the start
                # timestamp, which would cut off the most recent changes when
                # more than 24 hours are requested.
                "end_time": end.isoformat(timespec="seconds"),
                "minimal_response": "",
            },
        )

        for entity_history in data:
            if not entity_history:
                continue
            # The entity id is read from the first entry of each per-entity list.
            entity_id = entity_history[0].get("entity_id", "")
            if not is_relevant_entity(entity_id):
                continue
            recent = entity_history[-keep_last:] if keep_last else []
            compact = [
                {"state": item.get("state"), "last_changed": item.get("last_changed")}
                for item in recent
                if item.get("state") not in {"unknown", "unavailable", None}
            ]
            # Only entities that actually changed state are worth reporting.
            if len({entry["state"] for entry in compact}) > 1:
                changes.append({"entity_id": entity_id, "recent_states": compact})

    return sorted(changes, key=lambda x: x["entity_id"])


def make_snapshot() -> dict[str, Any]:
    """Build a snapshot of current states plus their recent history.

    Returns:
        A dict with the local generation time, the configured history
        window, the compact states and the recent changes of those entities.
    """
    current_states = get_states()
    tracked_ids = [entry["entity_id"] for entry in current_states]
    snapshot: dict[str, Any] = {
        "generated_at": datetime.now().isoformat(timespec="seconds"),
        "history_hours": HISTORY_HOURS,
        "states": current_states,
        "history": get_history(HISTORY_HOURS, tracked_ids),
    }
    return snapshot


def save_snapshot(snapshot: dict[str, Any]) -> Path:
    """Write the snapshot as pretty-printed JSON into DATA_DIR.

    The file is named ``snapshot-<local timestamp>.json``; DATA_DIR is
    created if necessary.

    Returns:
        The path of the written file.
    """
    DATA_DIR.mkdir(parents=True, exist_ok=True)
    filename = f"snapshot-{datetime.now():%Y-%m-%d_%H-%M-%S}.json"
    target = DATA_DIR / filename
    payload = json.dumps(snapshot, indent=2, ensure_ascii=False)
    target.write_text(payload, encoding="utf-8")
    return target


def cleanup_old_snapshots() -> None:
    """Delete snapshot files last modified more than KEEP_SNAPSHOT_DAYS ago."""
    oldest_kept = datetime.now() - timedelta(days=KEEP_SNAPSHOT_DAYS)
    for snapshot_file in DATA_DIR.glob("snapshot-*.json"):
        modified = datetime.fromtimestamp(snapshot_file.stat().st_mtime)
        if modified >= oldest_kept:
            continue
        snapshot_file.unlink(missing_ok=True)


def load_recent_snapshots(hours: int) -> list[dict[str, Any]]:
    """Load the snapshots modified within the last ``hours`` hours.

    Files are visited in sorted file-name order. A file that cannot be read
    or parsed is reported on stderr and skipped.

    Returns:
        The decoded snapshots, in file-name order.
    """
    oldest_wanted = datetime.now() - timedelta(hours=hours)
    loaded = []
    for snapshot_file in sorted(DATA_DIR.glob("snapshot-*.json")):
        modified = datetime.fromtimestamp(snapshot_file.stat().st_mtime)
        if modified >= oldest_wanted:
            try:
                raw_text = snapshot_file.read_text(encoding="utf-8")
                loaded.append(json.loads(raw_text))
            except Exception as exc:
                # Best effort: one broken file must not abort the analysis.
                print(f"Skipping unreadable snapshot {snapshot_file}: {exc}", file=sys.stderr)
    return loaded


def summarize_snapshot(snapshot: dict[str, Any]) -> str:
    """Render one snapshot as a plain-text summary.

    The text lists every state (friendly name, entity id, value with unit,
    last change) followed by the last eight recorded transitions of each
    recently changed entity.
    """

    def describe_state(entry: dict[str, Any]) -> str:
        entity_id = entry.get("entity_id")
        attributes = entry.get("attributes", {})
        label = attributes.get("friendly_name", entity_id)
        reading = f"{entry.get('state')} {attributes.get('unit_of_measurement', '')}".strip()
        return f"- {label} ({entity_id}): {reading}; last_changed={entry.get('last_changed')}"

    def describe_history(change: dict[str, Any]) -> str:
        latest = change.get("recent_states", [])[-8:]
        steps = [f"{step.get('state')} @ {step.get('last_changed')}" for step in latest]
        return f"- {change.get('entity_id')}: {', '.join(steps)}"

    return "\n".join(
        [
            f"Snapshot: {snapshot.get('generated_at')}",
            "Current states:",
            *(describe_state(entry) for entry in snapshot.get("states", [])),
            "Recently changed entities:",
            *(describe_history(change) for change in snapshot.get("history", [])),
        ]
    )


def build_daily_summary(snapshots: list[dict[str, Any]]) -> str:
    """Combine all snapshots into the text bundle handed to the LLM.

    The bundle starts with a two-line header and then contains one
    summarize_snapshot() section per snapshot, each introduced by "---".
    """
    header = [
        f"Daily Home Assistant bundle generated {datetime.now().isoformat(timespec='seconds')}",
        f"Contains {len(snapshots)} snapshots from roughly the last {ANALYZE_SNAPSHOT_HOURS} hours.",
    ]
    sections = ["\n---\n" + summarize_snapshot(item) for item in snapshots]
    return "\n".join(header + sections)


def read_extra_llm_instructions() -> str:
    """Return the stripped contents of PROMPT_FILE, or "" if it does not exist."""
    if PROMPT_FILE.exists():
        return PROMPT_FILE.read_text(encoding="utf-8").strip()
    return ""


def load_recent_article_context(days: int) -> str:
    """Collect the conclusions of recent reports as context for the LLM.

    Args:
        days: Only reports modified within this many days are considered;
            a value of zero or less disables the feature.

    Returns:
        Up to the seven last matching reports (in file-name order), each cut
        before its "## Data bundle" section and limited to 8000 characters,
        joined by "---" separators. Empty when nothing applies.
    """
    if days <= 0:
        return ""
    if not REPORT_DIR.exists():
        return ""

    oldest_wanted = datetime.now() - timedelta(days=days)
    collected: list[str] = []
    for report in sorted(REPORT_DIR.glob("daily-ai-analysis-*.md")):
        modified = datetime.fromtimestamp(report.stat().st_mtime)
        if modified < oldest_wanted:
            continue
        try:
            content = report.read_text(encoding="utf-8")
        except Exception as exc:
            print(f"Skipping unreadable previous report {report}: {exc}", file=sys.stderr)
        else:
            # Keep only the AI-written part; the raw data bundle is dropped.
            article_text = content.partition("\n## Data bundle\n")[0].strip()
            collected.append(f"PREVIOUS ARTICLE {report.name}:\n{article_text[:8000]}")
    newest = collected[-7:]
    return "\n\n---\n\n".join(newest)


def analysis_prompt(input_summary: str, previous_articles: str = "") -> str:
    """Assemble the full prompt sent to the LLM.

    Args:
        input_summary: Today's data bundle.
        previous_articles: Optional text of earlier articles, included as
            trend context when non-empty.

    Returns:
        The fixed briefing instructions, followed by the optional owner
        instructions from PROMPT_FILE, the optional previous articles and
        finally today's data.
    """
    owner_notes = read_extra_llm_instructions()
    extra_block = (
        f"\n\nADDITIONAL OWNER INSTRUCTIONS FROM {PROMPT_FILE}:\n{owner_notes}\n"
        if owner_notes
        else ""
    )
    previous_block = (
        (
            f"\n\nPREVIOUS ARTICLES FROM THE LAST {ARTICLE_CONTEXT_DAYS} DAYS FOR CONTEXT:\n"
            "Use these only for trend/context awareness. "
            "Do not claim something happened today unless today's data supports it.\n"
            f"{previous_articles}\n"
        )
        if previous_articles
        else ""
    )

    return f"""You are writing today's Home Assistant smart-home blog article for the owner.

Write a funny but useful morning briefing in a blog/article style. Use light humor, emojis,
and playful headings, but remain factual and privacy-aware. Include:
- A short comedy headline for the day
- What seemed to happen at home today
- Behavioral patterns that can reasonably be inferred
- Notable trends compared with recent previous articles, if supported
- What a nosy raccoon/hacker could figure out about the resident
- Anomalies, risks, or privacy/security concerns
- Suggested Home Assistant automations or fixes

Distinguish strong evidence from guesses. Do not invent facts not supported by the data.
{extra_block}{previous_block}
TODAY'S DATA:
{input_summary}
"""


def call_ollama(prompt: str) -> str:
    """Send the prompt to the Ollama generate endpoint and return its text.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    payload = {"model": OLLAMA_MODEL, "prompt": prompt, "stream": False}
    response = requests.post(f"{OLLAMA_URL}/api/generate", json=payload, timeout=300)
    response.raise_for_status()
    generated = response.json().get("response", "")
    return generated.strip()


def call_openai(prompt: str) -> str:
    """Send the prompt to the OpenAI chat completions API and return the reply.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    request_headers = {
        "Authorization": f"Bearer {OPENAI_API_KEY}",
        "Content-Type": "application/json",
    }
    conversation = [
        {"role": "system", "content": "You are a careful but funny smart-home analyst."},
        {"role": "user", "content": prompt},
    ]
    payload = {"model": OPENAI_MODEL, "messages": conversation, "temperature": 0.35}
    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=request_headers,
        json=payload,
        timeout=300,
    )
    response.raise_for_status()
    first_choice = response.json()["choices"][0]
    return first_choice["message"]["content"].strip()


def call_pi(prompt: str) -> str:
    """Run the ``pi`` CLI with the prompt on stdin and return its stdout.

    Raises:
        RuntimeError: If the process exits with a non-zero status; the
            message includes the last 1000 characters of stderr.
        subprocess.TimeoutExpired: If the run exceeds PI_TIMEOUT seconds.
    """
    model_args = ["--model", PI_MODEL] if PI_MODEL else []
    cmd = [
        PI_BIN,
        "--no-tools",
        *model_args,
        "-p",
        "Analyze the Home Assistant data from stdin and write the requested briefing.",
    ]
    completed = subprocess.run(
        cmd,
        input=prompt,
        text=True,
        capture_output=True,
        timeout=PI_TIMEOUT,
        check=False,
    )
    if completed.returncode == 0:
        return completed.stdout.strip()
    stderr_tail = completed.stderr.strip()[-1000:]
    raise RuntimeError(f"pi exited with status {completed.returncode}: {stderr_tail}")


def get_llm_conclusions(input_summary: str, previous_articles: str = "") -> str:
    """Produce the article text with the backend selected by LLM_MODE.

    Returns:
        The backend's answer, a fixed notice when LLM_MODE is "none", or an
        explanatory message when LLM_MODE is not recognised.
    """
    if LLM_MODE == "none":
        return "AI analysis disabled. Set LLM_MODE=pi, LLM_MODE=ollama, or LLM_MODE=openai in .env. The raccoon analyst is asleep. 🦝💤"

    prompt = analysis_prompt(input_summary, previous_articles)
    backends = {
        "ollama": call_ollama,
        "openai": call_openai,
        "pi": call_pi,
    }
    backend = backends.get(LLM_MODE)
    if backend is None:
        return f"Unknown LLM_MODE={LLM_MODE!r}. Use none, pi, ollama, or openai."
    return backend(prompt)


def markdownish_to_html(text: str) -> str:
    """Convert a small Markdown subset to HTML.

    The input is HTML-escaped first, then processed line by line:

    - ``# ``, ``## `` and ``### `` prefixes become ``<h1>``..``<h3>``.
    - Consecutive ``- `` lines become ``<li>`` items inside one ``<ul>``
      (list items outside a list container are not valid HTML).
    - Every other line is kept as text and separated from the following
      line by ``<br>``; block elements get no ``<br>``.

    Args:
        text: The Markdown-like source text.

    Returns:
        The HTML fragment, one output line per element.
    """
    rendered: list[str] = []
    in_list = False
    last_is_text = False
    for line in html.escape(text).split("\n"):
        item = re.fullmatch(r"- (.*)", line)
        if item:
            if not in_list:
                rendered.append("<ul>")
                in_list = True
            rendered.append(f"<li>{item.group(1)}</li>")
            last_is_text = False
            continue
        if in_list:
            rendered.append("</ul>")
            in_list = False
        heading = re.fullmatch(r"(#{1,3}) (.*)", line)
        if heading:
            level = len(heading.group(1))
            rendered.append(f"<h{level}>{heading.group(2)}</h{level}>")
            last_is_text = False
        else:
            rendered.append(f"{line}<br>")
            last_is_text = True

    if in_list:
        rendered.append("</ul>")
    elif last_is_text:
        # No line break is needed after the very last text line.
        rendered[-1] = rendered[-1][: -len("<br>")]
    return "\n".join(rendered)


# Stylesheet embedded verbatim into the <style> element of every page that
# blog_shell() generates.
BLOG_CSS = """
  :root { color-scheme: dark; }
  body { margin:0; font-family: Georgia, 'Times New Roman', serif; background:#101018; color:#eeeef6; line-height:1.65; }
  header { border-bottom:1px solid #303044; background:linear-gradient(135deg,#18182a,#25193a); }
  .wrap { max-width:980px; margin:0 auto; padding:1.5rem; }
  .masthead { padding:2.4rem 1.5rem; }
  .kicker { color:#fbbf24; text-transform:uppercase; letter-spacing:.14em; font:700 .78rem system-ui,sans-serif; }
  h1 { margin:.2rem 0; font-size:clamp(2.2rem,6vw,4.8rem); line-height:1; }
  h2,h3 { color:#fde68a; line-height:1.2; }
  article { background:#181827; border:1px solid #33334a; border-radius:22px; padding:clamp(1rem,3vw,2rem); box-shadow:0 18px 45px #0007; }
  article p, article li { font-size:1.05rem; }
  .layout { display:grid; grid-template-columns:minmax(0,1fr) 280px; gap:1.25rem; align-items:start; }
  aside { background:#171724; border:1px solid #303044; border-radius:18px; padding:1rem; position:sticky; top:1rem; }
  .archive { list-style:none; margin:0; padding:0; }
  .archive li { border-bottom:1px solid #2b2b3f; padding:.55rem 0; }
  .archive li:last-child { border-bottom:0; }
  a { color:#93c5fd; text-decoration:none; }
  a:hover { text-decoration:underline; }
  .meta { color:#b7b7c8; font: .95rem system-ui,sans-serif; }
  details { margin-top:1.5rem; }
  pre { white-space:pre-wrap; background:#0b0b11; color:#d1d5db; padding:1rem; border-radius:12px; overflow:auto; font-size:.85rem; }
  footer { color:#9999aa; text-align:center; padding:2rem; font: .9rem system-ui,sans-serif; }
  @media (max-width:800px) { .layout { grid-template-columns:1fr; } aside { position:static; } }
"""


def article_links() -> str:
    """Build the ``<li>`` archive entries for all published articles.

    Articles are the ``*.html`` files in ``WEB_DIR/articles``, listed in
    reverse file-name order. A file named ``YYYY-MM-DD.html`` is labelled
    with its long date (e.g. "Friday, January 5, 2024"); any other file is
    labelled with its stem.

    Returns:
        The entries joined by newlines, or a single placeholder entry when
        there are no articles. Links are relative to WEB_DIR.
    """
    placeholder = "<li>No articles yet. The raccoon newsroom is warming up.</li>"
    articles_dir = WEB_DIR / "articles"
    if not articles_dir.exists():
        return placeholder
    links = []
    for path in sorted(articles_dir.glob("*.html"), reverse=True):
        try:
            published = datetime.strptime(path.stem, "%Y-%m-%d")
        except ValueError:
            # Not a dated article file; fall back to the bare file stem.
            label = path.stem
        else:
            # "%-d" (day without leading zero) is a platform-specific strftime
            # extension, so the day number is formatted separately.
            label = f"{published:%A, %B} {published.day}, {published:%Y}"
        links.append(f'<li><a href="articles/{html.escape(path.name)}">{html.escape(label)}</a></li>')
    return "\n".join(links) or placeholder


def blog_shell(title: str, subtitle: str, main_content: str, archive_links: str, article_href_prefix: str = "") -> str:
    """Wrap page content in the complete HTML document of the blog.

    Args:
        title: Page title; escaped and used for ``<title>`` and the heading.
        subtitle: Line shown under the heading; escaped.
        main_content: HTML inserted unescaped into the main section.
        archive_links: ``<li>`` entries whose links start with
            ``href="articles/``; inserted unescaped into the sidebar.
        article_href_prefix: Text put in front of ``articles/`` in every
            archive link, e.g. "../" for pages inside the articles folder.

    Returns:
        The full HTML page as a string.
    """
    page_title = html.escape(title)
    page_subtitle = html.escape(subtitle)
    prefixed_href = f'href="{article_href_prefix}articles/'
    sidebar_links = archive_links.replace('href="articles/', prefixed_href)
    return f"""<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>{page_title}</title>
<style>{BLOG_CSS}</style>
</head>
<body>
<header>
  <div class="wrap masthead">
    <div class="kicker">🦝 Smart Home Gossip Gazette</div>
    <h1>{page_title}</h1>
    <p class="meta">{page_subtitle}</p>
  </div>
</header>
<main class="wrap layout">
  <section>{main_content}</section>
  <aside>
    <h2>Article archive</h2>
    <ul class="archive">{sidebar_links}</ul>
  </aside>
</main>
<footer>Generated by Home Assistant Observer · Served locally by nginx</footer>
</body>
</html>
"""


def publish_webpage(conclusions: str, raw_summary: str) -> Path:
    """Publish today's article page and refresh the blog index.

    Writes ``WEB_DIR/articles/<YYYY-MM-DD>.html`` (overwriting an earlier
    article of the same day) and ``WEB_DIR/index.html``, creating the
    directories when needed.

    Args:
        conclusions: The article text in the Markdown subset understood by
            markdownish_to_html().
        raw_summary: The data bundle shown to the LLM; its first 60000
            characters are embedded, escaped, in a collapsible section.

    Returns:
        The path of the article page.
    """
    WEB_DIR.mkdir(parents=True, exist_ok=True)
    articles_dir = WEB_DIR / "articles"
    articles_dir.mkdir(parents=True, exist_ok=True)
    now_dt = datetime.now()
    now = now_dt.strftime("%Y-%m-%d %H:%M")
    article_name = f"{now_dt:%Y-%m-%d}.html"
    body = markdownish_to_html(conclusions)
    raw = html.escape(raw_summary[:60000])
    article_content = f"""
  <article>
    {body}
    <details>
      <summary>Raw data bundle shown to the AI goblin</summary>
      <pre>{raw}</pre>
    </details>
  </article>
"""
    article_path = articles_dir / article_name
    # The archive is built from the files on disk, so today's file must exist
    # before the links are generated; otherwise a freshly published article
    # would be missing from its own sidebar.
    article_path.touch(exist_ok=True)
    archive = article_links()
    article_path.write_text(
        blog_shell(
            "Smart Home Gossip Gazette",
            f"Daily home intelligence briefing · Generated {now}",
            article_content,
            archive,
            article_href_prefix="../",
        ),
        encoding="utf-8",
    )

    featured = f"""
  <article>
    <p class="meta">Latest article · {html.escape(now)}</p>
    {body}
    <p><a href="articles/{html.escape(article_name)}">Permanent link for this article →</a></p>
  </article>
"""
    index_path = WEB_DIR / "index.html"
    index_path.write_text(
        blog_shell("Smart Home Gossip Gazette", "A daily blog of your Home Assistant household signals", featured, archive),
        encoding="utf-8",
    )
    return article_path


def write_markdown_report(summary: str, conclusions: str) -> Path:
    """Store the analysis and its data bundle as a Markdown report.

    The file ``daily-ai-analysis-<local timestamp>.md`` is written to
    REPORT_DIR, which is created if necessary.

    Returns:
        The path of the written report.
    """
    REPORT_DIR.mkdir(parents=True, exist_ok=True)
    report_path = REPORT_DIR / f"daily-ai-analysis-{datetime.now():%Y-%m-%d_%H-%M-%S}.md"
    document = (
        "# Daily Home Assistant AI Analysis\n\n"
        f"{conclusions}\n\n"
        "## Data bundle\n\n"
        f"```text\n{summary}\n```\n"
    )
    report_path.write_text(document, encoding="utf-8")
    return report_path


def cmd_collect() -> int:
    """Run the collect mode: take a snapshot, store it and prune old ones.

    Returns:
        0 on success; failures surface as exceptions.
    """
    require_config(for_ai=False)
    saved_to = save_snapshot(make_snapshot())
    cleanup_old_snapshots()
    print(f"Collected snapshot: {saved_to}")
    return 0


def cmd_analyze() -> int:
    """Run the analyze mode: summarize snapshots, ask the LLM and publish.

    Returns:
        0 on success.

    Raises:
        RuntimeError: If no recent snapshots are available.
    """
    require_config(for_ai=True)
    recent = load_recent_snapshots(ANALYZE_SNAPSHOT_HOURS)
    if len(recent) == 0:
        raise RuntimeError(f"No snapshots found in {DATA_DIR}; run collect first")

    bundle = build_daily_summary(recent)
    context = load_recent_article_context(ARTICLE_CONTEXT_DAYS)
    article = get_llm_conclusions(bundle, context)

    # The Markdown report is written before the web page is published.
    report_path = write_markdown_report(bundle, article)
    page_path = publish_webpage(article, bundle)
    print(f"Wrote report: {report_path}")
    print(f"Published webpage: {page_path}")
    return 0


def main() -> int:
    """Parse the command line and run the selected mode.

    Returns:
        The process exit status: the mode's return value, or 1 if it raised
        (the error is printed to stderr).
    """
    parser = argparse.ArgumentParser(description="Home Assistant observer")
    parser.add_argument(
        "mode",
        nargs="?",
        default="collect",
        choices=["collect", "analyze"],
        help="collect snapshots or analyze/publish them",
    )
    selected_mode = parser.parse_args().mode
    handler = cmd_collect if selected_mode == "collect" else cmd_analyze
    try:
        return handler()
    except Exception as exc:
        # Top-level boundary: report any failure and signal it via the exit code.
        print(f"ERROR: {exc}", file=sys.stderr)
        return 1


# Script entry point: the return value of main() becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())