#!/usr/bin/env python3
"""
Home Assistant observer

Modes:
  collect  - run every 30 minutes; stores a compact JSON snapshot locally
  analyze  - run at 05:00; sends the last snapshots to AI and publishes a funny local web page

Configuration is via environment variables. See .env.example.
"""
from __future__ import annotations

import argparse
import html
import json
import os
import re
import subprocess
import sys
from datetime import datetime, timedelta, timezone
from email.utils import format_datetime
from pathlib import Path
from typing import Any

import requests

HA_URL = os.environ.get("HA_URL", "").rstrip("/")
HA_TOKEN = os.environ.get("HA_TOKEN", "")
DATA_DIR = Path(os.environ.get("DATA_DIR", "./data"))
REPORT_DIR = Path(os.environ.get("REPORT_DIR", "./reports"))
WEB_DIR = Path(os.environ.get("WEB_DIR", "./web"))
SITE_BASE_PATH = os.environ.get("SITE_BASE_PATH", "/").strip() or "/"
SITE_URL = os.environ.get("SITE_URL", "http://localhost").rstrip("/")
PROMPT_FILE = Path(os.environ.get("PROMPT_FILE", "./llm_instructions.md"))
HISTORY_HOURS = int(os.environ.get("HISTORY_HOURS", "24"))
MAX_HISTORY_PER_ENTITY = int(os.environ.get("MAX_HISTORY_PER_ENTITY", "20"))
ANALYZE_SNAPSHOT_HOURS = int(os.environ.get("ANALYZE_SNAPSHOT_HOURS", "24"))
ARTICLE_CONTEXT_DAYS = int(os.environ.get("ARTICLE_CONTEXT_DAYS", "7"))
KEEP_SNAPSHOT_DAYS = int(os.environ.get("KEEP_SNAPSHOT_DAYS", "14"))

# LLM_MODE: none | pi | ollama | openai
LLM_MODE = os.environ.get("LLM_MODE", "none").lower()
OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://localhost:11434").rstrip("/")
OLLAMA_MODEL = os.environ.get("OLLAMA_MODEL", "llama3.1")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
OPENAI_MODEL = os.environ.get("OPENAI_MODEL", "gpt-4o-mini")
PI_BIN = os.environ.get("PI_BIN", "pi")
PI_MODEL = os.environ.get("PI_MODEL", "")
PI_TIMEOUT = int(os.environ.get("PI_TIMEOUT", "600"))

RELEVANT_DOMAINS = set(
    x.strip()
    for x in os.environ.get(
        "RELEVANT_DOMAINS",
        "sensor,binary_sensor,person,device_tracker,climate,light,switch,lock,cover,alarm_control_panel,media_player,calendar,weather",
    ).split(",")
    if x.strip()
)
EXCLUDED_ENTITIES = set(x.strip() for x in os.environ.get("EXCLUDED_ENTITIES", "").split(",") if x.strip())

# Only these attribute keys are copied into snapshots; everything else is dropped.
ALLOWED_ATTRIBUTES = {
    "friendly_name",
    "unit_of_measurement",
    "device_class",
    "state_class",
    "current_temperature",
    "temperature",
    "humidity",
    "battery_level",
    "brightness",
    "gps_accuracy",
    "source_type",
    "assumed_state",
}

# States that carry no information and are skipped in snapshots and history.
_IGNORED_STATES = {"unknown", "unavailable", None}

# Used to turn a rendered article page back into plain text for the RSS feed.
_STYLE_OR_SCRIPT_RE = re.compile(r"<(style|script)\b[^>]*>.*?</\1\s*>", re.DOTALL | re.IGNORECASE)
_TAG_RE = re.compile(r"<[^>]+>")
_WHITESPACE_RE = re.compile(r"\s+")

_NO_ARTICLES_ITEM = "<li>No articles yet. The raccoon newsroom is warming up.</li>"


class ConfigError(RuntimeError):
    """Raised when a required environment variable is missing."""


def require_config(for_ai: bool = False) -> None:
    """Validate the environment-derived configuration.

    Raises ConfigError when HA_URL or HA_TOKEN is empty, or when ``for_ai`` is
    true, LLM_MODE is ``openai`` and OPENAI_API_KEY is empty.
    """
    if not HA_URL:
        raise ConfigError("HA_URL is not set")
    if not HA_TOKEN:
        raise ConfigError("HA_TOKEN is not set")
    if for_ai and LLM_MODE == "openai" and not OPENAI_API_KEY:
        raise ConfigError("LLM_MODE=openai but OPENAI_API_KEY is not set")


def ha_get(path: str, params: dict[str, str] | None = None) -> Any:
    """GET ``HA_URL + path`` with the bearer token and return the decoded JSON.

    On an HTTP error status a ``requests.HTTPError`` is raised whose message
    includes the first 500 characters of the response body.
    """
    headers = {"Authorization": f"Bearer {HA_TOKEN}", "Content-Type": "application/json"}
    response = requests.get(f"{HA_URL}{path}", headers=headers, params=params, timeout=60)
    try:
        response.raise_for_status()
    except requests.HTTPError as exc:
        detail = response.text.strip()
        raise requests.HTTPError(f"{exc}; response={detail[:500]}", response=response) from exc
    return response.json()


def is_relevant_entity(entity_id: str) -> bool:
    """Return True when the entity is not excluded and its domain is relevant."""
    return entity_id not in EXCLUDED_ENTITIES and entity_id.split(".", 1)[0] in RELEVANT_DOMAINS


def compact_attributes(attrs: dict[str, Any]) -> dict[str, Any]:
    """Return only the attributes whose key is in ALLOWED_ATTRIBUTES."""
    return {k: v for k, v in attrs.items() if k in ALLOWED_ATTRIBUTES}


def get_states() -> list[dict[str, Any]]:
    """Fetch /api/states and return compact records sorted by entity_id.

    Irrelevant entities and entities whose state is unknown, unavailable or
    missing are skipped.
    """
    useful: list[dict[str, Any]] = []
    for item in ha_get("/api/states"):
        entity_id = item.get("entity_id", "")
        state = item.get("state")
        if not is_relevant_entity(entity_id) or state in _IGNORED_STATES:
            continue
        useful.append(
            {
                "entity_id": entity_id,
                "state": state,
                "attributes": compact_attributes(item.get("attributes", {})),
                "last_changed": item.get("last_changed"),
                "last_updated": item.get("last_updated"),
            }
        )
    return sorted(useful, key=lambda x: x["entity_id"])


def get_history(hours: int, entity_ids: list[str]) -> list[dict[str, Any]]:
    """Return recent state changes for ``entity_ids`` over the last ``hours``.

    Only entities that took more than one distinct usable state are included.
    Each entry holds at most MAX_HISTORY_PER_ENTITY trailing states. The
    result is sorted by entity_id.
    """
    start = datetime.now(timezone.utc) - timedelta(hours=hours)
    changes: list[dict[str, Any]] = []
    # Recent Home Assistant versions/configurations require filter_entity_id for
    # the history endpoint. Query in chunks to avoid an overlong URL.
    chunk_size = 50
    for i in range(0, len(entity_ids), chunk_size):
        chunk = entity_ids[i : i + chunk_size]
        data = ha_get(
            f"/api/history/period/{start.isoformat(timespec='seconds')}",
            params={"filter_entity_id": ",".join(chunk), "minimal_response": ""},
        )
        for entity_history in data:
            if not entity_history:
                continue
            # The entity_id is read from the first record of each per-entity list.
            entity_id = entity_history[0].get("entity_id", "")
            if not is_relevant_entity(entity_id):
                continue
            compact = [
                {"state": item.get("state"), "last_changed": item.get("last_changed")}
                for item in entity_history[-MAX_HISTORY_PER_ENTITY:]
                if item.get("state") not in _IGNORED_STATES
            ]
            if len({x["state"] for x in compact}) > 1:
                changes.append({"entity_id": entity_id, "recent_states": compact})
    return sorted(changes, key=lambda x: x["entity_id"])


def make_snapshot() -> dict[str, Any]:
    """Build one snapshot: current states plus recent history for those entities."""
    states = get_states()
    entity_ids = [state["entity_id"] for state in states]
    return {
        "generated_at": datetime.now().isoformat(timespec="seconds"),
        "history_hours": HISTORY_HOURS,
        "states": states,
        "history": get_history(HISTORY_HOURS, entity_ids),
    }


def save_snapshot(snapshot: dict[str, Any]) -> Path:
    """Write ``snapshot`` to DATA_DIR as a timestamped JSON file and return its path."""
    DATA_DIR.mkdir(parents=True, exist_ok=True)
    stamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    path = DATA_DIR / f"snapshot-{stamp}.json"
    path.write_text(json.dumps(snapshot, indent=2, ensure_ascii=False), encoding="utf-8")
    return path


def cleanup_old_snapshots() -> None:
    """Delete snapshot files whose mtime is older than KEEP_SNAPSHOT_DAYS."""
    cutoff = datetime.now() - timedelta(days=KEEP_SNAPSHOT_DAYS)
    for path in DATA_DIR.glob("snapshot-*.json"):
        if datetime.fromtimestamp(path.stat().st_mtime) < cutoff:
            path.unlink(missing_ok=True)


def load_recent_snapshots(hours: int) -> list[dict[str, Any]]:
    """Load snapshots modified within the last ``hours``, in file-name order.

    Files that cannot be read or parsed are reported on stderr and skipped.
    """
    cutoff = datetime.now() - timedelta(hours=hours)
    snapshots = []
    for path in sorted(DATA_DIR.glob("snapshot-*.json")):
        if datetime.fromtimestamp(path.stat().st_mtime) < cutoff:
            continue
        try:
            snapshots.append(json.loads(path.read_text(encoding="utf-8")))
        except (OSError, ValueError) as exc:
            # ValueError covers both JSONDecodeError and UnicodeDecodeError.
            print(f"Skipping unreadable snapshot {path}: {exc}", file=sys.stderr)
    return snapshots


def summarize_snapshot(snapshot: dict[str, Any]) -> str:
    """Render one snapshot as plain text lines for the LLM prompt."""
    lines = [f"Snapshot: {snapshot.get('generated_at')}", "Current states:"]
    for state in snapshot.get("states", []):
        attrs = state.get("attributes", {})
        name = attrs.get("friendly_name", state.get("entity_id"))
        unit = attrs.get("unit_of_measurement", "")
        value = f"{state.get('state')} {unit}".strip()
        lines.append(f"- {name} ({state.get('entity_id')}): {value}; last_changed={state.get('last_changed')}")
    lines.append("Recently changed entities:")
    for item in snapshot.get("history", []):
        # Only the last eight transitions per entity are shown.
        transitions = ", ".join(
            f"{x.get('state')} @ {x.get('last_changed')}" for x in item.get("recent_states", [])[-8:]
        )
        lines.append(f"- {item.get('entity_id')}: {transitions}")
    return "\n".join(lines)


def build_daily_summary(snapshots: list[dict[str, Any]]) -> str:
    """Concatenate all snapshot summaries under a short bundle header."""
    parts = [
        f"Daily Home Assistant bundle generated {datetime.now().isoformat(timespec='seconds')}",
        f"Contains {len(snapshots)} snapshots from roughly the last {ANALYZE_SNAPSHOT_HOURS} hours.",
    ]
    for snapshot in snapshots:
        parts.append("\n---\n" + summarize_snapshot(snapshot))
    return "\n".join(parts)


def read_extra_llm_instructions() -> str:
    """Return the stripped contents of PROMPT_FILE, or "" when it does not exist."""
    if not PROMPT_FILE.exists():
        return ""
    return PROMPT_FILE.read_text(encoding="utf-8").strip()


def load_recent_article_context(days: int) -> str:
    """Return the conclusions of recent markdown reports for LLM context.

    Reports modified within the last ``days`` are used. Each one is cut at its
    "## Data bundle" heading and truncated to 8000 characters, and at most the
    last seven are joined. Returns "" when ``days`` is not positive or
    REPORT_DIR does not exist.
    """
    if days <= 0 or not REPORT_DIR.exists():
        return ""
    cutoff = datetime.now() - timedelta(days=days)
    articles: list[str] = []
    for path in sorted(REPORT_DIR.glob("daily-ai-analysis-*.md")):
        if datetime.fromtimestamp(path.stat().st_mtime) < cutoff:
            continue
        try:
            text = path.read_text(encoding="utf-8")
        except (OSError, ValueError) as exc:
            print(f"Skipping unreadable previous report {path}: {exc}", file=sys.stderr)
            continue
        conclusions = text.split("\n## Data bundle\n", 1)[0].strip()
        articles.append(f"PREVIOUS ARTICLE {path.name}:\n{conclusions[:8000]}")
    return "\n\n---\n\n".join(articles[-7:])


def analysis_prompt(input_summary: str, previous_articles: str = "") -> str:
    """Build the full LLM prompt from today's data and optional context."""
    extra_instructions = read_extra_llm_instructions()
    extra_block = ""
    if extra_instructions:
        extra_block = f"""
ADDITIONAL OWNER INSTRUCTIONS FROM {PROMPT_FILE}:
{extra_instructions}
"""
    previous_block = ""
    if previous_articles:
        previous_block = f"""
PREVIOUS ARTICLES FROM THE LAST {ARTICLE_CONTEXT_DAYS} DAYS FOR CONTEXT:
Use these only for trend/context awareness. Do not claim something happened today unless today's data supports it.

{previous_articles}
"""
    return f"""You are writing today's Home Assistant smart-home blog article for the owner.

Write a funny but useful morning briefing in a blog/article style. Use light humor, emojis, and playful headings, but remain factual and privacy-aware.

Include:
- A short comedy headline for the day
- What seemed to happen at home today
- Behavioral patterns that can reasonably be inferred
- Notable trends compared with recent previous articles, if supported
- What a nosy raccoon/hacker could figure out about the resident
- Anomalies, risks, or privacy/security concerns
- Suggested Home Assistant automations or fixes

Distinguish strong evidence from guesses. Do not invent facts not supported by the data.
{extra_block}{previous_block}
TODAY'S DATA:
{input_summary}
"""


def call_ollama(prompt: str) -> str:
    """Send ``prompt`` to the Ollama generate endpoint and return the reply text."""
    response = requests.post(
        f"{OLLAMA_URL}/api/generate",
        json={"model": OLLAMA_MODEL, "prompt": prompt, "stream": False},
        timeout=300,
    )
    response.raise_for_status()
    return response.json().get("response", "").strip()


def call_openai(prompt: str) -> str:
    """Send ``prompt`` to the OpenAI chat completions API and return the reply text."""
    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers={"Authorization": f"Bearer {OPENAI_API_KEY}", "Content-Type": "application/json"},
        json={
            "model": OPENAI_MODEL,
            "messages": [
                {"role": "system", "content": "You are a careful but funny smart-home analyst."},
                {"role": "user", "content": prompt},
            ],
            "temperature": 0.35,
        },
        timeout=300,
    )
    response.raise_for_status()
    return response.json()["choices"][0]["message"]["content"].strip()


def call_pi(prompt: str) -> str:
    """Run the ``pi`` CLI with ``prompt`` on stdin and return its stdout.

    Raises RuntimeError, carrying the tail of stderr, on a non-zero exit status.
    """
    cmd = [PI_BIN, "--no-tools"]
    if PI_MODEL:
        cmd.extend(["--model", PI_MODEL])
    cmd.extend(["-p", "Analyze the Home Assistant data from stdin and write the requested briefing."])
    result = subprocess.run(
        cmd,
        input=prompt,
        text=True,
        capture_output=True,
        timeout=PI_TIMEOUT,
        check=False,
    )
    if result.returncode != 0:
        stderr = result.stderr.strip()
        raise RuntimeError(f"pi exited with status {result.returncode}: {stderr[-1000:]}")
    return result.stdout.strip()


def get_llm_conclusions(input_summary: str, previous_articles: str = "") -> str:
    """Return the article text from the backend selected by LLM_MODE.

    For ``none`` or an unrecognised mode a short explanatory message is
    returned instead of raising, so a page is still published.
    """
    if LLM_MODE == "none":
        return "AI analysis disabled. Set LLM_MODE=pi, LLM_MODE=ollama, or LLM_MODE=openai in .env. The raccoon analyst is asleep. 🦝💤"
    prompt = analysis_prompt(input_summary, previous_articles)
    if LLM_MODE == "ollama":
        return call_ollama(prompt)
    if LLM_MODE == "openai":
        return call_openai(prompt)
    if LLM_MODE == "pi":
        return call_pi(prompt)
    return f"Unknown LLM_MODE={LLM_MODE!r}. Use none, pi, ollama, or openai."


def markdownish_to_html(text: str) -> str:
    """Convert a small Markdown subset to HTML.

    The input is HTML-escaped first, so text from the LLM cannot inject
    markup. Lines starting with ``#``/``##``/``###`` become headings, lines
    starting with ``- `` become list items, and every newline gets a ``<br>``.
    """
    safe = html.escape(text)
    # Longest prefix first so "### " is not consumed by the "# " rule.
    safe = re.sub(r"^### (.*)$", r"<h3>\1</h3>", safe, flags=re.MULTILINE)
    safe = re.sub(r"^## (.*)$", r"<h2>\1</h2>", safe, flags=re.MULTILINE)
    safe = re.sub(r"^# (.*)$", r"<h1>\1</h1>", safe, flags=re.MULTILINE)
    safe = re.sub(r"^- (.*)$", r"<li>\1</li>", safe, flags=re.MULTILINE)
    safe = safe.replace("\n", "<br>\n")
    return safe


BLOG_CSS = """
:root { color-scheme: dark; --cyan:#00f5ff; --blue:#2777ff; --violet:#8b5cf6; --amber:#fbbf24; --panel:#07111fcc; --line:#1de7ff66; }
* { box-sizing:border-box; }
body { margin:0; min-height:100vh; color:#dff9ff; line-height:1.7; font-family:'Rajdhani','Orbitron','Eurostile',system-ui,sans-serif; background: radial-gradient(circle at 16% 10%, #1746ff55 0 12rem, transparent 28rem), radial-gradient(circle at 82% 4%, #00f5ff30 0 10rem, transparent 24rem), radial-gradient(circle at 50% 100%, #6d28d955 0 15rem, transparent 34rem), linear-gradient(135deg,#02040a 0%,#07111f 48%,#030712 100%); overflow-x:hidden; }
body::before { content:""; position:fixed; inset:0; pointer-events:none; opacity:.34; background-image: linear-gradient(#00f5ff16 1px, transparent 1px), linear-gradient(90deg,#00f5ff16 1px, transparent 1px), linear-gradient(115deg, transparent 0 48%, #7dd3fc22 50%, transparent 52% 100%); background-size:54px 54px,54px 54px,180px 180px; mask-image:linear-gradient(to bottom,#000 0%,#000 55%,transparent 100%); }
body::after { content:""; position:fixed; inset:0; pointer-events:none; opacity:.14; background:repeating-linear-gradient(to bottom, transparent 0 3px, #ffffff 4px 5px); mix-blend-mode:screen; }
header { position:relative; border-bottom:1px solid var(--line); background:linear-gradient(90deg,#020617dd,#051b33bb,#020617dd); box-shadow:0 0 42px #00d9ff22; }
header::before, header::after { content:""; position:absolute; top:0; bottom:0; width:18vw; border-color:var(--cyan); opacity:.65; pointer-events:none; }
header::before { left:0; border-top:2px solid; border-left:2px solid; clip-path:polygon(0 0,100% 0,35% 100%,0 100%); }
header::after { right:0; border-top:2px solid; border-right:2px solid; clip-path:polygon(0 0,100% 0,100% 100%,65% 100%); }
.wrap { max-width:1180px; margin:0 auto; padding:1.5rem; position:relative; }
.masthead { padding:3rem 1.5rem 2.6rem; text-align:center; }
.kicker { color:var(--cyan); text-transform:uppercase; letter-spacing:.28em; font:800 .78rem system-ui,sans-serif; text-shadow:0 0 14px #00f5ff; }
h1 { margin:.35rem 0; font-size:clamp(2.4rem,7vw,6rem); line-height:.9; text-transform:uppercase; letter-spacing:.05em; color:#f8feff; text-shadow:0 0 12px #00f5ff,0 0 38px #2777ff; }
h2,h3 { color:#c8fbff; line-height:1.15; text-transform:uppercase; letter-spacing:.06em; text-shadow:0 0 12px #00f5ff88; }
article, aside { position:relative; background:linear-gradient(180deg,#071827d9,#050914e6); border:1px solid var(--line); clip-path:polygon(0 18px,18px 0,100% 0,100% calc(100% - 18px),calc(100% - 18px) 100%,0 100%); box-shadow:0 0 0 1px #2777ff22 inset,0 0 34px #00d9ff18,0 24px 60px #000b; }
article::before, aside::before { content:""; position:absolute; inset:0; pointer-events:none; border:1px solid #ffffff12; clip-path:inherit; }
article { padding:clamp(1.1rem,3vw,2.2rem); }
article p, article li { font-size:1.06rem; color:#e6fbff; }
article h1 { font-size:clamp(1.8rem,4vw,3.5rem); text-align:left; }
.layout { display:grid; grid-template-columns:minmax(0,1fr) 310px; gap:1.35rem; align-items:start; }
aside { padding:1.1rem; position:sticky; top:1rem; }
.archive { list-style:none; margin:0; padding:0; }
.archive li { border-bottom:1px solid #22d3ee33; padding:.7rem 0; font-family:ui-monospace,SFMono-Regular,Menlo,monospace; }
.archive li::before { content:"▸ "; color:var(--cyan); text-shadow:0 0 10px var(--cyan); }
.archive li:last-child { border-bottom:0; }
a { color:#67e8f9; text-decoration:none; text-shadow:0 0 9px #00f5ff77; }
a:hover { color:white; text-decoration:none; filter:drop-shadow(0 0 8px var(--cyan)); }
.meta { color:#9eeaff; font:.95rem ui-monospace,SFMono-Regular,Menlo,monospace; letter-spacing:.04em; }
details { margin-top:1.5rem; border-top:1px solid #22d3ee33; padding-top:1rem; }
summary { cursor:pointer; color:var(--amber); text-transform:uppercase; letter-spacing:.08em; }
pre { white-space:pre-wrap; background:#01040acc; color:#bff8ff; padding:1rem; border:1px solid #22d3ee44; border-radius:0; overflow:auto; font-size:.82rem; box-shadow:0 0 22px #00d9ff11 inset; }
footer { color:#7dd3fc; text-align:center; padding:2rem; font:.82rem ui-monospace,SFMono-Regular,Menlo,monospace; text-transform:uppercase; letter-spacing:.12em; }
@media (max-width:850px) { .layout { grid-template-columns:1fr; } aside { position:static; } .masthead { text-align:left; } }
"""


def site_href(relative_path: str = "") -> str:
    """Return a site-absolute path under SITE_BASE_PATH for ``relative_path``."""
    base = SITE_BASE_PATH
    if not base.startswith("/"):
        base = f"/{base}"
    if not base.endswith("/"):
        base = f"{base}/"
    return f"{base}{relative_path.lstrip('/')}"


def site_url(relative_path: str = "") -> str:
    """Return the full URL (SITE_URL + base path) for ``relative_path``."""
    return f"{SITE_URL}{site_href(relative_path)}"


def _long_date(day: datetime) -> str:
    """Format ``day`` like "Monday, January 5, 2026".

    Uses ``day.day`` instead of strftime's "%-d", which is a platform
    extension and raises ValueError where it is not supported.
    """
    return f"{day:%A, %B} {day.day}, {day:%Y}"


def _article_date(stem: str) -> datetime | None:
    """Parse an article file stem of the form YYYY-MM-DD, or return None."""
    try:
        return datetime.strptime(stem, "%Y-%m-%d")
    except ValueError:
        return None


def article_links() -> str:
    """Return ``<li>`` archive entries for every published article, newest first."""
    articles_dir = WEB_DIR / "articles"
    if not articles_dir.exists():
        return _NO_ARTICLES_ITEM
    links = []
    for path in sorted(articles_dir.glob("*.html"), reverse=True):
        day = _article_date(path.stem)
        # Files not named as a date are listed under their raw stem.
        label = _long_date(day) if day else path.stem
        href = site_href(f"articles/{path.name}")
        links.append(f'<li><a href="{html.escape(href)}">{html.escape(label)}</a></li>')
    return "\n".join(links) or _NO_ARTICLES_ITEM


def write_favicon() -> Path:
    """Write WEB_DIR/favicon.svg and return its path."""
    # NOTE(review): the original SVG artwork was not recoverable from the
    # source; this is a minimal placeholder using the site's palette.
    favicon = """<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 64 64">
<rect width="64" height="64" rx="12" fill="#07111f"/>
<path d="M32 12 L54 30 V52 H10 V30 Z" fill="none" stroke="#00f5ff" stroke-width="4" stroke-linejoin="round"/>
<circle cx="32" cy="40" r="5" fill="#fbbf24"/>
</svg>
"""
    WEB_DIR.mkdir(parents=True, exist_ok=True)
    path = WEB_DIR / "favicon.svg"
    path.write_text(favicon, encoding="utf-8")
    return path


def _page_text(page: str) -> str:
    """Return the visible text of an HTML page with whitespace collapsed."""
    # Drop <style>/<script> elements first; otherwise the inline CSS would
    # end up at the start of the extracted text.
    without_blocks = _STYLE_OR_SCRIPT_RE.sub(" ", page)
    text = _TAG_RE.sub(" ", without_blocks)
    return _WHITESPACE_RE.sub(" ", html.unescape(text)).strip()


def write_rss_feed() -> Path:
    """Write WEB_DIR/rss.xml covering the 20 newest articles and return its path."""
    articles_dir = WEB_DIR / "articles"
    items = []
    for path in sorted(articles_dir.glob("*.html"), reverse=True)[:20]:
        day = _article_date(path.stem)
        title = f"Smart Home Briefing - {_long_date(day) if day else path.stem}"
        content = path.read_text(encoding="utf-8", errors="ignore")
        description = _page_text(content)[:500]
        pub_dt = datetime.fromtimestamp(path.stat().st_mtime, timezone.utc)
        url = site_url(f"articles/{path.name}")
        items.append(f"""
<item>
<title>{html.escape(title)}</title>
<link>{html.escape(url)}</link>
<guid>{html.escape(url)}</guid>
<pubDate>{format_datetime(pub_dt, usegmt=True)}</pubDate>
<description>{html.escape(description)}</description>
</item>""")
    now = format_datetime(datetime.now(timezone.utc), usegmt=True)
    feed = f"""<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
<title>Smart Home Gossip Gazette</title>
<link>{html.escape(site_url())}</link>
<description>Daily Home Assistant smart-home briefings from the orbital raccoon telemetry desk.</description>
<language>en</language>
<lastBuildDate>{now}</lastBuildDate>
{''.join(items)}
</channel>
</rss>
"""
    WEB_DIR.mkdir(parents=True, exist_ok=True)
    path = WEB_DIR / "rss.xml"
    path.write_text(feed, encoding="utf-8")
    return path


def blog_shell(title: str, subtitle: str, main_content: str, archive_links: str) -> str:
    """Wrap ``main_content`` in the full HTML page.

    ``title`` and ``subtitle`` are escaped here. ``main_content`` and
    ``archive_links`` are inserted verbatim and must already be safe HTML.
    """
    favicon_href = html.escape(site_href("favicon.svg"))
    rss_href = html.escape(site_href("rss.xml"))
    # NOTE(review): element structure follows the selectors in BLOG_CSS. The
    # aside heading, RSS link text and footer text are reconstructions.
    return f"""<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>{html.escape(title)}</title>
<link rel="icon" type="image/svg+xml" href="{favicon_href}">
<link rel="alternate" type="application/rss+xml" title="RSS" href="{rss_href}">
<style>{BLOG_CSS}</style>
</head>
<body>
<header>
<div class="wrap masthead">
<div class="kicker">◇ orbital home telemetry // raccoon intelligence unit ◇</div>
<h1>{html.escape(title)}</h1>
<p class="meta">{html.escape(subtitle)}</p>
</div>
</header>
<main class="wrap layout">
<article>
{main_content}
</article>
<aside>
<h2>Archive</h2>
<ul class="archive">
{archive_links}
</ul>
<p class="meta"><a href="{rss_href}">RSS feed</a></p>
</aside>
</main>
<footer>Generated locally from Home Assistant data</footer>
</body>
</html>
"""


def publish_webpage(conclusions: str, raw_summary: str) -> Path:
    """Publish today's article, the index page, the favicon and the RSS feed.

    Returns the path of the dated article page. Running twice on the same day
    overwrites that day's article.
    """
    WEB_DIR.mkdir(parents=True, exist_ok=True)
    articles_dir = WEB_DIR / "articles"
    articles_dir.mkdir(parents=True, exist_ok=True)
    now_dt = datetime.now()
    now = now_dt.strftime("%Y-%m-%d %H:%M")
    article_name = f"{now_dt:%Y-%m-%d}.html"
    body = markdownish_to_html(conclusions)
    # The raw bundle is capped at 60000 characters before being embedded.
    raw = html.escape(raw_summary[:60000])
    article_content = f"""
{body}
<details>
<summary>Raw data bundle shown to the AI goblin</summary>
<pre>{raw}</pre>
</details>
"""
    article_path = articles_dir / article_name
    # Create the file first so article_links() already lists today's article.
    article_path.touch(exist_ok=True)
    article_path.write_text(
        blog_shell(
            "Smart Home Gossip Gazette",
            f"Daily home intelligence briefing · Generated {now}",
            article_content,
            article_links(),
        ),
        encoding="utf-8",
    )
    permalink = html.escape(site_href(f"articles/{article_name}"))
    featured = f"""
<p class="meta">Latest article · {html.escape(now)}</p>
{body}
<p><a href="{permalink}">Permanent link for this article →</a></p>
"""
    index_path = WEB_DIR / "index.html"
    index_path.write_text(
        blog_shell(
            "Smart Home Gossip Gazette",
            "A daily blog of your Home Assistant household signals",
            featured,
            article_links(),
        ),
        encoding="utf-8",
    )
    write_favicon()
    write_rss_feed()
    return article_path


def write_markdown_report(summary: str, conclusions: str) -> Path:
    """Write the markdown report (conclusions + data bundle) and return its path."""
    REPORT_DIR.mkdir(parents=True, exist_ok=True)
    stamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    path = REPORT_DIR / f"daily-ai-analysis-{stamp}.md"
    # load_recent_article_context() splits on the "## Data bundle" heading,
    # so that heading must stay exactly as written here.
    path.write_text(
        f"# Daily Home Assistant AI Analysis\n\n{conclusions}\n\n## Data bundle\n\n```text\n{summary}\n```\n",
        encoding="utf-8",
    )
    return path


def cmd_collect() -> int:
    """Collect and store one snapshot, then prune old ones. Returns exit code 0."""
    require_config(for_ai=False)
    snapshot = make_snapshot()
    path = save_snapshot(snapshot)
    cleanup_old_snapshots()
    print(f"Collected snapshot: {path}")
    return 0


def cmd_analyze() -> int:
    """Analyze recent snapshots and publish the report and web page.

    Raises RuntimeError when no recent snapshots exist. Returns exit code 0.
    """
    require_config(for_ai=True)
    snapshots = load_recent_snapshots(ANALYZE_SNAPSHOT_HOURS)
    if not snapshots:
        raise RuntimeError(f"No snapshots found in {DATA_DIR}; run collect first")
    summary = build_daily_summary(snapshots)
    previous_articles = load_recent_article_context(ARTICLE_CONTEXT_DAYS)
    conclusions = get_llm_conclusions(summary, previous_articles)
    md_path = write_markdown_report(summary, conclusions)
    html_path = publish_webpage(conclusions, summary)
    print(f"Wrote report: {md_path}")
    print(f"Published webpage: {html_path}")
    return 0


def main() -> int:
    """Parse the command line, run the selected mode and return an exit code."""
    parser = argparse.ArgumentParser(description="Home Assistant observer")
    parser.add_argument(
        "mode",
        nargs="?",
        default="collect",
        choices=["collect", "analyze"],
        help="collect snapshots or analyze/publish them",
    )
    args = parser.parse_args()
    try:
        return cmd_collect() if args.mode == "collect" else cmd_analyze()
    except Exception as exc:
        # Top-level boundary: report any failure on stderr and exit non-zero.
        print(f"ERROR: {exc}", file=sys.stderr)
        return 1


if __name__ == "__main__":
    raise SystemExit(main())