haobserver/ha_observer.py
2026-05-16 08:59:23 +00:00

632 lines
25 KiB
Python
Executable file

#!/usr/bin/env python3
"""
Home Assistant observer
Modes:
collect - run every 30 minutes; stores a compact JSON snapshot locally
analyze - run at 05:00; sends the last snapshots to AI and publishes a funny local web page
Configuration is via environment variables. See .env.example.
"""
from __future__ import annotations
import argparse
import html
import json
import os
import re
import subprocess
import sys
from datetime import datetime, timedelta, timezone
from email.utils import format_datetime
from pathlib import Path
from typing import Any
import requests
# --- Home Assistant connection -------------------------------------------
# Trailing slashes are removed so request paths can be appended directly.
HA_URL = os.getenv("HA_URL", "").rstrip("/")
HA_TOKEN = os.getenv("HA_TOKEN", "")

# --- Local storage and published site ------------------------------------
DATA_DIR = Path(os.getenv("DATA_DIR", "./data"))
REPORT_DIR = Path(os.getenv("REPORT_DIR", "./reports"))
WEB_DIR = Path(os.getenv("WEB_DIR", "./web"))
# A blank SITE_BASE_PATH falls back to the site root.
SITE_BASE_PATH = os.getenv("SITE_BASE_PATH", "/").strip() or "/"
SITE_URL = os.getenv("SITE_URL", "http://localhost").rstrip("/")
PROMPT_FILE = Path(os.getenv("PROMPT_FILE", "./llm_instructions.md"))

# --- Time windows and retention ------------------------------------------
HISTORY_HOURS = int(os.getenv("HISTORY_HOURS", "24"))
MAX_HISTORY_PER_ENTITY = int(os.getenv("MAX_HISTORY_PER_ENTITY", "20"))
ANALYZE_SNAPSHOT_HOURS = int(os.getenv("ANALYZE_SNAPSHOT_HOURS", "24"))
ARTICLE_CONTEXT_DAYS = int(os.getenv("ARTICLE_CONTEXT_DAYS", "7"))
KEEP_SNAPSHOT_DAYS = int(os.getenv("KEEP_SNAPSHOT_DAYS", "14"))

# --- LLM backend ----------------------------------------------------------
# LLM_MODE: none | pi | ollama | openai
LLM_MODE = os.getenv("LLM_MODE", "none").lower()
OLLAMA_URL = os.getenv("OLLAMA_URL", "http://localhost:11434").rstrip("/")
OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "llama3.1")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
OPENAI_MODEL = os.getenv("OPENAI_MODEL", "gpt-4o-mini")
PI_BIN = os.getenv("PI_BIN", "pi")
PI_MODEL = os.getenv("PI_MODEL", "")
PI_TIMEOUT = int(os.getenv("PI_TIMEOUT", "600"))

# --- Entity filtering -----------------------------------------------------
# Comma-separated lists; surrounding whitespace and empty items are dropped.
RELEVANT_DOMAINS = {
    domain.strip()
    for domain in os.getenv(
        "RELEVANT_DOMAINS",
        "sensor,binary_sensor,person,device_tracker,climate,light,switch,lock,cover,alarm_control_panel,media_player,calendar,weather",
    ).split(",")
    if domain.strip()
}
EXCLUDED_ENTITIES = {
    entity.strip()
    for entity in os.getenv("EXCLUDED_ENTITIES", "").split(",")
    if entity.strip()
}
# Only these attribute keys are copied into a snapshot.
ALLOWED_ATTRIBUTES = {
    "friendly_name", "unit_of_measurement", "device_class", "state_class",
    "current_temperature", "temperature", "humidity", "battery_level",
    "brightness", "gps_accuracy", "source_type", "assumed_state",
}
class ConfigError(RuntimeError):
    """Raised when a required configuration value is missing."""
def require_config(for_ai: bool = False) -> None:
    """Validate the environment-derived settings.

    Args:
        for_ai: When true, additionally require an OpenAI API key if
            ``LLM_MODE`` is ``openai``.

    Raises:
        ConfigError: If ``HA_URL`` or ``HA_TOKEN`` is empty, or the OpenAI
            key is required but missing.
    """
    for setting_name, setting_value in (("HA_URL", HA_URL), ("HA_TOKEN", HA_TOKEN)):
        if not setting_value:
            raise ConfigError(f"{setting_name} is not set")
    openai_key_missing = LLM_MODE == "openai" and not OPENAI_API_KEY
    if for_ai and openai_key_missing:
        raise ConfigError("LLM_MODE=openai but OPENAI_API_KEY is not set")
def ha_get(path: str, params: dict[str, str] | None = None) -> Any:
    """Perform an authenticated GET against Home Assistant and decode JSON.

    Args:
        path: Path appended to ``HA_URL`` (expected to start with ``/``).
        params: Optional query-string parameters.

    Returns:
        The decoded JSON body.

    Raises:
        requests.HTTPError: On an error status; the message carries the first
            500 characters of the response body.
    """
    auth_headers = {
        "Authorization": f"Bearer {HA_TOKEN}",
        "Content-Type": "application/json",
    }
    reply = requests.get(f"{HA_URL}{path}", headers=auth_headers, params=params, timeout=60)
    try:
        reply.raise_for_status()
    except requests.HTTPError as exc:
        # Re-raise with a body excerpt so the caller sees why HA refused.
        body_excerpt = reply.text.strip()[:500]
        raise requests.HTTPError(f"{exc}; response={body_excerpt}", response=reply) from exc
    else:
        return reply.json()
def is_relevant_entity(entity_id: str) -> bool:
    """Return True if the entity is not excluded and its domain is relevant.

    The domain is the part of ``entity_id`` before the first dot.
    """
    if entity_id in EXCLUDED_ENTITIES:
        return False
    domain, _, _ = entity_id.partition(".")
    return domain in RELEVANT_DOMAINS
def compact_attributes(attrs: dict[str, Any]) -> dict[str, Any]:
    """Return a copy of ``attrs`` limited to keys in ``ALLOWED_ATTRIBUTES``."""
    kept: dict[str, Any] = {}
    for key, value in attrs.items():
        if key in ALLOWED_ATTRIBUTES:
            kept[key] = value
    return kept
def get_states() -> list[dict[str, Any]]:
    """Fetch current states from ``/api/states`` and keep the useful ones.

    Entities that are not relevant, or whose state is ``unknown``,
    ``unavailable`` or missing, are skipped.

    Returns:
        Compact state records sorted by ``entity_id``.
    """
    skipped_states = {"unknown", "unavailable", None}
    records = [
        {
            "entity_id": raw.get("entity_id", ""),
            "state": raw.get("state"),
            "attributes": compact_attributes(raw.get("attributes", {})),
            "last_changed": raw.get("last_changed"),
            "last_updated": raw.get("last_updated"),
        }
        for raw in ha_get("/api/states")
        if is_relevant_entity(raw.get("entity_id", "")) and raw.get("state") not in skipped_states
    ]
    records.sort(key=lambda record: record["entity_id"])
    return records
def get_history(hours: int, entity_ids: list[str]) -> list[dict[str, Any]]:
    """Collect recent state changes for the given entities.

    Args:
        hours: How far back (from now, UTC) the history window starts.
        entity_ids: Entities to query.

    Returns:
        One record per entity that showed more than one distinct usable
        state, sorted by ``entity_id``. Each record holds at most
        ``MAX_HISTORY_PER_ENTITY`` trailing entries, minus unusable states.
    """
    skipped_states = {"unknown", "unavailable", None}
    # Recent Home Assistant versions/configurations require filter_entity_id
    # for the history endpoint. Query in batches to avoid an overlong URL.
    batch_size = 50
    window_start = datetime.now(timezone.utc) - timedelta(hours=hours)
    endpoint = f"/api/history/period/{window_start.isoformat(timespec='seconds')}"

    changed: list[dict[str, Any]] = []
    for offset in range(0, len(entity_ids), batch_size):
        batch = entity_ids[offset : offset + batch_size]
        payload = ha_get(
            endpoint,
            params={"filter_entity_id": ",".join(batch), "minimal_response": ""},
        )
        for series in payload:
            if not series:
                continue
            # The entity id is read from the first entry of each series.
            entity_id = series[0].get("entity_id", "")
            if not is_relevant_entity(entity_id):
                continue
            recent = [
                {"state": entry.get("state"), "last_changed": entry.get("last_changed")}
                for entry in series[-MAX_HISTORY_PER_ENTITY:]
                if entry.get("state") not in skipped_states
            ]
            distinct_states = {entry["state"] for entry in recent}
            if len(distinct_states) > 1:
                changed.append({"entity_id": entity_id, "recent_states": recent})
    changed.sort(key=lambda record: record["entity_id"])
    return changed
def make_snapshot() -> dict[str, Any]:
    """Build one snapshot: current states plus recent history for them.

    Returns:
        A dict with ``generated_at`` (local time, ISO format), the history
        window in hours, the state list and the history list.
    """
    current_states = get_states()
    # Stamp before the history queries, which may take a while.
    generated_at = datetime.now().isoformat(timespec="seconds")
    tracked_ids = [record["entity_id"] for record in current_states]
    recent_changes = get_history(HISTORY_HOURS, tracked_ids)
    return {
        "generated_at": generated_at,
        "history_hours": HISTORY_HOURS,
        "states": current_states,
        "history": recent_changes,
    }
def save_snapshot(snapshot: dict[str, Any]) -> Path:
    """Write ``snapshot`` as indented JSON into ``DATA_DIR``.

    The directory is created if needed and the file name carries the current
    local timestamp.

    Returns:
        The path of the file that was written.
    """
    DATA_DIR.mkdir(parents=True, exist_ok=True)
    target = DATA_DIR / f"snapshot-{datetime.now():%Y-%m-%d_%H-%M-%S}.json"
    serialized = json.dumps(snapshot, indent=2, ensure_ascii=False)
    target.write_text(serialized, encoding="utf-8")
    return target
def cleanup_old_snapshots() -> None:
    """Delete snapshot files whose mtime is older than ``KEEP_SNAPSHOT_DAYS``."""
    oldest_allowed = datetime.now() - timedelta(days=KEEP_SNAPSHOT_DAYS)
    for snapshot_file in DATA_DIR.glob("snapshot-*.json"):
        modified = datetime.fromtimestamp(snapshot_file.stat().st_mtime)
        if modified >= oldest_allowed:
            continue
        snapshot_file.unlink(missing_ok=True)
def load_recent_snapshots(hours: int) -> list[dict[str, Any]]:
    """Load snapshots modified within the last ``hours`` hours.

    Files are visited in sorted name order. A file that cannot be read or
    parsed is reported on stderr and skipped.
    """
    oldest_allowed = datetime.now() - timedelta(hours=hours)
    loaded = []
    for snapshot_file in sorted(DATA_DIR.glob("snapshot-*.json")):
        modified = datetime.fromtimestamp(snapshot_file.stat().st_mtime)
        if modified >= oldest_allowed:
            try:
                loaded.append(json.loads(snapshot_file.read_text(encoding="utf-8")))
            except Exception as exc:
                print(f"Skipping unreadable snapshot {snapshot_file}: {exc}", file=sys.stderr)
    return loaded
def summarize_snapshot(snapshot: dict[str, Any]) -> str:
    """Render one snapshot as plain text for the LLM prompt.

    The output lists every current state (friendly name, entity id, value
    with unit, last change) followed by up to the last eight transitions of
    each recently changed entity.
    """
    out = [f"Snapshot: {snapshot.get('generated_at')}", "Current states:"]
    for record in snapshot.get("states", []):
        attributes = record.get("attributes", {})
        entity_id = record.get("entity_id")
        display_name = attributes.get("friendly_name", entity_id)
        unit = attributes.get("unit_of_measurement", "")
        # strip() removes the trailing space left when there is no unit.
        reading = f"{record.get('state')} {unit}".strip()
        out.append(f"- {display_name} ({entity_id}): {reading}; last_changed={record.get('last_changed')}")
    out.append("Recently changed entities:")
    for change in snapshot.get("history", []):
        latest = change.get("recent_states", [])[-8:]
        rendered = [f"{step.get('state')} @ {step.get('last_changed')}" for step in latest]
        out.append(f"- {change.get('entity_id')}: {', '.join(rendered)}")
    return "\n".join(out)
def build_daily_summary(snapshots: list[dict[str, Any]]) -> str:
    """Join all snapshot summaries into one text bundle.

    The bundle starts with two header lines (generation time and snapshot
    count); each snapshot section is preceded by a ``---`` separator.
    """
    header = [
        f"Daily Home Assistant bundle generated {datetime.now().isoformat(timespec='seconds')}",
        f"Contains {len(snapshots)} snapshots from roughly the last {ANALYZE_SNAPSHOT_HOURS} hours.",
    ]
    sections = ["\n---\n" + summarize_snapshot(snapshot) for snapshot in snapshots]
    return "\n".join(header + sections)
def read_extra_llm_instructions() -> str:
    """Return the stripped contents of ``PROMPT_FILE``, or ``""`` if absent."""
    if PROMPT_FILE.exists():
        return PROMPT_FILE.read_text(encoding="utf-8").strip()
    return ""
def load_recent_article_context(days: int) -> str:
    """Gather the conclusions of recent markdown reports as LLM context.

    Args:
        days: Only reports modified within this many days are used; a value
            of zero or less disables the feature.

    Returns:
        Up to the last seven excerpts (each capped at 8000 characters),
        separated by ``---`` rules, or ``""`` when nothing applies.
    """
    if days <= 0 or not REPORT_DIR.exists():
        return ""
    oldest_allowed = datetime.now() - timedelta(days=days)
    excerpts: list[str] = []
    for report in sorted(REPORT_DIR.glob("daily-ai-analysis-*.md")):
        modified = datetime.fromtimestamp(report.stat().st_mtime)
        if modified < oldest_allowed:
            continue
        try:
            body = report.read_text(encoding="utf-8")
        except Exception as exc:
            print(f"Skipping unreadable previous report {report}: {exc}", file=sys.stderr)
            continue
        # Keep only the part before the raw data bundle section.
        head, _, _ = body.partition("\n## Data bundle\n")
        excerpts.append(f"PREVIOUS ARTICLE {report.name}:\n{head.strip()[:8000]}")
    return "\n\n---\n\n".join(excerpts[-7:])
def analysis_prompt(input_summary: str, previous_articles: str = "") -> str:
    """Assemble the full LLM prompt for today's article.

    Args:
        input_summary: Text bundle describing today's data.
        previous_articles: Optional excerpts of earlier articles.

    Returns:
        The base instructions, followed by the owner's extra instructions
        (if ``PROMPT_FILE`` has any), the previous-article context (if any),
        and today's data.
    """
    owner_notes = read_extra_llm_instructions()
    owner_section = (
        f"""
ADDITIONAL OWNER INSTRUCTIONS FROM {PROMPT_FILE}:
{owner_notes}
"""
        if owner_notes
        else ""
    )
    history_section = (
        f"""
PREVIOUS ARTICLES FROM THE LAST {ARTICLE_CONTEXT_DAYS} DAYS FOR CONTEXT:
Use these only for trend/context awareness. Do not claim something happened today unless today's data supports it.
{previous_articles}
"""
        if previous_articles
        else ""
    )
    return f"""You are writing today's Home Assistant smart-home blog article for the owner.
Write a funny but useful morning briefing in a blog/article style. Use light humor, emojis,
and playful headings, but remain factual and privacy-aware. Include:
- A short comedy headline for the day
- What seemed to happen at home today
- Behavioral patterns that can reasonably be inferred
- Notable trends compared with recent previous articles, if supported
- What a nosy raccoon/hacker could figure out about the resident
- Anomalies, risks, or privacy/security concerns
- Suggested Home Assistant automations or fixes
Distinguish strong evidence from guesses. Do not invent facts not supported by the data.
{owner_section}{history_section}
TODAY'S DATA:
{input_summary}
"""
def call_ollama(prompt: str) -> str:
    """Send ``prompt`` to Ollama's ``/api/generate`` and return the reply text.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    request_body = {"model": OLLAMA_MODEL, "prompt": prompt, "stream": False}
    reply = requests.post(f"{OLLAMA_URL}/api/generate", json=request_body, timeout=300)
    reply.raise_for_status()
    generated = reply.json().get("response", "")
    return generated.strip()
def call_openai(prompt: str) -> str:
    """Send ``prompt`` to the OpenAI chat completions API.

    Returns:
        The stripped content of the first choice's message.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    auth_headers = {
        "Authorization": f"Bearer {OPENAI_API_KEY}",
        "Content-Type": "application/json",
    }
    conversation = [
        {"role": "system", "content": "You are a careful but funny smart-home analyst."},
        {"role": "user", "content": prompt},
    ]
    request_body = {"model": OPENAI_MODEL, "messages": conversation, "temperature": 0.35}
    reply = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=auth_headers,
        json=request_body,
        timeout=300,
    )
    reply.raise_for_status()
    first_choice = reply.json()["choices"][0]
    return first_choice["message"]["content"].strip()
def call_pi(prompt: str) -> str:
    """Run the ``pi`` CLI with ``prompt`` on stdin and return its stdout.

    The command is ``PI_BIN --no-tools``, an optional ``--model PI_MODEL``,
    and a fixed ``-p`` instruction. The run is limited to ``PI_TIMEOUT``
    seconds.

    Raises:
        RuntimeError: If the process exits non-zero; the message includes
            the last 1000 characters of stderr.
    """
    model_args = ["--model", PI_MODEL] if PI_MODEL else []
    argv = [
        PI_BIN,
        "--no-tools",
        *model_args,
        "-p",
        "Analyze the Home Assistant data from stdin and write the requested briefing.",
    ]
    completed = subprocess.run(
        argv,
        input=prompt,
        text=True,
        capture_output=True,
        timeout=PI_TIMEOUT,
        check=False,
    )
    if completed.returncode == 0:
        return completed.stdout.strip()
    stderr_tail = completed.stderr.strip()[-1000:]
    raise RuntimeError(f"pi exited with status {completed.returncode}: {stderr_tail}")
def get_llm_conclusions(input_summary: str, previous_articles: str = "") -> str:
    """Produce the article text using the backend selected by ``LLM_MODE``.

    Returns a fixed notice when the mode is ``none`` and an explanatory
    message when the mode is not recognised; otherwise the backend's reply.
    """
    if LLM_MODE == "none":
        return "AI analysis disabled. Set LLM_MODE=pi, LLM_MODE=ollama, or LLM_MODE=openai in .env. The raccoon analyst is asleep. 🦝💤"
    prompt = analysis_prompt(input_summary, previous_articles)
    backends = {"ollama": call_ollama, "openai": call_openai, "pi": call_pi}
    backend = backends.get(LLM_MODE)
    if backend is None:
        return f"Unknown LLM_MODE={LLM_MODE!r}. Use none, pi, ollama, or openai."
    return backend(prompt)
def markdownish_to_html(text: str) -> str:
    """Convert a small markdown subset to HTML, escaping everything else.

    Supported syntax, recognised only at the start of a line:
    ``# ``, ``## ``, ``### `` headings and ``- `` bullet items. All other
    text is HTML-escaped and kept verbatim.

    Consecutive bullet items are wrapped in one ``<ul>`` element, so no
    ``<li>`` is emitted outside a list. A ``<br>`` is inserted only after
    plain-text lines, never after block-level markup. Input without any
    headings or bullets yields the same output as a plain "escape and turn
    newlines into ``<br>``" conversion.

    Args:
        text: Untrusted markdown-like text (for example LLM output).

    Returns:
        An HTML fragment.
    """
    # Each fragment is (markup, is_block_level).
    fragments: list[tuple[str, bool]] = []
    inside_list = False
    # Escape first so that only the tags generated below are live markup.
    for line in html.escape(text).split("\n"):
        bullet = re.match(r"- (.*)$", line)
        if bullet:
            if not inside_list:
                fragments.append(("<ul>", True))
                inside_list = True
            fragments.append((f"<li>{bullet.group(1)}</li>", True))
            continue
        if inside_list:
            fragments.append(("</ul>", True))
            inside_list = False
        heading = re.match(r"(#{1,3}) (.*)$", line)
        if heading:
            level = len(heading.group(1))
            fragments.append((f"<h{level}>{heading.group(2)}</h{level}>", True))
        else:
            fragments.append((line, False))
    if inside_list:
        fragments.append(("</ul>", True))

    pieces: list[str] = []
    last_index = len(fragments) - 1
    for index, (markup, is_block) in enumerate(fragments):
        pieces.append(markup)
        if index != last_index:
            # Block elements already break the line; only text needs <br>.
            pieces.append("\n" if is_block else "<br>\n")
    return "".join(pieces)
BLOG_CSS = """
:root { color-scheme: dark; --cyan:#00f5ff; --blue:#2777ff; --violet:#8b5cf6; --amber:#fbbf24; --panel:#07111fcc; --line:#1de7ff66; }
* { box-sizing:border-box; }
body {
margin:0; min-height:100vh; color:#dff9ff; line-height:1.7;
font-family:'Rajdhani','Orbitron','Eurostile',system-ui,sans-serif;
background:
radial-gradient(circle at 16% 10%, #1746ff55 0 12rem, transparent 28rem),
radial-gradient(circle at 82% 4%, #00f5ff30 0 10rem, transparent 24rem),
radial-gradient(circle at 50% 100%, #6d28d955 0 15rem, transparent 34rem),
linear-gradient(135deg,#02040a 0%,#07111f 48%,#030712 100%);
overflow-x:hidden;
}
body::before {
content:""; position:fixed; inset:0; pointer-events:none; opacity:.34;
background-image:
linear-gradient(#00f5ff16 1px, transparent 1px),
linear-gradient(90deg,#00f5ff16 1px, transparent 1px),
linear-gradient(115deg, transparent 0 48%, #7dd3fc22 50%, transparent 52% 100%);
background-size:54px 54px,54px 54px,180px 180px;
mask-image:linear-gradient(to bottom,#000 0%,#000 55%,transparent 100%);
}
body::after {
content:""; position:fixed; inset:0; pointer-events:none; opacity:.14;
background:repeating-linear-gradient(to bottom, transparent 0 3px, #ffffff 4px 5px);
mix-blend-mode:screen;
}
header { position:relative; border-bottom:1px solid var(--line); background:linear-gradient(90deg,#020617dd,#051b33bb,#020617dd); box-shadow:0 0 42px #00d9ff22; }
header::before, header::after { content:""; position:absolute; top:0; bottom:0; width:18vw; border-color:var(--cyan); opacity:.65; pointer-events:none; }
header::before { left:0; border-top:2px solid; border-left:2px solid; clip-path:polygon(0 0,100% 0,35% 100%,0 100%); }
header::after { right:0; border-top:2px solid; border-right:2px solid; clip-path:polygon(0 0,100% 0,100% 100%,65% 100%); }
.wrap { max-width:1180px; margin:0 auto; padding:1.5rem; position:relative; }
.masthead { padding:3rem 1.5rem 2.6rem; text-align:center; }
.kicker { color:var(--cyan); text-transform:uppercase; letter-spacing:.28em; font:800 .78rem system-ui,sans-serif; text-shadow:0 0 14px #00f5ff; }
h1 { margin:.35rem 0; font-size:clamp(2.4rem,7vw,6rem); line-height:.9; text-transform:uppercase; letter-spacing:.05em; color:#f8feff; text-shadow:0 0 12px #00f5ff,0 0 38px #2777ff; }
h2,h3 { color:#c8fbff; line-height:1.15; text-transform:uppercase; letter-spacing:.06em; text-shadow:0 0 12px #00f5ff88; }
article, aside {
position:relative; background:linear-gradient(180deg,#071827d9,#050914e6); border:1px solid var(--line);
clip-path:polygon(0 18px,18px 0,100% 0,100% calc(100% - 18px),calc(100% - 18px) 100%,0 100%);
box-shadow:0 0 0 1px #2777ff22 inset,0 0 34px #00d9ff18,0 24px 60px #000b;
}
article::before, aside::before { content:""; position:absolute; inset:0; pointer-events:none; border:1px solid #ffffff12; clip-path:inherit; }
article { padding:clamp(1.1rem,3vw,2.2rem); }
article p, article li { font-size:1.06rem; color:#e6fbff; }
article h1 { font-size:clamp(1.8rem,4vw,3.5rem); text-align:left; }
.layout { display:grid; grid-template-columns:minmax(0,1fr) 310px; gap:1.35rem; align-items:start; }
aside { padding:1.1rem; position:sticky; top:1rem; }
.archive { list-style:none; margin:0; padding:0; }
.archive li { border-bottom:1px solid #22d3ee33; padding:.7rem 0; font-family:ui-monospace,SFMono-Regular,Menlo,monospace; }
.archive li::before { content:""; color:var(--cyan); text-shadow:0 0 10px var(--cyan); }
.archive li:last-child { border-bottom:0; }
a { color:#67e8f9; text-decoration:none; text-shadow:0 0 9px #00f5ff77; }
a:hover { color:white; text-decoration:none; filter:drop-shadow(0 0 8px var(--cyan)); }
.meta { color:#9eeaff; font:.95rem ui-monospace,SFMono-Regular,Menlo,monospace; letter-spacing:.04em; }
details { margin-top:1.5rem; border-top:1px solid #22d3ee33; padding-top:1rem; }
summary { cursor:pointer; color:var(--amber); text-transform:uppercase; letter-spacing:.08em; }
pre { white-space:pre-wrap; background:#01040acc; color:#bff8ff; padding:1rem; border:1px solid #22d3ee44; border-radius:0; overflow:auto; font-size:.82rem; box-shadow:0 0 22px #00d9ff11 inset; }
footer { color:#7dd3fc; text-align:center; padding:2rem; font:.82rem ui-monospace,SFMono-Regular,Menlo,monospace; text-transform:uppercase; letter-spacing:.12em; }
@media (max-width:850px) { .layout { grid-template-columns:1fr; } aside { position:static; } .masthead { text-align:left; } }
"""
def site_href(relative_path: str = "") -> str:
    """Return a site-absolute path for ``relative_path``.

    ``SITE_BASE_PATH`` is normalised to start and end with ``/`` and leading
    slashes are removed from ``relative_path`` before joining.
    """
    prefix = SITE_BASE_PATH if SITE_BASE_PATH.startswith("/") else f"/{SITE_BASE_PATH}"
    if not prefix.endswith("/"):
        prefix += "/"
    return prefix + relative_path.lstrip("/")
def site_url(relative_path: str = "") -> str:
    """Return the absolute URL: ``SITE_URL`` followed by the site path."""
    absolute_path = site_href(relative_path)
    return SITE_URL + absolute_path
def article_links() -> str:
    """Build the ``<li>`` archive entries for every published article.

    Articles are the ``*.html`` files in ``WEB_DIR / "articles"``, listed in
    reverse name order. A file named ``YYYY-MM-DD.html`` is labelled with a
    long date such as ``Saturday, May 16, 2026``; any other file is labelled
    with its stem.

    Returns:
        Newline-joined ``<li>`` elements, or a single placeholder item when
        there are no articles.
    """
    placeholder = "<li>No articles yet. The raccoon newsroom is warming up.</li>"
    articles_dir = WEB_DIR / "articles"
    if not articles_dir.exists():
        return placeholder
    links = []
    for path in sorted(articles_dir.glob("*.html"), reverse=True):
        try:
            day = datetime.strptime(path.stem, "%Y-%m-%d")
        except ValueError:
            # Not a date-named file: fall back to the raw stem.
            label = path.stem
        else:
            # "%-d" (day without zero padding) is a platform-specific strftime
            # extension, so the day number is formatted directly instead.
            label = f"{day:%A, %B} {day.day}, {day.year}"
        href = site_href(f"articles/{path.name}")
        links.append(f'<li><a href="{html.escape(href)}">{html.escape(label)}</a></li>')
    return "\n".join(links) or placeholder
def write_favicon() -> Path:
    """Write the static SVG favicon to ``WEB_DIR / "favicon.svg"``.

    Returns:
        The path of the written file.
    """
    target = WEB_DIR / "favicon.svg"
    svg_markup = """<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 64 64">
<defs>
<radialGradient id="g" cx="50%" cy="45%" r="70%">
<stop offset="0" stop-color="#67e8f9"/>
<stop offset="0.45" stop-color="#2777ff"/>
<stop offset="1" stop-color="#020617"/>
</radialGradient>
<filter id="glow"><feGaussianBlur stdDeviation="1.8" result="b"/><feMerge><feMergeNode in="b"/><feMergeNode in="SourceGraphic"/></feMerge></filter>
</defs>
<rect width="64" height="64" rx="12" fill="#020617"/>
<path d="M8 32h48M32 8v48M14 18l36 28M50 18L14 46" stroke="#00f5ff" stroke-width="1.3" opacity=".45"/>
<circle cx="32" cy="32" r="18" fill="url(#g)" stroke="#9effff" stroke-width="2" filter="url(#glow)"/>
<circle cx="25" cy="28" r="3" fill="#020617"/>
<circle cx="39" cy="28" r="3" fill="#020617"/>
<path d="M23 39c6 4 12 4 18 0" stroke="#020617" stroke-width="3" fill="none" stroke-linecap="round"/>
<path d="M7 32c10-16 40-16 50 0-10 16-40 16-50 0Z" fill="none" stroke="#fbbf24" stroke-width="2" opacity=".9"/>
</svg>
"""
    target.write_text(svg_markup, encoding="utf-8")
    return target
def write_rss_feed() -> Path:
    """Write ``WEB_DIR / "rss.xml"`` covering the 20 newest articles.

    Each item's title comes from the file name (a long date when the stem is
    ``YYYY-MM-DD``), its publication date from the file's mtime, and its
    description from the first 500 characters of the article text.

    The description is taken from the page's ``<article>`` element only,
    with ``<style>``, ``<script>`` and ``<details>`` sections removed.
    Stripping tags from the whole page would instead fill the description
    with the page title and the inline stylesheet.

    Returns:
        The path of the written feed.
    """
    articles_dir = WEB_DIR / "articles"
    items = []
    for path in sorted(articles_dir.glob("*.html"), reverse=True)[:20]:
        try:
            day = datetime.strptime(path.stem, "%Y-%m-%d")
        except ValueError:
            title = f"Smart Home Briefing - {path.stem}"
        else:
            # "%-d" is a platform-specific strftime extension; format the day
            # number directly so the title is the same on every platform.
            title = f"Smart Home Briefing - {day:%A, %B} {day.day}, {day.year}"

        content = path.read_text(encoding="utf-8", errors="ignore")
        article = re.search(r"<article>(.*?)</article>", content, flags=re.DOTALL)
        fragment = article.group(1) if article else content
        # Drop non-prose sections (CSS, scripts, the raw data bundle) whole,
        # including their text content, before stripping the remaining tags.
        fragment = re.sub(
            r"<(style|script|details)\b.*?</\1\s*>",
            " ",
            fragment,
            flags=re.DOTALL | re.IGNORECASE,
        )
        description = re.sub(r"<[^>]+>", " ", fragment)
        description = re.sub(r"\s+", " ", html.unescape(description)).strip()[:500]

        pub_dt = datetime.fromtimestamp(path.stat().st_mtime, timezone.utc)
        url = site_url(f"articles/{path.name}")
        items.append(f"""
<item>
<title>{html.escape(title)}</title>
<link>{html.escape(url)}</link>
<guid isPermaLink="true">{html.escape(url)}</guid>
<pubDate>{format_datetime(pub_dt, usegmt=True)}</pubDate>
<description>{html.escape(description)}</description>
</item>""")
    now = format_datetime(datetime.now(timezone.utc), usegmt=True)
    feed = f"""<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
<title>Smart Home Gossip Gazette</title>
<link>{html.escape(site_url())}</link>
<description>Daily Home Assistant smart-home briefings from the orbital raccoon telemetry desk.</description>
<language>en</language>
<lastBuildDate>{now}</lastBuildDate>
{''.join(items)}
</channel>
</rss>
"""
    path = WEB_DIR / "rss.xml"
    path.write_text(feed, encoding="utf-8")
    return path
def blog_shell(title: str, subtitle: str, main_content: str, archive_links: str) -> str:
    """Wrap page content in the complete HTML document shell.

    Args:
        title: Page title; escaped and used for ``<title>`` and the masthead.
        subtitle: Escaped and shown under the masthead title.
        main_content: HTML inserted as-is into the main section.
        archive_links: HTML ``<li>`` items inserted as-is into the sidebar.

    Returns:
        The full HTML page as a string.
    """
    page_title = html.escape(title)
    tagline = html.escape(subtitle)
    rss_href = html.escape(site_href("rss.xml"))
    icon_href = html.escape(site_href("favicon.svg"))
    return f"""<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>{page_title}</title>
<link rel="alternate" type="application/rss+xml" title="Smart Home Gossip Gazette RSS" href="{rss_href}">
<link rel="icon" href="{icon_href}" type="image/svg+xml">
<style>{BLOG_CSS}</style>
</head>
<body>
<header>
<div class="wrap masthead">
<div class="kicker">◇ orbital home telemetry // raccoon intelligence unit ◇</div>
<h1>{page_title}</h1>
<p class="meta">{tagline}</p>
</div>
</header>
<main class="wrap layout">
<section>{main_content}</section>
<aside>
<h2>Transmission archive</h2>
<p class="meta"><a href="{rss_href}">RSS feed</a></p>
<ul class="archive">{archive_links}</ul>
</aside>
</main>
<footer>Generated by Home Assistant Observer · Local nginx uplink active</footer>
</body>
</html>
"""
def publish_webpage(conclusions: str, raw_summary: str) -> Path:
    """Publish today's article, the index page, the favicon and the RSS feed.

    Args:
        conclusions: Article text in the supported markdown subset.
        raw_summary: Raw data bundle; its first 60000 characters are shown
            escaped in a collapsible section of the article page.

    Returns:
        The path of today's article (``articles/YYYY-MM-DD.html``).
    """
    WEB_DIR.mkdir(parents=True, exist_ok=True)
    articles_dir = WEB_DIR / "articles"
    articles_dir.mkdir(parents=True, exist_ok=True)

    published_at = datetime.now()
    stamp = published_at.strftime("%Y-%m-%d %H:%M")
    file_name = f"{published_at:%Y-%m-%d}.html"
    rendered_body = markdownish_to_html(conclusions)
    escaped_bundle = html.escape(raw_summary[:60000])
    site_title = "Smart Home Gossip Gazette"

    article_markup = f"""
<article>
{rendered_body}
<details>
<summary>Raw data bundle shown to the AI goblin</summary>
<pre>{escaped_bundle}</pre>
</details>
</article>
"""
    article_path = articles_dir / file_name
    # Create the file before rendering so the archive list includes it.
    article_path.touch(exist_ok=True)
    article_page = blog_shell(
        site_title,
        f"Daily home intelligence briefing · Generated {stamp}",
        article_markup,
        article_links(),
    )
    article_path.write_text(article_page, encoding="utf-8")

    permalink = html.escape(site_href(f"articles/{file_name}"))
    featured_markup = f"""
<article>
<p class="meta">Latest article · {html.escape(stamp)}</p>
{rendered_body}
<p><a href="{permalink}">Permanent link for this article →</a></p>
</article>
"""
    index_page = blog_shell(
        site_title,
        "A daily blog of your Home Assistant household signals",
        featured_markup,
        article_links(),
    )
    (WEB_DIR / "index.html").write_text(index_page, encoding="utf-8")

    write_favicon()
    write_rss_feed()
    return article_path
def write_markdown_report(summary: str, conclusions: str) -> Path:
    """Save the conclusions and the data bundle as a markdown report.

    The file goes into ``REPORT_DIR`` (created if needed) under a
    timestamped ``daily-ai-analysis-*.md`` name.

    Returns:
        The path of the written report.
    """
    REPORT_DIR.mkdir(parents=True, exist_ok=True)
    target = REPORT_DIR / f"daily-ai-analysis-{datetime.now():%Y-%m-%d_%H-%M-%S}.md"
    document = f"# Daily Home Assistant AI Analysis\n\n{conclusions}\n\n## Data bundle\n\n```text\n{summary}\n```\n"
    target.write_text(document, encoding="utf-8")
    return target
def cmd_collect() -> int:
    """Run ``collect``: take a snapshot, store it and prune old ones.

    Returns:
        Exit status 0.
    """
    require_config(for_ai=False)
    saved_to = save_snapshot(make_snapshot())
    cleanup_old_snapshots()
    print(f"Collected snapshot: {saved_to}")
    return 0
def cmd_analyze() -> int:
    """Run ``analyze``: summarise recent snapshots, query the LLM, publish.

    Returns:
        Exit status 0.

    Raises:
        RuntimeError: If no recent snapshots are available.
    """
    require_config(for_ai=True)
    recent = load_recent_snapshots(ANALYZE_SNAPSHOT_HOURS)
    if not recent:
        raise RuntimeError(f"No snapshots found in {DATA_DIR}; run collect first")
    bundle = build_daily_summary(recent)
    context = load_recent_article_context(ARTICLE_CONTEXT_DAYS)
    article_text = get_llm_conclusions(bundle, context)
    report_path = write_markdown_report(bundle, article_text)
    page_path = publish_webpage(article_text, bundle)
    print(f"Wrote report: {report_path}")
    print(f"Published webpage: {page_path}")
    return 0
def main() -> int:
    """Parse the command line and run the selected mode.

    Returns:
        The mode's exit status, or 1 after printing the error to stderr if
        the mode raised an exception.
    """
    parser = argparse.ArgumentParser(description="Home Assistant observer")
    parser.add_argument(
        "mode",
        nargs="?",
        default="collect",
        choices=["collect", "analyze"],
        help="collect snapshots or analyze/publish them",
    )
    selected = parser.parse_args().mode
    try:
        if selected == "collect":
            return cmd_collect()
        return cmd_analyze()
    except Exception as exc:
        print(f"ERROR: {exc}", file=sys.stderr)
        return 1
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())