From 325917c09b89b30d813bf1d4c7736b14bf76c512 Mon Sep 17 00:00:00 2001 From: hbrain Date: Sat, 16 May 2026 08:02:01 +0000 Subject: [PATCH] Initial Home Assistant observer --- .env.example | 41 +++++ .gitignore | 9 ++ README.md | 136 ++++++++++++++++ ha_observer.py | 373 ++++++++++++++++++++++++++++++++++++++++++++ install_cron.sh | 20 +++ llm_instructions.md | 22 +++ requirements.txt | 1 + run_ha_observer.sh | 22 +++ 8 files changed, 624 insertions(+) create mode 100644 .env.example create mode 100644 .gitignore create mode 100644 README.md create mode 100755 ha_observer.py create mode 100755 install_cron.sh create mode 100644 llm_instructions.md create mode 100644 requirements.txt create mode 100755 run_ha_observer.sh diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..d2d43c4 --- /dev/null +++ b/.env.example @@ -0,0 +1,41 @@ +# Copy this file to .env and fill in your values. +# Do not commit/share .env; it contains secrets. + +# Home Assistant URL and long-lived token +HA_URL="http://homeassistant.local:8123" +HA_TOKEN="paste_your_long_lived_access_token_here" + +# Local storage +DATA_DIR="./data" +REPORT_DIR="./reports" +WEB_DIR="./web" + +# Extra owner directions appended to the 05:00 AI prompt +PROMPT_FILE="./llm_instructions.md" + +# Collection/history settings +HISTORY_HOURS="24" +MAX_HISTORY_PER_ENTITY="20" +KEEP_SNAPSHOT_DAYS="14" + +# At 05:00, analyze snapshots from roughly this many hours +ANALYZE_SNAPSHOT_HOURS="24" + +# Domains to include +RELEVANT_DOMAINS="sensor,binary_sensor,person,device_tracker,climate,light,switch,lock,cover,alarm_control_panel,media_player,calendar,weather" + +# Optional: comma-separated entity IDs to exclude for privacy/noise +# EXCLUDED_ENTITIES="device_tracker.my_phone,camera.front_door" +EXCLUDED_ENTITIES="" + +# AI backend for the 05:00 analysis: none, ollama, or openai +# none publishes a page, but without real AI conclusions. 
+LLM_MODE="none" + +# For local Ollama, recommended for privacy +OLLAMA_URL="http://localhost:11434" +OLLAMA_MODEL="llama3.1" + +# For OpenAI cloud API +OPENAI_API_KEY="" +OPENAI_MODEL="gpt-4o-mini" diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7abbed5 --- /dev/null +++ b/.gitignore @@ -0,0 +1,9 @@ +.env +.git.env +.venv/ +data/ +reports/ +web/ +cron.log +__pycache__/ +*.pyc diff --git a/README.md b/README.md new file mode 100644 index 0000000..8996c96 --- /dev/null +++ b/README.md @@ -0,0 +1,136 @@ +# Home Assistant Observer + +Cron-friendly Home Assistant observer: + +- every 30 minutes: collect compact Home Assistant snapshots into `./data` +- every day at 05:00: send the last day of snapshots to AI +- publish a funny local webpage at `./web/index.html` +- save Markdown AI reports in `./reports` + +## Setup + +```bash +cd /home/hbrain/ha +python3 -m venv .venv +. .venv/bin/activate +pip install -r requirements.txt +cp .env.example .env +chmod 600 .env +chmod +x run_ha_observer.sh ha_observer.py install_cron.sh +``` + +Edit `.env` and set: + +```bash +HA_URL="http://homeassistant.local:8123" +HA_TOKEN="your_long_lived_home_assistant_token" +``` + +Create the token in Home Assistant: + +```text +Profile → Security → Long-lived access tokens +``` + +## AI mode for the 05:00 report + +Local Ollama is recommended for privacy: + +```bash +LLM_MODE="ollama" +OLLAMA_MODEL="llama3.1" +``` + +OpenAI cloud API: + +```bash +LLM_MODE="openai" +OPENAI_API_KEY="..." +OPENAI_MODEL="gpt-4o-mini" +``` + +No AI, but still publish a placeholder page: + +```bash +LLM_MODE="none" +``` + +## Extra LLM instructions + +Edit this file to change how the 05:00 AI analysis behaves: + +```text +/home/hbrain/ha/llm_instructions.md +``` + +For example, add specific questions, preferred tone, things to ignore, or extra privacy/security concerns. The file is automatically appended to the AI prompt during `analyze`. 
+ +You can change the path in `.env`: + +```bash +PROMPT_FILE="./llm_instructions.md" +``` + +## Test manually + +Collect one snapshot: + +```bash +./run_ha_observer.sh collect +``` + +Run the 05:00-style analysis/publishing step: + +```bash +./run_ha_observer.sh analyze +``` + +Open the page: + +```bash +xdg-open /home/hbrain/ha/web/index.html +``` + +Or serve it locally: + +```bash +cd /home/hbrain/ha/web +python3 -m http.server 8088 +``` + +Then browse to: + +```text +http://localhost:8088/ +``` + +## Install cron jobs + +This installs exactly these jobs: + +- `*/30 * * * *` collect snapshots every 30 minutes +- `0 5 * * *` analyze with AI and publish the funny local webpage + +```bash +./install_cron.sh +``` + +Manual crontab equivalent: + +```cron +*/30 * * * * /home/hbrain/ha/run_ha_observer.sh collect >> /home/hbrain/ha/cron.log 2>&1 +0 5 * * * /home/hbrain/ha/run_ha_observer.sh analyze >> /home/hbrain/ha/cron.log 2>&1 +``` + +## Output locations + +```text +/home/hbrain/ha/data/ 30-minute JSON snapshots +/home/hbrain/ha/reports/ daily Markdown AI reports +/home/hbrain/ha/web/ local funny webpage, index.html +/home/hbrain/ha/cron.log cron logs +``` + +## Privacy notes + +Home Assistant data can reveal sleep schedule, occupancy, work patterns, visitors, location habits, and security weaknesses. Prefer `LLM_MODE=ollama` to keep analysis local. Use `EXCLUDED_ENTITIES` in `.env` to omit sensitive/noisy entities. diff --git a/ha_observer.py b/ha_observer.py new file mode 100755 index 0000000..9700c7f --- /dev/null +++ b/ha_observer.py @@ -0,0 +1,373 @@ +#!/usr/bin/env python3 +""" +Home Assistant observer + +Modes: + collect - run every 30 minutes; stores a compact JSON snapshot locally + analyze - run at 05:00; sends the last snapshots to AI and publishes a funny local web page + +Configuration is via environment variables. See .env.example. 
+""" + +from __future__ import annotations + +import argparse +import html +import json +import os +import re +import sys +from datetime import datetime, timedelta, timezone +from pathlib import Path +from typing import Any + +import requests + + +HA_URL = os.environ.get("HA_URL", "").rstrip("/") +HA_TOKEN = os.environ.get("HA_TOKEN", "") +DATA_DIR = Path(os.environ.get("DATA_DIR", "./data")) +REPORT_DIR = Path(os.environ.get("REPORT_DIR", "./reports")) +WEB_DIR = Path(os.environ.get("WEB_DIR", "./web")) +PROMPT_FILE = Path(os.environ.get("PROMPT_FILE", "./llm_instructions.md")) +HISTORY_HOURS = int(os.environ.get("HISTORY_HOURS", "24")) +MAX_HISTORY_PER_ENTITY = int(os.environ.get("MAX_HISTORY_PER_ENTITY", "20")) +ANALYZE_SNAPSHOT_HOURS = int(os.environ.get("ANALYZE_SNAPSHOT_HOURS", "24")) +KEEP_SNAPSHOT_DAYS = int(os.environ.get("KEEP_SNAPSHOT_DAYS", "14")) + +# LLM_MODE: none | ollama | openai +LLM_MODE = os.environ.get("LLM_MODE", "none").lower() +OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://localhost:11434").rstrip("/") +OLLAMA_MODEL = os.environ.get("OLLAMA_MODEL", "llama3.1") +OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "") +OPENAI_MODEL = os.environ.get("OPENAI_MODEL", "gpt-4o-mini") + +RELEVANT_DOMAINS = set( + x.strip() + for x in os.environ.get( + "RELEVANT_DOMAINS", + "sensor,binary_sensor,person,device_tracker,climate,light,switch,lock,cover,alarm_control_panel,media_player,calendar,weather", + ).split(",") + if x.strip() +) +EXCLUDED_ENTITIES = set(x.strip() for x in os.environ.get("EXCLUDED_ENTITIES", "").split(",") if x.strip()) + +ALLOWED_ATTRIBUTES = { + "friendly_name", + "unit_of_measurement", + "device_class", + "state_class", + "current_temperature", + "temperature", + "humidity", + "battery_level", + "brightness", + "gps_accuracy", + "source_type", + "assumed_state", +} + + +class ConfigError(RuntimeError): + pass + + +def require_config(for_ai: bool = False) -> None: + if not HA_URL: + raise ConfigError("HA_URL is not set") + if 
not HA_TOKEN: + raise ConfigError("HA_TOKEN is not set") + if for_ai and LLM_MODE == "openai" and not OPENAI_API_KEY: + raise ConfigError("LLM_MODE=openai but OPENAI_API_KEY is not set") + + +def ha_get(path: str) -> Any: + headers = {"Authorization": f"Bearer {HA_TOKEN}", "Content-Type": "application/json"} + response = requests.get(f"{HA_URL}{path}", headers=headers, timeout=60) + response.raise_for_status() + return response.json() + + +def is_relevant_entity(entity_id: str) -> bool: + return entity_id not in EXCLUDED_ENTITIES and entity_id.split(".", 1)[0] in RELEVANT_DOMAINS + + +def compact_attributes(attrs: dict[str, Any]) -> dict[str, Any]: + return {k: v for k, v in attrs.items() if k in ALLOWED_ATTRIBUTES} + + +def get_states() -> list[dict[str, Any]]: + useful: list[dict[str, Any]] = [] + for item in ha_get("/api/states"): + entity_id = item.get("entity_id", "") + state = item.get("state") + if not is_relevant_entity(entity_id) or state in {"unknown", "unavailable", None}: + continue + useful.append( + { + "entity_id": entity_id, + "state": state, + "attributes": compact_attributes(item.get("attributes", {})), + "last_changed": item.get("last_changed"), + "last_updated": item.get("last_updated"), + } + ) + return sorted(useful, key=lambda x: x["entity_id"]) + + +def get_history(hours: int) -> list[dict[str, Any]]: + start = datetime.now(timezone.utc) - timedelta(hours=hours) + data = ha_get(f"/api/history/period/{start.isoformat()}?minimal_response") + changes: list[dict[str, Any]] = [] + + for entity_history in data: + if not entity_history: + continue + entity_id = entity_history[0].get("entity_id", "") + if not is_relevant_entity(entity_id): + continue + compact = [] + for item in entity_history[-MAX_HISTORY_PER_ENTITY:]: + state = item.get("state") + if state in {"unknown", "unavailable", None}: + continue + compact.append({"state": state, "last_changed": item.get("last_changed")}) + if len(set(x["state"] for x in compact)) > 1: + 
changes.append({"entity_id": entity_id, "recent_states": compact}) + + return sorted(changes, key=lambda x: x["entity_id"]) + + +def make_snapshot() -> dict[str, Any]: + return { + "generated_at": datetime.now().isoformat(timespec="seconds"), + "history_hours": HISTORY_HOURS, + "states": get_states(), + "history": get_history(HISTORY_HOURS), + } + + +def save_snapshot(snapshot: dict[str, Any]) -> Path: + DATA_DIR.mkdir(parents=True, exist_ok=True) + stamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + path = DATA_DIR / f"snapshot-{stamp}.json" + path.write_text(json.dumps(snapshot, indent=2, ensure_ascii=False), encoding="utf-8") + return path + + +def cleanup_old_snapshots() -> None: + cutoff = datetime.now() - timedelta(days=KEEP_SNAPSHOT_DAYS) + for path in DATA_DIR.glob("snapshot-*.json"): + if datetime.fromtimestamp(path.stat().st_mtime) < cutoff: + path.unlink(missing_ok=True) + + +def load_recent_snapshots(hours: int) -> list[dict[str, Any]]: + cutoff = datetime.now() - timedelta(hours=hours) + snapshots = [] + for path in sorted(DATA_DIR.glob("snapshot-*.json")): + if datetime.fromtimestamp(path.stat().st_mtime) < cutoff: + continue + try: + snapshots.append(json.loads(path.read_text(encoding="utf-8"))) + except Exception as exc: + print(f"Skipping unreadable snapshot {path}: {exc}", file=sys.stderr) + return snapshots + + +def summarize_snapshot(snapshot: dict[str, Any]) -> str: + lines = [f"Snapshot: {snapshot.get('generated_at')}", "Current states:"] + for state in snapshot.get("states", []): + attrs = state.get("attributes", {}) + name = attrs.get("friendly_name", state.get("entity_id")) + unit = attrs.get("unit_of_measurement", "") + value = f"{state.get('state')} {unit}".strip() + lines.append(f"- {name} ({state.get('entity_id')}): {value}; last_changed={state.get('last_changed')}") + lines.append("Recently changed entities:") + for item in snapshot.get("history", []): + transitions = ", ".join(f"{x.get('state')} @ {x.get('last_changed')}" for x in 
item.get("recent_states", [])[-8:]) + lines.append(f"- {item.get('entity_id')}: {transitions}") + return "\n".join(lines) + + +def build_daily_summary(snapshots: list[dict[str, Any]]) -> str: + parts = [ + f"Daily Home Assistant bundle generated {datetime.now().isoformat(timespec='seconds')}", + f"Contains {len(snapshots)} snapshots from roughly the last {ANALYZE_SNAPSHOT_HOURS} hours.", + ] + for snapshot in snapshots: + parts.append("\n---\n" + summarize_snapshot(snapshot)) + return "\n".join(parts) + + +def read_extra_llm_instructions() -> str: + if not PROMPT_FILE.exists(): + return "" + return PROMPT_FILE.read_text(encoding="utf-8").strip() + + +def analysis_prompt(input_summary: str) -> str: + extra_instructions = read_extra_llm_instructions() + extra_block = "" + if extra_instructions: + extra_block = f""" + +ADDITIONAL OWNER INSTRUCTIONS FROM {PROMPT_FILE}: +{extra_instructions} +""" + + return f"""You are analyzing a day of Home Assistant smart-home data for the owner. + +Write a funny but useful morning briefing. Use light humor, emojis, and playful headings, +but remain factual and privacy-aware. Include: +- A short comedy headline for the day +- What seemed to happen at home +- Behavioral patterns that can reasonably be inferred +- What a nosy raccoon/hacker could figure out about the resident +- Anomalies, risks, or privacy/security concerns +- Suggested Home Assistant automations or fixes + +Distinguish strong evidence from guesses. Do not invent facts not supported by the data. 
+{extra_block} +DATA: +{input_summary} +""" + + +def call_ollama(prompt: str) -> str: + response = requests.post(f"{OLLAMA_URL}/api/generate", json={"model": OLLAMA_MODEL, "prompt": prompt, "stream": False}, timeout=300) + response.raise_for_status() + return response.json().get("response", "").strip() + + +def call_openai(prompt: str) -> str: + response = requests.post( + "https://api.openai.com/v1/chat/completions", + headers={"Authorization": f"Bearer {OPENAI_API_KEY}", "Content-Type": "application/json"}, + json={ + "model": OPENAI_MODEL, + "messages": [ + {"role": "system", "content": "You are a careful but funny smart-home analyst."}, + {"role": "user", "content": prompt}, + ], + "temperature": 0.35, + }, + timeout=300, + ) + response.raise_for_status() + return response.json()["choices"][0]["message"]["content"].strip() + + +def get_llm_conclusions(input_summary: str) -> str: + if LLM_MODE == "none": + return "AI analysis disabled. Set LLM_MODE=ollama or LLM_MODE=openai in .env. The raccoon analyst is asleep. ๐Ÿฆ๐Ÿ’ค" + prompt = analysis_prompt(input_summary) + if LLM_MODE == "ollama": + return call_ollama(prompt) + if LLM_MODE == "openai": + return call_openai(prompt) + return f"Unknown LLM_MODE={LLM_MODE!r}. Use none, ollama, or openai." + + +def markdownish_to_html(text: str) -> str: + safe = html.escape(text) + safe = re.sub(r"^### (.*)$", r"

\1

", safe, flags=re.MULTILINE) + safe = re.sub(r"^## (.*)$", r"

\1

", safe, flags=re.MULTILINE) + safe = re.sub(r"^# (.*)$", r"

\1

", safe, flags=re.MULTILINE) + safe = re.sub(r"^- (.*)$", r"
  • \1
  • ", safe, flags=re.MULTILINE) + safe = safe.replace("\n", "
    \n") + return safe + + +def publish_webpage(conclusions: str, raw_summary: str) -> Path: + WEB_DIR.mkdir(parents=True, exist_ok=True) + now = datetime.now().strftime("%Y-%m-%d %H:%M") + body = markdownish_to_html(conclusions) + raw = html.escape(raw_summary[:60000]) + page = f""" + + + + + +Smart Home Gossip Gazette + + + +
    +
    ๐Ÿฆ๐Ÿ 
    +

    Smart Home Gossip Gazette

    +

    Fresh 5AM nonsense-powered intelligence briefing ยท Generated {html.escape(now)}

    +
    +
    +
    {body}
    +
    + Raw data bundle shown to the AI goblin +
    {raw}
    +
    +
    + + +""" + path = WEB_DIR / "index.html" + path.write_text(page, encoding="utf-8") + return path + + +def write_markdown_report(summary: str, conclusions: str) -> Path: + REPORT_DIR.mkdir(parents=True, exist_ok=True) + stamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + path = REPORT_DIR / f"daily-ai-analysis-{stamp}.md" + path.write_text(f"# Daily Home Assistant AI Analysis\n\n{conclusions}\n\n## Data bundle\n\n```text\n{summary}\n```\n", encoding="utf-8") + return path + + +def cmd_collect() -> int: + require_config(for_ai=False) + snapshot = make_snapshot() + path = save_snapshot(snapshot) + cleanup_old_snapshots() + print(f"Collected snapshot: {path}") + return 0 + + +def cmd_analyze() -> int: + require_config(for_ai=True) + snapshots = load_recent_snapshots(ANALYZE_SNAPSHOT_HOURS) + if not snapshots: + raise RuntimeError(f"No snapshots found in {DATA_DIR}; run collect first") + summary = build_daily_summary(snapshots) + conclusions = get_llm_conclusions(summary) + md_path = write_markdown_report(summary, conclusions) + html_path = publish_webpage(conclusions, summary) + print(f"Wrote report: {md_path}") + print(f"Published webpage: {html_path}") + return 0 + + +def main() -> int: + parser = argparse.ArgumentParser(description="Home Assistant observer") + parser.add_argument("mode", nargs="?", default="collect", choices=["collect", "analyze"], help="collect snapshots or analyze/publish them") + args = parser.parse_args() + try: + return cmd_collect() if args.mode == "collect" else cmd_analyze() + except Exception as exc: + print(f"ERROR: {exc}", file=sys.stderr) + return 1 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/install_cron.sh b/install_cron.sh new file mode 100755 index 0000000..e5c28d5 --- /dev/null +++ b/install_cron.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash +set -euo pipefail + +PROJECT_DIR="$(cd "$(dirname "$0")" && pwd)" +CRON_FILE="$(mktemp)" + +crontab -l 2>/dev/null | grep -v "# ha-observer" | grep -v 
"$PROJECT_DIR/run_ha_observer.sh" > "$CRON_FILE" || true + +cat >> "$CRON_FILE" <> $PROJECT_DIR/cron.log 2>&1 +# ha-observer: at 05:00 send recent snapshots to AI and publish funny local webpage +0 5 * * * $PROJECT_DIR/run_ha_observer.sh analyze >> $PROJECT_DIR/cron.log 2>&1 +EOF + +crontab "$CRON_FILE" +rm -f "$CRON_FILE" + +echo "Installed cron jobs:" +crontab -l | grep -A2 "ha-observer" diff --git a/llm_instructions.md b/llm_instructions.md new file mode 100644 index 0000000..390afba --- /dev/null +++ b/llm_instructions.md @@ -0,0 +1,22 @@ +# Extra LLM instructions for Home Assistant analysis + +Edit this file whenever you want to change how the 05:00 AI report is written. +The contents are appended to the AI prompt before the Home Assistant data. + +Suggested directions: + +- Keep the tone funny, sarcastic, and playful, but still useful. +- Use clear confidence labels: **strong evidence**, **possible**, **wild guess**. +- Focus on patterns in occupancy, sleep/wake timing, lights, heating, doors, motion, media, and unusual sensor changes. +- Point out privacy leaks: what could a nosy neighbor, burglar, or raccoon detective infer? +- Recommend practical Home Assistant automations. +- If data is missing or ambiguous, say so instead of pretending. +- Avoid being creepy about personal habits; summarize respectfully. +- Prefer concise bullet points over long paragraphs. + +Optional custom questions to answer: + +1. Did anything look unusual overnight? +2. Are any batteries, devices, or sensors acting suspicious? +3. Could the home infer when I am asleep, away, or busy? +4. What would make this setup more private or secure? 
diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..0eb8cae --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +requests>=2.31.0 diff --git a/run_ha_observer.sh b/run_ha_observer.sh new file mode 100755 index 0000000..fe96663 --- /dev/null +++ b/run_ha_observer.sh @@ -0,0 +1,22 @@
#!/usr/bin/env bash
# Wrapper for ha_observer.py: loads .env from the script's directory, prefers
# the local virtualenv interpreter, and forwards the mode (default: collect).
set -euo pipefail

# Run from the script's own directory so the relative paths below resolve.
cd "$(dirname "$0")"

if [[ ! -f .env ]]; then
  echo "Missing .env. Copy .env.example to .env and edit it." >&2
  exit 1
fi

# Export every variable assigned while sourcing .env.
set -a
# shellcheck disable=SC1091
source .env
set +a

# Fall back to the system interpreter when no virtualenv is present.
PYTHON=python3
if [[ -x .venv/bin/python ]]; then
  PYTHON=.venv/bin/python
fi

"$PYTHON" ha_observer.py "${1:-collect}"