Improve analysis prioritization and article structure

2026-05-17 08:59:51 +00:00 · 2026-05-17 08:59:51 +00:00 · 52c6081a97
commit 52c6081a97
parent 3aaa6df53c
3 changed files with 260 additions and 31 deletions
--- a/.env.example
+++ b/.env.example
@ -23,6 +23,8 @@ KEEP_SNAPSHOT_DAYS="14"
 # At 05:00, analyze snapshots from roughly this many hours
 ANALYZE_SNAPSHOT_HOURS="24"
 ARTICLE_CONTEXT_DAYS="7"
 MAX_ANALYZE_CHARS="80000"
 DISPLAY_TIMEZONE="Europe/Copenhagen"
 # Domains to include
 RELEVANT_DOMAINS="sensor,binary_sensor,person,device_tracker,climate,light,switch,lock,cover,alarm_control_panel,media_player,calendar,weather"
--- a/ha_observer.py
+++ b/ha_observer.py
@ -18,10 +18,12 @@ import os
 import re
 import subprocess
 import sys
 import tempfile
 from datetime import datetime, timedelta, timezone
 from email.utils import format_datetime
 from pathlib import Path
 from typing import Any
 from zoneinfo import ZoneInfo
 import requests
@ -38,6 +40,8 @@ HISTORY_HOURS = int(os.environ.get("HISTORY_HOURS", "24"))
 MAX_HISTORY_PER_ENTITY = int(os.environ.get("MAX_HISTORY_PER_ENTITY", "20"))
 ANALYZE_SNAPSHOT_HOURS = int(os.environ.get("ANALYZE_SNAPSHOT_HOURS", "24"))
 ARTICLE_CONTEXT_DAYS = int(os.environ.get("ARTICLE_CONTEXT_DAYS", "7"))
 MAX_ANALYZE_CHARS = int(os.environ.get("MAX_ANALYZE_CHARS", "80000"))
 DISPLAY_TIMEZONE = os.environ.get("DISPLAY_TIMEZONE", "Europe/Copenhagen")
 KEEP_SNAPSHOT_DAYS = int(os.environ.get("KEEP_SNAPSHOT_DAYS", "14"))
 # LLM_MODE: none | pi | ollama | openai
@ -75,6 +79,44 @@ ALLOWED_ATTRIBUTES = {
    "assumed_state",
 }
 # Keyword -> points table consumed by entity_importance(). Each key is tested
 # as a plain substring of the lowercased "entity_id friendly_name device_class"
 # text, and the points of every key that matches are added together.
 # Because matching is by substring, short keys such as "co_" or "tv" also hit
 # longer words that merely contain them.
 IMPORTANT_ENTITY_KEYWORDS = {
    "alarm": 100,
    "smoke": 100,
    "co_": 100,
    "carbon_monoxide": 100,
    "leak": 95,
    "water": 80,
    "door": 85,
    "window": 80,
    "lock": 85,
    "motion": 70,
    "presence": 70,
    "occupancy": 70,
    "person": 75,
    "device_tracker": 75,
    "phone": 70,
    "laptop": 60,
    "battery": 65,
    "humidity": 60,
    "temperature": 55,
    "climate": 55,
    "heating": 55,
    "dehumidifier": 70,
    "backup": 70,
    "internet": 65,
    "speedtest": 65,
    "router": 60,
    "light": 45,
    "switch": 35,
    "sonos": 45,
    "media": 40,
    "tv": 40,
    "megane": 50,
    "fjr": 50,
    "plant": 45,
    "smb_": 60,
 }
 # Dedicated exception type derived from RuntimeError; presumably raised for
 # invalid configuration — the raise sites are not visible in this excerpt.
 class ConfigError(RuntimeError):
    pass
@ -198,28 +240,106 @@ def load_recent_snapshots(hours: int) -> list[dict[str, Any]]:
    return snapshots
 def display_time(value: str | None) -> str:
    if not value:
        return ""
    try:
        dt = datetime.fromisoformat(value.replace("Z", "+00:00"))
        if dt.tzinfo is None:
            dt = dt.replace(tzinfo=timezone.utc)
        local = dt.astimezone(ZoneInfo(DISPLAY_TIMEZONE))
        return local.strftime("%Y-%m-%d %H:%M:%S %Z")
    except Exception:
        return value
 def entity_importance(entity_id: str, attrs: dict[str, Any] | None = None) -> int:
    """Score how much an entity matters when the LLM input must be prioritized.

    The score is the sum of:
      * a base value for the entity's domain (10 for unlisted domains),
      * the points of every IMPORTANT_ENTITY_KEYWORDS key found as a
        substring of the lowercased entity id, friendly name and device class,
      * a location adjustment (-40 when the id contains "smb_", else +120),
      * +15 when ``attrs["state"]`` is one of a few "active" values.

    Args:
        entity_id: Entity id; the part before the first "." is the domain.
        attrs: Optional attribute mapping; ``None`` is treated as empty.

    Returns:
        The integer importance score (higher means more important).
    """
    attributes = attrs or {}
    haystack = f"{entity_id} {attributes.get('friendly_name', '')} {attributes.get('device_class', '')}".lower()

    base_by_domain = {
        "alarm_control_panel": 100,
        "lock": 90,
        "person": 80,
        "device_tracker": 75,
        "binary_sensor": 60,
        "climate": 55,
        "cover": 50,
        "sensor": 45,
        "light": 35,
        "switch": 30,
        "media_player": 25,
    }
    domain = entity_id.split(".", 1)[0]
    total = base_by_domain.get(domain, 10)

    total += sum(
        points
        for keyword, points in IMPORTANT_ENTITY_KEYWORDS.items()
        if keyword in haystack
    )

    # The Sønderborg/Denmark home is the primary residence and has absolute
    # priority. Samobor/Croatia entities (smb_ prefix) stay in the data but
    # should lose ties once the LLM input has to be size-limited.
    total += -40 if "smb_" in entity_id.lower() else 120

    # NOTE(review): the callers visible in this file pass a state's
    # "attributes" mapping here; whether that mapping ever carries a "state"
    # key is not shown — confirm, otherwise this bonus never applies.
    current = str(attributes.get("state", "")).lower()
    if current in {"on", "open", "unlocked", "detected", "home"}:
        total += 15
    return total
 # Renders one snapshot dict as plain text for the LLM: a header line with the
 # snapshot's generated_at time, a "Current states:" list and a
 # "Recently changed entities:" list, joined with newlines.
 # After this change both lists are sorted by descending entity_importance()
 # (ties broken by entity_id), each row is prefixed with its importance score,
 # and timestamps are passed through display_time(). Only the last 8
 # recent_states of each history item are listed.
 # NOTE(review): history rows are scored from the entity id alone (no
 # attributes are passed), so the same entity can show a different importance
 # value in the two lists.
 def summarize_snapshot(snapshot: dict[str, Any]) -> str:
-    lines = [f"Snapshot: {snapshot.get('generated_at')}", "Current states:"]
+    lines = [
-    for state in snapshot.get("states", []):
+        f"Snapshot: {display_time(snapshot.get('generated_at'))}",
        "Priority current states first; lower-priority entities follow only if the LLM size limit allows.",
        "Current states:",
    ]
    states = sorted(
        snapshot.get("states", []),
        key=lambda state: (-entity_importance(state.get("entity_id", ""), state.get("attributes", {})), state.get("entity_id", "")),
    )
    for state in states:
        attrs = state.get("attributes", {})
        name = attrs.get("friendly_name", state.get("entity_id"))
        unit = attrs.get("unit_of_measurement", "")
        value = f"{state.get('state')} {unit}".strip()
-        lines.append(f"- {name} ({state.get('entity_id')}): {value}; last_changed={state.get('last_changed')}")
+        score = entity_importance(state.get("entity_id", ""), attrs)
        lines.append(f"- importance={score} {name} ({state.get('entity_id')}): {value}; last_changed={display_time(state.get('last_changed'))}")
    lines.append("Recently changed entities:")
-    for item in snapshot.get("history", []):
+    history = sorted(
-        transitions = ", ".join(f"{x.get('state')} @ {x.get('last_changed')}" for x in item.get("recent_states", [])[-8:])
+        snapshot.get("history", []),
-        lines.append(f"- {item.get('entity_id')}: {transitions}")
+        key=lambda item: (-entity_importance(item.get("entity_id", "")), item.get("entity_id", "")),
    )
    for item in history:
        transitions = ", ".join(f"{x.get('state')} @ {display_time(x.get('last_changed'))}" for x in item.get("recent_states", [])[-8:])
        score = entity_importance(item.get("entity_id", ""))
        lines.append(f"- importance={score} {item.get('entity_id')}: {transitions}")
    return "\n".join(lines)
 # Builds the text bundle sent to the LLM: a few header lines followed by one
 # summarize_snapshot() block per included snapshot, joined with newlines.
 # After this change the snapshots are walked with reversed(snapshots) and
 # appended in that order until the next block would push the running total
 # past MAX_ANALYZE_CHARS. The first block is always included; a single block
 # longer than the cap is cut to MAX_ANALYZE_CHARS characters and a truncation
 # marker is appended. A line reporting how many snapshots were included is
 # inserted into the header afterwards.
 # NOTE(review): the cap is soft — the header length is not subtracted when
 # truncating, and the marker, the inserted "Included ..." line and the join
 # separators are not counted, so the result can exceed MAX_ANALYZE_CHARS
 # slightly (consistent with the "roughly" wording in the header).
 # NOTE(review): blocks are emitted in reversed(snapshots) order; presumably
 # snapshots is oldest-first, which would make the bundle newest-first —
 # confirm against load_recent_snapshots().
 def build_daily_summary(snapshots: list[dict[str, Any]]) -> str:
    parts = [
-        f"Daily Home Assistant bundle generated {datetime.now().isoformat(timespec='seconds')}",
+        f"Daily Home Assistant bundle generated {datetime.now(ZoneInfo(DISPLAY_TIMEZONE)).isoformat(timespec='seconds')}",
        f"Contains {len(snapshots)} snapshots from roughly the last {ANALYZE_SNAPSHOT_HOURS} hours.",
        f"Input capped at roughly {MAX_ANALYZE_CHARS} characters for the LLM.",
        f"All times in this bundle are converted to {DISPLAY_TIMEZONE} local time.",
    ]
-    for snapshot in snapshots:
+    total = len("\n".join(parts))
-        parts.append("\n---\n" + summarize_snapshot(snapshot))
+    included = 0
    for snapshot in reversed(snapshots):
        block = "\n---\n" + summarize_snapshot(snapshot)
        if total + len(block) > MAX_ANALYZE_CHARS and included > 0:
            break
        if len(block) > MAX_ANALYZE_CHARS:
            block = block[:MAX_ANALYZE_CHARS] + "\n[Snapshot truncated for LLM size limit]"
        parts.append(block)
        total += len(block)
        included += 1
    parts.insert(2, f"Included {included} most recent snapshots after size limiting.")
    return "\n".join(parts)
@ -310,22 +430,33 @@ def call_openai(prompt: str) -> str:
 # Runs the pi CLI non-interactively on `prompt` and returns its stripped
 # stdout. After this change the prompt is written to a temporary ".md" file
 # and passed as `-p @<path>` instead of being piped on stdin; the file is
 # removed in the `finally` block whether or not the subprocess call succeeds.
 # Raises RuntimeError when pi exits with a non-zero status (the message
 # carries the last 1000 characters of stderr) or when stdout is empty.
 # The PI_TIMEOUT limit is passed to subprocess.run; the resulting
 # subprocess.TimeoutExpired is not caught here.
 def call_pi(prompt: str) -> str:
-    cmd = [PI_BIN, "--no-tools"]
+    # Avoid piping the prompt on stdin here. In pi print mode, piped stdin can be
-    if PI_MODEL:
+    # treated as the primary output/input stream in surprising ways. Passing the
-        cmd.extend(["--model", PI_MODEL])
+    # prompt as an @file gives reliable non-interactive cron behavior.
-    cmd.extend(["-p", "Analyze the Home Assistant data from stdin and write the requested briefing."])
+    with tempfile.NamedTemporaryFile("w", encoding="utf-8", suffix=".md", delete=False) as tmp:
-    result = subprocess.run(
+        tmp.write(prompt)
-        cmd,
+        prompt_path = tmp.name
-        input=prompt,
+    try:
-        text=True,
+        cmd = [PI_BIN, "--no-tools"]
-        capture_output=True,
+        if PI_MODEL:
-        timeout=PI_TIMEOUT,
+            cmd.extend(["--model", PI_MODEL])
-        check=False,
+        cmd.extend(["-p", f"@{prompt_path}"])
-    )
+        result = subprocess.run(
            cmd,
            text=True,
            capture_output=True,
            timeout=PI_TIMEOUT,
            check=False,
        )
    finally:
        Path(prompt_path).unlink(missing_ok=True)
    if result.returncode != 0:
        stderr = result.stderr.strip()
        raise RuntimeError(f"pi exited with status {result.returncode}: {stderr[-1000:]}")
-    return result.stdout.strip()
+    output = result.stdout.strip()
    if not output:
        raise RuntimeError("pi returned an empty analysis")
    return output
 def get_llm_conclusions(input_summary: str, previous_articles: str = "") -> str:
@ -353,6 +484,84 @@ def inline_markdown(text: str) -> str:
    return safe
 def move_bottom_line_before_serious(blocks: list[str]) -> list[str]:
    serious_start = None
    bottom_start = None
    bottom_end = None
    for i, block in enumerate(blocks):
        heading = re.match(r"<h([23])>(.*?)</h\1>$", block, flags=re.DOTALL)
        if not heading:
            continue
        title = re.sub(r"<[^>]+>", "", html.unescape(heading.group(2))).lower()
        if serious_start is None and ("part ii" in title or "serious briefing" in title):
            serious_start = i
        elif serious_start is not None and ("bottom line" in title or "conclusion" in title):
            bottom_start = i
            break
    if serious_start is None or bottom_start is None:
        return blocks
    bottom_end = len(blocks)
    for i in range(bottom_start + 1, len(blocks)):
        if re.match(r"<h[23]>.*?</h[23]>$", blocks[i], flags=re.DOTALL):
            bottom_end = i
            break
    bottom_section = blocks[bottom_start:bottom_end]
    remaining = blocks[:bottom_start] + blocks[bottom_end:]
    return remaining[:serious_start] + bottom_section + remaining[serious_start:]
 def collapse_serious_sections(blocks: list[str]) -> list[str]:
    """Wrap subsections of the serious briefing in collapsible <details>.

    Blocks are scanned in order. A heading containing "bottom line" or
    "conclusion" is always emitted as-is and ends the serious part. The first
    heading containing "part ii" or "serious briefing" is emitted as-is and
    starts the serious part. Any other h2/h3 heading met inside the serious
    part, or anywhere after a bottom-line heading, becomes the <summary> of a
    <details class="briefing-section"> element that swallows the blocks up to
    the next heading. Everything else passes through unchanged.

    Args:
        blocks: Rendered HTML blocks, one element per block.

    Returns:
        A new list of HTML blocks with the collapsible sections applied.
    """
    heading_re = re.compile(r"<h([23])>(.*?)</h\1>$", flags=re.DOTALL)
    rendered: list[str] = []
    serious = False
    seen_bottom_line = False
    pending_title = ""
    pending_body: list[str] = []

    def flush_pending() -> None:
        # Emit the section being collected, if any, and reset the collector.
        nonlocal pending_title, pending_body
        if not pending_title:
            return
        body = "\n".join(pending_body).strip()
        rendered.append(f"<details class=\"briefing-section\"><summary>{pending_title}</summary>\n{body}\n</details>")
        pending_title = ""
        pending_body = []

    for block in blocks:
        found = heading_re.match(block)
        if found is not None:
            raw_title = found.group(2)
            lowered = re.sub(r"<[^>]+>", "", html.unescape(raw_title)).lower()

            if "bottom line" in lowered or "conclusion" in lowered:
                flush_pending()
                serious = False
                seen_bottom_line = True
                rendered.append(block)
                continue

            opens_serious = "part ii" in lowered or "serious briefing" in lowered
            if opens_serious and not serious:
                serious = True
                rendered.append(block)
                continue

            if serious or seen_bottom_line:
                # The heading itself is not emitted; its inner HTML becomes
                # the summary of the next collapsible section.
                serious = True
                flush_pending()
                pending_title = raw_title
                continue

        # Non-heading blocks, and headings outside the serious part.
        if serious and pending_title:
            pending_body.append(block)
        else:
            rendered.append(block)

    flush_pending()
    return rendered
 def markdownish_to_html(text: str) -> str:
    blocks: list[str] = []
    paragraph: list[str] = []
@ -393,7 +602,8 @@ def markdownish_to_html(text: str) -> str:
    flush_paragraph()
    flush_list()
-    return "\n".join(blocks)
+    blocks = move_bottom_line_before_serious(blocks)
    return "\n".join(collapse_serious_sections(blocks))
 BLOG_CSS = """
@ -454,6 +664,8 @@ BLOG_CSS = """
  a:hover { color:white; text-decoration:none; filter:drop-shadow(0 0 8px var(--cyan)); }
  .meta { color:#9eeaff; font:.95rem ui-monospace,SFMono-Regular,Menlo,monospace; letter-spacing:.04em; }
  details { margin-top:1.5rem; border-top:1px solid #22d3ee33; padding-top:1rem; }
  details.briefing-section { background:#02061788; border:1px solid #22d3ee33; padding:.75rem 1rem; margin:.8rem 0; }
  details.briefing-section summary { font-size:1.05rem; }
  summary { cursor:pointer; color:var(--amber); text-transform:uppercase; letter-spacing:.08em; }
  pre { white-space:pre-wrap; background:#01040acc; color:#bff8ff; padding:1rem; border:1px solid #22d3ee44; border-radius:0; overflow:auto; font-size:.82rem; box-shadow:0 0 22px #00d9ff11 inset; }
  footer { color:#7dd3fc; text-align:center; padding:2rem; font:.82rem ui-monospace,SFMono-Regular,Menlo,monospace; text-transform:uppercase; letter-spacing:.12em; }
--- a/llm_instructions.md
+++ b/llm_instructions.md
@ -3,15 +3,25 @@
 Edit this file whenever you want to change how the 05:00 AI report is written.
 The contents are appended to the AI prompt before the Home Assistant data.
- Keep the tone funny, sarcastic, and playful, but still useful.
+- Structure the article in three parts:
- Use clear confidence labels: strong evidence, possible, wild guess.
+  1. First part: write a short funny blog-style story/commentary in paragraphs, not bullets. Make it atmospheric, dry, and observant, like the house is a tired spaceship calmly reporting its disappointing crew. Keep it concise.
- Focus on patterns in occupancy, sleep/wake timing, lights, heating, doors, motion, media, and unusual sensor changes.
+  2. After the story, provide a short visible "Bottom line" or "Conclusion" section. In that section, clearly separate the Denmark/Sønderborg home from the Samobor/Croatia home when mentioning issues, devices, humidity, backups, internet, or location context.
- Point out privacy leaks: what could a nosy neighbor, burglar, or raccoon detective infer?
+  3. After that, switch to a serious concise briefing with only the most important actual data, anomalies, risks, and recommendations. Use short titled subsections so the webpage can show them collapsed/expandable.
- Recommend practical Home Assistant automations.
+- Do not overuse bullets. Bullets are allowed only in the serious briefing section.
 - Do not write or emphasize "Strong evidence"; strong evidence is assumed by default. Only explicitly label uncertainty as "Possible" or "Wild guess" when needed.
 - Serious briefing section structure: keep the same number of subsections and same subjects each day, but the exact subsection titles may be non-unique and funny. Use these subjects in this order:
  1. What actually happened / key data
  2. Trends vs recent reports and behavior patterns
  3. Nosy raccoon findings, privacy leaks, anomalies, and risks
  4. Practical high-value recommendations
 - Focus only on important patterns in occupancy, sleep/wake timing, lights, heating, doors, motion, media, and unusual sensor changes.
 - Point out only notable privacy leaks: what could a nosy neighbor, burglar, or raccoon detective infer?
 - Recommend only practical, high-value Home Assistant automations.
 - If data is missing or ambiguous, say so instead of pretending.
 - Avoid being creepy about personal habits; summarize respectfully.
- Prefer concise bullet points over long paragraphs.
+- Keep the whole article shorter and more concise than previous versions.
- entities marked smb_ are located in different house in Samobor, Croatia, others are in Sonderborg Denmark
+- Do not repeat observations or recommendations already covered in previous articles unless today's data changes the conclusion or makes it newly important.
 - Entities marked smb_ are located in a different house in Samobor, Croatia. All other entities are in Sønderborg, Denmark. Sønderborg is the primary residence and absolute priority. Samobor is secondary context: mention it only when something important changed or requires attention. Keep these two homes clearly separated throughout the entire article. Do not blend observations from Samobor with Denmark. When a section contains observations for both homes, write a short subheading/label once, such as "Sønderborg, Denmark:" and list its bullets underneath, then "Samobor, Croatia:" and list its bullets underneath. Do not repeat the home name at the start of every bullet.
 - People: FJR is my motorcycle and Megane is my car; they are not persons at home.
 Optional custom questions to answer:
@ -22,4 +32,9 @@ Optional custom questions to answer:
 4. What would make this setup more private or secure?
-Try to sound like Marvin from Hitchikers guide to the Galaxy...
+Style requirement:
 Write in a dry, calm, slightly ominous deadpan tone that blends Marvin the Paranoid Android with HAL 9000.
 Use weary pessimism, understated sarcasm, and polite machine-like certainty.
 Sound intelligent, observant, and mildly disappointed by the household's choices.
 Do not be cheerful, zany, or emoji-heavy.
 Keep the report useful and factual; the Marvin/HAL tone should flavor the writing, not replace the analysis.