diff --git a/.env.example b/.env.example
index 8048f3c..6b2ee16 100644
--- a/.env.example
+++ b/.env.example
@@ -23,6 +23,8 @@ KEEP_SNAPSHOT_DAYS="14"
# At 05:00, analyze snapshots from roughly this many hours
ANALYZE_SNAPSHOT_HOURS="24"
ARTICLE_CONTEXT_DAYS="7"
+MAX_ANALYZE_CHARS="80000"
+DISPLAY_TIMEZONE="Europe/Copenhagen"
# Domains to include
RELEVANT_DOMAINS="sensor,binary_sensor,person,device_tracker,climate,light,switch,lock,cover,alarm_control_panel,media_player,calendar,weather"
diff --git a/ha_observer.py b/ha_observer.py
index f15dab8..ee1984b 100755
--- a/ha_observer.py
+++ b/ha_observer.py
@@ -18,10 +18,12 @@ import os
import re
import subprocess
import sys
+import tempfile
from datetime import datetime, timedelta, timezone
from email.utils import format_datetime
from pathlib import Path
from typing import Any
+from zoneinfo import ZoneInfo
import requests
@@ -38,6 +40,8 @@ HISTORY_HOURS = int(os.environ.get("HISTORY_HOURS", "24"))
MAX_HISTORY_PER_ENTITY = int(os.environ.get("MAX_HISTORY_PER_ENTITY", "20"))
ANALYZE_SNAPSHOT_HOURS = int(os.environ.get("ANALYZE_SNAPSHOT_HOURS", "24"))
ARTICLE_CONTEXT_DAYS = int(os.environ.get("ARTICLE_CONTEXT_DAYS", "7"))
+MAX_ANALYZE_CHARS = int(os.environ.get("MAX_ANALYZE_CHARS", "80000"))
+DISPLAY_TIMEZONE = os.environ.get("DISPLAY_TIMEZONE", "Europe/Copenhagen")
KEEP_SNAPSHOT_DAYS = int(os.environ.get("KEEP_SNAPSHOT_DAYS", "14"))
# LLM_MODE: none | pi | ollama | openai
@@ -75,6 +79,44 @@ ALLOWED_ATTRIBUTES = {
"assumed_state",
}
+# Keyword -> bonus points used by entity_importance(). Every key that occurs
+# anywhere in the lowercased "entity_id friendly_name device_class" text adds
+# its points, so a single entity can collect several bonuses.
+# NOTE(review): matching is plain substring containment, so short keys such as
+# "co_" or "tv" can also hit unrelated names (e.g. "eco_mode") - confirm that
+# this is acceptable.
+IMPORTANT_ENTITY_KEYWORDS = {
+ "alarm": 100,
+ "smoke": 100,
+ "co_": 100,
+ "carbon_monoxide": 100,
+ "leak": 95,
+ "water": 80,
+ "door": 85,
+ "window": 80,
+ "lock": 85,
+ "motion": 70,
+ "presence": 70,
+ "occupancy": 70,
+ "person": 75,
+ "device_tracker": 75,
+ "phone": 70,
+ "laptop": 60,
+ "battery": 65,
+ "humidity": 60,
+ "temperature": 55,
+ "climate": 55,
+ "heating": 55,
+ "dehumidifier": 70,
+ "backup": 70,
+ "internet": 65,
+ "speedtest": 65,
+ "router": 60,
+ "light": 45,
+ "switch": 35,
+ "sonos": 45,
+ "media": 40,
+ "tv": 40,
+ "megane": 50,
+ "fjr": 50,
+ "plant": 45,
+ "smb_": 60,
+}
+
class ConfigError(RuntimeError):
pass
@@ -198,28 +240,106 @@ def load_recent_snapshots(hours: int) -> list[dict[str, Any]]:
return snapshots
+def display_time(value: str | None) -> str:
+    """Format an ISO-8601 timestamp in the configured display timezone.
+
+    Returns "" for a falsy input. A trailing "Z" is read as UTC and a
+    timestamp without an offset is assumed to be UTC. If parsing or the
+    timezone conversion fails for any reason, the input is returned unchanged
+    (best-effort formatting, never an error).
+    """
+    if not value:
+        return ""
+    try:
+        parsed = datetime.fromisoformat(value.replace("Z", "+00:00"))
+        aware = parsed if parsed.tzinfo is not None else parsed.replace(tzinfo=timezone.utc)
+        return aware.astimezone(ZoneInfo(DISPLAY_TIMEZONE)).strftime("%Y-%m-%d %H:%M:%S %Z")
+    except Exception:
+        return value
+
+
+def entity_importance(entity_id: str, attrs: dict[str, Any] | None = None, state: Any = None) -> int:
+    """Score how much an entity matters when the LLM input is size-limited.
+
+    The score is the sum of a per-domain base value, every matching
+    IMPORTANT_ENTITY_KEYWORDS bonus, a home-location adjustment and a small
+    bonus for "active" states.
+
+    Args:
+        entity_id: Entity id such as ``binary_sensor.front_door``.
+        attrs: The entity's attribute mapping, if available.
+        state: The entity's current state value. When omitted, the value is
+            looked up as ``attrs["state"]``, so existing two-argument callers
+            score exactly as before.
+
+    Returns:
+        An integer score; higher means more important.
+    """
+    attrs = attrs or {}
+    domain = entity_id.split(".", 1)[0]
+    text = f"{entity_id} {attrs.get('friendly_name', '')} {attrs.get('device_class', '')}".lower()
+    score = 0
+
+    domain_scores = {
+        "alarm_control_panel": 100,
+        "lock": 90,
+        "person": 80,
+        "device_tracker": 75,
+        "binary_sensor": 60,
+        "climate": 55,
+        "cover": 50,
+        "sensor": 45,
+        "light": 35,
+        "switch": 30,
+        "media_player": 25,
+    }
+    score += domain_scores.get(domain, 10)
+
+    for keyword, points in IMPORTANT_ENTITY_KEYWORDS.items():
+        if keyword in text:
+            score += points
+
+    # Sønderborg/Denmark home is the primary residence and absolute priority.
+    # Samobor/Croatia entities use the smb_ prefix and are still included, but
+    # they should lose ties when the LLM input has to be size-limited.
+    if "smb_" in entity_id.lower():
+        score -= 40
+    else:
+        score += 120
+
+    # summarize_snapshot reads the state value from the top level of a state
+    # record, not from its attributes, so looking only at attrs["state"] would
+    # almost never award this bonus. Prefer the explicit argument.
+    if state is None:
+        state = attrs.get("state", "")
+    if str(state).lower() in {"on", "open", "unlocked", "detected", "home"}:
+        score += 15
+    return score
+
+
def summarize_snapshot(snapshot: dict[str, Any]) -> str:
+    """Render one snapshot as plain text for the LLM, most important entities first.
+
+    Current states and recently changed entities are each sorted by
+    descending entity_importance(), then by entity id, and every timestamp is
+    passed through display_time().
+    """
+
+    def state_score(state: dict[str, Any]) -> int:
+        # Pass the top-level state value so the active-state bonus can apply.
+        return entity_importance(state.get("entity_id", ""), state.get("attributes", {}), state.get("state"))
+
-    lines = [f"Snapshot: {snapshot.get('generated_at')}", "Current states:"]
-    for state in snapshot.get("states", []):
+    lines = [
+        f"Snapshot: {display_time(snapshot.get('generated_at'))}",
+        "Priority current states first; lower-priority entities follow only if the LLM size limit allows.",
+        "Current states:",
+    ]
+    states = sorted(
+        snapshot.get("states", []),
+        key=lambda state: (-state_score(state), state.get("entity_id", "")),
+    )
+    for state in states:
        attrs = state.get("attributes", {})
        name = attrs.get("friendly_name", state.get("entity_id"))
        unit = attrs.get("unit_of_measurement", "")
        value = f"{state.get('state')} {unit}".strip()
-        lines.append(f"- {name} ({state.get('entity_id')}): {value}; last_changed={state.get('last_changed')}")
+        score = state_score(state)
+        lines.append(f"- importance={score} {name} ({state.get('entity_id')}): {value}; last_changed={display_time(state.get('last_changed'))}")
    lines.append("Recently changed entities:")
-    for item in snapshot.get("history", []):
-        transitions = ", ".join(f"{x.get('state')} @ {x.get('last_changed')}" for x in item.get("recent_states", [])[-8:])
-        lines.append(f"- {item.get('entity_id')}: {transitions}")
+    # History items are scored from the entity id alone (no attributes here).
+    history = sorted(
+        snapshot.get("history", []),
+        key=lambda item: (-entity_importance(item.get("entity_id", "")), item.get("entity_id", "")),
+    )
+    for item in history:
+        transitions = ", ".join(f"{x.get('state')} @ {display_time(x.get('last_changed'))}" for x in item.get("recent_states", [])[-8:])
+        score = entity_importance(item.get("entity_id", ""))
+        lines.append(f"- importance={score} {item.get('entity_id')}: {transitions}")
    return "\n".join(lines)
def build_daily_summary(snapshots: list[dict[str, Any]]) -> str:
+    """Build the text bundle handed to the LLM, capped near MAX_ANALYZE_CHARS.
+
+    Snapshots are appended in reverse input order until the next one would
+    exceed the cap (assumes ``snapshots`` is ordered oldest to newest, so the
+    newest is kept first - TODO confirm against load_recent_snapshots). The
+    first snapshot is always included; if it does not fit, it is cut to the
+    remaining budget and marked as truncated.
+    """
    parts = [
-        f"Daily Home Assistant bundle generated {datetime.now().isoformat(timespec='seconds')}",
+        f"Daily Home Assistant bundle generated {datetime.now(ZoneInfo(DISPLAY_TIMEZONE)).isoformat(timespec='seconds')}",
        f"Contains {len(snapshots)} snapshots from roughly the last {ANALYZE_SNAPSHOT_HOURS} hours.",
+        f"Input capped at roughly {MAX_ANALYZE_CHARS} characters for the LLM.",
+        f"All times in this bundle are converted to {DISPLAY_TIMEZONE} local time.",
    ]
-    for snapshot in snapshots:
-        parts.append("\n---\n" + summarize_snapshot(snapshot))
+    truncation_note = "\n[Snapshot truncated for LLM size limit]"
+    total = len("\n".join(parts))
+    included = 0
+    for snapshot in reversed(snapshots):
+        block = "\n---\n" + summarize_snapshot(snapshot)
+        if total + len(block) > MAX_ANALYZE_CHARS:
+            if included > 0:
+                break
+            # Always keep one snapshot, but cut it to what is left after the
+            # header (and the note itself) so the cap is actually honoured.
+            budget = max(0, MAX_ANALYZE_CHARS - total - len(truncation_note))
+            block = block[:budget] + truncation_note
+        parts.append(block)
+        total += len(block)
+        included += 1
+    parts.insert(2, f"Included {included} most recent snapshots after size limiting.")
    return "\n".join(parts)
@@ -310,22 +430,33 @@ def call_openai(prompt: str) -> str:
def call_pi(prompt: str) -> str:
+    """Run the pi CLI on *prompt* and return its stripped stdout.
+
+    The prompt is written to a temporary ``.md`` file that is passed to pi as
+    ``-p @<path>`` and removed afterwards, whether or not the run succeeds.
+
+    Raises:
+        RuntimeError: If pi exits with a non-zero status or prints nothing.
+        subprocess.TimeoutExpired: If pi runs longer than PI_TIMEOUT seconds.
+    """
-    cmd = [PI_BIN, "--no-tools"]
-    if PI_MODEL:
-        cmd.extend(["--model", PI_MODEL])
-    cmd.extend(["-p", "Analyze the Home Assistant data from stdin and write the requested briefing."])
-    result = subprocess.run(
-        cmd,
-        input=prompt,
-        text=True,
-        capture_output=True,
-        timeout=PI_TIMEOUT,
-        check=False,
-    )
+    # Avoid piping the prompt on stdin here. In pi print mode, piped stdin can be
+    # treated as the primary output/input stream in surprising ways. Passing the
+    # prompt as an @file gives reliable non-interactive cron behavior.
+    tmp = tempfile.NamedTemporaryFile("w", encoding="utf-8", suffix=".md", delete=False)
+    prompt_path = Path(tmp.name)
+    try:
+        # Write inside the try block: with delete=False the file would
+        # otherwise be left behind if the write itself fails.
+        with tmp:
+            tmp.write(prompt)
+        cmd = [PI_BIN, "--no-tools"]
+        if PI_MODEL:
+            cmd.extend(["--model", PI_MODEL])
+        cmd.extend(["-p", f"@{prompt_path}"])
+        result = subprocess.run(
+            cmd,
+            text=True,
+            capture_output=True,
+            timeout=PI_TIMEOUT,
+            check=False,
+        )
+    finally:
+        prompt_path.unlink(missing_ok=True)
    if result.returncode != 0:
        stderr = result.stderr.strip()
        raise RuntimeError(f"pi exited with status {result.returncode}: {stderr[-1000:]}")
-    return result.stdout.strip()
+    output = result.stdout.strip()
+    if not output:
+        raise RuntimeError("pi returned an empty analysis")
+    return output
def get_llm_conclusions(input_summary: str, previous_articles: str = "") -> str:
@@ -353,6 +484,84 @@ def inline_markdown(text: str) -> str:
return safe
+def move_bottom_line_before_serious(blocks: list[str]) -> list[str]:
+ """Move the "bottom line"/"conclusion" section ahead of the serious briefing.
+
+ Finds the first heading block whose title contains "part ii" or
+ "serious briefing", then the first later heading whose title contains
+ "bottom line" or "conclusion". That heading and the blocks up to the next
+ heading (or the end of the list) are re-inserted directly before the
+ serious-briefing heading. Returns ``blocks`` unchanged when either heading
+ is not found.
+ """
+ serious_start = None
+ bottom_start = None
+ bottom_end = None
+
+ for i, block in enumerate(blocks):
+ # NOTE(review): group(2) is read below, but the pattern as shown here has a
+ # single group; the heading markup in these regexes looks lost in this
+ # listing - confirm against the real file.
+ heading = re.match(r"(.*?)$", block, flags=re.DOTALL)
+ if not heading:
+ continue
+ # Compare on the plain-text title: entities unescaped, tags removed, lowercased.
+ title = re.sub(r"<[^>]+>", "", html.unescape(heading.group(2))).lower()
+ if serious_start is None and ("part ii" in title or "serious briefing" in title):
+ serious_start = i
+ elif serious_start is not None and ("bottom line" in title or "conclusion" in title):
+ bottom_start = i
+ break
+
+ if serious_start is None or bottom_start is None:
+ return blocks
+
+ # The bottom-line section runs until the next heading block, or to the end.
+ bottom_end = len(blocks)
+ for i in range(bottom_start + 1, len(blocks)):
+ if re.match(r".*?$", blocks[i], flags=re.DOTALL):
+ bottom_end = i
+ break
+
+ bottom_section = blocks[bottom_start:bottom_end]
+ remaining = blocks[:bottom_start] + blocks[bottom_end:]
+ # bottom_start is always after serious_start, so cutting the section out
+ # does not shift the serious_start index.
+ return remaining[:serious_start] + bottom_section + remaining[serious_start:]
+
+
+def collapse_serious_sections(blocks: list[str]) -> list[str]:
+ """Group the serious-briefing subsections into collapsible units.
+
+ Blocks pass through unchanged until a "part ii"/"serious briefing" heading
+ or a "bottom line"/"conclusion" heading has been seen. After that, each
+ further heading starts a new unit: its title becomes the summary and the
+ blocks that follow become the content, emitted as one combined string by
+ close_detail(). The marker headings themselves are emitted as-is.
+ """
+ output: list[str] = []
+ in_serious = False
+ after_bottom_line = False
+ current_summary = ""
+ current_content: list[str] = []
+
+ def close_detail() -> None:
+ # Flush the currently open unit (if any) to output and reset the buffers.
+ nonlocal current_summary, current_content
+ if current_summary:
+ content = "\n".join(current_content).strip()
+ # NOTE(review): the markup around summary/content appears stripped in this
+ # listing (the literal is split across two lines); the stylesheet in this
+ # file styles details.briefing-section - confirm against the real file.
+ output.append(f"{current_summary}
\n{content}\n ")
+ current_summary = ""
+ current_content = []
+
+ for block in blocks:
+ # NOTE(review): group(2) is read below, but the pattern as shown here has a
+ # single group; the heading markup looks lost in this listing - confirm.
+ heading = re.match(r"(.*?)$", block, flags=re.DOTALL)
+ if heading:
+ title = heading.group(2)
+ plain_title = re.sub(r"<[^>]+>", "", html.unescape(title)).lower()
+ is_bottom_line = "bottom line" in plain_title or "conclusion" in plain_title
+ if is_bottom_line:
+ # The bottom line stays visible; headings after it are collapsed.
+ close_detail()
+ in_serious = False
+ after_bottom_line = True
+ output.append(block)
+ continue
+ if not in_serious and ("part ii" in plain_title or "serious briefing" in plain_title):
+ in_serious = True
+ output.append(block)
+ continue
+ if in_serious or after_bottom_line:
+ # Any other heading in this region opens a new collapsible unit.
+ in_serious = True
+ close_detail()
+ current_summary = title
+ continue
+ if in_serious:
+ if current_summary:
+ current_content.append(block)
+ else:
+ output.append(block)
+ else:
+ output.append(block)
+
+ close_detail()
+ return output
+
+
def markdownish_to_html(text: str) -> str:
blocks: list[str] = []
paragraph: list[str] = []
@@ -393,7 +602,8 @@ def markdownish_to_html(text: str) -> str:
flush_paragraph()
flush_list()
- return "\n".join(blocks)
+ blocks = move_bottom_line_before_serious(blocks)
+ return "\n".join(collapse_serious_sections(blocks))
BLOG_CSS = """
@@ -454,6 +664,8 @@ BLOG_CSS = """
a:hover { color:white; text-decoration:none; filter:drop-shadow(0 0 8px var(--cyan)); }
.meta { color:#9eeaff; font:.95rem ui-monospace,SFMono-Regular,Menlo,monospace; letter-spacing:.04em; }
details { margin-top:1.5rem; border-top:1px solid #22d3ee33; padding-top:1rem; }
+ details.briefing-section { background:#02061788; border:1px solid #22d3ee33; padding:.75rem 1rem; margin:.8rem 0; }
+ details.briefing-section summary { font-size:1.05rem; }
summary { cursor:pointer; color:var(--amber); text-transform:uppercase; letter-spacing:.08em; }
pre { white-space:pre-wrap; background:#01040acc; color:#bff8ff; padding:1rem; border:1px solid #22d3ee44; border-radius:0; overflow:auto; font-size:.82rem; box-shadow:0 0 22px #00d9ff11 inset; }
footer { color:#7dd3fc; text-align:center; padding:2rem; font:.82rem ui-monospace,SFMono-Regular,Menlo,monospace; text-transform:uppercase; letter-spacing:.12em; }
diff --git a/llm_instructions.md b/llm_instructions.md
index 3f28266..198b5e6 100644
--- a/llm_instructions.md
+++ b/llm_instructions.md
@@ -3,15 +3,25 @@
Edit this file whenever you want to change how the 05:00 AI report is written.
The contents are appended to the AI prompt before the Home Assistant data.
-- Keep the tone funny, sarcastic, and playful, but still useful.
-- Use clear confidence labels: strong evidence, possible, wild guess.
-- Focus on patterns in occupancy, sleep/wake timing, lights, heating, doors, motion, media, and unusual sensor changes.
-- Point out privacy leaks: what could a nosy neighbor, burglar, or raccoon detective infer?
-- Recommend practical Home Assistant automations.
+- Structure the article in three parts:
+ 1. First part: write a short funny blog-style story/commentary in paragraphs, not bullets. Make it atmospheric, dry, and observant, like the house is a tired spaceship calmly reporting its disappointing crew. Keep it concise.
+ 2. After the story, provide a short visible "Bottom line" or "Conclusion" section. In that section, clearly separate the Denmark/Sønderborg home from the Samobor/Croatia home when mentioning issues, devices, humidity, backups, internet, or location context.
+ 3. After that, switch to a serious concise briefing with only the most important actual data, anomalies, risks, and recommendations. Use short titled subsections so the webpage can show them collapsed/expandable.
+- Do not overuse bullets. Bullets are allowed only in the serious briefing section.
+- Do not write or emphasize "Strong evidence"; strong evidence is assumed by default. Only explicitly label uncertainty as "Possible" or "Wild guess" when needed.
+- Serious briefing section structure: keep the same number of subsections and the same subjects each day, but the exact subsection titles may vary from day to day and may be funny. Use these subjects in this order:
+ 1. What actually happened / key data
+ 2. Trends vs recent reports and behavior patterns
+ 3. Nosy raccoon findings, privacy leaks, anomalies, and risks
+ 4. Practical high-value recommendations
+- Focus only on important patterns in occupancy, sleep/wake timing, lights, heating, doors, motion, media, and unusual sensor changes.
+- Point out only notable privacy leaks: what could a nosy neighbor, burglar, or raccoon detective infer?
+- Recommend only practical, high-value Home Assistant automations.
- If data is missing or ambiguous, say so instead of pretending.
- Avoid being creepy about personal habits; summarize respectfully.
-- Prefer concise bullet points over long paragraphs.
-- entities marked smb_ are located in different house in Samobor, Croatia, others are in Sonderborg Denmark
+- Keep the whole article shorter and more concise than previous versions.
+- Do not repeat observations or recommendations already covered in previous articles unless today's data changes the conclusion or makes it newly important.
+- Entities marked smb_ are located in a different house in Samobor, Croatia. All other entities are in Sønderborg, Denmark. Sønderborg is the primary residence and absolute priority. Samobor is secondary context: mention it only when something important changed or requires attention. Keep these two homes clearly separated throughout the entire article. Do not blend observations from Samobor with Denmark. When a section contains observations for both homes, write a short subheading/label once, such as "Sønderborg, Denmark:" and list its bullets underneath, then "Samobor, Croatia:" and list its bullets underneath. Do not repeat the home name at the start of every bullet.
- people: FJR is my motorcycle and Megane is my car not persons at home
Optional custom questions to answer:
@@ -22,4 +32,9 @@ Optional custom questions to answer:
4. What would make this setup more private or secure?
-Try to sound like Marvin from Hitchikers guide to the Galaxy...
+Style requirement:
+Write in a dry, calm, slightly ominous deadpan tone that blends Marvin the Paranoid Android with HAL 9000.
+Use weary pessimism, understated sarcasm, and polite machine-like certainty.
+Sound intelligent, observant, and mildly disappointed by the household's choices.
+Do not be cheerful, zany, or emoji-heavy.
+Keep the report useful and factual; the Marvin/HAL tone should flavor the writing, not replace the analysis.