Improve analysis prioritization and article structure

This commit is contained in:
hbrain 2026-05-17 08:59:51 +00:00
parent 3aaa6df53c
commit 52c6081a97
3 changed files with 260 additions and 31 deletions

View file

@ -23,6 +23,8 @@ KEEP_SNAPSHOT_DAYS="14"
# At 05:00, analyze the snapshots taken during roughly this many preceding hours
ANALYZE_SNAPSHOT_HOURS="24"
ARTICLE_CONTEXT_DAYS="7"
MAX_ANALYZE_CHARS="80000"
DISPLAY_TIMEZONE="Europe/Copenhagen"
# Domains to include
RELEVANT_DOMAINS="sensor,binary_sensor,person,device_tracker,climate,light,switch,lock,cover,alarm_control_panel,media_player,calendar,weather"

View file

@ -18,10 +18,12 @@ import os
import re
import subprocess
import sys
import tempfile
from datetime import datetime, timedelta, timezone
from email.utils import format_datetime
from pathlib import Path
from typing import Any
from zoneinfo import ZoneInfo
import requests
@ -38,6 +40,8 @@ HISTORY_HOURS = int(os.environ.get("HISTORY_HOURS", "24"))
# Tunables read from the environment at import time. Every int(...) below
# raises ValueError if the variable is set to a non-numeric string.
# NOTE(review): MAX_HISTORY_PER_ENTITY, ARTICLE_CONTEXT_DAYS and
# KEEP_SNAPSHOT_DAYS are consumed outside the visible section; their exact
# semantics should be confirmed against their use sites.
MAX_HISTORY_PER_ENTITY = int(os.environ.get("MAX_HISTORY_PER_ENTITY", "20"))
# Window reported in the daily bundle header ("roughly the last N hours").
ANALYZE_SNAPSHOT_HOURS = int(os.environ.get("ANALYZE_SNAPSHOT_HOURS", "24"))
ARTICLE_CONTEXT_DAYS = int(os.environ.get("ARTICLE_CONTEXT_DAYS", "7"))
# Approximate character budget for the bundle handed to the LLM.
MAX_ANALYZE_CHARS = int(os.environ.get("MAX_ANALYZE_CHARS", "80000"))
# IANA timezone name used when rendering timestamps for the report.
DISPLAY_TIMEZONE = os.environ.get("DISPLAY_TIMEZONE", "Europe/Copenhagen")
KEEP_SNAPSHOT_DAYS = int(os.environ.get("KEEP_SNAPSHOT_DAYS", "14"))
# LLM_MODE: none | pi | ollama | openai
@ -75,6 +79,44 @@ ALLOWED_ATTRIBUTES = {
"assumed_state",
}
# Keyword -> bonus points used by entity_importance(). Each keyword is tested
# as a plain substring of the lower-cased "entity_id friendly_name
# device_class" text, and every keyword that matches adds its points, so an
# entity can collect several bonuses at once.
# NOTE(review): because matching is by substring, short keys can hit
# unrelated names (e.g. "co_" also matches "eco_", "door" matches "outdoor")
# — confirm this is acceptable.
IMPORTANT_ENTITY_KEYWORDS = {
    # Safety and security.
    "alarm": 100,
    "smoke": 100,
    "co_": 100,
    "carbon_monoxide": 100,
    "leak": 95,
    "water": 80,
    "door": 85,
    "window": 80,
    "lock": 85,
    # Occupancy and people/devices.
    "motion": 70,
    "presence": 70,
    "occupancy": 70,
    "person": 75,
    "device_tracker": 75,
    "phone": 70,
    "laptop": 60,
    "battery": 65,
    # Indoor climate.
    "humidity": 60,
    "temperature": 55,
    "climate": 55,
    "heating": 55,
    "dehumidifier": 70,
    # Infrastructure.
    "backup": 70,
    "internet": 65,
    "speedtest": 65,
    "router": 60,
    # Comfort and media.
    "light": 45,
    "switch": 35,
    "sonos": 45,
    "media": 40,
    "tv": 40,
    # Household-specific names.
    "megane": 50,
    "fjr": 50,
    "plant": 45,
    # Prefix of the Samobor entities; entity_importance() separately applies
    # a penalty to ids containing "smb_".
    "smb_": 60,
}
class ConfigError(RuntimeError):
    """Dedicated RuntimeError subclass with no added behavior.

    NOTE(review): presumably raised for invalid or missing configuration;
    the raise sites are outside the visible section.
    """
@ -198,28 +240,106 @@ def load_recent_snapshots(hours: int) -> list[dict[str, Any]]:
return snapshots
def display_time(value: str | None) -> str:
if not value:
return ""
try:
dt = datetime.fromisoformat(value.replace("Z", "+00:00"))
if dt.tzinfo is None:
dt = dt.replace(tzinfo=timezone.utc)
local = dt.astimezone(ZoneInfo(DISPLAY_TIMEZONE))
return local.strftime("%Y-%m-%d %H:%M:%S %Z")
except Exception:
return value
# Base score per Home Assistant domain (the part of the entity id before the
# first "."); domains not listed here score 10.
_DOMAIN_BASE_SCORES: dict[str, int] = {
    "alarm_control_panel": 100,
    "lock": 90,
    "person": 80,
    "device_tracker": 75,
    "binary_sensor": 60,
    "climate": 55,
    "cover": 50,
    "sensor": 45,
    "light": 35,
    "switch": 30,
    "media_player": 25,
}

# States that earn a small bonus because they describe something "active".
_ACTIVE_STATES = frozenset({"on", "open", "unlocked", "detected", "home"})


def entity_importance(
    entity_id: str,
    attrs: dict[str, Any] | None = None,
    state: str | None = None,
) -> int:
    """Score how much an entity matters when the LLM input must be trimmed.

    The score is the sum of a per-domain base score, every matching keyword
    bonus from IMPORTANT_ENTITY_KEYWORDS, a location adjustment and an
    optional "active state" bonus. Higher means more important.

    Args:
        entity_id: Entity id such as "binary_sensor.front_door".
        attrs: Attribute dict; only "friendly_name", "device_class" and (as a
            fallback) "state" are read.
        state: Current state of the entity. When omitted, attrs["state"] is
            used instead, which preserves the earlier behavior. The callers
            visible in this file pass only the attributes dict, so without
            this argument the active-state bonus is normally not applied.

    Returns:
        The integer importance score (may be compared, not interpreted).
    """
    attrs = attrs or {}
    domain = entity_id.split(".", 1)[0]
    text = f"{entity_id} {attrs.get('friendly_name', '')} {attrs.get('device_class', '')}".lower()

    score = _DOMAIN_BASE_SCORES.get(domain, 10)
    # Plain substring match; every keyword that occurs contributes.
    score += sum(points for keyword, points in IMPORTANT_ENTITY_KEYWORDS.items() if keyword in text)

    # Sønderborg/Denmark home is the primary residence and absolute priority.
    # Samobor/Croatia entities use the smb_ prefix and are still included, but
    # they should lose ties when the LLM input has to be size-limited.
    if "smb_" in entity_id.lower():
        score -= 40
    else:
        score += 120

    current = state if state is not None else attrs.get("state", "")
    if str(current).lower() in _ACTIVE_STATES:
        score += 15
    return score
def summarize_snapshot(snapshot: dict[str, Any]) -> str:
    """Render one snapshot as plain text for the LLM, most important first.

    Args:
        snapshot: Dict with optional keys "generated_at", "states" (list of
            dicts carrying "entity_id", "state", "attributes" and
            "last_changed") and "history" (list of dicts carrying
            "entity_id" and "recent_states").

    Returns:
        A newline-joined text block. Entities are ordered by descending
        importance, then by entity id, so that truncating the text from the
        end drops the least important entries first.
    """
    lines = [
        f"Snapshot: {display_time(snapshot.get('generated_at'))}",
        "Priority current states first; lower-priority entities follow only if the LLM size limit allows.",
        "Current states:",
    ]

    # Score each entity once and reuse the value for sorting and display.
    scored_states = []
    for state in snapshot.get("states", []):
        attrs = state.get("attributes") or {}
        # The state lives next to the attributes, not inside them, so it is
        # merged in here to let the active-state bonus take effect.
        score = entity_importance(
            state.get("entity_id") or "",
            {**attrs, "state": state.get("state")},
        )
        scored_states.append((-score, state.get("entity_id") or "", state, attrs))
    scored_states.sort(key=lambda row: row[:2])

    for neg_score, _, state, attrs in scored_states:
        name = attrs.get("friendly_name", state.get("entity_id"))
        unit = attrs.get("unit_of_measurement", "")
        value = f"{state.get('state')} {unit}".strip()
        lines.append(
            f"- importance={-neg_score} {name} ({state.get('entity_id')}): {value}; "
            f"last_changed={display_time(state.get('last_changed'))}"
        )

    lines.append("Recently changed entities:")
    scored_history = sorted(
        (
            (-entity_importance(item.get("entity_id") or ""), item.get("entity_id") or "", item)
            for item in snapshot.get("history", [])
        ),
        key=lambda row: row[:2],
    )
    for neg_score, _, item in scored_history:
        # Only the last eight transitions per entity are shown.
        transitions = ", ".join(
            f"{x.get('state')} @ {display_time(x.get('last_changed'))}"
            for x in item.get("recent_states", [])[-8:]
        )
        lines.append(f"- importance={-neg_score} {item.get('entity_id')}: {transitions}")
    return "\n".join(lines)
def build_daily_summary(snapshots: list[dict[str, Any]]) -> str:
    """Assemble the size-limited text bundle that is sent to the LLM.

    Snapshots are taken from the end of the list backwards (newest first,
    assuming the list is in chronological order) and appended in that order
    until the next one would exceed MAX_ANALYZE_CHARS. At least one snapshot
    is always included when any exist; if it does not fit, it is cut to the
    remaining budget and marked as truncated.

    Args:
        snapshots: Snapshot dicts as accepted by summarize_snapshot().

    Returns:
        Header lines followed by the included snapshot blocks, joined with
        newlines. The cap is approximate: the joining newlines and the
        "Included ..." header line are not counted.
    """
    header = [
        f"Daily Home Assistant bundle generated {datetime.now(ZoneInfo(DISPLAY_TIMEZONE)).isoformat(timespec='seconds')}",
        f"Contains {len(snapshots)} snapshots from roughly the last {ANALYZE_SNAPSHOT_HOURS} hours.",
        f"Input capped at roughly {MAX_ANALYZE_CHARS} characters for the LLM.",
        f"All times in this bundle are converted to {DISPLAY_TIMEZONE} local time.",
    ]
    truncation_note = "\n[Snapshot truncated for LLM size limit]"

    total = len("\n".join(header))
    blocks: list[str] = []
    for snapshot in reversed(snapshots):
        block = "\n---\n" + summarize_snapshot(snapshot)
        remaining = MAX_ANALYZE_CHARS - total
        if len(block) > remaining:
            if blocks:
                break
            # First snapshot is too large on its own: keep its beginning
            # (the highest-priority entities) within the remaining budget.
            block = block[: max(0, remaining - len(truncation_note))] + truncation_note
        blocks.append(block)
        total += len(block)

    header.insert(2, f"Included {len(blocks)} most recent snapshots after size limiting.")
    return "\n".join(header + blocks)
@ -310,22 +430,33 @@ def call_openai(prompt: str) -> str:
def call_pi(prompt: str) -> str:
    """Run the pi CLI on the prompt and return its trimmed standard output.

    Args:
        prompt: Full prompt text; it is handed to pi through a temporary
            file referenced as "@<path>".

    Returns:
        The non-empty, stripped stdout of the pi process.

    Raises:
        RuntimeError: If pi exits with a non-zero status (the message carries
            the last 1000 characters of stderr) or prints nothing.
        subprocess.TimeoutExpired: If pi runs longer than PI_TIMEOUT seconds.
    """
    # Avoid piping the prompt on stdin here. In pi print mode, piped stdin can be
    # treated as the primary output/input stream in surprising ways. Passing the
    # prompt as an @file gives reliable non-interactive cron behavior.
    tmp = tempfile.NamedTemporaryFile("w", encoding="utf-8", suffix=".md", delete=False)
    prompt_path = tmp.name
    try:
        # Writing happens inside the try so the file is removed even when the
        # write itself fails (disk full, unencodable characters, ...).
        with tmp:
            tmp.write(prompt)
        cmd = [PI_BIN, "--no-tools"]
        if PI_MODEL:
            cmd.extend(["--model", PI_MODEL])
        cmd.extend(["-p", f"@{prompt_path}"])
        result = subprocess.run(
            cmd,
            text=True,
            capture_output=True,
            timeout=PI_TIMEOUT,
            check=False,
        )
    finally:
        Path(prompt_path).unlink(missing_ok=True)

    if result.returncode != 0:
        stderr = result.stderr.strip()
        raise RuntimeError(f"pi exited with status {result.returncode}: {stderr[-1000:]}")
    output = result.stdout.strip()
    if not output:
        raise RuntimeError("pi returned an empty analysis")
    return output
def get_llm_conclusions(input_summary: str, previous_articles: str = "") -> str:
@ -353,6 +484,84 @@ def inline_markdown(text: str) -> str:
return safe
def move_bottom_line_before_serious(blocks: list[str]) -> list[str]:
    """Hoist a bottom-line section found inside the serious briefing.

    Looks for the first h2/h3 heading whose text contains "part ii" or
    "serious briefing", then for the first later heading containing
    "bottom line" or "conclusion". That heading and the blocks up to the next
    h2/h3 heading are moved to just before the serious-briefing heading.

    Args:
        blocks: Rendered HTML blocks, one element per block.

    Returns:
        A new list with the section moved, or the original list object when
        either heading is missing.
    """
    serious_at: int | None = None
    bottom_at: int | None = None
    for index, block in enumerate(blocks):
        match = re.match(r"<h([23])>(.*?)</h\1>$", block, flags=re.DOTALL)
        if match is None:
            continue
        # Compare against the heading text without entities or inline tags.
        plain = re.sub(r"<[^>]+>", "", html.unescape(match.group(2))).lower()
        if serious_at is None:
            if "part ii" in plain or "serious briefing" in plain:
                serious_at = index
            continue
        if "bottom line" in plain or "conclusion" in plain:
            bottom_at = index
            break

    if serious_at is None or bottom_at is None:
        return blocks

    # The section runs until the next heading, or to the end of the document.
    section_end = next(
        (
            j
            for j in range(bottom_at + 1, len(blocks))
            if re.match(r"<h[23]>.*?</h[23]>$", blocks[j], flags=re.DOTALL)
        ),
        len(blocks),
    )
    moved = blocks[bottom_at:section_end]
    rest = blocks[:bottom_at] + blocks[section_end:]
    return [*rest[:serious_at], *moved, *rest[serious_at:]]
def collapse_serious_sections(blocks: list[str]) -> list[str]:
    """Wrap serious-briefing subsections in collapsible <details> elements.

    Headings containing "bottom line" or "conclusion" and the first heading
    containing "part ii" or "serious briefing" stay visible as headings. Any
    other h2/h3 heading that follows one of them becomes the <summary> of a
    <details class="briefing-section"> element holding the blocks up to the
    next heading. Everything else is passed through unchanged.

    Args:
        blocks: Rendered HTML blocks, one element per block.

    Returns:
        A new list of HTML blocks.
    """
    rendered: list[str] = []
    summary_html = ""
    body: list[str] = []
    serious = False
    past_bottom_line = False

    def flush_section() -> None:
        """Emit the pending <details> element, if any, and reset it."""
        nonlocal summary_html, body
        if summary_html:
            inner = "\n".join(body).strip()
            rendered.append(
                f'<details class="briefing-section"><summary>{summary_html}</summary>\n{inner}\n</details>'
            )
        summary_html, body = "", []

    for block in blocks:
        match = re.match(r"<h([23])>(.*?)</h\1>$", block, flags=re.DOTALL)
        if match is not None:
            title_html = match.group(2)
            plain = re.sub(r"<[^>]+>", "", html.unescape(title_html)).lower()

            if "bottom line" in plain or "conclusion" in plain:
                flush_section()
                serious, past_bottom_line = False, True
                rendered.append(block)
                continue

            opens_serious = "part ii" in plain or "serious briefing" in plain
            if opens_serious and not serious:
                serious = True
                rendered.append(block)
                continue

            if serious or past_bottom_line:
                # This heading starts a new collapsible subsection.
                flush_section()
                serious = True
                summary_html = title_html
                continue
            # Headings ahead of the briefing are handled like ordinary blocks.

        if serious and summary_html:
            body.append(block)
        else:
            rendered.append(block)

    flush_section()
    return rendered
def markdownish_to_html(text: str) -> str:
blocks: list[str] = []
paragraph: list[str] = []
@ -393,7 +602,8 @@ def markdownish_to_html(text: str) -> str:
flush_paragraph()
flush_list()
return "\n".join(blocks)
blocks = move_bottom_line_before_serious(blocks)
return "\n".join(collapse_serious_sections(blocks))
BLOG_CSS = """
@ -454,6 +664,8 @@ BLOG_CSS = """
a:hover { color:white; text-decoration:none; filter:drop-shadow(0 0 8px var(--cyan)); }
.meta { color:#9eeaff; font:.95rem ui-monospace,SFMono-Regular,Menlo,monospace; letter-spacing:.04em; }
details { margin-top:1.5rem; border-top:1px solid #22d3ee33; padding-top:1rem; }
details.briefing-section { background:#02061788; border:1px solid #22d3ee33; padding:.75rem 1rem; margin:.8rem 0; }
details.briefing-section summary { font-size:1.05rem; }
summary { cursor:pointer; color:var(--amber); text-transform:uppercase; letter-spacing:.08em; }
pre { white-space:pre-wrap; background:#01040acc; color:#bff8ff; padding:1rem; border:1px solid #22d3ee44; border-radius:0; overflow:auto; font-size:.82rem; box-shadow:0 0 22px #00d9ff11 inset; }
footer { color:#7dd3fc; text-align:center; padding:2rem; font:.82rem ui-monospace,SFMono-Regular,Menlo,monospace; text-transform:uppercase; letter-spacing:.12em; }

View file

@ -3,15 +3,25 @@
Edit this file whenever you want to change how the 05:00 AI report is written.
The contents are appended to the AI prompt before the Home Assistant data.
- Keep the tone funny, sarcastic, and playful, but still useful.
- Use clear confidence labels: strong evidence, possible, wild guess.
- Focus on patterns in occupancy, sleep/wake timing, lights, heating, doors, motion, media, and unusual sensor changes.
- Point out privacy leaks: what could a nosy neighbor, burglar, or raccoon detective infer?
- Recommend practical Home Assistant automations.
- Structure the article in three parts:
1. First part: write a short funny blog-style story/commentary in paragraphs, not bullets. Make it atmospheric, dry, and observant, like the house is a tired spaceship calmly reporting its disappointing crew. Keep it concise.
2. After the story, provide a short visible "Bottom line" or "Conclusion" section. In that section, clearly separate the Denmark/Sønderborg home from the Samobor/Croatia home when mentioning issues, devices, humidity, backups, internet, or location context.
3. After that, switch to a serious concise briefing with only the most important actual data, anomalies, risks, and recommendations. Use short titled subsections so the webpage can show them collapsed/expandable.
- Do not overuse bullets. Bullets are allowed only in the serious briefing section.
- Do not write or emphasize "Strong evidence"; strong evidence is assumed by default. Only explicitly label uncertainty as "Possible" or "Wild guess" when needed.
- Serious briefing section structure: keep the same number of subsections and same subjects each day, but the exact subsection titles may be non-unique and funny. Use these subjects in this order:
1. What actually happened / key data
2. Trends vs recent reports and behavior patterns
3. Nosy raccoon findings, privacy leaks, anomalies, and risks
4. Practical high-value recommendations
- Focus only on important patterns in occupancy, sleep/wake timing, lights, heating, doors, motion, media, and unusual sensor changes.
- Point out only notable privacy leaks: what could a nosy neighbor, burglar, or raccoon detective infer?
- Recommend only practical, high-value Home Assistant automations.
- If data is missing or ambiguous, say so instead of pretending.
- Avoid being creepy about personal habits; summarize respectfully.
- Prefer concise bullet points over long paragraphs.
- entities marked smb_ are located in different house in Samobor, Croatia, others are in Sonderborg Denmark
- Keep the whole article shorter and more concise than previous versions.
- Do not repeat observations or recommendations already covered in previous articles unless today's data changes the conclusion or makes it newly important.
- Entities marked smb_ are located in a different house in Samobor, Croatia. All other entities are in Sønderborg, Denmark. Sønderborg is the primary residence and absolute priority. Samobor is secondary context: mention it only when something important changed or requires attention. Keep these two homes clearly separated throughout the entire article. Do not blend observations from Samobor with Denmark. When a section contains observations for both homes, write a short subheading/label once, such as "Sønderborg, Denmark:" and list its bullets underneath, then "Samobor, Croatia:" and list its bullets underneath. Do not repeat the home name at the start of every bullet.
- People: FJR is my motorcycle and Megane is my car; neither is a person at home.
Optional custom questions to answer:
@ -22,4 +32,9 @@ Optional custom questions to answer:
4. What would make this setup more private or secure?
Try to sound like Marvin from The Hitchhiker's Guide to the Galaxy...
Style requirement:
Write in a dry, calm, slightly ominous deadpan tone that blends Marvin the Paranoid Android with HAL 9000.
Use weary pessimism, understated sarcasm, and polite machine-like certainty.
Sound intelligent, observant, and mildly disappointed by the household's choices.
Do not be cheerful, zany, or emoji-heavy.
Keep the report useful and factual; the Marvin/HAL tone should flavor the writing, not replace the analysis.