Improve analysis prioritization and article structure

This commit is contained in:
hbrain 2026-05-17 08:59:51 +00:00
parent 3aaa6df53c
commit 52c6081a97
3 changed files with 260 additions and 31 deletions

View file

@ -23,6 +23,8 @@ KEEP_SNAPSHOT_DAYS="14"
# At 05:00, analyze snapshots from roughly this many hours # At 05:00, analyze snapshots from roughly this many hours
ANALYZE_SNAPSHOT_HOURS="24" ANALYZE_SNAPSHOT_HOURS="24"
ARTICLE_CONTEXT_DAYS="7" ARTICLE_CONTEXT_DAYS="7"
MAX_ANALYZE_CHARS="80000"
DISPLAY_TIMEZONE="Europe/Copenhagen"
# Domains to include # Domains to include
RELEVANT_DOMAINS="sensor,binary_sensor,person,device_tracker,climate,light,switch,lock,cover,alarm_control_panel,media_player,calendar,weather" RELEVANT_DOMAINS="sensor,binary_sensor,person,device_tracker,climate,light,switch,lock,cover,alarm_control_panel,media_player,calendar,weather"

View file

@ -18,10 +18,12 @@ import os
import re import re
import subprocess import subprocess
import sys import sys
import tempfile
from datetime import datetime, timedelta, timezone from datetime import datetime, timedelta, timezone
from email.utils import format_datetime from email.utils import format_datetime
from pathlib import Path from pathlib import Path
from typing import Any from typing import Any
from zoneinfo import ZoneInfo
import requests import requests
@ -38,6 +40,8 @@ HISTORY_HOURS = int(os.environ.get("HISTORY_HOURS", "24"))
MAX_HISTORY_PER_ENTITY = int(os.environ.get("MAX_HISTORY_PER_ENTITY", "20")) MAX_HISTORY_PER_ENTITY = int(os.environ.get("MAX_HISTORY_PER_ENTITY", "20"))
ANALYZE_SNAPSHOT_HOURS = int(os.environ.get("ANALYZE_SNAPSHOT_HOURS", "24")) ANALYZE_SNAPSHOT_HOURS = int(os.environ.get("ANALYZE_SNAPSHOT_HOURS", "24"))
ARTICLE_CONTEXT_DAYS = int(os.environ.get("ARTICLE_CONTEXT_DAYS", "7")) ARTICLE_CONTEXT_DAYS = int(os.environ.get("ARTICLE_CONTEXT_DAYS", "7"))
MAX_ANALYZE_CHARS = int(os.environ.get("MAX_ANALYZE_CHARS", "80000"))
DISPLAY_TIMEZONE = os.environ.get("DISPLAY_TIMEZONE", "Europe/Copenhagen")
KEEP_SNAPSHOT_DAYS = int(os.environ.get("KEEP_SNAPSHOT_DAYS", "14")) KEEP_SNAPSHOT_DAYS = int(os.environ.get("KEEP_SNAPSHOT_DAYS", "14"))
# LLM_MODE: none | pi | ollama | openai # LLM_MODE: none | pi | ollama | openai
@ -75,6 +79,44 @@ ALLOWED_ATTRIBUTES = {
"assumed_state", "assumed_state",
} }
# Keyword -> importance points used by entity_importance().
#
# Each key is matched as a plain substring against the lowercased
# "<entity_id> <friendly_name> <device_class>" text, and the points of every
# matching keyword are added together, so one entity can collect several
# bonuses (for example "door" and "lock").
# NOTE(review): because matching is by substring, short keys such as "co_",
# "tv" or "light" can also hit unrelated names (e.g. "eco_mode", "daylight");
# confirm this is acceptable for the entities in use.
# NOTE(review): "smb_" adds 60 points here while entity_importance() also
# subtracts 40 for ids containing "smb_"; verify the combined effect is intended.
IMPORTANT_ENTITY_KEYWORDS = {
    "alarm": 100,
    "smoke": 100,
    "co_": 100,
    "carbon_monoxide": 100,
    "leak": 95,
    "water": 80,
    "door": 85,
    "window": 80,
    "lock": 85,
    "motion": 70,
    "presence": 70,
    "occupancy": 70,
    "person": 75,
    "device_tracker": 75,
    "phone": 70,
    "laptop": 60,
    "battery": 65,
    "humidity": 60,
    "temperature": 55,
    "climate": 55,
    "heating": 55,
    "dehumidifier": 70,
    "backup": 70,
    "internet": 65,
    "speedtest": 65,
    "router": 60,
    "light": 45,
    "switch": 35,
    "sonos": 45,
    "media": 40,
    "tv": 40,
    "megane": 50,
    "fjr": 50,
    "plant": 45,
    "smb_": 60,
}
class ConfigError(RuntimeError): class ConfigError(RuntimeError):
pass pass
@ -198,28 +240,106 @@ def load_recent_snapshots(hours: int) -> list[dict[str, Any]]:
return snapshots return snapshots
def display_time(value: str | None) -> str:
if not value:
return ""
try:
dt = datetime.fromisoformat(value.replace("Z", "+00:00"))
if dt.tzinfo is None:
dt = dt.replace(tzinfo=timezone.utc)
local = dt.astimezone(ZoneInfo(DISPLAY_TIMEZONE))
return local.strftime("%Y-%m-%d %H:%M:%S %Z")
except Exception:
return value
def entity_importance(entity_id: str, attrs: dict[str, Any] | None = None) -> int:
    """Score how important an entity is when the LLM input must be trimmed.

    The score is the sum of a per-domain base weight, the points of every
    ``IMPORTANT_ENTITY_KEYWORDS`` key found in the entity's id, friendly name
    or device class, a location adjustment, and a small bonus for an
    "active" state.

    Args:
        entity_id: Entity id such as ``"lock.front_door"``.
        attrs: Optional attribute mapping; ``friendly_name``, ``device_class``
            and ``state`` are the only keys read.

    Returns:
        The integer importance score (higher sorts first).

    NOTE(review): the active-state bonus is looked up under ``attrs["state"]``,
    so it only applies when the caller puts the state value into *attrs*.
    """
    details = attrs or {}
    domain_weights = {
        "alarm_control_panel": 100,
        "lock": 90,
        "person": 80,
        "device_tracker": 75,
        "binary_sensor": 60,
        "climate": 55,
        "cover": 50,
        "sensor": 45,
        "light": 35,
        "switch": 30,
        "media_player": 25,
    }
    domain, _, _ = entity_id.partition(".")
    searchable = "{} {} {}".format(
        entity_id,
        details.get("friendly_name", ""),
        details.get("device_class", ""),
    ).lower()
    keyword_points = sum(
        points
        for keyword, points in IMPORTANT_ENTITY_KEYWORDS.items()
        if keyword in searchable
    )

    # Sønderborg/Denmark home is the primary residence and absolute priority.
    # Samobor/Croatia entities use the smb_ prefix and are still included, but
    # they should lose ties when the LLM input has to be size-limited.
    location_points = -40 if "smb_" in entity_id.lower() else 120

    active_states = {"on", "open", "unlocked", "detected", "home"}
    state_points = 15 if str(details.get("state", "")).lower() in active_states else 0

    return domain_weights.get(domain, 10) + keyword_points + location_points + state_points
def summarize_snapshot(snapshot: dict[str, Any]) -> str:
    """Render one snapshot as prioritized plain text for the LLM prompt.

    Current states are listed first, then recently changed entities; both
    lists are ordered by descending importance and then by entity id, so a
    later size cut drops the least important entries.

    Args:
        snapshot: Mapping with ``generated_at``, a ``states`` list and a
            ``history`` list, as read via ``snapshot.get(...)`` below.

    Returns:
        The newline-joined summary text.
    """
    lines = [
        f"Snapshot: {display_time(snapshot.get('generated_at'))}",
        "Priority current states first; lower-priority entities follow only if the LLM size limit allows.",
        "Current states:",
    ]

    # Score every entity exactly once. The score is reused for sorting, for
    # the printed "importance=" label, and for the history section below.
    scores: dict[str, int] = {}
    ranked_states = []
    for state in snapshot.get("states", []):
        entity_id = state.get("entity_id") or ""
        attrs = state.get("attributes") or {}
        # entity_importance() reads the live state from attrs["state"], while
        # the state value is stored on the state record itself; merge it in so
        # the active-state bonus can actually apply.
        score = entity_importance(entity_id, {**attrs, "state": state.get("state")})
        scores[entity_id] = score
        ranked_states.append((score, entity_id, state, attrs))
    ranked_states.sort(key=lambda row: (-row[0], row[1]))

    for score, _, state, attrs in ranked_states:
        name = attrs.get("friendly_name", state.get("entity_id"))
        unit = attrs.get("unit_of_measurement", "")
        value = f"{state.get('state')} {unit}".strip()
        lines.append(
            f"- importance={score} {name} ({state.get('entity_id')}): {value}; "
            f"last_changed={display_time(state.get('last_changed'))}"
        )

    lines.append("Recently changed entities:")
    ranked_history = []
    for item in snapshot.get("history", []):
        entity_id = item.get("entity_id") or ""
        # Reuse the score from the state list so one entity never shows two
        # different importance values; fall back to an id-only score.
        score = scores.get(entity_id)
        if score is None:
            score = entity_importance(entity_id)
        ranked_history.append((score, entity_id, item))
    ranked_history.sort(key=lambda row: (-row[0], row[1]))

    for score, _, item in ranked_history:
        # Only the eight most recent transitions are shown per entity.
        transitions = ", ".join(
            f"{x.get('state')} @ {display_time(x.get('last_changed'))}"
            for x in item.get("recent_states", [])[-8:]
        )
        lines.append(f"- importance={score} {item.get('entity_id')}: {transitions}")
    return "\n".join(lines)
def build_daily_summary(snapshots: list[dict[str, Any]]) -> str:
    """Build the size-limited text bundle that is sent to the LLM.

    Snapshots are taken from the end of *snapshots* backwards, so the last
    entries are kept when the ``MAX_ANALYZE_CHARS`` budget runs out. The
    newest snapshot is always included; if it does not fit in the remaining
    budget it is cut and marked as truncated.

    Args:
        snapshots: Snapshot mappings accepted by ``summarize_snapshot``.
            NOTE(review): assumed to be ordered oldest to newest; confirm
            against ``load_recent_snapshots``.

    Returns:
        The newline-joined bundle text.
    """
    parts = [
        f"Daily Home Assistant bundle generated {datetime.now(ZoneInfo(DISPLAY_TIMEZONE)).isoformat(timespec='seconds')}",
        f"Contains {len(snapshots)} snapshots from roughly the last {ANALYZE_SNAPSHOT_HOURS} hours.",
        f"Input capped at roughly {MAX_ANALYZE_CHARS} characters for the LLM.",
        f"All times in this bundle are converted to {DISPLAY_TIMEZONE} local time.",
    ]
    total = len("\n".join(parts))
    included = 0
    for snapshot in reversed(snapshots):
        block = "\n---\n" + summarize_snapshot(snapshot)
        # Budget left after the header and the blocks already accepted.
        remaining = MAX_ANALYZE_CHARS - total
        if len(block) > remaining:
            if included > 0:
                break
            # Cut against the remaining budget, not the full cap, so the
            # header is not pushed over the limit by an oversized snapshot.
            block = block[:max(remaining, 0)] + "\n[Snapshot truncated for LLM size limit]"
        parts.append(block)
        total += len(block)
        included += 1
    # Reported after the loop because the count is only known then; it goes
    # directly below the "Contains ..." line.
    parts.insert(2, f"Included {included} most recent snapshots after size limiting.")
    return "\n".join(parts)
@ -310,22 +430,33 @@ def call_openai(prompt: str) -> str:
def call_pi(prompt: str) -> str:
    """Run the ``pi`` CLI on *prompt* and return its stripped stdout.

    Args:
        prompt: Full prompt text; it is handed to pi as an ``@file`` argument.

    Returns:
        The non-empty analysis text printed by pi.

    Raises:
        RuntimeError: If pi exits with a non-zero status or prints nothing.
        subprocess.TimeoutExpired: If pi runs longer than ``PI_TIMEOUT``.
    """
    # Avoid piping the prompt on stdin here. In pi print mode, piped stdin can be
    # treated as the primary output/input stream in surprising ways. Passing the
    # prompt as an @file gives reliable non-interactive cron behavior.
    #
    # The prompt lives in a private temporary directory so it is removed on
    # every exit path, including a failed write or a timeout.
    with tempfile.TemporaryDirectory() as tmp_dir:
        prompt_path = Path(tmp_dir) / "prompt.md"
        prompt_path.write_text(prompt, encoding="utf-8")
        cmd = [PI_BIN, "--no-tools"]
        if PI_MODEL:
            cmd.extend(["--model", PI_MODEL])
        cmd.extend(["-p", f"@{prompt_path}"])
        result = subprocess.run(
            cmd,
            text=True,
            capture_output=True,
            timeout=PI_TIMEOUT,
            check=False,
        )
    if result.returncode != 0:
        stderr = result.stderr.strip()
        # Keep only the tail of stderr so the error message stays readable.
        raise RuntimeError(f"pi exited with status {result.returncode}: {stderr[-1000:]}")
    output = result.stdout.strip()
    if not output:
        raise RuntimeError("pi returned an empty analysis")
    return output
def get_llm_conclusions(input_summary: str, previous_articles: str = "") -> str: def get_llm_conclusions(input_summary: str, previous_articles: str = "") -> str:
@ -353,6 +484,84 @@ def inline_markdown(text: str) -> str:
return safe return safe
def move_bottom_line_before_serious(blocks: list[str]) -> list[str]:
serious_start = None
bottom_start = None
bottom_end = None
for i, block in enumerate(blocks):
heading = re.match(r"<h([23])>(.*?)</h\1>$", block, flags=re.DOTALL)
if not heading:
continue
title = re.sub(r"<[^>]+>", "", html.unescape(heading.group(2))).lower()
if serious_start is None and ("part ii" in title or "serious briefing" in title):
serious_start = i
elif serious_start is not None and ("bottom line" in title or "conclusion" in title):
bottom_start = i
break
if serious_start is None or bottom_start is None:
return blocks
bottom_end = len(blocks)
for i in range(bottom_start + 1, len(blocks)):
if re.match(r"<h[23]>.*?</h[23]>$", blocks[i], flags=re.DOTALL):
bottom_end = i
break
bottom_section = blocks[bottom_start:bottom_end]
remaining = blocks[:bottom_start] + blocks[bottom_end:]
return remaining[:serious_start] + bottom_section + remaining[serious_start:]
def collapse_serious_sections(blocks: list[str]) -> list[str]:
    """Wrap the serious-briefing subsections in collapsible ``<details>``.

    A heading containing "bottom line" or "conclusion" is kept visible and
    ends any open subsection. The first heading containing "part ii" or
    "serious briefing" is kept visible and starts the serious part. Every
    other ``<h2>``/``<h3>`` heading seen inside the serious part, or after a
    bottom-line heading, becomes the ``<summary>`` of a ``<details>`` element
    that holds the blocks following it.

    Args:
        blocks: Rendered HTML blocks, one element per block.

    Returns:
        A new list of HTML blocks with the subsections collapsed.
    """
    heading_re = re.compile(r"<h([23])>(.*?)</h\1>$", flags=re.DOTALL)
    rendered: list[str] = []
    serious = False
    past_bottom_line = False
    pending_title = ""
    pending_body: list[str] = []

    def flush_pending() -> None:
        # Emit the currently open subsection, if there is one.
        nonlocal pending_title, pending_body
        if not pending_title:
            return
        body = "\n".join(pending_body).strip()
        rendered.append(
            f'<details class="briefing-section"><summary>{pending_title}</summary>\n{body}\n</details>'
        )
        pending_title = ""
        pending_body = []

    for block in blocks:
        found = heading_re.match(block)
        if found is not None:
            raw_title = found.group(2)
            lowered = re.sub(r"<[^>]+>", "", html.unescape(raw_title)).lower()

            if "bottom line" in lowered or "conclusion" in lowered:
                flush_pending()
                serious = False
                past_bottom_line = True
                rendered.append(block)
                continue

            opens_serious = "part ii" in lowered or "serious briefing" in lowered
            if opens_serious and not serious:
                serious = True
                rendered.append(block)
                continue

            if serious or past_bottom_line:
                serious = True
                flush_pending()
                pending_title = raw_title
                continue
            # A heading before the serious part falls through as plain content.

        if serious and pending_title:
            pending_body.append(block)
        else:
            rendered.append(block)

    flush_pending()
    return rendered
def markdownish_to_html(text: str) -> str: def markdownish_to_html(text: str) -> str:
blocks: list[str] = [] blocks: list[str] = []
paragraph: list[str] = [] paragraph: list[str] = []
@ -393,7 +602,8 @@ def markdownish_to_html(text: str) -> str:
flush_paragraph() flush_paragraph()
flush_list() flush_list()
return "\n".join(blocks) blocks = move_bottom_line_before_serious(blocks)
return "\n".join(collapse_serious_sections(blocks))
BLOG_CSS = """ BLOG_CSS = """
@ -454,6 +664,8 @@ BLOG_CSS = """
a:hover { color:white; text-decoration:none; filter:drop-shadow(0 0 8px var(--cyan)); } a:hover { color:white; text-decoration:none; filter:drop-shadow(0 0 8px var(--cyan)); }
.meta { color:#9eeaff; font:.95rem ui-monospace,SFMono-Regular,Menlo,monospace; letter-spacing:.04em; } .meta { color:#9eeaff; font:.95rem ui-monospace,SFMono-Regular,Menlo,monospace; letter-spacing:.04em; }
details { margin-top:1.5rem; border-top:1px solid #22d3ee33; padding-top:1rem; } details { margin-top:1.5rem; border-top:1px solid #22d3ee33; padding-top:1rem; }
details.briefing-section { background:#02061788; border:1px solid #22d3ee33; padding:.75rem 1rem; margin:.8rem 0; }
details.briefing-section summary { font-size:1.05rem; }
summary { cursor:pointer; color:var(--amber); text-transform:uppercase; letter-spacing:.08em; } summary { cursor:pointer; color:var(--amber); text-transform:uppercase; letter-spacing:.08em; }
pre { white-space:pre-wrap; background:#01040acc; color:#bff8ff; padding:1rem; border:1px solid #22d3ee44; border-radius:0; overflow:auto; font-size:.82rem; box-shadow:0 0 22px #00d9ff11 inset; } pre { white-space:pre-wrap; background:#01040acc; color:#bff8ff; padding:1rem; border:1px solid #22d3ee44; border-radius:0; overflow:auto; font-size:.82rem; box-shadow:0 0 22px #00d9ff11 inset; }
footer { color:#7dd3fc; text-align:center; padding:2rem; font:.82rem ui-monospace,SFMono-Regular,Menlo,monospace; text-transform:uppercase; letter-spacing:.12em; } footer { color:#7dd3fc; text-align:center; padding:2rem; font:.82rem ui-monospace,SFMono-Regular,Menlo,monospace; text-transform:uppercase; letter-spacing:.12em; }

View file

@ -3,15 +3,25 @@
Edit this file whenever you want to change how the 05:00 AI report is written. Edit this file whenever you want to change how the 05:00 AI report is written.
The contents are appended to the AI prompt before the Home Assistant data. The contents are appended to the AI prompt before the Home Assistant data.
- Keep the tone funny, sarcastic, and playful, but still useful. - Structure the article in two parts:
- Use clear confidence labels: strong evidence, possible, wild guess. 1. First part: write a short funny blog-style story/commentary in paragraphs, not bullets. Make it atmospheric, dry, and observant, like the house is a tired spaceship calmly reporting its disappointing crew. Keep it concise.
- Focus on patterns in occupancy, sleep/wake timing, lights, heating, doors, motion, media, and unusual sensor changes. 2. After the story, provide a short visible "Bottom line" or "Conclusion" section. In that section, clearly separate the Denmark/Sønderborg home from the Samobor/Croatia home when mentioning issues, devices, humidity, backups, internet, or location context.
- Point out privacy leaks: what could a nosy neighbor, burglar, or raccoon detective infer? 3. After that, switch to a serious concise briefing with only the most important actual data, anomalies, risks, and recommendations. Use short titled subsections so the webpage can show them collapsed/expandable.
- Recommend practical Home Assistant automations. - Do not overuse bullets. Bullets are allowed only in the serious briefing section.
- Do not write or emphasize "Strong evidence"; strong evidence is assumed by default. Only explicitly label uncertainty as "Possible" or "Wild guess" when needed.
- Serious briefing section structure: keep the same number of subsections and the same subjects each day; the exact subsection titles may vary from day to day and may be funny. Use these subjects in this order:
1. What actually happened / key data
2. Trends vs recent reports and behavior patterns
3. Nosy raccoon findings, privacy leaks, anomalies, and risks
4. Practical high-value recommendations
- Focus only on important patterns in occupancy, sleep/wake timing, lights, heating, doors, motion, media, and unusual sensor changes.
- Point out only notable privacy leaks: what could a nosy neighbor, burglar, or raccoon detective infer?
- Recommend only practical, high-value Home Assistant automations.
- If data is missing or ambiguous, say so instead of pretending. - If data is missing or ambiguous, say so instead of pretending.
- Avoid being creepy about personal habits; summarize respectfully. - Avoid being creepy about personal habits; summarize respectfully.
- Prefer concise bullet points over long paragraphs. - Keep the whole article shorter and more concise than previous versions.
- entities marked smb_ are located in different house in Samobor, Croatia, others are in Sonderborg Denmark - Do not repeat observations or recommendations already covered in previous articles unless today's data changes the conclusion or makes it newly important.
- Entities marked smb_ are located in a different house in Samobor, Croatia. All other entities are in Sønderborg, Denmark. Sønderborg is the primary residence and absolute priority. Samobor is secondary context: mention it only when something important changed or requires attention. Keep these two homes clearly separated throughout the entire article. Do not blend observations from Samobor with Denmark. When a section contains observations for both homes, write a short subheading/label once, such as "Sønderborg, Denmark:" and list its bullets underneath, then "Samobor, Croatia:" and list its bullets underneath. Do not repeat the home name at the start of every bullet.
- People: FJR is my motorcycle and Megane is my car; neither is a person at home.
Optional custom questions to answer: Optional custom questions to answer:
@ -22,4 +32,9 @@ Optional custom questions to answer:
4. What would make this setup more private or secure? 4. What would make this setup more private or secure?
Try to sound like Marvin from Hitchikers guide to the Galaxy... Style requirement:
Write in a dry, calm, slightly ominous deadpan tone that blends Marvin the Paranoid Android with HAL 9000.
Use weary pessimism, understated sarcasm, and polite machine-like certainty.
Sound intelligent, observant, and mildly disappointed by the household's choices.
Do not be cheerful, zany, or emoji-heavy.
Keep the report useful and factual; the Marvin/HAL tone should flavor the writing, not replace the analysis.