Improve analysis prioritization and article structure
This commit is contained in:
parent
3aaa6df53c
commit
52c6081a97
3 changed files with 260 additions and 31 deletions
258
ha_observer.py
258
ha_observer.py
|
|
@ -18,10 +18,12 @@ import os
|
|||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from email.utils import format_datetime
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from zoneinfo import ZoneInfo
|
||||
|
||||
import requests
|
||||
|
||||
|
|
@ -38,6 +40,8 @@ HISTORY_HOURS = int(os.environ.get("HISTORY_HOURS", "24"))
|
|||
MAX_HISTORY_PER_ENTITY = int(os.environ.get("MAX_HISTORY_PER_ENTITY", "20"))
|
||||
ANALYZE_SNAPSHOT_HOURS = int(os.environ.get("ANALYZE_SNAPSHOT_HOURS", "24"))
|
||||
ARTICLE_CONTEXT_DAYS = int(os.environ.get("ARTICLE_CONTEXT_DAYS", "7"))
|
||||
MAX_ANALYZE_CHARS = int(os.environ.get("MAX_ANALYZE_CHARS", "80000"))
|
||||
DISPLAY_TIMEZONE = os.environ.get("DISPLAY_TIMEZONE", "Europe/Copenhagen")
|
||||
KEEP_SNAPSHOT_DAYS = int(os.environ.get("KEEP_SNAPSHOT_DAYS", "14"))
|
||||
|
||||
# LLM_MODE: none | pi | ollama | openai
|
||||
|
|
@ -75,6 +79,44 @@ ALLOWED_ATTRIBUTES = {
|
|||
"assumed_state",
|
||||
}
|
||||
|
||||
# Keyword -> bonus points. entity_importance() adds the points of every keyword
# that occurs as a substring of the lower-cased
# "<entity_id> <friendly_name> <device_class>" text, so several keywords can
# stack for one entity.
IMPORTANT_ENTITY_KEYWORDS: dict[str, int] = {
    # Safety
    "alarm": 100,
    "smoke": 100,
    "co_": 100,
    "carbon_monoxide": 100,
    "leak": 95,
    "water": 80,
    # Access points
    "door": 85,
    "window": 80,
    "lock": 85,
    # Presence and people
    "motion": 70,
    "presence": 70,
    "occupancy": 70,
    "person": 75,
    "device_tracker": 75,
    "phone": 70,
    "laptop": 60,
    # Device health and indoor climate
    "battery": 65,
    "humidity": 60,
    "temperature": 55,
    "climate": 55,
    "heating": 55,
    "dehumidifier": 70,
    # Infrastructure
    "backup": 70,
    "internet": 65,
    "speedtest": 65,
    "router": 60,
    # Comfort and entertainment
    "light": 45,
    "switch": 35,
    "sonos": 45,
    "media": 40,
    "tv": 40,
    # Household-specific names (presumably vehicles / plant sensors — TODO confirm)
    "megane": 50,
    "fjr": 50,
    "plant": 45,
    # Prefix used by the Samobor/Croatia entities.
    "smb_": 60,
}
|
||||
|
||||
|
||||
class ConfigError(RuntimeError):
    """Dedicated RuntimeError subclass for configuration problems.

    NOTE(review): the raise sites are outside this view; presumably raised
    when required settings are missing or invalid.
    """
|
||||
|
|
@ -198,28 +240,106 @@ def load_recent_snapshots(hours: int) -> list[dict[str, Any]]:
|
|||
return snapshots
|
||||
|
||||
|
||||
def display_time(value: str | None) -> str:
|
||||
if not value:
|
||||
return ""
|
||||
try:
|
||||
dt = datetime.fromisoformat(value.replace("Z", "+00:00"))
|
||||
if dt.tzinfo is None:
|
||||
dt = dt.replace(tzinfo=timezone.utc)
|
||||
local = dt.astimezone(ZoneInfo(DISPLAY_TIMEZONE))
|
||||
return local.strftime("%Y-%m-%d %H:%M:%S %Z")
|
||||
except Exception:
|
||||
return value
|
||||
|
||||
|
||||
# Base score per entity domain (the part of the entity_id before the first
# "."). Built once at import time instead of on every entity_importance() call.
_DOMAIN_SCORES: dict[str, int] = {
    "alarm_control_panel": 100,
    "lock": 90,
    "person": 80,
    "device_tracker": 75,
    "binary_sensor": 60,
    "climate": 55,
    "cover": 50,
    "sensor": 45,
    "light": 35,
    "switch": 30,
    "media_player": 25,
}
# Score used for any domain that is not listed in _DOMAIN_SCORES.
_DEFAULT_DOMAIN_SCORE = 10
# States that earn a small "currently active" bonus.
_ACTIVE_STATES = frozenset({"on", "open", "unlocked", "detected", "home"})


def entity_importance(entity_id: str, attrs: dict[str, Any] | None = None) -> int:
    """Score how important an entity is; higher scores are listed first.

    The score is the sum of:
      * a base score for the entity's domain (10 for unlisted domains),
      * the points of every IMPORTANT_ENTITY_KEYWORDS key that occurs as a
        substring of the lower-cased entity id, friendly name and device class,
      * a location adjustment (-40 for ``smb_`` entities, +120 for all others),
      * +15 when ``attrs["state"]`` is one of the active states.

    Args:
        entity_id: Entity id such as ``"binary_sensor.front_door"``. A falsy
            value is treated as an empty id instead of raising.
        attrs: Optional mapping read for ``friendly_name``, ``device_class``
            and ``state``.

    Returns:
        The integer importance score (may be compared, not interpreted as an
        absolute scale).
    """
    attrs = attrs or {}
    entity_id = entity_id or ""
    entity_id_lower = entity_id.lower()

    domain = entity_id.split(".", 1)[0]
    score = _DOMAIN_SCORES.get(domain, _DEFAULT_DOMAIN_SCORE)

    searchable = f"{entity_id} {attrs.get('friendly_name', '')} {attrs.get('device_class', '')}".lower()
    score += sum(
        points
        for keyword, points in IMPORTANT_ENTITY_KEYWORDS.items()
        if keyword in searchable
    )

    # Sønderborg/Denmark home is the primary residence and absolute priority.
    # Samobor/Croatia entities use the smb_ prefix and are still included, but
    # they should lose ties when the LLM input has to be size-limited.
    score += -40 if "smb_" in entity_id_lower else 120

    # NOTE(review): the state is looked up inside ``attrs``. Callers that pass
    # only an entity's attributes dict will not trigger this bonus unless that
    # dict also carries a "state" key — confirm against the snapshot schema.
    if str(attrs.get("state", "")).lower() in _ACTIVE_STATES:
        score += 15
    return score
|
||||
|
||||
|
||||
def summarize_snapshot(snapshot: dict[str, Any]) -> str:
    """Render one snapshot as plain text, most important entities first.

    The output has a header, a "Current states:" list and a
    "Recently changed entities:" list. Both lists are ordered by descending
    entity_importance() score, ties broken by entity id, and every line carries
    its score as ``importance=N``. All timestamps go through display_time().

    Args:
        snapshot: Mapping read for ``generated_at``, ``states`` (items read for
            ``entity_id``, ``state``, ``attributes``, ``last_changed``) and
            ``history`` (items read for ``entity_id`` and ``recent_states``).

    Returns:
        The newline-joined summary text.
    """
    lines = [
        f"Snapshot: {display_time(snapshot.get('generated_at'))}",
        "Priority current states first; lower-priority entities follow only if the LLM size limit allows.",
        "Current states:",
    ]

    # Score every entity exactly once and reuse the score for both ordering
    # and display.
    scored_states = []
    for state in snapshot.get("states", []):
        attrs = state.get("attributes") or {}
        # entity_importance() reads the state from the mapping it is given,
        # but the state value lives on the state object, not in its
        # attributes. Supply it here so the active-state bonus can apply; a
        # "state" key already present in the attributes still wins.
        scoring_attrs = {"state": state.get("state"), **attrs}
        score = entity_importance(state.get("entity_id", ""), scoring_attrs)
        scored_states.append((score, state, attrs))
    scored_states.sort(key=lambda row: (-row[0], row[1].get("entity_id", "")))

    for score, state, attrs in scored_states:
        name = attrs.get("friendly_name", state.get("entity_id"))
        unit = attrs.get("unit_of_measurement", "")
        value = f"{state.get('state')} {unit}".strip()
        lines.append(
            f"- importance={score} {name} ({state.get('entity_id')}): {value}; "
            f"last_changed={display_time(state.get('last_changed'))}"
        )

    lines.append("Recently changed entities:")
    # History items are scored from the entity id alone.
    scored_history = [
        (entity_importance(item.get("entity_id", "")), item)
        for item in snapshot.get("history", [])
    ]
    scored_history.sort(key=lambda row: (-row[0], row[1].get("entity_id", "")))

    for score, item in scored_history:
        # Only the last eight recorded transitions are shown per entity.
        transitions = ", ".join(
            f"{x.get('state')} @ {display_time(x.get('last_changed'))}"
            for x in item.get("recent_states", [])[-8:]
        )
        lines.append(f"- importance={score} {item.get('entity_id')}: {transitions}")

    return "\n".join(lines)
|
||||
|
||||
|
||||
def build_daily_summary(snapshots: list[dict[str, Any]]) -> str:
    """Build the size-limited text bundle of snapshots that is sent to the LLM.

    Snapshots are taken from the end of ``snapshots`` backwards until adding
    the next one would push the bundle past MAX_ANALYZE_CHARS. At least one
    snapshot is always included; if that single snapshot is itself longer than
    the limit it is cut and marked as truncated. Because the list is walked in
    reverse, the included snapshots appear in the bundle in reverse input
    order.

    Args:
        snapshots: Snapshot mappings accepted by summarize_snapshot().
            NOTE(review): assumed to be ordered oldest-first, which is what
            makes the reverse walk select the most recent ones — confirm
            against load_recent_snapshots().

    Returns:
        The newline-joined bundle: header lines followed by the included
        snapshot summaries, each introduced by a ``---`` separator.
    """
    generated_at = datetime.now(ZoneInfo(DISPLAY_TIMEZONE)).isoformat(timespec="seconds")
    parts = [
        f"Daily Home Assistant bundle generated {generated_at}",
        f"Contains {len(snapshots)} snapshots from roughly the last {ANALYZE_SNAPSHOT_HOURS} hours.",
        f"Input capped at roughly {MAX_ANALYZE_CHARS} characters for the LLM.",
        f"All times in this bundle are converted to {DISPLAY_TIMEZONE} local time.",
    ]

    total = len("\n".join(parts))
    included = 0
    for snapshot in reversed(snapshots):
        block = "\n---\n" + summarize_snapshot(snapshot)
        # Stop once the budget is exhausted, but never before one snapshot
        # has made it into the bundle.
        if included > 0 and total + len(block) > MAX_ANALYZE_CHARS:
            break
        if len(block) > MAX_ANALYZE_CHARS:
            block = block[:MAX_ANALYZE_CHARS] + "\n[Snapshot truncated for LLM size limit]"
        parts.append(block)
        total += len(block)
        included += 1

    # Inserted after the loop because the count is only known now; index 2
    # places it directly after the "Contains ..." header line.
    parts.insert(2, f"Included {included} most recent snapshots after size limiting.")
    return "\n".join(parts)
|
||||
|
||||
|
||||
|
|
@ -310,22 +430,33 @@ def call_openai(prompt: str) -> str:
|
|||
|
||||
|
||||
def call_pi(prompt: str) -> str:
    """Run the ``pi`` CLI on ``prompt`` and return its trimmed stdout.

    The prompt is written to a temporary ``.md`` file that is passed to pi as
    ``-p @<path>``; the file is removed afterwards whether or not the run
    succeeded.

    Args:
        prompt: Full prompt text for the model.

    Returns:
        pi's standard output with surrounding whitespace stripped.

    Raises:
        RuntimeError: If pi exits with a non-zero status (the message carries
            the last 1000 characters of stderr) or prints nothing.
        subprocess.TimeoutExpired: If pi runs longer than PI_TIMEOUT seconds.
    """
    # Avoid piping the prompt on stdin here. In pi print mode, piped stdin can be
    # treated as the primary output/input stream in surprising ways. Passing the
    # prompt as an @file gives reliable non-interactive cron behavior.
    tmp = tempfile.NamedTemporaryFile("w", encoding="utf-8", suffix=".md", delete=False)
    prompt_path = tmp.name
    try:
        # Writing happens inside the try so that a failed write cannot leave
        # the delete=False temp file behind.
        with tmp:
            tmp.write(prompt)

        cmd = [PI_BIN, "--no-tools"]
        if PI_MODEL:
            cmd.extend(["--model", PI_MODEL])
        cmd.extend(["-p", f"@{prompt_path}"])
        result = subprocess.run(
            cmd,
            text=True,
            capture_output=True,
            timeout=PI_TIMEOUT,
            check=False,
        )
    finally:
        Path(prompt_path).unlink(missing_ok=True)

    if result.returncode != 0:
        stderr = result.stderr.strip()
        raise RuntimeError(f"pi exited with status {result.returncode}: {stderr[-1000:]}")

    output = result.stdout.strip()
    if not output:
        # A successful exit with no text would otherwise be published as an
        # empty analysis.
        raise RuntimeError("pi returned an empty analysis")
    return output
|
||||
|
||||
|
||||
def get_llm_conclusions(input_summary: str, previous_articles: str = "") -> str:
|
||||
|
|
@ -353,6 +484,84 @@ def inline_markdown(text: str) -> str:
|
|||
return safe
|
||||
|
||||
|
||||
def move_bottom_line_before_serious(blocks: list[str]) -> list[str]:
|
||||
serious_start = None
|
||||
bottom_start = None
|
||||
bottom_end = None
|
||||
|
||||
for i, block in enumerate(blocks):
|
||||
heading = re.match(r"<h([23])>(.*?)</h\1>$", block, flags=re.DOTALL)
|
||||
if not heading:
|
||||
continue
|
||||
title = re.sub(r"<[^>]+>", "", html.unescape(heading.group(2))).lower()
|
||||
if serious_start is None and ("part ii" in title or "serious briefing" in title):
|
||||
serious_start = i
|
||||
elif serious_start is not None and ("bottom line" in title or "conclusion" in title):
|
||||
bottom_start = i
|
||||
break
|
||||
|
||||
if serious_start is None or bottom_start is None:
|
||||
return blocks
|
||||
|
||||
bottom_end = len(blocks)
|
||||
for i in range(bottom_start + 1, len(blocks)):
|
||||
if re.match(r"<h[23]>.*?</h[23]>$", blocks[i], flags=re.DOTALL):
|
||||
bottom_end = i
|
||||
break
|
||||
|
||||
bottom_section = blocks[bottom_start:bottom_end]
|
||||
remaining = blocks[:bottom_start] + blocks[bottom_end:]
|
||||
return remaining[:serious_start] + bottom_section + remaining[serious_start:]
|
||||
|
||||
|
||||
def collapse_serious_sections(blocks: list[str]) -> list[str]:
    """Wrap the serious briefing's sub-sections in collapsible <details>.

    Walks the blocks in order:
      * a "bottom line"/"conclusion" heading is kept as-is, closes any open
        section and ends the serious part;
      * the first "part ii"/"serious briefing" heading seen while outside the
        serious part is kept as-is and starts it;
      * any other h2/h3 heading inside the serious part, or anywhere after a
        bottom-line heading, becomes the <summary> of a new
        ``<details class="briefing-section">`` that collects the blocks
        following it;
      * everything else passes through unchanged.

    Args:
        blocks: Rendered HTML blocks, one element per block.

    Returns:
        A new list in which each collapsed section is a single <details> block.
    """
    rendered: list[str] = []
    serious = False
    seen_bottom_line = False
    open_title = ""
    open_body: list[str] = []

    def flush_open_section() -> None:
        # Emit the section being collected, if any, and reset the collector.
        nonlocal open_title, open_body
        if not open_title:
            return
        body = "\n".join(open_body).strip()
        rendered.append(f"<details class=\"briefing-section\"><summary>{open_title}</summary>\n{body}\n</details>")
        open_title = ""
        open_body = []

    for block in blocks:
        heading = re.match(r"<h([23])>(.*?)</h\1>$", block, flags=re.DOTALL)

        if heading is None:
            # Body content belongs to the open section when there is one.
            if serious and open_title:
                open_body.append(block)
            else:
                rendered.append(block)
            continue

        title = heading.group(2)
        plain_title = re.sub(r"<[^>]+>", "", html.unescape(title)).lower()

        if "bottom line" in plain_title or "conclusion" in plain_title:
            flush_open_section()
            serious = False
            seen_bottom_line = True
            rendered.append(block)
        elif not serious and ("part ii" in plain_title or "serious briefing" in plain_title):
            serious = True
            rendered.append(block)
        elif serious or seen_bottom_line:
            # Headings after the bottom line are collapsed as well, even if no
            # explicit serious-briefing heading follows it.
            serious = True
            flush_open_section()
            open_title = title
        else:
            rendered.append(block)

    flush_open_section()
    return rendered
|
||||
|
||||
|
||||
def markdownish_to_html(text: str) -> str:
|
||||
blocks: list[str] = []
|
||||
paragraph: list[str] = []
|
||||
|
|
@ -393,7 +602,8 @@ def markdownish_to_html(text: str) -> str:
|
|||
|
||||
flush_paragraph()
|
||||
flush_list()
|
||||
return "\n".join(blocks)
|
||||
blocks = move_bottom_line_before_serious(blocks)
|
||||
return "\n".join(collapse_serious_sections(blocks))
|
||||
|
||||
|
||||
BLOG_CSS = """
|
||||
|
|
@ -454,6 +664,8 @@ BLOG_CSS = """
|
|||
a:hover { color:white; text-decoration:none; filter:drop-shadow(0 0 8px var(--cyan)); }
|
||||
.meta { color:#9eeaff; font:.95rem ui-monospace,SFMono-Regular,Menlo,monospace; letter-spacing:.04em; }
|
||||
details { margin-top:1.5rem; border-top:1px solid #22d3ee33; padding-top:1rem; }
|
||||
details.briefing-section { background:#02061788; border:1px solid #22d3ee33; padding:.75rem 1rem; margin:.8rem 0; }
|
||||
details.briefing-section summary { font-size:1.05rem; }
|
||||
summary { cursor:pointer; color:var(--amber); text-transform:uppercase; letter-spacing:.08em; }
|
||||
pre { white-space:pre-wrap; background:#01040acc; color:#bff8ff; padding:1rem; border:1px solid #22d3ee44; border-radius:0; overflow:auto; font-size:.82rem; box-shadow:0 0 22px #00d9ff11 inset; }
|
||||
footer { color:#7dd3fc; text-align:center; padding:2rem; font:.82rem ui-monospace,SFMono-Regular,Menlo,monospace; text-transform:uppercase; letter-spacing:.12em; }
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue