diff --git a/.env.example b/.env.example index 7ddadcc..8048f3c 100644 --- a/.env.example +++ b/.env.example @@ -10,7 +10,7 @@ DATA_DIR="./data" REPORT_DIR="./reports" WEB_DIR="./web" SITE_BASE_PATH="/" -SITE_URL="http://localhost" +SITE_URL="https://hapi.novosel.dk" # Extra owner directions appended to the 05:00 AI prompt PROMPT_FILE="./llm_instructions.md" diff --git a/README.md b/README.md index b7b1f65..83d6e1f 100644 --- a/README.md +++ b/README.md @@ -105,7 +105,7 @@ Run the 05:00-style analysis/publishing step: Open the blog served by nginx: ```text -http://localhost/ +https://hapi.novosel.dk/ ``` This instance publishes to the web root with: @@ -113,7 +113,7 @@ This instance publishes to the web root with: ```bash WEB_DIR="/var/www/html" SITE_BASE_PATH="/" -SITE_URL="http://piagent" +SITE_URL="https://hapi.novosel.dk" ``` For a subdirectory install, use for example: @@ -121,10 +121,10 @@ For a subdirectory install, use for example: ```bash WEB_DIR="/var/www/html/haobserver" SITE_BASE_PATH="/haobserver" -SITE_URL="http://piagent" +SITE_URL="https://hapi.novosel.dk" ``` -Daily articles are written under `articles/YYYY-MM-DD.html` inside `WEB_DIR`, and `index.html` links to the archive. An RSS feed is published at `rss.xml`, and a sci-fi favicon is published at `favicon.svg`. New articles include context from previous reports from the last `ARTICLE_CONTEXT_DAYS` days. +Daily articles are written under `articles/YYYY-MM-DD.html` inside `WEB_DIR`, and `index.html` links to the archive. A clean RSS feed is published at `rss.xml` using `SITE_URL` for absolute links, and a sci-fi favicon is published at `favicon.svg`. New articles include context from previous reports from the last `ARTICLE_CONTEXT_DAYS` days. ## Install cron jobs diff --git a/ha_observer.py b/ha_observer.py index 823f480..cdcd3ec 100755 --- a/ha_observer.py +++ b/ha_observer.py @@ -267,8 +267,9 @@ Use these only for trend/context awareness. Do not claim something happened toda return f"""You are writing today's Home Assistant smart-home blog article for the owner. -Write a funny but useful morning briefing in a blog/article style. Use light humor, emojis, -and playful headings, but remain factual and privacy-aware. Include: +Write a funny but useful morning briefing in a clean blog/article style. Use light humor, +but keep emojis/smileys rare: at most one in the whole article. Prefer clear headings, +short paragraphs, and readable bullet lists. Remain factual and privacy-aware. Include: - A short comedy headline for the day - What seemed to happen at home today - Behavioral patterns that can reasonably be inferred @@ -340,22 +341,67 @@ def get_llm_conclusions(input_summary: str, previous_articles: str = "") -> str: return f"Unknown LLM_MODE={LLM_MODE!r}. Use none, pi, ollama, or openai." -def markdownish_to_html(text: str) -> str: - safe = html.escape(text) - safe = re.sub(r"^### (.*)$", r"

\1

", safe, flags=re.MULTILINE) - safe = re.sub(r"^## (.*)$", r"

\1

", safe, flags=re.MULTILINE) - safe = re.sub(r"^# (.*)$", r"

\1

", safe, flags=re.MULTILINE) - safe = re.sub(r"^- (.*)$", r"
  • \1
  • ", safe, flags=re.MULTILINE) - safe = safe.replace("\n", "
    \n") +def remove_most_emoji(text: str) -> str: + # Keep the writing readable on the blog page even if the model gets a bit too festive. + return re.sub(r"[\U0001F300-\U0001FAFF\U00002700-\U000027BF\U00002600-\U000026FF]+", "", text) + + +def inline_markdown(text: str) -> str: + safe = html.escape(remove_most_emoji(text).strip()) + safe = re.sub(r"\*\*(.*?)\*\*", r"\1", safe) + safe = re.sub(r"`([^`]+)`", r"\1", safe) return safe +def markdownish_to_html(text: str) -> str: + blocks: list[str] = [] + paragraph: list[str] = [] + list_items: list[str] = [] + + def flush_paragraph() -> None: + nonlocal paragraph + if paragraph: + blocks.append(f"

    {inline_markdown(' '.join(paragraph))}

    ") + paragraph = [] + + def flush_list() -> None: + nonlocal list_items + if list_items: + blocks.append("") + list_items = [] + + for raw_line in text.splitlines(): + line = raw_line.strip() + if not line: + flush_paragraph() + flush_list() + continue + heading = re.match(r"^(#{1,3})\s+(.+)$", line) + if heading: + flush_paragraph() + flush_list() + level = min(len(heading.group(1)), 3) + blocks.append(f"{inline_markdown(heading.group(2))}") + continue + bullet = re.match(r"^[-*]\s+(.+)$", line) + if bullet: + flush_paragraph() + list_items.append(inline_markdown(bullet.group(1))) + continue + flush_list() + paragraph.append(line) + + flush_paragraph() + flush_list() + return "\n".join(blocks) + + BLOG_CSS = """ :root { color-scheme: dark; --cyan:#00f5ff; --blue:#2777ff; --violet:#8b5cf6; --amber:#fbbf24; --panel:#07111fcc; --line:#1de7ff66; } * { box-sizing:border-box; } body { margin:0; min-height:100vh; color:#dff9ff; line-height:1.7; - font-family:'Rajdhani','Orbitron','Eurostile',system-ui,sans-serif; + font-family:Inter,ui-sans-serif,system-ui,-apple-system,BlinkMacSystemFont,'Segoe UI',sans-serif; background: radial-gradient(circle at 16% 10%, #1746ff55 0 12rem, transparent 28rem), radial-gradient(circle at 82% 4%, #00f5ff30 0 10rem, transparent 24rem), @@ -372,29 +418,31 @@ BLOG_CSS = """ background-size:54px 54px,54px 54px,180px 180px; mask-image:linear-gradient(to bottom,#000 0%,#000 55%,transparent 100%); } - body::after { - content:""; position:fixed; inset:0; pointer-events:none; opacity:.14; - background:repeating-linear-gradient(to bottom, transparent 0 3px, #ffffff 4px 5px); - mix-blend-mode:screen; - } header { position:relative; border-bottom:1px solid var(--line); background:linear-gradient(90deg,#020617dd,#051b33bb,#020617dd); box-shadow:0 0 42px #00d9ff22; } header::before, header::after { content:""; position:absolute; top:0; bottom:0; width:18vw; border-color:var(--cyan); opacity:.65; pointer-events:none; } header::before { left:0; border-top:2px solid; border-left:2px solid; clip-path:polygon(0 0,100% 0,35% 100%,0 100%); } header::after { right:0; border-top:2px solid; border-right:2px solid; clip-path:polygon(0 0,100% 0,100% 100%,65% 100%); } .wrap { max-width:1180px; margin:0 auto; padding:1.5rem; position:relative; } .masthead { padding:3rem 1.5rem 2.6rem; text-align:center; } - .kicker { color:var(--cyan); text-transform:uppercase; letter-spacing:.28em; font:800 .78rem system-ui,sans-serif; text-shadow:0 0 14px #00f5ff; } + .kicker { color:var(--cyan); text-transform:uppercase; letter-spacing:.28em; font:800 .78rem ui-monospace,SFMono-Regular,Menlo,monospace; text-shadow:0 0 14px #00f5ff; } h1 { margin:.35rem 0; font-size:clamp(2.4rem,7vw,6rem); line-height:.9; text-transform:uppercase; letter-spacing:.05em; color:#f8feff; text-shadow:0 0 12px #00f5ff,0 0 38px #2777ff; } - h2,h3 { color:#c8fbff; line-height:1.15; text-transform:uppercase; letter-spacing:.06em; text-shadow:0 0 12px #00f5ff88; } + h2,h3 { color:#c8fbff; line-height:1.2; letter-spacing:.03em; text-shadow:0 0 12px #00f5ff88; } article, aside { position:relative; background:linear-gradient(180deg,#071827d9,#050914e6); border:1px solid var(--line); clip-path:polygon(0 18px,18px 0,100% 0,100% calc(100% - 18px),calc(100% - 18px) 100%,0 100%); box-shadow:0 0 0 1px #2777ff22 inset,0 0 34px #00d9ff18,0 24px 60px #000b; } article::before, aside::before { content:""; position:absolute; inset:0; pointer-events:none; border:1px solid #ffffff12; clip-path:inherit; } - article { padding:clamp(1.1rem,3vw,2.2rem); } - article p, article li { font-size:1.06rem; color:#e6fbff; } + article { padding:clamp(1.2rem,3vw,2.4rem); } + article p { margin:0 0 1.05rem; max-width:72ch; } + article ul { margin:.2rem 0 1.2rem; padding-left:1.35rem; max-width:74ch; } + article li { margin:.35rem 0; } + article p, article li { font-size:1.04rem; color:#e6fbff; } article h1 { font-size:clamp(1.8rem,4vw,3.5rem); text-align:left; } + article h2 { margin-top:1.8rem; padding-top:1rem; border-top:1px solid #22d3ee33; } + article h1 + p, article h2 + p, article h3 + p { margin-top:.3rem; } + strong { color:#ffffff; font-weight:750; } + code { color:#fef3c7; background:#020617; border:1px solid #22d3ee33; padding:.08rem .28rem; } .layout { display:grid; grid-template-columns:minmax(0,1fr) 310px; gap:1.35rem; align-items:start; } aside { padding:1.1rem; position:sticky; top:1rem; } .archive { list-style:none; margin:0; padding:0; } @@ -465,18 +513,40 @@ def write_favicon() -> Path: return path +def clean_rss_text(article_html: str) -> tuple[str, str]: + article_match = re.search(r"]*>(.*?)", article_html, flags=re.DOTALL | re.IGNORECASE) + content = article_match.group(1) if article_match else article_html + content = re.sub(r"", " ", content, flags=re.DOTALL | re.IGNORECASE) + content = re.sub(r"

    ]*>Permanent link.*?

    ", " ", content, flags=re.DOTALL | re.IGNORECASE) + title_match = re.search(r"]*>(.*?)|]*>(.*?)", content, flags=re.DOTALL | re.IGNORECASE) + title = "Smart Home Briefing" + if title_match: + title = re.sub(r"<[^>]+>", " ", title_match.group(1) or title_match.group(2) or "") + title = re.sub(r"\s+", " ", html.unescape(title)).strip() or "Smart Home Briefing" + text = re.sub(r"
    \s*", "\n", content) + text = re.sub(r"", "\n", text, flags=re.IGNORECASE) + text = re.sub(r"<[^>]+>", " ", text) + text = html.unescape(text) + text = re.sub(r"[`*_#]", "", text) + text = re.sub(r"^[\s\-•]+", "", text, flags=re.MULTILINE) + text = re.sub(r"[ \t]+", " ", text) + text = re.sub(r"\n\s*\n+", "\n\n", text).strip() + return title, text + + def write_rss_feed() -> Path: articles_dir = WEB_DIR / "articles" items = [] for path in sorted(articles_dir.glob("*.html"), reverse=True)[:20]: - title = path.stem + fallback_title = path.stem try: - title = datetime.strptime(path.stem, "%Y-%m-%d").strftime("Smart Home Briefing - %A, %B %-d, %Y") + fallback_title = datetime.strptime(path.stem, "%Y-%m-%d").strftime("Smart Home Briefing - %A, %B %-d, %Y") except ValueError: - title = f"Smart Home Briefing - {path.stem}" + fallback_title = f"Smart Home Briefing - {path.stem}" content = path.read_text(encoding="utf-8", errors="ignore") - description = re.sub(r"<[^>]+>", " ", content) - description = re.sub(r"\s+", " ", html.unescape(description)).strip()[:500] + article_title, article_text = clean_rss_text(content) + title = article_title if article_title != "Smart Home Briefing" else fallback_title + description = article_text[:600] pub_dt = datetime.fromtimestamp(path.stat().st_mtime, timezone.utc) url = site_url(f"articles/{path.name}") items.append(f""" @@ -488,12 +558,14 @@ def write_rss_feed() -> Path: {html.escape(description)} """) now = format_datetime(datetime.now(timezone.utc), usegmt=True) + feed_url = site_url("rss.xml") feed = f""" - + Smart Home Gossip Gazette {html.escape(site_url())} - Daily Home Assistant smart-home briefings from the orbital raccoon telemetry desk. + + Daily Home Assistant smart-home briefings. en {now} {''.join(items)} @@ -512,7 +584,8 @@ def blog_shell(title: str, subtitle: str, main_content: str, archive_links: str) {html.escape(title)} - + + diff --git a/llm_instructions.md b/llm_instructions.md index de3d95a..289ad37 100644 --- a/llm_instructions.md +++ b/llm_instructions.md @@ -3,10 +3,8 @@ Edit this file whenever you want to change how the 05:00 AI report is written. The contents are appended to the AI prompt before the Home Assistant data. -Suggested directions: - - Keep the tone funny, sarcastic, and playful, but still useful. -- Use clear confidence labels: **strong evidence**, **possible**, **wild guess**. +- Use clear confidence labels: strong evidence, possible, wild guess. - Focus on patterns in occupancy, sleep/wake timing, lights, heating, doors, motion, media, and unusual sensor changes. - Point out privacy leaks: what could a nosy neighbor, burglar, or raccoon detective infer? - Recommend practical Home Assistant automations. @@ -14,7 +12,7 @@ Suggested directions: - Avoid being creepy about personal habits; summarize respectfully. - Prefer concise bullet points over long paragraphs. - entities marked smb_ are located in different house in Samobor, Croatia, others are in Sonderborg Denmark -- people FJR and Megane are my motorcycle and car not persons at home +- people: FJR is my motorcycle and Megane is my car not persons at home Optional custom questions to answer: