Improve blog readability and RSS configuration

2026-05-16 09:45:42 +00:00 · 2026-05-16 09:45:42 +00:00 · 037d616764
commit 037d616764
parent 04e10a57ce
4 changed files with 107 additions and 36 deletions
--- a/.env.example
+++ b/.env.example
@@ -10,7 +10,7 @@ DATA_DIR="./data"
 REPORT_DIR="./reports"
 WEB_DIR="./web"
 SITE_BASE_PATH="/"
-SITE_URL="http://localhost"
+SITE_URL="https://hapi.novosel.dk"

 # Extra owner directions appended to the 05:00 AI prompt
 PROMPT_FILE="./llm_instructions.md"
--- a/README.md
+++ b/README.md
@@ -105,7 +105,7 @@ Run the 05:00-style analysis/publishing step:
 Open the blog served by nginx:

 ```text
-http://localhost/
+https://hapi.novosel.dk/
 ```

 This instance publishes to the web root with:
@@ -113,7 +113,7 @@ This instance publishes to the web root with:
 ```bash
 WEB_DIR="/var/www/html"
 SITE_BASE_PATH="/"
-SITE_URL="http://piagent"
+SITE_URL="https://hapi.novosel.dk"
 ```

 For a subdirectory install, use for example:
@@ -121,10 +121,10 @@ For a subdirectory install, use for example:
 ```bash
 WEB_DIR="/var/www/html/haobserver"
 SITE_BASE_PATH="/haobserver"
-SITE_URL="http://piagent"
+SITE_URL="https://hapi.novosel.dk"
 ```

-Daily articles are written under `articles/YYYY-MM-DD.html` inside `WEB_DIR`, and `index.html` links to the archive. An RSS feed is published at `rss.xml`, and a sci-fi favicon is published at `favicon.svg`. New articles include context from previous reports from the last `ARTICLE_CONTEXT_DAYS` days.
+Daily articles are written under `articles/YYYY-MM-DD.html` inside `WEB_DIR`, and `index.html` links to the archive. A clean RSS feed is published at `rss.xml` using `SITE_URL` for absolute links, and a sci-fi favicon is published at `favicon.svg`. New articles include context from previous reports from the last `ARTICLE_CONTEXT_DAYS` days.

 ## Install cron jobs

--- a/ha_observer.py
+++ b/ha_observer.py
@@ -267,8 +267,9 @@ Use these only for trend/context awareness. Do not claim something happened toda

    return f"""You are writing today's Home Assistant smart-home blog article for the owner.

-Write a funny but useful morning briefing in a blog/article style. Use light humor, emojis,
-and playful headings, but remain factual and privacy-aware. Include:
+Write a funny but useful morning briefing in a clean blog/article style. Use light humor,
+but keep emojis/smileys rare: at most one in the whole article. Prefer clear headings,
+short paragraphs, and readable bullet lists. Remain factual and privacy-aware. Include:
 - A short comedy headline for the day
 - What seemed to happen at home today
 - Behavioral patterns that can reasonably be inferred
@@ -340,22 +341,67 @@ def get_llm_conclusions(input_summary: str, previous_articles: str = "") -> str:
    return f"Unknown LLM_MODE={LLM_MODE!r}. Use none, pi, ollama, or openai."


-def markdownish_to_html(text: str) -> str:
-    safe = html.escape(text)
-    safe = re.sub(r"^### (.*)$", r"<h3>\1</h3>", safe, flags=re.MULTILINE)
-    safe = re.sub(r"^## (.*)$", r"<h2>\1</h2>", safe, flags=re.MULTILINE)
-    safe = re.sub(r"^# (.*)$", r"<h1>\1</h1>", safe, flags=re.MULTILINE)
-    safe = re.sub(r"^- (.*)$", r"<li>\1</li>", safe, flags=re.MULTILINE)
-    safe = safe.replace("\n", "<br>\n")
+def remove_most_emoji(text: str) -> str:
+    # Keep the writing readable on the blog page even if the model gets a bit too festive.
+    return re.sub(r"[\U0001F300-\U0001FAFF\U00002700-\U000027BF\U00002600-\U000026FF]+", "", text)
+
+
+def inline_markdown(text: str) -> str:
+    safe = html.escape(remove_most_emoji(text).strip())
+    safe = re.sub(r"\*\*(.*?)\*\*", r"<strong>\1</strong>", safe)
+    safe = re.sub(r"`([^`]+)`", r"<code>\1</code>", safe)
    return safe


+def markdownish_to_html(text: str) -> str:
+    blocks: list[str] = []
+    paragraph: list[str] = []
+    list_items: list[str] = []
+
+    def flush_paragraph() -> None:
+        nonlocal paragraph
+        if paragraph:
+            blocks.append(f"<p>{inline_markdown(' '.join(paragraph))}</p>")
+            paragraph = []
+
+    def flush_list() -> None:
+        nonlocal list_items
+        if list_items:
+            blocks.append("<ul>" + "".join(f"<li>{item}</li>" for item in list_items) + "</ul>")
+            list_items = []
+
+    for raw_line in text.splitlines():
+        line = raw_line.strip()
+        if not line:
+            flush_paragraph()
+            flush_list()
+            continue
+        heading = re.match(r"^(#{1,3})\s+(.+)$", line)
+        if heading:
+            flush_paragraph()
+            flush_list()
+            level = min(len(heading.group(1)), 3)
+            blocks.append(f"<h{level}>{inline_markdown(heading.group(2))}</h{level}>")
+            continue
+        bullet = re.match(r"^[-*]\s+(.+)$", line)
+        if bullet:
+            flush_paragraph()
+            list_items.append(inline_markdown(bullet.group(1)))
+            continue
+        flush_list()
+        paragraph.append(line)
+
+    flush_paragraph()
+    flush_list()
+    return "\n".join(blocks)
+
+
 BLOG_CSS = """
  :root { color-scheme: dark; --cyan:#00f5ff; --blue:#2777ff; --violet:#8b5cf6; --amber:#fbbf24; --panel:#07111fcc; --line:#1de7ff66; }
  * { box-sizing:border-box; }
  body {
    margin:0; min-height:100vh; color:#dff9ff; line-height:1.7;
-    font-family:'Rajdhani','Orbitron','Eurostile',system-ui,sans-serif;
+    font-family:Inter,ui-sans-serif,system-ui,-apple-system,BlinkMacSystemFont,'Segoe UI',sans-serif;
    background:
      radial-gradient(circle at 16% 10%, #1746ff55 0 12rem, transparent 28rem),
      radial-gradient(circle at 82% 4%, #00f5ff30 0 10rem, transparent 24rem),
@@ -372,29 +418,31 @@ BLOG_CSS = """
    background-size:54px 54px,54px 54px,180px 180px;
    mask-image:linear-gradient(to bottom,#000 0%,#000 55%,transparent 100%);
  }
-  body::after {
-    content:""; position:fixed; inset:0; pointer-events:none; opacity:.14;
-    background:repeating-linear-gradient(to bottom, transparent 0 3px, #ffffff 4px 5px);
-    mix-blend-mode:screen;
-  }
  header { position:relative; border-bottom:1px solid var(--line); background:linear-gradient(90deg,#020617dd,#051b33bb,#020617dd); box-shadow:0 0 42px #00d9ff22; }
  header::before, header::after { content:""; position:absolute; top:0; bottom:0; width:18vw; border-color:var(--cyan); opacity:.65; pointer-events:none; }
  header::before { left:0; border-top:2px solid; border-left:2px solid; clip-path:polygon(0 0,100% 0,35% 100%,0 100%); }
  header::after { right:0; border-top:2px solid; border-right:2px solid; clip-path:polygon(0 0,100% 0,100% 100%,65% 100%); }
  .wrap { max-width:1180px; margin:0 auto; padding:1.5rem; position:relative; }
  .masthead { padding:3rem 1.5rem 2.6rem; text-align:center; }
-  .kicker { color:var(--cyan); text-transform:uppercase; letter-spacing:.28em; font:800 .78rem system-ui,sans-serif; text-shadow:0 0 14px #00f5ff; }
+  .kicker { color:var(--cyan); text-transform:uppercase; letter-spacing:.28em; font:800 .78rem ui-monospace,SFMono-Regular,Menlo,monospace; text-shadow:0 0 14px #00f5ff; }
  h1 { margin:.35rem 0; font-size:clamp(2.4rem,7vw,6rem); line-height:.9; text-transform:uppercase; letter-spacing:.05em; color:#f8feff; text-shadow:0 0 12px #00f5ff,0 0 38px #2777ff; }
-  h2,h3 { color:#c8fbff; line-height:1.15; text-transform:uppercase; letter-spacing:.06em; text-shadow:0 0 12px #00f5ff88; }
+  h2,h3 { color:#c8fbff; line-height:1.2; letter-spacing:.03em; text-shadow:0 0 12px #00f5ff88; }
  article, aside {
    position:relative; background:linear-gradient(180deg,#071827d9,#050914e6); border:1px solid var(--line);
    clip-path:polygon(0 18px,18px 0,100% 0,100% calc(100% - 18px),calc(100% - 18px) 100%,0 100%);
    box-shadow:0 0 0 1px #2777ff22 inset,0 0 34px #00d9ff18,0 24px 60px #000b;
  }
  article::before, aside::before { content:""; position:absolute; inset:0; pointer-events:none; border:1px solid #ffffff12; clip-path:inherit; }
-  article { padding:clamp(1.1rem,3vw,2.2rem); }
-  article p, article li { font-size:1.06rem; color:#e6fbff; }
+  article { padding:clamp(1.2rem,3vw,2.4rem); }
+  article p { margin:0 0 1.05rem; max-width:72ch; }
+  article ul { margin:.2rem 0 1.2rem; padding-left:1.35rem; max-width:74ch; }
+  article li { margin:.35rem 0; }
+  article p, article li { font-size:1.04rem; color:#e6fbff; }
  article h1 { font-size:clamp(1.8rem,4vw,3.5rem); text-align:left; }
+  article h2 { margin-top:1.8rem; padding-top:1rem; border-top:1px solid #22d3ee33; }
+  article h1 + p, article h2 + p, article h3 + p { margin-top:.3rem; }
+  strong { color:#ffffff; font-weight:750; }
+  code { color:#fef3c7; background:#020617; border:1px solid #22d3ee33; padding:.08rem .28rem; }
  .layout { display:grid; grid-template-columns:minmax(0,1fr) 310px; gap:1.35rem; align-items:start; }
  aside { padding:1.1rem; position:sticky; top:1rem; }
  .archive { list-style:none; margin:0; padding:0; }
@@ -465,18 +513,40 @@ def write_favicon() -> Path:
    return path


+def clean_rss_text(article_html: str) -> tuple[str, str]:
+    article_match = re.search(r"<article[^>]*>(.*?)</article>", article_html, flags=re.DOTALL | re.IGNORECASE)
+    content = article_match.group(1) if article_match else article_html
+    content = re.sub(r"<details.*?</details>", " ", content, flags=re.DOTALL | re.IGNORECASE)
+    content = re.sub(r"<p><a [^>]*>Permanent link.*?</a></p>", " ", content, flags=re.DOTALL | re.IGNORECASE)
+    title_match = re.search(r"<h1[^>]*>(.*?)</h1>|<h2[^>]*>(.*?)</h2>", content, flags=re.DOTALL | re.IGNORECASE)
+    title = "Smart Home Briefing"
+    if title_match:
+        title = re.sub(r"<[^>]+>", " ", title_match.group(1) or title_match.group(2) or "")
+        title = re.sub(r"\s+", " ", html.unescape(title)).strip() or "Smart Home Briefing"
+    text = re.sub(r"<br>\s*", "\n", content)
+    text = re.sub(r"</(p|li|h1|h2|h3)>", "\n", text, flags=re.IGNORECASE)
+    text = re.sub(r"<[^>]+>", " ", text)
+    text = html.unescape(text)
+    text = re.sub(r"[`*_#]", "", text)
+    text = re.sub(r"^[\s\-•]+", "", text, flags=re.MULTILINE)
+    text = re.sub(r"[ \t]+", " ", text)
+    text = re.sub(r"\n\s*\n+", "\n\n", text).strip()
+    return title, text
+
+
 def write_rss_feed() -> Path:
    articles_dir = WEB_DIR / "articles"
    items = []
    for path in sorted(articles_dir.glob("*.html"), reverse=True)[:20]:
-        title = path.stem
+        fallback_title = path.stem
        try:
-            title = datetime.strptime(path.stem, "%Y-%m-%d").strftime("Smart Home Briefing - %A, %B %-d, %Y")
+            fallback_title = datetime.strptime(path.stem, "%Y-%m-%d").strftime("Smart Home Briefing - %A, %B %-d, %Y")
        except ValueError:
-            title = f"Smart Home Briefing - {path.stem}"
+            fallback_title = f"Smart Home Briefing - {path.stem}"
        content = path.read_text(encoding="utf-8", errors="ignore")
-        description = re.sub(r"<[^>]+>", " ", content)
-        description = re.sub(r"\s+", " ", html.unescape(description)).strip()[:500]
+        article_title, article_text = clean_rss_text(content)
+        title = article_title if article_title != "Smart Home Briefing" else fallback_title
+        description = article_text[:600]
        pub_dt = datetime.fromtimestamp(path.stat().st_mtime, timezone.utc)
        url = site_url(f"articles/{path.name}")
        items.append(f"""
@@ -488,12 +558,14 @@ def write_rss_feed() -> Path:
      <description>{html.escape(description)}</description>
    </item>""")
    now = format_datetime(datetime.now(timezone.utc), usegmt=True)
+    feed_url = site_url("rss.xml")
    feed = f"""<?xml version="1.0" encoding="UTF-8"?>
-<rss version="2.0">
+<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
  <channel>
    <title>Smart Home Gossip Gazette</title>
    <link>{html.escape(site_url())}</link>
-    <description>Daily Home Assistant smart-home briefings from the orbital raccoon telemetry desk.</description>
+    <atom:link href="{html.escape(feed_url)}" rel="self" type="application/rss+xml" />
+    <description>Daily Home Assistant smart-home briefings.</description>
    <language>en</language>
    <lastBuildDate>{now}</lastBuildDate>
 {''.join(items)}
@@ -512,7 +584,8 @@ def blog_shell(title: str, subtitle: str, main_content: str, archive_links: str)
 <meta charset="utf-8">
 <meta name="viewport" content="width=device-width, initial-scale=1">
 <title>{html.escape(title)}</title>
-<link rel="alternate" type="application/rss+xml" title="Smart Home Gossip Gazette RSS" href="{html.escape(site_href('rss.xml'))}">
+<link rel="canonical" href="{html.escape(site_url())}">
+<link rel="alternate" type="application/rss+xml" title="Smart Home Gossip Gazette RSS" href="{html.escape(site_url('rss.xml'))}">
 <link rel="icon" href="{html.escape(site_href('favicon.svg'))}" type="image/svg+xml">
 <style>{BLOG_CSS}</style>
 </head>
--- a/llm_instructions.md
+++ b/llm_instructions.md
@@ -3,10 +3,8 @@
 Edit this file whenever you want to change how the 05:00 AI report is written.
 The contents are appended to the AI prompt before the Home Assistant data.

-Suggested directions:
-
 - Keep the tone funny, sarcastic, and playful, but still useful.
- Use clear confidence labels: **strong evidence**, **possible**, **wild guess**.
+- Use clear confidence labels: strong evidence, possible, wild guess.
 - Focus on patterns in occupancy, sleep/wake timing, lights, heating, doors, motion, media, and unusual sensor changes.
 - Point out privacy leaks: what could a nosy neighbor, burglar, or raccoon detective infer?
 - Recommend practical Home Assistant automations.
@@ -14,7 +12,7 @@ Suggested directions:
 - Avoid being creepy about personal habits; summarize respectfully.
 - Prefer concise bullet points over long paragraphs.
 - entities marked smb_ are located in different house in Samobor, Croatia, others are in Sonderborg Denmark
- people FJR and Megane are my motorcycle and car not persons at home
+- people: FJR is my motorcycle and Megane is my car, not persons at home

 Optional custom questions to answer: