Initial commit

2026-05-16 08:04:53 +00:00 · 2026-05-16 08:04:53 +00:00 · 01af871145
commit 01af871145
5 changed files with 615 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,15 @@
+# Local secrets/configuration
+*.conf
+.git.env
+
+# Generated/state files
+wallabag_downloaded.json
+out/
+
+# Python cache
+__pycache__/
+*.py[cod]
+
+# OS/editor noise
+.DS_Store
+*.swp
--- a/mail.conf.sample
+++ b/mail.conf.sample
@ -0,0 +1,6 @@
+SMTP_HOST=smtp.example.com
+SMTP_PORT=587
+SMTP_USER=user@example.com
+SMTP_PASS=change-me
+SMTP_SENDER=user@example.com
+KINDLE_EMAIL=your-kindle@example.com
--- a/send_to_kindle.py
+++ b/send_to_kindle.py
@ -0,0 +1,172 @@
+#!/usr/bin/env python3
+import argparse
+import mimetypes
+import os
+import smtplib
+import re
+import zipfile
+import xml.etree.ElementTree as ET
+from email.message import EmailMessage
+from pathlib import Path
+
+# Default location of the KEY=VALUE mail settings file, relative to the CWD.
+DEFAULT_CONFIG = Path("./mail.conf")
+# Directory scanned for *.epub files when no file argument is given.
+DEFAULT_OUT = Path("./out")
+
+
+def load_config(path: Path) -> dict:
+    """Parse a flat KEY=VALUE settings file into a dict.
+
+    Blank lines, lines beginning with ``#`` and lines without ``=`` are
+    skipped. Keys and values are whitespace-trimmed; double and then single
+    quote characters are stripped from both ends of each value. A path that
+    is not a regular file yields an empty dict.
+    """
+    if not path.is_file():
+        return {}
+    settings = {}
+    for raw in path.read_text().splitlines():
+        entry = raw.strip()
+        if entry == "" or entry.startswith("#") or "=" not in entry:
+            continue
+        name, _, raw_value = entry.partition("=")
+        settings[name.strip()] = raw_value.strip().strip('"').strip("'")
+    return settings
+
+
+def get_value(args, cfg, attr, key, env=None, default=None):
+    """Resolve one setting by precedence: CLI, config file, environment, default.
+
+    A CLI value of ``None`` or ``""`` and a config value of ``""`` count as
+    unset; an environment variable counts only when it is non-empty.
+    """
+    from_cli = getattr(args, attr, None)
+    if from_cli not in (None, ""):
+        return from_cli
+
+    from_file = cfg.get(key, "")
+    if from_file != "":
+        return from_file
+
+    from_env = os.getenv(env) if env else None
+    return from_env if from_env else default
+
+
+def chunks(items, size):
+    """Yield consecutive slices of *items*, each holding at most *size* elements."""
+    offsets = range(0, len(items), size)
+    yield from (items[offset:offset + size] for offset in offsets)
+
+
+def epub_title(path: Path) -> str | None:
+    """Return the dc:title stored in an EPUB's package document, or None.
+
+    The package document is located through META-INF/container.xml.
+    Underscores in the title become spaces and whitespace runs are collapsed.
+    Any failure (not a zip, missing member, malformed XML, ...) yields None.
+    """
+    container_ns = {"c": "urn:oasis:names:tc:opendocument:xmlns:container"}
+    dc_ns = {"dc": "http://purl.org/dc/elements/1.1/"}
+    try:
+        with zipfile.ZipFile(path) as archive:
+            container_doc = ET.fromstring(archive.read("META-INF/container.xml"))
+            rootfile_el = container_doc.find(".//c:rootfile", container_ns)
+            if rootfile_el is None:
+                return None
+            package_doc = ET.fromstring(archive.read(rootfile_el.attrib["full-path"]))
+            title_el = package_doc.find(".//dc:title", dc_ns)
+            if title_el is None or not title_el.text:
+                return None
+            return " ".join(title_el.text.replace("_", " ").split())
+    except Exception:
+        return None
+
+
+def attachment_name(path: Path) -> str:
+    """Build the filename used for the emailed attachment.
+
+    The file on disk is never renamed. When the EPUB carries a metadata
+    title, that title (with characters such as ``\\ / : * ? " < > |`` replaced
+    by spaces, whitespace collapsed, trailing dots removed, and cut to 120
+    characters) is used so that Send-to-Kindle, which often derives the
+    displayed title from the attachment filename, shows something readable.
+    Without a usable title the original filename is returned.
+    """
+    meta_title = epub_title(path)
+    if not meta_title:
+        return path.name
+    without_reserved = re.sub(r'[\\/:*?"<>|]+', ' ', meta_title)
+    collapsed = re.sub(r"\s+", " ", without_reserved).strip().rstrip('.')
+    stem = collapsed[:120] or path.stem
+    return stem + path.suffix.lower()
+
+
+def send_to_kindle(smtp_host, smtp_port, smtp_user, smtp_pass, sender, kindle_email, file_paths):
+    """Email the given files as attachments to a Send-to-Kindle address.
+
+    One message is built with every file attached (named via
+    ``attachment_name``) and sent after logging in to the SMTP server.
+
+    Transport security is chosen from the port: 25, 587 and 2525 open a plain
+    connection and upgrade it with STARTTLS; every other port (465 by
+    default) uses implicit TLS via ``SMTP_SSL``. Previously ``SMTP_SSL`` was
+    used unconditionally, which cannot talk to port 587, the value shipped
+    in mail.conf.sample. Server certificates are verified against the system
+    trust store in both modes.
+
+    Raises:
+        FileNotFoundError: if any path is not a regular file; checked for all
+            paths before anything is read or sent.
+        smtplib.SMTPException, OSError, ssl.SSLError: propagated from the
+            SMTP conversation.
+    """
+    import ssl  # function-scope import: the module does not import ssl
+
+    file_paths = [Path(p) for p in file_paths]
+    for file_path in file_paths:
+        if not file_path.is_file():
+            raise FileNotFoundError(file_path)
+
+    # attachment_name() opens and parses each EPUB, so resolve names once and
+    # reuse them for both the body text and the attachment headers.
+    display_names = [attachment_name(p) for p in file_paths]
+
+    msg = EmailMessage()
+    msg["From"] = sender
+    msg["To"] = kindle_email
+    msg["Subject"] = "Send to Kindle"
+    msg.set_content("Attached ebook(s):\n\n" + "\n".join(display_names))
+
+    for file_path, display_name in zip(file_paths, display_names):
+        ctype, _ = mimetypes.guess_type(file_path)
+        if ctype is None:
+            ctype = "application/octet-stream"
+        maintype, subtype = ctype.split("/", 1)
+        msg.add_attachment(
+            file_path.read_bytes(),
+            maintype=maintype,
+            subtype=subtype,
+            filename=display_name,
+        )
+
+    port = int(smtp_port)
+    tls_context = ssl.create_default_context()
+    if port in (25, 587, 2525):
+        # Submission-style ports speak plain SMTP first and upgrade in-band.
+        with smtplib.SMTP(smtp_host, port) as smtp:
+            smtp.starttls(context=tls_context)
+            smtp.login(smtp_user, smtp_pass)
+            smtp.send_message(msg)
+    else:
+        with smtplib.SMTP_SSL(smtp_host, port, context=tls_context) as smtp:
+            smtp.login(smtp_user, smtp_pass)
+            smtp.send_message(msg)
+
+
+def find_epubs(out_dir: Path):
+    """Return the ``*.epub`` files directly inside *out_dir*, sorted by path."""
+    matches = list(out_dir.glob("*.epub"))
+    matches.sort()
+    return matches
+
+
+def main():
+    """Command-line entry point: email ebook(s) to a Kindle address.
+
+    Settings are resolved per option from the CLI, then the config file, then
+    the environment. Files are sent in batches of at most --max-attachments
+    per email, and each file is deleted from disk once its batch was sent.
+    """
+    parser = argparse.ArgumentParser(description="Send ebook(s) to Kindle via email")
+    parser.add_argument("file", nargs="?", help="ebook file, e.g. .epub/.pdf/.mobi. If omitted, sends all .epub files in ./out")
+    parser.add_argument("--config", default=str(DEFAULT_CONFIG), help=f"config file, default: {DEFAULT_CONFIG}")
+    parser.add_argument("--kindle", help="your Kindle email, e.g. name@kindle.com")
+    parser.add_argument("--smtp-host")
+    parser.add_argument("--smtp-port", type=int)
+    parser.add_argument("--smtp-user")
+    parser.add_argument("--smtp-pass")
+    parser.add_argument("--sender")
+    parser.add_argument("--max-attachments", type=int, default=16, help="maximum attachments per email, default: 16")
+    args = parser.parse_args()
+
+    cfg = load_config(Path(args.config).expanduser())
+
+    # (argparse attribute == settings name, config/env key, default)
+    lookups = (
+        ("smtp_host", "SMTP_HOST", None),
+        ("smtp_port", "SMTP_PORT", "465"),
+        ("smtp_user", "SMTP_USER", None),
+        ("smtp_pass", "SMTP_PASS", None),
+        ("sender", "SMTP_SENDER", None),
+        ("kindle", "KINDLE_EMAIL", None),
+    )
+    settings = {
+        name: get_value(args, cfg, name, key, key, fallback)
+        for name, key, fallback in lookups
+    }
+
+    missing = [name for name, value in settings.items() if not value]
+    if missing:
+        raise SystemExit(
+            "Missing: " + ", ".join(missing) +
+            f"\nAdd them to {args.config} or pass them as command-line options."
+        )
+
+    if args.file:
+        files = [Path(args.file).expanduser()]
+    else:
+        files = find_epubs(DEFAULT_OUT)
+    if not files:
+        raise SystemExit(f"No EPUB files found in {DEFAULT_OUT}")
+
+    per_email = max(1, args.max_attachments)
+    batches = list(chunks(files, per_email))
+    total = len(batches)
+    for number, batch in enumerate(batches, 1):
+        send_to_kindle(
+            settings["smtp_host"],
+            settings["smtp_port"],
+            settings["smtp_user"],
+            settings["smtp_pass"],
+            settings["sender"],
+            settings["kindle"],
+            batch,
+        )
+        suffix = f" ({number}/{total})" if total > 1 else ""
+        print(f"Sent email{suffix}: {len(batch)} attachment(s)")
+        # Files are removed only after their batch went out without raising.
+        for sent_file in batch:
+            sent_file.unlink()
+            print(f"  - sent and deleted: {sent_file}")
+
+
+if __name__ == "__main__":
+    main()
--- a/wallabag.conf.sample
+++ b/wallabag.conf.sample
@ -0,0 +1,5 @@
+WALLABAG_URL=https://wallabag.example.com
+CLIENT_ID=change-me
+CLIENT_SECRET=change-me
+USERNAME=change-me
+PASSWORD=change-me
--- a/wallabag_to_epub.py
+++ b/wallabag_to_epub.py
@ -0,0 +1,417 @@
+#!/usr/bin/env python3
+"""Fetch Wallabag articles and create one EPUB per article."""
+import argparse
+import html
+import json
+import re
+import time
+import mimetypes
+import urllib.parse
+import urllib.request
+import uuid
+import zipfile
+from datetime import datetime
+from pathlib import Path
+from xml.sax.saxutils import escape
+
+# NOTE(review): BASE_DIR is not referenced anywhere in the visible file.
+BASE_DIR = Path(".")
+# Default Wallabag credentials file (KEY=VALUE), relative to the CWD.
+DEFAULT_CONFIG = Path("./wallabag.conf")
+# Default directory that receives the generated EPUB files.
+DEFAULT_OUT = Path("./out")
+# JSON file recording which articles were already exported.
+DEFAULT_DB = Path("./wallabag_downloaded.json")
+
+
+def load_config(path: Path) -> dict:
+    """Read a KEY=VALUE file into a dict; a missing file gives an empty dict.
+
+    Empty lines, ``#`` comment lines and lines without ``=`` are ignored.
+    Keys and values are trimmed, and double then single quote characters are
+    stripped from both ends of each value.
+    """
+    if not path.is_file():
+        return {}
+    parsed = {}
+    for raw_line in path.read_text().splitlines():
+        text = raw_line.strip()
+        skip = not text or text.startswith("#") or "=" not in text
+        if skip:
+            continue
+        name, _sep, value = text.partition("=")
+        parsed[name.strip()] = value.strip().strip('"').strip("'")
+    return parsed
+
+
+def http_json(url, method="GET", data=None, token=None):
+    """Perform one HTTP request and return the JSON-decoded response body.
+
+    *data*, when given, is sent form-urlencoded as the request body; *token*,
+    when truthy, is sent as a Bearer Authorization header. The request uses a
+    60 second timeout and the response is decoded as UTF-8.
+    """
+    request_headers = {"Accept": "application/json"}
+    payload = None
+    if data is not None:
+        payload = urllib.parse.urlencode(data).encode()
+        request_headers["Content-Type"] = "application/x-www-form-urlencoded"
+    if token:
+        request_headers["Authorization"] = f"Bearer {token}"
+
+    request = urllib.request.Request(url, data=payload, headers=request_headers, method=method)
+    with urllib.request.urlopen(request, timeout=60) as response:
+        raw = response.read()
+    return json.loads(raw.decode("utf-8"))
+
+
+def wallabag_token(cfg):
+    """Request an OAuth access token using the password grant and return it."""
+    endpoint = cfg["WALLABAG_URL"].rstrip("/") + "/oauth/v2/token"
+    form = {
+        "grant_type": "password",
+        "client_id": cfg["CLIENT_ID"],
+        "client_secret": cfg["CLIENT_SECRET"],
+        "username": cfg["USERNAME"],
+        "password": cfg["PASSWORD"],
+    }
+    reply = http_json(endpoint, "POST", form)
+    return reply["access_token"]
+
+
+def fetch_entries(cfg, token, limit=10, unread=True, starred=False, archive=False):
+    """Fetch the first page of entries (newest created first) with full detail.
+
+    *limit* is sent as ``perPage``. ``archive`` wins over ``unread`` when both
+    are set; ``starred`` restricts the result to starred entries. Returns the
+    list found under ``_embedded.items``, or an empty list when absent.
+    """
+    endpoint = cfg["WALLABAG_URL"].rstrip("/") + "/api/entries.json"
+    params = {
+        "perPage": str(limit),
+        "page": "1",
+        "sort": "created",
+        "order": "desc",
+        "detail": "full",
+    }
+    if archive:
+        params["archive"] = "1"
+    elif unread:
+        params["archive"] = "0"
+    if starred:
+        params["starred"] = "1"
+
+    query = urllib.parse.urlencode(params)
+    reply = http_json(f"{endpoint}?{query}", token=token)
+    embedded = reply.get("_embedded", {})
+    return embedded.get("items", [])
+
+
+def mark_archived(cfg, token, entry_id):
+    """PATCH the entry with ``archive=1`` and return the decoded response."""
+    root = cfg["WALLABAG_URL"].rstrip("/")
+    endpoint = root + f"/api/entries/{entry_id}.json"
+    return http_json(endpoint, "PATCH", {"archive": "1"}, token=token)
+
+
+def clean_fragment(content):
+    """Prepare Wallabag HTML for EPUB while preserving formatting where possible.
+
+    Wallabag already extracts readable article HTML, so headings, lists,
+    blockquotes, tables, links, inline styles/classes, and images are kept.
+    Only active/interactive content (script, iframe, form, on* handlers) and
+    srcset/sizes attributes are removed, and common void tags are made
+    XML-compatible.
+
+    Named HTML entities are converted to characters one by one, and the
+    result is re-escaped, so ``&nbsp;`` becomes a literal character while
+    ``&lt;``, ``&gt;``, ``&amp;`` and ``&quot;`` remain escaped. Unescaping
+    the whole document at once would turn escaped text such as
+    ``&lt;div&gt;`` into live markup and break the XHTML. Numeric character
+    references are already valid XML and are left untouched.
+
+    Returns ``"<p></p>"`` for empty/None input.
+    """
+    if not content:
+        return "<p></p>"
+
+    def xml_safe_entity(match):
+        """Resolve one named entity to text that is legal inside XML."""
+        entity = match.group(0)
+        resolved = html.unescape(entity)
+        if resolved == entity:
+            # Unknown name: keep it; the bare-ampersand pass below escapes it.
+            return entity
+        return html.escape(resolved, quote=True)
+
+    content = re.sub(r"<script\b[^>]*>.*?</script>", "", content, flags=re.I | re.S)
+    content = re.sub(r"<iframe\b[^>]*>.*?</iframe>", "", content, flags=re.I | re.S)
+    content = re.sub(r"<form\b[^>]*>.*?</form>", "", content, flags=re.I | re.S)
+    content = re.sub(r"\s(on\w+)=([\"']).*?\2", "", content, flags=re.I | re.S)
+    # Many sites lazy-load images and Wallabag can keep the real URL in data-*
+    # while src is empty/a placeholder. Promote those before dropping extras.
+    content = promote_lazy_image_srcs(content)
+    content = re.sub(r"\s(srcset|sizes)=([\"']).*?\2", "", content, flags=re.I | re.S)
+    content = re.sub(r"&[A-Za-z][A-Za-z0-9]*;", xml_safe_entity, content)
+    # Any ampersand that still does not start an XML-valid reference is text.
+    content = re.sub(r"&(?!amp;|lt;|gt;|quot;|apos;|#\d+;|#x[0-9A-Fa-f]+;)", "&amp;", content)
+    content = re.sub(r"<(br|hr|img|meta|link|input)(\b[^>]*?)(?<!/)>", r"<\1\2 />", content, flags=re.I)
+    return content
+
+
+def promote_lazy_image_srcs(content):
+    """Use lazy-loader attributes as img src when src is missing/a placeholder.
+
+    For every ``<img>`` tag, the first non-empty value among data-src,
+    data-original, data-lazy-src, data-url and data-full-url replaces ``src``
+    when ``src`` is absent, empty, a ``data:`` URI, contains "placeholder",
+    or is ``#`` or ``/``. Tags with a real src, without a lazy attribute, or
+    with an unquoted src are returned unchanged.
+    """
+    lazy_names = ("data-src", "data-original", "data-lazy-src", "data-url", "data-full-url")
+    attr_re = re.compile(r"\s([\w:-]+)=([\"'])(.*?)\2", re.S)
+    src_re = re.compile(r"\ssrc=([\"']).*?\1", re.I | re.S)
+
+    def repl(match):
+        tag = match.group(0)
+        attrs = {m.group(1).lower(): m.group(3) for m in attr_re.finditer(tag)}
+        src = (attrs.get("src") or "").strip()
+        lazy = next((attrs[name].strip() for name in lazy_names if attrs.get(name)), "")
+        if not lazy:
+            return tag
+        is_placeholder = (
+            not src
+            or src.startswith("data:")
+            or "placeholder" in src.lower()
+            or src in ("#", "/")
+        )
+        if not is_placeholder:
+            return tag
+        # The value is re-quoted with double quotes, so neutralise any it holds.
+        new_attr = ' src="' + lazy.replace('"', "&quot;") + '"'
+        if src_re.search(tag):
+            # Callable replacement: a URL is data, not a regex template, so
+            # backslashes in it must not be interpreted as escapes.
+            return src_re.sub(lambda _m: new_attr, tag, count=1)
+        if re.search(r"\ssrc\s*=", tag, flags=re.I):
+            # An unquoted src exists; appending another would duplicate it.
+            return tag
+        if tag.endswith("/>"):
+            # Keep the self-closing slash at the very end of the tag.
+            return tag[:-2].rstrip() + new_attr + " />"
+        return tag[:-1] + new_attr + ">"
+
+    return re.sub(r"<img\b[^>]*>", repl, content, flags=re.I | re.S)
+
+
+def fetch_image(url, referer=None):
+    """Download one image and return ``(data, (media_type, extension))``.
+
+    The request advertises a browser User-Agent and an Accept header that
+    prefers JPEG/PNG/GIF/SVG, and sends *referer* when given. At most 20 MiB
+    are accepted; a larger body raises ValueError. The type/extension pair is
+    produced by ``guess_image_type`` from the URL, the bytes and the response
+    Content-Type.
+    """
+    max_bytes = 20 * 1024 * 1024
+    request_headers = {
+        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124 Safari/537.36",
+        # Prefer Kindle-friendly formats. Some CDNs serve AVIF/WebP when asked,
+        # which Amazon's EPUB conversion may drop.
+        "Accept": "image/jpeg,image/png,image/gif,image/svg+xml,image/*;q=0.8,*/*;q=0.5",
+    }
+    if referer:
+        request_headers["Referer"] = referer
+
+    request = urllib.request.Request(url, headers=request_headers)
+    with urllib.request.urlopen(request, timeout=30) as response:
+        # Read one byte past the limit so an oversized body is detectable.
+        payload = response.read(max_bytes + 1)
+        if len(payload) > max_bytes:
+            raise ValueError("image is larger than 20 MiB")
+        content_type_header = response.headers.get("Content-Type") or ""
+    ctype = content_type_header.split(";", 1)[0].strip().lower()
+    return payload, guess_image_type(url, payload, ctype)
+
+
+def guess_image_type(url, data, ctype):
+    """Return ``(media_type, extension)`` for downloaded image bytes.
+
+    Precedence: an ``image/*`` Content-Type, then magic-byte sniffing (JPEG,
+    PNG, GIF, WebP, SVG), then the URL path's extension, and finally
+    ``application/octet-stream``. The extension comes from ``mimetypes``,
+    with ``.jpe`` mapped to ``.jpg``, ``.svgz`` to ``.svg``, and ``.img``
+    used when none is known.
+    """
+    def sniff(blob):
+        if blob.startswith(b"\xff\xd8\xff"):
+            return "image/jpeg"
+        if blob.startswith(b"\x89PNG\r\n\x1a\n"):
+            return "image/png"
+        if blob.startswith((b"GIF87a", b"GIF89a")):
+            return "image/gif"
+        if blob.startswith(b"RIFF") and blob[8:12] == b"WEBP":
+            return "image/webp"
+        if b"<svg" in blob[:500].lower():
+            return "image/svg+xml"
+        return None
+
+    media_type = ctype if ctype.startswith("image/") else sniff(data)
+    if media_type is None:
+        url_path = urllib.parse.urlparse(url).path
+        media_type = mimetypes.guess_type(url_path)[0] or "application/octet-stream"
+
+    ext = mimetypes.guess_extension(media_type) or ""
+    ext = {".jpe": ".jpg", ".svgz": ".svg"}.get(ext, ext)
+    return media_type, ext or ".img"
+
+
+def sanitize_img_tag(tag):
+    """Rebuild an ``<img>`` tag keeping only Kindle/EPUB-friendly attributes.
+
+    Quoted src, alt, title, class, width and height attributes survive, in
+    their original order; values are unescaped and re-escaped for
+    double-quoted output. ``alt="image"`` is appended when no alt exists.
+    The result is always self-closing.
+    """
+    allowed = {"src", "alt", "title", "class", "width", "height"}
+    kept = []
+    for found in re.finditer(r"\s([\w:-]+)=([\"'])(.*?)\2", tag, flags=re.S):
+        attr = found.group(1).lower()
+        if attr in allowed:
+            kept.append((attr, html.unescape(found.group(3))))
+    if all(attr != "alt" for attr, _ in kept):
+        kept.append(("alt", "image"))
+    rendered = [f' {attr}="{html.escape(text, quote=True)}"' for attr, text in kept]
+    return "<img" + "".join(rendered) + " />"
+
+
+def embed_images(content, base_url):
+    """Download ``<img>`` sources and point the tags at local EPUB paths.
+
+    Returns ``(rewritten_content, images)`` where each image dict has the
+    keys id, href, media_type and data. Every distinct absolute URL is
+    fetched once. Tags with an empty, ``data:``/``cid:`` or non-http(s)
+    source, and tags whose download fails (a warning is printed), are left
+    as they were; rewritten tags are passed through ``sanitize_img_tag``.
+    """
+    images = []
+    local_href = {}
+
+    def localize(match):
+        original_tag = match.group(0)
+        src = html.unescape(match.group(2)).strip()
+        if not src or src.startswith(("data:", "cid:")):
+            return original_tag
+
+        abs_url = urllib.parse.urljoin(base_url, src)
+        if urllib.parse.urlparse(abs_url).scheme not in ("http", "https"):
+            return original_tag
+
+        href = local_href.get(abs_url)
+        if href is None:
+            try:
+                data, (media_type, ext) = fetch_image(abs_url, referer=base_url)
+            except Exception as e:
+                print(f"Warning: could not download image {abs_url}: {e}")
+                return original_tag
+            item_id = f"img{len(images) + 1}"
+            href = f"images/{item_id}{ext}"
+            local_href[abs_url] = href
+            images.append({"id": item_id, "href": href, "media_type": media_type, "data": data})
+
+        rewritten = re.sub(r"\ssrc=([\"']).*?\1", f' src="{href}"', original_tag, count=1, flags=re.I | re.S)
+        return sanitize_img_tag(rewritten)
+
+    img_with_src = r"<img\b[^>]*\ssrc=([\"'])(.*?)\1[^>]*>"
+    rewritten_content = re.sub(img_with_src, localize, content, flags=re.I | re.S)
+    return rewritten_content, images
+
+
+def safe_name(s):
+    """Reduce *s* to a filename-safe token of at most 80 characters.
+
+    Runs of characters outside ``A-Z a-z 0-9 _ . -`` become one underscore,
+    surrounding underscores are trimmed, and ``"article"`` is the fallback
+    when nothing remains.
+    """
+    collapsed = re.sub(r"[^A-Za-z0-9_.-]+", "_", s)
+    trimmed = collapsed.strip("_")[:80]
+    return trimmed if trimmed else "article"
+
+
+def display_title(s):
+    """Return a human-friendly article title for EPUB metadata/display.
+
+    This never derives anything from a filename (filenames are sanitized
+    separately by safe_name()). HTML entities are decoded, underscores, which
+    some older/generated titles contain, become spaces, and whitespace is
+    collapsed. Empty input or an empty result gives "Wallabag article".
+    """
+    fallback = "Wallabag article"
+    text = html.unescape(s if s else fallback)
+    text = re.sub(r"\s+", " ", text.replace("_", " ")).strip()
+    return text if text else fallback
+
+
+def metadata_title(s):
+    """Return the display title with any ".epub" removed, for EPUB metadata."""
+    shown = display_title(s)
+    without_extension = shown.replace(".epub", "")
+    return without_extension.strip()
+
+
+def build_epub(entry, out_path: Path, title: str | None = None):
+    """Build a single-article EPUB at *out_path* and return that path.
+
+    *entry* is a Wallabag entry dict; the keys read here are title, url,
+    domain_name, published_at, created_at, content and preview_picture.
+    *title*, when given, overrides the entry title. The preview picture is
+    prepended as a figure unless its URL already occurs in the content, the
+    HTML is cleaned, and images are downloaded and embedded.
+
+    Values placed inside double-quoted XML attributes (link href, the
+    calibre title_sort meta, manifest href/media-type) also have ``"``
+    escaped; ``xml.sax.saxutils.escape`` alone leaves quotes untouched, so a
+    title or URL containing one would otherwise produce malformed XML.
+
+    The parent directory of *out_path* is created if needed.
+    """
+    from datetime import timezone  # function-scope: only `datetime` is imported at module level
+
+    def attr_escape(value):
+        """Escape text for use inside a double-quoted XML attribute."""
+        return escape(value, {'"': "&quot;"})
+
+    out_path.parent.mkdir(parents=True, exist_ok=True)
+    book_id = f"urn:uuid:{uuid.uuid4()}"
+    # Same UTC timestamp format as before, without the deprecated utcnow().
+    now = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
+
+    etitle = metadata_title(title or entry.get("title") or "Wallabag article")
+    url = entry.get("url") or ""
+    domain = entry.get("domain_name") or urllib.parse.urlparse(url).netloc or "Wallabag"
+    published = entry.get("published_at") or entry.get("created_at") or ""
+    raw_content = entry.get("content") or "<p></p>"
+    preview = entry.get("preview_picture") or ""
+    if preview and preview not in raw_content:
+        raw_content = f'<figure><img src="{html.escape(preview, quote=True)}" alt="{html.escape(etitle, quote=True)}" /></figure>\n' + raw_content
+    content = clean_fragment(raw_content)
+    content, images = embed_images(content, url)
+
+    chapter = f'''<?xml version="1.0" encoding="utf-8"?>
+<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
+<head>
+  <title>{escape(etitle)}</title>
+  <link rel="stylesheet" type="text/css" href="style.css" />
+</head>
+<body>
+<article>
+<header>
+  <h1>{escape(etitle)}</h1>
+  <p class="source">{escape(domain)}{(' · ' + escape(published[:10])) if published else ''}</p>
+  <p class="source"><a href="{attr_escape(url)}">{escape(url)}</a></p>
+</header>
+<section class="content">
+{content}
+</section>
+</article>
+</body></html>'''
+
+    container = '''<?xml version="1.0"?>
+<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
+  <rootfiles><rootfile full-path="OEBPS/content.opf" media-type="application/oebps-package+xml"/></rootfiles>
+</container>'''
+    image_manifest = "".join(
+        f'    <item id="{img["id"]}" href="{attr_escape(img["href"])}" media-type="{attr_escape(img["media_type"])}"/>\n'
+        for img in images
+    )
+    opf = f'''<?xml version="1.0" encoding="utf-8"?>
+<package xmlns="http://www.idpf.org/2007/opf" xmlns:dc="http://purl.org/dc/elements/1.1/" unique-identifier="BookId" version="3.0">
+  <metadata>
+    <dc:identifier id="BookId">{book_id}</dc:identifier>
+    <dc:title id="title">{escape(etitle)}</dc:title>
+    <meta refines="#title" property="title-type">main</meta>
+    <meta name="calibre:title_sort" content="{attr_escape(etitle)}"/>
+    <dc:language>en</dc:language>
+    <dc:creator id="creator">{escape(domain)}</dc:creator>
+    <meta refines="#creator" property="role" scheme="marc:relators">aut</meta>
+    <dc:publisher>Wallabag</dc:publisher>
+    <dc:source>{escape(url)}</dc:source>
+    <meta property="dcterms:modified">{now}</meta>
+  </metadata>
+  <manifest>
+    <item id="nav" href="nav.xhtml" media-type="application/xhtml+xml" properties="nav"/>
+    <item id="css" href="style.css" media-type="text/css"/>
+    <item id="article" href="article.xhtml" media-type="application/xhtml+xml"/>
+{image_manifest}  </manifest>
+  <spine><itemref idref="article"/></spine>
+</package>'''
+    nav = f'''<?xml version="1.0" encoding="utf-8"?>
+<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">
+<head><title>{escape(etitle)}</title></head>
+<body><nav epub:type="toc"><h1>{escape(etitle)}</h1><ol><li><a href="article.xhtml">Article</a></li></ol></nav></body></html>'''
+    css = """body{font-family:serif;line-height:1.45;margin:0;padding:1em;} article{max-width:42em;} h1{line-height:1.15;} img,video{max-width:100%;height:auto;} figure{margin:1em 0;} figcaption,.source{font-size:.85em;color:#666;} blockquote{border-left:3px solid #aaa;margin-left:.5em;padding-left:1em;color:#333;} pre,code{font-family:monospace;white-space:pre-wrap;} table{border-collapse:collapse;max-width:100%;} td,th{border:1px solid #ccc;padding:.25em;}"""
+
+    with zipfile.ZipFile(out_path, "w") as z:
+        # The mimetype entry is written first and stored uncompressed.
+        z.writestr("mimetype", "application/epub+zip", compress_type=zipfile.ZIP_STORED)
+        z.writestr("META-INF/container.xml", container)
+        z.writestr("OEBPS/content.opf", opf)
+        z.writestr("OEBPS/nav.xhtml", nav)
+        z.writestr("OEBPS/style.css", css)
+        z.writestr("OEBPS/article.xhtml", chapter)
+        for img in images:
+            z.writestr("OEBPS/" + img["href"], img["data"])
+    print(f"Embedded {len(images)} image(s) in {out_path}")
+    return out_path
+
+
+def article_key(entry) -> str:
+    """Return the download-DB key for an entry.
+
+    Prefers ``id:<id>`` (any id other than None), then ``url:<url>``, and
+    falls back to ``title:<display title>``.
+    """
+    entry_id = entry.get("id")
+    if entry_id is not None:
+        return f"id:{entry_id}"
+    link = entry.get("url")
+    if link:
+        return "url:" + link
+    return "title:" + display_title(entry.get("title") or "")
+
+
+def load_downloaded(path: Path) -> dict:
+    """Load the download-evidence DB, returning {} when it is unusable.
+
+    A missing file, an unreadable file, content that is not valid UTF-8 or
+    not valid JSON, and JSON whose top level is not an object all yield an
+    empty dict, so a damaged DB only causes re-downloads. The file is read
+    as UTF-8 to match what save_downloaded() writes.
+    """
+    if not path.is_file():
+        return {}
+    try:
+        data = json.loads(path.read_text(encoding="utf-8"))
+    except (OSError, ValueError):
+        # ValueError covers both JSONDecodeError and UnicodeDecodeError.
+        return {}
+    return data if isinstance(data, dict) else {}
+
+
+def save_downloaded(path: Path, data: dict):
+    """Write the download-evidence DB as pretty-printed, key-sorted JSON.
+
+    The JSON keeps non-ASCII characters literally (ensure_ascii=False), so
+    the file is written explicitly as UTF-8 rather than in the locale
+    encoding, which may be unable to represent article titles. The content
+    goes to a sibling ``.tmp`` file first and is then moved over *path*, so
+    an interrupted write does not leave a truncated DB behind.
+    """
+    path.parent.mkdir(parents=True, exist_ok=True)
+    tmp = path.with_suffix(path.suffix + ".tmp")
+    serialized = json.dumps(data, indent=2, ensure_ascii=False, sort_keys=True) + "\n"
+    tmp.write_text(serialized, encoding="utf-8")
+    tmp.replace(path)
+
+
+def remember_downloaded(db: dict, entry, out: Path):
+    """Record in *db* (mutated in place) that *entry* was exported to *out*.
+
+    The record is stored under article_key(entry) and holds the entry id,
+    display title, url, the EPUB path as a string, and the current local
+    time at one-second resolution.
+    """
+    key = article_key(entry)
+    record = {
+        "id": entry.get("id"),
+        "title": display_title(entry.get("title") or ""),
+        "url": entry.get("url"),
+        "epub": str(out),
+        "downloaded_at": datetime.now().isoformat(timespec="seconds"),
+    }
+    db[key] = record
+
+
+def article_output_path(entry, out_dir: Path) -> Path:
+    """Return ``out_dir/<safe title>-<id or unix time>.epub`` for an entry.
+
+    Without a title, ``wallabag-<id>`` is used as the name part (the current
+    unix time stands in when the entry has no ``id`` key). A falsy id makes
+    the suffix fall back to the current unix time as well.
+    """
+    entry_title = entry.get("title")
+    if not entry_title:
+        entry_title = f"wallabag-{entry.get('id', int(time.time()))}"
+    unique = entry.get("id") or int(time.time())
+    filename = f"{safe_name(entry_title)}-{unique}.epub"
+    return out_dir / filename
+
+
+def main():
+    """Command-line entry point: export Wallabag articles as EPUB files.
+
+    Fetches up to --limit entries, skips those already recorded in the
+    evidence DB (unless --redownload), builds one EPUB per remaining entry,
+    records each in the DB right after it is built, and optionally archives
+    the exported entries in Wallabag.
+    """
+    parser = argparse.ArgumentParser(description="Fetch Wallabag articles and build one EPUB per article")
+    parser.add_argument("--config", default=str(DEFAULT_CONFIG))
+    parser.add_argument("--limit", type=int, default=10)
+    parser.add_argument("--all", action="store_true", help="include archived/read articles too")
+    parser.add_argument("--starred", action="store_true", help="only starred articles")
+    parser.add_argument("--title", default=None, help="title override only when exporting one article")
+    parser.add_argument("--output", default=None, help="output directory, or .epub file if --limit 1")
+    parser.add_argument("--archive", action="store_true", help="mark fetched articles archived after successful build")
+    parser.add_argument("--db", default=str(DEFAULT_DB), help=f"download evidence DB, default: {DEFAULT_DB}")
+    parser.add_argument("--redownload", action="store_true", help="ignore evidence DB and download articles again")
+    args = parser.parse_args()
+
+    cfg = load_config(Path(args.config).expanduser())
+    required = ("WALLABAG_URL", "CLIENT_ID", "CLIENT_SECRET", "USERNAME", "PASSWORD")
+    missing = [name for name in required if not cfg.get(name)]
+    if missing:
+        raise SystemExit("Missing in wallabag.conf: " + ", ".join(missing))
+
+    token = wallabag_token(cfg)
+    entries = fetch_entries(cfg, token, limit=args.limit, unread=not args.all, starred=args.starred)
+    if not entries:
+        raise SystemExit("No articles found.")
+
+    db_path = Path(args.db).expanduser()
+    downloaded = load_downloaded(db_path)
+    fetched_count = len(entries)
+    if not args.redownload:
+        entries = [candidate for candidate in entries if article_key(candidate) not in downloaded]
+    skipped = fetched_count - len(entries)
+    if skipped:
+        print(f"Skipped {skipped} already downloaded article(s). Use --redownload to fetch again.")
+    if not entries:
+        raise SystemExit("No new articles to download.")
+
+    output_arg = Path(args.output).expanduser() if args.output else DEFAULT_OUT
+    # --output names a single .epub file only when exactly one article remains;
+    # otherwise it is treated as the output directory.
+    only_one = len(entries) == 1
+    single_file = only_one and output_arg.suffix.lower() == ".epub"
+    out_dir = output_arg.parent if single_file else output_arg
+    out_dir.mkdir(parents=True, exist_ok=True)
+
+    created = []
+    for position, entry in enumerate(entries, 1):
+        if single_file:
+            out = output_arg
+        else:
+            out = article_output_path(entry, out_dir)
+        if only_one and args.title:
+            title = args.title
+        else:
+            title = entry.get("title") or f"Wallabag article {position}"
+        build_epub(entry, out, title)
+        remember_downloaded(downloaded, entry, out)
+        # Persist after every article so progress survives a later failure.
+        save_downloaded(db_path, downloaded)
+        created.append((entry, out))
+        print(f"Created: {out}")
+
+    if args.archive:
+        for entry, _out in created:
+            if entry.get("id") is not None:
+                mark_archived(cfg, token, entry["id"])
+        print(f"Archived {len(created)} articles in Wallabag.")
+
+
+if __name__ == "__main__":
+    main()