Initial commit

2026-05-16 08:04:53 +00:00 · 2026-05-16 08:04:53 +00:00 · 01af871145
commit 01af871145
5 changed files with 615 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,15 @@
 # Local secrets/configuration
 *.conf
 .git.env
 # Generated/state files
 wallabag_downloaded.json
 out/
 # Python cache
 __pycache__/
 *.py[cod]
 # OS/editor noise
 .DS_Store
 *.swp
--- a/mail.conf.sample
+++ b/mail.conf.sample
@ -0,0 +1,6 @@
 SMTP_HOST=smtp.example.com
 SMTP_PORT=587
 SMTP_USER=user@example.com
 SMTP_PASS=change-me
 SMTP_SENDER=user@example.com
 KINDLE_EMAIL=your-kindle@example.com
--- a/send_to_kindle.py
+++ b/send_to_kindle.py
@ -0,0 +1,172 @@
 #!/usr/bin/env python3
 import argparse
 import mimetypes
 import os
 import smtplib
 import re
 import zipfile
 import xml.etree.ElementTree as ET
 from email.message import EmailMessage
 from pathlib import Path
 # Default KEY=VALUE mail settings file, resolved relative to the working directory.
 DEFAULT_CONFIG = Path("./mail.conf")
 # Directory scanned for *.epub files when no file argument is given on the command line.
 DEFAULT_OUT = Path("./out")
 def load_config(path: Path) -> dict:
    """Load a simple KEY=VALUE config file.

    Blank lines, lines starting with ``#`` and lines without ``=`` are
    ignored. Keys and values are whitespace-trimmed. A value wrapped in one
    pair of matching quotes (``"..."`` or ``'...'``) has that pair removed;
    quote characters that are part of the value itself (for example a
    password ending in ``'``) are kept.

    Returns an empty dict when ``path`` is not an existing file.
    """
    cfg = {}
    if not path.is_file():
        return cfg
    # Read as UTF-8 so parsing does not depend on the platform locale.
    for line in path.read_text(encoding="utf-8").splitlines():
        line = line.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue
        key, value = line.split("=", 1)
        value = value.strip()
        # Strip only a matching surrounding pair, never stray quote characters.
        if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
            value = value[1:-1]
        cfg[key.strip()] = value
    return cfg
 def get_value(args, cfg, attr, key, env=None, default=None):
    """Resolve one setting by precedence.

    Order: command-line attribute ``attr`` on ``args``, then ``cfg[key]``,
    then the environment variable named ``env``, then ``default``. Empty
    strings (and ``None`` on the command line) count as "not set".
    """
    cli_value = getattr(args, attr, None)
    if cli_value is not None and cli_value != "":
        return cli_value
    if cfg.get(key, "") != "":
        return cfg[key]
    if env:
        env_value = os.getenv(env)
        if env_value:
            return env_value
    return default
 def chunks(items, size):
    """Yield consecutive slices of ``items`` holding at most ``size`` elements each."""
    total = len(items)
    for start in range(0, total, size):
        stop = start + size
        yield items[start:stop]
 def epub_title(path: Path) -> str | None:
    """Read dc:title from EPUB metadata, if available."""
    try:
        with zipfile.ZipFile(path) as z:
            container = ET.fromstring(z.read("META-INF/container.xml"))
            ns_container = {"c": "urn:oasis:names:tc:opendocument:xmlns:container"}
            rootfile = container.find(".//c:rootfile", ns_container)
            if rootfile is None:
                return None
            opf_path = rootfile.attrib["full-path"]
            opf = ET.fromstring(z.read(opf_path))
            ns = {"dc": "http://purl.org/dc/elements/1.1/"}
            title = opf.find(".//dc:title", ns)
            if title is not None and title.text:
                return " ".join(title.text.replace("_", " ").split())
    except Exception:
        return None
    return None
 def attachment_name(path: Path) -> str:
    """Build the filename used for the emailed attachment.

    The file on disk is not renamed. When the EPUB metadata has a title, that
    title (with filesystem-hostile characters replaced by spaces, whitespace
    collapsed, trailing dots removed and capped at 120 characters) is used as
    the attachment stem, because Amazon Send-to-Kindle often derives the
    displayed document title from the attachment filename. The original
    extension is kept, lower-cased. Without a title the local filename is
    returned unchanged.
    """
    title = epub_title(path)
    if not title:
        return path.name
    without_reserved = re.sub(r'[\\/:*?"<>|]+', ' ', title)
    collapsed = re.sub(r"\s+", " ", without_reserved)
    stem = collapsed.strip().rstrip('.')[:120]
    if not stem:
        stem = path.stem
    return stem + path.suffix.lower()
 def send_to_kindle(smtp_host, smtp_port, smtp_user, smtp_pass, sender, kindle_email, file_paths):
    """Email the given files as attachments to a Send-to-Kindle address.

    Args:
        smtp_host: SMTP server hostname.
        smtp_port: SMTP server port (int or numeric string).
        smtp_user: Login name for the SMTP server.
        smtp_pass: Password for the SMTP server.
        sender: Address placed in the From header.
        kindle_email: Destination Send-to-Kindle address.
        file_paths: Iterable of paths to attach; all go into one message.

    Raises:
        FileNotFoundError: If any path is not an existing file. This is
            checked before any connection is made.

    Ports 25, 587 and 2525 are plain-text submission ports, so the connection
    is upgraded with STARTTLS there. Every other port (including the default
    465) uses implicit TLS via SMTP_SSL.
    """
    import ssl  # local import: only the STARTTLS path needs it

    file_paths = [Path(p) for p in file_paths]
    for file_path in file_paths:
        if not file_path.is_file():
            raise FileNotFoundError(file_path)
    # Resolve each display name once; attachment_name() opens the EPUB archive.
    display_names = [attachment_name(p) for p in file_paths]
    msg = EmailMessage()
    msg["From"] = sender
    msg["To"] = kindle_email
    msg["Subject"] = "Send to Kindle"
    msg.set_content("Attached ebook(s):\n\n" + "\n".join(display_names))
    for file_path, display_name in zip(file_paths, display_names):
        ctype, _ = mimetypes.guess_type(file_path)
        if ctype is None:
            ctype = "application/octet-stream"
        maintype, subtype = ctype.split("/", 1)
        msg.add_attachment(
            file_path.read_bytes(),
            maintype=maintype,
            subtype=subtype,
            filename=display_name,
        )
    port = int(smtp_port)
    if port in (25, 587, 2525):
        # Submission ports speak plain SMTP first; SMTP_SSL would fail the handshake.
        with smtplib.SMTP(smtp_host, port) as smtp:
            smtp.starttls(context=ssl.create_default_context())
            smtp.login(smtp_user, smtp_pass)
            smtp.send_message(msg)
    else:
        with smtplib.SMTP_SSL(smtp_host, port) as smtp:
            smtp.login(smtp_user, smtp_pass)
            smtp.send_message(msg)
 def find_epubs(out_dir: Path):
    """Return the ``*.epub`` files directly inside ``out_dir`` as a sorted list."""
    matches = list(out_dir.glob("*.epub"))
    matches.sort()
    return matches
 def main():
    """Command-line entry point: email one file, or every EPUB in ./out, to Kindle.

    Settings come from command-line options, then the config file, then
    environment variables. Files are sent in batches of at most
    ``--max-attachments`` per email, and each file is deleted from disk after
    its batch has been sent.
    """
    parser = argparse.ArgumentParser(description="Send ebook(s) to Kindle via email")
    parser.add_argument("file", nargs="?", help="ebook file, e.g. .epub/.pdf/.mobi. If omitted, sends all .epub files in ./out")
    parser.add_argument("--config", default=str(DEFAULT_CONFIG), help=f"config file, default: {DEFAULT_CONFIG}")
    parser.add_argument("--kindle", help="your Kindle email, e.g. name@kindle.com")
    parser.add_argument("--smtp-host")
    parser.add_argument("--smtp-port", type=int)
    parser.add_argument("--smtp-user")
    parser.add_argument("--smtp-pass")
    parser.add_argument("--sender")
    parser.add_argument("--max-attachments", type=int, default=16, help="maximum attachments per email, default: 16")
    args = parser.parse_args()
    cfg = load_config(Path(args.config).expanduser())
    # (settings name == argparse attribute, config/env key, fallback)
    lookups = (
        ("smtp_host", "SMTP_HOST", None),
        ("smtp_port", "SMTP_PORT", "465"),
        ("smtp_user", "SMTP_USER", None),
        ("smtp_pass", "SMTP_PASS", None),
        ("sender", "SMTP_SENDER", None),
        ("kindle", "KINDLE_EMAIL", None),
    )
    settings = {
        name: get_value(args, cfg, name, key, key, fallback)
        for name, key, fallback in lookups
    }
    missing = [name for name in settings if not settings[name]]
    if missing:
        raise SystemExit(
            "Missing: " + ", ".join(missing) +
            f"\nAdd them to {args.config} or pass them as command-line options."
        )
    if args.file:
        files = [Path(args.file).expanduser()]
    else:
        files = find_epubs(DEFAULT_OUT)
    if not files:
        raise SystemExit(f"No EPUB files found in {DEFAULT_OUT}")
    # Guard against zero/negative batch sizes from the command line.
    per_email = max(1, args.max_attachments)
    batches = list(chunks(files, per_email))
    total = len(batches)
    for idx, batch in enumerate(batches, 1):
        send_to_kindle(
            settings["smtp_host"],
            settings["smtp_port"],
            settings["smtp_user"],
            settings["smtp_pass"],
            settings["sender"],
            settings["kindle"],
            batch,
        )
        suffix = f" ({idx}/{total})" if total > 1 else ""
        print(f"Sent email{suffix}: {len(batch)} attachment(s)")
        # Only reached when the send above did not raise.
        for file_path in batch:
            file_path.unlink()
            print(f"  - sent and deleted: {file_path}")
 # Run the CLI only when this file is executed as a script, not when imported.
 if __name__ == "__main__":
    main()
--- a/wallabag.conf.sample
+++ b/wallabag.conf.sample
@ -0,0 +1,5 @@
 WALLABAG_URL=https://wallabag.example.com
 CLIENT_ID=change-me
 CLIENT_SECRET=change-me
 USERNAME=change-me
 PASSWORD=change-me
--- a/wallabag_to_epub.py
+++ b/wallabag_to_epub.py
@ -0,0 +1,417 @@
 #!/usr/bin/env python3
 """Fetch Wallabag articles and create one EPUB per article."""
 import argparse
 import html
 import json
 import re
 import time
 import mimetypes
 import urllib.parse
 import urllib.request
 import uuid
 import zipfile
 from datetime import datetime
 from pathlib import Path
 from xml.sax.saxutils import escape
 # NOTE(review): BASE_DIR is not referenced anywhere in the visible code.
 BASE_DIR = Path(".")
 # Default KEY=VALUE file holding the Wallabag URL and API credentials.
 DEFAULT_CONFIG = Path("./wallabag.conf")
 # Default directory that receives the generated EPUB files.
 DEFAULT_OUT = Path("./out")
 # Default JSON file recording which articles were already exported.
 DEFAULT_DB = Path("./wallabag_downloaded.json")
 def load_config(path: Path) -> dict:
    """Load a simple KEY=VALUE config file.

    Blank lines, lines starting with ``#`` and lines without ``=`` are
    ignored. Keys and values are whitespace-trimmed. A value wrapped in one
    pair of matching quotes has that pair removed; quote characters that are
    part of the value itself (for example a password ending in ``'``) are
    kept.

    Returns an empty dict when ``path`` is not an existing file.
    """
    cfg = {}
    if not path.is_file():
        return cfg
    # Read as UTF-8 so parsing does not depend on the platform locale.
    for line in path.read_text(encoding="utf-8").splitlines():
        line = line.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue
        k, v = line.split("=", 1)
        v = v.strip()
        # Strip only a matching surrounding pair, never stray quote characters.
        if len(v) >= 2 and v[0] == v[-1] and v[0] in "\"'":
            v = v[1:-1]
        cfg[k.strip()] = v
    return cfg
 def http_json(url, method="GET", data=None, token=None):
    """Perform an HTTP request and return the decoded JSON response body.

    ``data``, when given, is sent form-urlencoded. ``token``, when given, is
    sent as a Bearer Authorization header. The request uses a 60 second
    timeout and the response is decoded as UTF-8.
    """
    headers = {"Accept": "application/json"}
    payload = None
    if data is not None:
        payload = urllib.parse.urlencode(data).encode()
        headers["Content-Type"] = "application/x-www-form-urlencoded"
    if token:
        headers["Authorization"] = f"Bearer {token}"
    request = urllib.request.Request(url, data=payload, headers=headers, method=method)
    with urllib.request.urlopen(request, timeout=60) as response:
        raw = response.read()
    return json.loads(raw.decode("utf-8"))
 def wallabag_token(cfg):
    """Request an OAuth access token with the password grant and return it.

    Reads WALLABAG_URL, CLIENT_ID, CLIENT_SECRET, USERNAME and PASSWORD from
    ``cfg`` and POSTs them to ``<WALLABAG_URL>/oauth/v2/token``.
    """
    endpoint = cfg["WALLABAG_URL"].rstrip("/") + "/oauth/v2/token"
    credentials = {
        "grant_type": "password",
        "client_id": cfg["CLIENT_ID"],
        "client_secret": cfg["CLIENT_SECRET"],
        "username": cfg["USERNAME"],
        "password": cfg["PASSWORD"],
    }
    response = http_json(endpoint, "POST", credentials)
    return response["access_token"]
 def fetch_entries(cfg, token, limit=10, unread=True, starred=False, archive=False):
    """Fetch the first page of entries, newest first, with full detail.

    ``limit`` is sent as ``perPage``. ``archive=True`` requests archived
    entries and takes precedence over ``unread``; ``unread=True`` requests
    non-archived entries. ``starred=True`` restricts to starred entries.
    Returns the list under ``_embedded.items`` (empty when absent).
    """
    endpoint = cfg["WALLABAG_URL"].rstrip("/") + "/api/entries.json"
    params = {
        "perPage": str(limit),
        "page": "1",
        "sort": "created",
        "order": "desc",
        "detail": "full",
    }
    if archive:
        params["archive"] = "1"
    elif unread:
        params["archive"] = "0"
    if starred:
        params["starred"] = "1"
    query = urllib.parse.urlencode(params)
    payload = http_json(f"{endpoint}?{query}", token=token)
    embedded = payload.get("_embedded", {})
    return embedded.get("items", [])
 def mark_archived(cfg, token, entry_id):
    """PATCH the entry with ``archive=1`` and return the decoded JSON response."""
    server = cfg["WALLABAG_URL"].rstrip("/")
    endpoint = f"{server}/api/entries/{entry_id}.json"
    return http_json(endpoint, "PATCH", {"archive": "1"}, token=token)
 # Entity names that XML itself predefines; these must stay escaped in XHTML.
 _XML_PREDEFINED_ENTITIES = frozenset({"amp", "lt", "gt", "quot", "apos"})
 def _decode_named_entity(match):
    """Turn one HTML-only named entity (such as nbsp or eacute) into XML-safe text."""
    entity = match.group(0)
    if match.group(1) in _XML_PREDEFINED_ENTITIES:
        return entity
    decoded = html.unescape(entity)
    if decoded == entity:
        # Unknown name: leave it; the bare-ampersand pass escapes it later.
        return entity
    # Re-escape in case the entity decoded to markup-significant characters
    # (HTML also defines upper-case aliases such as LT and AMP).
    return html.escape(decoded, quote=True)
 def clean_fragment(content):
    """Prepare Wallabag HTML for EPUB while preserving formatting where possible.

    Wallabag already extracts readable article HTML, so keep headings, lists,
    blockquotes, tables, links, inline styles/classes, and images. Remove only
    active/interactive content and make common void tags XML-compatible.

    Named HTML entities that XML does not define are replaced by the
    characters they stand for. The five XML entities and numeric character
    references are left untouched so that escaped text such as an escaped
    "<" is never turned into live markup.

    Returns "<p></p>" for empty input.
    """
    if not content:
        return "<p></p>"
    content = re.sub(r"<script\b[^>]*>.*?</script>", "", content, flags=re.I | re.S)
    content = re.sub(r"<iframe\b[^>]*>.*?</iframe>", "", content, flags=re.I | re.S)
    content = re.sub(r"<form\b[^>]*>.*?</form>", "", content, flags=re.I | re.S)
    content = re.sub(r"\s(on\w+)=([\"']).*?\2", "", content, flags=re.I | re.S)
    # Many sites lazy-load images and Wallabag can keep the real URL in data-*
    # while src is empty/a placeholder. Promote those before dropping extras.
    content = promote_lazy_image_srcs(content)
    content = re.sub(r"\s(srcset|sizes)=([\"']).*?\2", "", content, flags=re.I | re.S)
    # Decode entity by entity instead of unescaping the whole document, which
    # would also unescape "<", ">" and "&" and corrupt the markup.
    content = re.sub(r"&([A-Za-z][A-Za-z0-9]*);", _decode_named_entity, content)
    content = re.sub(r"&(?!amp;|lt;|gt;|quot;|apos;|#\d+;|#x[0-9A-Fa-f]+;)", "&amp;", content)
    content = re.sub(r"<(br|hr|img|meta|link|input)(\b[^>]*?)(?<!/)>", r"<\1\2 />", content, flags=re.I)
    return content
 def promote_lazy_image_srcs(content):
    """Use lazy-loader attributes as img src when src is missing/a placeholder.

    A src counts as a placeholder when it is empty, a data: URI, contains
    "placeholder", or is just "#" or "/". Tags without a usable lazy
    attribute, or with a real src, are returned unchanged.
    """
    lazy_names = ("data-src", "data-original", "data-lazy-src", "data-url", "data-full-url")
    def repl(match):
        tag = match.group(0)
        attrs = {
            m.group(1).lower(): m.group(3)
            for m in re.finditer(r"\s([\w:-]+)=([\"'])(.*?)\2", tag, flags=re.S)
        }
        src = (attrs.get("src") or "").strip()
        lazy = next((attrs[name].strip() for name in lazy_names if attrs.get(name)), None)
        if not lazy:
            return tag
        is_placeholder = (
            not src
            or src.startswith("data:")
            or "placeholder" in src.lower()
            or src in ("#", "/")
        )
        if not is_placeholder:
            return tag
        # The value may come from a single-quoted attribute; keep it valid
        # inside the double quotes written below.
        new_attr = ' src="' + lazy.replace('"', "&quot;") + '"'
        if "src" in attrs:
            # A callable replacement keeps backslashes in the URL literal.
            return re.sub(r"\ssrc=([\"']).*?\1", lambda _m: new_attr, tag, count=1, flags=re.I | re.S)
        # No src yet: insert it before the tag's closing ">" or "/>" so a
        # self-closing tag stays well-formed.
        if tag.endswith("/>"):
            return tag[:-2].rstrip() + new_attr + " />"
        return tag[:-1].rstrip() + new_attr + ">"
    return re.sub(r"<img\b[^>]*>", repl, content, flags=re.I | re.S)
 def fetch_image(url, referer=None):
    """Download an image and return ``(data, (media_type, extension))``.

    Sends a browser-like User-Agent and, when given, ``referer`` as the
    Referer header. Uses a 30 second timeout.

    Raises:
        ValueError: If the body is larger than 20 MiB.
    """
    max_bytes = 20 * 1024 * 1024
    headers = {
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124 Safari/537.36",
        # Prefer Kindle-friendly formats. Some CDNs serve AVIF/WebP when asked,
        # which Amazon's EPUB conversion may drop.
        "Accept": "image/jpeg,image/png,image/gif,image/svg+xml,image/*;q=0.8,*/*;q=0.5",
    }
    if referer:
        headers["Referer"] = referer
    request = urllib.request.Request(url, headers=headers)
    with urllib.request.urlopen(request, timeout=30) as response:
        # Read one byte past the cap so an oversized body can be detected.
        payload = response.read(max_bytes + 1)
        if len(payload) > max_bytes:
            raise ValueError("image is larger than 20 MiB")
        header_value = response.headers.get("Content-Type") or ""
    # Drop parameters such as "; charset=..." from the header value.
    ctype = header_value.split(";", 1)[0].strip().lower()
    return payload, guess_image_type(url, payload, ctype)
 def guess_image_type(url, data, ctype):
    """Determine ``(media_type, extension)`` for downloaded image bytes.

    Detection order:
      1. Magic bytes for JPEG, PNG, GIF and WebP. These win over the
         server-declared type, which is frequently wrong.
      2. A declared ``image/*`` Content-Type, with common non-standard
         aliases (e.g. ``image/jpg``) normalized.
      3. An ``<svg`` tag within the first 500 bytes.
      4. The URL path's file extension, else ``application/octet-stream``.

    The extension always starts with a dot; ``.img`` is used when none is
    known for the media type.
    """
    known_extensions = {
        "image/jpeg": ".jpg",
        "image/png": ".png",
        "image/gif": ".gif",
        "image/webp": ".webp",
        "image/svg+xml": ".svg",
    }
    aliases = {
        "image/jpg": "image/jpeg",
        "image/pjpeg": "image/jpeg",
        "image/x-png": "image/png",
    }
    ctype = (ctype or "").strip().lower()
    if data.startswith(b"\xff\xd8\xff"):
        media_type = "image/jpeg"
    elif data.startswith(b"\x89PNG\r\n\x1a\n"):
        media_type = "image/png"
    elif data.startswith((b"GIF87a", b"GIF89a")):
        media_type = "image/gif"
    elif data.startswith(b"RIFF") and data[8:12] == b"WEBP":
        media_type = "image/webp"
    elif ctype.startswith("image/"):
        media_type = aliases.get(ctype, ctype)
    elif b"<svg" in data[:500].lower():
        media_type = "image/svg+xml"
    else:
        media_type = mimetypes.guess_type(urllib.parse.urlparse(url).path)[0] or "application/octet-stream"
    # Prefer the fixed table: mimetypes output varies with the platform's MIME database.
    ext = known_extensions.get(media_type) or mimetypes.guess_extension(media_type) or ""
    if ext == ".jpe":
        ext = ".jpg"
    elif ext == ".svgz":
        ext = ".svg"
    return media_type, ext or ".img"
 def sanitize_img_tag(tag):
    """Rebuild an <img> tag keeping only Kindle/EPUB-friendly attributes.

    Kept attributes: src, alt, title, class, width, height (names
    lower-cased, values re-escaped for double-quoted output). An
    ``alt="image"`` is appended when the tag has no alt. The result is always
    self-closing.
    """
    allowed = {"src", "alt", "title", "class", "width", "height"}
    kept = []
    has_alt = False
    for match in re.finditer(r"\s([\w:-]+)=([\"'])(.*?)\2", tag, flags=re.S):
        name = match.group(1).lower()
        if name not in allowed:
            continue
        kept.append((name, html.unescape(match.group(3))))
        has_alt = has_alt or name == "alt"
    if not has_alt:
        kept.append(("alt", "image"))
    rendered = "".join(f' {name}="{html.escape(value, quote=True)}"' for name, value in kept)
    return "<img" + rendered + " />"
 def embed_images(content, base_url):
    """Download <img> sources and rewrite them to local EPUB paths.

    Returns ``(content, images)`` where ``images`` is a list of dicts with
    ``id``, ``href``, ``media_type`` and ``data``. Each distinct absolute URL
    is downloaded once. Tags with empty, ``data:``, ``cid:`` or non-HTTP(S)
    sources, and tags whose download fails, are left unchanged.
    """
    images = []
    href_by_url = {}
    img_with_src = re.compile(r"<img\b[^>]*\ssrc=([\"'])(.*?)\1[^>]*>", flags=re.I | re.S)
    def localize(match):
        tag = match.group(0)
        src = html.unescape(match.group(2)).strip()
        if not src or src.startswith(("data:", "cid:")):
            return tag
        abs_url = urllib.parse.urljoin(base_url, src)
        if urllib.parse.urlparse(abs_url).scheme not in ("http", "https"):
            return tag
        href = href_by_url.get(abs_url)
        if href is None:
            try:
                data, (media_type, ext) = fetch_image(abs_url, referer=base_url)
            except Exception as e:
                # Keep the remote reference rather than failing the whole article.
                print(f"Warning: could not download image {abs_url}: {e}")
                return tag
            item_id = f"img{len(images) + 1}"
            href = f"images/{item_id}{ext}"
            href_by_url[abs_url] = href
            images.append({"id": item_id, "href": href, "media_type": media_type, "data": data})
        tag = re.sub(r"\ssrc=([\"']).*?\1", f' src="{href}"', tag, count=1, flags=re.I | re.S)
        return sanitize_img_tag(tag)
    rewritten = img_with_src.sub(localize, content)
    return rewritten, images
 def safe_name(s):
    """Reduce ``s`` to a filename-safe stem of at most 80 characters.

    Runs of characters outside ``A-Za-z0-9_.-`` become a single underscore,
    and leading/trailing underscores are removed. Returns "article" when
    nothing is left.
    """
    collapsed = re.sub(r"[^A-Za-z0-9_.-]+", "_", s)
    trimmed = collapsed.strip("_")[:80]
    return trimmed if trimmed else "article"
 def display_title(s):
    """Return a human-friendly article title for EPUB metadata/display.

    HTML entities are decoded, underscores become spaces (some older or
    generated titles contain them) and whitespace runs collapse to single
    spaces. Falls back to "Wallabag article" for empty input or an empty
    result. Filenames are sanitized separately by safe_name().
    """
    fallback = "Wallabag article"
    text = html.unescape(s if s else fallback)
    text = text.replace("_", " ")
    text = re.sub(r"\s+", " ", text).strip()
    return text if text else fallback
 def metadata_title(s):
    """Return the display title with every ".epub" occurrence removed, trimmed.

    Intended for EPUB metadata, not for filenames.
    """
    title = display_title(s)
    without_extension = title.replace(".epub", "")
    return without_extension.strip()
 def build_epub(entry, out_path: Path, title: str | None = None):
    """Build a single-article EPUB at ``out_path`` and return that path.

    Args:
        entry: Wallabag entry dict. Reads ``title``, ``url``, ``domain_name``,
            ``published_at``, ``created_at``, ``content`` and
            ``preview_picture``.
        out_path: Destination .epub file; parent directories are created.
        title: Optional title override; defaults to the entry's title.

    The preview picture is prepended when the article body does not already
    reference it. Images are downloaded and embedded. Values placed inside
    XML attributes are escaped including quotes, so titles and URLs that
    contain a double quote still produce well-formed documents.
    """
    from datetime import timezone  # local import: the file imports only datetime

    out_path.parent.mkdir(parents=True, exist_ok=True)
    book_id = f"urn:uuid:{uuid.uuid4()}"
    # Timezone-aware replacement for the deprecated datetime.utcnow().
    now = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    etitle = metadata_title(title or entry.get("title") or "Wallabag article")
    url = entry.get("url") or ""
    domain = entry.get("domain_name") or urllib.parse.urlparse(url).netloc or "Wallabag"
    published = entry.get("published_at") or entry.get("created_at") or ""
    raw_content = entry.get("content") or "<p></p>"
    preview = entry.get("preview_picture") or ""
    if preview and preview not in raw_content:
        raw_content = f'<figure><img src="{html.escape(preview, quote=True)}" alt="{html.escape(etitle, quote=True)}" /></figure>\n' + raw_content
    content = clean_fragment(raw_content)
    content, images = embed_images(content, url)
    # Text-node escaping versus attribute escaping (the latter also escapes quotes).
    title_text = escape(etitle)
    title_attr = html.escape(etitle, quote=True)
    url_text = escape(url)
    url_attr = html.escape(url, quote=True)
    chapter = f'''<?xml version="1.0" encoding="utf-8"?>
 <html xmlns="http://www.w3.org/1999/xhtml" lang="en">
 <head>
  <title>{title_text}</title>
  <link rel="stylesheet" type="text/css" href="style.css" />
 </head>
 <body>
 <article>
 <header>
  <h1>{title_text}</h1>
  <p class="source">{escape(domain)}{(' · ' + escape(published[:10])) if published else ''}</p>
  <p class="source"><a href="{url_attr}">{url_text}</a></p>
 </header>
 <section class="content">
 {content}
 </section>
 </article>
 </body></html>'''
    container = '''<?xml version="1.0"?>
 <container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
  <rootfiles><rootfile full-path="OEBPS/content.opf" media-type="application/oebps-package+xml"/></rootfiles>
 </container>'''
    image_manifest = "".join(
        f'    <item id="{img["id"]}" href="{html.escape(img["href"], quote=True)}" media-type="{html.escape(img["media_type"], quote=True)}"/>\n'
        for img in images
    )
    opf = f'''<?xml version="1.0" encoding="utf-8"?>
 <package xmlns="http://www.idpf.org/2007/opf" xmlns:dc="http://purl.org/dc/elements/1.1/" unique-identifier="BookId" version="3.0">
  <metadata>
    <dc:identifier id="BookId">{book_id}</dc:identifier>
    <dc:title id="title">{title_text}</dc:title>
    <meta refines="#title" property="title-type">main</meta>
    <meta name="calibre:title_sort" content="{title_attr}"/>
    <dc:language>en</dc:language>
    <dc:creator id="creator">{escape(domain)}</dc:creator>
    <meta refines="#creator" property="role" scheme="marc:relators">aut</meta>
    <dc:publisher>Wallabag</dc:publisher>
    <dc:source>{url_text}</dc:source>
    <meta property="dcterms:modified">{now}</meta>
  </metadata>
  <manifest>
    <item id="nav" href="nav.xhtml" media-type="application/xhtml+xml" properties="nav"/>
    <item id="css" href="style.css" media-type="text/css"/>
    <item id="article" href="article.xhtml" media-type="application/xhtml+xml"/>
 {image_manifest}  </manifest>
  <spine><itemref idref="article"/></spine>
 </package>'''
    nav = f'''<?xml version="1.0" encoding="utf-8"?>
 <html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">
 <head><title>{title_text}</title></head>
 <body><nav epub:type="toc"><h1>{title_text}</h1><ol><li><a href="article.xhtml">Article</a></li></ol></nav></body></html>'''
    css = """body{font-family:serif;line-height:1.45;margin:0;padding:1em;} article{max-width:42em;} h1{line-height:1.15;} img,video{max-width:100%;height:auto;} figure{margin:1em 0;} figcaption,.source{font-size:.85em;color:#666;} blockquote{border-left:3px solid #aaa;margin-left:.5em;padding-left:1em;color:#333;} pre,code{font-family:monospace;white-space:pre-wrap;} table{border-collapse:collapse;max-width:100%;} td,th{border:1px solid #ccc;padding:.25em;}"""
    # Deflate everything except the mimetype entry, which must come first and
    # stay uncompressed so readers can identify the container.
    with zipfile.ZipFile(out_path, "w", compression=zipfile.ZIP_DEFLATED) as z:
        z.writestr("mimetype", "application/epub+zip", compress_type=zipfile.ZIP_STORED)
        z.writestr("META-INF/container.xml", container)
        z.writestr("OEBPS/content.opf", opf)
        z.writestr("OEBPS/nav.xhtml", nav)
        z.writestr("OEBPS/style.css", css)
        z.writestr("OEBPS/article.xhtml", chapter)
        for img in images:
            z.writestr("OEBPS/" + img["href"], img["data"])
    print(f"Embedded {len(images)} image(s) in {out_path}")
    return out_path
 def article_key(entry) -> str:
    """Return the download-DB key for an entry.

    Uses ``id:<id>`` when the entry has an id, else ``url:<url>``, else
    ``title:<display title>``.
    """
    entry_id = entry.get("id")
    if entry_id is not None:
        return f"id:{entry_id}"
    url = entry.get("url")
    if url:
        return "url:" + url
    return "title:" + display_title(entry.get("title") or "")
 def load_downloaded(path: Path) -> dict:
    """Load the download-evidence DB.

    Returns an empty dict when the file is missing, unreadable, not valid
    JSON, or does not hold a JSON object.
    """
    if path.is_file():
        try:
            parsed = json.loads(path.read_text())
        except Exception:
            return {}
        if isinstance(parsed, dict):
            return parsed
    return {}
 def save_downloaded(path: Path, data: dict):
    """Write the download-evidence DB as sorted, indented JSON.

    The JSON is written to a sibling ``<name>.tmp`` file first and then moved
    over ``path``. Parent directories are created as needed.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(data, indent=2, ensure_ascii=False, sort_keys=True)
    scratch = path.with_suffix(path.suffix + ".tmp")
    scratch.write_text(serialized + "\n")
    scratch.replace(path)
 def remember_downloaded(db: dict, entry, out: Path):
    """Record in ``db`` (in place) that ``entry`` was exported to ``out``.

    The record holds the entry id, display title, URL, EPUB path and the
    local timestamp of the download.
    """
    key = article_key(entry)
    record = {
        "id": entry.get("id"),
        "title": display_title(entry.get("title") or ""),
        "url": entry.get("url"),
        "epub": str(out),
        "downloaded_at": datetime.now().isoformat(timespec="seconds"),
    }
    db[key] = record
 def article_output_path(entry, out_dir: Path) -> Path:
    """Return ``out_dir/<safe title>-<id>.epub`` for an entry.

    The current Unix time stands in for a missing title or a missing/falsy id.
    """
    title = entry.get("title")
    if not title:
        title = f"wallabag-{entry.get('id', int(time.time()))}"
    entry_id = entry.get("id")
    suffix = entry_id if entry_id else int(time.time())
    filename = f"{safe_name(title)}-{suffix}.epub"
    return out_dir / filename
 def main():
    """Command-line entry point: export Wallabag articles as one EPUB each.

    Fetches entries, skips those already listed in the evidence DB (unless
    ``--redownload``), builds an EPUB per remaining entry, records each one in
    the DB right after it is built, and optionally archives the exported
    entries in Wallabag.
    """
    parser = argparse.ArgumentParser(description="Fetch Wallabag articles and build one EPUB per article")
    parser.add_argument("--config", default=str(DEFAULT_CONFIG))
    parser.add_argument("--limit", type=int, default=10)
    parser.add_argument("--all", action="store_true", help="include archived/read articles too")
    parser.add_argument("--starred", action="store_true", help="only starred articles")
    parser.add_argument("--title", default=None, help="title override only when exporting one article")
    parser.add_argument("--output", default=None, help="output directory, or .epub file if --limit 1")
    parser.add_argument("--archive", action="store_true", help="mark fetched articles archived after successful build")
    parser.add_argument("--db", default=str(DEFAULT_DB), help=f"download evidence DB, default: {DEFAULT_DB}")
    parser.add_argument("--redownload", action="store_true", help="ignore evidence DB and download articles again")
    args = parser.parse_args()
    cfg = load_config(Path(args.config).expanduser())
    required = ["WALLABAG_URL", "CLIENT_ID", "CLIENT_SECRET", "USERNAME", "PASSWORD"]
    missing = [name for name in required if not cfg.get(name)]
    if missing:
        raise SystemExit("Missing in wallabag.conf: " + ", ".join(missing))
    token = wallabag_token(cfg)
    entries = fetch_entries(cfg, token, limit=args.limit, unread=not args.all, starred=args.starred)
    if not entries:
        raise SystemExit("No articles found.")
    db_path = Path(args.db).expanduser()
    downloaded = load_downloaded(db_path)
    fetched_count = len(entries)
    if not args.redownload:
        entries = [entry for entry in entries if article_key(entry) not in downloaded]
    skipped = fetched_count - len(entries)
    if skipped:
        print(f"Skipped {skipped} already downloaded article(s). Use --redownload to fetch again.")
    if not entries:
        raise SystemExit("No new articles to download.")
    output_arg = Path(args.output).expanduser() if args.output else DEFAULT_OUT
    # --output names a file only when exactly one article is left and it ends in .epub.
    single_file = len(entries) == 1 and output_arg.suffix.lower() == ".epub"
    out_dir = output_arg.parent if single_file else output_arg
    out_dir.mkdir(parents=True, exist_ok=True)
    use_title_override = len(entries) == 1 and bool(args.title)
    created = []
    for i, entry in enumerate(entries, 1):
        out = output_arg if single_file else article_output_path(entry, out_dir)
        if use_title_override:
            title = args.title
        else:
            title = entry.get("title") or f"Wallabag article {i}"
        build_epub(entry, out, title)
        remember_downloaded(downloaded, entry, out)
        # Persist after every article so an interrupted run keeps its progress.
        save_downloaded(db_path, downloaded)
        created.append((entry, out))
        print(f"Created: {out}")
    if args.archive:
        for entry, _ in created:
            if entry.get("id") is not None:
                mark_archived(cfg, token, entry["id"])
        print(f"Archived {len(created)} articles in Wallabag.")
 # Run the CLI only when this file is executed as a script, not when imported.
 if __name__ == "__main__":
    main()