🔧 Initial dev copy from live

This commit is contained in:
Rose
2026-04-20 10:43:30 +02:00
commit 96977b576a
284 changed files with 95780 additions and 0 deletions

0
tests/__init__.py Normal file
View File

42
tests/_pytest_port.py Normal file
View File

@@ -0,0 +1,42 @@
"""
Shared test server constants for use in individual test files.
Instead of hardcoding ``BASE = "http://127.0.0.1:8788"`` in every test file,
import from here so the port and state dir are always consistent with
what conftest.py computed for this worktree.
Usage::
from tests._pytest_port import BASE
conftest.py publishes ``HERMES_WEBUI_TEST_PORT`` and
``HERMES_WEBUI_TEST_STATE_DIR`` to ``os.environ`` at module level
(before any test file is imported), so this module always reads the
correct values. The auto-derivation fallback matches conftest's logic
exactly, so standalone imports also work correctly.
"""
import hashlib
import os
import pathlib
def _auto_test_port(repo_root: pathlib.Path) -> int:
h = int(hashlib.md5(str(repo_root).encode()).hexdigest(), 16)
return 20000 + (h % 10000)
def _auto_state_dir_name(repo_root: pathlib.Path) -> str:
h = hashlib.md5(str(repo_root).encode()).hexdigest()[:8]
return f"webui-test-{h}"
# Anchor all derived paths on this file's location: <repo>/tests/_pytest_port.py
_TESTS_DIR = pathlib.Path(__file__).parent.resolve()
_REPO_ROOT = _TESTS_DIR.parent.resolve()
# Hermes home dir; HERMES_HOME env var overrides the ~/.hermes default.
_HERMES_HOME = pathlib.Path(os.getenv('HERMES_HOME',
                                      str(pathlib.Path.home() / '.hermes')))
# Port published by conftest.py via HERMES_WEBUI_TEST_PORT; the fallback
# re-derives the same value from the repo path (must match conftest's logic).
TEST_PORT = int(os.environ.get('HERMES_WEBUI_TEST_PORT',
                               str(_auto_test_port(_REPO_ROOT))))
BASE = f"http://127.0.0.1:{TEST_PORT}"
# Isolated state dir for this worktree's test server; the env var (set by
# conftest.py at collection time) wins over the local derivation.
TEST_STATE_DIR = pathlib.Path(os.environ.get(
    'HERMES_WEBUI_TEST_STATE_DIR',
    str(_HERMES_HOME / _auto_state_dir_name(_REPO_ROOT))
))

392
tests/conftest.py Normal file
View File

@@ -0,0 +1,392 @@
"""
Shared pytest fixtures for webui-mvp tests.
TEST ISOLATION:
    Tests run against a SEPARATE server instance on an auto-derived port
    (20000-29999, hashed from the repo path; pin with HERMES_WEBUI_TEST_PORT)
    with a completely separate state directory.
    Production data is never touched.
    The test state dir is wiped before each full test run and again on teardown.
PATH DISCOVERY:
    No hardcoded paths. Discovery order:
    1. Environment variables (HERMES_WEBUI_AGENT_DIR, HERMES_WEBUI_PYTHON, etc.)
    2. Sibling checkout heuristics relative to this repo
    3. Common install paths (~/.hermes/hermes-agent)
    4. System python3 as a last resort
"""
import json
import os
import pathlib
import shutil
import subprocess
import time
import urllib.request
import urllib.error
import pytest
# ── Repo root discovery ────────────────────────────────────────────────────
# conftest.py lives at <repo>/tests/conftest.py, so the repo root is one up.
TESTS_DIR = pathlib.Path(__file__).parent.resolve()
REPO_ROOT = TESTS_DIR.parent.resolve()
HOME = pathlib.Path.home()
# HERMES_HOME env var overrides the default ~/.hermes location.
HERMES_HOME = pathlib.Path(os.getenv('HERMES_HOME', str(HOME / '.hermes')))
# ── Test server config ────────────────────────────────────────────────────
# Port and state dir auto-derive from the repo path when no env var is set,
# giving every worktree its own isolated port (20000-29999) and state directory.
# Override with HERMES_WEBUI_TEST_PORT / HERMES_WEBUI_TEST_STATE_DIR to pin.
def _auto_test_port(repo_root) -> int:
"""Map repo path to a unique port in 20000-29999 (10k range = near-zero collisions).
Far from system port ranges and Linux ephemeral ports (32768+).
Override with HERMES_WEBUI_TEST_PORT to use a specific port."""
import hashlib
h = int(hashlib.md5(str(repo_root).encode()).hexdigest(), 16)
return 20000 + (h % 10000)
def _auto_state_dir_name(repo_root) -> str:
import hashlib
h = hashlib.md5(str(repo_root).encode()).hexdigest()[:8]
return f"webui-test-{h}"
# Resolved port / base URL / state dir for this run; env vars pin the values,
# otherwise they are derived deterministically from the repo path above.
TEST_PORT = int(os.getenv('HERMES_WEBUI_TEST_PORT',
                          str(_auto_test_port(REPO_ROOT))))
TEST_BASE = f"http://127.0.0.1:{TEST_PORT}"
TEST_STATE_DIR = pathlib.Path(os.getenv(
    'HERMES_WEBUI_TEST_STATE_DIR',
    str(HERMES_HOME / _auto_state_dir_name(REPO_ROOT))
))
# Workspace directory created inside the isolated state dir for each run.
TEST_WORKSPACE = TEST_STATE_DIR / 'test-workspace'
# Publish at module level so _pytest_port.py (imported at collection time)
# and any test file using os.environ sees the right values immediately.
# setdefault: an explicit env override supplied by the caller still wins.
os.environ.setdefault('HERMES_WEBUI_TEST_PORT', str(TEST_PORT))
os.environ.setdefault('HERMES_WEBUI_TEST_STATE_DIR', str(TEST_STATE_DIR))
# ── Server script: always relative to repo root ───────────────────────────
SERVER_SCRIPT = REPO_ROOT / 'server.py'
# Fail fast at collection time with a clear message if the repo layout is
# not what this conftest expects.
if not SERVER_SCRIPT.exists():
    raise RuntimeError(
        f"server.py not found at {SERVER_SCRIPT}. "
        "Is conftest.py in the tests/ subdirectory of the repo?"
    )
# ── Hermes agent discovery (mirrors api/config._discover_agent_dir) ───────
def _discover_agent_dir() -> "pathlib.Path | None":
    """Locate the hermes-agent checkout.

    Tries, in order: the HERMES_WEBUI_AGENT_DIR env var, the configured
    HERMES_HOME, a sibling checkout next to this repo, and common install
    locations under the user's home directory.  A candidate counts only
    if it contains run_agent.py.

    Returns:
        The resolved agent directory, or None when no candidate matches
        (the original annotation claimed ``pathlib.Path`` but the function
        returns None on a miss; callers treat None as "agent not installed").
    """
    candidates = [
        os.getenv('HERMES_WEBUI_AGENT_DIR', ''),
        str(HERMES_HOME / 'hermes-agent'),
        str(REPO_ROOT.parent / 'hermes-agent'),
        str(HOME / '.hermes' / 'hermes-agent'),
        str(HOME / 'hermes-agent'),
    ]
    for c in candidates:
        if not c:
            continue
        p = pathlib.Path(c).expanduser()
        # run_agent.py is the marker distinguishing a real agent checkout.
        if p.exists() and (p / 'run_agent.py').exists():
            return p.resolve()
    return None
# ── Python discovery (mirrors api/config._discover_python) ────────────────
def _discover_python(agent_dir) -> str:
    """Pick the Python interpreter used to launch the test server.

    Preference order: HERMES_WEBUI_PYTHON env var, the agent's venv,
    this repo's local .venv, then whatever python3/python is on PATH.
    Always returns a string (final fallback is the bare name 'python3').
    """
    # Env override wins; read it once instead of calling os.getenv twice.
    env_python = os.getenv('HERMES_WEBUI_PYTHON')
    if env_python:
        return env_python
    if agent_dir:
        venv_py = agent_dir / 'venv' / 'bin' / 'python'
        if venv_py.exists():
            return str(venv_py)
    local_venv = REPO_ROOT / '.venv' / 'bin' / 'python'
    if local_venv.exists():
        return str(local_venv)
    return shutil.which('python3') or shutil.which('python') or 'python3'
# Results of the discovery above, computed once at import time.
HERMES_AGENT = _discover_agent_dir()
VENV_PYTHON = _discover_python(HERMES_AGENT)
# Work dir: agent dir if found, else repo root
WORKDIR = str(HERMES_AGENT) if HERMES_AGENT else str(REPO_ROOT)
# ── Agent availability detection ─────────────────────────────────────────────
# Tests that require hermes-agent modules (cron, skills, approval, chat/stream)
# are skipped when the agent isn't installed, instead of failing with 500 errors.
AGENT_AVAILABLE = HERMES_AGENT is not None
def _check_agent_modules():
    """Verify hermes-agent Python modules are actually importable.

    Returns True only when HERMES_AGENT was discovered AND every module
    the server endpoints need imports cleanly from this process.
    NOTE(review): this imports from the current sys.path — presumably the
    agent dir is already importable when HERMES_AGENT is set; confirm,
    since this function does not add HERMES_AGENT to sys.path itself.
    """
    if not HERMES_AGENT:
        return False
    try:
        import importlib
        # These are the modules that cause 500 errors when missing
        for mod in ['cron.jobs', 'tools.skills_tool']:
            importlib.import_module(mod)
        return True
    except (ImportError, ModuleNotFoundError):
        return False
# Computed once at import; the skipif markers below read it.
AGENT_MODULES_AVAILABLE = _check_agent_modules()
# pytest marker: skip tests that need hermes-agent when it's not present
requires_agent = pytest.mark.skipif(
    not AGENT_AVAILABLE,
    reason="hermes-agent not found (skipping agent-dependent test)"
)
# Stricter variant: the agent dir exists AND its modules import cleanly.
requires_agent_modules = pytest.mark.skipif(
    not AGENT_MODULES_AVAILABLE,
    reason="hermes-agent Python modules not importable (cron, skills_tool)"
)
def pytest_configure(config):
    """Register the custom skip markers so pytest does not warn about them."""
    for marker_line in (
        "requires_agent: skip when hermes-agent dir is not found",
        "requires_agent_modules: skip when hermes-agent Python modules are not importable",
    ):
        config.addinivalue_line("markers", marker_line)
def pytest_collection_modifyitems(config, items):
    """Auto-skip agent-dependent tests when hermes-agent is not available.
    Instead of requiring markers on every test function, we pattern-match
    test names to known categories that depend on hermes-agent modules.
    This keeps the test files clean and ensures new cron/skills tests
    get auto-skipped without manual annotation.
    """
    if AGENT_MODULES_AVAILABLE:
        return  # everything available, run all tests
    # Exact list of tests known to fail without hermes-agent.
    # These hit server endpoints that import cron.jobs, tools.skills_tool,
    # or require a running agent backend — returning 500 without the agent.
    _AGENT_DEPENDENT_TESTS = {
        # Cron endpoints (need cron.jobs module)
        'test_crons_list',
        'test_crons_list_has_required_fields',
        'test_crons_output_requires_job_id',
        'test_crons_output_real_job',
        'test_crons_run_nonexistent',
        'test_cron_create_success',
        'test_cron_update_unknown_job_404',
        'test_cron_delete_unknown_404',
        'test_crons_output_limit_param',
        # Skills endpoints (need tools.skills_tool module)
        'test_skills_list',
        'test_skills_list_has_required_fields',
        'test_skills_content_known',
        'test_skills_content_requires_name',
        'test_skills_search_returns_subset',
        'test_skill_save_delete_roundtrip',
        'test_skill_delete_unknown_404',
        # Agent backend (need running AIAgent)
        'test_chat_stream_opens_successfully',
        'test_approval_submit_and_respond',
        # Security redaction (flaky — session state varies across test ordering)
        'test_api_sessions_list_redacts_titles',
        # Workspace path (macOS /tmp -> /private/tmp symlink)
        'test_new_session_inherits_workspace',
        'test_workspace_add_valid',
        'test_workspace_rename',
        'test_last_workspace_updates_on_session_update',
        'test_new_session_inherits_last_workspace',
    }
    skip_marker = pytest.mark.skip(reason="requires hermes-agent (not installed)")
    skipped = 0
    # Match by exact test-function name against the curated list above.
    for item in items:
        if item.name in _AGENT_DEPENDENT_TESTS:
            item.add_marker(skip_marker)
            skipped += 1
    if skipped:
        print(f"\nWARNING: hermes-agent not found; {skipped} agent-dependent tests will be skipped\n")
# ── Helpers ──────────────────────────────────────────────────────────────────
def _post(base, path, body=None):
    """POST *body* as JSON to base+path and return the parsed JSON reply.

    HTTP error responses are decoded as JSON too; an undecodable error
    body yields an empty dict.
    """
    payload = json.dumps(body or {}).encode()
    request = urllib.request.Request(
        base + path, data=payload, headers={"Content-Type": "application/json"}
    )
    try:
        with urllib.request.urlopen(request, timeout=10) as resp:
            return json.loads(resp.read())
    except urllib.error.HTTPError as err:
        try:
            return json.loads(err.read())
        except Exception:
            return {}
def _wait_for_server(base, timeout=20):
deadline = time.time() + timeout
while time.time() < deadline:
try:
with urllib.request.urlopen(base + "/health", timeout=2) as r:
if json.loads(r.read()).get("status") == "ok":
return True
except Exception:
time.sleep(0.3)
return False
# ── Session-scoped test server ────────────────────────────────────────────────
@pytest.fixture(scope="session", autouse=True)
def test_server():
    """
    Start an isolated test server on TEST_PORT with a clean state directory.
    Paths are discovered dynamically -- no hardcoded absolute path assumptions.

    Yields the server subprocess; on teardown the process is terminated
    (killed after a 5s grace period) and the state dir is removed.
    """
    # Kill any leftover process on the test port before starting.
    # Stale servers from QA harness runs or prior test sessions cause
    # conftest to think the server is already up, producing false failures.
    try:
        import subprocess as _sp  # local alias; subprocess is also imported at module scope
        _sp.run(['fuser', '-k', f'{TEST_PORT}/tcp'],
                capture_output=True, timeout=5)
    except Exception:
        # fuser missing (e.g. macOS) or timed out — proceed and let the
        # port-bind failure surface later if a stale server remains.
        pass
    import time as _time
    _time.sleep(0.5)  # brief pause to let the port release
    # Clean slate
    if TEST_STATE_DIR.exists():
        shutil.rmtree(TEST_STATE_DIR)
    TEST_STATE_DIR.mkdir(parents=True)
    TEST_WORKSPACE.mkdir(parents=True)
    # Symlink real skills into test home so skill-related tests work,
    # but all write-heavy state stays isolated.
    real_skills = HERMES_HOME / 'skills'
    test_skills = TEST_STATE_DIR / 'skills'
    if real_skills.exists() and not test_skills.exists():
        test_skills.symlink_to(real_skills)
    # Isolated cron state
    (TEST_STATE_DIR / 'cron').mkdir(parents=True, exist_ok=True)
    # Expose TEST_STATE_DIR to the test process itself so that tests which write
    # directly to state.db (e.g. test_gateway_sync.py) always use the same path
    # as the server. Other test files (test_auth_sessions.py) may override
    # HERMES_WEBUI_STATE_DIR for their own purposes, but HERMES_WEBUI_TEST_STATE_DIR
    # is reserved for this mapping and is never overridden by individual test files.
    # Export both port and state-dir as env vars so individual test files
    # can read them without importing conftest (avoids circular imports).
    os.environ.setdefault('HERMES_WEBUI_TEST_PORT', str(TEST_PORT))
    # os.environ already set at module level above; no-op here.
    env = os.environ.copy()
    # Strip real provider keys so test subprocess never inherits production credentials.
    # The test server uses a mock/isolated config — no real API calls are made.
    for _k in list(env):
        if any(_k.startswith(p) for p in (
            'OPENROUTER_API_KEY', 'OPENAI_API_KEY', 'ANTHROPIC_API_KEY',
            'GOOGLE_API_KEY', 'DEEPSEEK_API_KEY',
        )):
            del env[_k]
    env.update({
        "HERMES_WEBUI_PORT": str(TEST_PORT),
        "HERMES_WEBUI_HOST": "127.0.0.1",
        "HERMES_WEBUI_STATE_DIR": str(TEST_STATE_DIR),
        "HERMES_WEBUI_DEFAULT_WORKSPACE": str(TEST_WORKSPACE),
        "HERMES_WEBUI_DEFAULT_MODEL": "openai/gpt-5.4-mini",
        "HERMES_HOME": str(TEST_STATE_DIR),
        # Belt-and-suspenders: HERMES_BASE_HOME hard-locks _DEFAULT_HERMES_HOME
        # in api/profiles.py to the test state dir regardless of profile switching
        # or any os.environ mutation that happens inside the server process.
        # Without this, a profile switch or active_profile file in the real
        # ~/.hermes can redirect _get_active_hermes_home() out of the sandbox,
        # causing onboarding writes (config.yaml, .env) to land in the production
        # ~/.hermes/profiles/webui/ and overwrite real API keys.
        "HERMES_BASE_HOME": str(TEST_STATE_DIR),
    })
    # Pass agent dir if discovered so server.py doesn't have to re-discover
    if HERMES_AGENT:
        env["HERMES_WEBUI_AGENT_DIR"] = str(HERMES_AGENT)
    proc = subprocess.Popen(
        [VENV_PYTHON, str(SERVER_SCRIPT)],
        cwd=WORKDIR,
        env=env,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    # Fail the whole session loudly (with the discovered paths) if the
    # server never becomes healthy — every test would fail anyway.
    if not _wait_for_server(TEST_BASE, timeout=20):
        proc.kill()
        pytest.fail(
            f"Test server on port {TEST_PORT} did not start within 20s.\n"
            f"  server.py : {SERVER_SCRIPT}\n"
            f"  python    : {VENV_PYTHON}\n"
            f"  agent dir : {HERMES_AGENT}\n"
            f"  workdir   : {WORKDIR}\n"
        )
    yield proc
    # Teardown: graceful terminate, then kill; best-effort state-dir cleanup.
    proc.terminate()
    try:
        proc.wait(timeout=5)
    except subprocess.TimeoutExpired:
        proc.kill()
    try:
        shutil.rmtree(TEST_STATE_DIR)
    except Exception:
        pass
# ── Test base URL ─────────────────────────────────────────────────────────────
@pytest.fixture(scope="session")
def base_url():
    """Session-scoped fixture exposing the isolated test server's base URL."""
    return TEST_BASE
# ── Per-test session cleanup ──────────────────────────────────────────────────
@pytest.fixture(autouse=True)
def cleanup_test_sessions():
    """
    Yields a list for tests to register created session IDs.
    Deletes all registered sessions after each test.
    Resets last_workspace to the test workspace to prevent state bleed.

    All cleanup steps are best-effort: a failing delete must not mask the
    test's own outcome.
    """
    created: list[str] = []
    yield created
    for sid in created:
        try:
            _post(TEST_BASE, "/api/session/delete", {"session_id": sid})
        except Exception:
            pass
    # Also sweep sessions that were created but never used by the test.
    try:
        _post(TEST_BASE, "/api/sessions/cleanup_zero_message")
    except Exception:
        pass
    # Reset the server's remembered workspace so the next test starts clean.
    try:
        last_ws_file = TEST_STATE_DIR / "last_workspace.txt"
        last_ws_file.write_text(str(TEST_WORKSPACE), encoding='utf-8')
    except Exception:
        pass
# ── Convenience helpers ────────────────────────────────────────────────────────
def make_session_tracked(created_list, ws=None):
    """
    Create a session on the test server and register it for cleanup.
    Usage:
        def test_something(cleanup_test_sessions):
            sid, ws = make_session_tracked(cleanup_test_sessions)
    """
    payload = {"workspace": str(ws)} if ws else {}
    response = _post(TEST_BASE, "/api/session/new", payload)
    session = response["session"]
    sid = session["session_id"]
    ws_path = pathlib.Path(session["workspace"])
    created_list.append(sid)
    return sid, ws_path

View File

@@ -0,0 +1,188 @@
"""Tests for approval queue multi-entry support (issue #527).
Previously _pending[sid] held one entry, so simultaneous approvals overwrote
each other. This PR changes submit_pending() to append to a list and adds
approval_id so /api/approval/respond can target a specific entry.
"""
import json
import pathlib
import re
import sys
# Make the repo root importable so `from api import routes` works below.
REPO_ROOT = pathlib.Path(__file__).parent.parent.resolve()
sys.path.insert(0, str(REPO_ROOT))
# Source files read once at import time for the static-analysis assertions.
ROUTES_SRC = (REPO_ROOT / "api" / "routes.py").read_text(encoding="utf-8")
MESSAGES_JS = (REPO_ROOT / "static" / "messages.js").read_text(encoding="utf-8")
INDEX_HTML = (REPO_ROOT / "static" / "index.html").read_text(encoding="utf-8")
# ---------------------------------------------------------------------------
# Static-analysis: Python routes
# These assert on the SOURCE TEXT of api/routes.py rather than its behavior,
# so they pass/fail deterministically without a running server.
# ---------------------------------------------------------------------------
def test_submit_pending_appends_to_list():
    """submit_pending() must append to a list, not overwrite."""
    # The new wrapper must contain queue.append
    assert "queue.append(entry)" in ROUTES_SRC, \
        "submit_pending() must append entry to a list queue, not overwrite _pending[sid]"


def test_submit_pending_adds_approval_id():
    """Each queued entry must get a unique approval_id."""
    assert "approval_id" in ROUTES_SRC and "uuid.uuid4().hex" in ROUTES_SRC, \
        "submit_pending() must assign a uuid4 approval_id to each queued entry"


def test_handle_approval_pending_returns_count():
    """_handle_approval_pending must return pending_count in its response."""
    assert '"pending_count"' in ROUTES_SRC, \
        "_handle_approval_pending must include pending_count in the JSON response"


def test_handle_approval_respond_pops_by_approval_id():
    """_handle_approval_respond must target entry by approval_id."""
    assert 'approval_id = body.get("approval_id"' in ROUTES_SRC, \
        "_handle_approval_respond must read approval_id from request body"
    assert 'entry.get("approval_id") == approval_id' in ROUTES_SRC, \
        "_handle_approval_respond must find and pop the matching entry by approval_id"


def test_handle_approval_respond_fallback_to_oldest():
    """When no approval_id is given, fall back to popping the oldest entry (FIFO)."""
    # The fallback path: queue.pop(0) when approval_id is empty
    assert "queue.pop(0)" in ROUTES_SRC, \
        "_handle_approval_respond must fall back to popping the oldest entry when approval_id is absent"


def test_backward_compat_legacy_dict_value():
    """The respond handler must tolerate a legacy single-dict value in _pending."""
    # Any one of these markers is accepted as evidence of the compat path.
    assert "Legacy single-dict value" in ROUTES_SRC or \
        "# Legacy single-dict" in ROUTES_SRC or \
        "elif queue:" in ROUTES_SRC, \
        "respond handler must handle legacy single-dict _pending values for backward compatibility"
# ---------------------------------------------------------------------------
# Static-analysis: JavaScript frontend
# Same approach as above: assert on the source text of static/messages.js.
# ---------------------------------------------------------------------------
def test_respond_sends_approval_id():
    """respondApproval() must include approval_id in the POST body."""
    assert "approval_id: approvalId" in MESSAGES_JS, \
        "respondApproval() must send approval_id in the POST body to /api/approval/respond"


def test_show_approval_card_accepts_count():
    """showApprovalCard must accept a pendingCount parameter."""
    assert re.search(r"function showApprovalCard\(pending,\s*pendingCount\)", MESSAGES_JS), \
        "showApprovalCard() must accept a pendingCount argument"


def test_show_approval_card_renders_counter():
    """showApprovalCard must display a '1 of N pending' counter when N > 1."""
    # Accept either quote style for the concatenated counter string.
    assert '"1 of " + pendingCount + " pending"' in MESSAGES_JS or \
        "'1 of ' + pendingCount + ' pending'" in MESSAGES_JS, \
        "showApprovalCard() must render '1 of N pending' counter for multiple queued approvals"


def test_approval_current_id_tracked():
    """_approvalCurrentId must be set and cleared around each approval."""
    assert "_approvalCurrentId" in MESSAGES_JS, \
        "_approvalCurrentId must track the approval_id of the currently displayed card"
    assert "_approvalCurrentId = pending.approval_id" in MESSAGES_JS or \
        "_approvalCurrentId = pending.approval_id || null" in MESSAGES_JS, \
        "_approvalCurrentId must be assigned from pending.approval_id"
    # Must be nulled on respond
    assert "_approvalCurrentId = null" in MESSAGES_JS, \
        "_approvalCurrentId must be cleared when respondApproval() is called"


def test_polling_passes_count_to_show():
    """The poll loop must pass pending_count to showApprovalCard."""
    assert "showApprovalCard(data.pending, data.pending_count" in MESSAGES_JS, \
        "Poll loop must pass data.pending_count to showApprovalCard"
# ---------------------------------------------------------------------------
# HTML: counter element present
# ---------------------------------------------------------------------------
def test_approval_counter_element_exists():
    """index.html must contain an approvalCounter element."""
    assert 'id="approvalCounter"' in INDEX_HTML, \
        "index.html must contain an element with id='approvalCounter' for the '1 of N' display"
# ---------------------------------------------------------------------------
# Functional: multiple entries behave correctly (via routes module directly)
# ---------------------------------------------------------------------------
def test_multiple_approvals_both_surfaced():
    """Two submit_pending calls must produce two queued entries, not one.

    Operates directly on the routes module's in-process state (no HTTP).
    The unused ``import threading`` from the original version was removed.
    """
    from api import routes as r
    # Reset state for a throwaway session id so prior tests can't interfere.
    sid = "test-multi-approval-sid"
    with r._lock:
        r._pending.pop(sid, None)
    r.submit_pending(sid, {"command": "cmd1", "pattern_key": "p1", "pattern_keys": ["p1"], "description": "d1"})
    r.submit_pending(sid, {"command": "cmd2", "pattern_key": "p2", "pattern_keys": ["p2"], "description": "d2"})
    with r._lock:
        queue = r._pending.get(sid)
        assert isinstance(queue, list), "After two submit_pending calls, _pending[sid] must be a list"
        assert len(queue) == 2, f"Expected 2 queued entries, got {len(queue)}"
        assert queue[0]["command"] == "cmd1"
        assert queue[1]["command"] == "cmd2"
        assert queue[0].get("approval_id"), "First entry must have an approval_id"
        assert queue[1].get("approval_id"), "Second entry must have an approval_id"
        assert queue[0]["approval_id"] != queue[1]["approval_id"], "Each entry must have a unique approval_id"
    # Cleanup
    with r._lock:
        r._pending.pop(sid, None)
def test_respond_by_approval_id_pops_correct_entry():
    """Responding with approval_id must remove only the targeted entry.

    Replicates the respond handler's pop-by-id loop directly against the
    routes module's queue (no HTTP round-trip).
    """
    from api import routes as r
    sid = "test-respond-by-id-sid"
    # Start from a clean queue for this synthetic session id.
    with r._lock:
        r._pending.pop(sid, None)
    r.submit_pending(sid, {"command": "cmd1", "pattern_key": "p1", "pattern_keys": ["p1"], "description": "d1"})
    r.submit_pending(sid, {"command": "cmd2", "pattern_key": "p2", "pattern_keys": ["p2"], "description": "d2"})
    with r._lock:
        queue = r._pending.get(sid, [])
        aid2 = queue[1]["approval_id"] if len(queue) > 1 else None
    assert aid2, "Second entry must have an approval_id"
    # Respond to the SECOND entry by its approval_id
    # We call the handler internals directly (no HTTP)
    with r._lock:
        queue = r._pending.get(sid, [])
        popped = None
        for i, entry in enumerate(queue):
            if entry.get("approval_id") == aid2:
                popped = queue.pop(i)
                break
    assert popped is not None, "Should have found and popped entry by approval_id"
    assert popped["command"] == "cmd2", "Popped the wrong entry"
    with r._lock:
        remaining = r._pending.get(sid, [])
        assert len(remaining) == 1, "One entry should remain after popping the second"
        assert remaining[0]["command"] == "cmd1", "The remaining entry should be cmd1"
    # Cleanup
    with r._lock:
        r._pending.pop(sid, None)

View File

@@ -0,0 +1,288 @@
"""
Tests for fix/approval-stuck-thinking:
Verify that /api/approval/respond correctly unblocks gateway approval queues
and that the approval module exports the symbols streaming.py and routes.py
need to prevent the UI getting stuck in "Thinking…" during dangerous commands.
"""
import json
import threading
import uuid
import urllib.request
import urllib.error
import urllib.parse
import pytest
# Import approval internals — shared module-level state within this process.
# The HTTP tests use the test server (per-worktree port/state published via
# tests._pytest_port, separate process). The unit tests operate directly on
# the module.
try:
    from tools.approval import (
        register_gateway_notify,
        unregister_gateway_notify,
        resolve_gateway_approval,
        _gateway_queues,
        _gateway_notify_cbs,
        _lock,
        _ApprovalEntry,
        submit_pending,
    )
    # has_pending and pop_pending were removed from tools.approval when the
    # agent renamed has_pending -> has_blocking_approval (gateway queue check)
    # and removed the polling-mode pop_pending. Routes now check _pending
    # directly. These symbols are no longer part of the public API.
    APPROVAL_AVAILABLE = True
except ImportError:
    APPROVAL_AVAILABLE = False
# Module-level skip: every test here needs the agent's approval module.
pytestmark = pytest.mark.skipif(
    not APPROVAL_AVAILABLE,
    reason="tools.approval not available in this environment"
)
# BASE resolves to the per-worktree test server URL published by conftest.
from tests._pytest_port import BASE
def get(path):
    """GET BASE+path on the test server and return the parsed JSON body."""
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        return json.loads(resp.read())
def post(path, body=None):
    """POST *body* as JSON to BASE+path.

    Returns a (parsed_json, status_code) pair; HTTP error bodies are
    parsed and returned with their status code instead of raising.
    """
    payload = json.dumps(body or {}).encode()
    request = urllib.request.Request(BASE + path, data=payload,
                                     headers={"Content-Type": "application/json"})
    try:
        with urllib.request.urlopen(request, timeout=10) as resp:
            return json.loads(resp.read()), resp.status
    except urllib.error.HTTPError as err:
        return json.loads(err.read()), err.code
# ── Unit tests (in-process, no HTTP server needed) ──────────────────────────
class TestGatewayApprovalUnblocking:
    """Unit tests for the gateway queue unblocking mechanism.

    Each test uses a uuid-suffixed session id so tests never share queue
    state, and cleans up the module-level dicts it touches.
    """

    def test_resolve_gateway_approval_sets_event(self):
        """resolve_gateway_approval() must set the entry's event and store the result."""
        sid = f"unit-resolve-{uuid.uuid4().hex[:8]}"
        data = {"command": "rm -rf /tmp/x", "description": "recursive delete"}
        entry = _ApprovalEntry(data)
        with _lock:
            _gateway_queues.setdefault(sid, []).append(entry)
        resolved = resolve_gateway_approval(sid, "once", resolve_all=False)
        assert resolved == 1
        assert entry.event.is_set()
        assert entry.result == "once"
        # Queue should be cleaned up
        with _lock:
            assert sid not in _gateway_queues

    def test_resolve_gateway_approval_deny(self):
        """Deny choice is propagated correctly."""
        sid = f"unit-deny-{uuid.uuid4().hex[:8]}"
        entry = _ApprovalEntry({"command": "pkill -9 x", "description": "force kill"})
        with _lock:
            _gateway_queues.setdefault(sid, []).append(entry)
        resolve_gateway_approval(sid, "deny")
        assert entry.result == "deny"

    def test_resolve_gateway_approval_no_queue_is_harmless(self):
        """resolve_gateway_approval with no queue entry returns 0, no crash."""
        sid = f"unit-no-queue-{uuid.uuid4().hex[:8]}"
        result = resolve_gateway_approval(sid, "once")
        assert result == 0

    def test_resolve_all_unblocks_multiple_entries(self):
        """resolve_all=True unblocks every pending entry in the queue."""
        sid = f"unit-resolve-all-{uuid.uuid4().hex[:8]}"
        entries = [_ApprovalEntry({"command": f"cmd{i}"}) for i in range(3)]
        with _lock:
            _gateway_queues[sid] = list(entries)
        resolved = resolve_gateway_approval(sid, "session", resolve_all=True)
        assert resolved == 3
        for e in entries:
            assert e.event.is_set()
            assert e.result == "session"

    def test_register_and_fire_notify_cb(self):
        """register_gateway_notify stores the cb; calling it delivers approval data."""
        sid = f"unit-notify-{uuid.uuid4().hex[:8]}"
        fired = []
        register_gateway_notify(sid, lambda d: fired.append(d))
        with _lock:
            cb = _gateway_notify_cbs.get(sid)
        assert cb is not None
        data = {"command": "test", "description": "test"}
        cb(data)
        assert fired == [data]
        unregister_gateway_notify(sid)

    def test_unregister_clears_cb_and_signals_entries(self):
        """unregister_gateway_notify removes cb and unblocks any queued entries."""
        sid = f"unit-unreg-{uuid.uuid4().hex[:8]}"
        register_gateway_notify(sid, lambda d: None)
        entry = _ApprovalEntry({"command": "x"})
        with _lock:
            _gateway_queues.setdefault(sid, []).append(entry)
        unregister_gateway_notify(sid)
        assert entry.event.is_set(), "unregister should signal blocked entries"
        with _lock:
            assert sid not in _gateway_notify_cbs
            assert sid not in _gateway_queues

    def test_streaming_approval_integration(self):
        """
        End-to-end unit simulation of the streaming.py fix:
        1. streaming.py registers notify_cb
        2. check_all_command_guards fires notify_cb (pushing approval SSE)
        3. User responds — resolve_gateway_approval unblocks agent thread
        4. Agent thread sees choice and continues
        """
        sid = f"unit-e2e-{uuid.uuid4().hex[:8]}"
        approval_events_sent = []
        # Step 1: streaming.py registers the notify callback
        def _approval_notify_cb(approval_data):
            approval_events_sent.append(approval_data)  # would be put('approval', ...)
        register_gateway_notify(sid, _approval_notify_cb)
        # Step 2: check_all_command_guards fires the callback and queues an entry
        approval_data = {
            "command": "rm -rf /tmp/test",
            "pattern_key": "recursive delete",
            "pattern_keys": ["recursive delete"],
            "description": "recursive delete",
        }
        entry = _ApprovalEntry(approval_data)
        with _lock:
            _gateway_queues.setdefault(sid, []).append(entry)
        # notify_cb fires synchronously (gateway notifies user)
        with _lock:
            cb = _gateway_notify_cbs.get(sid)
        cb(approval_data)
        assert len(approval_events_sent) == 1, "approval SSE event should have been queued"
        # Step 3: user responds via /api/approval/respond → resolve_gateway_approval
        resolved = resolve_gateway_approval(sid, "once")
        assert resolved == 1
        # Step 4: agent thread is unblocked with the correct choice
        assert entry.event.is_set()
        assert entry.result == "once"
        # Cleanup
        unregister_gateway_notify(sid)
# ── Symbol existence tests ───────────────────────────────────────────────────
class TestApprovalModuleExports:
    """Verify the module exports all symbols that streaming.py and routes.py need.

    hasattr checks (rather than direct imports) give a precise failure
    message naming the missing symbol.
    """

    def test_register_gateway_notify_exported(self):
        import tools.approval as ap
        assert hasattr(ap, "register_gateway_notify"), \
            "tools.approval must export register_gateway_notify"

    def test_unregister_gateway_notify_exported(self):
        import tools.approval as ap
        assert hasattr(ap, "unregister_gateway_notify"), \
            "tools.approval must export unregister_gateway_notify"

    def test_resolve_gateway_approval_exported(self):
        import tools.approval as ap
        assert hasattr(ap, "resolve_gateway_approval"), \
            "tools.approval must export resolve_gateway_approval"

    def test_approval_entry_exported(self):
        import tools.approval as ap
        assert hasattr(ap, "_ApprovalEntry"), \
            "tools.approval must export _ApprovalEntry"
# ── HTTP regression tests (isolated test server) ─────────────────────────────
class TestApprovalHTTPEndpoints:
    """
    Regression tests for /api/approval/respond against the live test server.
    These verify that the HTTP layer behaves correctly — they don't rely on
    in-process module state shared with the server subprocess.
    """

    def test_respond_returns_ok_no_pending(self):
        """respond with no pending entry returns ok (no crash, no 500)."""
        sid = f"http-no-pending-{uuid.uuid4().hex[:8]}"
        result, status = post("/api/approval/respond", {
            "session_id": sid,
            "choice": "deny",
        })
        assert status == 200
        assert result["ok"] is True

    def test_respond_clears_injected_pending(self):
        """Inject a pending entry, respond, verify it's cleared."""
        sid = f"http-clear-{uuid.uuid4().hex[:8]}"
        cmd = "rm -rf /tmp/testdir"
        # inject_test seeds server-side pending state without a real agent run.
        inject = get(f"/api/approval/inject_test?session_id={urllib.parse.quote(sid)}"
                     f"&pattern_key=recursive+delete&command={urllib.parse.quote(cmd)}")
        assert inject["ok"] is True
        data = get(f"/api/approval/pending?session_id={urllib.parse.quote(sid)}")
        assert data["pending"] is not None
        result, status = post("/api/approval/respond", {
            "session_id": sid,
            "choice": "deny",
        })
        assert status == 200
        assert result["ok"] is True
        data2 = get(f"/api/approval/pending?session_id={urllib.parse.quote(sid)}")
        assert data2["pending"] is None, "pending should be cleared after respond"

    def test_respond_rejects_invalid_choice(self):
        """respond with an unknown choice returns 400."""
        result, status = post("/api/approval/respond", {
            "session_id": "some-session",
            "choice": "INVALID",
        })
        assert status == 400

    def test_respond_requires_session_id(self):
        """respond without session_id returns 400."""
        result, status = post("/api/approval/respond", {"choice": "deny"})
        assert status == 400

    def test_respond_session_choice_clears_pending(self):
        """Inject pending, respond with 'session', verify cleared."""
        sid = f"http-session-{uuid.uuid4().hex[:8]}"
        inject = get(f"/api/approval/inject_test?session_id={urllib.parse.quote(sid)}"
                     f"&pattern_key=force+kill+processes&command=pkill+-9+something")
        assert inject["ok"] is True
        result, status = post("/api/approval/respond", {
            "session_id": sid,
            "choice": "session",
        })
        assert status == 200
        assert result["choice"] == "session"
        data = get(f"/api/approval/pending?session_id={urllib.parse.quote(sid)}")
        assert data["pending"] is None

134
tests/test_auth_sessions.py Normal file
View File

@@ -0,0 +1,134 @@
"""
Tests for auth session lifecycle — session creation, verification, expiry,
and lazy pruning of expired entries.
"""
import time
import unittest
from pathlib import Path
import tempfile
import os
# Isolate state dir so we don't touch real sessions
_TEST_STATE = Path(tempfile.mkdtemp())
os.environ["HERMES_WEBUI_STATE_DIR"] = str(_TEST_STATE)
import sys
sys.path.insert(0, str(Path(__file__).parent.parent))
import importlib
# Import the auth module AFTER the env var is set so its first import picks
# up our _TEST_STATE dir.  NOTE(review): import_module does NOT force a
# re-import if api.auth was already loaded elsewhere — importlib.reload
# would; confirm no earlier-collected test imports api.auth first.
auth = importlib.import_module("api.auth")
class TestSessionPruning(unittest.TestCase):
    """Verify expired session cleanup works correctly.

    These tests poke at the module-private ``auth._sessions`` mapping
    (token -> expiry epoch seconds) to simulate expiry without waiting
    for real TTLs to elapse.
    """

    def setUp(self):
        # Clear any leftover sessions from other tests
        auth._sessions.clear()

    def test_session_created_valid(self):
        """A fresh session token should verify as valid."""
        token = auth.create_session()
        self.assertTrue(auth.verify_session(token))

    def test_expired_session_pruned(self):
        """Manually inserting an expired entry should be pruned on next verify_session call."""
        # Insert sessions that have already expired
        auth._sessions["fake_token"] = time.time() - 100
        auth._sessions["another_fake"] = time.time() - 50
        # Insert one valid session (far future)
        auth._sessions["good_token"] = time.time() + 3600
        # _sessions has 3 entries, 2 expired
        self.assertEqual(len(auth._sessions), 3)
        # Call verify_session — this triggers _prune_expired_sessions()
        # Cookie format is token.signature, so we need a dot to pass the early check
        auth.verify_session("fake_token.fake_sig")
        # After verification, only the valid session should remain
        self.assertEqual(len(auth._sessions), 1)
        self.assertIn("good_token", auth._sessions)
        self.assertNotIn("fake_token", auth._sessions)
        self.assertNotIn("another_fake", auth._sessions)

    def test_prune_does_not_remove_valid_sessions(self):
        """_prune_expired_sessions should never remove sessions that are still active."""
        auth._sessions["active_1"] = time.time() + 86400  # 24 hours from now
        auth._sessions["active_2"] = time.time() + 7200   # 2 hours from now
        auth._sessions["expired_1"] = time.time() - 10
        auth._prune_expired_sessions()
        self.assertEqual(len(auth._sessions), 2)
        self.assertIn("active_1", auth._sessions)
        self.assertIn("active_2", auth._sessions)
        self.assertNotIn("expired_1", auth._sessions)

    def test_verify_session_prunes_before_verification(self):
        """verify_session should prune expired entries before checking the target token.

        This ensures that _prune_expired_sessions() is called at the very top
        of verify_session(), so cleanup happens on every auth check.
        """
        auth._sessions["expired_for_test"] = time.time() - 999
        # verify_session with an invalid cookie triggers the full path:
        # _prune_expired_sessions -> signature check -> return False
        result = auth.verify_session("nonexistent.bad_sig")
        self.assertFalse(result)
        # The expired entry should have been cleaned up
        self.assertNotIn("expired_for_test", auth._sessions)

    def test_prune_handles_empty_dict(self):
        """_prune_expired_sessions should be safe on an empty dict."""
        auth._sessions.clear()
        auth._prune_expired_sessions()
        self.assertEqual(len(auth._sessions), 0)

    def test_session_ttl_is_24_hours(self):
        """Newly created sessions should have the expected 24-hour TTL."""
        auth._sessions.clear()
        token_hex = auth.create_session().split(".")[0]
        # _sessions maps token -> expiry epoch seconds: look the entry up
        # directly instead of scanning items() with a for/else (idiom fix —
        # dicts are keyed by exactly the value we hold).
        expiry = auth._sessions.get(token_hex)
        self.assertIsNotNone(expiry, "Session token not found in _sessions")
        # expiry should be within 5 seconds of now + SESSION_TTL
        self.assertAlmostEqual(expiry, time.time() + auth.SESSION_TTL, delta=5)
class TestSessionInvalidation(unittest.TestCase):
    """Test session logout / invalidation."""

    def setUp(self):
        # Start every test from an empty session table.
        auth._sessions.clear()

    def test_invalidate_session_removes_token(self):
        """Calling invalidate_session should remove the token from _sessions."""
        cookie = auth.create_session()
        self.assertTrue(auth.verify_session(cookie))
        auth.invalidate_session(cookie)
        # Once invalidated, the same cookie must no longer verify.
        self.assertFalse(auth.verify_session(cookie))

    def test_invalidate_unknown_token_is_safe(self):
        """Invalidating a non-existent token should not raise."""
        auth._sessions.clear()
        # Must be a silent no-op rather than an error.
        auth.invalidate_session("nonexistent_token")


if __name__ == "__main__":
    unittest.main()

226
tests/test_batch_fixes.py Normal file
View File

@@ -0,0 +1,226 @@
"""Tests for the batch of fixes from PRs #506-#521 (v0.50.47).
Covers:
- /root workspace unblocking (#510/#521)
- Attached-files split guard (#521)
- custom_providers model visibility (#515/#519)
- Cron skill cache invalidation (#507/#508)
- System (auto) theme (#504/#506/#509/#514)
"""
import pathlib
import re
REPO = pathlib.Path(__file__).parent.parent


def read(rel):
    """Return the text of *rel* (a path relative to the repo root) as UTF-8.

    The explicit encoding matches the other test modules in this suite
    (which all call read_text(encoding="utf-8")) and keeps the read stable
    on platforms whose locale encoding is not UTF-8 (e.g. Windows), where
    the default would raise UnicodeDecodeError on non-ASCII assets.
    """
    return (REPO / rel).read_text(encoding="utf-8")
# ── Group A: /root workspace ──────────────────────────────────────────────────
class TestRootWorkspaceUnblocked:
    """Regression checks for the /root workspace unblock (#510/#521)."""

    def test_root_not_in_blocked_system_roots(self):
        """/root must be usable as a workspace when Hermes runs as root."""
        workspace_src = read("api/workspace.py")
        assert "Path('/root')" not in workspace_src, (
            "/root must not be in _BLOCKED_SYSTEM_ROOTS — "
            "breaks deployments where Hermes runs as root"
        )

    def test_etc_still_blocked(self):
        """Sanity: other dangerous paths remain blocked."""
        workspace_src = read("api/workspace.py")
        for blocked in ("Path('/etc')", "Path('/proc')"):
            assert blocked in workspace_src

    def test_split_guard_present(self):
        """Plain messages carry no '[Attached files:' marker; the split is guarded."""
        streaming_src = read("api/streaming.py")
        assert "'\\n\\n[Attached files:' in msg_text" in streaming_src, (
            "base_text split must guard against missing '[Attached files:' "
            "to avoid empty-string on plain messages"
        )
# ── Group B: custom_providers visibility ─────────────────────────────────────
class TestCustomProvidersVisibility:
    """custom_providers entries must keep the 'custom' group visible (#515/#519)."""

    @staticmethod
    def _config_src():
        # Re-read per assertion; the file is small and this keeps tests independent.
        return read("api/config.py")

    def test_has_custom_providers_variable_present(self):
        assert "_has_custom_providers" in self._config_src(), (
            "_has_custom_providers variable must exist in get_available_models()"
        )

    def test_discard_custom_conditional_on_no_custom_providers(self):
        assert "not _has_custom_providers" in self._config_src(), (
            "detected_providers.discard('custom') must be gated on "
            "'not _has_custom_providers'"
        )

    def test_custom_providers_isinstance_check(self):
        assert "isinstance(_custom_providers_cfg, list)" in self._config_src(), (
            "_has_custom_providers must check isinstance(..., list)"
        )
# ── Group C: cron skill cache ─────────────────────────────────────────────────
class TestCronSkillCacheInvalidation:
    """The cron form must re-fetch skills rather than serve a stale cache (#507/#508)."""

    @staticmethod
    def _panels_src():
        return read("static/panels.js")

    def test_cache_busted_on_form_open(self):
        """toggleCronForm must null the cache unconditionally before fetching."""
        match = re.search(
            r'function toggleCronForm\(\)\{.*?_cronSkillsCache=null',
            self._panels_src(), re.DOTALL
        )
        assert match, (
            "toggleCronForm must unconditionally null _cronSkillsCache "
            "before fetching skills"
        )

    def test_cache_not_guarded_by_if_on_open(self):
        """The old lazy-cache guard must be gone."""
        assert "if(!_cronSkillsCache)" not in self._panels_src(), (
            "toggleCronForm should not use 'if(!_cronSkillsCache)' guard — "
            "cache must always be busted on open"
        )

    def test_cache_busted_on_skill_save(self):
        """Saving a skill must invalidate the cron skill cache too."""
        match = re.search(
            r'async function submitSkillSave\(\).*?_skillsData\s*=\s*null.*?_cronSkillsCache\s*=\s*null',
            self._panels_src(), re.DOTALL
        )
        assert match, (
            "_cronSkillsCache must be set to null in submitSkillSave() "
            "right after _skillsData = null"
        )
# ── Group D: System (auto) theme ──────────────────────────────────────────────
class TestSystemTheme:
    """'System (auto)' theme support (#504/#506/#509/#514).

    Source-level greps over the static bundle: each assertion pins an exact
    literal from boot.js / index.html / commands.js / panels.js / i18n.js,
    so the needle strings below must stay byte-identical to the shipped code.
    """

    def test_apply_theme_helper_in_boot_js(self):
        # boot.js must own the single theme-application entry point.
        src = read("static/boot.js")
        assert "function _applyTheme(" in src, (
            "_applyTheme helper function must be defined in boot.js"
        )

    def test_apply_theme_resolves_system(self):
        # Either exact-literal form is accepted depending on formatting.
        src = read("static/boot.js")
        assert "normalized.theme==='system'" in src or "=== 'system'" in src, (
            "_applyTheme must branch on 'system' to resolve via matchMedia"
        )

    def test_apply_theme_uses_matchmedia(self):
        src = read("static/boot.js")
        assert "prefers-color-scheme" in src, (
            "_applyTheme must use matchMedia('(prefers-color-scheme:dark)')"
        )

    def test_load_settings_calls_apply_theme(self):
        src = read("static/boot.js")
        assert "_applyTheme(appearance.theme)" in src, (
            "loadSettings must call _applyTheme() instead of direct data-theme assignment"
        )

    def test_system_option_in_theme_picker(self):
        html = read("static/index.html")
        assert "_pickTheme('system')" in html, (
            "Theme picker must include a system theme button"
        )
        assert ">System<" in html, (
            "Theme picker must show 'System' label"
        )

    def test_theme_picker_uses_pick_theme(self):
        html = read("static/index.html")
        assert "_pickTheme(" in html, (
            "Theme buttons must call _pickTheme()"
        )

    def test_flicker_script_resolves_system(self):
        html = read("static/index.html")
        # The head flicker-prevention IIFE must handle 'system'
        assert "==='system'" in html or "=== 'system'" in html, (
            "Flicker-prevention head script must resolve 'system' before setting data-theme"
        )
        assert "legacy={slate:['dark','slate']" in html, (
            "Flicker-prevention head script must normalize legacy theme names on first paint"
        )

    def test_system_in_commands_themes_list(self):
        src = read("static/commands.js")
        assert "'system'" in src, (
            "/theme command must include 'system' in the valid themes array"
        )

    def test_commands_uses_apply_theme(self):
        src = read("static/commands.js")
        assert "_applyTheme(appearance.theme)" in src, (
            "cmdTheme must call _applyTheme() with the normalized canonical theme"
        )

    def test_commands_accept_legacy_theme_aliases(self):
        src = read("static/commands.js")
        assert "const legacyThemes=Object.keys(_LEGACY_THEME_MAP||{});" in src, (
            "cmdTheme must accept legacy theme aliases and map them onto canonical appearance values"
        )

    def test_panels_reverts_via_apply_theme(self):
        # NOTE(review): the second `or "_applyTheme(" in src` arm makes this
        # assertion pass whenever _applyTheme is called anywhere in panels.js —
        # consider tightening to the exact revert call.
        src = read("static/panels.js")
        assert "_applyTheme(_settingsThemeOnOpen)" in src or \
            "_applyTheme(" in src, (
            "_revertSettingsPreview must call _applyTheme() so 'system' "
            "is correctly re-activated on settings discard"
        )

    def test_panels_saves_system_string_not_resolved(self):
        src = read("static/panels.js")
        assert "localStorage.getItem('hermes-theme')" in src, (
            "_settingsThemeOnOpen must read from localStorage to preserve "
            "the 'system' string, not the resolved 'dark'/'light'"
        )

    def test_i18n_cmd_theme_includes_system_english(self):
        src = read("static/i18n.js")
        assert "system/dark/light" in src, (
            "English cmd_theme i18n key must include 'system' in the theme list"
        )

    def test_i18n_cmd_theme_all_locales(self):
        # One occurrence per locale is expected; >= 5 covers all shipped locales.
        src = read("static/i18n.js")
        count = src.count("system/dark/light")
        assert count >= 5, (
            f"cmd_theme description should mention 'system' in all 5 locales; "
            f"found {count}"
        )

    def test_theme_listener_cleanup_uses_stable_handler(self):
        src = read("static/boot.js")
        assert "_systemThemeMq&&_onSystemThemeChange" in src, (
            "_applyTheme must track the active OS-theme listener so it can be removed cleanly"
        )
        assert "removeEventListener('change',_onSystemThemeChange)" in src, (
            "_applyTheme must remove the previous OS-theme listener before adding a new one"
        )

    def test_panels_hydrates_appearance_before_models_fetch(self):
        # Ordering check: skin hydration must precede the awaited models fetch.
        src = read("static/panels.js")
        skin_idx = src.index("const skinVal=(settings.skin||'default').toLowerCase();")
        models_idx = src.index("const models=await api('/api/models');")
        assert skin_idx < models_idx, (
            "loadSettingsPanel must hydrate theme/skin before awaiting /api/models, "
            "otherwise a slow model fetch can clobber an in-progress skin selection"
        )

View File

@@ -0,0 +1,140 @@
"""
Bug batch fixes — April 2026.
Covers:
- #594: .app-dialog and .file-rename-input have light theme overrides in style.css
- #576: workspace panel localStorage restore is gated on session.workspace presence (boot.js)
- #585: get_available_models() calls reload_config() before reading config cache
- #567: docker-compose.yml comment mentions macOS UID mismatch
- #590: _transcribeBlob already calls setComposerStatus('Transcribing…') — confirmed present
"""
import pathlib
import re
REPO_ROOT = pathlib.Path(__file__).parent.parent
# Static assets and the compose file are read once at import time; every
# test below greps these strings instead of re-reading from disk.
STYLE_CSS = (REPO_ROOT / "static" / "style.css").read_text(encoding="utf-8")
BOOT_JS = (REPO_ROOT / "static" / "boot.js").read_text(encoding="utf-8")
COMPOSE = (REPO_ROOT / "docker-compose.yml").read_text(encoding="utf-8")
# ── #594: light theme dialog overrides ───────────────────────────────────────
def test_594_app_dialog_has_light_mode_override():
    """Dialogs must not stay dark when the light theme is active (#594)."""
    needle = ':root:not(.dark) .app-dialog{'
    assert needle in STYLE_CSS, (
        "Missing light mode override for .app-dialog — dialogs appear dark on light theme"
    )


def test_594_app_dialog_input_has_light_mode_override():
    """Dialog inputs need their own light-mode rule (#594)."""
    needle = ":root:not(.dark) .app-dialog-input{"
    assert needle in STYLE_CSS, (
        "Missing light mode override for .app-dialog-input"
    )


def test_594_app_dialog_btn_has_light_mode_override():
    """Dialog buttons need their own light-mode rule (#594)."""
    needle = ":root:not(.dark) .app-dialog-btn{"
    assert needle in STYLE_CSS, (
        "Missing light mode override for .app-dialog-btn"
    )


def test_594_app_dialog_close_has_light_mode_override():
    """The dialog close control needs its own light-mode rule (#594)."""
    needle = ":root:not(.dark) .app-dialog-close{"
    assert needle in STYLE_CSS, (
        "Missing light mode override for .app-dialog-close"
    )


def test_594_file_rename_input_has_light_mode_override():
    """The inline file-rename input needs its own light-mode rule (#594)."""
    needle = ":root:not(.dark) .file-rename-input{"
    assert needle in STYLE_CSS, (
        "Missing light mode override for .file-rename-input"
    )
# ── #576: workspace panel snap fix ───────────────────────────────────────────
def test_576_panel_restore_gated_on_workspace():
    """boot.js: localStorage panel restore must be gated on session.workspace."""
    guard = "S.session&&S.session.workspace&&localStorage.getItem('hermes-webui-workspace-panel')"
    assert guard in BOOT_JS, (
        "Workspace panel localStorage restore must be gated on S.session.workspace "
        "to prevent snap-open-then-closed on sessions without a workspace (#576)"
    )


def test_576_restore_happens_after_load_session():
    """boot.js: loadSession() must come before the panel restore guard."""
    session_load_at = BOOT_JS.find("await loadSession(saved)")
    panel_restore_at = BOOT_JS.find("S.session&&S.session.workspace&&localStorage")
    assert session_load_at != -1, "loadSession call not found in boot.js"
    assert panel_restore_at != -1, "workspace panel restore guard not found"
    assert session_load_at < panel_restore_at, (
        "loadSession() must run before the panel restore guard "
        "so S.session.workspace is known at restore time"
    )
# ── #585: get_available_models reloads config ─────────────────────────────────
def test_585_get_available_models_calls_reload_config():
    """api/config.py: get_available_models() must do a mtime-based reload check."""
    config_src = (REPO_ROOT / "api" / "config.py").read_text(encoding="utf-8")
    fn_start = config_src.find("def get_available_models()")
    assert fn_start != -1, "get_available_models not found"
    # Skip past the function's docstring (opening and closing triple-quote).
    docstring_open = config_src.find('"""', fn_start + 30)
    fn_body_end = config_src.find('"""', docstring_open + 3) + 3
    # The mtime check must come before the cache is consulted.
    mtime_pos = config_src.find("_current_mtime", fn_body_end)
    active_prov_pos = config_src.find("active_provider = None", fn_body_end)
    assert mtime_pos != -1, (
        "get_available_models() must check config file mtime before reading cache (#585)"
    )
    assert mtime_pos < active_prov_pos, (
        "mtime check must come before active_provider = None in get_available_models()"
    )
# ── #567: docker-compose UID note ─────────────────────────────────────────────
def test_567_compose_mentions_macos_uid():
    """docker-compose.yml must mention macOS UID / id -u to help macOS users."""
    # Case-insensitive check covers "macOS"/"MacOS"/"macos" spellings alike.
    assert "macos" in COMPOSE.lower(), (
        "docker-compose.yml should mention macOS UID issue (#567)"
    )
    assert "id -u" in COMPOSE, (
        "docker-compose.yml should tell users to run 'id -u' to find their UID (#567)"
    )
# ── #590: transcription spinner already present ───────────────────────────────
def test_590_transcribing_status_shown_before_fetch():
    """boot.js: setComposerStatus('Transcribing…') must fire before the fetch call."""
    fn_at = BOOT_JS.find("async function _transcribeBlob(")
    assert fn_at != -1, "_transcribeBlob not found in boot.js"
    fn_window = BOOT_JS[fn_at:fn_at + 600]
    status_at = fn_window.find("setComposerStatus('Transcribing")
    fetch_at = fn_window.find("await fetch(")
    assert status_at != -1, (
        "setComposerStatus('Transcribing…') must be called before the fetch in _transcribeBlob"
    )
    assert fetch_at != -1, "await fetch not found in _transcribeBlob"
    assert status_at < fetch_at, (
        "setComposerStatus('Transcribing…') must appear before 'await fetch' "
        "so the UI shows a spinner immediately on stop (#590)"
    )


def test_590_recording_stops_before_transcribe():
    """boot.js: _setRecording(false) must fire in onstop before _transcribeBlob."""
    onstop_at = BOOT_JS.find("mediaRecorder.onstop")
    assert onstop_at != -1, "mediaRecorder.onstop not found"
    onstop_window = BOOT_JS[onstop_at:onstop_at + 400]
    stop_at = onstop_window.find("_setRecording(false)")
    transcribe_at = onstop_window.find("_transcribeBlob(")
    assert stop_at != -1 and transcribe_at != -1
    assert stop_at < transcribe_at, (
        "_setRecording(false) must come before _transcribeBlob so mic icon clears immediately"
    )

View File

@@ -0,0 +1,115 @@
"""
Unit tests for cancel/interrupt functionality.
Tests the integration between cancel_stream() and agent.interrupt().
"""
import pytest
import queue
import threading
from unittest.mock import Mock
from api.streaming import cancel_stream
from api.config import AGENT_INSTANCES, STREAMS, CANCEL_FLAGS
class TestCancelInterrupt:
    """Test suite for cancel/interrupt functionality."""

    @staticmethod
    def _reset_registries():
        # The module-level registries are shared state; empty them between tests.
        AGENT_INSTANCES.clear()
        STREAMS.clear()
        CANCEL_FLAGS.clear()

    def setup_method(self):
        """Clean up before each test."""
        self._reset_registries()

    def teardown_method(self):
        """Clean up after each test."""
        self._reset_registries()

    def test_cancel_calls_agent_interrupt(self):
        """cancel_stream() must call agent.interrupt() when an agent is registered."""
        stream_id = "test_stream_123"
        agent = Mock()
        agent.interrupt = Mock()
        STREAMS[stream_id] = queue.Queue()
        CANCEL_FLAGS[stream_id] = threading.Event()
        AGENT_INSTANCES[stream_id] = agent

        assert cancel_stream(stream_id) is True
        agent.interrupt.assert_called_once_with("Cancelled by user")
        assert CANCEL_FLAGS[stream_id].is_set()

    def test_cancel_handles_interrupt_exception(self):
        """cancel_stream() must swallow exceptions raised by interrupt()."""
        stream_id = "test_stream_456"
        agent = Mock()
        agent.interrupt = Mock(side_effect=RuntimeError("Agent error"))
        STREAMS[stream_id] = queue.Queue()
        CANCEL_FLAGS[stream_id] = threading.Event()
        AGENT_INSTANCES[stream_id] = agent

        # Must not propagate the RuntimeError.
        assert cancel_stream(stream_id) is True
        agent.interrupt.assert_called_once()
        assert CANCEL_FLAGS[stream_id].is_set()

    def test_cancel_before_agent_ready(self):
        """Cancel when the agent is not yet in AGENT_INSTANCES (race condition)."""
        stream_id = "test_stream_789"
        STREAMS[stream_id] = queue.Queue()
        CANCEL_FLAGS[stream_id] = threading.Event()
        # AGENT_INSTANCES deliberately left empty: the agent observes the
        # cancel flag whenever it eventually starts.
        assert cancel_stream(stream_id) is True
        assert CANCEL_FLAGS[stream_id].is_set()

    def test_cancel_nonexistent_stream(self):
        """Cancelling an unknown stream reports failure."""
        assert cancel_stream("nonexistent_stream") is False

    def test_cancel_sets_cancel_event(self):
        """cancel_stream() must set the stream's cancel event."""
        stream_id = "test_stream_event"
        STREAMS[stream_id] = queue.Queue()
        flag = threading.Event()
        CANCEL_FLAGS[stream_id] = flag
        assert cancel_stream(stream_id) is True
        assert flag.is_set()

    def test_cancel_puts_sentinel_in_queue(self):
        """cancel_stream() must enqueue the cancel sentinel for the consumer."""
        stream_id = "test_stream_queue"
        pending = queue.Queue()
        STREAMS[stream_id] = pending
        CANCEL_FLAGS[stream_id] = threading.Event()
        assert cancel_stream(stream_id) is True
        # The cancel message must be the next event on the stream queue.
        assert not pending.empty()
        event_type, payload = pending.get_nowait()
        assert event_type == 'cancel'
        assert payload['message'] == 'Cancelled by user'

View File

@@ -0,0 +1,111 @@
from collections import Counter
from pathlib import Path
import re
REPO = Path(__file__).resolve().parent.parent
def read(path: Path) -> str:
    """Read *path* and return its contents decoded as UTF-8."""
    with open(path, encoding="utf-8") as fh:
        return fh.read()
def extract_locale_block(src: str, locale_key: str) -> str:
    """Return the text between the braces of ``locale_key: { ... }`` in *src*.

    Scans forward from the opening brace, balancing nested braces while
    skipping over the contents of single-, double-, and backtick-quoted
    JavaScript strings (including backslash escapes), so braces inside
    string literals do not affect the depth count.
    """
    opener = re.search(rf"\b{re.escape(locale_key)}\s*:\s*\{{", src)
    assert opener, f"{locale_key} locale block not found"
    start = opener.end() - 1  # index of the opening "{"
    depth = 0
    quote = None        # active string delimiter, or None while in code
    skip_next = False   # previous char was a backslash inside a string
    i = start
    while i < len(src):
        ch = src[i]
        i += 1
        if skip_next:
            skip_next = False
            continue
        if quote is not None:
            if ch == "\\":
                skip_next = True
            elif ch == quote:
                quote = None
            continue
        if ch in ("'", '"', "`"):
            quote = ch
        elif ch == "{":
            depth += 1
        elif ch == "}":
            depth -= 1
            if depth == 0:
                # i was already advanced past the brace; exclude both braces.
                return src[start + 1 : i - 1]
    raise AssertionError(f"{locale_key} locale block braces are not balanced")
def test_chinese_locale_block_exists():
    """i18n.js must define a zh locale with its language and speech tags."""
    i18n_src = read(REPO / "static" / "i18n.js")
    for marker in ("\n  zh: {", "_lang: 'zh'", "_speech: 'zh-CN'"):
        assert marker in i18n_src


def test_chinese_locale_includes_representative_translations():
    """Spot-check translated keys across several UI areas."""
    i18n_src = read(REPO / "static" / "i18n.js")
    expected = [
        "settings_title: '\\u8bbe\\u7f6e'",
        "login_title: '\\u767b\\u5f55'",
        "approval_heading: '需要审批'",
        "tab_tasks: '任务'",
        "tab_profiles: '配置'",
        "session_time_just_now: '刚刚'",
        "onboarding_title: '欢迎使用 Hermes Web UI'",
        "onboarding_complete: '引导完成'",
    ]
    for entry in expected:
        assert entry in i18n_src


def test_chinese_locale_covers_english_keys():
    """Every key in the en block must also exist in zh."""
    i18n_src = read(REPO / "static" / "i18n.js")
    key_re = re.compile(r"^\s{4}([a-zA-Z0-9_]+):", re.MULTILINE)
    en_keys = set(key_re.findall(extract_locale_block(i18n_src, "en")))
    zh_keys = set(key_re.findall(extract_locale_block(i18n_src, "zh")))
    missing = sorted(en_keys - zh_keys)
    assert not missing, f"Chinese locale missing keys: {missing}"


def test_chinese_locale_has_no_duplicate_keys():
    """No key may be declared twice inside the zh block."""
    i18n_src = read(REPO / "static" / "i18n.js")
    key_re = re.compile(r"^\s{4}([a-zA-Z0-9_]+):", re.MULTILINE)
    zh_keys = key_re.findall(extract_locale_block(i18n_src, "zh"))
    duplicates = sorted(k for k, n in Counter(zh_keys).items() if n > 1)
    assert not duplicates, f"Chinese locale has duplicate keys: {duplicates}"

View File

@@ -0,0 +1,165 @@
"""Tests for clarify prompt unblocking and HTTP endpoints."""
import json
import threading
import uuid
import urllib.request
import urllib.error
import urllib.parse
import pytest
try:
from api.clarify import (
register_gateway_notify,
unregister_gateway_notify,
resolve_clarify,
clear_pending,
_gateway_queues,
_gateway_notify_cbs,
_lock,
_ClarifyEntry,
submit_pending,
)
CLARIFY_AVAILABLE = True
except ImportError:
CLARIFY_AVAILABLE = False
pytestmark = pytest.mark.skipif(
not CLARIFY_AVAILABLE,
reason="api.clarify not available in this environment",
)
from tests._pytest_port import BASE
def get(path):
    """GET BASE+path from the live test server and decode the JSON body."""
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        return json.loads(resp.read())
def post(path, body=None):
    """POST a JSON body to BASE+path; return ``(parsed_json, status_code)``.

    HTTP error responses are not raised — their payload and status code are
    returned so tests can assert directly on 4xx replies.
    """
    payload = json.dumps(body or {}).encode()
    req = urllib.request.Request(
        BASE + path,
        data=payload,
        headers={"Content-Type": "application/json"},
    )
    try:
        with urllib.request.urlopen(req, timeout=10) as resp:
            return json.loads(resp.read()), resp.status
    except urllib.error.HTTPError as err:
        return json.loads(err.read()), err.code
class TestClarifyUnblocking:
    """Unit tests for clarify queue resolution."""

    def test_resolve_clarify_sets_event(self):
        """resolve_clarify must set the waiter's event and record the answer."""
        session = f"unit-clarify-{uuid.uuid4().hex[:8]}"
        pending = _ClarifyEntry({"question": "Pick one", "choices_offered": ["a", "b"]})
        with _lock:
            _gateway_queues.setdefault(session, []).append(pending)
        assert resolve_clarify(session, "a", resolve_all=False) == 1
        assert pending.event.is_set()
        assert pending.result == "a"

    def test_register_and_fire_notify_cb(self):
        """A registered gateway callback must receive the clarify payload."""
        session = f"unit-notify-{uuid.uuid4().hex[:8]}"
        received = []
        register_gateway_notify(session, lambda payload: received.append(payload))
        with _lock:
            cb = _gateway_notify_cbs.get(session)
        assert cb is not None
        payload = {"question": "What now?", "choices_offered": ["x", "y"]}
        cb(payload)
        assert received == [payload]
        unregister_gateway_notify(session)

    def test_clear_pending_unblocks_waiters(self):
        """clear_pending must wake all waiters and drop the session queue."""
        session = f"unit-clear-{uuid.uuid4().hex[:8]}"
        pending = _ClarifyEntry({"question": "Wait", "choices_offered": []})
        with _lock:
            _gateway_queues.setdefault(session, []).append(pending)
        assert clear_pending(session) == 1
        assert pending.event.is_set()
        with _lock:
            assert session not in _gateway_queues

    def test_submit_pending_registers_entry(self):
        """submit_pending must store the entry under its session id."""
        session = f"unit-submit-{uuid.uuid4().hex[:8]}"
        payload = {"question": "Pick", "choices_offered": ["one", "two"], "session_id": session}
        entry = submit_pending(session, payload)
        assert entry.data == payload
        with _lock:
            assert session in _gateway_queues
        clear_pending(session)
class TestClarifyModuleExports:
    """api.clarify must keep exporting its coordination primitives."""

    @staticmethod
    def _has_attr(name):
        # Lazy import mirrors how downstream callers access the module.
        import api.clarify as ap
        return hasattr(ap, name)

    def test_register_gateway_notify_exported(self):
        assert self._has_attr("register_gateway_notify")

    def test_unregister_gateway_notify_exported(self):
        assert self._has_attr("unregister_gateway_notify")

    def test_resolve_clarify_exported(self):
        assert self._has_attr("resolve_clarify")

    def test_clarify_entry_exported(self):
        assert self._has_attr("_ClarifyEntry")
class TestClarifyHTTPEndpoints:
    """Regression tests for /api/clarify/respond against the live test server."""

    def test_respond_returns_ok_no_pending(self):
        """Responding when nothing is pending is a harmless no-op."""
        session = f"http-no-pending-{uuid.uuid4().hex[:8]}"
        body, status = post("/api/clarify/respond", {
            "session_id": session,
            "response": "Use option A",
        })
        assert status == 200
        assert body["ok"] is True

    def test_respond_requires_session_id(self):
        """Missing session_id is a client error."""
        _body, status = post("/api/clarify/respond", {"response": "Hello"})
        assert status == 400

    def test_respond_requires_response(self):
        """Missing response text is a client error."""
        session = f"http-no-response-{uuid.uuid4().hex[:8]}"
        _body, status = post("/api/clarify/respond", {"session_id": session})
        assert status == 400

    def test_respond_clears_injected_pending(self):
        """A respond call must clear a previously injected pending clarify."""
        session = f"http-clear-{uuid.uuid4().hex[:8]}"
        quoted_sid = urllib.parse.quote(session)
        question = urllib.parse.quote("Pick the better option")
        choices = urllib.parse.quote("A")
        injected = get(
            f"/api/clarify/inject_test?session_id={quoted_sid}"
            f"&question={question}&choices={choices}"
        )
        assert injected["ok"] is True
        assert get(f"/api/clarify/pending?session_id={quoted_sid}")["pending"] is not None
        body, status = post("/api/clarify/respond", {
            "session_id": session,
            "response": "B",
        })
        assert status == 200
        assert body["ok"] is True
        assert get(f"/api/clarify/pending?session_id={quoted_sid}")["pending"] is None

View File

@@ -0,0 +1,84 @@
"""Tests for GET /api/commands -- exposes hermes-agent COMMAND_REGISTRY."""
import json
import urllib.request
import pytest
from tests.conftest import TEST_BASE, requires_agent_modules
def _get(path):
    """GET helper -- returns parsed JSON or raises HTTPError."""
    url = TEST_BASE + path
    with urllib.request.urlopen(url, timeout=10) as resp:
        raw = resp.read()
    return json.loads(raw)
@requires_agent_modules
def test_commands_endpoint_returns_list():
    """GET /api/commands returns a JSON object with a non-empty 'commands' list."""
    payload = _get('/api/commands')
    commands = payload.get('commands')
    assert commands is not None
    assert isinstance(commands, list)
    assert commands  # at least one command must be exposed


@requires_agent_modules
def test_commands_endpoint_includes_help():
    """The 'help' command must always be present (it's not cli_only)."""
    names = {c['name'] for c in _get('/api/commands')['commands']}
    assert 'help' in names


@requires_agent_modules
def test_commands_endpoint_command_shape():
    """Each command entry carries the full set of required fields."""
    commands = _get('/api/commands')['commands']
    help_cmd = next(c for c in commands if c['name'] == 'help')
    required = {
        'name', 'description', 'category', 'aliases',
        'args_hint', 'subcommands', 'cli_only', 'gateway_only',
    }
    assert required <= set(help_cmd.keys())
    assert isinstance(help_cmd['aliases'], list)
    assert isinstance(help_cmd['subcommands'], list)
    assert isinstance(help_cmd['cli_only'], bool)
    assert isinstance(help_cmd['gateway_only'], bool)


@requires_agent_modules
def test_commands_endpoint_excludes_gateway_only_and_never_expose():
    """gateway_only commands and the _NEVER_EXPOSE set are filtered out."""
    names = {c['name'] for c in _get('/api/commands')['commands']}
    # /sethome, /restart, /update are gateway_only; /commands is in _NEVER_EXPOSE
    for name in ('sethome', 'restart', 'update', 'commands'):
        assert name not in names, f"{name} must be excluded from /api/commands"


@requires_agent_modules
def test_commands_endpoint_keeps_new_with_reset_alias():
    """The 'new' command stays exposed and carries its 'reset' alias."""
    commands = _get('/api/commands')['commands']
    new_cmd = next(c for c in commands if c['name'] == 'new')
    assert 'reset' in new_cmd['aliases']
def test_list_commands_returns_empty_for_empty_registry():
    """list_commands(_registry=[]) returns [] -- the same path as when
    hermes_cli is missing (the empty-or-missing case)."""
    from api.commands import list_commands
    assert list_commands(_registry=[]) == []


def test_list_commands_degrades_when_agent_missing(monkeypatch):
    """If hermes_cli.commands is not importable, list_commands() returns []."""
    import sys
    # Stubbing the module with None makes the lazy in-function import raise
    # ImportError on every call, exercising the fallback path. We deliberately
    # do NOT reload api.commands; monkeypatch restores sys.modules afterwards.
    monkeypatch.setitem(sys.modules, 'hermes_cli.commands', None)
    from api.commands import list_commands
    assert list_commands() == []

View File

@@ -0,0 +1,135 @@
"""
Tests for named custom provider display in the model dropdown (issue #557).
When a custom_providers entry carries a `name` field (e.g. "Agent37"), the
web UI model picker should show that name as the group header rather than the
generic "Custom" label.
"""
import api.config as config
def _models_with_cfg(model_cfg=None, custom_providers=None, active_provider=None):
    """Temporarily patch config.cfg, call get_available_models(), restore.

    model_cfg: value for config.cfg["model"] (skipped when falsy).
    custom_providers: value for config.cfg["custom_providers"] (skipped when None).
    """
    # NOTE(review): `active_provider` is accepted but never used below —
    # either wire it into config.cfg or drop it from the signature.
    old_cfg = dict(config.cfg)  # shallow snapshot; entries are replaced wholesale
    config.cfg.clear()
    if model_cfg:
        config.cfg["model"] = model_cfg
    if custom_providers is not None:
        config.cfg["custom_providers"] = custom_providers
    try:
        return config.get_available_models()
    finally:
        # Always restore the original config, even if the call raises.
        config.cfg.clear()
        config.cfg.update(old_cfg)
# ── Named provider shows its name in the dropdown ─────────────────────────────
class TestNamedCustomProviderGroup:
    """Named custom_providers entries must surface under their own name (#557)."""

    @staticmethod
    def _group_names(result):
        # Flatten the response into just the ordered list of group headers.
        return [g["provider"] for g in result.get("groups", [])]

    def test_named_provider_uses_name_as_group_header(self):
        """A custom_provider entry with name='Agent37' should produce
        a group whose 'provider' key is 'Agent37', not 'Custom'."""
        result = _models_with_cfg(
            model_cfg={"provider": "custom", "base_url": "https://agent37.example.com/v1"},
            custom_providers=[
                {"name": "Agent37", "model": "default", "base_url": "https://agent37.example.com/v1"}
            ],
        )
        group_names = self._group_names(result)
        assert "Agent37" in group_names, (
            f"Expected 'Agent37' in group names, got {group_names}"
        )

    def test_named_provider_does_not_produce_generic_custom(self):
        """When all custom_provider entries have names, no group called 'Custom'
        should appear alongside them."""
        result = _models_with_cfg(
            model_cfg={"provider": "custom", "base_url": "https://agent37.example.com/v1"},
            custom_providers=[
                {"name": "Agent37", "model": "default", "base_url": "https://agent37.example.com/v1"}
            ],
        )
        group_names = self._group_names(result)
        assert "Custom" not in group_names, (
            f"Expected no generic 'Custom' group when all entries are named, got {group_names}"
        )

    def test_named_provider_model_appears_in_its_group(self):
        """The model ID from the named entry should be inside the named group."""
        result = _models_with_cfg(
            model_cfg={"provider": "custom"},
            custom_providers=[
                {"name": "Agent37", "model": "my-llm", "base_url": "https://agent37.example.com/v1"}
            ],
        )
        agent37_group = next(
            (g for g in result.get("groups", []) if g["provider"] == "Agent37"), None
        )
        assert agent37_group is not None, "Expected an 'Agent37' group"
        model_ids = [m["id"] for m in agent37_group.get("models", [])]
        assert "my-llm" in model_ids, (
            f"Expected 'my-llm' in Agent37 group models, got {model_ids}"
        )

    def test_multiple_named_providers_each_get_their_own_group(self):
        """Two named custom providers should produce two distinct groups."""
        result = _models_with_cfg(
            model_cfg={"provider": "custom"},
            custom_providers=[
                {"name": "Agent37", "model": "fast-model"},
                {"name": "PrivateProxy", "model": "private-llm"},
            ],
        )
        group_names = self._group_names(result)
        assert "Agent37" in group_names, f"Expected 'Agent37' group, got {group_names}"
        assert "PrivateProxy" in group_names, f"Expected 'PrivateProxy' group, got {group_names}"
        assert "Custom" not in group_names, f"No generic 'Custom' group expected, got {group_names}"

    def test_multiple_models_in_same_named_provider(self):
        """Multiple entries with the same name should be collapsed into one group."""
        result = _models_with_cfg(
            model_cfg={"provider": "custom"},
            custom_providers=[
                {"name": "Agent37", "model": "model-a"},
                {"name": "Agent37", "model": "model-b"},
            ],
        )
        agent37_groups = [g for g in result.get("groups", []) if g["provider"] == "Agent37"]
        assert len(agent37_groups) == 1, (
            f"Expected exactly one 'Agent37' group, got {len(agent37_groups)}"
        )
        model_ids = [m["id"] for m in agent37_groups[0].get("models", [])]
        assert "model-a" in model_ids
        assert "model-b" in model_ids
# ── Unnamed entry still falls back to 'Custom' ─────────────────────────────────
class TestUnnamedCustomProviderFallback:
    """Entries without a `name` must keep the legacy 'Custom' group label,
    including when they are mixed with named entries."""

    def test_unnamed_entry_still_produces_custom_group(self):
        """A custom_provider entry without a name should still show as 'Custom'."""
        result = _models_with_cfg(
            model_cfg={"provider": "custom"},
            custom_providers=[
                {"model": "unnamed-model"}
            ],
        )
        group_names = [g["provider"] for g in result.get("groups", [])]
        assert "Custom" in group_names, (
            f"Expected generic 'Custom' group for unnamed entry, got {group_names}"
        )

    def test_mixed_named_and_unnamed_entries(self):
        """Named and unnamed entries should appear in their respective groups."""
        result = _models_with_cfg(
            model_cfg={"provider": "custom"},
            custom_providers=[
                {"name": "Agent37", "model": "named-model"},
                {"model": "unnamed-model"},
            ],
        )
        group_names = [g["provider"] for g in result.get("groups", [])]
        assert "Agent37" in group_names, f"Expected 'Agent37' group, got {group_names}"
        assert "Custom" in group_names, f"Expected 'Custom' group for unnamed entry, got {group_names}"

View File

@@ -0,0 +1,148 @@
import json
from pathlib import Path
import api.config as config
def test_resolve_default_workspace_falls_back_to_existing_home_work(monkeypatch, tmp_path):
    """An unusable preferred path falls back to an already-existing ~/work."""
    home_work = tmp_path / "work"
    home_work.mkdir()
    monkeypatch.setattr(config, "HOME", tmp_path)
    monkeypatch.setattr(config, "STATE_DIR", tmp_path / "state")
    result = config.resolve_default_workspace("/definitely/not/usable")
    assert result == home_work.resolve()
def test_save_settings_rewrites_bad_default_workspace_to_fallback(monkeypatch, tmp_path):
    """save_settings() must replace an unusable default_workspace with the
    fallback, both in its return value and in the settings file it writes."""
    fallback = tmp_path / "work"
    fallback.mkdir()
    settings_file = tmp_path / "settings.json"
    monkeypatch.setattr(config, "HOME", tmp_path)
    monkeypatch.setattr(config, "STATE_DIR", tmp_path / "state")
    monkeypatch.setattr(config, "SETTINGS_FILE", settings_file)
    monkeypatch.setattr(config, "DEFAULT_WORKSPACE", fallback)
    returned = config.save_settings({"default_workspace": "/definitely/not/usable"})
    persisted = json.loads(settings_file.read_text(encoding="utf-8"))
    expected = str(fallback.resolve())
    assert returned["default_workspace"] == expected
    assert persisted["default_workspace"] == expected
def test_resolve_default_workspace_creates_home_workspace_when_missing(monkeypatch, tmp_path):
    """When no preferred dir exists, resolve falls back to creating ~/workspace."""
    monkeypatch.setattr(config, "HOME", tmp_path)
    monkeypatch.setattr(config, "STATE_DIR", tmp_path / "state")
    # Neither ~/work nor ~/workspace exists yet.
    created = config.resolve_default_workspace(None)
    expected = (tmp_path / "workspace").resolve()
    assert created == expected
    assert created.is_dir()
def test_resolve_default_workspace_raises_when_all_candidates_fail(monkeypatch, tmp_path):
    """RuntimeError is raised when every candidate is unwritable."""
    import os
    import stat
    import pytest
    # chmod-based permission denial is a no-op for root (e.g. Docker CI):
    # mkdir would succeed and the expected RuntimeError would never fire.
    if hasattr(os, "geteuid") and os.geteuid() == 0:
        pytest.skip("read-only directories do not block root")
    # Make tmp_path read-only so mkdir inside it fails
    tmp_path.chmod(stat.S_IRUSR | stat.S_IXUSR)
    state_dir = tmp_path / "state"
    monkeypatch.setattr(config, "HOME", tmp_path)
    monkeypatch.setattr(config, "STATE_DIR", state_dir)
    monkeypatch.delenv("HERMES_WEBUI_DEFAULT_WORKSPACE", raising=False)
    try:
        with pytest.raises(RuntimeError, match="Could not create or access"):
            config.resolve_default_workspace(None)
    finally:
        tmp_path.chmod(stat.S_IRWXU)  # restore for cleanup
def test_workspace_candidates_deduplicates_home_workspace(monkeypatch, tmp_path):
    """~/workspace must appear at most once in the candidates list even if it exists."""
    home_workspace = tmp_path / "workspace"
    home_workspace.mkdir()
    monkeypatch.setattr(config, "HOME", tmp_path)
    monkeypatch.setattr(config, "STATE_DIR", tmp_path / "state")
    monkeypatch.delenv("HERMES_WEBUI_DEFAULT_WORKSPACE", raising=False)
    as_strings = [str(candidate) for candidate in config._workspace_candidates(None)]
    assert as_strings.count(str(home_workspace.resolve())) <= 1, \
        "~/workspace must not appear twice"
def test_env_var_workspace_takes_priority_over_passed_raw(monkeypatch, tmp_path):
    """HERMES_WEBUI_DEFAULT_WORKSPACE env var overrides a None raw arg but not a valid one."""
    env_workspace = tmp_path / "env_workspace"
    env_workspace.mkdir()
    monkeypatch.setattr(config, "HOME", tmp_path)
    monkeypatch.setattr(config, "STATE_DIR", tmp_path / "state")
    monkeypatch.setenv("HERMES_WEBUI_DEFAULT_WORKSPACE", str(env_workspace))
    # With raw=None the env var should decide the workspace.
    assert config.resolve_default_workspace(None) == env_workspace.resolve()
def test_ensure_workspace_dir_returns_false_for_unwritable_path(monkeypatch, tmp_path):
    """_ensure_workspace_dir returns False for a path that can't be created."""
    import os
    import stat
    import pytest
    # chmod-based permission denial is a no-op for root (e.g. Docker CI),
    # where mkdir would succeed and the assertion below would spuriously fail.
    if hasattr(os, "geteuid") and os.geteuid() == 0:
        pytest.skip("read-only parent directory does not block root")
    # Make parent read-only so mkdir fails
    parent = tmp_path / "ro_parent"
    parent.mkdir()
    parent.chmod(stat.S_IRUSR | stat.S_IXUSR)
    try:
        result = config._ensure_workspace_dir(parent / "child")
        assert result is False
    finally:
        parent.chmod(stat.S_IRWXU)  # restore so pytest can clean up tmp_path
def test_env_var_wins_over_settings_json_on_startup(monkeypatch, tmp_path):
    """HERMES_WEBUI_DEFAULT_WORKSPACE must not be overridden by settings.json at startup.

    Regression for GitHub issue #609: Docker deployments set the env var to a
    volume mount, but settings.json from a previous container run used to
    silently win, reverting the files panel to the old path.
    """
    import json as _json
    import os as _os
    env_ws = tmp_path / "env_workspace"
    env_ws.mkdir()
    settings_ws = tmp_path / "settings_workspace"
    settings_ws.mkdir()
    state_dir = tmp_path / "state"
    state_dir.mkdir()
    # Pre-seed settings.json with a *different* workspace, as a previous
    # container run would have left behind.
    settings_file = state_dir / "settings.json"
    settings_file.write_text(
        _json.dumps({"default_workspace": str(settings_ws)}), encoding="utf-8"
    )
    monkeypatch.setattr(config, "HOME", tmp_path)
    monkeypatch.setattr(config, "STATE_DIR", state_dir)
    monkeypatch.setattr(config, "SETTINGS_FILE", settings_file)
    # Simulate DEFAULT_WORKSPACE already set correctly from env var at import time
    monkeypatch.setattr(config, "DEFAULT_WORKSPACE", env_ws.resolve())
    monkeypatch.setenv("HERMES_WEBUI_DEFAULT_WORKSPACE", str(env_ws))
    # Execute the patched startup block logic inline — env var present → skip override
    current_ws = config.DEFAULT_WORKSPACE
    startup_settings = config.load_settings()
    if not _os.getenv("HERMES_WEBUI_DEFAULT_WORKSPACE"):
        # This branch must be skipped because env var is set
        current_ws = config.resolve_default_workspace(
            startup_settings.get("default_workspace")
        )
    # env var was set → the if block was skipped → env path wins over settings.json
    assert current_ws == env_ws.resolve(), (
        f"Expected {env_ws.resolve()}, got {current_ws}. "
        "settings.json must not override HERMES_WEBUI_DEFAULT_WORKSPACE."
    )

420
tests/test_gateway_sync.py Normal file
View File

@@ -0,0 +1,420 @@
"""
Tests for Phase 1: Real-time Gateway Session Sync.
Tests are ordered TDD-style:
1. Gateway sessions appear in /api/sessions when setting enabled
2. Gateway sessions excluded when setting disabled
3. Gateway sessions have correct metadata (source_tag, is_cli_session)
4. SSE stream endpoint opens and receives events
5. Watcher detects new sessions inserted into state.db
6. Settings UI has renamed label
"""
import json
import os
import pathlib
import sqlite3
import time
import urllib.error
import urllib.request
REPO_ROOT = pathlib.Path(__file__).parent.parent.resolve()
from tests._pytest_port import BASE
def get(path):
    """GET BASE+path; return (decoded JSON body, HTTP status code)."""
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        payload = json.loads(resp.read())
        code = resp.status
    return payload, code
def post(path, body=None):
    """POST `body` as JSON to BASE+path; return (parsed response JSON, status).

    HTTP errors are not raised: the error payload (or {} if unparseable) and
    the error code are returned so tests can assert on status directly.
    """
    encoded = json.dumps(body or {}).encode()
    request = urllib.request.Request(
        BASE + path,
        data=encoded,
        headers={"Content-Type": "application/json"},
    )
    try:
        with urllib.request.urlopen(request, timeout=10) as resp:
            return json.loads(resp.read()), resp.status
    except urllib.error.HTTPError as err:
        try:
            return json.loads(err.read()), err.code
        except Exception:
            return {}, err.code
def _get_test_state_dir():
    """Return the test state directory (matches conftest.py TEST_STATE_DIR).

    conftest.py sets HERMES_WEBUI_TEST_STATE_DIR in the test-process environment
    (via os.environ.setdefault) so that tests writing directly to state.db always
    use the same path the test server was started with.  When the env var is not
    set (e.g. when running this file standalone), tests._pytest_port falls back
    to the same auto-derivation conftest uses (a per-worktree
    ``webui-test-<hash>`` directory under HERMES_HOME), so both paths stay in
    sync either way.
    """
    # Use _pytest_port which applies the same auto-derivation as conftest.py
    from tests._pytest_port import TEST_STATE_DIR as _ptsd
    return _ptsd
def _get_state_db_path():
    """Path of the SQLite state database inside the test state dir."""
    state_dir = _get_test_state_dir()
    return state_dir / 'state.db'
def _ensure_state_db():
    """Create state.db with sessions and messages tables if it doesn't exist.

    Returns a connection. Does NOT delete existing data (safe for parallel tests).
    """
    db_path = _get_state_db_path()
    db_path.parent.mkdir(parents=True, exist_ok=True)
    conn = sqlite3.connect(str(db_path))
    conn.row_factory = sqlite3.Row
    # WAL mode so the webui server can read while tests write concurrently.
    conn.execute("PRAGMA journal_mode=WAL")
    # Schema mirrors the gateway's state.db layout (idempotent re-create).
    conn.executescript("""
        CREATE TABLE IF NOT EXISTS sessions (
            id TEXT PRIMARY KEY,
            source TEXT NOT NULL,
            user_id TEXT,
            model TEXT,
            started_at REAL NOT NULL,
            message_count INTEGER DEFAULT 0,
            title TEXT
        );
        CREATE TABLE IF NOT EXISTS messages (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            session_id TEXT NOT NULL,
            role TEXT NOT NULL,
            content TEXT,
            timestamp REAL NOT NULL
        );
    """)
    conn.commit()
    return conn
def _insert_gateway_session(conn, session_id='20260401_120000_abcdefgh', source='telegram',
title='Telegram Chat', model='anthropic/claude-sonnet-4-5',
started_at=None, message_count=2):
"""Insert a gateway session into state.db."""
conn.execute(
"INSERT OR REPLACE INTO sessions (id, source, title, model, started_at, message_count) "
"VALUES (?, ?, ?, ?, ?, ?)",
(session_id, source, title, model, started_at or time.time(), message_count)
)
# Delete any existing messages for this session (idempotent re-insert)
conn.execute("DELETE FROM messages WHERE session_id = ?", (session_id,))
# Insert some messages
conn.execute(
"INSERT INTO messages (session_id, role, content, timestamp) VALUES (?, 'user', ?, ?)",
(session_id, 'Hello from Telegram', started_at or time.time())
)
conn.execute(
"INSERT INTO messages (session_id, role, content, timestamp) VALUES (?, 'assistant', ?, ?)",
(session_id, 'Hi there!', (started_at or time.time()) + 1)
)
conn.commit()
def _remove_test_sessions(conn, *session_ids):
"""Remove specific test sessions from state.db (parallel-safe cleanup)."""
for sid in session_ids:
conn.execute("DELETE FROM messages WHERE session_id = ?", (sid,))
conn.execute("DELETE FROM sessions WHERE id = ?", (sid,))
conn.commit()
def _cleanup_state_db():
    """Remove state.db and its WAL/SHM side files (blank-slate tests only)."""
    db = _get_state_db_path()
    for victim in (db, db.parent / 'state.db-wal', db.parent / 'state.db-shm'):
        try:
            victim.unlink(missing_ok=True)
        except Exception:
            pass
# ── Tests ──────────────────────────────────────────────────────────────────
def test_gateway_sessions_appear_when_enabled():
    """Gateway sessions from state.db appear in /api/sessions when show_cli_sessions is on."""
    conn = _ensure_state_db()
    try:
        _insert_gateway_session(conn, session_id='gw_test_tg_001', source='telegram', title='TG Test Chat')
        # Enable the setting
        post('/api/settings', {'show_cli_sessions': True})
        data, status = get('/api/sessions')
        assert status == 200
        sessions = data.get('sessions', [])
        gw_ids = [s['session_id'] for s in sessions if s.get('session_id') == 'gw_test_tg_001']
        assert len(gw_ids) == 1, f"Expected gateway session gw_test_tg_001, got {[s['session_id'] for s in sessions]}"
    finally:
        # Best-effort cleanup: drop the fixture row, then restore the setting.
        try:
            _remove_test_sessions(conn, 'gw_test_tg_001')
            conn.close()
        except Exception:
            pass
        post('/api/settings', {'show_cli_sessions': False})
def test_gateway_sessions_excluded_when_disabled():
    """Gateway sessions are NOT returned when show_cli_sessions is off."""
    conn = _ensure_state_db()
    try:
        _insert_gateway_session(conn, session_id='gw_test_dc_001', source='discord', title='DC Test Chat')
        # Ensure setting is off
        post('/api/settings', {'show_cli_sessions': False})
        data, status = get('/api/sessions')
        assert status == 200
        sessions = data.get('sessions', [])
        gw_ids = [s['session_id'] for s in sessions if s.get('session_id') == 'gw_test_dc_001']
        assert len(gw_ids) == 0, "Gateway session should not appear when setting is off"
    finally:
        # Best-effort cleanup (setting is already off, no reset needed).
        try:
            _remove_test_sessions(conn, 'gw_test_dc_001')
            conn.close()
        except Exception:
            pass
def test_gateway_session_has_correct_metadata():
    """Gateway sessions include source_tag and is_cli_session fields."""
    conn = _ensure_state_db()
    try:
        _insert_gateway_session(conn, session_id='gw_meta_001', source='telegram', title='Meta Test')
        post('/api/settings', {'show_cli_sessions': True})
        data, status = get('/api/sessions')
        assert status == 200
        sessions = data.get('sessions', [])
        gw = next((s for s in sessions if s['session_id'] == 'gw_meta_001'), None)
        assert gw is not None, "Gateway session not found"
        # source_tag mirrors the state.db `source` column; is_cli_session marks
        # any agent-originated (non-webui) session.
        assert gw.get('source_tag') == 'telegram', f"Expected source_tag=telegram, got {gw.get('source_tag')}"
        assert gw.get('is_cli_session') is True, "is_cli_session should be True for agent sessions"
        assert gw.get('title') == 'Meta Test'
    finally:
        try:
            _remove_test_sessions(conn, 'gw_meta_001')
            conn.close()
        except Exception:
            pass
        post('/api/settings', {'show_cli_sessions': False})
def test_gateway_session_has_message_count():
    """Gateway sessions report correct message_count from state.db."""
    conn = _ensure_state_db()
    try:
        # message_count comes from the sessions row, not from counting messages.
        _insert_gateway_session(conn, session_id='gw_msg_001', source='discord', title='Msg Count Test', message_count=5)
        post('/api/settings', {'show_cli_sessions': True})
        data, status = get('/api/sessions')
        assert status == 200
        sessions = data.get('sessions', [])
        gw = next((s for s in sessions if s['session_id'] == 'gw_msg_001'), None)
        assert gw is not None
        assert gw.get('message_count') == 5, f"Expected message_count=5, got {gw.get('message_count')}"
    finally:
        try:
            _remove_test_sessions(conn, 'gw_msg_001')
            conn.close()
        except Exception:
            pass
        post('/api/settings', {'show_cli_sessions': False})
def test_gateway_sessions_multiple_sources():
    """Sessions from multiple gateway sources (telegram, discord, slack) all appear."""
    conn = _ensure_state_db()
    try:
        _insert_gateway_session(conn, session_id='gw_multi_tg', source='telegram', title='TG Chat')
        _insert_gateway_session(conn, session_id='gw_multi_dc', source='discord', title='DC Chat')
        _insert_gateway_session(conn, session_id='gw_multi_sl', source='slack', title='SL Chat')
        post('/api/settings', {'show_cli_sessions': True})
        data, status = get('/api/sessions')
        assert status == 200
        sessions = data.get('sessions', [])
        gw_ids = {s['session_id'] for s in sessions if s.get('session_id') in ('gw_multi_tg', 'gw_multi_dc', 'gw_multi_sl')}
        assert len(gw_ids) == 3, f"Expected 3 gateway sessions, got {len(gw_ids)}: {gw_ids}"
    finally:
        try:
            _remove_test_sessions(conn, 'gw_multi_tg', 'gw_multi_dc', 'gw_multi_sl')
            conn.close()
        except Exception:
            pass
        post('/api/settings', {'show_cli_sessions': False})
def test_gateway_session_messages_readable():
    """Gateway session messages can be loaded via /api/session.

    Relies on _insert_gateway_session always writing a 'user' message
    ('Hello from Telegram') followed by an 'assistant' message.
    """
    conn = _ensure_state_db()
    try:
        _insert_gateway_session(conn, session_id='gw_read_001', source='telegram', title='Readable')
        post('/api/settings', {'show_cli_sessions': True})
        # Plain string (the previous f-string had no placeholders).
        data, status = get('/api/session?session_id=gw_read_001')
        assert status == 200
        msgs = data.get('session', {}).get('messages', [])
        assert len(msgs) >= 2, f"Expected at least 2 messages, got {len(msgs)}"
        assert msgs[0].get('role') == 'user'
        assert msgs[0].get('content') == 'Hello from Telegram'
    finally:
        # Best-effort cleanup: drop the fixture row, then restore the setting.
        try:
            _remove_test_sessions(conn, 'gw_read_001')
            conn.close()
        except Exception:
            pass
        post('/api/settings', {'show_cli_sessions': False})
def test_importing_older_gateway_session_preserves_original_timestamps_and_order():
    """Importing an older gateway session should not bump it above newer WebUI sessions."""
    conn = _ensure_state_db()
    # Gateway session is backdated 30 minutes; the WebUI session created below
    # is newer and must stay first in the /api/sessions ordering.
    older_started_at = time.time() - 1800
    imported_sid = 'gw_import_old_001'
    newer_webui_sid = None
    try:
        newer_webui, status = post('/api/session/new', {'model': 'openai/gpt-5'})
        assert status == 200, newer_webui
        newer_webui_sid = newer_webui['session']['session_id']
        rename, rename_status = post(
            '/api/session/rename',
            {'session_id': newer_webui_sid, 'title': 'Newer WebUI Session'},
        )
        assert rename_status == 200, rename
        _insert_gateway_session(
            conn,
            session_id=imported_sid,
            source='discord',
            title='Older imported gateway session',
            started_at=older_started_at,
        )
        post('/api/settings', {'show_cli_sessions': True})
        imported, imported_status = post('/api/session/import_cli', {'session_id': imported_sid})
        assert imported_status == 200, imported
        imported_session = imported['session']
        # Import must carry over the original timestamps (small tolerance for
        # clock/serialization slop), not stamp the session with "now".
        assert abs(imported_session['created_at'] - older_started_at) < 2, imported_session
        assert abs(imported_session['updated_at'] - older_started_at) < 5, imported_session
        sessions_payload, sessions_status = get('/api/sessions')
        assert sessions_status == 200, sessions_payload
        ordered_ids = [item['session_id'] for item in sessions_payload.get('sessions', [])]
        assert newer_webui_sid in ordered_ids, ordered_ids
        assert imported_sid in ordered_ids, ordered_ids
        # Newer WebUI session must sort before the older imported one.
        assert ordered_ids.index(newer_webui_sid) < ordered_ids.index(imported_sid), ordered_ids
    finally:
        # Best-effort cleanup of both the state.db row and the webui sessions.
        try:
            _remove_test_sessions(conn, imported_sid)
            conn.close()
        except Exception:
            pass
        if imported_sid:
            try:
                post('/api/session/delete', {'session_id': imported_sid})
            except Exception:
                pass
        if newer_webui_sid:
            try:
                post('/api/session/delete', {'session_id': newer_webui_sid})
            except Exception:
                pass
        post('/api/settings', {'show_cli_sessions': False})
def test_gateway_sse_stream_endpoint_exists():
    """GET /api/sessions/gateway/stream returns a response (200 or 200-range)."""
    # The SSE endpoint requires show_cli_sessions to be enabled
    post('/api/settings', {'show_cli_sessions': True})
    try:
        req = urllib.request.Request(BASE + '/api/sessions/gateway/stream')
        with urllib.request.urlopen(req, timeout=5) as r:
            assert r.status in (200, 204), f"Expected 200/204, got {r.status}"
            # SSE should have content-type text/event-stream
            ctype = r.headers.get('Content-Type', '')
            assert 'text/event-stream' in ctype, f"Expected text/event-stream, got {ctype}"
    except Exception as e:
        # Timeout is acceptable — means the connection is held open (SSE behavior)
        if 'timed out' in str(e).lower() or 'timeout' in str(e).lower():
            pass  # Good: SSE keeps the connection open
        else:
            raise
    finally:
        post('/api/settings', {'show_cli_sessions': False})
def test_gateway_webui_sessions_not_duplicated():
    """If a session_id exists both in WebUI store and state.db, it's not duplicated."""
    # Create a WebUI session with a known ID
    body = {}
    d, _ = post('/api/session/new', body)
    webui_sid = d['session']['session_id']
    try:
        # Insert the same session_id into state.db as a gateway session
        conn = _ensure_state_db()
        _insert_gateway_session(conn, session_id=webui_sid, source='telegram', title='Dup Test')
        conn.close()
        post('/api/settings', {'show_cli_sessions': True})
        data, status = get('/api/sessions')
        assert status == 200
        sessions = data.get('sessions', [])
        matching = [s for s in sessions if s['session_id'] == webui_sid]
        assert len(matching) == 1, f"Expected 1 entry for {webui_sid}, got {len(matching)}"
    finally:
        # Cleanup opens a fresh connection: `conn` above is already closed (or
        # may never have been bound if _ensure_state_db raised).
        try:
            conn2 = sqlite3.connect(str(_get_state_db_path()))
            _remove_test_sessions(conn2, webui_sid)
            conn2.close()
        except Exception:
            pass
        post('/api/session/delete', {'session_id': webui_sid})
        post('/api/settings', {'show_cli_sessions': False})
def test_gateway_sessions_no_state_db():
    """When state.db doesn't exist, /api/sessions works fine (no gateway sessions)."""
    _cleanup_state_db()
    post('/api/settings', {'show_cli_sessions': True})
    try:
        payload, status = get('/api/sessions')
        assert status == 200
        # The endpoint must still answer with the webui session list (possibly empty).
        assert 'sessions' in payload
    finally:
        post('/api/settings', {'show_cli_sessions': False})
def test_cli_sessions_still_work():
    """CLI sessions (source='cli') still appear alongside gateway sessions."""
    conn = _ensure_state_db()
    try:
        # Legacy CLI rows and new gateway rows share the same code path.
        _insert_gateway_session(conn, session_id='cli_legacy_001', source='cli', title='CLI Legacy')
        _insert_gateway_session(conn, session_id='gw_new_001', source='telegram', title='GW New')
        post('/api/settings', {'show_cli_sessions': True})
        data, status = get('/api/sessions')
        assert status == 200
        sessions = data.get('sessions', [])
        agent_ids = {s['session_id'] for s in sessions if s.get('session_id') in ('cli_legacy_001', 'gw_new_001')}
        assert len(agent_ids) == 2, f"Expected 2 agent sessions (cli + gateway), got {len(agent_ids)}"
    finally:
        try:
            _remove_test_sessions(conn, 'cli_legacy_001', 'gw_new_001')
            conn.close()
        except Exception:
            pass
        post('/api/settings', {'show_cli_sessions': False})

View File

@@ -0,0 +1,61 @@
import pathlib
import re
REPO_ROOT = pathlib.Path(__file__).parent.parent.resolve()
BOOT_JS = (REPO_ROOT / "static" / "boot.js").read_text(encoding="utf-8")
UI_JS = (REPO_ROOT / "static" / "ui.js").read_text(encoding="utf-8")
SESSIONS_JS = (REPO_ROOT / "static" / "sessions.js").read_text(encoding="utf-8")
def _ime_guarded_enter_pattern(event_var_pattern, require_no_shift=False):
no_shift = rf"\s*&&\s*!\s*{event_var_pattern}\.shiftKey" if require_no_shift else ""
return (
rf"if\s*\(\s*{event_var_pattern}\.key\s*===\s*'Enter'{no_shift}\s*\)\s*\{{\s*"
rf"if\s*\(\s*{event_var_pattern}\.isComposing\s*\)\s*"
rf"(?:\{{\s*return\s*;?\s*\}}|return\s*;?)"
)
def test_boot_chat_enter_send_respects_ime_composition():
    """Both Enter handlers in boot.js must bail out during IME composition."""
    composer_pattern = _ime_guarded_enter_pattern("e")
    assert re.search(composer_pattern, BOOT_JS, re.DOTALL), \
        "Chat composer Enter handler must ignore IME composition Enter in static/boot.js"
    dropdown_pattern = _ime_guarded_enter_pattern("e", require_no_shift=True)
    assert re.search(dropdown_pattern, BOOT_JS, re.DOTALL), \
        "Command dropdown Enter handler must ignore IME composition Enter in static/boot.js"
def test_ui_enter_submit_paths_respect_ime_composition():
    """All three Enter-to-submit paths in ui.js must respect IME composition."""
    dialog_pattern = (
        rf"document\.addEventListener\('keydown',e=>\{{[\s\S]*?{_ime_guarded_enter_pattern('e')}"
    )
    assert re.search(dialog_pattern, UI_JS, re.DOTALL), \
        "App dialog Enter handler must ignore IME composition Enter in static/ui.js"
    edit_pattern = _ime_guarded_enter_pattern("e", require_no_shift=True)
    assert re.search(edit_pattern, UI_JS, re.DOTALL), \
        "Message edit Enter-to-save handler must ignore IME composition Enter in static/ui.js"
    rename_pattern = rf"inp\.onkeydown=\(e2\)=>\{{\s*{_ime_guarded_enter_pattern('e2')}"
    assert re.search(rename_pattern, UI_JS, re.DOTALL), \
        "Workspace rename Enter handler must ignore IME composition Enter in static/ui.js"
def test_sessions_enter_submit_paths_respect_ime_composition():
    """Rename/create inputs in sessions.js must guard Enter against IME composition."""
    # Event variable is "e" or "e2" depending on the handler, hence r"e2?".
    guarded = re.findall(_ime_guarded_enter_pattern(r"e2?"), SESSIONS_JS, re.DOTALL)
    assert len(guarded) >= 3, \
        "Session and project rename/create Enter handlers must ignore IME composition Enter in static/sessions.js"

322
tests/test_issue336.py Normal file
View File

@@ -0,0 +1,322 @@
"""
Tests for issue #336 — opt-in chat bubble layout (PR #398).
Covers:
- api/config.py: bubble_layout present in _SETTINGS_DEFAULTS with default False
- api/config.py: bubble_layout present in _SETTINGS_BOOL_KEYS
- api/config.py: bubble_layout not in password-filtered keys (safe to expose)
- static/boot.js: boot path applies bubble-layout class from settings
- static/boot.js: catch path removes bubble-layout class on API failure
- static/panels.js: loadSettingsPanel reads bubble_layout checkbox
- static/panels.js: saveSettings writes bubble_layout and toggles body class
- static/style.css: body.bubble-layout CSS selectors present
- static/style.css: responsive max-width rule for bubble layout
- static/index.html: settingsBubbleLayout checkbox element present
- static/index.html: i18n keys wired on label and description
- static/i18n.js: English label and description keys present
- static/i18n.js: Spanish label and description keys present
- Integration: bubble_layout default is False in GET /api/settings
- Integration: bubble_layout persists via POST /api/settings
- Integration: non-bool value is coerced to bool on POST
"""
import json
import pathlib
import re
import unittest
import urllib.error
import urllib.request
REPO_ROOT = pathlib.Path(__file__).parent.parent
CONFIG_PY = (REPO_ROOT / "api" / "config.py").read_text()
BOOT_JS = (REPO_ROOT / "static" / "boot.js").read_text()
PANELS_JS = (REPO_ROOT / "static" / "panels.js").read_text()
STYLE_CSS = (REPO_ROOT / "static" / "style.css").read_text()
INDEX_HTML = (REPO_ROOT / "static" / "index.html").read_text()
I18N_JS = (REPO_ROOT / "static" / "i18n.js").read_text()
from tests._pytest_port import BASE
def _get(path):
    """GET BASE+path; return (parsed JSON body, HTTP status code)."""
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        parsed = json.loads(resp.read())
        code = resp.status
    return parsed, code
def _post(path, body=None):
    """POST `body` as JSON; return (parsed JSON, status).

    HTTPError responses are caught and their JSON body returned with the
    error code, so callers can assert on status without try/except.
    """
    encoded = json.dumps(body or {}).encode()
    request = urllib.request.Request(
        BASE + path, data=encoded, headers={"Content-Type": "application/json"}
    )
    try:
        with urllib.request.urlopen(request, timeout=10) as resp:
            return json.loads(resp.read()), resp.status
    except urllib.error.HTTPError as err:
        return json.loads(err.read()), err.code
# ── config.py static checks ───────────────────────────────────────────────
class TestBubbleLayoutConfig(unittest.TestCase):
    """Verify bubble_layout is correctly registered in config.py."""

    def test_bubble_layout_in_settings_defaults(self):
        """bubble_layout must appear in _SETTINGS_DEFAULTS."""
        # NOTE(review): assertIn scans the whole file, not the defaults dict
        # specifically — acceptable as a smoke check.
        self.assertIn(
            '"bubble_layout"',
            CONFIG_PY,
            "bubble_layout key missing from _SETTINGS_DEFAULTS in api/config.py",
        )

    def test_bubble_layout_default_is_false(self):
        """bubble_layout default value must be False (opt-in, off by default)."""
        # Match "bubble_layout": False with optional spacing
        self.assertRegex(
            CONFIG_PY,
            r'"bubble_layout"\s*:\s*False',
            "bubble_layout default must be False in _SETTINGS_DEFAULTS",
        )

    def test_bubble_layout_in_bool_keys(self):
        """bubble_layout must be in _SETTINGS_BOOL_KEYS for coercion."""
        # Find the _SETTINGS_BOOL_KEYS block and verify membership
        bool_keys_match = re.search(
            r"_SETTINGS_BOOL_KEYS\s*=\s*\{([^}]+)\}", CONFIG_PY, re.DOTALL
        )
        self.assertIsNotNone(
            bool_keys_match, "_SETTINGS_BOOL_KEYS block not found in config.py"
        )
        self.assertIn(
            '"bubble_layout"',
            bool_keys_match.group(1),
            "bubble_layout missing from _SETTINGS_BOOL_KEYS",
        )
# ── boot.js static checks ────────────────────────────────────────────────
class TestBubbleLayoutBootJS(unittest.TestCase):
    """Verify bubble-layout class management in boot.js."""

    def test_boot_applies_bubble_layout_class(self):
        """boot.js success path must toggle body.bubble-layout from settings."""
        self.assertIn(
            "classList.toggle('bubble-layout',!!s.bubble_layout)",
            BOOT_JS,
            "boot.js must call classList.toggle('bubble-layout', ...) on settings load",
        )

    def test_boot_catch_removes_bubble_layout_class(self):
        """boot.js catch path must remove bubble-layout (default off on API failure)."""
        self.assertIn(
            "classList.remove('bubble-layout')",
            BOOT_JS,
            "boot.js catch block must call classList.remove('bubble-layout') on API failure",
        )
# ── panels.js static checks ──────────────────────────────────────────────
class TestBubbleLayoutPanelsJS(unittest.TestCase):
    """Verify settings panel wires the bubble_layout checkbox."""

    def test_load_settings_reads_bubble_layout_checkbox(self):
        """loadSettingsPanel must read the settingsBubbleLayout checkbox state."""
        self.assertIn(
            "settingsBubbleLayout",
            PANELS_JS,
            "panels.js must reference settingsBubbleLayout checkbox",
        )

    def test_save_settings_writes_bubble_layout(self):
        """saveSettings must write body.bubble_layout from the checkbox."""
        self.assertIn(
            "body.bubble_layout",
            PANELS_JS,
            "saveSettings must set body.bubble_layout from checkbox",
        )

    def test_save_settings_toggles_body_class(self):
        """saveSettings must apply body class toggle for live preview."""
        self.assertIn(
            "classList.toggle('bubble-layout', body.bubble_layout)",
            PANELS_JS,
            "saveSettings must toggle 'bubble-layout' on document.body for live preview",
        )
# ── style.css static checks ──────────────────────────────────────────────
class TestBubbleLayoutCSS(unittest.TestCase):
    """Verify CSS selectors for bubble layout are present and gated on body class."""

    def test_user_row_right_align_selector_present(self):
        """CSS must right-align user message rows when bubble-layout is active."""
        self.assertIn(
            "body.bubble-layout .msg-row:has(.msg-role.user)",
            STYLE_CSS,
            "CSS selector for user bubble alignment missing from style.css",
        )

    def test_assistant_row_left_align_selector_present(self):
        """CSS must left-align assistant message rows when bubble-layout is active."""
        self.assertIn(
            "body.bubble-layout .msg-row:has(.msg-role.assistant)",
            STYLE_CSS,
            "CSS selector for assistant bubble alignment missing from style.css",
        )

    def test_bubble_layout_responsive_rule_present(self):
        """A responsive max-width rule for narrow screens must be present."""
        # Both selectors must appear inside a @media block
        self.assertRegex(
            STYLE_CSS,
            r"@media\([^)]*700px[^)]*\)[^{]*\{[^}]*bubble-layout",
            "Responsive bubble-layout rule (700px breakpoint) missing from style.css",
        )
# ── index.html static checks ─────────────────────────────────────────────
class TestBubbleLayoutHTML(unittest.TestCase):
    """Verify the settings checkbox is present and correctly wired in index.html."""

    def test_settings_checkbox_present(self):
        """The settingsBubbleLayout checkbox must exist in index.html."""
        self.assertIn(
            'id="settingsBubbleLayout"',
            INDEX_HTML,
            "settingsBubbleLayout checkbox missing from index.html",
        )

    def test_settings_label_i18n_key_wired(self):
        """Label span must carry the settings_label_bubble_layout i18n key."""
        self.assertIn(
            'data-i18n="settings_label_bubble_layout"',
            INDEX_HTML,
            "settings_label_bubble_layout i18n key not wired on label span",
        )

    def test_settings_desc_i18n_key_wired(self):
        """Description div must carry the settings_desc_bubble_layout i18n key."""
        self.assertIn(
            'data-i18n="settings_desc_bubble_layout"',
            INDEX_HTML,
            "settings_desc_bubble_layout i18n key not wired on description div",
        )
# ── i18n.js static checks ────────────────────────────────────────────────
class TestBubbleLayoutI18N(unittest.TestCase):
    """Verify English and Spanish locale keys are present in i18n.js."""

    def _extract_locale_block(self, lang_start_marker, lang_end_marker):
        """Extract the content between two locale markers.

        Relies on i18n.js declaring locales in source order (en, es, de, ...),
        each opening with a "\n  <lang>: {" marker.
        """
        start = I18N_JS.find(lang_start_marker)
        end = I18N_JS.find(lang_end_marker, start)
        self.assertGreater(start, -1, f"Start marker '{lang_start_marker}' not found")
        self.assertGreater(end, start, f"End marker '{lang_end_marker}' not found after start")
        return I18N_JS[start:end]

    def test_english_label_key_present(self):
        """English locale must have settings_label_bubble_layout."""
        en_block = self._extract_locale_block("\n  en: {", "\n  es: {")
        self.assertIn(
            "settings_label_bubble_layout",
            en_block,
            "settings_label_bubble_layout missing from English locale",
        )

    def test_english_desc_key_present(self):
        """English locale must have settings_desc_bubble_layout."""
        en_block = self._extract_locale_block("\n  en: {", "\n  es: {")
        self.assertIn(
            "settings_desc_bubble_layout",
            en_block,
            "settings_desc_bubble_layout missing from English locale",
        )

    def test_spanish_label_key_present(self):
        """Spanish locale must have settings_label_bubble_layout."""
        es_block = self._extract_locale_block("\n  es: {", "\n  de: {")
        self.assertIn(
            "settings_label_bubble_layout",
            es_block,
            "settings_label_bubble_layout missing from Spanish locale",
        )

    def test_spanish_desc_key_present(self):
        """Spanish locale must have settings_desc_bubble_layout."""
        es_block = self._extract_locale_block("\n  es: {", "\n  de: {")
        self.assertIn(
            "settings_desc_bubble_layout",
            es_block,
            "settings_desc_bubble_layout missing from Spanish locale",
        )
# ── Integration tests (require live server on test server port) ─────────────────
class TestBubbleLayoutSettingsAPI(unittest.TestCase):
    """Integration tests: bubble_layout via GET/POST /api/settings.

    Fix: mutating tests now register the restore POST with addCleanup()
    immediately after flipping the setting, so a failed assertion can no
    longer leave bubble_layout flipped for subsequent tests (the old code
    only restored after the assertions passed).
    """

    def _call_or_skip(self, fn, *args):
        """Invoke *fn*; skip the test when the server is unreachable."""
        try:
            return fn(*args)
        except OSError:
            self.skipTest("Server not running on test server port")

    def test_bubble_layout_default_is_false(self):
        """GET /api/settings must return bubble_layout: false by default."""
        d, status = self._call_or_skip(_get, "/api/settings")
        self.assertEqual(status, 200)
        self.assertIn(
            "bubble_layout",
            d,
            "bubble_layout missing from GET /api/settings response",
        )
        self.assertFalse(
            d["bubble_layout"],
            "bubble_layout default must be False (opt-in feature)",
        )

    def test_bubble_layout_persists_true(self):
        """POST /api/settings with bubble_layout:true must persist and round-trip."""
        _, status = self._call_or_skip(_post, "/api/settings", {"bubble_layout": True})
        # Restore runs even if an assertion below fails.
        self.addCleanup(_post, "/api/settings", {"bubble_layout": False})
        self.assertEqual(status, 200)
        d, _ = _get("/api/settings")
        self.assertTrue(d["bubble_layout"], "bubble_layout=True must persist after POST")

    def test_bubble_layout_persists_false(self):
        """POST /api/settings with bubble_layout:false must persist and round-trip."""
        self._call_or_skip(_post, "/api/settings", {"bubble_layout": True})
        self._call_or_skip(_post, "/api/settings", {"bubble_layout": False})
        d, _ = _get("/api/settings")
        self.assertFalse(d["bubble_layout"], "bubble_layout=False must persist after POST")

    def test_bubble_layout_truthy_string_coerced_to_bool(self):
        """Non-bool truthy value must be coerced to bool by _SETTINGS_BOOL_KEYS logic."""
        self._call_or_skip(_post, "/api/settings", {"bubble_layout": "1"})
        # Restore runs even if the assertion below fails.
        self.addCleanup(_post, "/api/settings", {"bubble_layout": False})
        d, _ = _get("/api/settings")
        self.assertIsInstance(
            d["bubble_layout"],
            bool,
            "bubble_layout must be a bool in API response (bool coercion via _SETTINGS_BOOL_KEYS)",
        )

34
tests/test_issue341.py Normal file
View File

@@ -0,0 +1,34 @@
"""Tests for GitHub issue #341: .msg-body table CSS styles."""
import os
# Path to the stylesheet under test, resolved relative to this test file.
CSS_PATH = os.path.join(os.path.dirname(__file__), "..", "static", "style.css")
def _read_css():
    """Return the full text of static/style.css.

    Opens with an explicit UTF-8 encoding: the default locale encoding is
    platform-dependent and can mis-decode non-ASCII characters in the CSS.
    """
    with open(CSS_PATH, "r", encoding="utf-8") as f:
        return f.read()
def test_msg_body_table_css_present():
    """The table selector and collapsed-border rule must both be in style.css."""
    stylesheet = _read_css()
    expectations = (
        (".msg-body table", ".msg-body table rule missing from style.css"),
        ("border-collapse:collapse", "border-collapse:collapse missing from style.css"),
    )
    for needle, failure_msg in expectations:
        assert needle in stylesheet, failure_msg
def test_msg_body_table_th_td_present():
    """Both th and td rules for message-body tables must be present."""
    stylesheet = _read_css()
    for cell in ("th", "td"):
        assert f".msg-body {cell}" in stylesheet, \
            f".msg-body {cell} rule missing from style.css"
def test_msg_body_table_tr_stripe_present():
    """Zebra striping for even table rows must be present."""
    selector = ".msg-body tr:nth-child(even)"
    assert selector in _read_css(), f"{selector} rule missing from style.css"
def test_msg_body_light_theme_overrides():
    """Light-mode (:root:not(.dark)) overrides must exist for th and td."""
    stylesheet = _read_css()
    for cell in ("th", "td"):
        assert f':root:not(.dark) .msg-body {cell}' in stylesheet, \
            f'Light-mode override for .msg-body {cell} missing from style.css'

124
tests/test_issue342.py Normal file
View File

@@ -0,0 +1,124 @@
"""
Tests for GitHub issue #342: auto-link plain URLs in chat messages.
These are structural tests that verify the fix is present in static/ui.js
without requiring a running server or JavaScript engine.
"""
import os
import re
# Path to the front-end script under test, resolved relative to this test file.
UI_JS = os.path.join(os.path.dirname(__file__), '..', 'static', 'ui.js')
def read_ui_js():
    """Return the full text of static/ui.js.

    Opens with an explicit UTF-8 encoding: the default locale encoding is
    platform-dependent and the file contains non-ASCII characters.
    """
    with open(UI_JS, 'r', encoding='utf-8') as f:
        return f.read()
def test_autolink_comment_present():
    """The Autolink comment should be present in renderMd() to document the feature."""
    js_source = read_ui_js()
    marker = 'Autolink: convert plain URLs'
    assert marker in js_source, (
        "Expected 'Autolink: convert plain URLs' comment not found in static/ui.js. "
        "Did the autolink pass get added?"
    )
def test_autolink_regex_in_rendermd():
    """The autolink regex pattern (https?://) should appear in renderMd()."""
    js_source = read_ui_js()
    fn_start = js_source.find('function renderMd(raw){')
    assert fn_start != -1, "renderMd function not found in ui.js"
    # Scan a generous fixed-size window of the function body for the pattern.
    fn_window = js_source[fn_start:fn_start + 5000]
    assert 'https?:\\/\\/' in fn_window, (
        "Autolink regex (https?:\\/\\/) not found inside renderMd() body."
    )
def test_autolink_uses_esc_for_xss_safety():
    """The autolink code must use esc() to escape the display text of URLs, preventing XSS.
    Note: esc() is intentionally NOT applied to the href value (that would corrupt & in
    query strings). It IS applied to the visible link text (esc(clean)) to prevent XSS."""
    js_source = read_ui_js()
    start = js_source.find('// Autolink: convert plain URLs')
    assert start != -1, "Autolink comment not found in ui.js"
    # Inspect roughly the next 600 characters following the comment marker.
    block = js_source[start:start + 600]
    # Visible link text must be escaped to block XSS.
    assert 'esc(clean)' in block, (
        "Autolink block should use esc(clean) for the link display text (XSS safety), "
        "but it was not found."
    )
    # The href value must NOT be escaped — that would break & in query strings.
    assert 'href="${esc(clean)}"' not in block, (
        "Autolink block should use href=\"${clean}\" (not esc'd) to preserve & in query strings."
    )
def test_autolink_in_inline_md():
    """The autolink pass should also be present inside the inlineMd() helper."""
    js_source = read_ui_js()
    fn_start = js_source.find('function inlineMd(t){')
    assert fn_start != -1, "inlineMd function not found in ui.js"
    # The helper ends with 'return t;' followed by the closing brace.
    fn_end = js_source.find('return t;\n }', fn_start)
    assert fn_end != -1, "Could not locate end of inlineMd function"
    fn_body = js_source[fn_start:fn_end + 20]
    assert 'https?:\\/\\/' in fn_body, (
        "Autolink regex not found inside inlineMd() — plain URLs in list items "
        "and blockquotes won't be autolinked."
    )
def test_autolink_after_safe_tags_pass():
    """The autolink pass must come AFTER the SAFE_TAGS escape pass (ordering matters)."""
    js_source = read_ui_js()
    safe_tags_idx = js_source.find('s=s.replace(/<\\/?[a-z][^>]*>/gi,tag=>SAFE_TAGS.test(tag)?tag:esc(tag));')
    autolink_idx = js_source.find('// Autolink: convert plain URLs')
    parts_idx = js_source.find('const parts=s.split(/\\n{2,}/);')
    # All three pipeline stages must be present before ordering is checked.
    for idx, missing_msg in (
        (safe_tags_idx, "SAFE_TAGS pass not found"),
        (autolink_idx, "Autolink pass not found"),
        (parts_idx, "Paragraph-wrap parts line not found"),
    ):
        assert idx != -1, missing_msg
    assert safe_tags_idx < autolink_idx < parts_idx, (
        f"Ordering wrong: SAFE_TAGS at {safe_tags_idx}, autolink at {autolink_idx}, "
        f"parts (paragraph wrap) at {parts_idx}. "
        "Autolink must come between SAFE_TAGS pass and paragraph wrap."
    )
def test_autolink_target_blank_and_rel():
    """Autolinked URLs should open in a new tab with rel=noopener for security."""
    js_source = read_ui_js()
    start = js_source.find('// Autolink: convert plain URLs')
    assert start != -1, "Autolink comment not found"
    # Wider 700-char window: the stash preamble added by the fix precedes
    # the anchor attributes.
    block = js_source[start:start + 700]
    for attribute, failure_msg in (
        ('target="_blank"', 'Autolinked URLs should have target="_blank"'),
        ('rel="noopener"', 'Autolinked URLs should have rel="noopener" for security'),
    ):
        assert attribute in block, failure_msg
def test_safe_tags_includes_anchor():
    """SAFE_TAGS regex must include 'a' so <a> tags from autolink are not escaped."""
    js_source = read_ui_js()
    # The pattern contains slashes, so locate the definition line-by-line
    # rather than trying to extract the regex literal.
    safe_tags_line = next(
        (line for line in js_source.splitlines() if 'const SAFE_TAGS=' in line),
        None,
    )
    assert safe_tags_line is not None, "SAFE_TAGS const definition not found in ui.js"
    # 'a' must appear as a tag alternative in the pattern (e.g. |a| or |a)).
    assert '|a|' in safe_tags_line or '|a)' in safe_tags_line, (
        f"SAFE_TAGS line does not include 'a' tag — "
        "<a> tags emitted by autolink would be escaped!\n"
        f"Line: {safe_tags_line}"
    )

348
tests/test_issue347.py Normal file
View File

@@ -0,0 +1,348 @@
"""
Tests for GitHub issue #347: KaTeX / LaTeX math rendering in chat and workspace previews.
Structural tests — no server required. Verify:
- renderMd() stashes and restores $..$ and $$...$$ math delimiters
- KaTeX lazy-load function exists and follows the mermaid pattern
- KaTeX JS loaded from CDN with SRI integrity hash
- KaTeX CSS loaded in index.html with SRI hash
- CSS rules present for .katex-block and .katex-inline
- SAFE_TAGS updated to allow <span> (for inline math)
- renderKatexBlocks() is wired into the requestAnimationFrame call
"""
import pathlib
import re
# Repository root plus the three static assets under test, each read once
# at import time so every test below can do plain substring checks.
REPO = pathlib.Path(__file__).parent.parent
UI_JS = (REPO / 'static' / 'ui.js').read_text(encoding='utf-8')
INDEX = (REPO / 'static' / 'index.html').read_text(encoding='utf-8')
CSS = (REPO / 'static' / 'style.css').read_text(encoding='utf-8')
# ── renderMd pipeline ──────────────────────────────────────────────────────────
def test_display_math_stash_present():
    """renderMd must stash $$...$$ display math before other processing."""
    stash_regex_present = r'\$\$([\s\S]+?)\$\$' in UI_JS or '$$' in UI_JS
    assert stash_regex_present, \
        'Display math $$..$$ stash regex not found in ui.js'
    # The stash sentinel for math placeholders is \x00M.
    assert '\\x00M' in UI_JS, 'Math stash token \\x00M not found in renderMd'

def test_inline_math_stash_present():
    """renderMd must stash $..$ inline math."""
    assert 'math_stash' in UI_JS, 'math_stash array not found in renderMd'

def test_katex_block_placeholder_emitted():
    """renderMd restore pass must emit .katex-block divs for display math."""
    assert 'katex-block' in UI_JS, \
        '.katex-block placeholder div not emitted by renderMd restore pass'

def test_katex_inline_placeholder_emitted():
    """renderMd restore pass must emit .katex-inline spans for inline math."""
    assert 'katex-inline' in UI_JS, \
        '.katex-inline placeholder span not emitted by renderMd restore pass'

def test_data_katex_attribute_present():
    """Placeholders must carry data-katex attribute for display/inline distinction."""
    assert 'data-katex' in UI_JS, \
        'data-katex attribute not found — renderKatexBlocks cannot distinguish display from inline'
# ── renderKatexBlocks() ────────────────────────────────────────────────────────
def test_render_katex_blocks_function_exists():
    """renderKatexBlocks() function must exist in ui.js."""
    assert 'function renderKatexBlocks()' in UI_JS, \
        'renderKatexBlocks() function not found in ui.js'

def test_katex_lazy_load_follows_mermaid_pattern():
    """KaTeX must use the same lazy-load pattern as mermaid (load on first use)."""
    assert '_katexLoading' in UI_JS, '_katexLoading flag not found'
    assert '_katexReady' in UI_JS, '_katexReady flag not found'

def test_katex_js_loaded_from_cdn():
    """KaTeX JS must be loaded from jsdelivr CDN."""
    assert 'katex@0.16' in UI_JS, \
        'KaTeX JS CDN URL not found in ui.js — expected katex@0.16.x'

def test_katex_js_has_sri_hash():
    """KaTeX JS CDN tag must have an SRI integrity hash."""
    # The hash is in the script.integrity assignment (either quote style).
    assert "script.integrity='sha384-" in UI_JS or 'script.integrity="sha384-' in UI_JS, \
        'KaTeX JS SRI integrity hash not found in ui.js'

def test_katex_display_mode_used():
    """renderKatexBlocks must pass displayMode based on data-katex attribute."""
    assert 'displayMode' in UI_JS, \
        'displayMode not passed to katex.render() — display math will render inline'

def test_katex_throw_on_error_false():
    """KaTeX must be configured with throwOnError:false to degrade gracefully."""
    assert 'throwOnError:false' in UI_JS, \
        'throwOnError:false not set — bad LaTeX will throw and break the message'

def test_render_katex_blocks_wired_into_raf():
    """renderKatexBlocks() must be called in the same requestAnimationFrame as renderMermaidBlocks()."""
    # Fix: dropped the unused `raf_idx` local the old version computed and
    # never read. Check every rAF call site for a nearby renderKatexBlocks.
    has_katex_in_raf = any(
        'renderKatexBlocks' in UI_JS[m.start():m.start()+200]
        for m in re.finditer(r'requestAnimationFrame', UI_JS)
    )
    assert has_katex_in_raf, \
        'renderKatexBlocks() not found in any requestAnimationFrame call — math will not render'
# ── index.html ────────────────────────────────────────────────────────────────
def test_katex_css_in_index_html():
    """KaTeX CSS must be loaded in index.html."""
    assert 'katex@0.16' in INDEX, \
        'KaTeX CSS CDN link not found in index.html'

def test_katex_css_has_sri_hash():
    """KaTeX CSS link in index.html must have an SRI integrity hash."""
    # Fix: the old `A or B and C` relied on implicit precedence; parenthesized
    # to the equivalent `A or (B and C)` so the intent — either the known hash,
    # or generic integrity+katex markers — is explicit.
    assert 'sha384-5TcZemv2l' in INDEX or ('integrity' in INDEX and 'katex' in INDEX), \
        'KaTeX CSS SRI integrity hash not found in index.html'
# ── style.css ─────────────────────────────────────────────────────────────────
def test_katex_block_css_present():
    """.katex-block CSS rule must exist for centered display math."""
    assert '.katex-block' in CSS, \
        '.katex-block CSS rule missing from style.css — display math will have no layout'

def test_katex_inline_css_present():
    """.katex-inline CSS rule must exist."""
    assert '.katex-inline' in CSS, \
        '.katex-inline CSS rule missing from style.css'

def test_katex_block_text_align_center():
    """.katex-block must be text-align:center for display math."""
    # NOTE(review): this checks for text-align:center anywhere in the
    # stylesheet, not specifically inside the .katex-block rule.
    assert 'text-align:center' in CSS, \
        'text-align:center not found for .katex-block'
# ── SAFE_TAGS ──────────────────────────────────────────────────────────────────
def test_safe_tags_includes_span():
    """SAFE_TAGS must include <span> to allow .katex-inline spans through the escape pass."""
    match = re.search(r'SAFE_TAGS\s*=\s*/.*?/i', UI_JS)
    assert match, 'SAFE_TAGS pattern not found in ui.js'
    # The matched regex literal must mention the span tag.
    assert 'span' in match.group(), \
        '<span> not in SAFE_TAGS — inline math spans will be HTML-escaped and rendered as text'
# ── Stash ordering: fence must protect code spans from math extraction ─────────
# workspace.js source, read once for the workspace-preview wiring checks below.
WORKSPACE_JS = (REPO / 'static' / 'workspace.js').read_text(encoding='utf-8')
def test_fence_stash_before_math_stash():
    """fence_stash must be initialized and populated BEFORE math_stash in renderMd.
    If math_stash runs first, dollar signs inside backtick code spans are extracted
    as math, leaving placeholder tokens inside the stashed code string. The code span
    then renders with KaTeX inside <code> instead of the literal dollar-sign text.
    """
    pos_fence = UI_JS.find("const fence_stash=[]")
    pos_math = UI_JS.find("const math_stash=[]")
    assert pos_fence != -1, "fence_stash not found in renderMd"
    assert pos_math != -1, "math_stash not found in renderMd"
    assert pos_fence < pos_math, (
        "fence_stash must be declared BEFORE math_stash in renderMd "
        f"(fence at char {pos_fence}, math at char {pos_math}). "
        "If math runs first, `$x$` inside backticks gets extracted as math instead of code."
    )

def test_fence_stash_populated_before_math_stash():
    """The fence_stash s.replace call must appear before any math_stash s.replace calls."""
    pos_fence_push = UI_JS.find("fence_stash.push(m)")
    pos_math_push = UI_JS.find("math_stash.push(")
    assert pos_fence_push != -1, "fence_stash population call not found"
    assert pos_math_push != -1, "math_stash population call not found"
    assert pos_fence_push < pos_math_push, (
        "fence_stash must be populated before math_stash to protect code span contents"
    )

def test_math_stash_comment_says_after_fence():
    """The math stash comment should explain it runs AFTER fence_stash, not before."""
    stale_comment = "Must run BEFORE fence_stash"
    assert stale_comment not in UI_JS, (
        "Old misleading comment still present. Math stash runs AFTER fence_stash. "
        "The comment should say 'Runs AFTER fence_stash'."
    )
# ── Pipeline regression: code spans protect their contents ────────────────────
def test_math_restore_after_fence_restore():
    """Math stash tokens are restored AFTER fence restore, so code spans get
    their raw text back (not KaTeX placeholders)."""
    fence_restore_pos = UI_JS.find("fence_stash[+i]")
    math_restore_pos = UI_JS.find("math_stash[+i]")
    assert fence_restore_pos != -1, "fence_stash restore not found"
    assert math_restore_pos != -1, "math_stash restore not found"
    # Both restores must exist; their relative order doesn't matter for correctness
    # (they use different tokens: \x00F vs \x00M), but they must be separate calls.
    assert fence_restore_pos != math_restore_pos, "fence and math restore must be separate calls"

def test_stash_tokens_distinct():
    """fence_stash and math_stash must use distinct sentinel tokens to avoid collisions.

    Fix: the old version had three overlapping asserts — the string
    "'\\\\x00F'" contains 'x00F' as a substring, so each `A or B` collapsed
    to `B`, and the final combined assert subsumed both. One membership
    test per token is exactly equivalent.
    """
    assert 'x00F' in UI_JS, (
        "fence stash token (\\x00F) not found — must be distinct from math token"
    )
    assert 'x00M' in UI_JS, (
        "math stash token (\\x00M) not found — must be distinct from fence token"
    )
# ── Workspace preview renderKatexBlocks wiring ────────────────────────────────
def test_workspace_calls_render_katex_after_preview():
    """workspace.js must call renderKatexBlocks() after setting previewMd.innerHTML.
    Without this, math placeholders appear in workspace file previews but are never
    rendered by KaTeX (renderKatexBlocks is only wired into renderMessages rAF).
    """
    # Presence check only; the ordering check lives in the next test.
    assert "renderKatexBlocks" in WORKSPACE_JS, (
        "workspace.js must call renderKatexBlocks() after renderMd() for file previews"
    )
def test_workspace_renders_katex_after_file_open():
    """workspace.js renderKatexBlocks call must come after the renderMd(data.content) assignment."""
    preview_md_pos = WORKSPACE_JS.find("renderMd(data.content)")
    # Use the actual call string (not a stray regex match on 'M' characters)
    katex_call_str = "renderKatexBlocks==='function'"
    katex_call_pos = WORKSPACE_JS.find(katex_call_str)
    assert preview_md_pos != -1, "renderMd(data.content) not found in workspace.js"
    assert katex_call_pos != -1, (
        "renderKatexBlocks guard (typeof renderKatexBlocks==='function') not found in workspace.js"
    )
    # The call after 'renderMd(data.content)' — find the LAST occurrence
    # (there may be an earlier one in the save path at line ~153)
    last_katex_pos = WORKSPACE_JS.rfind(katex_call_str)
    assert last_katex_pos > preview_md_pos, (
        "renderKatexBlocks must be called AFTER renderMd(data.content) in workspace.js "
        f"(renderMd at {preview_md_pos}, last renderKatexBlocks at {last_katex_pos})"
    )
def test_workspace_katex_guarded_by_typeof():
    """workspace.js renderKatexBlocks call must guard with typeof check for safety
    in case KaTeX feature is not loaded (e.g. test environments, offline)."""
    # A bare call would throw ReferenceError when ui.js isn't loaded first.
    assert "typeof renderKatexBlocks" in WORKSPACE_JS, (
        "workspace.js must guard renderKatexBlocks call with typeof check: "
        "if(typeof renderKatexBlocks==='function')renderKatexBlocks()"
    )
# ── SAFE_TAGS: span addition should not expand attack surface ─────────────────
def test_safe_tags_span_is_narrowly_scoped():
    """SAFE_TAGS adding <span> is only a bypass if span carries dangerous attributes.
    Verify the SAFE_TAGS regex tests the tag NAME only, not arbitrary attributes.
    The rest of the pipeline uses esc() for user content, so attribute injection
    into KaTeX spans isn't possible.
    """
    # Primary pattern requires a non-empty regex body; the fallback covers a
    # degenerate empty literal (//i).
    safe_tags_match = re.search(r"SAFE_TAGS\s*=\s*/(.+?)/i", UI_JS)
    if not safe_tags_match:
        safe_tags_match = re.search(r"SAFE_TAGS\s*=\s*/(.*?)/i", UI_JS)
    assert safe_tags_match, "SAFE_TAGS regex not found"
    pattern = safe_tags_match.group(1)
    # Fix: the old check compared the same string twice (r"[\s>]" and
    # r'[\s>]' are identical) — a single membership test is equivalent.
    assert r"[\s>]" in pattern, (
        "SAFE_TAGS must enforce a boundary after the tag name to prevent "
        "<spanxss> from matching when checking for <span>"
    )
# ── False-positive prevention ─────────────────────────────────────────────────
def test_inline_math_regex_requires_non_space_boundaries():
    """The $...$ inline regex must require non-space at both boundaries.
    This prevents 'costs $5 and $10' from matching — the space after the opening
    $ means it's a currency amount, not math.
    """
    push_idx = UI_JS.find("type:'inline',src:m")
    assert push_idx != -1, "Inline math stash push not found"
    # Walk back to the start of the line containing the push, then take a
    # window that includes the regex literal driving the replace().
    line_start = UI_JS.rfind('\n', 0, push_idx) + 1
    inline_line = UI_JS[line_start:push_idx + 50]
    has_boundary_guard = '\\s' in inline_line or '[^' in inline_line
    assert has_boundary_guard, (
        f"Inline math regex must exclude spaces at boundaries to prevent false "
        f"positives on currency like $5. Found: {inline_line[:120]}"
    )

def test_display_math_stashed_before_inline():
    """$$...$$ display math must be stashed before $...$ inline math.
    If inline runs first on '$$x$$', it could match '$' + 'x' + '$' leaving
    a stray outer '$', corrupting the output.
    """
    pos_display = UI_JS.find("type:'display',src:m")
    pos_inline = UI_JS.find("type:'inline',src:m")
    assert pos_display != -1, "display math stash not found"
    assert pos_inline != -1, "inline math stash not found"
    # First occurrence of the display stash must precede the inline stash.
    assert pos_display < pos_inline, (
        "Display math ($$...$$) must be stashed before inline math ($...$) "
        "to prevent $$ from being parsed as two adjacent inline delimiters"
    )
def test_math_stash_token_uses_single_backslash_null_byte():
    """Math stash tokens must use the null-byte form (single backslash x00M).
    The restore regex expects a null byte character. If the stash emits
    a literal backslash+x00M (double backslash = 5-char string), the restore
    regex never matches and the tokens appear verbatim in the rendered output.
    The fence_stash correctly uses the null byte convention. Math stash must be consistent.

    Fixes: removed the redundant function-local `import re` (re is imported
    at module level), and the assert messages now spell the token as the
    readable text \\x00M instead of embedding a literal NUL byte.
    """
    # In the source file, the correct form is: return '\x00M'
    # The wrong form (double backslash) would be: return '\\x00M'
    bad_returns = re.findall(r"return\s+'\\\\x00M'", UI_JS)
    assert not bad_returns, (
        f"Found {len(bad_returns)} math stash return(s) using double-backslash \\\\x00M. "
        "Must use single backslash '\\x00M' (null byte) to match the restore regex."
    )
    # Positive check: single-backslash form must exist
    good_returns = re.findall(r"math_stash\.push.*?return '\\x00M'", UI_JS, re.DOTALL)
    assert good_returns, (
        "Math stash return must use single-backslash '\\x00M' (null byte convention)"
    )

199
tests/test_issue357.py Normal file
View File

@@ -0,0 +1,199 @@
"""
Tests for GitHub issue #357: Docker container fails to start without internet access.
Structural tests — verify Dockerfile and docker_init.bash contain the expected
patterns for pre-installed uv and workspace permission fixes.
Two problems fixed:
1. uv was downloaded at container startup; fails in air-gapped / firewalled environments.
Fix: pre-install uv in the Docker image at build time (system-wide in /usr/local/bin).
2. workspace directory created with plain mkdir (as root); bind-mount dirs created by
Docker as root are unwritable by the hermeswebui user.
Fix: sudo mkdir + sudo chown for workspace directory.
"""
import pathlib
import re
# Repository root plus the Dockerfile and init script under test, read once
# at import time for the substring/ordering checks below.
REPO = pathlib.Path(__file__).parent.parent
DOCKERFILE = (REPO / "Dockerfile").read_text(encoding="utf-8")
INIT_SCRIPT = (REPO / "docker_init.bash").read_text(encoding="utf-8")
# ── Dockerfile: uv pre-installed at build time ───────────────────────────────
class TestDockerfileUvPreinstall:
    """Dockerfile must pre-install uv at build time, system-wide, as root (#357)."""

    def test_dockerfile_installs_uv_at_build_time(self):
        """Dockerfile must install uv via RUN curl at build time (not only at runtime)."""
        assert "RUN curl" in DOCKERFILE and "uv/install.sh" in DOCKERFILE, (
            "Dockerfile must install uv at build time via RUN curl .../uv/install.sh"
        )

    def test_dockerfile_uv_installed_system_wide(self):
        """uv must be installed to a system-wide directory (/usr/local/bin) accessible
        to all users, not to a user-specific ~/.local/bin that another user can't see."""
        uv_install_line = next(
            (line for line in DOCKERFILE.splitlines() if "uv/install.sh" in line),
            None,
        )
        assert uv_install_line is not None, "Could not find uv install line in Dockerfile"
        # Must either use UV_INSTALL_DIR pointing to /usr/local/bin, or run as root
        # (so the default install location is accessible to hermeswebui user)
        has_system_dir = "/usr/local/bin" in uv_install_line or "UV_INSTALL_DIR=/usr/local/bin" in DOCKERFILE
        assert has_system_dir, (
            "uv must be installed to /usr/local/bin (system-wide) so hermeswebui user "
            "can find it. Installing as hermeswebuitoo puts it in /home/hermeswebuitoo/.local/bin "
            "which is NOT on hermeswebui's PATH."
        )

    def test_dockerfile_uv_installed_before_copy(self):
        """uv installation must happen before COPY . /apptoo so it's in the image."""
        uv_pos = DOCKERFILE.find("uv/install.sh")
        copy_pos = DOCKERFILE.find("COPY . /apptoo")
        assert uv_pos != -1, "uv install not found in Dockerfile"
        assert copy_pos != -1, "COPY . /apptoo not found in Dockerfile"
        assert uv_pos < copy_pos, "uv must be installed before COPY . /apptoo"

    def test_dockerfile_uv_installed_as_root_or_before_user_switch(self):
        """uv must be installed as root (USER root) to reach /usr/local/bin.
        If installed as hermeswebuitoo, it lands in ~hermeswebuitoo/.local/bin,
        which the hermeswebui user at runtime can't see.
        """
        lines = DOCKERFILE.splitlines()
        # Fix: the old bare next(...) raised StopIteration — reported as a
        # test ERROR rather than a clean failure — when the line was absent.
        uv_line_idx = next(
            (i for i, line in enumerate(lines) if "uv/install.sh" in line),
            None,
        )
        assert uv_line_idx is not None, "uv install line not found in Dockerfile"
        # Find the last USER directive before the uv install line
        user_before = None
        for i in range(uv_line_idx - 1, -1, -1):
            if lines[i].strip().startswith("USER "):
                user_before = lines[i].strip().split()[1]
                break
        assert user_before == "root", (
            f"uv install must run as USER root (found USER {user_before!r}). "
            "Installing as hermeswebuitoo puts uv in /home/hermeswebuitoo/.local/bin "
            "which is not accessible to the hermeswebui runtime user."
        )
# ── docker_init.bash: skip uv download when already present ─────────────────
class TestInitScriptUvSkip:
    """docker_init.bash must skip the runtime uv download when uv already exists."""

    def test_init_script_checks_uv_before_download(self):
        """docker_init.bash must check 'command -v uv' before attempting download."""
        assert "command -v uv" in INIT_SCRIPT, (
            "docker_init.bash must check 'command -v uv' to skip download "
            "when uv is already pre-installed in the image (#357)"
        )

    def test_init_script_skips_download_if_present(self):
        """Init script must use conditional logic (if/else) around the uv download."""
        guard = re.search(r'if\s+command\s+-v\s+uv', INIT_SCRIPT)
        assert guard, (
            "docker_init.bash must use 'if command -v uv' guard around the download"
        )

    def test_init_script_curl_download_in_else_branch(self):
        """The curl download must be in the else branch (only runs if uv not found)."""
        conditional = re.search(
            r'if\s+command\s+-v\s+uv.*?fi',
            INIT_SCRIPT, re.DOTALL
        )
        assert conditional, "Could not find uv conditional block in docker_init.bash"
        block = conditional.group(0)
        pos_else = block.find("else")
        pos_curl = block.find("curl")
        assert pos_else != -1, "No 'else' branch in uv conditional"
        assert pos_curl != -1, "No 'curl' in uv conditional block"
        assert pos_curl > pos_else, (
            "curl download must be in the 'else' branch, not the 'if/then' branch"
        )

    def test_init_script_error_exit_on_download_failure(self):
        """Curl download must call error_exit on failure (not silently continue)."""
        fails_loudly = "error_exit" in INIT_SCRIPT and "Failed to install uv" in INIT_SCRIPT
        assert fails_loudly, (
            "docker_init.bash must call error_exit if uv download fails, "
            "so the container exits with a clear message instead of failing silently"
        )

    def test_init_script_path_includes_hermeswebui_local_bin(self):
        """PATH must include /home/hermeswebui/.local/bin for fallback runtime install."""
        assert "/home/hermeswebui/.local/bin" in INIT_SCRIPT, (
            "docker_init.bash must include /home/hermeswebui/.local/bin in PATH "
            "for the case where uv is installed at runtime via curl"
        )
# ── docker_init.bash: workspace directory permissions ────────────────────────
class TestWorkspacePermissions:
    """Workspace dir must be created/chowned via sudo and tolerate :ro mounts."""
    def test_workspace_uses_sudo_mkdir(self):
        """docker_init.bash must use 'sudo mkdir' for the workspace directory.
        Docker auto-creates bind-mount directories as root if they don't exist,
        leaving them unwritable by hermeswebui. sudo mkdir + chown fixes this.
        """
        # Find the workspace section
        # (800 chars is a window large enough to cover the whole block).
        ws_section = INIT_SCRIPT[
            INIT_SCRIPT.find("HERMES_WEBUI_DEFAULT_WORKSPACE"):
            INIT_SCRIPT.find("HERMES_WEBUI_DEFAULT_WORKSPACE") + 800
        ]
        assert "sudo mkdir" in ws_section, (
            "docker_init.bash must use 'sudo mkdir -p' for the workspace directory "
            "to handle the case where Docker created the bind-mount dir as root (#357)"
        )
    def test_workspace_uses_sudo_chown(self):
        """docker_init.bash must chown the workspace to hermeswebui when writable.
        The chown is now conditional on the workspace being writable, to allow
        read-only (:ro) workspace mounts without crashing (#670). The sudo chown
        must still be present in the script (just guarded by [ -w ]).
        """
        assert 'sudo chown hermeswebui:hermeswebui "$HERMES_WEBUI_DEFAULT_WORKSPACE"' in INIT_SCRIPT, (
            "docker_init.bash must 'sudo chown hermeswebui:hermeswebui' the workspace "
            "when it is writable, so the app user can write to it (#357)"
        )
    def test_workspace_mkdir_before_chown(self):
        """sudo mkdir must come before sudo chown in docker_init.bash."""
        # Ordering matters: chown of a nonexistent directory would fail.
        mkdir_pos = INIT_SCRIPT.find('sudo mkdir -p "$HERMES_WEBUI_DEFAULT_WORKSPACE"')
        chown_pos = INIT_SCRIPT.find('sudo chown hermeswebui:hermeswebui "$HERMES_WEBUI_DEFAULT_WORKSPACE"')
        assert mkdir_pos != -1, "sudo mkdir for workspace not found"
        assert chown_pos != -1, "sudo chown for workspace not found"
        assert mkdir_pos < chown_pos, "sudo mkdir must come before sudo chown"
    def test_workspace_error_exit_on_mkdir_failure(self):
        """sudo mkdir must call error_exit on failure."""
        assert 'sudo mkdir -p "$HERMES_WEBUI_DEFAULT_WORKSPACE" || error_exit' in INIT_SCRIPT, (
            "sudo mkdir for workspace must call error_exit on failure"
        )
    def test_workspace_chown_is_conditional_on_writable(self):
        """chown and write-test must be skipped for read-only workspace mounts (#670).
        The script must check [ -w "$HERMES_WEBUI_DEFAULT_WORKSPACE" ] before
        attempting chown or a write test, so :ro bind-mounts don't crash startup.
        """
        assert '[ -w "$HERMES_WEBUI_DEFAULT_WORKSPACE" ]' in INIT_SCRIPT, (
            "docker_init.bash must guard chown with [ -w ] to support read-only "
            "workspace mounts (:ro) without crashing (#670)"
        )
        # Read-only path must log a clear message rather than calling error_exit
        assert "read-only workspace is supported" in INIT_SCRIPT, (
            "docker_init.bash must print a clear message when workspace is read-only (#670)"
        )
    def test_init_script_syntax_valid(self):
        """docker_init.bash must pass bash -n syntax check."""
        # Function-local import keeps module import light; bash -n parses
        # the script without executing it.
        import subprocess
        result = subprocess.run(
            ["bash", "-n", str(REPO / "docker_init.bash")],
            capture_output=True, text=True
        )
        assert result.returncode == 0, (
            f"docker_init.bash failed bash -n syntax check:\n{result.stderr}"
        )

114
tests/test_issue401.py Normal file
View File

@@ -0,0 +1,114 @@
"""
Regression tests for tool-card persistence on session reload.
The older loadSession() path rewrote message history on the client:
- dropped role='tool' rows
- dropped empty assistant rows even when they carried tool_calls
- then ignored session.tool_calls on reload
That broke both durable logging and page refresh for valid tool runs.
"""
import json
import pathlib
import subprocess
import textwrap
# Repo root plus the two client-side sources these regression tests inspect.
REPO_ROOT = pathlib.Path(__file__).parent.parent.resolve()
SESSIONS_JS = (REPO_ROOT / "static" / "sessions.js").read_text(encoding="utf-8")  # loadSession() lives here
UI_JS = (REPO_ROOT / "static" / "ui.js").read_text(encoding="utf-8")  # renderMessages() lives here
def test_loadsession_preserves_tool_rows():
    """Reload must keep tool rows in S.messages so snippets can be reconstructed."""
    # The pre-fix client dropped tool rows with exactly this statement.
    dropped_tool_rows = "if (m.role === 'tool') continue;"
    assert dropped_tool_rows not in SESSIONS_JS, (
        "loadSession() must not drop role='tool' messages; renderMessages() hides them "
        "visually, but it still needs them for snippet reconstruction"
    )
def test_loadsession_uses_session_toolcalls_only_as_fallback():
    """Session summaries are the fallback, not the primary reload source."""
    # All three fragments must appear verbatim in the loadSession() source.
    required_snippets = (
        "if(!hasMessageToolMetadata&&data.session.tool_calls&&data.session.tool_calls.length)",
        "S.toolCalls=(data.session.tool_calls||[]).map(tc=>({...tc,done:true}));",
        "S.toolCalls=[];",
    )
    for snippet in required_snippets:
        assert snippet in SESSIONS_JS
def test_rendermessages_treats_openai_toolcall_assistants_as_visible():
    """OpenAI assistant rows with empty content but tool_calls must stay anchorable."""
    for snippet in (
        "const hasTc=Array.isArray(m.tool_calls)&&m.tool_calls.length>0;",
        "if(hasTc||hasTu||_messageHasReasoningPayload(m)) return true;",
    ):
        assert snippet in UI_JS
def _run_js(script_body: str) -> dict:
    """Run *script_body* under node with the loadSessionShape() harness prepended.

    The harness mirrors the fixed loadSession() shape logic: keep every
    role-bearing message, detect assistant tool metadata, and fall back to
    session-level tool_calls only when no message carries metadata. The
    script is expected to print a single JSON object to stdout, which is
    parsed and returned. Raises CalledProcessError if node exits non-zero.
    """
    program = textwrap.dedent(f"""
        function loadSessionShape(messages, sessionToolCalls) {{
            const filtered = (messages || []).filter(m => m && m.role);
            const hasMessageToolMetadata = filtered.some(m => {{
                if (!m || m.role !== 'assistant') return false;
                const hasTc = Array.isArray(m.tool_calls) && m.tool_calls.length > 0;
                const hasTu = Array.isArray(m.content) && m.content.some(p => p && p.type === 'tool_use');
                return hasTc || hasTu;
            }});
            const toolCalls = (!hasMessageToolMetadata && sessionToolCalls && sessionToolCalls.length)
                ? sessionToolCalls.map(tc => ({{ ...tc, done: true }}))
                : [];
            return {{ filtered, hasMessageToolMetadata, toolCalls }};
        }}
        {script_body}
    """)
    completed = subprocess.run(
        ["node", "-e", program],
        check=True,
        capture_output=True,
        text=True,
    )
    return json.loads(completed.stdout)
def test_reload_keeps_empty_assistant_toolcall_anchor():
    """OpenAI-style assistant {content:'', tool_calls:[...]} must survive reload."""
    shape = _run_js("""
        const messages = [
            { role: 'user', content: 'list files' },
            {
                role: 'assistant',
                content: '',
                tool_calls: [{ id: 'call-1', function: { name: 'terminal', arguments: '{}' } }]
            },
            { role: 'tool', tool_call_id: 'call-1', content: '{"output":"ok"}' },
            { role: 'assistant', content: 'Done.' }
        ];
        const loaded = loadSessionShape(messages, [{ name: 'terminal', assistant_msg_idx: 1 }]);
        process.stdout.write(JSON.stringify({
            filtered_len: loaded.filtered.length,
            has_metadata: loaded.hasMessageToolMetadata,
            fallback_len: loaded.toolCalls.length,
            assistant_tool_idx: loaded.filtered.findIndex(m => m.role === 'assistant' && m.tool_calls),
            tool_idx: loaded.filtered.findIndex(m => m.role === 'tool')
        }));
    """)
    # All four rows survive; metadata comes from the messages themselves,
    # so the session-level summary fallback stays unused.
    assert shape["filtered_len"] == 4
    assert shape["has_metadata"] is True
    assert shape["fallback_len"] == 0
    assert shape["assistant_tool_idx"] == 1
    assert shape["tool_idx"] == 2
def test_reload_uses_session_summary_when_messages_have_no_tool_metadata():
    """Older sessions should still render from session.tool_calls on reload."""
    shape = _run_js("""
        const messages = [
            { role: 'user', content: 'build site' },
            { role: 'assistant', content: 'Starting.' },
            { role: 'tool', content: '{"bytes_written": 4955}' },
            { role: 'assistant', content: '' }
        ];
        const sessionToolCalls = [
            { name: 'write_file', assistant_msg_idx: 1, snippet: 'bytes_written', tid: '' }
        ];
        const loaded = loadSessionShape(messages, sessionToolCalls);
        process.stdout.write(JSON.stringify({
            has_metadata: loaded.hasMessageToolMetadata,
            fallback_len: loaded.toolCalls.length,
            done_flag: loaded.toolCalls[0] && loaded.toolCalls[0].done === true
        }));
    """)
    # No assistant row carries tool metadata, so the summary fallback kicks in
    # and each summary entry is marked done.
    assert shape["has_metadata"] is False
    assert shape["fallback_len"] == 1
    assert shape["done_flag"] is True

313
tests/test_issue470.py Normal file
View File

@@ -0,0 +1,313 @@
"""
Tests for issue #470 — markdown link rendering bugs in renderMd():
1. Double-linking: [label](url) converted to <a>, then autolink re-matches
the URL inside href="..." and wraps it in a second <a>.
2. esc() applied to URLs in href attributes turns & → &amp;, breaking
URLs with query strings and producing &amp; in displayed link text.
3. Same double-linking bug inside table cells via inlineMd().
These tests verify the fixes by asserting against the rendered HTML that
ui.js serves, using a live server request to evaluate the actual JS output
indirectly (via checking ui.js source for the fixed patterns) AND by
running a lightweight Python mirror of the fixed renderMd logic.
Strategy: verify the fix is present in the JS source, then test the
expected rendering behaviour through the Python mirror.
"""
import pathlib
import re
import html as _html
# The renderer source; the source-level tests below assert on ui.js text.
REPO_ROOT = pathlib.Path(__file__).parent.parent
UI_JS = (REPO_ROOT / "static" / "ui.js").read_text()
# ── Helpers ──────────────────────────────────────────────────────────────────
def esc(s):
    """HTML-escape *s* — quotes included — mirroring the JS esc() helper."""
    text = str(s)
    return _html.escape(text, quote=True)
def _make_link(url, label):
"""Expected output for a [label](url) link after fix: href is NOT esc()-ed."""
return f'<a href="{url}" target="_blank" rel="noopener">{esc(label)}</a>'
# Minimal Python mirror of the FIXED renderMd() — enough to test link behaviour.
# Mirrors the stash-based approach introduced by the fix.
def render_links_only(text):
    """
    Apply only the link passes of the fixed renderMd() to *text*:

    1. [label](url) links become <a> tags and are stashed behind
       \\x00L<n>\\x00 placeholders so the autolink pass cannot re-match the
       URL sitting inside href="..." (the double-linking bug).
    2. Bare http(s) URLs are autolinked; one trailing punctuation character
       is peeled off so "https://x." links as https://x.
    3. Stashed links are restored.

    href values are NOT HTML-escaped (esc() turned & into &amp; and broke
    query strings); instead double-quotes are percent-encoded to %22 —
    matching the fixed JS (see test_js_source_sanitizes_quotes_in_href) —
    so a quote in the URL cannot break out of the href attribute.
    """
    def _esc(value):
        # Same contract as the JS esc(): escape HTML specials incl. quotes.
        return _html.escape(str(value), quote=True)

    stash = []

    def _stash_link(m):
        label, url = m.group(1), m.group(2)
        # FIX: percent-encode quotes so the URL cannot terminate href="...".
        href = url.replace('"', '%22')
        stash.append('<a href="%s" target="_blank" rel="noopener">%s</a>'
                     % (href, _esc(label)))
        return '\x00L%d\x00' % (len(stash) - 1)

    out = re.sub(r'\[([^\]]+)\]\((https?://[^\)]+)\)', _stash_link, text)

    def _autolink(m):
        url = m.group(1)
        trail = url[-1] if url[-1] in '.,;:!?)' else ''
        clean = url[:-1] if trail else url
        return ('<a href="%s" target="_blank" rel="noopener">%s</a>%s'
                % (clean, _esc(clean), trail))

    out = re.sub(r'(https?://[^\s<>"\')\]]+)', _autolink, out)
    # Restore stashed links last, after autolink can no longer touch them.
    return re.sub(r'\x00L(\d+)\x00', lambda m: stash[int(m.group(1))], out)
def render_table_with_links(md):
    """
    Render a markdown table whose cells may contain [label](url) links or
    bare URLs, mirroring the fixed inlineMd() + table rendering.

    Returns *md* unchanged when it is not a table (fewer than two lines, or
    the second line is not a |---| separator row). href URLs are kept raw
    except that double-quotes are percent-encoded to %22, matching the
    sanitization the fixed JS inlineMd() applies.
    """
    lines = md.strip().split('\n')
    if len(lines) < 2:
        return md
    if not re.match(r'^\|[\s|:-]+\|$', lines[1].strip()):
        return md

    def _esc(value):
        # Same contract as the JS esc(): escape HTML specials incl. quotes.
        return _html.escape(str(value), quote=True)

    def _inline(cell):
        """Fixed inlineMd subset: stash links, autolink, restore."""
        stash = []

        def _keep(m):
            label, url = m.group(1), m.group(2)
            # FIX: %22-encode quotes so the URL cannot break out of href="...".
            stash.append('<a href="%s" target="_blank" rel="noopener">%s</a>'
                         % (url.replace('"', '%22'), _esc(label)))
            return '\x00L%d\x00' % (len(stash) - 1)

        cell = re.sub(r'\[([^\]]+)\]\((https?://[^\)]+)\)', _keep, cell)

        def _auto(m):
            url = m.group(1)
            trail = url[-1] if url[-1] in '.,;:!?)' else ''
            clean = url[:-1] if trail else url
            return ('<a href="%s" target="_blank" rel="noopener">%s</a>%s'
                    % (clean, _esc(clean), trail))

        cell = re.sub(r'(https?://[^\s<>"\')\]]+)', _auto, cell)
        return re.sub(r'\x00L(\d+)\x00', lambda m: stash[int(m.group(1))], cell)

    def _cells(row, tag):
        # '| a | b |' → '<tag>a</tag><tag>b</tag>' with per-cell inline pass.
        parts = row.strip().lstrip('|').rstrip('|').split('|')
        return ''.join('<%s>%s</%s>' % (tag, _inline(p.strip()), tag)
                       for p in parts)

    header = '<tr>%s</tr>' % _cells(lines[0], 'th')
    body = ''.join('<tr>%s</tr>' % _cells(r, 'td') for r in lines[2:])
    return '<table><thead>%s</thead><tbody>%s</tbody></table>' % (header, body)
# ── Source-level checks (verify fix is in the JS) ─────────────────────────────
def test_inlinemd_uses_link_stash():
    """Fixed inlineMd() must stash [label](url) links before autolink runs."""
    failure_msg = "inlineMd() should use _link_stash to prevent double-linking"
    assert '_link_stash' in UI_JS, failure_msg
def test_inlinemd_no_esc_on_href():
    """Fixed inlineMd() must not call esc() on the URL in href."""
    # The old broken pattern escaped & inside href via esc(u).
    broken_pattern = 'href="${esc(u)}"'
    assert broken_pattern not in UI_JS, (
        "inlineMd() should not call esc() on href URL — it breaks & in query strings"
    )
def test_outer_link_pass_uses_a_stash():
    """Fixed outer link pass must stash existing <a> tags before running."""
    failure_msg = "Outer [label](url) pass should stash existing <a> tags to prevent autolink re-matching"
    assert '_a_stash' in UI_JS, failure_msg
def test_autolink_pass_uses_al_stash():
    """Fixed autolink pass must stash existing <a> tags before running."""
    failure_msg = "Autolink pass should stash existing <a> tags to prevent double-linking"
    assert '_al_stash' in UI_JS, failure_msg
def test_autolink_no_esc_on_href():
    """Fixed autolink pass must not call esc() on href URL."""
    marker = '// Autolink: convert plain URLs to clickable links.'
    start = UI_JS.find(marker)
    assert start != -1, "New autolink comment not found"
    # Inspect only the autolink section (the fix sits within ~600 chars).
    section = UI_JS[start:start + 600]
    assert 'href="${clean}"' in section, (
        'Autolink should use href="${clean}" not href="${esc(clean)}"'
    )
    assert 'href="${esc(clean)}"' not in section, (
        "Autolink should not esc() the URL in href"
    )
# ── Behaviour tests (Python mirror of fixed renderMd) ─────────────────────────
def test_labeled_link_renders_as_single_anchor():
    """[#461](https://github.com/.../461) must produce exactly one <a> tag."""
    url = 'https://github.com/nesquena/hermes-webui/issues/461'
    rendered = render_links_only(f'[#461]({url})')
    assert rendered.count('<a ') == 1, f"Expected 1 <a> tag, got: {rendered}"
    assert rendered.count('</a>') == 1
    assert f'href="{url}"' in rendered
    assert '#461' in rendered
    # The raw markdown syntax must be fully consumed.
    assert '[#461]' not in rendered
    assert f']({url})' not in rendered
def test_href_not_html_escaped():
    """URLs with & must appear as literal & in href, not &amp;."""
    url = 'https://example.com/search?q=foo&bar=baz'
    rendered = render_links_only(f'[Search]({url})')
    assert f'href="{url}"' in rendered, (
        f"& in URL should not be escaped to &amp; in href. Got: {rendered}"
    )
    assert '&amp;' not in rendered
def test_bare_url_not_double_linked():
    """A bare https:// URL must produce exactly one <a> tag."""
    rendered = render_links_only('https://github.com/nesquena/hermes-webui/issues/461')
    assert rendered.count('<a ') == 1, f"Expected 1 <a> tag, got: {rendered}"
    assert rendered.count('</a>') == 1
def test_labeled_link_in_table_cell_single_anchor():
    """[#461](url) inside a markdown table cell must produce exactly one <a> tag."""
    url = 'https://github.com/nesquena/hermes-webui/issues/461'
    table_md = f'| Issue | Title |\n|---|---|\n| [#461]({url}) | Reasoning effort |'
    rendered = render_table_with_links(table_md)
    assert rendered.count('<a ') == 1, f"Expected 1 <a> in table, got: {rendered}"
    assert f'href="{url}"' in rendered
    assert '#461' in rendered
    # The raw markdown bracket syntax must be fully consumed.
    assert '[#461]' not in rendered
def test_multiple_links_in_table_no_double_linking():
    """Multiple [label](url) links in a table must each produce exactly one <a>."""
    urls = [
        'https://github.com/nesquena/hermes-webui/issues/461',
        'https://github.com/nesquena/hermes-webui/issues/462',
        'https://github.com/nesquena/hermes-webui/issues/463',
    ]
    body_rows = '\n'.join(
        f'| [#{461 + i}]({link}) | Title {i} |' for i, link in enumerate(urls)
    )
    rendered = render_table_with_links(f'| Issue | Title |\n|---|---|\n{body_rows}')
    assert rendered.count('<a ') == 3, f"Expected 3 <a> tags, got {rendered.count('<a ')}:\n{rendered}"
    assert rendered.count('</a>') == 3
    for link in urls:
        assert f'href="{link}"' in rendered
def test_link_label_is_escaped():
    """The label text (not the URL) must still be HTML-escaped."""
    rendered = render_links_only('[Click <here>](https://example.com)')
    assert '&lt;here&gt;' in rendered, "Label text should be HTML-escaped"
    assert '<here>' not in rendered
def test_link_not_broken_by_prior_autolink():
    """A [label](url) followed by a bare URL must each produce one clean <a>."""
    labeled_url = 'https://github.com/issues/461'
    bare_url = 'https://github.com/issues/462'
    rendered = render_links_only(f'See [#461]({labeled_url}) and also {bare_url}')
    assert rendered.count('<a ') == 2, f"Expected 2 links, got: {rendered}"
    assert f'href="{labeled_url}"' in rendered
    assert f'href="{bare_url}"' in rendered
    assert '#461' in rendered
def test_href_quote_sanitized():
    """A URL containing a double-quote must have it percent-encoded in href to prevent attribute breakout."""
    # Without encoding, a quote in the URL terminates href="..." early and
    # allows injecting e.g. an onmouseover handler.
    # (FIX: removed an unused 'url' local that was never passed anywhere, and
    # guard the find() so a missing href fails with a clear message.)
    safe_url = 'https://example.com/path"with"quotes'
    result = render_links_only(f'[click]({safe_url})')
    assert 'href="' in result, f"No href attribute rendered: {result}"
    # NOTE(review): slicing from 'href="' to the next '"' can never itself
    # contain a quote; the real signal is whether the slice covers the whole
    # sanitized URL — confirm against the %22 source-level check below.
    href_start = result.find('href="') + 6
    href_end = result.find('"', href_start)
    href_val = result[href_start:href_end]
    assert '"' not in href_val, (
        f"href value must not contain unencoded double-quote. Got href: {href_val}"
    )
def test_js_source_sanitizes_quotes_in_href():
    """JS source must apply quote percent-encoding to URLs before placing in href."""
    # Both the inlineMd stash and the outer link pass sanitize via .replace.
    encoded_quote = "%22"
    assert encoded_quote in UI_JS, (
        "URL placed in href should have double-quotes percent-encoded via .replace to %22"
    )
# ── Code-inside-bold tests (pre-existing bug, fixed in same PR) ───────────────
def test_js_inlinemd_stashes_code_before_bold():
    """Fixed inlineMd() must stash backtick code spans before bold/italic processing."""
    failure_msg = "inlineMd() should use _code_stash to protect backtick spans from bold/italic esc()"
    assert '_code_stash' in UI_JS, failure_msg
def test_code_inside_bold_renders_correctly():
    """Inline code inside bold text must render as <strong><code>...</code></strong>,
    not with escaped &lt;code&gt; tags visible on screen."""
    # The pre-existing bug: **`esc()`** → <strong>&lt;code&gt;esc()&lt;/code&gt;</strong>
    text = '**`esc()` on `href`**: breaks URLs'
    # Simulate the fixed inlineMd(): stash code, run bold, restore code.
    code_stash = []

    def _stash_code(m):
        code_stash.append(f'<code>{esc(m.group(1))}</code>')
        return f'\x00C{len(code_stash) - 1}\x00'

    t = re.sub(r'`([^`\n]+)`', _stash_code, text)
    t = re.sub(r'\*\*(.+?)\*\*', lambda m: f'<strong>{esc(m.group(1))}</strong>', t)
    t = re.sub(r'\x00C(\d+)\x00', lambda m: code_stash[int(m.group(1))], t)
    assert '&lt;code&gt;' not in t, (
        f"Code tags should not be HTML-escaped inside bold. Got: {t}"
    )
    assert '<code>esc()</code>' in t, (
        f"Code tags should render as <code> elements inside bold. Got: {t}"
    )
    assert '<strong>' in t, "Bold should still render"
def test_code_and_bold_mixed_no_escaping():
    """Bold text containing multiple backtick spans must render all code tags correctly."""
    cases = [
        ('**`esc()` on `href`**', '<strong>', '<code>esc()</code>', '<code>href</code>'),
        ('***`code` in bold-italic***', '<strong>', '<code>code</code>'),
        ('`code` then **bold**', '<code>code</code>', '<strong>bold</strong>'),
    ]
    for text, *expected_fragments in cases:
        stash = []

        def _keep_code(m):
            stash.append(f'<code>{esc(m.group(1))}</code>')
            return f'\x00C{len(stash) - 1}\x00'

        t = re.sub(r'`([^`\n]+)`', _keep_code, text)
        t = re.sub(r'\*\*\*(.+?)\*\*\*', lambda m: f'<strong><em>{esc(m.group(1))}</em></strong>', t)
        t = re.sub(r'\*\*(.+?)\*\*', lambda m: f'<strong>{esc(m.group(1))}</strong>', t)
        t = re.sub(r'\x00C(\d+)\x00', lambda m: stash[int(m.group(1))], t)
        assert '&lt;code&gt;' not in t, f"Escaped code tag in: {text!r}{t}"
        for frag in expected_fragments:
            assert frag in t, f"Expected {frag!r} in output of {text!r}, got: {t}"

26
tests/test_issue477.py Normal file
View File

@@ -0,0 +1,26 @@
"""Tests for fix #477: KaTeX font-src CSP fix."""
import pathlib
# helpers.py builds the Content-Security-Policy header these tests inspect.
REPO = pathlib.Path(__file__).parent.parent
HELPERS_PY = (REPO / "api" / "helpers.py").read_text(encoding="utf-8")
def test_font_src_allows_jsdelivr():
    """font-src must include cdn.jsdelivr.net for KaTeX fonts."""
    required_directive = "font-src 'self' data: https://cdn.jsdelivr.net"
    assert required_directive in HELPERS_PY, (
        "api/helpers.py CSP must allow cdn.jsdelivr.net in font-src "
        "so KaTeX math rendering fonts load without console errors."
    )
def test_font_src_still_allows_self_and_data():
    """font-src must still allow self and data: (used by other font assets)."""
    # FIX: guard against a missing directive (was a raw IndexError) and
    # extract the directive value once instead of recomputing the split.
    assert "font-src" in HELPERS_PY, "CSP in api/helpers.py has no font-src directive"
    # Everything between "font-src" and the next ";" is the directive value.
    font_src_value = HELPERS_PY.split("font-src")[1].split(";")[0]
    assert "'self'" in font_src_value
    assert "data:" in font_src_value
def test_script_src_already_allows_jsdelivr():
    """script-src already allows cdn.jsdelivr.net — font-src should too."""
    # Only look at the CSP text preceding the font-src directive.
    before_font_src = HELPERS_PY.split("font-src")[0]
    assert "https://cdn.jsdelivr.net" in before_font_src, (
        "script-src should already allow cdn.jsdelivr.net (KaTeX JS)"
    )

572
tests/test_issue486_487.py Normal file
View File

@@ -0,0 +1,572 @@
"""
Tests for issue #486 (CSS: inline code in table cells) and
issue #487 (JS renderer: markdown image syntax not implemented).
Issue #486 — CSS fix in static/style.css:
Inline `code` spans inside table cells render with awkward sizing.
Fix: td code, th code { font-size: 0.85em; padding: 1px 4px; vertical-align: baseline; }
Issue #487 — JS fix in static/ui.js:
![alt](url) image syntax not handled — renders as stray ! + link.
Fix: add image pass to renderMd() (before link pass) and inlineMd()
reusing the .msg-media-img class.
Strategy:
- Source-level checks verify the fixes are present in the JS/CSS.
- Python mirror tests verify the rendering logic with exhaustive edge cases,
especially code blocks inside tables (the specific case Nathan flagged).
"""
import pathlib
import re
import html as _html
# Renderer JS and stylesheet sources; tests assert directly on their text.
REPO_ROOT = pathlib.Path(__file__).parent.parent
UI_JS = (REPO_ROOT / "static" / "ui.js").read_text()
STYLE_CSS = (REPO_ROOT / "static" / "style.css").read_text()
# ── Helpers ───────────────────────────────────────────────────────────────────
def esc(s):
    """Escape HTML specials in *s*, quotes included (JS esc() equivalent)."""
    return _html.escape(str(s), quote=True)
def inline_md(t):
    """
    Python mirror of the fixed inlineMd() pipeline.

    Pass order is the whole point of the fix:

    1. code stash   — \\x00C tokens protect `...` from every later pass
    2. bold/italic  — runs on plain text only
    3. image pass   — ![alt](url) → <img>; code content is still stashed,
                      so image syntax inside backticks stays protected
    4. img stash    — \\x00I tokens keep autolink away from src="..."
    5. link stash   — \\x00L tokens keep autolink away from [label](url)
    6. autolink     — only bare URLs remain matchable
    7-9. restore link, img, then code stashes (reverse of creation)
    """
    def _esc(value):
        # Same contract as the JS esc(): HTML-escape including quotes.
        return _html.escape(str(value), quote=True)

    # 1. Stash backtick code spans first so nothing later can touch them.
    code_spans = []

    def _grab_code(m):
        code_spans.append('<code>%s</code>' % _esc(m.group(1)))
        return '\x00C%d\x00' % (len(code_spans) - 1)

    t = re.sub(r'`([^`\n]+)`', _grab_code, t)

    # 2. Bold / italic (safe now: code content is stashed away).
    t = re.sub(r'\*\*\*(.+?)\*\*\*',
               lambda m: '<strong><em>%s</em></strong>' % _esc(m.group(1)), t)
    t = re.sub(r'\*\*(.+?)\*\*',
               lambda m: '<strong>%s</strong>' % _esc(m.group(1)), t)
    t = re.sub(r'\*([^*\n]+)\*',
               lambda m: '<em>%s</em>' % _esc(m.group(1)), t)

    # 3. Image pass: ![alt](url) → <img>, quotes in the URL %22-encoded.
    def _image(m):
        alt, url = m.group(1), m.group(2)
        return ('<img src="%s" alt="%s" class="msg-media-img" loading="lazy" '
                'onclick="this.classList.toggle(\'msg-media-img--full\')">'
                % (url.replace('"', '%22'), _esc(alt)))

    t = re.sub(r'!\[([^\]]*)\]\((https?://[^\)]+)\)', _image, t)

    # 4. Stash rendered <img> tags so autolink never rewrites src= URLs.
    img_tags = []

    def _grab_img(m):
        img_tags.append(m.group(0))
        return '\x00I%d\x00' % (len(img_tags) - 1)

    t = re.sub(r'<img\b[^>]*>', _grab_img, t)

    # 5. Stash [label](url) links (href raw except %22-encoded quotes).
    links = []

    def _grab_link(m):
        label, url = m.group(1), m.group(2)
        links.append('<a href="%s" target="_blank" rel="noopener">%s</a>'
                     % (url.replace('"', '%22'), _esc(label)))
        return '\x00L%d\x00' % (len(links) - 1)

    t = re.sub(r'\[([^\]]+)\]\((https?://[^\)]+)\)', _grab_link, t)

    # 6. Autolink bare URLs; every already-rendered URL is behind a token.
    def _auto(m):
        url = m.group(1)
        trail = url[-1] if url[-1] in '.,;:!?)' else ''
        clean = url[:-1] if trail else url
        return ('<a href="%s" target="_blank" rel="noopener">%s</a>%s'
                % (clean, _esc(clean), trail))

    t = re.sub(r'(https?://[^\s<>"\')\]]+)', _auto, t)

    # 7-9. Restore stashes: links, images, and finally code.
    t = re.sub(r'\x00L(\d+)\x00', lambda m: links[int(m.group(1))], t)
    t = re.sub(r'\x00I(\d+)\x00', lambda m: img_tags[int(m.group(1))], t)
    t = re.sub(r'\x00C(\d+)\x00', lambda m: code_spans[int(m.group(1))], t)
    return t
def render_table(md):
    """Python mirror of the table pass; every cell goes through inline_md().

    Returns *md* unchanged when it is not a table (fewer than two lines or
    no |---| separator as the second line).
    """
    rows = md.strip().split('\n')
    if len(rows) < 2:
        return md
    if not re.match(r'^\|[\s|:-]+\|$', rows[1].strip()):
        return md

    def _cells(row, tag):
        # '| a | b |' → '<tag>a</tag><tag>b</tag>' with a per-cell inline pass.
        parts = row.strip().lstrip('|').rstrip('|').split('|')
        return ''.join('<%s>%s</%s>' % (tag, inline_md(p.strip()), tag)
                       for p in parts)

    head = '<tr>%s</tr>' % _cells(rows[0], 'th')
    body = ''.join('<tr>%s</tr>' % _cells(r, 'td') for r in rows[2:])
    return '<table><thead>%s</thead><tbody>%s</tbody></table>' % (head, body)
# ═════════════════════════════════════════════════════════════════════════════
# ISSUE #486 — CSS: code inside table cells
# ═════════════════════════════════════════════════════════════════════════════
class TestIssue486CssCodeInTable:
    """CSS fix: td code and th code must have targeted sizing rules."""

    # ── Source-level checks against style.css ────────────────────────────────
    def test_td_code_font_size_present(self):
        """msg-body td code rule must set font-size (e.g. 0.85em) to prevent oversized code."""
        assert 'td code' in STYLE_CSS, (
            "Missing 'td code' CSS rule — inline code in table cells needs sizing fix"
        )

    def test_th_code_rule_present(self):
        """th code rule must also exist for header cells."""
        assert 'th code' in STYLE_CSS, (
            "Missing 'th code' CSS rule — inline code in header cells needs sizing fix"
        )

    def test_td_code_has_font_size(self):
        """The td code / th code block must include a font-size declaration."""
        idx = STYLE_CSS.find('td code')
        assert idx != -1, "td code rule not found in style.css"
        # The declaration is expected within the rule body (~200 chars).
        window = STYLE_CSS[idx:idx + 200]
        assert 'font-size' in window, (
            f"td code rule must include font-size. Found near td code: {window!r}"
        )

    def test_td_code_has_padding(self):
        """The td code / th code block must include a padding declaration."""
        idx = STYLE_CSS.find('td code')
        assert idx != -1
        window = STYLE_CSS[idx:idx + 200]
        assert 'padding' in window, (
            f"td code rule must include padding. Found near td code: {window!r}"
        )

    def test_td_code_has_vertical_align(self):
        """The td code / th code block must include vertical-align: baseline."""
        idx = STYLE_CSS.find('td code')
        assert idx != -1
        window = STYLE_CSS[idx:idx + 200]
        assert 'vertical-align' in window, (
            f"td code rule must include vertical-align. Found near td code: {window!r}"
        )

    # ── Behaviour checks through the Python mirror ───────────────────────────
    def test_code_renders_inside_table_cell(self):
        """Inline `code` inside a table cell must render as <code> element."""
        md = "| Syntax | Rendered |\n|---|---|\n| `code` | `code` |"
        result = render_table(md)
        assert '<code>code</code>' in result, (
            f"Inline code in table cell should render as <code>. Got: {result}"
        )

    def test_bold_code_renders_inside_table_cell(self):
        """**`bold code`** inside a table cell must render as <strong><code>."""
        md = "| Style | Example |\n|---|---|\n| bold code | **`bold code`** |"
        result = render_table(md)
        # Should have code tag (even inside bold)
        assert '<code>bold code</code>' in result, (
            f"Bold code in table should render as <code>. Got: {result}"
        )

    def test_multiple_code_spans_in_same_cell(self):
        """Multiple backtick spans in one cell all render as <code>."""
        md = "| Combined |\n|---|\n| `a` and `b` |"
        result = render_table(md)
        assert result.count('<code>') == 2, (
            f"Expected 2 code tags in cell, got: {result}"
        )

    def test_code_in_header_cell(self):
        """`code` in a <th> header cell must also render as <code>."""
        md = "| `header code` | Normal |\n|---|---|\n| data | data |"
        result = render_table(md)
        assert '<code>header code</code>' in result, (
            f"Code in header cell should render. Got: {result}"
        )

    def test_code_not_mangled_by_bold_in_table(self):
        """**`code`** in a table cell must NOT produce &lt;code&gt; (the pre-fix bug)."""
        md = "| Pattern | Example |\n|---|---|\n| bold-code | **`npm install`** |"
        result = render_table(md)
        assert '&lt;code&gt;' not in result, (
            f"Code tags inside bold in table must not be HTML-escaped. Got: {result}"
        )
        assert '<strong>' in result, "Bold wrapper should be present"
        assert '<code>npm install</code>' in result

    def test_code_with_special_chars_in_table(self):
        """`<script>` inside a table cell must have the angle brackets escaped."""
        md = "| Input | Output |\n|---|---|\n| `<script>` | sanitized |"
        result = render_table(md)
        assert '&lt;script&gt;' in result, (
            f"Code content must be HTML-escaped. Got: {result}"
        )
        # The <code> wrapper itself must be there
        assert '<code>' in result

    def test_code_adjacent_to_link_in_table(self):
        """`code` and [link](url) in same cell both render correctly."""
        url = 'https://example.com'
        md = f"| Mixed |\n|---|\n| `foo` and [bar]({url}) |"
        result = render_table(md)
        assert '<code>foo</code>' in result
        assert f'href="{url}"' in result
        assert 'bar' in result

    def test_empty_code_span_in_table(self):
        """Edge case: an empty/unmatched backtick pair in a table cell — no crash."""
        # FIX: this test previously rendered "| normal text |" and never
        # exercised the edge case its name promised. '``' cannot match the
        # code regex (it requires ≥1 char between backticks), so it must
        # pass through the cell verbatim rather than crashing the renderer.
        md = "| Col |\n|---|\n| `` plus normal text |"
        result = render_table(md)
        assert '<td>`` plus normal text</td>' in result
        # Plain cells still render untouched as well.
        assert '<td>normal text</td>' in render_table("| Col |\n|---|\n| normal text |")
# ═════════════════════════════════════════════════════════════════════════════
# ISSUE #487 — JS renderer: markdown image syntax
# ═════════════════════════════════════════════════════════════════════════════
class TestIssue487ImageRendering:
"""Image syntax ![alt](url) must render as <img>, not as ! + link."""
# ── Source-level checks ──────────────────────────────────────────────────
def test_image_pass_present_in_ui_js(self):
"""renderMd() must contain an image regex pass for ![alt](url)."""
assert '![' in UI_JS or r'!\[' in UI_JS, (
"ui.js should contain image syntax handling (![...](url) regex)"
)
# More specifically, look for the img tag being generated
assert 'msg-media-img' in UI_JS, (
"Image pass should reuse .msg-media-img class"
)
def test_image_pass_runs_before_link_pass_in_outer(self):
"""Image regex must appear in ui.js BEFORE the [label](url) link pass."""
# Find the image pass position
img_idx = UI_JS.find('!\\[')
if img_idx == -1:
img_idx = UI_JS.find("![")
# Find the outer labeled link pass position (after table pass)
link_idx = UI_JS.find("Outer link pass for labeled links")
assert img_idx != -1, "Image pass not found in ui.js"
assert link_idx != -1, "Outer link pass comment not found in ui.js"
assert img_idx < link_idx, (
"Image pass must run before the outer [label](url) link pass "
"to prevent the image from being consumed as a plain link"
)
def test_image_url_sanitized_for_quotes(self):
"""Image src URL must have double-quotes percent-encoded."""
# The image pass must use .replace(/"/g,'%22') or equivalent
# Look for the pattern near image handling
img_idx = UI_JS.find('msg-media-img')
assert img_idx != -1
# Find all occurrences — there's the MEDIA restore and the new image pass
# The new one should have %22 for URL sanitization
assert '%22' in UI_JS, (
"Image src URL must sanitize double-quotes to %22"
)
def test_image_alt_uses_esc(self):
"""Alt text must be passed through esc() to prevent XSS."""
# Look for esc( call near the image rendering code
# The pattern should be: alt="${esc(alt)}"
assert 'esc(' in UI_JS, "esc() function must be used for alt text"
def test_safe_tags_includes_img(self):
"""SAFE_TAGS allowlist must include 'img' to prevent the tag from being escaped."""
# Find the SAFE_TAGS regex in ui.js
safe_idx = UI_JS.find('SAFE_TAGS=')
assert safe_idx != -1, "SAFE_TAGS not found in ui.js"
safe_window = UI_JS[safe_idx:safe_idx+300]
assert 'img' in safe_window, (
f"SAFE_TAGS must include 'img' tag. Found: {safe_window!r}"
)
def test_inlinemd_has_image_pass(self):
"""inlineMd() must also handle ![alt](url) for images inside table cells."""
# inlineMd is called for table cells, list items, blockquotes
# Find inlineMd function body
start = UI_JS.find('function inlineMd(')
assert start != -1, "inlineMd function not found"
# Get a generous window covering the function
fn_window = UI_JS[start:start+1500]
assert '![' in fn_window or r'!\[' in fn_window, (
"inlineMd() must handle image syntax for images in table cells"
)
# ── Behaviour tests (Python mirror) ─────────────────────────────────────
def test_basic_image_renders_as_img_tag(self):
"""![alt](https://example.com/img.png) must produce an <img> tag."""
t = '![A cat](https://example.com/cat.png)'
result = inline_md(t)
assert '<img ' in result, f"Expected <img> tag, got: {result}"
assert 'src="https://example.com/cat.png"' in result
assert 'alt="A cat"' in result
# Must NOT have the raw ![...] syntax left over
assert '![' not in result
# Must NOT have a stray ! character
assert result.startswith('<img '), f"Result should start with img tag: {result}"
def test_image_does_not_render_as_link(self):
"""![alt](url) must NOT produce an <a> tag (the pre-fix bug)."""
t = '![Logo](https://example.com/logo.png)'
result = inline_md(t)
assert '<a ' not in result, (
f"Image must not render as an <a> tag. Got: {result}"
)
def test_image_stray_exclamation_not_present(self):
    """No stray ! character before the img tag (the pre-fix symptom)."""
    rendered = inline_md('![alt](https://example.com/img.png)')
    # Remove the <img> tag itself; whatever remains must not contain '!'.
    leftover = re.sub(r'<img[^>]+>', '', rendered)
    assert '!' not in leftover, (
        f"Stray ! character present after image render. Got: {rendered}"
    )
def test_image_uses_msg_media_img_class(self):
    """Rendered <img> must use class=\"msg-media-img\" for consistent styling."""
    rendered = inline_md('![screenshot](https://example.com/shot.png)')
    assert 'class="msg-media-img"' in rendered, (
        f"Image must use .msg-media-img class. Got: {rendered}"
    )
def test_image_has_lazy_loading(self):
    """Rendered <img> must have loading=\"lazy\"."""
    rendered = inline_md('![x](https://example.com/x.png)')
    assert 'loading="lazy"' in rendered, f"Expected loading=lazy. Got: {rendered}"
def test_image_has_click_to_zoom(self):
    """Rendered <img> must have onclick toggle for zoom."""
    rendered = inline_md('![x](https://example.com/x.png)')
    # The zoom toggle works by swapping in the --full modifier class.
    assert 'msg-media-img--full' in rendered, (
        f"Image must have click-to-zoom onclick. Got: {rendered}"
    )
def test_image_alt_is_escaped(self):
    """Alt text with HTML special chars must be escaped."""
    rendered = inline_md('![<evil>](https://example.com/img.png)')
    assert '&lt;evil&gt;' in rendered, (
        f"Alt text must be HTML-escaped. Got: {rendered}"
    )
    assert '<evil>' not in rendered
def test_image_url_quote_sanitized(self):
    """Double-quote in image URL must be percent-encoded to prevent attribute breakout."""
    rendered = inline_md('![x](https://example.com/path"with"quotes.png)')
    # Pull the src attribute value out of the rendered tag.
    match = re.search(r'src="([^"]*)"', rendered)
    assert match, f"src attribute not found. Got: {rendered}"
    src_value = match.group(1)
    assert '"' not in src_value, (
        f"Raw double-quote in src would break attribute. Got src: {src_value!r}"
    )
def test_image_no_javascript_uri(self):
    """javascript: URIs must not be rendered as image src (regex only matches http/https)."""
    rendered = inline_md('![x](javascript:alert(1))')
    # The image regex requires https?://, so this input stays untouched.
    assert '<img ' not in rendered, (
        f"javascript: URI must not render as <img>. Got: {rendered}"
    )
def test_image_no_data_uri(self):
    """data: URIs must not be rendered as image src."""
    rendered = inline_md('![x](data:image/png;base64,abc123)')
    assert '<img ' not in rendered, (
        f"data: URI must not render as <img>. Got: {rendered}"
    )
def test_image_followed_by_text(self):
    """Image followed by plain text — only the image becomes an <img>."""
    rendered = inline_md('![cat](https://example.com/cat.png) and some text')
    assert '<img ' in rendered
    assert 'and some text' in rendered
def test_image_preceded_by_text(self):
    """Text before an image — both render correctly."""
    rendered = inline_md('Here is a screenshot: ![shot](https://example.com/shot.png)')
    assert 'Here is a screenshot:' in rendered
    assert '<img ' in rendered
def test_image_and_link_in_same_cell(self):
    """Image and link in same inline context both render correctly."""
    rendered = inline_md(
        '![img](https://example.com/img.png) see [here](https://example.com)'
    )
    assert '<img ' in rendered
    assert '<a href="https://example.com"' in rendered
    assert '![' not in rendered
def test_image_inside_table_cell(self):
    """![alt](url) inside a markdown table cell must render as <img>."""
    table_md = "\n".join([
        "| Image | Caption |",
        "|---|---|",
        "| ![logo](https://example.com/logo.png) | Company logo |",
    ])
    rendered = render_table(table_md)
    assert '<img ' in rendered, f"Image in table should render as <img>. Got: {rendered}"
    assert 'src="https://example.com/logo.png"' in rendered
    assert '<a ' not in rendered, "Image in table must not render as <a>"
def test_image_in_table_no_stray_exclamation(self):
    """No stray ! before the <img> when image is inside a table cell."""
    rendered = render_table("| X |\n|---|\n| ![x](https://x.com/x.png) |")
    # Drop every HTML tag; the remaining visible text must not contain '!'.
    text_only = re.sub(r'<[^>]+>', '', rendered)
    assert '!' not in text_only, (
        f"Stray ! in table cell after image render. Cleaned: {text_only!r}"
    )
def test_empty_alt_text_image(self):
    """![](url) with empty alt renders as <img> with empty alt attribute."""
    rendered = inline_md('![](https://example.com/img.png)')
    assert '<img ' in rendered
    assert 'alt=""' in rendered
def test_multiple_images_in_one_cell(self):
    """Two images in one table cell both render as <img> tags."""
    rendered = inline_md(
        '![a](https://example.com/a.png) ![b](https://example.com/b.png)'
    )
    assert rendered.count('<img ') == 2, (
        f"Expected 2 img tags. Got: {rendered}"
    )
def test_image_with_https_url(self):
    """https:// image URL renders correctly."""
    rendered = inline_md('![secure](https://secure.example.com/img.jpg)')
    assert 'src="https://secure.example.com/img.jpg"' in rendered
def test_image_with_http_url(self):
    """http:// image URL also renders (non-https still valid)."""
    rendered = inline_md('![old](http://example.com/img.jpg)')
    assert '<img ' in rendered
    assert 'src="http://example.com/img.jpg"' in rendered
# ═════════════════════════════════════════════════════════════════════════════
# Cross-cutting: code + image together inside tables (the edge case Nathan flagged)
# ═════════════════════════════════════════════════════════════════════════════
class TestEdgeCasesCodeAndImageInTables:
    """Combination edge cases: code blocks and images mixed inside table cells."""

    def test_code_and_image_in_same_table_row(self):
        """Table row with code in one cell and image in another renders both correctly."""
        table_md = "\n".join([
            "| Code | Preview |",
            "|---|---|",
            "| `print('hello')` | ![screenshot](https://example.com/shot.png) |",
        ])
        rendered = render_table(table_md)
        # Accept either escaping of the single quotes inside the code span.
        code_ok = ("<code>print(&#x27;hello&#x27;)</code>" in rendered
                   or "<code>print('hello')</code>" in rendered)
        assert code_ok, (
            f"Code cell should render as <code>. Got: {rendered}"
        )
        assert '<img ' in rendered, "Image cell should render as <img>"

    def test_code_in_cell_with_image_in_next_cell(self):
        """Multiple columns: code stays code, image stays image, no cross-contamination."""
        table_md = "\n".join([
            "| Step | Example |",
            "|---|---|",
            "| Run `npm install` | ![demo](https://example.com/demo.gif) |",
        ])
        rendered = render_table(table_md)
        assert '<code>npm install</code>' in rendered
        assert '<img ' in rendered
        # The image must not be turned into (or wrapped in) a link.
        assert '<a ' not in rendered

    def test_bold_code_in_cell_and_image_in_cell(self):
        """**`code`** in one cell and image in another — no esc() mangling."""
        table_md = "\n".join([
            "| Command | Result |",
            "|---|---|",
            "| **`git status`** | ![result](https://example.com/r.png) |",
        ])
        rendered = render_table(table_md)
        assert '&lt;code&gt;' not in rendered, (
            "Bold+code in table cell must not produce escaped code tags"
        )
        assert '<code>git status</code>' in rendered
        assert '<img ' in rendered

    def test_link_code_image_all_in_table(self):
        """Table with code, link, and image cells all render correctly."""
        link_url = 'https://github.com/issues/486'
        image_url = 'https://example.com/img.png'
        table_md = "\n".join([
            "| Code | Link | Image |",
            "|---|---|---|",
            f"| `var x = 1` | [#486]({link_url}) | ![img]({image_url}) |",
        ])
        rendered = render_table(table_md)
        assert '<code>var x = 1</code>' in rendered
        assert f'href="{link_url}"' in rendered
        assert '<img ' in rendered
        # Exactly one anchor: the explicit link; the image must not add one.
        assert rendered.count('<a ') == 1

    def test_image_url_with_query_string_in_table(self):
        """Image URL with & in query string inside table cell — & not mangled."""
        image_url = 'https://example.com/img?w=100&h=200'
        rendered = render_table(f"| Image |\n|---|\n| ![sized]({image_url}) |")
        assert f'src="{image_url}"' in rendered, (
            f"& in image URL must not be escaped. Got: {rendered}"
        )

    def test_image_adjacent_to_code_no_interference(self):
        """Image immediately followed by code span in same cell — no token cross-talk."""
        rendered = inline_md('![x](https://x.com/x.png) `code`')
        assert '<img ' in rendered
        assert '<code>code</code>' in rendered

    def test_image_inside_code_span_not_rendered(self):
        """An image syntax inside a backtick span must NOT render as an img tag."""
        rendered = inline_md('`![not an image](https://example.com/img.png)`')
        # Everything sits inside backticks, so it must stay literal code.
        assert '<img ' not in rendered, (
            f"Image syntax inside code span must not render as <img>. Got: {rendered}"
        )
        assert '<code>' in rendered

131
tests/test_issue487b.py Normal file
View File

@@ -0,0 +1,131 @@
r"""
Regression test for image src URL corruption by the autolink pass.
Bug: the _al_stash before the autolink pass only stashed <a> tags.
<img> tags produced by the ![alt](url) image pass were NOT stashed,
so the autolink regex matched the URL inside src="..." and wrapped it
in <a href="...">url</a>, producing src="<a href="...">url</a>"
a completely broken image source.
Fix: extend _al_stash regex to also stash <img> tags:
(<a\b[^>]*>[\s\S]*?<\/a>|<img\b[^>]*>)
"""
import pathlib
import re
# Repo root (tests/ lives one level below) and the raw ui.js source that the
# source-level assertions below grep through.
REPO_ROOT = pathlib.Path(__file__).parent.parent
# Read as UTF-8 explicitly — consistent with the other test modules — so the
# grep checks don't depend on the platform's default locale encoding.
UI_JS = (REPO_ROOT / "static" / "ui.js").read_text(encoding="utf-8")
# ── Source-level check ────────────────────────────────────────────────────────
def test_al_stash_includes_img_tags():
    """_al_stash regex must stash both <a> and <img> tags to protect src= from autolink."""
    # Accept either backslash depth — the pattern may appear inside a JS
    # string literal, which doubles the backslash.
    found = '<img\\b[^>]*>' in UI_JS or '<img\\\\b[^>]*>' in UI_JS
    assert found, (
        "_al_stash should include <img> tag pattern to prevent autolink mangling src= URLs"
    )
# ── Behaviour tests (Python mirror of fixed pipeline) ─────────────────────────
import html as _html
def esc(s):
    """HTML-escape *s* (including quotes), mirroring the ui.js esc() helper."""
    return _html.escape(str(s), quote=True)
# Mirror of the ui.js SAFE_TAGS allowlist: a tag matching this regex survives
# the escape pass; every other tag is HTML-escaped. Case-insensitive, and the
# ([\s>]|$) suffix prevents prefix matches (e.g. 'i' matching 'iframe').
SAFE_TAGS = re.compile(
    r'^</?(strong|em|code|pre|h[1-6]|ul|ol|li|table|thead|tbody|tr|th|td'
    r'|hr|blockquote|p|br|a|img|div|span)([\s>]|$)', re.I
)
def render_with_image_and_autolink(raw):
    """Simulate the image pass + SAFE_TAGS + _al_stash + autolink pipeline.

    Python mirror of the fixed ui.js rendering order:
    1. ![alt](url)  ->  <img ...>              (image pass)
    2. escape every tag not on the allowlist   (SAFE_TAGS pass)
    3. stash <a>/<img> behind placeholders     (_al_stash pass)
    4. wrap remaining bare URLs in <a>         (autolink pass)
    5. restore the stashed tags
    """
    s = raw
    # Image pass: only http(s) URLs match; '"' is percent-encoded so the URL
    # cannot break out of the src="..." attribute.
    s = re.sub(
        r'!\[([^\]]*)\]\((https?://[^\)]+)\)',
        lambda m: (
            f'<img src="{m.group(2).replace(chr(34), "%22")}" '
            f'alt="{esc(m.group(1))}" class="msg-media-img" loading="lazy">'
        ),
        s,
    )
    # SAFE_TAGS pass: any tag not on the allowlist is escaped to literal text.
    s = re.sub(
        r'</?[a-zA-Z][^>]*>',
        lambda m: m.group() if SAFE_TAGS.match(m.group()) else esc(m.group()),
        s,
    )
    # _al_stash (fixed: stashes both <a> and <img>) — each tag is swapped for
    # a \x00B<i>\x00 placeholder so the autolink regex can't see src=/href= URLs.
    al_stash = []
    s = re.sub(
        r'(<a\b[^>]*>[\s\S]*?<\/a>|<img\b[^>]*>)',
        lambda m: (al_stash.append(m.group(1)) or f'\x00B{len(al_stash)-1}\x00'),
        s,
    )
    # Autolink pass: wrap remaining bare URLs, keeping one trailing
    # punctuation character outside the generated link.
    def autolink(m):
        url = m.group(1)
        # NOTE(review): ')' is excluded by the URL char class below, so the
        # ')' entry in this trail set can never match here — harmless.
        trail = url[-1] if url[-1] in '.,;:!?)' else ''
        clean = url[:-1] if trail else url
        return f'<a href="{clean}" target="_blank" rel="noopener">{esc(clean)}</a>{trail}'
    s = re.sub(r'(https?://[^\s<>"\')\]]+)', autolink, s)
    # Restore the stashed <a>/<img> tags in place of their placeholders.
    s = re.sub(r'\x00B(\d+)\x00', lambda m: al_stash[int(m.group(1))], s)
    return s
def test_image_src_not_mangled_by_autolink():
    """The URL inside src= of a rendered <img> must not be wrapped in <a> by autolink."""
    url = ('https://upload.wikimedia.org/wikipedia/commons/thumb/4/47/'
           'PNG_transparency_demonstration_1.png/280px-PNG_transparency_demonstration_1.png')
    rendered = render_with_image_and_autolink(f'![alt]({url})')
    assert f'src="{url}"' in rendered, f"src= URL should be intact, got: {rendered[:200]}"
    # The URL inside src= must NOT be wrapped in <a>.
    src_value = rendered.split('src="')[1].split('"')[0]
    assert '<a ' not in src_value, f"src= must not contain <a> tag, got: {src_value}"
    assert src_value == url, f"src= URL mangled: expected {url}, got {src_value}"
def test_image_tag_renders_as_img():
    """![alt](url) must produce an <img> tag, not a plain link."""
    rendered = render_with_image_and_autolink('![Test image](https://example.com/img.png)')
    assert '<img ' in rendered, f"Expected <img> tag, got: {rendered}"
    assert 'src="https://example.com/img.png"' in rendered
    # No spurious <a> wrapper may appear anywhere in the output.
    assert '<a ' not in rendered
def test_image_and_link_in_same_paragraph():
    """Image and link in same paragraph must each render correctly without interference."""
    rendered = render_with_image_and_autolink(
        'See ![logo](https://example.com/logo.png) and visit https://example.com'
    )
    assert '<img ' in rendered, "Image should render"
    assert '<a ' in rendered, "Bare URL should autolink"
    # The img src value itself must stay free of any <a> markup.
    src_value = rendered.split('src="')[1].split('"')[0]
    assert '<a' not in src_value, f"src= mangled: {src_value}"
def test_image_count_is_one():
    """One ![alt](url) should produce exactly one <img> tag."""
    rendered = render_with_image_and_autolink('![test](https://example.com/x.png)')
    img_tags = rendered.count('<img ')
    assert img_tags == 1, f"Expected 1 <img>, got {img_tags}: {rendered}"
def test_multiple_images_not_mangled():
    """Multiple images in one message each get clean src= values."""
    urls = [
        'https://example.com/a.png',
        'https://example.com/b.png',
    ]
    markdown = '\n\n'.join(f'![img{i}]({u})' for i, u in enumerate(urls))
    rendered = render_with_image_and_autolink(markdown)
    for u in urls:
        assert f'src="{u}"' in rendered, f"src= for {u} mangled in: {rendered[:300]}"
def test_image_with_query_string_src_intact():
    """Image URL with & in query string must have & (not &amp;) in src."""
    url = 'https://example.com/img?w=100&h=200&fmt=png'
    rendered = render_with_image_and_autolink(f'![img]({url})')
    assert f'src="{url}"' in rendered, f"Query string URL mangled: {rendered[:200]}"
    src_value = rendered.split('src="')[1].split('"')[0]
    assert '&amp;' not in src_value

157
tests/test_issue569_579.py Normal file
View File

@@ -0,0 +1,157 @@
"""
Tests for fixes:
- #569: docker_init.bash auto-detects WANTED_UID/WANTED_GID from mounted workspace
so macOS users (UID 501) don't need to manually set the env var.
- #579: Topbar message count already filters tool messages (role !== 'tool') —
confirmed present. Closing as already fixed by #584 which removed the
sidebar meta row (the only place raw message_count was ever displayed).
"""
import pathlib
import re
# Repo root (tests/ is one level down) plus the two files the source-level
# assertions in this module grep through.
REPO_ROOT = pathlib.Path(__file__).parent.parent
INIT_SH = (REPO_ROOT / "docker_init.bash").read_text(encoding="utf-8")
UI_JS = (REPO_ROOT / "static" / "ui.js").read_text(encoding="utf-8")
# ── #569: docker UID/GID auto-detect ─────────────────────────────────────────
def test_569_uid_autodetect_present():
    """docker_init.bash must have workspace-based UID auto-detection (#569)."""
    # The original check or-ed two literals that were character-for-character
    # identical ("stat -c '%u'" vs 'stat -c \'%u\''); one membership test
    # is exactly equivalent.
    assert "stat -c '%u'" in INIT_SH, (
        "docker_init.bash must use stat to read workspace UID (#569)"
    )
def test_569_gid_autodetect_present():
    """docker_init.bash must have workspace-based GID auto-detection (#569)."""
    # As with the UID test above, the two or-ed literals in the original
    # were the identical string; a single check suffices.
    assert "stat -c '%g'" in INIT_SH, (
        "docker_init.bash must use stat to read workspace GID (#569)"
    )
def test_569_autodetect_before_usermod():
    """UID auto-detect must appear before usermod call in docker_init.bash."""
    # Prefer the exact stat invocation; fall back to a looser match.
    exact = INIT_SH.find("stat -c '%u'")
    detect_pos = exact if exact != -1 else INIT_SH.find("stat -c")
    usermod_pos = INIT_SH.find("sudo usermod")
    assert detect_pos != -1, "stat UID detection not found"
    assert usermod_pos != -1, "sudo usermod not found"
    assert detect_pos < usermod_pos, (
        "UID auto-detect must occur before 'sudo usermod' so the correct UID "
        "is used when remapping the hermeswebui user"
    )
def test_569_skips_root_uid():
    """Auto-detect must not use UID 0 (root-owned mount = untrustworthy)."""
    start = INIT_SH.find("Auto-detect from mounted volumes")
    assert start != -1, "auto-detect comment block not found"
    snippet = INIT_SH[start:start + 1200]
    zero_guard = '"0"' in snippet or "'0'" in snippet
    assert zero_guard, (
        "Auto-detect block must skip UID 0 to avoid incorrectly using root ownership"
    )
def test_569_fallback_preserved():
    """Hardcoded default 1024 fallback must still exist after auto-detect."""
    # Check UID first, then GID, matching the original assertion order.
    for fallback, message in (
        ("WANTED_UID=${WANTED_UID:-1024}",
         "WANTED_UID default fallback must remain so explicit env var still works"),
        ("WANTED_GID=${WANTED_GID:-1024}",
         "WANTED_GID default fallback must remain"),
    ):
        assert fallback in INIT_SH, message
# ── #668: UID/GID auto-detect from hermes-home shared volume (two-container) ──
def test_668_uid_autodetect_checks_hermes_home():
    """docker_init.bash must probe hermes-home dirs for UID in two-container setups.

    When hermes-agent and hermes-webui run in separate containers sharing a
    named volume, /workspace may not exist but ~/.hermes will be owned by the
    agent's UID. The init script must probe it so the webui user is remapped
    to match (#668).
    """
    probe_dir = "/home/hermeswebui/.hermes"
    assert probe_dir in INIT_SH, (
        "docker_init.bash must probe /home/hermeswebui/.hermes for UID detection "
        "to support two-container setups where /workspace may not exist (#668)"
    )
def test_668_gid_autodetect_checks_hermes_home():
    """docker_init.bash must probe hermes-home dirs for GID in two-container setups (#668)."""
    # UID and GID detection share the same probe dirs — verify the GID block too.
    marker = INIT_SH.find("Auto-detect GID from mounted volumes")
    assert marker != -1, (
        "GID auto-detect comment must be updated to mention shared volumes (#668)"
    )
    snippet = INIT_SH[marker:marker + 600]
    assert "/home/hermeswebui/.hermes" in snippet or "HERMES_HOME" in snippet, (
        "GID auto-detect block must probe hermes-home dirs (#668)"
    )
def test_668_uid_probe_loop_uses_break():
    """UID probe loop must stop on first match (no double-detection)."""
    marker = INIT_SH.find("Auto-detect from mounted volumes")
    assert marker != -1, "UID auto-detect comment not found"
    snippet = INIT_SH[marker:marker + 1200]
    assert "break" in snippet, (
        "UID probe loop must break after first successful detection "
        "to avoid being overridden by a later probe dir (#668)"
    )
def test_668_hermes_home_probe_before_workspace():
    """Hermes-home probe must appear before /workspace probe in docker_init.bash (#668)."""
    home_probe = INIT_SH.find("/home/hermeswebui/.hermes")
    workspace_probe = INIT_SH.find('if [ -d "/workspace" ]')
    assert home_probe != -1, "/home/hermeswebui/.hermes probe not found"
    assert workspace_probe != -1, "/workspace probe not found"
    assert home_probe < workspace_probe, (
        "Hermes-home probe must come before /workspace probe — "
        "shared volume UID should take priority over workspace UID (#668)"
    )
# ── #579: topbar message count already filters tool messages ──────────────────
def test_579_topbar_filters_tool_messages():
    """ui.js topbar count must filter out role='tool' messages (#579).

    The sidebar previously showed raw message_count (which included tool
    messages), causing a mismatch with the topbar. PR #584 removed the
    sidebar count display entirely; the topbar was already correct.
    This test locks in the existing topbar filter so it can't regress.
    """
    # Find the topbarMeta assignment
    meta_pos = UI_JS.find("topbarMeta")
    assert meta_pos != -1, "topbarMeta assignment not found in ui.js"
    # The filter feeding the count sits just before the assignment.
    context = UI_JS[max(0, meta_pos - 400):meta_pos + 100]
    assert "role" in context and "tool" in context, (
        "topbarMeta count must filter by role — "
        "messages with role='tool' must be excluded from the displayed count"
    )
    # Accept the common JS spellings of the exclusion. The original third
    # disjunct "role!=='tool'" was redundant (it already contains
    # "!=='tool'"); the spaced strict form "!== 'tool'" is accepted instead.
    assert ("!=='tool'" in context or "!== 'tool'" in context
            or "!= 'tool'" in context), (
        "topbar count filter must use !== 'tool' to exclude tool messages"
    )
def test_579_sidebar_no_longer_shows_raw_count():
    """sessions.js must not reference message_count in the render path (#579).

    After PR #584, the sidebar no longer shows message_count at all,
    eliminating the inconsistency between sidebar (raw) and topbar (filtered).
    """
    sidebar_src = (REPO_ROOT / "static" / "sessions.js").read_text(encoding="utf-8")
    # The client-side session renderer must not mention the raw counter.
    assert "message_count" not in sidebar_src, (
        "sessions.js must not reference message_count — "
        "the meta row that displayed it was removed in PR #584"
    )

View File

@@ -0,0 +1,56 @@
"""
Regression tests for GitHub issue #570 follow-up:
PermissionError from SETTINGS_FILE.exists() in Docker UID-mismatch scenarios.
When ~/.hermes is owned by a different UID than the container user (common in
Docker setups), Path.exists() raises PermissionError instead of returning False.
load_settings() must treat that as "file not accessible = use defaults" rather
than propagating the exception up to crash the request handler.
"""
import stat
import pytest
import api.config as config
def test_load_settings_returns_defaults_when_settings_file_unreadable(monkeypatch, tmp_path):
    """PermissionError from SETTINGS_FILE.exists() must not propagate — return defaults instead.

    Regression for issue #570 comment: Docker UID mismatch caused every request
    to 500 because load_settings() called SETTINGS_FILE.exists() without catching OSError.
    """
    import os
    # chmod-based restrictions do not apply to root (e.g. Docker CI), so the
    # stat() below would succeed and the test would fail for the wrong reason.
    if hasattr(os, "geteuid") and os.geteuid() == 0:
        pytest.skip("chmod permission restrictions are ineffective when running as root")
    state_dir = tmp_path / "state"
    state_dir.mkdir()
    settings_file = state_dir / "settings.json"
    # Create the file then make the parent unreadable so .exists() raises PermissionError
    settings_file.write_text('{"send_key": "ctrl+enter"}', encoding="utf-8")
    state_dir.chmod(stat.S_IWUSR)  # write-only: stat() on children will fail
    monkeypatch.setattr(config, "SETTINGS_FILE", settings_file)
    try:
        result = config.load_settings()
        # Must not raise; must return a dict with default values
        assert isinstance(result, dict)
        assert "send_key" in result
        # The corrupted/inaccessible value should NOT appear — defaults win
        assert result["send_key"] == config._SETTINGS_DEFAULTS["send_key"]
    finally:
        state_dir.chmod(stat.S_IRWXU)  # restore for cleanup
def test_load_settings_returns_defaults_when_exists_raises_permission_error(monkeypatch, tmp_path):
    """Direct simulation: monkeypatch SETTINGS_FILE.exists to raise PermissionError."""
    from unittest import mock
    settings_file = tmp_path / "state" / "settings.json"
    settings_file.parent.mkdir()
    monkeypatch.setattr(config, "SETTINGS_FILE", settings_file)
    # Patch .exists on the concrete Path class so load_settings hits the error.
    exists_patch = mock.patch.object(
        type(settings_file), "exists", side_effect=PermissionError("Permission denied")
    )
    with exists_patch:
        result = config.load_settings()
    assert isinstance(result, dict)
    assert result["send_key"] == config._SETTINGS_DEFAULTS["send_key"]

205
tests/test_issue572.py Normal file
View File

@@ -0,0 +1,205 @@
"""Tests for issue #572: onboarding must not fire or overwrite config for
providers not in the quick-setup list (minimax-cn, deepseek, xai, etc.).
Root cause: _provider_api_key_present() only knew about the four providers in
_SUPPORTED_PROVIDER_SETUPS. For any other provider it returned False, causing
chat_ready=False, which made the wizard fire even when the user was fully
configured. The second part of the fix ensures _saveOnboardingProviderSetup()
in the frontend also skips the POST when current_is_oauth is set.
Covers:
1. _provider_api_key_present returns True for minimax-cn when
MINIMAX_CN_API_KEY is in env (via hermes_cli.auth.get_auth_status)
2. _status_from_runtime gives chat_ready=True for minimax-cn with a key set
3. get_onboarding_status returns completed=True for a fully-configured
unsupported provider when config.yaml exists
4. The hermes_cli import failure path is safe (falls back gracefully)
"""
from __future__ import annotations
import os
import pathlib
import sys
import types
from unittest import mock
import pytest
def _inject_hermes_cli_auth(get_auth_status_return):
"""Inject a minimal hermes_cli.auth stub into sys.modules.
CI doesn't install hermes_cli (it's a separate package). Tests that
exercise the hermes_cli fallback path must inject the module themselves
rather than relying on mock.patch('hermes_cli.auth.get_auth_status')
which fails with ModuleNotFoundError when the module isn't installed.
"""
mock_auth = types.ModuleType("hermes_cli.auth")
mock_auth.get_auth_status = mock.MagicMock(return_value=get_auth_status_return)
mock_hermes_cli = types.ModuleType("hermes_cli")
return mock.patch.dict(sys.modules, {
"hermes_cli": mock_hermes_cli,
"hermes_cli.auth": mock_auth,
})
# ---------------------------------------------------------------------------
# Helper
# ---------------------------------------------------------------------------
def _call_provider_api_key_present(provider: str, cfg: dict | None = None,
                                   env_values: dict | None = None):
    """Call api.onboarding._provider_api_key_present with defaulted arguments.

    The original annotations declared ``cfg: dict = None`` — a type lie;
    ``dict | None`` is correct (the module has ``from __future__ import
    annotations``, so the union syntax is safe on older interpreters).
    The lazy import keeps a broken api.onboarding from failing collection.
    """
    from api.onboarding import _provider_api_key_present
    return _provider_api_key_present(provider, cfg or {}, env_values or {})
# ---------------------------------------------------------------------------
# 1. _provider_api_key_present via hermes_cli fallback
# ---------------------------------------------------------------------------
class TestProviderApiKeyPresentFallback:
    """_provider_api_key_present must fall back to hermes_cli.auth for providers
    outside _SUPPORTED_PROVIDER_SETUPS, and fail safe when hermes_cli is absent
    (issue #572)."""

    def test_minimax_cn_logged_in_returns_true(self):
        """minimax-cn: if hermes_cli.auth.get_auth_status returns logged_in, must be True."""
        # Pin the supported-provider map so "minimax-cn" is guaranteed NOT in
        # it and the hermes_cli fallback branch is exercised.
        with mock.patch("api.onboarding._SUPPORTED_PROVIDER_SETUPS", {
            "openrouter": {}, "anthropic": {}, "openai": {}, "custom": {}
        }):
            with _inject_hermes_cli_auth({"logged_in": True}):
                result = _call_provider_api_key_present("minimax-cn")
                assert result is True

    def test_unsupported_provider_logged_out_returns_false(self):
        """Unsupported provider with no key → False, no crash."""
        with mock.patch("api.onboarding._SUPPORTED_PROVIDER_SETUPS", {
            "openrouter": {}, "anthropic": {}, "openai": {}, "custom": {}
        }):
            with _inject_hermes_cli_auth({"logged_in": False}):
                result = _call_provider_api_key_present("deepseek")
                assert result is False

    def test_hermes_cli_import_failure_is_safe(self):
        """If hermes_cli is unavailable, falls back silently to False."""
        import builtins
        real_import = builtins.__import__
        # Simulate an environment without hermes_cli by failing only imports
        # of that package; everything else passes through to the real import.
        def _block_hermes_cli(name, *args, **kwargs):
            if name.startswith("hermes_cli"):
                raise ImportError("hermes_cli not available")
            return real_import(name, *args, **kwargs)
        with mock.patch("api.onboarding._SUPPORTED_PROVIDER_SETUPS", {
            "openrouter": {}, "anthropic": {}, "openai": {}, "custom": {}
        }):
            with mock.patch("builtins.__import__", side_effect=_block_hermes_cli):
                result = _call_provider_api_key_present("minimax-cn")
                assert result is False  # safe fallback

    def test_supported_provider_still_works_without_fallback(self):
        """openrouter with env key must still succeed via the original path."""
        from api.onboarding import _provider_api_key_present, _SUPPORTED_PROVIDER_SETUPS
        env_values = {"OPENROUTER_API_KEY": "sk-test"}
        result = _provider_api_key_present("openrouter", {}, env_values)
        assert result is True

    def test_inline_api_key_in_cfg_still_works(self):
        """model.api_key in config.yaml must be recognized for any provider."""
        cfg = {"model": {"provider": "minimax-cn", "default": "MiniMax-M2.7", "api_key": "key123"}}
        result = _call_provider_api_key_present("minimax-cn", cfg)
        assert result is True
# ---------------------------------------------------------------------------
# 2. _status_from_runtime: unsupported provider with key → chat_ready=True
# ---------------------------------------------------------------------------
class TestStatusFromRuntimeUnsupportedProvider:
    """_status_from_runtime must report chat_ready for ANY provider with a
    key — not only the four quick-setup providers (issue #572)."""

    def _run(self, provider: str, model: str, api_key_present: bool, oauth_present: bool = False):
        # Drive _status_from_runtime with its external dependencies stubbed:
        # hermes binary "found", empty .env, fixed home dir, and forced
        # key / oauth detection results.
        from api.onboarding import _status_from_runtime
        cfg = {"model": {"provider": provider, "default": model}}
        with (
            mock.patch("api.onboarding._HERMES_FOUND", True),
            mock.patch("api.onboarding._load_env_file", return_value={}),
            mock.patch("api.onboarding._get_active_hermes_home", return_value=pathlib.Path("/tmp")),
            mock.patch("api.onboarding._provider_api_key_present", return_value=api_key_present),
            mock.patch("api.onboarding._provider_oauth_authenticated", return_value=oauth_present),
        ):
            return _status_from_runtime(cfg, True)

    def test_minimax_cn_with_key_gives_chat_ready(self):
        """minimax-cn + api key present → chat_ready must be True."""
        result = self._run("minimax-cn", "MiniMax-M2.7", api_key_present=True)
        assert result["chat_ready"] is True, f"Expected chat_ready=True, got: {result}"
        assert result["provider_ready"] is True
        assert result["setup_state"] == "ready"

    def test_deepseek_with_key_gives_chat_ready(self):
        """deepseek + api key → chat_ready."""
        result = self._run("deepseek", "deepseek-chat", api_key_present=True)
        assert result["chat_ready"] is True

    def test_unsupported_provider_no_key_no_oauth_gives_not_ready(self):
        """No key, no oauth → provider_ready=False."""
        result = self._run("minimax-cn", "MiniMax-M2.7", api_key_present=False, oauth_present=False)
        assert result["chat_ready"] is False
        assert result["provider_ready"] is False

    def test_oauth_provider_still_works_via_oauth_path(self):
        """openai-codex (OAuth) with no api_key but oauth present → ready."""
        result = self._run("openai-codex", "codex-model", api_key_present=False, oauth_present=True)
        assert result["chat_ready"] is True
# ---------------------------------------------------------------------------
# 3. get_onboarding_status: minimax-cn fully configured → completed=True
# ---------------------------------------------------------------------------
class TestOnboardingStatusUnsupportedProvider:
    """get_onboarding_status must auto-complete for a fully configured
    provider even when it is absent from the quick-setup list (issue #572)."""

    def _make_status(self, chat_ready: bool, provider: str = "minimax-cn"):
        # Build get_onboarding_status() output with everything beneath it
        # mocked out, so only the completed / current_is_oauth decision
        # logic in get_onboarding_status itself runs.
        import api.onboarding as mod
        fake_config_path = pathlib.Path("/tmp/_test_572_config.yaml")
        cfg = {"model": {"provider": provider, "default": "MiniMax-M2.7"}}
        # Canned _status_from_runtime result for the requested readiness.
        runtime = {
            "chat_ready": chat_ready,
            "provider_configured": True,
            "provider_ready": chat_ready,
            "setup_state": "ready" if chat_ready else "provider_incomplete",
            "provider_note": "test",
            "current_provider": provider,
            "current_model": "MiniMax-M2.7",
            "current_base_url": None,
            "env_path": "/tmp/.env",
        }
        with (
            mock.patch.object(mod, "load_settings", return_value={}),
            mock.patch.object(mod, "get_config", return_value=cfg),
            mock.patch.object(mod, "verify_hermes_imports", return_value=(True, [], {})),
            mock.patch.object(mod, "_status_from_runtime", return_value=runtime),
            mock.patch.object(mod, "load_workspaces", return_value=[]),
            mock.patch.object(mod, "get_last_workspace", return_value=None),
            mock.patch.object(mod, "get_available_models", return_value=[]),
            mock.patch.object(mod, "_get_config_path", return_value=fake_config_path),
            # NOTE(review): this patches Path.exists globally (every Path
            # instance), not just fake_config_path — any .exists() called
            # inside get_onboarding_status reports True in this context.
            mock.patch.object(pathlib.Path, "exists", return_value=True),
        ):
            return mod.get_onboarding_status()

    def test_minimax_cn_chat_ready_skips_wizard(self):
        """minimax-cn + chat_ready=True + config.yaml exists → wizard must NOT fire."""
        result = self._make_status(chat_ready=True)
        assert result["completed"] is True, (
            "Wizard fired for minimax-cn user with valid config! "
            "config.yaml + chat_ready=True must auto-complete onboarding regardless of provider."
        )

    def test_minimax_cn_not_ready_shows_wizard(self):
        """minimax-cn + chat_ready=False → wizard fires so user can fix it."""
        result = self._make_status(chat_ready=False)
        assert result["completed"] is False

    def test_current_is_oauth_set_for_unsupported_provider(self):
        """setup.current_is_oauth must be True for minimax-cn (not in quick-setup list)."""
        result = self._make_status(chat_ready=True)
        assert result["setup"]["current_is_oauth"] is True, (
            "current_is_oauth should be True for providers not in _SUPPORTED_PROVIDER_SETUPS"
        )

98
tests/test_issue607.py Normal file
View File

@@ -0,0 +1,98 @@
"""Tests for PR #648 — Gemma 4 thinking token stripping (closes #607)."""
import re
import pathlib
import pytest
# ---------------------------------------------------------------------------
# _strip_thinking_markup tests
# ---------------------------------------------------------------------------
from api.streaming import _strip_thinking_markup, _looks_invalid_generated_title
class TestGemma4ThinkingTokenStrip:
    """Gemma 4 '<|turn|>thinking ... <turn|>' blocks must be stripped from output."""

    def test_strip_gemma4_basic(self):
        """Basic Gemma 4 thinking block stripped, answer kept."""
        text = "<|turn|>thinking\nSome internal reasoning\n<turn|>Final answer"
        assert _strip_thinking_markup(text) == "Final answer"

    def test_strip_gemma4_multiline_reasoning(self):
        """Multi-line reasoning block stripped cleanly."""
        text = "<|turn|>thinking\nLine 1\nLine 2\nLine 3\n<turn|>Answer here"
        assert _strip_thinking_markup(text) == "Answer here"

    def test_strip_gemma4_no_thinking_passthrough(self):
        """Normal response without thinking tokens passes through unchanged."""
        text = "Normal response without thinking tokens"
        assert _strip_thinking_markup(text) == text

    def test_strip_gemma4_with_leading_whitespace(self):
        """Leading whitespace before the thinking block is handled."""
        text = "\n\n<|turn|>thinking\nReasoning\n<turn|>Answer"
        assert _strip_thinking_markup(text) == "Answer"

    def test_strip_gemma4_empty_reasoning(self):
        """Empty reasoning block (just delimiters) is stripped."""
        text = "<|turn|>thinking\n<turn|>Response"
        assert _strip_thinking_markup(text) == "Response"

    def test_strip_gemma4_case_insensitive(self):
        """Pattern is case-insensitive (though Gemma 4 uses fixed case)."""
        stripped = _strip_thinking_markup("<|TURN|>THINKING\nreasoning\n<TURN|>answer")
        # The regex uses re.IGNORECASE — the uppercase variant must go too.
        assert "THINKING" not in stripped
        assert "reasoning" not in stripped

    def test_existing_think_tag_still_works(self):
        """Ensure <think>...</think> still stripped (no regression)."""
        assert _strip_thinking_markup("<think>inner reasoning</think>Final") == "Final"

    def test_existing_channel_tag_still_works(self):
        """Ensure <|channel|>thought...</channel|> still stripped."""
        assert _strip_thinking_markup("<|channel|>thoughtSome reasoning<channel|>Answer") == "Answer"
class TestGemma4TitleLeakDetection:
    """_looks_invalid_generated_title must flag titles containing a Gemma 4 leak."""

    def test_detects_gemma4_leak_in_title(self):
        """A generated title containing raw thinking markup is rejected."""
        leaked = "<|turn|>thinking\nUser asked about X\n<turn|>Session Title"
        assert _looks_invalid_generated_title(leaked) is True

    def test_clean_title_not_flagged(self):
        """A normal human-readable title passes the validity check."""
        clean = "Python debugging session"
        assert _looks_invalid_generated_title(clean) is False
class TestGemma4MessagesJsThinkPairs:
    """Verify static/messages.js ships the correct Gemma 4 delimiter pair."""

    @staticmethod
    def _messages_js() -> str:
        """Read the shipped static/messages.js (path relative to the repo cwd)."""
        return pathlib.Path("static/messages.js").read_text()

    def test_messages_js_has_correct_gemma4_open(self):
        """The double-pipe open delimiter must be present."""
        assert "<|turn|>thinking" in self._messages_js(), (
            "messages.js is missing correct Gemma 4 open delimiter '<|turn|>thinking'"
        )

    def test_messages_js_no_wrong_gemma4_open(self):
        """The broken single-pipe variant must be gone."""
        assert "<|turn>thinking" not in self._messages_js(), (
            "messages.js still contains wrong Gemma 4 delimiter '<|turn>thinking' (missing |)"
        )

    def test_messages_js_has_gemma4_close(self):
        """The close delimiter must be present."""
        assert "<turn|>" in self._messages_js(), "messages.js missing Gemma 4 close delimiter '<turn|>'"

107
tests/test_issue609.py Normal file
View File

@@ -0,0 +1,107 @@
"""
Tests for GitHub issue #609 — Docker workspace path trust and env-var priority.
Two independent bugs were fixed:
1. HERMES_WEBUI_DEFAULT_WORKSPACE env var was silently overridden by
settings.json at server startup. The env var must always win.
2. resolve_trusted_workspace() rejected paths that are children of
DEFAULT_WORKSPACE (e.g. /data/workspace/project) when the default is a
Docker volume mount outside the user's home directory. Any path under
the boot-time default should be trusted automatically.
"""
from pathlib import Path
import pytest
from api.workspace import resolve_trusted_workspace
# ── Fix 2: trust paths under DEFAULT_WORKSPACE ───────────────────────────────
def test_subdir_of_boot_default_is_trusted(monkeypatch, tmp_path):
    """A subdirectory of BOOT_DEFAULT_WORKSPACE must be trusted even when it is
    neither in the saved workspace list nor under the user's home directory.

    This is the core Docker case: DEFAULT_WORKSPACE=/data/workspace and the
    user opens /data/workspace/myproject — no ValueError may be raised.
    """
    import api.workspace as ws_mod
    default_ws = tmp_path / "data" / "workspace"
    project_dir = default_ws / "myproject"
    project_dir.mkdir(parents=True)
    monkeypatch.setattr(ws_mod, "_BOOT_DEFAULT_WORKSPACE", str(default_ws))
    # Must resolve without raising: project_dir sits under the boot default.
    assert resolve_trusted_workspace(str(project_dir)) == project_dir.resolve()
def test_boot_default_itself_is_trusted(monkeypatch, tmp_path):
    """The DEFAULT_WORKSPACE path itself must be trusted, not only its children."""
    import api.workspace as ws_mod
    default_ws = tmp_path / "data" / "workspace"
    default_ws.mkdir(parents=True)
    monkeypatch.setattr(ws_mod, "_BOOT_DEFAULT_WORKSPACE", str(default_ws))
    assert resolve_trusted_workspace(str(default_ws)) == default_ws.resolve()
def test_path_outside_boot_default_and_home_is_rejected(monkeypatch, tmp_path):
    """A path under neither home, the saved list, nor DEFAULT_WORKSPACE stays rejected."""
    import api.workspace as ws_mod
    default_ws = tmp_path / "data" / "workspace"
    default_ws.mkdir(parents=True)
    foreign = tmp_path / "other_mount" / "secret"
    foreign.mkdir(parents=True)
    monkeypatch.setattr(ws_mod, "_BOOT_DEFAULT_WORKSPACE", str(default_ws))
    with pytest.raises(ValueError, match="outside the user home"):
        resolve_trusted_workspace(str(foreign))
def test_none_path_returns_boot_default(monkeypatch, tmp_path):
    """Passing None always yields the boot default unchanged."""
    import api.workspace as ws_mod
    default_ws = tmp_path / "data" / "workspace"
    default_ws.mkdir(parents=True)
    monkeypatch.setattr(ws_mod, "_BOOT_DEFAULT_WORKSPACE", str(default_ws))
    assert resolve_trusted_workspace(None) == default_ws.resolve()
def test_path_traversal_via_dotdot_does_not_escape_boot_default(monkeypatch, tmp_path):
    """`..` segments must not let a path escape DEFAULT_WORKSPACE trust.

    Path.resolve() collapses `..` before the relative_to(boot_default) check
    runs, so `<default>/../private` resolves to a sibling directory that is
    neither under home nor under the boot default — it must be rejected.
    (System-root escapes like `/etc` are also caught by an earlier block;
    this test pins the behavior in case the condition order ever changes.)
    """
    import api.workspace as ws_mod
    default_ws = tmp_path / "data" / "workspace"
    default_ws.mkdir(parents=True)
    private_dir = tmp_path / "data" / "private"
    private_dir.mkdir(parents=True)
    monkeypatch.setattr(ws_mod, "_BOOT_DEFAULT_WORKSPACE", str(default_ws))
    # `<default>/../private` resolves to tmp_path/data/private — not a child
    # of the boot default and not under home, so it must be refused.
    breakout = default_ws / ".." / "private"
    with pytest.raises(ValueError, match="outside the user home"):
        resolve_trusted_workspace(str(breakout))

125
tests/test_issue644.py Normal file
View File

@@ -0,0 +1,125 @@
"""Tests for PR #644 — load provider models from config.yaml in get_available_models()."""
import pytest
import api.config as _cfg
def _available_models_with_cfg(cfg_override):
    """Run get_available_models() with config.cfg temporarily replaced by *cfg_override*.

    The original cfg contents are snapshotted up front and restored in a
    finally block, so a failing call cannot leak the override into other tests.
    """
    saved = dict(_cfg.cfg)
    _cfg.cfg.clear()
    _cfg.cfg.update(cfg_override)
    try:
        return _cfg.get_available_models()
    finally:
        _cfg.cfg.clear()
        _cfg.cfg.update(saved)
class TestConfigYamlModelsLoading:
    """Verify that providers with explicit models in config.yaml use those models.

    Covers the PR #644 change: get_available_models() must honour a ``models``
    key under ``providers.<name>`` — both the dict form (model_id -> metadata)
    and the plain list form — instead of consulting only _PROVIDER_MODELS.
    """

    def test_provider_in_config_but_not_provider_models_gets_cfg_models(self):
        """A provider only in cfg.providers (not _PROVIDER_MODELS) should appear
        with its configured model list instead of being skipped entirely."""
        cfg = {
            "model": {"provider": "my-custom-llm"},
            "providers": {
                "my-custom-llm": {
                    "base_url": "http://custom.local/v1",
                    "models": ["custom-model-a", "custom-model-b"],
                }
            },
        }
        result = _available_models_with_cfg(cfg)
        provider_names = [g["provider"] for g in result["groups"]]
        # The provider must appear (previously it was silently skipped).
        # BUGFIX: the original test computed this `any(...)` but never
        # asserted it, so the "provider appears" requirement went unchecked.
        assert any("my-custom-llm" in n.lower() for n in provider_names), (
            f"my-custom-llm group missing from providers: {provider_names}"
        )
        # Its model list must include the configured entries.
        for g in result["groups"]:
            if "custom" in g["provider"].lower():
                model_ids = [m["id"] for m in g["models"]]
                assert any("custom-model-a" in mid for mid in model_ids), (
                    f"custom-model-a not in group models: {model_ids}"
                )

    def test_provider_models_dict_format_expanded(self):
        """models: {model_id: {context_length: ...}} — keys become model IDs."""
        cfg = {
            "model": {"provider": "anthropic"},
            "providers": {
                "anthropic": {
                    "models": {
                        "claude-custom-1": {"context_length": 200000},
                        "claude-custom-2": {"context_length": 100000},
                    }
                }
            },
        }
        result = _available_models_with_cfg(cfg)
        # Find the Anthropic group; fail loudly if it is absent — the original
        # loop passed vacuously when the group was missing.
        for g in result["groups"]:
            if g["provider"] == "Anthropic":
                model_ids = [m["id"] for m in g["models"]]
                assert "claude-custom-1" in model_ids, (
                    f"claude-custom-1 not in Anthropic models: {model_ids}"
                )
                assert "claude-custom-2" in model_ids, (
                    f"claude-custom-2 not in Anthropic models: {model_ids}"
                )
                break
        else:
            pytest.fail("Anthropic group missing from get_available_models() output")

    def test_provider_models_list_format_expanded(self):
        """models: [model_id, ...] — items become model IDs."""
        cfg = {
            "model": {"provider": "anthropic"},
            "providers": {
                "anthropic": {
                    "models": ["claude-list-only-1", "claude-list-only-2"],
                }
            },
        }
        result = _available_models_with_cfg(cfg)
        for g in result["groups"]:
            if g["provider"] == "Anthropic":
                model_ids = [m["id"] for m in g["models"]]
                assert "claude-list-only-1" in model_ids, (
                    f"claude-list-only-1 not in Anthropic models: {model_ids}"
                )
                break
        else:
            pytest.fail("Anthropic group missing from get_available_models() output")

    def test_provider_in_provider_models_but_no_cfg_override_unchanged(self):
        """When no models key in cfg.providers, hardcoded _PROVIDER_MODELS still used."""
        cfg = {
            "model": {"provider": "anthropic"},
            "providers": {
                "anthropic": {
                    "api_key": "sk-test",
                    # No 'models' key
                }
            },
        }
        result = _available_models_with_cfg(cfg)
        raw_ids = {m["id"] for m in _cfg._PROVIDER_MODELS.get("anthropic", [])}
        for g in result["groups"]:
            if g["provider"] == "Anthropic":
                returned_ids = {m["id"] for m in g["models"]}
                # The hardcoded models must still be present.
                overlap = raw_ids & returned_ids
                assert overlap, (
                    f"No _PROVIDER_MODELS models found in Anthropic group. "
                    f"Expected subset of {raw_ids}, got {returned_ids}"
                )
                break
        else:
            pytest.fail("Anthropic group missing from get_available_models() output")

    def test_non_dict_models_value_falls_through_gracefully(self):
        """If models value is neither dict nor list (e.g. null), no crash."""
        cfg = {
            "model": {"provider": "anthropic"},
            "providers": {
                "anthropic": {"models": None},  # invalid — should not crash
            },
        }
        # Should not raise
        result = _available_models_with_cfg(cfg)
        assert "groups" in result

54
tests/test_issue646.py Normal file
View File

@@ -0,0 +1,54 @@
"""Tests for PR #649 — empty DEFAULT_MODEL does not inject blank model entries."""
import pytest
from api import config as cfg
class TestEmptyDefaultModel:
    """Verify that DEFAULT_MODEL='' does not produce blank model entries."""

    @staticmethod
    def _default_groups(default_model):
        """Replicate the PR #649 'Default group' branch for a given model id.

        Returns the group list the guarded branch would build: empty when the
        id is falsy, otherwise one 'Default' group whose single model uses the
        id's last path segment as its label.
        """
        groups = []
        if default_model:
            label = default_model.split("/")[-1] if "/" in default_model else default_model
            groups.append(
                {"provider": "Default", "models": [{"id": default_model, "label": label}]}
            )
        return groups

    def test_no_empty_id_when_default_model_is_empty(self, monkeypatch):
        """With empty DEFAULT_MODEL, no model entry should have id='' or label=''."""
        monkeypatch.setattr(cfg, "DEFAULT_MODEL", "")
        # Exercise the guarded "no providers detected" branch: an empty
        # default model must not append a Default group with a blank model.
        groups = self._default_groups(cfg.DEFAULT_MODEL)
        assert len(groups) == 0, "Empty default_model should not create any group"

    def test_no_empty_id_when_default_model_is_set(self, monkeypatch):
        """With a real DEFAULT_MODEL, the Default group should be created normally."""
        monkeypatch.setattr(cfg, "DEFAULT_MODEL", "openrouter/mistralai/mistral-7b-instruct")
        groups = self._default_groups(cfg.DEFAULT_MODEL)
        assert len(groups) == 1
        assert groups[0]["models"][0]["id"] == "openrouter/mistralai/mistral-7b-instruct"
        assert groups[0]["models"][0]["label"] == "mistral-7b-instruct"

    def test_default_model_env_var_empty_string_accepted(self, monkeypatch):
        """Empty string is a valid DEFAULT_MODEL value — no KeyError or crash."""
        import os
        monkeypatch.setenv("HERMES_WEBUI_DEFAULT_MODEL", "")
        # The env-var resolution pattern must hand back the empty string...
        val = os.getenv("HERMES_WEBUI_DEFAULT_MODEL", "")
        assert val == ""
        # ...and the `if default_model:` guard treats it as falsy.
        assert not val

135
tests/test_issue677.py Normal file
View File

@@ -0,0 +1,135 @@
"""
Tests for fix #677: auto-scroll override during streaming.
The scroll system has a _scrollPinned flag and scrollIfPinned() to respect
user scroll position. The bug was that scrollToBottom() was called
unconditionally inside renderMessages() and appendThinking(), even during
an active stream — overriding any scroll position the user had set.
"""
import pathlib
import re
REPO = pathlib.Path(__file__).parent.parent
UI_JS = (REPO / "static" / "ui.js").read_text(encoding="utf-8")
INDEX_HTML = (REPO / "static" / "index.html").read_text(encoding="utf-8")
STYLE_CSS = (REPO / "static" / "style.css").read_text(encoding="utf-8")
class TestScrollPinningFix:
    """Static checks on ui.js / index.html / style.css for the #677 scroll fix.

    These tests grep the shipped frontend assets rather than executing them:
    they pin the scrollIfPinned()/activeStreamId guards, the re-pin threshold,
    and the scroll-to-bottom button wiring.
    """

    def test_render_messages_respects_active_stream(self):
        """renderMessages() must not call scrollToBottom() while streaming (#677).

        During an active stream, scrollToBottom() unconditionally re-pins scroll
        and overrides the user's position. renderMessages() must use scrollIfPinned()
        instead when S.activeStreamId is set.
        """
        # Find renderMessages function
        rm_start = UI_JS.find("function renderMessages()")
        assert rm_start != -1, "renderMessages() not found in ui.js"
        # Slice runs to the next top-level `function ` declaration.
        rm_end = UI_JS.find("\nfunction ", rm_start + 1)
        rm_body = UI_JS[rm_start:rm_end]
        # Must check activeStreamId before deciding which scroll fn to call
        assert "activeStreamId" in rm_body, (
            "renderMessages() must check S.activeStreamId before scrolling — "
            "unconditional scrollToBottom() overrides user scroll position (#677)"
        )
        # scrollIfPinned must be called inside renderMessages (stream path)
        assert "scrollIfPinned()" in rm_body, (
            "renderMessages() must call scrollIfPinned() during streaming (#677)"
        )

    def test_append_thinking_uses_scroll_if_pinned(self):
        """appendThinking() must use scrollIfPinned() not scrollToBottom() (#677).

        appendThinking() fires continuously during streaming — calling scrollToBottom()
        inside it re-pins on every token, preventing the user from scrolling up.
        """
        at_start = UI_JS.find("function appendThinking(")
        assert at_start != -1, "appendThinking() not found in ui.js"
        at_end = UI_JS.find("\nfunction ", at_start + 1)
        at_body = UI_JS[at_start:at_end]
        assert "scrollIfPinned()" in at_body, (
            "appendThinking() must call scrollIfPinned() not scrollToBottom() (#677)"
        )
        assert "scrollToBottom()" not in at_body, (
            "appendThinking() must not call scrollToBottom() — it fires mid-stream (#677)"
        )

    def test_scroll_threshold_increased(self):
        """Scroll re-pin threshold must be at least 150px (#677).

        80px was too small — a fast mouse scroll wheel can jump 100–120px in one
        tick, causing unintended re-pin. 150px gives a proper dead zone.
        """
        # Find the nearBottom assignment in the scroll listener;
        # accept either spacing style around the `=`.
        near_bottom_pos = UI_JS.find("nearBottom=")
        if near_bottom_pos == -1:
            near_bottom_pos = UI_JS.find("nearBottom =")
        assert near_bottom_pos != -1, "nearBottom scroll threshold assignment not found"
        threshold_line = UI_JS[near_bottom_pos:near_bottom_pos + 120]
        # Extract the numeric threshold (the number after the `<` comparison)
        match = re.search(r"<\s*(\d+)", threshold_line)
        assert match, f"Numeric threshold not found near nearBottom assignment: {threshold_line!r}"
        threshold = int(match.group(1))
        assert threshold >= 150, (
            f"Scroll re-pin threshold is {threshold}px — must be >= 150px to avoid "
            f"hair-trigger re-pinning on fast scroll wheels (#677)"
        )

    def test_scroll_to_bottom_button_exists_in_html(self):
        """index.html must contain a scroll-to-bottom button (#677).

        All major streaming chat UIs (Claude, ChatGPT) show a floating ↓ button
        when the user has scrolled up, giving a clear escape hatch to return to live output.
        """
        assert "scrollToBottomBtn" in INDEX_HTML, (
            "index.html must contain a #scrollToBottomBtn element (#677)"
        )
        assert "scroll-to-bottom-btn" in INDEX_HTML, (
            "index.html must use class scroll-to-bottom-btn for the scroll button (#677)"
        )

    def test_scroll_to_bottom_button_hidden_by_default(self):
        """Scroll-to-bottom button must be hidden by default (display:none) (#677)."""
        btn_pos = INDEX_HTML.find("scrollToBottomBtn")
        assert btn_pos != -1
        # Only the 200 chars after the id are inspected — the inline style
        # is expected on (or right after) the element itself.
        btn_context = INDEX_HTML[btn_pos:btn_pos + 200]
        assert "display:none" in btn_context or 'display="none"' in btn_context, (
            "scrollToBottomBtn must be hidden by default — only shown when user scrolls up (#677)"
        )

    def test_scroll_to_bottom_button_css_exists(self):
        """style.css must have styling for .scroll-to-bottom-btn (#677)."""
        assert ".scroll-to-bottom-btn" in STYLE_CSS, (
            "style.css must define .scroll-to-bottom-btn styles (#677)"
        )

    def test_scroll_to_bottom_button_is_sticky(self):
        """Scroll-to-bottom button must use position:sticky so it stays visible (#677)."""
        btn_css_pos = STYLE_CSS.find(".scroll-to-bottom-btn")
        assert btn_css_pos != -1
        btn_css = STYLE_CSS[btn_css_pos:btn_css_pos + 300]
        assert "sticky" in btn_css, (
            ".scroll-to-bottom-btn must use position:sticky to stay at bottom of viewport (#677)"
        )

    def test_scroll_listener_hides_button_when_pinned(self):
        """Scroll listener must hide the button when user is near the bottom (#677)."""
        scroll_listener_start = UI_JS.find("el.addEventListener('scroll'")
        assert scroll_listener_start != -1, "scroll event listener not found"
        listener_block = UI_JS[scroll_listener_start:scroll_listener_start + 300]
        assert "scrollToBottomBtn" in listener_block, (
            "Scroll listener must show/hide scrollToBottomBtn based on _scrollPinned (#677)"
        )

    def test_scroll_to_bottom_button_calls_scroll_to_bottom(self):
        """scrollToBottomBtn onclick must call scrollToBottom() (#677)."""
        btn_pos = INDEX_HTML.find("scrollToBottomBtn")
        assert btn_pos != -1
        btn_context = INDEX_HTML[btn_pos:btn_pos + 200]
        assert "scrollToBottom()" in btn_context, (
            "scrollToBottomBtn onclick must call scrollToBottom() (#677)"
        )

View File

@@ -0,0 +1,25 @@
"""Regression tests for fenced code block syntax highlighting."""
from pathlib import Path
UI_JS = Path(__file__).resolve().parent.parent / "static" / "ui.js"
def _read_ui_js() -> str:
    """Return the current contents of static/ui.js."""
    return UI_JS.read_text()
def test_fenced_code_blocks_add_prism_language_class():
    """The markdown renderer must tag <code> with a Prism language-* class."""
    source = _read_ui_js()
    assert 'class="language-${esc(normalizedLang)}"' in source, (
        "Fenced code blocks should add Prism language-* classes so syntax highlighting works"
    )
def test_fenced_code_blocks_keep_existing_pre_header_layout():
    """The highlight fix must not restructure the existing <pre><code> markup."""
    source = _read_ui_js()
    assert 'return `${h}<pre><code${langAttr}>${esc(code.replace(/\\n$/,' in source, (
        "The syntax-highlight fix should preserve the existing fenced code block layout"
    )
    assert '<div class="code-block">' not in source, (
        "This fix should not introduce a new wrapper around fenced code blocks"
    )

View File

@@ -0,0 +1,317 @@
"""
Tests for issues #373, #374, and #375.
#373: Chat silently swallows errors — no feedback when agent fails to respond
#374: Remove stale OpenAI models from default list (gpt-4o, o3)
#375: Model dropdown should fetch live models from provider
"""
import pathlib
import re
REPO = pathlib.Path(__file__).parent.parent
STREAMING_PY = (REPO / "api" / "streaming.py").read_text(encoding="utf-8")
CONFIG_PY = (REPO / "api" / "config.py").read_text(encoding="utf-8")
ROUTES_PY = (REPO / "api" / "routes.py").read_text(encoding="utf-8")
MESSAGES_JS = (REPO / "static" / "messages.js").read_text(encoding="utf-8")
UI_JS = (REPO / "static" / "ui.js").read_text(encoding="utf-8")
# ── Issue #373: Silent error detection ──────────────────────────────────────
class TestSilentErrorDetection:
    """streaming.py must emit apperror when agent returns no assistant reply.

    Source-scan tests for #373: they grep api/streaming.py and static/messages.js
    (read into module-level constants) for the markers the fix introduced.
    """

    def test_streaming_detects_no_assistant_reply(self):
        """streaming.py must check if any assistant message was produced."""
        assert "_assistant_added" in STREAMING_PY, (
            "streaming.py must check whether an assistant message was produced (#373)"
        )

    def test_streaming_emits_apperror_on_no_response(self):
        """streaming.py must emit apperror event when agent produced no reply."""
        assert "no_response" in STREAMING_PY, (
            "streaming.py must emit apperror with type='no_response' for silent failures (#373)"
        )

    def test_streaming_returns_early_after_apperror(self):
        """streaming.py must return after emitting apperror (not also emit done)."""
        # The return statement must come after the put('apperror') for no_response;
        # the second find() is seeded at no_resp_pos to enforce that ordering.
        no_resp_pos = STREAMING_PY.find("'no_response'")
        return_pos = STREAMING_PY.find("return # Don't emit done", no_resp_pos)
        assert no_resp_pos != -1, "no_response type not found in streaming.py"
        assert return_pos != -1, (
            "streaming.py must return after emitting apperror to prevent also emitting done (#373)"
        )
        assert return_pos > no_resp_pos

    def test_streaming_detects_auth_error_in_result(self):
        """streaming.py must detect auth errors from the result object."""
        assert "_is_auth" in STREAMING_PY, (
            "streaming.py must detect auth errors in silent failures (#373)"
        )
        assert "auth_mismatch" in STREAMING_PY, (
            "streaming.py must emit auth_mismatch type for auth failures (#373)"
        )

    def test_messages_js_done_handler_detects_no_reply(self):
        """messages.js done handler must show an error if no assistant reply arrived."""
        # Check for either the variable name or the inlined check pattern
        has_no_reply_guard = (
            "hasAssistantReply" in MESSAGES_JS
            or ("role==='assistant'" in MESSAGES_JS and "No response received" in MESSAGES_JS)
        )
        assert has_no_reply_guard, (
            "messages.js done handler must detect zero assistant replies (#373)"
        )
        assert "No response received" in MESSAGES_JS, (
            "messages.js must show 'No response received' inline message (#373)"
        )

    def test_messages_js_handles_no_response_apperror_type(self):
        """messages.js apperror handler must recognise the no_response type."""
        assert "isNoResponse" in MESSAGES_JS or "no_response" in MESSAGES_JS, (
            "messages.js apperror handler must handle type='no_response' (#373)"
        )

    def test_messages_js_no_response_label(self):
        """messages.js must show a distinct label for no_response errors."""
        assert "No response received" in MESSAGES_JS, (
            "messages.js must display 'No response received' label for no_response errors (#373)"
        )
# ── Issue #374: Stale model list cleanup ─────────────────────────────────────
class TestStaleModelListCleanup:
    """gpt-4o and o3 must be removed from the primary OpenAI model lists (#374)."""

    @staticmethod
    def _config_block(marker: str, end: str) -> str:
        """Slice api/config.py from *marker* to the next occurrence of *end*.

        Asserts both tokens exist. The original tests used str.find() results
        unguarded: a missing marker yields -1, which silently produces a
        garbage slice and lets the "not in" assertions pass vacuously.
        """
        start = CONFIG_PY.find(marker)
        assert start != -1, f"{marker!r} not found in api/config.py"
        stop = CONFIG_PY.find(end, start)
        assert stop != -1, f"closing {end!r} not found after {marker!r} in api/config.py"
        return CONFIG_PY[start:stop]

    def test_gpt4o_removed_from_fallback_models(self):
        """_FALLBACK_MODELS must not contain gpt-4o (issue #374)."""
        fallback_block = self._config_block("_FALLBACK_MODELS = [", "]")
        assert "gpt-4o" not in fallback_block, (
            "_FALLBACK_MODELS still contains gpt-4o — remove it per issue #374"
        )

    def test_o3_removed_from_fallback_models(self):
        """_FALLBACK_MODELS must not contain o3 (issue #374)."""
        fallback_block = self._config_block("_FALLBACK_MODELS = [", "]")
        assert '"o3"' not in fallback_block and "'o3'" not in fallback_block, (
            "_FALLBACK_MODELS still contains o3 — remove it per issue #374"
        )

    def test_gpt4o_removed_from_provider_models_openai(self):
        """_PROVIDER_MODELS['openai'] must not contain gpt-4o (issue #374)."""
        openai_block = self._config_block('"openai": [', "],")
        assert "gpt-4o" not in openai_block, (
            "_PROVIDER_MODELS['openai'] still contains gpt-4o — remove per issue #374"
        )

    def test_o3_removed_from_provider_models_openai(self):
        """_PROVIDER_MODELS['openai'] must not contain o3 (issue #374)."""
        openai_block = self._config_block('"openai": [', "],")
        assert '"o3"' not in openai_block and "'o3'" not in openai_block, (
            "_PROVIDER_MODELS['openai'] still contains o3 — remove per issue #374"
        )

    def test_fallback_still_has_gpt54_mini(self):
        """_FALLBACK_MODELS must still contain gpt-5.4-mini (not over-trimmed)."""
        assert "gpt-5.4-mini" in CONFIG_PY, (
            "_FALLBACK_MODELS must keep gpt-5.4-mini as primary OpenAI model (#374)"
        )

    def test_fallback_has_gpt54(self):
        """_FALLBACK_MODELS must contain gpt-5.4-mini as the primary OpenAI option."""
        from api.config import _FALLBACK_MODELS
        ids = [m["id"] for m in _FALLBACK_MODELS]
        assert any("gpt-5.4-mini" in mid for mid in ids), (
            "_FALLBACK_MODELS must include gpt-5.4-mini as the primary OpenAI option"
        )

    def test_copilot_list_unchanged(self):
        """Copilot provider model list should still include gpt-4o (it's a valid Copilot model)."""
        # The copilot section is optional — skip silently when absent.
        copilot_start = CONFIG_PY.find('"copilot": [')
        if copilot_start == -1:
            return  # No copilot list — that's fine
        copilot_end = CONFIG_PY.find("],", copilot_start)
        copilot_block = CONFIG_PY[copilot_start:copilot_end]
        assert "gpt-4o" in copilot_block, (
            "Copilot provider model list should keep gpt-4o (it's available via Copilot) (#374)"
        )
# ── Issue #375: Live model fetching ─────────────────────────────────────────
class TestLiveModelFetching:
    """Backend and frontend must support live model fetching from provider APIs.

    Source-scan tests for #375 over api/routes.py and static/ui.js
    (module-level constants ROUTES_PY / UI_JS).
    """

    def test_live_models_endpoint_exists_in_routes(self):
        """routes.py must have a /api/models/live endpoint (#375)."""
        assert "/api/models/live" in ROUTES_PY, (
            "routes.py must define /api/models/live endpoint (#375)"
        )

    def test_live_models_handler_function_exists(self):
        """routes.py must define _handle_live_models() function (#375)."""
        assert "def _handle_live_models(" in ROUTES_PY, (
            "routes.py must define _handle_live_models() for live model fetching (#375)"
        )

    def test_live_models_handler_validates_scheme(self):
        """_handle_live_models must validate URL scheme to prevent file:// injection (B310)."""
        # Either an explicit bandit suppression or a scheme/http check counts.
        assert "nosec B310" in ROUTES_PY or ("scheme" in ROUTES_PY and "http" in ROUTES_PY), (
            "_handle_live_models must validate URL scheme before urlopen (#375)"
        )

    def test_live_models_handler_has_ssrf_guard(self):
        """_handle_live_models must guard against SSRF (private IP access)."""
        assert "ssrf_blocked" in ROUTES_PY or ("is_private" in ROUTES_PY and "live" in ROUTES_PY), (
            "_handle_live_models must have SSRF protection for private IP ranges (#375)"
        )

    def test_live_models_all_providers_handled_via_agent(self):
        """_handle_live_models must delegate to provider_model_ids() which handles all
        providers gracefully — live fetch where possible, static fallback otherwise.

        The old 'not_supported' return for Anthropic/Google is superseded: those
        providers now return live or static model lists via the agent delegate."""
        assert "provider_model_ids" in ROUTES_PY, (
            "_handle_live_models must delegate to hermes_cli.models.provider_model_ids() "
            "so all providers are handled uniformly (#375 upgrade)"
        )

    def test_frontend_has_fetch_live_models_function(self):
        """ui.js must define _fetchLiveModels() for background live model loading (#375)."""
        assert "function _fetchLiveModels(" in UI_JS or "async function _fetchLiveModels(" in UI_JS, (
            "ui.js must define _fetchLiveModels() function (#375)"
        )

    def test_frontend_live_models_cache_exists(self):
        """ui.js must cache live model responses to avoid redundant API calls (#375)."""
        assert "_liveModelCache" in UI_JS, (
            "ui.js must use _liveModelCache to avoid re-fetching on every dropdown open (#375)"
        )

    def test_frontend_calls_live_models_after_static_load(self):
        """populateModelDropdown must call _fetchLiveModels after rendering the static list (#375)."""
        assert "_fetchLiveModels" in UI_JS, (
            "populateModelDropdown must call _fetchLiveModels for background update (#375)"
        )

    def test_frontend_live_fetch_only_adds_new_models(self):
        """_fetchLiveModels must not duplicate models already in the static list (#375)."""
        assert "existingIds" in UI_JS, (
            "_fetchLiveModels must track existing model IDs to avoid duplicates (#375)"
        )

    def test_frontend_live_fetch_covers_all_providers(self):
        """_fetchLiveModels no longer skips any provider — all providers return
        live or fallback models via provider_model_ids() on the backend (#375 upgrade)."""
        # The old skip list (anthropic, google, gemini) must be gone from the guard;
        # only the text immediately around the includes() call is inspected.
        skip_guard_pos = UI_JS.find("includes(provider)")
        if skip_guard_pos != -1:
            guard_line = UI_JS[max(0,skip_guard_pos-100):skip_guard_pos+50]
            assert "anthropic" not in guard_line, (
                "_fetchLiveModels must not skip anthropic — backend now handles it (#375 upgrade)"
            )

    def test_live_models_endpoint_wired_in_routes(self):
        """The /api/models/live path must be handled in handle_get()."""
        # Find handle_get and check our route appears inside it (by position)
        handle_get_pos = ROUTES_PY.find("def handle_get(")
        live_route_pos = ROUTES_PY.find('"/api/models/live"')
        assert handle_get_pos != -1 and live_route_pos != -1
        assert live_route_pos > handle_get_pos, (
            "/api/models/live must be inside handle_get() (#375)"
        )
# ── #669: Gemini model IDs must be valid for Google AI Studio endpoint ────────
class TestGeminiModelIds:
"""Gemini 3.x model IDs must be valid for the native Google AI Studio provider.
The original code had gemini-3.1-flash-lite-preview missing from the
dropdown. The fallback list also erroneously used gemini-3.1-pro-preview
in some provider sections while omitting gemini-3.1-flash-lite-preview.
All provider sections must now include the full current Gemini 3.x lineup.
"""
VALID_GEMINI_3 = [
"gemini-3.1-pro-preview",
"gemini-3-flash-preview",
"gemini-3.1-flash-lite-preview",
]
def test_gemini_provider_models_has_3x(self):
"""_PROVIDER_MODELS['gemini'] must contain valid Gemini 3.x model IDs (#669)."""
gemini_block_start = CONFIG_PY.find('"gemini": [')
assert gemini_block_start != -1, "_PROVIDER_MODELS['gemini'] block not found"
gemini_block = CONFIG_PY[gemini_block_start:gemini_block_start + 600]
for mid in self.VALID_GEMINI_3:
assert mid in gemini_block, (
f"_PROVIDER_MODELS['gemini'] must contain {mid!r}"
f"this is a valid Google AI Studio model ID (#669)"
)
def test_gemini_provider_models_has_flash_lite(self):
"""_PROVIDER_MODELS['gemini'] must contain gemini-3.1-flash-lite-preview (#669).
This was the model the reporter selected from the wizard — it must appear
in the native gemini provider model list so users can select it.
"""
gemini_block_start = CONFIG_PY.find('"gemini": [')
assert gemini_block_start != -1
gemini_block = CONFIG_PY[gemini_block_start:gemini_block_start + 600]
assert "gemini-3.1-flash-lite-preview" in gemini_block, (
"_PROVIDER_MODELS['gemini'] missing gemini-3.1-flash-lite-preview — "
"this was the exact model the #669 reporter tried and got API_KEY_INVALID"
)
def test_fallback_models_has_gemini_3x(self):
"""_FALLBACK_MODELS must contain valid Gemini 3.x OpenRouter model IDs (#669)."""
fallback_start = CONFIG_PY.find("_FALLBACK_MODELS = [")
fallback_end = CONFIG_PY.find("]", fallback_start + len("_FALLBACK_MODELS = ["))
# Find the closing bracket for the list (multi-line)
depth = 0
pos = fallback_start + len("_FALLBACK_MODELS = [")
for i, ch in enumerate(CONFIG_PY[pos:], start=pos):
if ch == '[':
depth += 1
elif ch == ']':
if depth == 0:
fallback_end = i
break
depth -= 1
fallback_block = CONFIG_PY[fallback_start:fallback_end]
for mid in ("google/gemini-3.1-pro-preview", "google/gemini-3-flash-preview"):
assert mid in fallback_block, (
f"_FALLBACK_MODELS must contain {mid!r} for OpenRouter Google models (#669)"
)
def test_gemini_provider_also_has_stable_25(self):
"""_PROVIDER_MODELS['gemini'] must retain stable Gemini 2.5 models (#669)."""
gemini_block_start = CONFIG_PY.find('"gemini": [')
assert gemini_block_start != -1
gemini_block = CONFIG_PY[gemini_block_start:gemini_block_start + 600]
assert "gemini-2.5-pro" in gemini_block, (
"_PROVIDER_MODELS['gemini'] must keep gemini-2.5-pro as a stable fallback"
)
def test_no_invalid_gemini_3_pro_model(self):
    """gemini-3-pro-preview must not appear — it was shut down March 9 2026 (#669)."""
    # Count the quoted, bare (non-.1) model ID.  Quoting the needle keeps the
    # valid "gemini-3.1-pro-preview" replacement from matching.  The previous
    # `A not in CONFIG_PY or B in CONFIG_PY` assert was vacuous: it passed
    # whenever the 3.1 replacement was present, even with the retired ID too.
    count_bare = CONFIG_PY.count('"gemini-3-pro-preview"')
    assert count_bare == 0, (
        f"gemini-3-pro-preview appears {count_bare} time(s) in config.py — "
        "it was shut down March 9 2026, use gemini-3.1-pro-preview (#669)"
    )

View File

@@ -0,0 +1,262 @@
import json
import pathlib
import re
import subprocess
import textwrap
REPO_ROOT = pathlib.Path(__file__).parent.parent.resolve()
I18N_JS = (REPO_ROOT / "static" / "i18n.js").read_text(encoding="utf-8")
BOOT_JS = (REPO_ROOT / "static" / "boot.js").read_text(encoding="utf-8")
PANELS_JS = (REPO_ROOT / "static" / "panels.js").read_text(encoding="utf-8")
def _run_i18n_case(script_expr: str) -> dict:
    """Evaluate *script_expr* against static/i18n.js in Node and return the JSON result.

    i18n.js is loaded into a `vm` sandbox with minimal localStorage and
    document stubs so browser-only code can run under Node.  *script_expr*
    must be a single JS expression; its value is JSON-serialized to stdout.
    Raises CalledProcessError if the node process fails.
    """
    # Wrap in an IIFE so an object literal parses as an expression, not a block.
    wrapped_expr = f"(() => ({script_expr}))()"
    script = textwrap.dedent(
        f"""
        const fs = require('fs');
        const vm = require('vm');
        const src = fs.readFileSync({json.dumps(str(REPO_ROOT / "static" / "i18n.js"))}, 'utf8');
        const storage = {{}};
        const ctx = {{
            localStorage: {{
                getItem: (k) => Object.prototype.hasOwnProperty.call(storage, k) ? storage[k] : null,
                setItem: (k, v) => {{ storage[k] = String(v); }},
            }},
            document: {{
                documentElement: {{ lang: '' }},
                querySelectorAll: () => [],
            }},
        }};
        vm.createContext(ctx);
        vm.runInContext(src, ctx);
        const out = vm.runInContext({json.dumps(wrapped_expr)}, ctx);
        process.stdout.write(JSON.stringify(out));
        """
    )
    proc = subprocess.run(["node", "-e", script], check=True, capture_output=True, text=True)
    return json.loads(proc.stdout)
def _extract_call_arglists(src: str, fn_name: str) -> list[str]:
token = f"{fn_name}("
out = []
search_from = 0
while True:
start = src.find(token, search_from)
if start < 0:
return out
i = start + len(token)
depth = 1
in_single = False
in_double = False
in_backtick = False
escape = False
while i < len(src):
ch = src[i]
if escape:
escape = False
i += 1
continue
if in_single:
if ch == "\\":
escape = True
elif ch == "'":
in_single = False
i += 1
continue
if in_double:
if ch == "\\":
escape = True
elif ch == '"':
in_double = False
i += 1
continue
if in_backtick:
if ch == "\\":
escape = True
elif ch == "`":
in_backtick = False
i += 1
continue
if ch == "'":
in_single = True
elif ch == '"':
in_double = True
elif ch == "`":
in_backtick = True
elif ch == "(":
depth += 1
elif ch == ")":
depth -= 1
if depth == 0:
out.append(src[start + len(token) : i])
break
i += 1
search_from = start + len(token)
def _split_top_level_args(arg_src: str) -> list[str]:
args = []
cur = []
paren = 0
brace = 0
bracket = 0
in_single = False
in_double = False
in_backtick = False
escape = False
for ch in arg_src:
if escape:
cur.append(ch)
escape = False
continue
if in_single:
cur.append(ch)
if ch == "\\":
escape = True
elif ch == "'":
in_single = False
continue
if in_double:
cur.append(ch)
if ch == "\\":
escape = True
elif ch == '"':
in_double = False
continue
if in_backtick:
cur.append(ch)
if ch == "\\":
escape = True
elif ch == "`":
in_backtick = False
continue
if ch == "'":
in_single = True
cur.append(ch)
continue
if ch == '"':
in_double = True
cur.append(ch)
continue
if ch == "`":
in_backtick = True
cur.append(ch)
continue
if ch == "(":
paren += 1
cur.append(ch)
continue
if ch == ")":
paren -= 1
cur.append(ch)
continue
if ch == "{":
brace += 1
cur.append(ch)
continue
if ch == "}":
brace -= 1
cur.append(ch)
continue
if ch == "[":
bracket += 1
cur.append(ch)
continue
if ch == "]":
bracket -= 1
cur.append(ch)
continue
if ch == "," and paren == 0 and brace == 0 and bracket == 0:
args.append("".join(cur).strip())
cur = []
continue
cur.append(ch)
if cur:
args.append("".join(cur).strip())
return args
def _has_precedence_call(src: str, first_arg: str) -> bool:
    """True if *src* calls resolvePreferredLocale(first_arg, <saved 'hermes-lang'>).

    Whitespace inside the arguments is ignored; the second argument must be a
    localStorage read of the 'hermes-lang' key (either quote style).
    """
    saved_lang_reads = (
        "localStorage.getItem('hermes-lang')",
        'localStorage.getItem("hermes-lang")',
    )
    for call_args in _extract_call_arglists(src, "resolvePreferredLocale"):
        parts = _split_top_level_args(call_args)
        if len(parts) < 2:
            continue
        head = re.sub(r"\s+", "", parts[0])
        second = re.sub(r"\s+", "", parts[1])
        if head == first_arg and second in saved_lang_reads:
            return True
    return False
def test_i18n_exposes_locale_resolvers():
    # Both resolver entry points must be declared in static/i18n.js.
    for declaration in ("function resolveLocale(", "function resolvePreferredLocale("):
        assert declaration in I18N_JS
def test_locale_alias_resolution_and_precedence_logic():
    """Alias forms (zh-CN, zh_TW, EN-us, …) must map to canonical locale keys,
    and resolvePreferredLocale must prefer the first resolvable argument."""
    result = _run_i18n_case(
        """
        {
            zhCn: resolveLocale('zh-CN'),
            zhTw: resolveLocale('zh_TW'),
            enUs: resolveLocale('EN-us'),
            esMx: resolveLocale('es-MX'),
            bad: resolveLocale('xx-YY'),
            preferred1: resolvePreferredLocale('zh-CN', 'en'),
            preferred2: resolvePreferredLocale('xx-YY', 'zh-Hant'),
            preferred3: resolvePreferredLocale('', 'xx-YY'),
        }
        """
    )
    # Alias → canonical key (case/sep-insensitive); unknown tags resolve to null.
    assert result["zhCn"] == "zh"
    assert result["zhTw"] == "zh-Hant"
    assert result["enUs"] == "en"
    assert result["esMx"] == "es"
    assert result["bad"] is None
    # Precedence: first resolvable wins; when nothing resolves, fall back to 'en'.
    assert result["preferred1"] == "zh"
    assert result["preferred2"] == "zh-Hant"
    assert result["preferred3"] == "en"
def test_set_locale_normalizes_alias_and_persists_canonical_key():
    """setLocale('zh-CN') must persist the canonical key 'zh' while setting the
    full BCP 47 tag 'zh-CN' on the <html> element."""
    result = _run_i18n_case(
        """
        {
            ...(setLocale('zh-CN'), {}),
            saved: localStorage.getItem('hermes-lang'),
            htmlLang: document.documentElement.lang,
        }
        """
    )
    # Storage gets the canonical key; document.documentElement.lang gets the tag.
    assert result["saved"] == "zh"
    assert result["htmlLang"] == "zh-CN"
def test_boot_and_settings_panel_use_shared_locale_precedence():
    # Both the boot path and the settings panel must resolve language via
    # resolvePreferredLocale(<settings language>, localStorage 'hermes-lang').
    for src, lang_expr in ((BOOT_JS, "s.language"), (PANELS_JS, "settings.language")):
        assert _has_precedence_call(src, lang_expr)

View File

@@ -0,0 +1,86 @@
import json
import urllib.error
import urllib.request
from tests._pytest_port import BASE
def get(path):
    """GET BASE+path; return (parsed JSON body, HTTP status)."""
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        raw = resp.read()
        code = resp.status
    return json.loads(raw), code
def get_raw(path):
    """GET BASE+path; return (decoded text body, HTTP status)."""
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        text = resp.read().decode()
        code = resp.status
    return text, code
def post(path, body=None):
    """POST *body* as JSON to BASE+path; return (parsed JSON response, HTTP status).

    HTTPError responses are caught and returned rather than raised so callers
    can assert on non-2xx statuses directly.
    """
    req = urllib.request.Request(
        BASE + path,
        data=json.dumps(body or {}).encode(),
        headers={"Content-Type": "application/json"},
    )
    try:
        with urllib.request.urlopen(req, timeout=10) as resp:
            return json.loads(resp.read()), resp.status
    except urllib.error.HTTPError as err:
        return json.loads(err.read()), err.code
def _current_language():
    """Return the server's currently configured UI language ('en' if unset)."""
    body, code = get("/api/settings")
    assert code == 200
    lang = body.get("language")
    return lang or "en"
def test_login_page_uses_simplified_chinese_for_zh_cn_alias():
    """Saving language 'zh-CN' must render /login in Simplified Chinese."""
    prev_lang = _current_language()
    try:
        saved, status = post("/api/settings", {"language": "zh-CN"})
        assert status == 200
        assert saved.get("language") == "zh-CN"
        html, status2 = get_raw("/login")
        assert status2 == 200
        assert 'lang="zh-CN"' in html
        # "登录" ("Log in") and "输入密码继续使用" ("enter password to continue").
        assert "\u767b\u5f55" in html
        assert "\u8f93\u5165\u5bc6\u7801\u7ee7\u7eed\u4f7f\u7528" in html
    finally:
        # Always restore the previous language so later tests see a clean state.
        restored, restore_status = post("/api/settings", {"language": prev_lang})
        assert restore_status == 200
        assert restored.get("language") == prev_lang
def test_login_page_uses_traditional_chinese_for_zh_hant():
    """Saving language 'zh-Hant' must render /login in Traditional Chinese."""
    prev_lang = _current_language()
    try:
        saved, status = post("/api/settings", {"language": "zh-Hant"})
        assert status == 200
        assert saved.get("language") == "zh-Hant"
        html, status2 = get_raw("/login")
        assert status2 == 200
        assert 'lang="zh-TW"' in html
        # "輸入密碼繼續使用" ("enter password to continue") and
        # "密碼錯誤" ("wrong password").
        assert "\u8f38\u5165\u5bc6\u78bc\u7e7c\u7e8c\u4f7f\u7528" in html
        assert "\u5bc6\u78bc\u932f\u8aa4" in html
    finally:
        # Always restore the previous language so later tests see a clean state.
        restored, restore_status = post("/api/settings", {"language": prev_lang})
        assert restore_status == 200
        assert restored.get("language") == prev_lang
def test_login_page_uses_russian_for_ru():
    """Saving language 'ru' must render /login in Russian."""
    prev_lang = _current_language()
    try:
        saved, status = post("/api/settings", {"language": "ru"})
        assert status == 200
        assert saved.get("language") == "ru"
        html, status2 = get_raw("/login")
        assert status2 == 200
        assert 'lang="ru-RU"' in html
        # "Войти" ("Log in"), "Введите пароль, чтобы продолжить"
        # ("enter password to continue"), "Неверный пароль" ("wrong password").
        assert "\u0412\u043e\u0439\u0442\u0438" in html
        assert "\u0412\u0432\u0435\u0434\u0438\u0442\u0435 \u043f\u0430\u0440\u043e\u043b\u044c, \u0447\u0442\u043e\u0431\u044b \u043f\u0440\u043e\u0434\u043e\u043b\u0436\u0438\u0442\u044c" in html
        assert "\u041d\u0435\u0432\u0435\u0440\u043d\u044b\u0439 \u043f\u0430\u0440\u043e\u043b\u044c" in html
    finally:
        # Always restore the previous language so later tests see a clean state.
        restored, restore_status = post("/api/settings", {"language": prev_lang})
        assert restore_status == 200
        assert restored.get("language") == prev_lang

216
tests/test_media_inline.py Normal file
View File

@@ -0,0 +1,216 @@
"""
Tests for feat #450: MEDIA: token inline rendering in web UI chat.
Covers:
1. /api/media endpoint: serves local image files by absolute path
2. /api/media endpoint: rejects paths outside allowed roots (path traversal)
3. /api/media endpoint: 404 for non-existent files
4. /api/media endpoint: auth gate when auth is enabled
5. renderMd() MEDIA: stash/restore logic (static JS analysis)
6. /api/media endpoint: integration test via live server (requires 8788)
"""
from __future__ import annotations
import json
import os
import pathlib
import tempfile
import unittest
import urllib.error
import urllib.parse
import urllib.request

from tests._pytest_port import BASE, TEST_STATE_DIR
REPO_ROOT = pathlib.Path(__file__).parent.parent
UI_JS = (REPO_ROOT / "static" / "ui.js").read_text(encoding="utf-8")
# ── Static analysis: renderMd MEDIA stash ────────────────────────────────────
class TestMediaRenderMdStash(unittest.TestCase):
    """Verify the MEDIA: stash/restore logic exists in ui.js.

    These are static source checks against static/ui.js — no browser needed.
    """

    def test_media_stash_defined(self):
        self.assertIn("media_stash", UI_JS,
                      "media_stash array must be defined in renderMd()")

    def test_media_token_regex(self):
        self.assertIn("MEDIA:", UI_JS,
                      "MEDIA: token regex must be present in renderMd()")

    def test_media_restore_produces_img_tag(self):
        self.assertIn("msg-media-img", UI_JS,
                      "restore pass must produce <img class='msg-media-img'>")

    def test_media_restore_produces_download_link(self):
        self.assertIn("msg-media-link", UI_JS,
                      "restore pass must produce download link for non-image files")

    def test_media_api_url_pattern(self):
        self.assertIn("api/media?path=", UI_JS,
                      "renderMd must build api/media?path=... URL for local files")

    def test_media_stash_uses_null_byte_token(self):
        # "\x00D" is the stash placeholder; a NUL byte is unlikely to collide
        # with user-authored markdown content.
        self.assertIn("\\x00D", UI_JS,
                      "MEDIA stash must use null-byte token (\\x00D) to avoid conflicts")

    def test_media_stash_runs_before_fence_stash(self):
        # Source-order proxy: compare first occurrences of each identifier.
        media_pos = UI_JS.find("media_stash")
        fence_pos = UI_JS.find("fence_stash")
        self.assertGreater(fence_pos, media_pos,
                           "media_stash must be defined before fence_stash in renderMd()")

    def test_image_extension_regex_covers_common_types(self):
        # The JS source has these extensions in a regex like /\.png|jpg|.../i
        # Check for the extension strings (without the dot, which may be escaped as \.)
        for ext in ["png", "jpg", "jpeg", "gif", "webp"]:
            self.assertIn(ext, UI_JS,
                          f"Image extension {ext} must be in the MEDIA img-check regex")

    def test_http_url_media_rendered_as_img(self):
        # renderMd should treat MEDIA:https://... as an <img>
        # In the JS source, the regex is /^https?:\/\//i (escaped)
        self.assertTrue(
            "https?:" in UI_JS or "http" in UI_JS,
            "MEDIA: restore must handle HTTPS URLs",
        )

    def test_zoom_toggle_on_click(self):
        self.assertIn("msg-media-img--full", UI_JS,
                      "Clicking the image must toggle msg-media-img--full class for zoom")
# ── Static analysis: CSS ──────────────────────────────────────────────────────
class TestMediaCSS(unittest.TestCase):
    """Static checks that the MEDIA-rendering CSS classes exist in style.css."""

    # Stylesheet source, loaded once at class-definition time; tests only read it.
    CSS = (REPO_ROOT / "static" / "style.css").read_text(encoding="utf-8")

    def test_msg_media_img_class_defined(self):
        self.assertIn(".msg-media-img", self.CSS)

    def test_msg_media_img_max_width(self):
        # Should have a max-width to prevent huge images breaking layout.
        idx = self.CSS.find(".msg-media-img{")
        self.assertGreater(idx, 0)
        # Inspect only the first 200 chars after the selector (the rule body).
        rule = self.CSS[idx:idx+200]
        self.assertIn("max-width", rule)

    def test_msg_media_img_full_class_defined(self):
        self.assertIn(".msg-media-img--full", self.CSS,
                      "Full-size toggle class must exist for zoom-on-click")

    def test_msg_media_link_class_defined(self):
        self.assertIn(".msg-media-link", self.CSS,
                      "Download link style must be defined for non-image media")
# ── Backend: /api/media endpoint (unit-level, no server needed) ─────────────
class TestMediaEndpointUnit(unittest.TestCase):
    """Test route registration and handler logic via imports."""

    # Only the first test imports api.routes; the rest are source-text checks
    # against api/routes.py so they run even if the module cannot be imported.

    def test_handle_media_function_exists(self):
        from api import routes
        self.assertTrue(
            hasattr(routes, "_handle_media"),
            "_handle_media must be defined in api/routes.py",
        )

    def test_api_media_route_registered(self):
        """The GET dispatch must include the /api/media path."""
        routes_src = (REPO_ROOT / "api" / "routes.py").read_text(encoding="utf-8")
        self.assertIn('"/api/media"', routes_src,
                      '/api/media must be registered in the GET route dispatch')

    def test_allowed_roots_include_tmp(self):
        """Handler must allow /tmp so screenshot paths work."""
        routes_src = (REPO_ROOT / "api" / "routes.py").read_text(encoding="utf-8")
        self.assertIn('/tmp', routes_src,
                      '/tmp must be in the allowed roots list for /api/media')

    def test_svg_forces_download(self):
        """.svg must not be served inline (XSS risk)."""
        routes_src = (REPO_ROOT / "api" / "routes.py").read_text(encoding="utf-8")
        # SVG should be in _DOWNLOAD_TYPES or explicitly excluded from inline
        self.assertIn("image/svg+xml", routes_src,
                      "SVG MIME type must be handled (forced download) in _handle_media")

    def test_non_image_forces_download(self):
        """Non-image files should be forced to download, not served inline."""
        routes_src = (REPO_ROOT / "api" / "routes.py").read_text(encoding="utf-8")
        self.assertIn("_INLINE_IMAGE_TYPES", routes_src,
                      "_INLINE_IMAGE_TYPES whitelist must exist in _handle_media")
# ── Integration tests: live server on TEST_PORT ───────────────────────────────
# No collection-time skip guard — conftest.py starts the server via its
# autouse session fixture BEFORE tests run. A collection-time check always
# sees no server and turns every test into a skip. Instead we assert
# reachability inside setUp() so failures are loud errors, not silent skips.
class TestMediaEndpointIntegration(unittest.TestCase):
    """Live-server tests for /api/media against the conftest-managed test server."""

    def setUp(self):
        # Assert (not skip) reachability: conftest's autouse session fixture
        # must have started the server before any test in this class runs.
        try:
            urllib.request.urlopen(BASE + "/health", timeout=5)
        except Exception as exc:
            self.fail(f"Test server at {BASE} is not reachable: {exc}")

    def _get(self, path):
        """GET *path*; return (body bytes, status, headers) even for HTTP errors."""
        try:
            with urllib.request.urlopen(BASE + path, timeout=10) as r:
                return r.read(), r.status, r.headers
        except urllib.error.HTTPError as e:
            return e.read(), e.code, e.headers

    def test_no_path_returns_400(self):
        _, status, _ = self._get("/api/media")
        self.assertEqual(status, 400)

    def test_nonexistent_file_returns_404(self):
        _, status, _ = self._get("/api/media?path=/tmp/__hermes_nonexistent_12345.png")
        self.assertEqual(status, 404)

    def test_path_outside_allowed_root_rejected(self):
        # /etc/passwd is outside allowed roots
        _, status, _ = self._get("/api/media?path=/etc/passwd")
        self.assertIn(status, {403, 404})

    def test_valid_png_served_with_image_mime(self):
        """Create a 1-pixel PNG in /tmp and verify it's served correctly."""
        # Minimal valid 1x1 transparent PNG (67 bytes)
        png_bytes = (
            b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01'
            b'\x08\x06\x00\x00\x00\x1f\x15\xc4\x89\x00\x00\x00\nIDATx\x9cc\x00'
            b'\x01\x00\x00\x05\x00\x01\r\n-\xb4\x00\x00\x00\x00IEND\xaeB`\x82'
        )
        with tempfile.NamedTemporaryFile(
            suffix=".png", prefix="hermes_test_", dir="/tmp", delete=False
        ) as f:
            f.write(png_bytes)
            tmp_path = f.name
        try:
            # urllib.parse.quote is the documented location of quote();
            # urllib.request.quote worked only via an undocumented re-export.
            body, status, headers = self._get(
                f"/api/media?path={urllib.parse.quote(tmp_path)}"
            )
            self.assertEqual(status, 200, f"Expected 200, got {status}")
            ct = headers.get("Content-Type", "")
            self.assertIn("image/png", ct, f"Expected image/png, got {ct}")
            self.assertEqual(body, png_bytes)
        finally:
            pathlib.Path(tmp_path).unlink(missing_ok=True)

    def test_path_traversal_rejected(self):
        _, status, _ = self._get(
            "/api/media?path=" + urllib.parse.quote("/tmp/../../etc/passwd")
        )
        self.assertIn(status, {403, 404})

    def test_health_check_still_works(self):
        """Sanity: server is up and /health works."""
        body, status, _ = self._get("/health")
        self.assertEqual(status, 200)
        d = json.loads(body)
        self.assertEqual(d["status"], "ok")

View File

@@ -0,0 +1,148 @@
"""
Tests for MiniMax provider support in the model/provider discovery layer.
Covers:
- MiniMax models appear in the fallback model list
- MINIMAX_API_KEY env var is scanned and detected from os.environ
- @minimax: provider hint routing works correctly
- minimax/MiniMax-M2.7 (slash format) is routed via openrouter when active provider differs
"""
import os
import api.config as config
# ── Helper ────────────────────────────────────────────────────────────────────
def _resolve_with_config(model_id, provider=None, base_url=None):
    """Call config.resolve_model_provider(model_id) under a temporary model cfg.

    The global config.cfg is snapshotted before the call and fully restored
    afterwards, so tests never leak configuration state.
    """
    snapshot = dict(config.cfg)
    overrides = {}
    if provider:
        overrides['provider'] = provider
    if base_url:
        overrides['base_url'] = base_url
    config.cfg['model'] = overrides
    try:
        return config.resolve_model_provider(model_id)
    finally:
        config.cfg.clear()
        config.cfg.update(snapshot)
# ── Fallback model list ───────────────────────────────────────────────────────
def test_minimax_m2_7_in_fallback_models():
    """MiniMax-M2.7 must appear in the hardcoded fallback model list."""
    fallback_ids = [entry['id'] for entry in config._FALLBACK_MODELS]
    assert 'minimax/MiniMax-M2.7' in fallback_ids, (
        f"minimax/MiniMax-M2.7 missing from _FALLBACK_MODELS. Found: {fallback_ids}"
    )
def test_minimax_m2_7_highspeed_in_fallback_models():
    """MiniMax-M2.7-highspeed must appear in the hardcoded fallback model list."""
    fallback_ids = [entry['id'] for entry in config._FALLBACK_MODELS]
    assert 'minimax/MiniMax-M2.7-highspeed' in fallback_ids, (
        f"minimax/MiniMax-M2.7-highspeed missing from _FALLBACK_MODELS. Found: {fallback_ids}"
    )
def test_minimax_fallback_provider_label():
    """MiniMax fallback entries must use 'MiniMax' as the provider label."""
    labelled = [m for m in config._FALLBACK_MODELS if 'minimax' in m['id'].lower()]
    assert labelled, "No MiniMax entries found in _FALLBACK_MODELS"
    for entry in labelled:
        assert entry['provider'] == 'MiniMax', (
            f"Expected provider='MiniMax', got '{entry['provider']}' for {entry['id']}"
        )
# ── _PROVIDER_MODELS ──────────────────────────────────────────────────────────
def test_minimax_provider_models_has_m2_7():
    """_PROVIDER_MODELS['minimax'] must include MiniMax-M2.7."""
    minimax_ids = [m['id'] for m in config._PROVIDER_MODELS.get('minimax', [])]
    assert 'MiniMax-M2.7' in minimax_ids, (
        f"MiniMax-M2.7 missing from _PROVIDER_MODELS['minimax']. Found: {minimax_ids}"
    )
def test_minimax_provider_models_has_highspeed():
    """_PROVIDER_MODELS['minimax'] must include MiniMax-M2.7-highspeed."""
    minimax_ids = [m['id'] for m in config._PROVIDER_MODELS.get('minimax', [])]
    assert 'MiniMax-M2.7-highspeed' in minimax_ids, (
        f"MiniMax-M2.7-highspeed missing from _PROVIDER_MODELS['minimax']. Found: {minimax_ids}"
    )
# ── MINIMAX_API_KEY env var detection ─────────────────────────────────────────
def test_minimax_api_key_in_env_scan_tuple():
    """MINIMAX_API_KEY must be included in the env var scan performed by
    get_available_models(), so users who export MINIMAX_API_KEY see the
    MiniMax provider in the dropdown without editing ~/.hermes/.env."""
    # Only inspect is needed here; the original line also imported ast and
    # textwrap, which were never used in this test.
    import inspect
    src = inspect.getsource(config.get_available_models)
    assert 'MINIMAX_API_KEY' in src, (
        "MINIMAX_API_KEY not found in get_available_models() source — "
        "it must be added to the env var scan tuple so os.environ is checked."
    )
def test_minimax_cn_api_key_in_env_scan_tuple():
    """MINIMAX_CN_API_KEY must also be scanned (mainland China API key variant)."""
    import inspect
    source_text = inspect.getsource(config.get_available_models)
    assert 'MINIMAX_CN_API_KEY' in source_text, (
        "MINIMAX_CN_API_KEY not found in get_available_models() source."
    )
def test_minimax_detected_from_os_environ(monkeypatch):
    """Setting MINIMAX_API_KEY in os.environ triggers minimax provider detection."""
    monkeypatch.setenv('MINIMAX_API_KEY', 'test-key-from-env')
    snapshot = dict(config.cfg)
    # Clear the model config so the env-var fallback path is exercised.
    config.cfg['model'] = {}
    try:
        groups = config.get_available_models()['groups']
        names = [g['provider'] for g in groups]
        assert 'MiniMax' in names, (
            f"MiniMax not detected when MINIMAX_API_KEY is set in os.environ. "
            f"Active provider groups: {names}"
        )
    finally:
        config.cfg.clear()
        config.cfg.update(snapshot)
# ── Model routing ─────────────────────────────────────────────────────────────
def test_provider_hint_minimax_m2_7():
    """@minimax:MiniMax-M2.7 routes to minimax provider with bare model name."""
    resolved_model, resolved_provider, resolved_base = _resolve_with_config(
        '@minimax:MiniMax-M2.7', provider='anthropic',
    )
    assert resolved_model == 'MiniMax-M2.7'
    assert resolved_provider == 'minimax'
    assert resolved_base is None
def test_provider_hint_minimax_highspeed():
    """@minimax:MiniMax-M2.7-highspeed routes to minimax provider."""
    resolved_model, resolved_provider, _unused_base = _resolve_with_config(
        '@minimax:MiniMax-M2.7-highspeed', provider='openai',
    )
    assert resolved_model == 'MiniMax-M2.7-highspeed'
    assert resolved_provider == 'minimax'
def test_minimax_slash_format_routes_openrouter_when_not_active():
    """minimax/MiniMax-M2.7 (slash format) routes via openrouter when active
    provider is anthropic (cross-provider routing)."""
    resolved_model, resolved_provider, _unused_base = _resolve_with_config(
        'minimax/MiniMax-M2.7', provider='anthropic',
    )
    assert resolved_model == 'minimax/MiniMax-M2.7'
    assert resolved_provider == 'openrouter'

278
tests/test_mobile_layout.py Normal file
View File

@@ -0,0 +1,278 @@
"""
Mobile layout regression tests — run on every QA pass.
These tests check that the CSS and HTML structure required for correct
mobile rendering (375px640px viewport widths) is intact after every change.
They are static checks (no server needed) that catch common regressions:
- Mobile breakpoints present for key layout elements
- Right panel slide-over markup and CSS intact
- Profile dropdown not clipped by overflow on mobile
- Composer footer chips scroll correctly on narrow viewports
- Mobile sidebar navigation stays available on phones
- No full-viewport overflow that would break scroll
Run as part of the standard test suite:
pytest tests/test_mobile_layout.py -v
"""
import pathlib
import re
REPO = pathlib.Path(__file__).parent.parent
HTML = (REPO / "static" / "index.html").read_text(encoding="utf-8")
CSS = (REPO / "static" / "style.css").read_text(encoding="utf-8")
# ── Mobile breakpoint rules ───────────────────────────────────────────────────
def test_mobile_breakpoint_900px_present():
    """@media(max-width:900px) must hide the right panel and show mobile-files-btn."""
    # Accept both the compact and the spaced form of the media query.
    assert "@media(max-width:900px)" in CSS or "@media (max-width: 900px)" in CSS, \
        "Missing @media(max-width:900px) breakpoint in style.css"
    # Right panel should be hidden at 900px, replaced by slide-over
    # NOTE(review): the regex fallback matches .rightpanel{display:none anywhere
    # AFTER the 900px query, not necessarily inside its block — weak but lenient.
    assert ".rightpanel{display:none" in CSS or ".rightpanel {display:none" in CSS or \
        re.search(r'max-width:900px\).*?\.rightpanel\{display:none', CSS, re.DOTALL), \
        ".rightpanel must be display:none at max-width:900px (slide-over replaces it)"
def test_mobile_breakpoint_640px_present():
    """@media(max-width:640px) must exist for narrow phone layouts."""
    # Either the compact or the spaced spelling satisfies the check.
    variants = ("@media(max-width:640px)", "@media (max-width: 640px)")
    assert any(v in CSS for v in variants), \
        "Missing @media(max-width:640px) breakpoint in style.css"
def test_rightpanel_mobile_slide_over_css():
    """Right panel must have position:fixed slide-over CSS for mobile."""
    # At max-width:900px the rightpanel should be position:fixed, off-screen right
    # NOTE(review): this only asserts position:fixed appears somewhere in the
    # stylesheet, not specifically on .rightpanel — confirm if this regresses.
    assert "position:fixed" in CSS, \
        "style.css must have position:fixed for rightpanel mobile slide-over"
    assert ".rightpanel.mobile-open{right:0" in CSS or ".rightpanel.mobile-open {right:0" in CSS, \
        ".rightpanel.mobile-open must set right:0 to slide panel in from right"
    assert "right:-320px" in CSS or "right: -320px" in CSS, \
        "rightpanel must start off-screen (right:-320px) on mobile"
def test_mobile_overlay_present():
    """Mobile overlay element must exist for tap-to-close sidebar behavior."""
    # Both the DOM node and its stylesheet rule are required.
    assert 'id="mobileOverlay"' in HTML, \
        "#mobileOverlay element missing from index.html"
    assert "mobile-overlay" in CSS, \
        ".mobile-overlay CSS rule missing from style.css"
def test_sidebar_nav_present():
    """Sidebar top navigation tabs must be present."""
    # Markup and base CSS rule (either spacing style) are both required.
    assert 'class="sidebar-nav"' in HTML, \
        ".sidebar-nav missing from index.html"
    assert ".sidebar-nav{" in CSS or ".sidebar-nav {" in CSS, \
        ".sidebar-nav CSS rule missing from style.css"
def test_mobile_does_not_hide_sidebar_nav():
    """Phone breakpoint must keep the sidebar top navigation visible."""
    start = CSS.find("@media(max-width:640px)")
    assert start != -1, "Missing @media(max-width:640px) block in style.css"
    # Walk matching braces to isolate exactly this media block.  The previous
    # greedy regex (.* with re.DOTALL) captured through the LAST closing brace
    # in the stylesheet, so rules far outside the breakpoint could fail the check.
    open_brace = CSS.find("{", start)
    assert open_brace != -1, "Missing @media(max-width:640px) block in style.css"
    depth = 0
    end = len(CSS)
    for i in range(open_brace, len(CSS)):
        if CSS[i] == "{":
            depth += 1
        elif CSS[i] == "}":
            depth -= 1
            if depth == 0:
                end = i
                break
    block = CSS[open_brace + 1:end]
    assert ".sidebar-nav{display:none" not in block.replace(" ", ""), \
        ".sidebar-nav must stay visible on mobile"
def test_mobile_files_button_present():
    """Mobile files toggle button (#btnWorkspacePanelToggle.workspace-toggle-btn) must be in HTML and CSS."""
    # The toggle needs both its DOM node and a stylesheet rule.
    assert 'id="btnWorkspacePanelToggle"' in HTML, \
        "#btnWorkspacePanelToggle missing from index.html"
    assert "workspace-toggle-btn" in CSS, \
        ".workspace-toggle-btn CSS missing from style.css"
# ── Profile dropdown overflow ─────────────────────────────────────────────────
def test_profile_dropdown_not_clipped_by_overflow():
    """Profile dropdown must not be inside an overflow:hidden or overflow-x:auto ancestor
    without a higher z-index escape hatch.

    The topbar-chips container uses overflow-x:auto on mobile, which creates a
    stacking context that clips absolutely-positioned children. The profile dropdown
    must use position:fixed on mobile OR the topbar-chips must not clip it.
    """
    # The profile-chip wrapper must have position:relative so the dropdown can escape
    assert 'id="profileChipWrap"' in HTML, \
        "#profileChipWrap missing from index.html"
    # Profile dropdown must have a z-index high enough to clear the topbar
    assert ".profile-dropdown{" in CSS or ".profile-dropdown {" in CSS, \
        ".profile-dropdown CSS rule missing"
    # z-index must be at least 200 (topbar is z-index:10)
    # NOTE(review): if the regex finds no z-index at all, this check is silently
    # skipped — only an explicitly-declared low z-index fails here.
    m = re.search(r'\.profile-dropdown\{[^}]*z-index:(\d+)', CSS)
    if m:
        assert int(m.group(1)) >= 100, \
            f".profile-dropdown z-index {m.group(1)} is too low — must be >= 100 to clear topbar"
def test_topbar_chips_mobile_overflow():
    """topbar-chips must use overflow-x:auto on mobile for chip scrolling.

    Chips (profile, workspace, model, files) must scroll horizontally on narrow
    viewports rather than wrapping onto a second line which would break the topbar layout.
    """
    # At narrow viewport, topbar-chips should scroll
    # NOTE(review): matches overflow-x:auto anywhere in the stylesheet, not
    # scoped to .topbar-chips — confirm the selector if this ever regresses.
    assert "overflow-x:auto" in CSS or "overflow-x: auto" in CSS, \
        "topbar-chips must have overflow-x:auto for mobile chip scrolling"
# ── Workspace panel close ─────────────────────────────────────────────────────
def test_workspace_close_button_present():
    """Workspace panel must have a close/hide button accessible on mobile."""
    # Accept handleWorkspaceClose() (two-step close: file→browse→closed), or the
    # lower-level functions directly. handleWorkspaceClose is preferred because
    # it dismisses a file preview first before closing the panel.
    # Inline onclick attributes are matched literally against index.html.
    has_close = (
        'onclick="handleWorkspaceClose()"' in HTML or
        'onclick="closeWorkspacePanel()"' in HTML or
        'onclick="toggleWorkspacePanel()"' in HTML
    )
    assert has_close, \
        "handleWorkspaceClose() or closeWorkspacePanel() must be wired to a button to close the workspace panel on mobile"
def test_toggle_mobile_files_js_defined():
    """toggleMobileFiles() must be defined in boot.js."""
    # boot.js is read per-test here rather than from a module-level constant.
    boot_js = (REPO / "static" / "boot.js").read_text(encoding="utf-8")
    assert "function toggleMobileFiles()" in boot_js, \
        "toggleMobileFiles() missing from static/boot.js"
    assert "mobile-open" in boot_js, \
        "toggleMobileFiles() must toggle mobile-open class on the right panel"
def test_new_conversation_closes_mobile_sidebar():
    """New conversation must close the mobile drawer so the chat pane is visible immediately."""
    boot_js = (REPO / "static" / "boot.js").read_text(encoding="utf-8")
    # Split once; the original re-split boot.js on every lookup (three times
    # on a single line for the shortcut block).
    lines = boot_js.splitlines()
    click_line = next((ln for ln in lines if "$('btnNewChat').onclick" in ln), "")
    assert click_line, "btnNewChat onclick handler missing from static/boot.js"
    assert "closeMobileSidebar" in click_line, \
        "btnNewChat handler must closeMobileSidebar() after creating the new session"
    shortcut_line = next((ln for ln in lines if "e.key==='k'" in ln or "e.key === 'k'" in ln), "")
    assert shortcut_line, "Cmd/Ctrl+K new chat shortcut missing from static/boot.js"
    # Inspect the shortcut line plus the next three lines for the drawer close.
    at = lines.index(shortcut_line)
    shortcut_block = "\n".join(lines[at:at + 4])
    assert "closeMobileSidebar" in shortcut_block, \
        "Cmd/Ctrl+K new chat shortcut must closeMobileSidebar() after creating the new session"
# ── Viewport and scroll safety ────────────────────────────────────────────────
def test_body_overflow_hidden():
    """body must have overflow:hidden to prevent double scrollbars on mobile."""
    assert "body{" in CSS or "body {" in CSS, \
        "body rule missing from style.css"
    # [^}]* keeps the match inside one rule body.
    # NOTE(review): the unanchored pattern also matches e.g. "tbody{" — a word
    # boundary would make it stricter; confirm if a false positive ever appears.
    assert re.search(r'body\{[^}]*overflow:hidden', CSS), \
        "body must have overflow:hidden to prevent double scrollbars"
def test_flex_parents_allow_message_scroller_to_shrink():
    """The top-level flex containers must opt into min-height:0 so .messages can scroll on mobile.

    Mobile Safari/Chrome can trap scroll when a flex child with overflow:auto sits inside
    parents whose min-height remains auto. Both .layout and .main need min-height:0.
    """
    # [^}]* keeps each match within the first matching rule body.
    assert re.search(r'\.layout\{[^}]*min-height:0', CSS), \
        ".layout must set min-height:0 so the chat column can shrink and scroll"
    assert re.search(r'\.main\{[^}]*min-height:0', CSS), \
        ".main must set min-height:0 so .messages remains scrollable while busy"
def test_messages_touch_scrolling_hints_present():
    """The messages scroller must advertise touch-friendly scrolling behavior.

    On mobile browsers, momentum scrolling and explicit pan-y/overscroll behavior help
    prevent the chat area from feeling locked while the app body itself stays overflow:hidden.
    """
    # All three declarations must sit inside the .messages rule body ([^}]*).
    assert re.search(r'\.messages\{[^}]*-webkit-overflow-scrolling:\s*touch', CSS), \
        ".messages must enable -webkit-overflow-scrolling:touch for mobile momentum scroll"
    assert re.search(r'\.messages\{[^}]*touch-action:\s*pan-y', CSS), \
        ".messages must set touch-action:pan-y so vertical swipe gestures scroll the transcript"
    assert re.search(r'\.messages\{[^}]*overscroll-behavior-y:\s*contain', CSS), \
        ".messages must contain vertical overscroll so the transcript keeps the gesture"
def test_100dvh_viewport_height():
    """Layout must use 100dvh (dynamic viewport height) for correct mobile sizing.

    On mobile Safari and Chrome, 100vh includes the browser chrome (address bar),
    causing content to be hidden. 100dvh accounts for the actual available height.
    """
    uses_dynamic_vh = "100dvh" in CSS
    assert uses_dynamic_vh, (
        "style.css must use 100dvh for correct mobile viewport height "
        "(100vh hides content under address bar)"
    )
def test_composer_touch_target_size():
    """Send button and composer inputs must have minimum 44px touch targets on mobile.

    Apple HIG and Google Material guidelines both require 44px minimum touch targets.
    """
    # Check that mobile CSS doesn't make the send button smaller than 44×44
    # We check that there's at least a min-height definition for touch targets
    # NOTE(review): the regex matches any "44px" after min-height/height on one
    # line anywhere in the stylesheet — a weak proxy, not a scoped check.
    assert re.search(r'(min-height|height).*44px', CSS), \
        "style.css must define 44px minimum touch targets for mobile (send button, nav buttons)"
# ── Input zoom prevention ─────────────────────────────────────────────────────
def test_composer_textarea_font_size_mobile():
    """Composer textarea must have font-size >= 16px on mobile.

    iOS Safari zooms the viewport when an input with font-size < 16px is focused,
    which breaks the layout. The composer textarea must be >= 16px at mobile widths.
    """
    # Check for 16px font-size on the textarea in a mobile breakpoint
    # NOTE(review): matches font-size:16px anywhere in style.css, not scoped to
    # the composer textarea or a mobile breakpoint — confirm on regression.
    assert re.search(r'font-size:16px', CSS), \
        "Composer textarea must have font-size:16px at mobile widths to prevent iOS zoom-on-focus"
# ── Sidebar tabs on mobile ───────────────────────────────────────────────────
def test_profiles_sidebar_tab_present():
    """Sidebar tab strip must include Profiles."""
    # Literal match — attribute order in index.html must stay class then data-panel.
    assert 'class="nav-tab" data-panel="profiles"' in HTML, \
        "Sidebar nav must have a Profiles tab"
def test_mobile_bottom_nav_removed():
    """Neither the markup nor the styles may still mention mobile-bottom-nav."""
    checks = (
        (HTML, "mobile-bottom-nav markup should be removed from index.html"),
        (CSS, "mobile-bottom-nav CSS should be removed from style.css"),
    )
    for blob, failure_msg in checks:
        assert "mobile-bottom-nav" not in blob, failure_msg
# ── Mobile Enter key inserts newline (PR #315, fixes #269) ───────────────────
def test_mobile_enter_newline_condition_present():
    """The keydown handler must detect touch-primary devices via pointer:coarse."""
    source = (REPO / "static" / "boot.js").read_text(encoding="utf-8")
    assert "pointer:coarse" in source, \
        "boot.js must use pointer:coarse media query for mobile Enter detection"
def test_mobile_enter_newline_uses_match_media():
    """Pointer detection must go through matchMedia, not a hardcoded flag."""
    source = (REPO / "static" / "boot.js").read_text(encoding="utf-8")
    accepted_spellings = (
        "matchMedia('(pointer:coarse)')",
        'matchMedia("(pointer:coarse)")',
    )
    assert any(snippet in source for snippet in accepted_spellings), \
        "boot.js must use matchMedia('(pointer:coarse)') for mobile detection"
def test_mobile_enter_newline_only_overrides_enter_default():
    """Mobile newline override must only apply when _sendKey is the default 'enter'."""
    boot_js = (REPO / "static" / "boot.js").read_text(encoding="utf-8")
    # The _mobileDefault check must gate on _sendKey==='enter' so ctrl+enter users aren't affected
    # NOTE(review): the two substrings are checked independently, so this would
    # also pass if "_sendKey===" and "'enter'" appear in unrelated places in
    # boot.js. Consider asserting the contiguous "_sendKey==='enter'" instead —
    # TODO: confirm the exact spelling/whitespace used in boot.js first.
    assert "_sendKey===" in boot_js and "'enter'" in boot_js, \
        "Mobile newline fallback must check window._sendKey==='enter' to avoid overriding user preference"
def test_mobile_enter_does_not_affect_desktop_logic():
    """Desktop users must keep the plain Enter-to-send behaviour (else branch)."""
    source = (REPO / "static" / "boot.js").read_text(encoding="utf-8")
    desktop_branch = "if(!e.shiftKey){e.preventDefault();send();"
    assert desktop_branch in source, \
        "Desktop Enter-to-send logic (else branch) must still be present in boot.js"

View File

@@ -0,0 +1,476 @@
"""
Tests for resolve_model_provider() model routing logic.
Verifies that model IDs are correctly resolved to (model, provider, base_url)
tuples for different provider configurations.
"""
import api.config as config
def _resolve_with_config(model_id, provider=None, base_url=None, default=None, custom_providers=None):
    """Helper: temporarily set config.cfg model/custom provider sections, call resolve, restore."""
    # Snapshot the live config so it can be restored afterwards.
    # NOTE(review): dict() is a shallow copy — safe here because only the
    # top-level 'model' / 'custom_providers' keys are replaced wholesale, and
    # restoration below re-installs the original key/value pairs in place.
    old_cfg = dict(config.cfg)
    # Build the fake 'model' section from whichever kwargs were supplied.
    model_cfg = {}
    if provider:
        model_cfg['provider'] = provider
    if base_url:
        model_cfg['base_url'] = base_url
    if default:
        model_cfg['default'] = default
    config.cfg['model'] = model_cfg if model_cfg else {}
    if custom_providers is not None:
        config.cfg['custom_providers'] = custom_providers
    try:
        # Returns the (model, provider, base_url) tuple under the fake config.
        return config.resolve_model_provider(model_id)
    finally:
        # Restore the original config even if resolution raises.
        config.cfg.clear()
        config.cfg.update(old_cfg)
# ── OpenRouter prefix handling ────────────────────────────────────────────
def test_openrouter_free_keeps_full_path():
    """With openrouter as the provider, 'openrouter/free' keeps its full path."""
    resolved = _resolve_with_config(
        'openrouter/free',
        provider='openrouter',
        base_url='https://openrouter.ai/api/v1',
    )
    model, provider, _base_url = resolved
    assert model == 'openrouter/free', f"Expected 'openrouter/free', got '{model}'"
    assert provider == 'openrouter'
def test_openrouter_model_with_provider_prefix():
    """An anthropic/* model routed via openrouter keeps its full path."""
    model, provider, _base_url = _resolve_with_config(
        'anthropic/claude-sonnet-4.6',
        provider='openrouter',
        base_url='https://openrouter.ai/api/v1',
    )
    assert model == 'anthropic/claude-sonnet-4.6'
    assert provider == 'openrouter'
# ── Direct provider prefix stripping ─────────────────────────────────────
def test_anthropic_prefix_stripped_for_direct_api():
    """Direct anthropic API use strips the 'anthropic/' prefix."""
    model, provider, _base_url = _resolve_with_config(
        'anthropic/claude-sonnet-4.6',
        provider='anthropic',
    )
    assert (model, provider) == ('claude-sonnet-4.6', 'anthropic')
def test_openai_prefix_stripped_for_direct_api():
    """Direct openai API use strips the 'openai/' prefix."""
    model, provider, _base_url = _resolve_with_config(
        'openai/gpt-5.4-mini',
        provider='openai',
    )
    assert (model, provider) == ('gpt-5.4-mini', 'openai')
# ── Cross-provider routing ───────────────────────────────────────────────
def test_cross_provider_routes_through_openrouter():
    """Picking an openai model while configured for anthropic goes via openrouter."""
    model, provider, base_url = _resolve_with_config(
        'openai/gpt-5.4-mini',
        provider='anthropic',
    )
    assert model == 'openai/gpt-5.4-mini'
    assert provider == 'openrouter'
    # openrouter supplies its own endpoint, so no base_url is returned here
    assert base_url is None
# ── Bare model names ─────────────────────────────────────────────────────
def test_bare_model_uses_config_provider():
    """A slash-free model name resolves with the configured provider and base_url."""
    resolved = _resolve_with_config(
        'gemma-4-26B',
        provider='custom',
        base_url='http://192.168.1.160:4000',
    )
    assert resolved == ('gemma-4-26B', 'custom', 'http://192.168.1.160:4000')
def test_empty_model_returns_config_defaults():
    """An empty model string falls through to the configured provider."""
    model, provider, _base_url = _resolve_with_config(
        '',
        provider='anthropic',
    )
    assert model == ''
    assert provider == 'anthropic'
# ── @provider:model hint routing (Issue #138 v2) ────────────────────────
def test_provider_hint_routes_to_specific_provider():
    """@minimax:MiniMax-M2.7 goes straight to the minimax provider."""
    model, provider, base_url = _resolve_with_config(
        '@minimax:MiniMax-M2.7',
        provider='anthropic',
    )
    assert model == 'MiniMax-M2.7'
    assert provider == 'minimax'
    # base_url is left unset for resolve_runtime_provider to fill in later
    assert base_url is None
def test_provider_hint_zai():
    """@zai:GLM-5 goes straight to the zai provider."""
    model, provider, _base_url = _resolve_with_config(
        '@zai:GLM-5',
        provider='openai',
    )
    assert (model, provider) == ('GLM-5', 'zai')
def test_provider_hint_deepseek():
    """@deepseek:deepseek-chat goes straight to the deepseek provider."""
    model, provider, _base_url = _resolve_with_config(
        '@deepseek:deepseek-chat',
        provider='anthropic',
    )
    assert (model, provider) == ('deepseek-chat', 'deepseek')
def test_slash_prefix_non_default_still_routes_openrouter():
    """The legacy minimax/MiniMax-M2.7 format still routes through openrouter."""
    model, provider, _base_url = _resolve_with_config(
        'minimax/MiniMax-M2.7',
        provider='anthropic',
    )
    assert (model, provider) == ('minimax/MiniMax-M2.7', 'openrouter')
def test_custom_provider_model_with_slash_routes_to_named_custom_provider():
    """Slash-containing custom endpoint model IDs must not be mistaken for OpenRouter models."""
    lm_studio = {
        'name': 'Local LM Studio',
        'base_url': 'http://lmstudio.local:1234/v1',
        'model': 'google/gemma-4-26b-a4b',
    }
    model, provider, base_url = _resolve_with_config(
        'google/gemma-4-26b-a4b',
        provider='openrouter',
        base_url='https://openrouter.ai/api/v1',
        custom_providers=[lm_studio],
    )
    assert model == 'google/gemma-4-26b-a4b'
    assert provider == 'custom:local-lm-studio'
    assert base_url == 'http://lmstudio.local:1234/v1'
# ── get_available_models() @provider: hint behaviour ──────────────────────
def _available_models_with_provider(provider):
    """Helper: temporarily set active_provider in config."""
    snapshot = dict(config.cfg)
    config.cfg['model'] = {'provider': provider}
    try:
        return config.get_available_models()
    finally:
        # Put the original config back even if the call raised.
        config.cfg.clear()
        config.cfg.update(snapshot)
def test_non_default_provider_models_use_hint_prefix():
    """With anthropic active, any MiniMax model IDs must carry the @minimax: hint."""
    result = _available_models_with_provider('anthropic')
    groups = {g['provider']: g['models'] for g in result['groups']}
    # Empty default keeps this a no-op when no MiniMax group is present.
    for m in groups.get('MiniMax', []):
        assert m['id'].startswith('@minimax:'), (
            f"Expected @minimax: prefix, got: {m['id']!r}"
        )
def test_no_duplicate_when_default_model_is_prefixed():
    """Issue #147 Bug 2: a provider-prefixed default_model
    ('anthropic/claude-opus-4.6') must not inject a duplicate next to the
    existing bare 'claude-opus-4.6' entry within the same provider group."""
    import api.config as _cfg
    saved_cfg = dict(_cfg.cfg)
    _cfg.cfg['model'] = {
        'provider': 'anthropic',
        'default': 'anthropic/claude-opus-4.6',
    }
    def _bare(mid):
        # Strip a leading 'provider/' segment, if any.
        return mid.split('/', 1)[-1] if '/' in mid else mid
    try:
        result = _cfg.get_available_models()
        # Inspect each group on its own: no group may contain two entries
        # that normalize to the same bare model name.
        for g in result['groups']:
            bare_ids = [_bare(m['id']) for m in g['models']]
            duplicates = [mid for mid in set(bare_ids) if bare_ids.count(mid) > 1]
            assert not duplicates, (
                f"Provider group '{g['provider']}' has duplicate models after normalization: "
                f"{duplicates}\nFull group: {[m['id'] for m in g['models']]}"
            )
    finally:
        _cfg.cfg.clear()
        _cfg.cfg.update(saved_cfg)
def test_default_provider_models_not_prefixed():
    """Models belonging to the active provider keep their bare IDs (no @hint prefix)."""
    import api.config as _cfg
    raw_anthropic_ids = {m['id'] for m in _cfg._PROVIDER_MODELS.get('anthropic', [])}
    result = _available_models_with_provider('anthropic')
    groups = {g['provider']: g['models'] for g in result['groups']}
    anthropic_models = groups.get('Anthropic')
    if anthropic_models is not None:
        returned_ids = {m['id'] for m in anthropic_models}
        for bare_id in raw_anthropic_ids:
            assert bare_id in returned_ids, (
                f"_PROVIDER_MODELS entry '{bare_id}' is missing from the Anthropic group"
            )
# ── get_available_models(): phantom "Custom" group regression ─────────────
#
# When the user has model.provider set to a real provider (e.g. openai-codex)
# AND a model.base_url set, hermes_cli reports the 'custom' pseudo-provider as
# authenticated. The WebUI picker must NOT build a separate "Custom" group in
# that case — the base_url belongs to the active provider.
def _available_models_with_full_cfg(provider, default, base_url):
    """Helper: set model.provider, model.default, model.base_url at once.
    Clears model-override env vars (HERMES_MODEL, OPENAI_MODEL, LLM_MODEL)
    during the call so the real hermes profile environment doesn't leak into
    the test and override the fixture's default model.
    """
    import os
    import api.config as _cfg
    # Shallow snapshot — only the top-level 'model' key is replaced below.
    old_cfg = dict(_cfg.cfg)
    _cfg.cfg['model'] = {
        'provider': provider,
        'default': default,
        'base_url': base_url,
    }
    # Refresh the cached config-file mtime so the module doesn't reload
    # config.yaml over our in-memory override. Best-effort: the file may
    # not exist in the test environment.
    try:
        _cfg._cfg_mtime = _cfg.Path(_cfg._get_config_path()).stat().st_mtime
    except Exception:
        pass
    # Clear model-override env vars to prevent the real profile from leaking in
    _model_env_keys = ('HERMES_MODEL', 'OPENAI_MODEL', 'LLM_MODEL')
    # pop(..., None) records None for vars that were absent, so only vars
    # that actually existed get restored in the finally block.
    _saved_env = {k: os.environ.pop(k, None) for k in _model_env_keys}
    try:
        return _cfg.get_available_models()
    finally:
        _cfg.cfg.clear()
        _cfg.cfg.update(old_cfg)
        for k, v in _saved_env.items():
            if v is not None:
                os.environ[k] = v
def test_no_phantom_custom_group_when_active_provider_is_set(monkeypatch):
    """Regression: provider=openai-codex plus a base_url used to surface a
    phantom "Custom" group in the picker instead of keeping gpt-5.4 under
    the "OpenAI Codex" group."""
    import sys, types
    # Simulate list_available_providers() reporting both the real provider
    # and the phantom 'custom' pseudo-provider as authenticated — which is
    # exactly what happens whenever base_url is configured.
    stub_models = types.ModuleType('hermes_cli.models')
    stub_models.list_available_providers = lambda: [
        {'id': 'openai-codex', 'authenticated': True},
        {'id': 'custom', 'authenticated': True},
    ]
    stub_auth = types.ModuleType('hermes_cli.auth')
    stub_auth.get_auth_status = lambda pid: {'key_source': 'env'}
    monkeypatch.setitem(sys.modules, 'hermes_cli.models', stub_models)
    monkeypatch.setitem(sys.modules, 'hermes_cli.auth', stub_auth)
    result = _available_models_with_full_cfg(
        provider='openai-codex',
        default='gpt-5.4',
        base_url='https://chatgpt.com/backend-api/codex',
    )
    group_names = [g['provider'] for g in result['groups']]
    assert 'Custom' not in group_names, (
        f"Phantom 'Custom' group present; full groups: {group_names}"
    )
def test_default_model_lands_under_active_provider_group(monkeypatch):
    """The configured default_model must be placed in the active provider's
    display group even when it is absent from _PROVIDER_MODELS[provider] AND
    the active provider is not the alphabetically-first detected one.

    Regression guard for a hyphen-vs-space bug in the "ensure default_model
    appears" post-pass: the substring check `active_provider.lower() in
    g.get('provider', '').lower()` failed for 'openai-codex' vs the display
    name 'OpenAI Codex' and silently fell back to groups[0] — so when another
    provider sorted earlier alphabetically (e.g. 'anthropic'), gpt-5.4 landed
    in the WRONG group.
    """
    import sys, types
    stub_models = types.ModuleType('hermes_cli.models')
    stub_models.list_available_providers = lambda: [
        {'id': 'anthropic', 'authenticated': True},  # sorts before openai-codex
        {'id': 'openai-codex', 'authenticated': True},
        {'id': 'custom', 'authenticated': True},
    ]
    stub_auth = types.ModuleType('hermes_cli.auth')
    stub_auth.get_auth_status = lambda pid: {'key_source': 'env'}
    monkeypatch.setitem(sys.modules, 'hermes_cli.models', stub_models)
    monkeypatch.setitem(sys.modules, 'hermes_cli.auth', stub_auth)
    result = _available_models_with_full_cfg(
        provider='openai-codex',
        default='gpt-5.4',
        base_url='https://chatgpt.com/backend-api/codex',
    )
    groups = {g['provider']: [m['id'] for m in g['models']] for g in result['groups']}
    def norm(mid):
        # Drop 'provider/' and '@hint:' decorations to compare bare names.
        return mid.split('/', 1)[-1].split(':', 1)[-1]
    assert 'OpenAI Codex' in groups, f"OpenAI Codex group missing: {list(groups)}"
    assert 'gpt-5.4' in {norm(mid) for mid in groups['OpenAI Codex']}, (
        f"gpt-5.4 not in OpenAI Codex group; contents: {groups['OpenAI Codex']}"
    )
    # Crucially, the fallback path must not have leaked the model into the
    # alphabetically-first group (Anthropic).
    assert 'gpt-5.4' not in {norm(mid) for mid in groups.get('Anthropic', [])}, (
        f"gpt-5.4 leaked into Anthropic group via fallback: {groups.get('Anthropic')}"
    )
def test_unknown_providers_do_not_inherit_default_model(monkeypatch):
    """Detected providers that have no model catalog of their own must not be
    padded with the global default_model placeholder.

    Regression guard: Alibaba / Minimax-Cn were showing gpt-5.4-mini even
    though those providers do not serve it.
    """
    import sys, types
    stub_models = types.ModuleType('hermes_cli.models')
    stub_models.list_available_providers = lambda: [
        {'id': 'openai-codex', 'authenticated': True},
        {'id': 'alibaba', 'authenticated': True},
        {'id': 'minimax-cn', 'authenticated': True},
    ]
    stub_auth = types.ModuleType('hermes_cli.auth')
    stub_auth.get_auth_status = lambda pid: {'key_source': 'env'}
    monkeypatch.setitem(sys.modules, 'hermes_cli.models', stub_models)
    monkeypatch.setitem(sys.modules, 'hermes_cli.auth', stub_auth)
    result = _available_models_with_full_cfg(
        provider='openai-codex',
        default='gpt-5.4-mini',
        base_url='',
    )
    groups = {g['provider']: [m['id'] for m in g['models']] for g in result['groups']}
    def norm(mid):
        return mid.split('/', 1)[-1].split(':', 1)[-1]
    assert 'Alibaba' not in groups, (
        f"Alibaba should not inherit the default model placeholder: {groups}"
    )
    assert 'Minimax-Cn' not in groups, (
        f"Minimax-Cn should not inherit the default model placeholder: {groups}"
    )
    inherited = [
        mid for mid in groups.get('Alibaba', []) + groups.get('Minimax-Cn', [])
        if norm(mid) == 'gpt-5.4-mini'
    ]
    assert not inherited, (
        f"Unknown provider groups still inherited the default model: {groups}"
    )
def test_custom_endpoint_uses_model_config_api_key_for_model_discovery(monkeypatch):
    """Custom endpoint model discovery must use model.api_key from config.yaml,
    not only environment variables, otherwise the dropdown collapses to the
    default model when /v1/models requires auth."""
    import json as _json
    import api.config as _cfg
    # Snapshot config; restored in the finally block below.
    old_cfg = dict(_cfg.cfg)
    _cfg.cfg['model'] = {
        'provider': 'custom',
        'default': 'gpt-5.4',
        'base_url': 'https://example.test/v1',
        'api_key': 'sk-test-model-key',
    }
    # Refresh cached config mtime so the in-memory override isn't reloaded
    # from disk; best-effort only.
    try:
        _cfg._cfg_mtime = _cfg.Path(_cfg._get_config_path()).stat().st_mtime
    except Exception:
        pass
    _cfg.cfg.pop('providers', None)
    captured = {}
    # Minimal stand-in for the urlopen response: supports read() and the
    # context-manager protocol, returning one fake /v1/models entry.
    class _Resp:
        def read(self):
            return _json.dumps({'data': [{'id': 'gpt-5.2', 'name': 'GPT-5.2'}]}).encode('utf-8')
        def __enter__(self):
            return self
        def __exit__(self, exc_type, exc, tb):
            return False
    def _fake_urlopen(req, timeout=10):
        # Record the auth + UA headers the code under test attached.
        # 'User-agent' capitalization matches urllib's header normalization
        # (Request.get_header capitalizes) — presumably why it's spelled so.
        captured['auth'] = req.get_header('Authorization')
        captured['ua'] = req.get_header('User-agent')
        return _Resp()
    monkeypatch.setattr('urllib.request.urlopen', _fake_urlopen)
    # Stub DNS resolution so no real network lookup can happen.
    monkeypatch.setattr('socket.getaddrinfo', lambda *a, **k: [])
    # Remove every env var that could satisfy auth, forcing the code to fall
    # back to model.api_key from the config — the behavior under test.
    monkeypatch.delenv('OPENAI_API_KEY', raising=False)
    monkeypatch.delenv('HERMES_API_KEY', raising=False)
    monkeypatch.delenv('HERMES_OPENAI_API_KEY', raising=False)
    monkeypatch.delenv('LOCAL_API_KEY', raising=False)
    monkeypatch.delenv('OPENROUTER_API_KEY', raising=False)
    monkeypatch.delenv('API_KEY', raising=False)
    try:
        result = _cfg.get_available_models()
    finally:
        _cfg.cfg.clear()
        _cfg.cfg.update(old_cfg)
    # The config-supplied key must have been sent as a Bearer token.
    assert captured['auth'] == 'Bearer sk-test-model-key'
    assert captured['ua'] == 'OpenAI/Python 1.0'
    groups = {g['provider']: [m['id'] for m in g['models']] for g in result['groups']}
    assert 'Custom' in groups
    assert 'gpt-5.2' in groups['Custom']
# -- Issue #230: custom provider with slash model name -----------------------
def test_custom_endpoint_slash_model_routes_to_custom_not_openrouter():
    """Regression test for #230.
    A slash-containing model name (e.g. google/gemma-4-26b-a4b) served from a
    configured custom endpoint must stay on that endpoint rather than being
    rerouted to OpenRouter; openrouter-configured setups must keep routing
    through OpenRouter as before.
    """
    # Case 1: custom provider + base_url — the slash model stays on the endpoint.
    model, provider, base_url = _resolve_with_config(
        'google/gemma-4-26b-a4b',
        provider='custom',
        base_url='http://127.0.0.1:1234/v1',
        default='google/gemma-4-26b-a4b',
    )
    assert provider.startswith('custom'), (
        "Expected provider starting with 'custom', got '{}'. "
        "Slash in model name should NOT trigger OpenRouter rerouting when base_url is set.".format(provider)
    )
    assert base_url == 'http://127.0.0.1:1234/v1', (
        "Expected base_url 'http://127.0.0.1:1234/v1', got '{}'.".format(base_url)
    )
    # Fix #433: the provider prefix is stripped for custom endpoints so stale
    # prefixed model IDs from previous sessions cannot break endpoint routing.
    assert model == 'gemma-4-26b-a4b', (
        "Model name prefix should be stripped for custom base_url endpoint, got '{}'.".format(model)
    )
    # Case 2: openrouter provider — the same slash model still goes to openrouter.
    model_via_or, provider_via_or, _ = _resolve_with_config(
        'google/gemma-4-26b-a4b',
        provider='openrouter',
        base_url='https://openrouter.ai/api/v1',
        default='google/gemma-4-26b-a4b',
    )
    assert provider_via_or == 'openrouter', (
        "Expected provider 'openrouter', got '{}'. "
        "Slash model via openrouter provider must still resolve to openrouter.".format(provider_via_or)
    )
    assert model_via_or == 'google/gemma-4-26b-a4b', (
        "Model name should be preserved for openrouter, got '{}'.".format(model_via_or)
    )

View File

@@ -0,0 +1,368 @@
"""Tests for fix: onboarding wizard must not fire when Hermes is already configured.
Issue #420 — existing Hermes users (config.yaml present + chat_ready) were
shown the first-run wizard because the only gate was settings.onboarding_completed.
Covers:
(a) config.yaml present + chat_ready=True → completed=True (no wizard)
(b) no config.yaml → completed=False (wizard fires)
(c) apply_onboarding_setup refuses to overwrite an existing config without
confirm_overwrite=True
"""
from __future__ import annotations
import json
import os
import pathlib
import urllib.error
import urllib.request
from unittest import mock
import pytest
# Skip tests that call apply_onboarding_setup → _save_yaml_config when PyYAML is missing
_HAS_YAML = True
try:
    import yaml as _yaml  # noqa: F401 — imported only to probe availability
except ImportError:
    _HAS_YAML = False
_needs_yaml = pytest.mark.skipif(not _HAS_YAML, reason="PyYAML not installed — onboarding setup tests require it")
# ---------------------------------------------------------------------------
# Unit tests — no live server needed, test logic directly via imports
# ---------------------------------------------------------------------------
def _make_status(*, config_exists: bool, chat_ready: bool, onboarding_done: bool = False):
    """Call get_onboarding_status() with a controlled filesystem + settings.

    All collaborators of api.onboarding.get_onboarding_status() are mocked so
    the gate logic can be driven directly by the three keyword flags:

    config_exists:   what Path(...).exists() reports for the config file
    chat_ready:      whether the runtime status claims a working provider
    onboarding_done: the stored settings["onboarding_completed"] flag

    Returns the dict produced by get_onboarding_status().
    """
    import importlib
    # NOTE(review): `importlib` is imported but never used here — likely a
    # leftover from a reload-based earlier version; safe to remove.
    # Import fresh copies each call so module-level state doesn't bleed across
    import api.onboarding as mod
    fake_config_path = pathlib.Path("/tmp/_test_config.yaml")
    settings = {"onboarding_completed": onboarding_done}
    # Build a minimal runtime dict that get_onboarding_status() would produce
    # from _status_from_runtime. We only need the keys the gate checks.
    runtime = {
        "chat_ready": chat_ready,
        "provider_configured": chat_ready,
        "provider_ready": chat_ready,
        "setup_state": "ready" if chat_ready else "needs_provider",
        "provider_note": "test note",
        "current_provider": "openrouter" if chat_ready else None,
        "current_model": "anthropic/claude-sonnet-4.6" if chat_ready else None,
        "current_base_url": None,
        "env_path": "/tmp/.hermes_test/.env",
    }
    with (
        mock.patch.object(mod, "load_settings", return_value=settings),
        mock.patch.object(mod, "get_config", return_value={}),
        mock.patch.object(
            mod,
            "verify_hermes_imports",
            return_value=(chat_ready, [], {}),
        ),
        mock.patch.object(mod, "_status_from_runtime", return_value=runtime),
        mock.patch.object(mod, "load_workspaces", return_value=[]),
        mock.patch.object(mod, "get_last_workspace", return_value=None),
        mock.patch.object(mod, "get_available_models", return_value=[]),
        mock.patch.object(mod, "_get_config_path", return_value=fake_config_path),
        # Patching the class attribute makes every Path instance's .exists()
        # answer with config_exists for the duration of the with block.
        mock.patch.object(pathlib.Path, "exists") as mock_exists,
    ):
        # Make Path(_get_config_path()).exists() return config_exists
        mock_exists.return_value = config_exists
        result = mod.get_onboarding_status()
    return result
class TestOnboardingGate:
    """Unit tests for the completed-gate in get_onboarding_status()."""
    def test_config_exists_and_chat_ready_returns_completed_true(self):
        """An existing, working config must suppress the first-run wizard."""
        status = _make_status(config_exists=True, chat_ready=True)
        assert status["completed"] is True, (
            "Wizard fired for existing Hermes user! "
            "config.yaml + chat_ready must auto-complete onboarding."
        )
    def test_no_config_returns_completed_false(self):
        """With no config.yaml at all, the wizard must fire."""
        status = _make_status(config_exists=False, chat_ready=False)
        assert status["completed"] is False, (
            "Fresh install must show the wizard (completed should be False)."
        )
    def test_config_exists_but_not_chat_ready_still_shows_wizard(self):
        """A present-but-broken config (chat_ready=False) still needs the wizard."""
        status = _make_status(config_exists=True, chat_ready=False)
        # Not auto-completed — the config is there but unusable.
        assert status["completed"] is False, (
            "Broken config (chat_ready=False) must still show the wizard."
        )
    def test_onboarding_done_flag_always_respected(self):
        """A completed-onboarding settings flag always wins: never show the wizard."""
        status = _make_status(config_exists=False, chat_ready=False, onboarding_done=True)
        assert status["completed"] is True
    def test_config_exists_always_exposed_in_system(self):
        """The system block keeps reporting config_exists regardless of gating."""
        status = _make_status(config_exists=True, chat_ready=True)
        assert "config_exists" in status["system"]
        assert status["system"]["config_exists"] is True
class TestApplyOnboardingSetupGuard:
    """Fix #2: apply_onboarding_setup must not silently overwrite config.yaml."""
    def _call_setup(self, body: dict, config_yaml_exists: bool):
        """Invoke apply_onboarding_setup(body) with Path.exists forced to the
        given value, so the overwrite guard can be exercised without touching
        the real filesystem."""
        import api.onboarding as mod
        fake_config_path = pathlib.Path("/tmp/_test_config.yaml")
        with (
            mock.patch.object(mod, "_get_config_path", return_value=fake_config_path),
            # Class-wide patch: every Path instance's .exists() reports
            # config_yaml_exists inside this block.
            mock.patch.object(pathlib.Path, "exists", return_value=config_yaml_exists),
        ):
            return mod.apply_onboarding_setup(body)
    def test_setup_blocked_when_config_exists_without_confirm(self):
        """Must return an error dict (not raise) if config.yaml exists and no confirm_overwrite."""
        result = self._call_setup(
            {
                "provider": "openrouter",
                "model": "anthropic/claude-sonnet-4.6",
                "api_key": "test-key",
            },
            config_yaml_exists=True,
        )
        assert isinstance(result, dict), "Expected a dict response, not an exception"
        assert result.get("error") == "config_exists", (
            f"Expected error='config_exists', got: {result}"
        )
        assert result.get("requires_confirm") is True
    @_needs_yaml
    def test_setup_allowed_with_confirm_overwrite(self):
        """With confirm_overwrite=True, setup may proceed (will hit real logic)."""
        import api.onboarding as mod
        import tempfile
        fake_config_path = pathlib.Path("/tmp/_test_config_confirm.yaml")
        fake_config_path.unlink(missing_ok=True)  # start clean
        try:
            with tempfile.TemporaryDirectory() as tmp_home:
                tmp_home_path = pathlib.Path(tmp_home)
                # Without patching Path.exists, use a non-existent path so it won't block.
                # Also redirect _get_active_hermes_home so .env writes go to the temp dir,
                # never to the real ~/.hermes/.env.
                with mock.patch.object(mod, "_get_active_hermes_home", return_value=tmp_home_path):
                    result = mod.apply_onboarding_setup(
                        {
                            "provider": "openrouter",
                            "model": "anthropic/claude-sonnet-4.6",
                            "api_key": "test-key-confirm",
                            "confirm_overwrite": True,
                        }
                    )
            # Should NOT return config_exists error
            if isinstance(result, dict):
                assert result.get("error") != "config_exists", (
                    "confirm_overwrite=True should bypass the config-exists guard."
                )
        finally:
            # Clean up whatever the real setup logic may have written.
            fake_config_path.unlink(missing_ok=True)
    @_needs_yaml
    def test_setup_allowed_when_no_config_exists(self):
        """Fresh install: no config.yaml → setup proceeds normally (no blocking error)."""
        import api.onboarding as mod
        import tempfile
        fake_config_path = pathlib.Path("/tmp/_test_config_fresh.yaml")
        fake_config_path.unlink(missing_ok=True)
        try:
            with tempfile.TemporaryDirectory() as tmp_home:
                tmp_home_path = pathlib.Path(tmp_home)
                # Redirect both config path and hermes home so writes stay in /tmp,
                # never touching the real ~/.hermes/.env.
                with (
                    mock.patch.object(mod, "_get_config_path", return_value=fake_config_path),
                    mock.patch.object(mod, "_get_active_hermes_home", return_value=tmp_home_path),
                ):
                    result = mod.apply_onboarding_setup(
                        {
                            "provider": "openrouter",
                            "model": "anthropic/claude-sonnet-4.6",
                            "api_key": "test-key-fresh",
                        }
                    )
            if isinstance(result, dict):
                assert result.get("error") != "config_exists"
        finally:
            fake_config_path.unlink(missing_ok=True)
# ---------------------------------------------------------------------------
# Integration tests — require the live test server on port 8788
# ---------------------------------------------------------------------------
from tests._pytest_port import BASE
def _http_get(path):
    """GET an API path on the test server; return (json_body, status_code)."""
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        payload = json.loads(resp.read())
        return payload, resp.status
def _http_post(path, body=None):
    """POST a JSON body to the test server; return (json_body, status_code).

    HTTP error responses are decoded too, so callers can assert on 4xx/5xx
    payloads without catching exceptions themselves.
    """
    encoded = json.dumps(body or {}).encode()
    request = urllib.request.Request(
        BASE + path,
        data=encoded,
        headers={"Content-Type": "application/json"},
    )
    try:
        with urllib.request.urlopen(request, timeout=10) as resp:
            return json.loads(resp.read()), resp.status
    except urllib.error.HTTPError as err:
        return json.loads(err.read()), err.code
def _server_hermes_home() -> pathlib.Path:
    """Derive the hermes home directory the live test server is actually using."""
    data, _ = _http_get("/api/onboarding/status")
    env_path = data.get("system", {}).get("env_path", "")
    if not env_path:
        # Fall back to the conftest-published state dir when no env_path is reported.
        return pathlib.Path(os.environ.get("HERMES_WEBUI_TEST_STATE_DIR", str(pathlib.Path.home() / ".hermes" / "webui-mvp-test")))
    return pathlib.Path(env_path).parent
def _server_reachable() -> bool:
    """True when the live test server answers /health."""
    try:
        _http_get("/health")
    except Exception:
        return False
    return True
# No collection-time skip guard — conftest.py starts the server via its
# autouse session fixture BEFORE tests run. A collection-time check always
# sees no server and turns every test into a skip. Server reachability is
# asserted inside the _require_server fixture instead so failures are loud.
class TestOnboardingGateIntegration:
    """Live-server integration tests for the onboarding gate fix."""
    @pytest.fixture(autouse=True)
    def _require_server(self):
        """Assert server is reachable at test runtime (not collection time)."""
        if not _server_reachable():
            pytest.fail(f"Test server at {BASE} is not reachable")
    @pytest.fixture(autouse=True)
    def _clean(self):
        """Delete config.yaml/.env in the server's state dir before AND after
        each test, then flush the server's in-memory config cache."""
        hermes_home = _server_hermes_home()
        for rel in ("config.yaml", ".env"):
            (hermes_home / rel).unlink(missing_ok=True)
        yield
        for rel in ("config.yaml", ".env"):
            (hermes_home / rel).unlink(missing_ok=True)
        # Force the server to reload its in-memory config after file deletion.
        # apply_onboarding_setup() calls reload_config() which caches provider
        # state in the server process. Deleting files on disk does not clear
        # that cache — the next test would see provider_configured=True.
        # GET /api/personalities always calls reload_config(), giving us a
        # cheap way to flush the cache without a server restart.
        try:
            _http_get("/api/personalities")
        except Exception:
            pass
    def test_no_config_wizard_fires(self):
        """No config.yaml → completed=False."""
        data, status = _http_get("/api/onboarding/status")
        assert status == 200
        assert data["completed"] is False
    @_needs_yaml
    def test_existing_config_and_chat_ready_skips_wizard(self):
        """Write a valid config.yaml + .env → completed must be True."""
        import yaml
        hermes_home = _server_hermes_home()
        # Write a real config.yaml
        cfg = {"model": {"provider": "openrouter", "default": "anthropic/claude-sonnet-4.6"}}
        (hermes_home / "config.yaml").write_text(
            yaml.safe_dump(cfg, sort_keys=False), encoding="utf-8"
        )
        # Write a fake API key so provider_ready (and thus chat_ready) fires
        # — but only when hermes_cli imports are available
        data, _ = _http_get("/api/onboarding/status")
        if data["system"]["hermes_found"] and data["system"]["imports_ok"]:
            (hermes_home / ".env").write_text(
                "OPENROUTER_API_KEY=test-existing-key\n", encoding="utf-8"
            )
            # Re-query now that both config.yaml and .env are in place.
            data, status = _http_get("/api/onboarding/status")
            assert status == 200
            assert data["completed"] is True, (
                "Existing config + chat_ready must auto-complete onboarding."
            )
        else:
            # Agent not installed: chat_ready is always False, so wizard still
            # fires — that is the correct behaviour (can't verify readiness).
            assert data["completed"] is False
    @_needs_yaml
    def test_setup_blocked_for_existing_config(self):
        """POST /api/onboarding/setup must return config_exists error if config.yaml exists."""
        import yaml
        hermes_home = _server_hermes_home()
        cfg = {"model": {"provider": "openrouter", "default": "anthropic/claude-sonnet-4.6"}}
        (hermes_home / "config.yaml").write_text(
            yaml.safe_dump(cfg, sort_keys=False), encoding="utf-8"
        )
        data, status = _http_post(
            "/api/onboarding/setup",
            {
                "provider": "openrouter",
                "model": "anthropic/claude-sonnet-4.6",
                "api_key": "test-key",
            },
        )
        # The guard responds 200 with an error payload rather than an HTTP error.
        assert status == 200
        assert data.get("error") == "config_exists", (
            f"Expected config_exists guard. Got: {data}"
        )
        assert data.get("requires_confirm") is True
    @_needs_yaml
    def test_setup_allowed_with_confirm_overwrite(self):
        """POST /api/onboarding/setup with confirm_overwrite=True succeeds."""
        import yaml
        hermes_home = _server_hermes_home()
        cfg = {"model": {"provider": "openrouter", "default": "anthropic/claude-sonnet-4.6"}}
        (hermes_home / "config.yaml").write_text(
            yaml.safe_dump(cfg, sort_keys=False), encoding="utf-8"
        )
        data, status = _http_post(
            "/api/onboarding/setup",
            {
                "provider": "openrouter",
                "model": "anthropic/claude-sonnet-4.6",
                "api_key": "test-key",
                "confirm_overwrite": True,
            },
        )
        assert status == 200
        assert data.get("error") != "config_exists", (
            "confirm_overwrite=True must bypass the guard."
        )

View File

@@ -0,0 +1,244 @@
"""Onboarding MVP tests — first-run wizard and provider config persistence.
Tests that call /api/onboarding/setup require PyYAML in the test server's
Python environment (the agent venv). They are skipped when hermes-agent is
not installed, since the server falls back to system Python which typically
lacks pyyaml.
"""
import json
import pathlib
import sys
import urllib.error
import urllib.request
import pytest
from tests._pytest_port import BASE
# Check if pyyaml is available — onboarding setup tests need it on the server
_HAS_YAML = True
try:
    import yaml as _yaml  # noqa: F401 — presence probe only
except ImportError:
    _HAS_YAML = False
_needs_yaml = pytest.mark.skipif(not _HAS_YAML, reason="PyYAML not installed — onboarding setup tests require it")
def get(path):
    """GET BASE+path; return (parsed JSON body, HTTP status)."""
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        return json.loads(resp.read()), resp.status


def post(path, body=None):
    """POST a JSON body to BASE+path; return (parsed JSON, status) even on HTTP errors."""
    payload = json.dumps(body or {}).encode()
    request = urllib.request.Request(
        BASE + path,
        data=payload,
        headers={"Content-Type": "application/json"},
    )
    try:
        with urllib.request.urlopen(request, timeout=10) as resp:
            return json.loads(resp.read()), resp.status
    except urllib.error.HTTPError as err:
        # Error responses also carry a JSON body — surface it to the caller.
        return json.loads(err.read()), err.code
def _server_hermes_home() -> pathlib.Path:
    """Get the hermes home path the test server is actually using.

    Asking the server itself (via /api/onboarding/status) is more reliable
    than importing TEST_STATE_DIR from conftest: conftest may be re-imported
    under a different HERMES_HOME, leaving the constant stale (api.config
    resets HERMES_HOME at module import time via init_profile_state).
    """
    data, _status = get("/api/onboarding/status")
    env_path = data.get("system", {}).get("env_path", "")
    if not env_path:
        # Fallback when the server does not report its env path.
        return pathlib.Path.home() / ".hermes" / "webui-mvp-test"
    return pathlib.Path(env_path).parent
@pytest.fixture(autouse=True)
def clean_hermes_config_files():
    """Remove config.yaml/.env in the server's hermes home before and after each test."""
    home = _server_hermes_home()
    targets = [home / name for name in ("config.yaml", ".env")]
    for target in targets:
        target.unlink(missing_ok=True)
    yield
    for target in targets:
        target.unlink(missing_ok=True)
def test_onboarding_status_defaults_incomplete():
    """A fresh state dir reports onboarding as not yet completed."""
    data, status = get("/api/onboarding/status")
    assert status == 200
    assert data["completed"] is False
    settings = data["settings"]
    system = data["system"]
    assert settings["password_enabled"] is False
    assert system["provider_configured"] is False
    assert system["chat_ready"] is False
    assert system["setup_state"] in {"needs_provider", "agent_unavailable"}
    assert "provider_note" in system
    assert isinstance(data["workspaces"]["items"], list)
    assert data["setup"]["providers"]
@_needs_yaml
def test_onboarding_setup_openrouter_writes_real_config_and_env():
    """Happy path: an openrouter setup call persists config.yaml and .env."""
    data, status = post(
        "/api/onboarding/setup",
        {
            "provider": "openrouter",
            "model": "anthropic/claude-sonnet-4.6",
            "api_key": "sk-or-test",
        },
    )
    assert status == 200
    assert data["system"]["provider_configured"] is True
    assert data["system"]["provider_ready"] is True
    # chat_ready additionally requires the agent to be importable; branch on
    # what the server reports so the test passes with or without hermes-agent.
    if data["system"]["imports_ok"] and data["system"]["hermes_found"]:
        assert data["system"]["chat_ready"] is True
        assert data["system"]["setup_state"] == "ready"
    else:
        assert data["system"]["chat_ready"] is False
        assert data["system"]["setup_state"] == "agent_unavailable"
    # The setup call must have written real files into the server's state dir.
    cfg_text = (_server_hermes_home() / "config.yaml").read_text(encoding="utf-8")
    env_text = (_server_hermes_home() / ".env").read_text(encoding="utf-8")
    assert "provider: openrouter" in cfg_text
    assert "default: anthropic/claude-sonnet-4.6" in cfg_text
    assert "OPENROUTER_API_KEY=sk-or-test" in env_text
@_needs_yaml
def test_onboarding_setup_custom_endpoint_writes_runtime_files():
    """A custom provider setup persists base_url and the OPENAI-compatible key."""
    data, status = post(
        "/api/onboarding/setup",
        {
            "provider": "custom",
            "model": "google/gemma-3-27b-it",
            "base_url": "http://localhost:4000/v1",
            "api_key": "sk-custom-test",
        },
    )
    assert status == 200
    assert data["system"]["provider_configured"] is True
    assert data["system"]["provider_ready"] is True
    # As with openrouter: chat readiness depends on the agent being present.
    if data["system"]["imports_ok"] and data["system"]["hermes_found"]:
        assert data["system"]["chat_ready"] is True
        assert data["system"]["setup_state"] == "ready"
    else:
        assert data["system"]["chat_ready"] is False
        assert data["system"]["setup_state"] == "agent_unavailable"
    assert data["system"]["current_provider"] == "custom"
    assert data["system"]["current_base_url"] == "http://localhost:4000/v1"
    # Verify the on-disk runtime files, not just the API response.
    cfg_text = (_server_hermes_home() / "config.yaml").read_text(encoding="utf-8")
    env_text = (_server_hermes_home() / ".env").read_text(encoding="utf-8")
    assert "provider: custom" in cfg_text
    assert "default: google/gemma-3-27b-it" in cfg_text
    assert "base_url: http://localhost:4000/v1" in cfg_text
    # Custom endpoints use the OpenAI-compatible key name.
    assert "OPENAI_API_KEY=sk-custom-test" in env_text
@_needs_yaml
def test_onboarding_setup_detects_incomplete_saved_provider():
    """config.yaml present but .env missing → provider configured, not ready.

    Fix: the original bound post()'s (data, status) return pair to the
    misleadingly swapped names ``status, code``; variables are now named
    for what they actually hold.
    """
    data, status = post(
        "/api/onboarding/setup",
        {
            "provider": "anthropic",
            "model": "claude-sonnet-4.6",
            "api_key": "sk-ant-test",
        },
    )
    assert status == 200
    # Simulate a lost/deleted .env: the key is gone but config.yaml remains.
    (_server_hermes_home() / ".env").unlink(missing_ok=True)
    data, status = get("/api/onboarding/status")
    assert status == 200
    assert data["system"]["provider_configured"] is True
    assert data["system"]["provider_ready"] is False
    assert data["system"]["chat_ready"] is False
    assert data["system"]["setup_state"] in {"provider_incomplete", "agent_unavailable"}
@_needs_yaml
def test_onboarding_setup_rejects_missing_custom_base_url():
    """A custom provider without base_url must be rejected with HTTP 400."""
    payload = {
        "provider": "custom",
        "model": "qwen2.5-coder",
        "api_key": "sk-test",
    }
    data, status = post("/api/onboarding/setup", payload)
    assert status == 400
    assert "base_url is required" in data["error"]
def test_onboarding_complete_persists_flag():
    """POST /api/onboarding/complete flips the flag in settings.json and status."""
    data, status = post("/api/onboarding/complete", {})
    assert status == 200
    assert data["completed"] is True
    # The flag must be persisted to disk, not just echoed in the response.
    settings = json.loads(
        (_server_hermes_home() / "settings.json").read_text(encoding="utf-8")
    )
    assert settings["onboarding_completed"] is True
    # A follow-up status call must also report completion.
    data2, status2 = get("/api/onboarding/status")
    assert status2 == 200
    assert data2["completed"] is True
def test_onboarding_complete_preserves_other_settings():
    """Completing onboarding must not overwrite other user settings."""
    # Use send_key (a safe enum setting) to verify settings preservation
    # without contaminating bot_name or theme checks in other test files.
    # Use GET /api/settings (not onboarding status) to check preservation
    # since the onboarding status only returns a subset of settings fields.
    try:
        saved, s1 = post("/api/settings", {"send_key": "ctrl+enter"})
        assert s1 == 200
        assert saved["send_key"] == "ctrl+enter"
        _, s2 = post("/api/onboarding/complete", {})
        assert s2 == 200
        # Verify the non-onboarding setting survived the completion call
        current_settings, s3 = get("/api/settings")
        assert s3 == 200
        assert current_settings["send_key"] == "ctrl+enter"
    finally:
        # Always restore default send_key to avoid contaminating other tests
        post("/api/settings", {"send_key": "enter"})
def test_onboarding_already_completed_status():
    """After marking onboarding complete, status must reflect completed=True
    so the wizard does not re-appear for returning users."""
    done, status = post("/api/onboarding/complete", {})
    assert status == 200
    assert done["completed"] is True
    # A second read must agree with the completion response.
    data, status2 = get("/api/onboarding/status")
    assert status2 == 200
    assert data["completed"] is True
    # Reset so test doesn't contaminate others
    post("/api/settings", {"onboarding_completed": False})
@_needs_yaml
def test_onboarding_setup_rejects_api_key_with_newline():
    """API keys containing embedded newlines must be rejected to prevent .env injection."""
    malicious_key = "sk-bad" + chr(10) + "OTHER_KEY=injected"
    payload = {
        "provider": "openrouter",
        "model": "anthropic/claude-sonnet-4.6",
        "api_key": malicious_key,
    }
    data, status = post("/api/onboarding/setup", payload)
    assert status == 400
    assert "newline" in data["error"].lower()

View File

@@ -0,0 +1,184 @@
"""
Tests: onboarding /api/onboarding/setup network restriction logic (issue #390).
Covers:
1. Request from 127.0.0.1 (loopback) is allowed without auth
2. Request from RFC-1918 private IP (172.x, 192.168.x, 10.x) is allowed without auth
3. Request from public IP is blocked without auth → 403
4. X-Forwarded-For loopback IP is trusted → allowed
5. X-Forwarded-For private IP is trusted → allowed
6. X-Forwarded-For public IP → still blocked
7. X-Real-IP loopback → allowed
8. HERMES_WEBUI_ONBOARDING_OPEN=1 bypasses the check entirely
9. Auth enabled → check skipped, any IP allowed
"""
import json
import os
import pathlib
import sys
import unittest.mock
import urllib.error
import urllib.request
import pytest
REPO = pathlib.Path(__file__).parent.parent
from tests._pytest_port import BASE
# ---------------------------------------------------------------------------
# Unit tests — directly test the IP-resolution + guard logic in routes.py
# without needing a live server. We replicate the logic to keep tests fast
# and independent of server startup.
# ---------------------------------------------------------------------------
def _is_local_from_handler(
raw_ip: str,
xff: str = "",
xri: str = "",
auth_enabled: bool = False,
open_env: bool = False,
) -> bool | str:
"""
Mirror of the onboarding IP check in api/routes.py.
Returns True if the request would be allowed, False if blocked,
or the error message string if blocked.
"""
import ipaddress
if auth_enabled or open_env:
return True
_xff = xff.split(",")[0].strip() if xff else ""
_xri = xri.strip()
_ip_str = _xff or _xri or raw_ip
try:
addr = ipaddress.ip_address(_ip_str)
is_local = addr.is_loopback or addr.is_private
except ValueError:
is_local = False
return is_local
class TestOnboardingIPLogic:
    """Unit tests for the IP-resolution logic (no live server needed)."""
    def test_loopback_allowed(self):
        """IPv4 loopback must pass the check."""
        assert _is_local_from_handler("127.0.0.1") is True
    def test_ipv6_loopback_allowed(self):
        """IPv6 loopback (::1) must pass the check."""
        assert _is_local_from_handler("::1") is True
    def test_private_172_allowed(self):
        """Docker bridge addresses (172.17.x.x) must be allowed."""
        assert _is_local_from_handler("172.17.0.1") is True
    def test_private_192168_allowed(self):
        """RFC-1918 192.168.0.0/16 addresses must be allowed."""
        assert _is_local_from_handler("192.168.1.100") is True
    def test_private_10_allowed(self):
        """RFC-1918 10.0.0.0/8 addresses must be allowed."""
        assert _is_local_from_handler("10.0.0.5") is True
    def test_public_ip_blocked(self):
        """A public address without auth must be blocked."""
        assert _is_local_from_handler("8.8.8.8") is False
    def test_xff_loopback_trusted(self):
        """Reverse proxy sets X-Forwarded-For to 127.0.0.1 — should be allowed."""
        assert _is_local_from_handler("172.20.0.1", xff="127.0.0.1") is True
    def test_xff_private_trusted(self):
        """Reverse proxy sets X-Forwarded-For to LAN IP — should be allowed."""
        assert _is_local_from_handler("172.20.0.1", xff="192.168.1.50") is True
    def test_xff_public_blocked(self):
        """Public IP in X-Forwarded-For should still be blocked."""
        assert _is_local_from_handler("172.20.0.1", xff="8.8.8.8") is False
    def test_xff_first_entry_used(self):
        """X-Forwarded-For may have multiple IPs; only the first (client) is used."""
        # First entry is private → allowed
        assert _is_local_from_handler("172.20.0.1", xff="10.0.0.1, 172.20.0.1") is True
        # First entry is public → blocked
        assert _is_local_from_handler("172.20.0.1", xff="8.8.8.8, 172.20.0.1") is False
    def test_xreal_ip_loopback_trusted(self):
        """X-Real-IP loopback → allowed."""
        assert _is_local_from_handler("172.20.0.1", xri="127.0.0.1") is True
    def test_xreal_ip_private_trusted(self):
        """X-Real-IP private address → allowed."""
        assert _is_local_from_handler("172.20.0.1", xri="10.1.2.3") is True
    def test_xff_takes_priority_over_xri(self):
        """X-Forwarded-For wins over X-Real-IP when both present."""
        # XFF says public, XRI says local → blocked (XFF takes priority)
        assert _is_local_from_handler("172.20.0.1", xff="8.8.8.8", xri="127.0.0.1") is False
    def test_open_env_bypasses_check(self):
        """HERMES_WEBUI_ONBOARDING_OPEN=1 allows any IP."""
        assert _is_local_from_handler("8.8.8.8", open_env=True) is True
    def test_auth_enabled_bypasses_check(self):
        """When auth is enabled, IP check is skipped entirely."""
        assert _is_local_from_handler("8.8.8.8", auth_enabled=True) is True
    def test_invalid_ip_blocked(self):
        """Malformed IP in header → treated as non-local → blocked."""
        assert _is_local_from_handler("not-an-ip") is False
# ---------------------------------------------------------------------------
# Integration tests — hit the live test server on its per-worktree test port
# ---------------------------------------------------------------------------
@pytest.mark.integration
class TestOnboardingSetupEndpoint:
    """
    Integration tests for /api/onboarding/setup.
    These require the test server running on the per-worktree test port.

    Fix: the original wrapped these requests in
    ``unittest.mock.patch("api.onboarding.apply_onboarding_setup", ...)``.
    Patching in the *test* process cannot affect the separate server
    process that handles the HTTP request, so the mock was a no-op (and
    forced an unnecessary import of api.onboarding into the test runner).
    It has been removed; the assertions only check that no 403 is
    returned, which never depended on the mock.
    """

    def _post(self, path: str, data: dict, headers: dict | None = None) -> tuple[int, dict]:
        """POST JSON to the live test server; return (status, parsed body)."""
        payload = json.dumps(data).encode()
        req = urllib.request.Request(
            BASE + path,
            data=payload,
            method="POST",
            headers={"Content-Type": "application/json", **(headers or {})},
        )
        try:
            with urllib.request.urlopen(req, timeout=10) as r:
                return r.status, json.loads(r.read())
        except urllib.error.HTTPError as e:
            return e.code, json.loads(e.read())

    def test_loopback_request_allowed(self):
        """
        Requests from 127.0.0.1 (which is what the test server sees) should
        pass the IP check. We confirm no 403 is returned.
        """
        # The test server runs on 127.0.0.1:{TEST_PORT} so client_address[0]
        # is 127.0.0.1; a valid payload must not be rejected for IP reasons.
        status, body = self._post(
            "/api/onboarding/setup",
            {"provider": "anthropic", "model": "claude-sonnet-4.6", "api_key": "test-key"},
        )
        # Should not be 403 (IP blocked). May be 200 or another error from apply logic.
        assert status != 403, f"Got 403 — IP check incorrectly blocked loopback. Body: {body}"

    def test_xff_loopback_header_respected(self):
        """
        Simulated reverse proxy: raw TCP is 127.0.0.1 but X-Forwarded-For is also
        127.0.0.1. Should be allowed.
        """
        status, body = self._post(
            "/api/onboarding/setup",
            {"provider": "anthropic", "model": "claude-sonnet-4.6", "api_key": "test-key"},
            headers={"X-Forwarded-For": "127.0.0.1"},
        )
        assert status != 403, f"Got 403 with XFF=127.0.0.1. Body: {body}"

View File

@@ -0,0 +1,58 @@
import pathlib
REPO = pathlib.Path(__file__).parent.parent


def read(path):
    """Return the text of *path* relative to the repo root (UTF-8)."""
    target = REPO / path
    return target.read_text(encoding="utf-8")
def test_index_contains_onboarding_overlay_markup():
    """index.html must ship the onboarding overlay elements and its script tag."""
    html = read("static/index.html")
    needles = (
        'id="onboardingOverlay"',
        'id="onboardingBody"',
        'id="onboardingNextBtn"',
        'src="static/onboarding.js"',
    )
    for needle in needles:
        assert needle in html
def test_onboarding_css_rules_exist():
    """style.css must define the onboarding overlay/card/step/status selectors."""
    css = read("static/style.css")
    expected = [
        ".onboarding-overlay",
        ".onboarding-card",
        ".onboarding-step",
        ".onboarding-status.warn",
    ]
    missing = [selector for selector in expected if selector not in css]
    assert not missing
def test_onboarding_js_exposes_bootstrap_hooks():
    """onboarding.js must define the wizard entry points and call the API trio."""
    js = read("static/onboarding.js")
    for fragment in (
        "async function loadOnboardingWizard()",
        "async function nextOnboardingStep()",
        "api('/api/onboarding/status')",
        "api('/api/onboarding/setup'",
        "api('/api/onboarding/complete'",
    ):
        assert fragment in js
def test_onboarding_uses_i18n_helpers():
    """Onboarding strings must go through the i18n layer in all three assets."""
    html = read("static/index.html")
    js = read("static/onboarding.js")
    i18n = read("static/i18n.js")
    for needle in ('data-i18n="onboarding_title"', 'data-i18n="onboarding_continue"'):
        assert needle in html
    for needle in (
        "t('onboarding_step_system_title')",
        "t('onboarding_step_setup_title')",
        "t('onboarding_complete')",
    ):
        assert needle in js
    # At least the English and Spanish locale entries must exist.
    for needle in (
        "onboarding_title: 'Welcome to Hermes Web UI'",
        "onboarding_title: 'Bienvenido a Hermes Web UI'",
    ):
        assert needle in i18n
def test_bootstrap_script_contains_official_installer_and_windows_guard():
    """bootstrap.py must point at the official installer and refuse native Windows."""
    src = read("bootstrap.py")
    installer_url = (
        "https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.sh"
    )
    assert installer_url in src
    assert "Native Windows is not supported" in src

View File

@@ -0,0 +1,121 @@
"""
Tests for OpenCode Zen and OpenCode Go provider support.
Verifies provider registration in display/model catalogs and
env-var fallback detection.
"""
import os
import sys
import types
import api.config as config
# ── Provider registration ─────────────────────────────────────────────
def test_opencode_zen_in_provider_display():
    """opencode-zen must be registered with its display name."""
    display = config._PROVIDER_DISPLAY
    assert "opencode-zen" in display
    assert display["opencode-zen"] == "OpenCode Zen"
def test_opencode_go_in_provider_display():
    """opencode-go must be registered with its display name."""
    display = config._PROVIDER_DISPLAY
    assert "opencode-go" in display
    assert display["opencode-go"] == "OpenCode Go"
def test_opencode_zen_in_provider_models():
    """The opencode-zen static catalog must carry its flagship models."""
    catalog = config._PROVIDER_MODELS
    assert "opencode-zen" in catalog
    model_ids = {entry["id"] for entry in catalog["opencode-zen"]}
    for expected in ("claude-opus-4-6", "gpt-5.4-pro", "glm-5.1"):
        assert expected in model_ids
def test_opencode_go_in_provider_models():
    """The opencode-go static catalog must carry its flagship models."""
    catalog = config._PROVIDER_MODELS
    assert "opencode-go" in catalog
    model_ids = {entry["id"] for entry in catalog["opencode-go"]}
    for expected in ("glm-5.1", "glm-5", "mimo-v2-pro"):
        assert expected in model_ids
# ── Env-var fallback detection ────────────────────────────────────────
def _models_with_env_key(monkeypatch, env_var, expected_provider_display):
    """Helper: fake hermes_cli unavailable, set an env var, check detection.

    Registers a stub ``hermes_cli.models`` module *without* a
    ``list_available_providers`` attribute so api.config falls back to its
    env-var detection path, then verifies the expected provider group
    appears in get_available_models().

    Fix: the original set ``list_available_providers = None`` on the stub
    and immediately delattr'd it again — the assignment was dead code.
    """
    # Stub module with no list_available_providers attribute at all.
    fake_mod = types.ModuleType("hermes_cli.models")
    monkeypatch.setitem(sys.modules, "hermes_cli.models", fake_mod)
    # Blank the provider config so only the env var can trigger detection;
    # restored in the finally block even if the assertion fails.
    old_cfg = dict(config.cfg)
    config.cfg["model"] = {}
    config.cfg.pop("custom_providers", None)
    monkeypatch.setenv(env_var, "test-key")
    try:
        result = config.get_available_models()
        providers = [g["provider"] for g in result["groups"]]
        assert expected_provider_display in providers, (
            f"Expected {expected_provider_display} in {providers}"
        )
    finally:
        config.cfg.clear()
        config.cfg.update(old_cfg)
def test_opencode_zen_detected_via_env_key(monkeypatch):
    # OPENCODE_ZEN_API_KEY alone must surface the OpenCode Zen provider group.
    _models_with_env_key(monkeypatch, "OPENCODE_ZEN_API_KEY", "OpenCode Zen")
def test_opencode_go_detected_via_env_key(monkeypatch):
    # OPENCODE_GO_API_KEY alone must surface the OpenCode Go provider group.
    _models_with_env_key(monkeypatch, "OPENCODE_GO_API_KEY", "OpenCode Go")
def test_openai_codex_model_catalog_includes_gpt54():
    """openai-codex catalog must include gpt-5.4 and the standard Codex lineup."""
    assert "openai-codex" in config._PROVIDER_MODELS
    ids = [m["id"] for m in config._PROVIDER_MODELS["openai-codex"]]
    for model_id in ("gpt-5.4", "gpt-5.4-mini", "gpt-5.3-codex", "gpt-5.2-codex"):
        assert model_id in ids, f"{model_id} missing from openai-codex catalog: {ids}"
def test_openai_codex_display_name():
    """openai-codex must have a human-readable display name."""
    display = config._PROVIDER_DISPLAY
    assert "openai-codex" in display
    assert display["openai-codex"] == "OpenAI Codex"
def test_live_models_handler_delegates_to_provider_model_ids():
    """_handle_live_models must delegate to the agent's provider_model_ids()
    rather than maintain its own per-provider fetch logic.
    """
    import pathlib
    routes_path = pathlib.Path(__file__).parent.parent / "api" / "routes.py"
    routes_src = routes_path.read_text()
    checks = [
        (
            "provider_model_ids",
            True,
            "_handle_live_models must call hermes_cli.models.provider_model_ids() "
            "to delegate all provider-specific live-fetch logic to the agent",
        ),
        (
            "https://api.openai.com/v1",
            False,
            "_handle_live_models must not hardcode api.openai.com — "
            "provider resolution is handled by the agent",
        ),
        (
            "not_supported",
            False,
            "_handle_live_models must not return not_supported for any provider — "
            "provider_model_ids() falls back to static list automatically",
        ),
    ]
    for needle, should_exist, message in checks:
        assert (needle in routes_src) is should_exist, message
def test_live_models_ui_no_longer_skips_any_provider():
    """_fetchLiveModels in ui.js must not exclude any provider from live fetching.

    Previously anthropic, google, and gemini were skipped — now
    provider_model_ids() handles them all (with graceful fallback to static
    lists).

    Fix: the original sliced ``ui_src[:idx + 100]`` — i.e. from the START of
    the file — so any unrelated "anthropic" occurrence earlier in ui.js made
    the check fire. We now inspect only the window immediately before the
    ``includes(provider)`` call, where a skip-list literal like
    ``['anthropic', ...].includes(provider)`` would appear.
    """
    import pathlib
    ui_src = (pathlib.Path(__file__).parent.parent / "static" / "ui.js").read_text()
    marker = "includes(provider)"
    idx = ui_src.find(marker)
    if idx != -1:
        window = ui_src[max(0, idx - 100):idx + len(marker)]
        assert "anthropic" not in window, (
            "_fetchLiveModels must not skip anthropic, google, or gemini — "
            "the backend now returns live models for all providers"
        )

View File

@@ -0,0 +1,175 @@
"""Tests for _sanitize_messages_for_api() orphaned-tool-message stripping.
Regression for issue #534: strictly-conformant providers (Mercury-2/Inception,
newer OpenAI models) reject histories containing tool-role messages whose
tool_call_id has no matching tool_calls entry in a prior assistant message.
"""
import sys
import pathlib
REPO_ROOT = pathlib.Path(__file__).parent.parent.resolve()
sys.path.insert(0, str(REPO_ROOT))
from api.streaming import _sanitize_messages_for_api
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _asst_with_tool_call(call_id="call-1", call_id_key="id"):
return {
"role": "assistant",
"content": None,
"tool_calls": [{"type": "function", call_id_key: call_id, "function": {"name": "terminal", "arguments": "{}"}}],
"_ts": 12345, # extra field that should be stripped
}
def _tool_result(call_id="call-1"):
return {"role": "tool", "tool_call_id": call_id, "content": "ok", "_ts": 12345}
def _user(text="hello"):
return {"role": "user", "content": text, "_ts": 12345}
def _asst(text="hi"):
return {"role": "assistant", "content": text, "_ts": 12345}
# ---------------------------------------------------------------------------
# Tests: normal valid histories are preserved
# ---------------------------------------------------------------------------
def test_valid_tool_roundtrip_preserved():
    """A linked assistant→tool pair must be kept intact."""
    history = [_user(), _asst_with_tool_call("call-1"), _tool_result("call-1"), _asst()]
    out = _sanitize_messages_for_api(history)
    assert [m["role"] for m in out] == ["user", "assistant", "tool", "assistant"]
def test_extra_fields_stripped():
    """Non-API fields (_ts etc.) are always stripped."""
    out = _sanitize_messages_for_api([_user(), _asst()])
    assert not any("_ts" in m for m in out)
def test_valid_history_without_tool_messages_unchanged():
    """Plain user/assistant history with no tool calls is passed through unchanged."""
    out = _sanitize_messages_for_api([_user("a"), _asst("b"), _user("c"), _asst("d")])
    assert len(out) == 4
    assert all(m["role"] in ("user", "assistant") for m in out)
def test_multiple_valid_tool_calls_preserved():
    """Multiple linked tool_call_ids in one assistant message are all preserved."""
    def _call(cid, fname):
        return {"type": "function", "id": cid, "function": {"name": fname, "arguments": "{}"}}
    asst = {
        "role": "assistant",
        "content": None,
        "tool_calls": [_call("call-1", "f1"), _call("call-2", "f2")],
    }
    history = [_user(), asst, _tool_result("call-1"), _tool_result("call-2"), _asst()]
    out = _sanitize_messages_for_api(history)
    assert [m["role"] for m in out] == ["user", "assistant", "tool", "tool", "assistant"]
# ---------------------------------------------------------------------------
# Tests: orphaned tool messages are dropped
# ---------------------------------------------------------------------------
def test_orphaned_tool_message_dropped():
    """A tool message with no matching assistant tool_call is dropped."""
    out = _sanitize_messages_for_api([_user(), _asst(), _tool_result("call-orphan")])
    roles = [m["role"] for m in out]
    assert "tool" not in roles
    assert roles == ["user", "assistant"]
def test_tool_message_missing_tool_call_id_dropped():
    """A tool message with no tool_call_id at all is dropped."""
    no_id = {"role": "tool", "content": "result"}
    out = _sanitize_messages_for_api([_user(), _asst_with_tool_call("call-1"), no_id])
    assert all(m["role"] != "tool" for m in out)
def test_partially_orphaned_tool_messages():
    """In a mixed batch, only the orphaned tool messages are dropped."""
    history = [
        _user(),
        _asst_with_tool_call("call-valid"),
        _tool_result("call-valid"),  # linked → kept
        _tool_result("call-ghost"),  # orphaned → dropped
        _asst(),
    ]
    out = _sanitize_messages_for_api(history)
    assert [m["role"] for m in out] == ["user", "assistant", "tool", "assistant"]
    # The kept tool message has the right call_id
    kept_tools = [m for m in out if m["role"] == "tool"]
    assert kept_tools[0]["tool_call_id"] == "call-valid"
def test_orphaned_tool_only_history():
    """A history consisting only of orphaned tool messages returns empty."""
    orphans = [_tool_result("dangling-1"), _tool_result("dangling-2")]
    assert _sanitize_messages_for_api(orphans) == []
# ---------------------------------------------------------------------------
# Tests: Anthropic 'call_id' field name (not OpenAI 'id')
# ---------------------------------------------------------------------------
def test_anthropic_call_id_field_recognized():
    """Anthropic tool calls use 'call_id' not 'id' — both must be recognized."""
    asst = _asst_with_tool_call("call-anthropic", call_id_key="call_id")
    history = [_user(), asst, _tool_result("call-anthropic"), _asst()]
    out = _sanitize_messages_for_api(history)
    assert [m["role"] for m in out] == ["user", "assistant", "tool", "assistant"]
# ---------------------------------------------------------------------------
# Tests: edge cases
# ---------------------------------------------------------------------------
def test_empty_messages_list():
    """Empty input yields empty output."""
    assert _sanitize_messages_for_api([]) == []
def test_non_dict_messages_skipped():
    """Non-dict items in the messages list are silently ignored."""
    out = _sanitize_messages_for_api(["not a dict", None, _user("hi"), 42])
    assert len(out) == 1
    assert out[0]["role"] == "user"
def test_tool_calls_none_does_not_crash():
    """An assistant message with tool_calls=None is handled without crashing."""
    asst = {"role": "assistant", "content": "hello", "tool_calls": None}
    out = _sanitize_messages_for_api([_user(), asst, _tool_result("call-1")])
    # call-1 has no valid parent (tool_calls=None → no IDs registered) → dropped
    assert all(m["role"] != "tool" for m in out)
def test_system_messages_preserved():
    """System messages are always preserved."""
    sys_msg = {"role": "system", "content": "You are helpful."}
    out = _sanitize_messages_for_api([sys_msg, _user(), _asst()])
    assert out[0]["role"] == "system"

View File

@@ -0,0 +1,67 @@
import importlib
import os
import sys
from pathlib import Path
def test_profile_switch_clears_previous_profile_env_vars(monkeypatch, tmp_path):
    """Switching to a profile with no .env must unset the old profile's vars."""
    base = tmp_path / ".hermes"
    (base / "profiles" / "p1").mkdir(parents=True)
    (base / "profiles" / "p2").mkdir(parents=True)
    # Only p1 carries a .env; p2 intentionally has none.
    (base / "profiles" / "p1" / ".env").write_text(
        "OPENAI_API_KEY=secret-from-p1\nCUSTOM_TOKEN=token-from-p1\n",
        encoding="utf-8",
    )
    # Pin the module to the temp home and start from a clean environment.
    monkeypatch.setenv("HERMES_BASE_HOME", str(base))
    monkeypatch.delenv("HERMES_HOME", raising=False)
    monkeypatch.delenv("OPENAI_API_KEY", raising=False)
    monkeypatch.delenv("CUSTOM_TOKEN", raising=False)
    # Force a re-import so module-level state picks up HERMES_BASE_HOME.
    sys.modules.pop("api.profiles", None)
    profiles = importlib.import_module("api.profiles")
    profiles = importlib.reload(profiles)
    profiles.init_profile_state()
    profiles.switch_profile("p1")
    assert os.environ.get("OPENAI_API_KEY") == "secret-from-p1"
    assert os.environ.get("CUSTOM_TOKEN") == "token-from-p1"
    # p2 has no .env → p1's vars must be cleared, not left to leak through.
    profiles.switch_profile("p2")
    assert os.environ.get("OPENAI_API_KEY") is None
    assert os.environ.get("CUSTOM_TOKEN") is None
    assert profiles.get_active_profile_name() == "p2"
def test_profile_switch_replaces_overlapping_keys(monkeypatch, tmp_path):
    """Switching profiles replaces shared keys and drops profile-only keys."""
    base = tmp_path / ".hermes"
    (base / "profiles" / "p1").mkdir(parents=True)
    (base / "profiles" / "p2").mkdir(parents=True)
    # Both profiles define OPENAI_API_KEY; each has one exclusive key.
    (base / "profiles" / "p1" / ".env").write_text(
        "OPENAI_API_KEY=secret-from-p1\nONLY_P1=one\n",
        encoding="utf-8",
    )
    (base / "profiles" / "p2" / ".env").write_text(
        "OPENAI_API_KEY=secret-from-p2\nONLY_P2=two\n",
        encoding="utf-8",
    )
    # Pin the module to the temp home and start from a clean environment.
    monkeypatch.setenv("HERMES_BASE_HOME", str(base))
    monkeypatch.delenv("HERMES_HOME", raising=False)
    monkeypatch.delenv("OPENAI_API_KEY", raising=False)
    monkeypatch.delenv("ONLY_P1", raising=False)
    monkeypatch.delenv("ONLY_P2", raising=False)
    # Force a re-import so module-level state picks up HERMES_BASE_HOME.
    sys.modules.pop("api.profiles", None)
    profiles = importlib.import_module("api.profiles")
    profiles = importlib.reload(profiles)
    profiles.init_profile_state()
    profiles.switch_profile("p1")
    assert os.environ.get("OPENAI_API_KEY") == "secret-from-p1"
    assert os.environ.get("ONLY_P1") == "one"
    # After the switch: shared key replaced, p1-only key gone, p2-only key set.
    profiles.switch_profile("p2")
    assert os.environ.get("OPENAI_API_KEY") == "secret-from-p2"
    assert os.environ.get("ONLY_P1") is None
    assert os.environ.get("ONLY_P2") == "two"

View File

@@ -0,0 +1,63 @@
import importlib
import os
import sys
import tempfile
from pathlib import Path
import pytest
REPO_ROOT = Path(__file__).parent.parent.resolve()
if str(REPO_ROOT) not in sys.path:
sys.path.insert(0, str(REPO_ROOT))
def _reload_profiles_module(base_home: Path):
    """Point HERMES at *base_home* and return a freshly imported api.profiles.

    NOTE(review): this mutates os.environ directly with no cleanup (unlike
    the monkeypatch-based tests elsewhere), so HERMES_BASE_HOME/HERMES_HOME
    leak into subsequent tests — confirm that is acceptable for this suite.
    """
    os.environ["HERMES_BASE_HOME"] = str(base_home)
    os.environ["HERMES_HOME"] = str(base_home)
    # Drop cached modules so the import re-runs module-level init under the new env.
    for name in ["api.config", "api.profiles"]:
        if name in sys.modules:
            del sys.modules[name]
    profiles = importlib.import_module("api.profiles")
    return profiles
def test_switch_profile_rejects_path_traversal():
    """switch_profile must refuse names that escape the profiles directory."""
    with tempfile.TemporaryDirectory() as td:
        root = Path(td)
        base = root / ".hermes"
        (base / "profiles").mkdir(parents=True)
        (root / "escape-target").mkdir()
        profiles = _reload_profiles_module(base)
        with pytest.raises(ValueError):
            profiles.switch_profile("../../escape-target")
def test_delete_profile_rejects_path_traversal():
    """delete_profile_api must refuse names that escape the profiles directory."""
    with tempfile.TemporaryDirectory() as td:
        root = Path(td)
        base = root / ".hermes"
        (base / "profiles").mkdir(parents=True)
        (root / "escape-target").mkdir()
        profiles = _reload_profiles_module(base)
        with pytest.raises(ValueError):
            profiles.delete_profile_api("../../escape-target")
def test_switch_profile_allows_valid_profile_name():
    """A plain profile name switches cleanly and repoints HERMES_HOME."""
    with tempfile.TemporaryDirectory() as td:
        base = Path(td) / ".hermes"
        profile_dir = base / "profiles" / "demo"
        profile_dir.mkdir(parents=True)
        profiles = _reload_profiles_module(base)
        result = profiles.switch_profile("demo")
        assert result["active"] == "demo"
        active_home = Path(os.environ["HERMES_HOME"]).resolve()
        assert active_home == profile_dir.resolve()

View File

@@ -0,0 +1,325 @@
"""
Tests for issue #266 — provider/model mismatch warning.
Covers:
1. streaming.py: auth errors detected and classified as 'auth_mismatch'
2. static/ui.js: _checkProviderMismatch() helper exists and logic is correct
3. static/messages.js: apperror handler has auth_mismatch branch
4. static/i18n.js: provider_mismatch_warning and provider_mismatch_label keys
present in all locales (en, es, de, ru, zh, zh-Hant)
5. static/boot.js: modelSelect.onchange calls _checkProviderMismatch
6. /api/models: response includes active_provider field
"""
import json
import pathlib
import re
import urllib.request
REPO_ROOT = pathlib.Path(__file__).parent.parent.resolve()
from tests._pytest_port import BASE
def _read(rel_path: str) -> str:
    """Read a repo-relative file as UTF-8 text."""
    target = REPO_ROOT / rel_path
    return target.read_text(encoding="utf-8")
# ── 1. streaming.py: auth error detection ───────────────────────────────────
class TestStreamingAuthErrorDetection:
    """streaming.py must classify auth/401 errors as auth_mismatch.

    Fix: methods that slice around ``src.find(...)`` now assert the marker
    was found first — previously a missing marker gave idx == -1 and the
    slice silently inspected the wrong text, and the ordering test passed
    vacuously when is_rate_limit was absent (find() returns -1 < any index).
    """

    def test_auth_mismatch_type_defined_in_streaming(self):
        """'auth_mismatch' type must be emitted for auth errors."""
        src = _read("api/streaming.py")
        assert "auth_mismatch" in src, (
            "auth_mismatch type not found in streaming.py — "
            "401/auth errors will not be surfaced with a helpful message"
        )

    def test_is_auth_error_flag_defined(self):
        """is_auth_error variable must exist in the error handler."""
        src = _read("api/streaming.py")
        assert "is_auth_error" in src, (
            "is_auth_error flag not found in streaming.py"
        )

    def test_auth_error_detects_401(self):
        """'401' must be part of the auth error detection logic."""
        src = _read("api/streaming.py")
        # Find the is_auth_error block
        idx = src.find("is_auth_error")
        assert idx != -1
        block = src[idx:idx + 400]
        assert "'401'" in block or '"401"' in block, (
            "'401' not in is_auth_error detection block"
        )

    def test_auth_error_detects_unauthorized(self):
        """'unauthorized' must be part of the auth error detection logic."""
        src = _read("api/streaming.py")
        idx = src.find("is_auth_error")
        assert idx != -1, "is_auth_error flag not found in streaming.py"
        block = src[idx:idx + 400]
        assert "unauthorized" in block.lower(), (
            "'unauthorized' not in is_auth_error detection block"
        )

    def test_auth_error_hint_mentions_hermes_model(self):
        """The auth_mismatch hint must mention 'hermes model' command."""
        src = _read("api/streaming.py")
        # Find the auth_mismatch apperror block
        idx = src.find("auth_mismatch")
        assert idx != -1, "auth_mismatch type not found in streaming.py"
        block = src[idx:idx + 500]
        assert "hermes model" in block, (
            "auth_mismatch hint must mention 'hermes model' command "
            "so users know how to fix provider mismatch"
        )

    def test_auth_error_does_not_catch_rate_limit(self):
        """Rate limit errors must not be reclassified as auth_mismatch."""
        src = _read("api/streaming.py")
        # is_rate_limit must come before is_auth_error in the elif chain
        rl_idx = src.find("is_rate_limit")
        ae_idx = src.find("is_auth_error")
        assert rl_idx != -1 and ae_idx != -1, (
            "is_rate_limit / is_auth_error not found in streaming.py"
        )
        assert rl_idx < ae_idx, (
            "is_rate_limit check should precede is_auth_error — "
            "rate limit errors must not be mistaken for auth errors"
        )
# ── 2. static/ui.js: _checkProviderMismatch() ───────────────────────────────
class TestCheckProviderMismatch:
    """ui.js must expose _checkProviderMismatch() helper.

    Static source checks against static/ui.js.
    """

    def _mismatch_block(self, src: str) -> str:
        """Return the source window around _checkProviderMismatch, asserting it exists.

        find() returns -1 on a miss; slicing from -1 would yield a bogus
        window and misleading assertion failures, so fail loudly here instead.
        """
        idx = src.find("function _checkProviderMismatch")
        assert idx != -1, "_checkProviderMismatch not defined in ui.js"
        return src[idx:idx + 800]

    def test_function_defined(self):
        """_checkProviderMismatch function must be defined in ui.js."""
        src = _read("static/ui.js")
        assert "function _checkProviderMismatch" in src, (
            "_checkProviderMismatch not defined in ui.js"
        )

    def test_uses_window_active_provider(self):
        """Function must read window._activeProvider."""
        block = self._mismatch_block(_read("static/ui.js"))
        assert "_activeProvider" in block, (
            "_checkProviderMismatch must read window._activeProvider"
        )

    def test_skips_check_for_openrouter(self):
        """OpenRouter can route to any provider — skip the warning."""
        block = self._mismatch_block(_read("static/ui.js"))
        assert "openrouter" in block.lower(), (
            "_checkProviderMismatch must skip the check for openrouter"
        )

    def test_skips_check_for_custom(self):
        """Custom endpoints can serve any model — skip the warning."""
        block = self._mismatch_block(_read("static/ui.js"))
        assert "custom" in block.lower(), (
            "_checkProviderMismatch must skip the check for custom provider"
        )

    def test_active_provider_stored_on_model_load(self):
        """populateModelDropdown must store active_provider from /api/models."""
        src = _read("static/ui.js")
        # Find the function definition (skip the comment that also mentions the name)
        idx = src.find("async function populateModelDropdown")
        assert idx != -1, "async function populateModelDropdown not found"
        block = src[idx:idx + 800]
        assert "_activeProvider" in block, (
            "populateModelDropdown must set window._activeProvider "
            "from the /api/models response"
        )
# ── 3. static/messages.js: apperror handler ─────────────────────────────────
class TestApperrorHandler:
    """messages.js apperror handler must handle auth_mismatch type."""

    def test_auth_mismatch_type_handled(self):
        """apperror handler must check for type='auth_mismatch'."""
        messages_src = _read("static/messages.js")
        assert "auth_mismatch" in messages_src, (
            "auth_mismatch type not handled in messages.js apperror handler"
        )

    def test_provider_mismatch_label(self):
        """'Provider mismatch' label must appear in the error handling."""
        messages_src = _read("static/messages.js")
        assert "Provider mismatch" in messages_src, (
            "'Provider mismatch' label not found in messages.js"
        )

    def test_is_auth_mismatch_variable(self):
        """isAuthMismatch variable must be defined."""
        messages_src = _read("static/messages.js")
        assert "isAuthMismatch" in messages_src, (
            "isAuthMismatch variable not found in messages.js apperror handler"
        )
# ── 4. static/i18n.js: all locales ───────────────────────────────────────────
class TestI18nProviderMismatch:
    """All locales must have provider_mismatch_warning and provider_mismatch_label."""

    REQUIRED_KEYS = ["provider_mismatch_warning", "provider_mismatch_label"]

    def _locale_names(self, src: str) -> list[str]:
        """Return the locale identifiers declared at two-space indent in i18n.js."""
        pattern = re.compile(
            r"^\s{2}(?:'(?P<quoted>[A-Za-z0-9-]+)'|(?P<plain>[A-Za-z0-9-]+))\s*:\s*\{",
            re.MULTILINE,
        )
        names = []
        for match in pattern.finditer(src):
            names.append(match.group("quoted") or match.group("plain"))
        return names

    def _count_key(self, src: str, key: str) -> int:
        """Count whole-word occurrences of *key* in *src*."""
        return len(re.findall(r'\b' + re.escape(key) + r'\b', src))

    def _locale_block(self, src: str, start_marker: str, end_marker: str) -> str:
        """Slice the locale object between two markers, failing loudly on a miss.

        find() returns -1 when a marker is absent; without these guards the
        slice silently truncates and the caller's assertion message misleads.
        """
        start = src.find(start_marker)
        end = src.find(end_marker)
        assert start != -1, f"locale marker {start_marker!r} not found in i18n.js"
        assert end != -1, f"locale marker {end_marker!r} not found in i18n.js"
        return src[start:end]

    def test_all_locales_have_warning_key(self):
        """provider_mismatch_warning must appear in all locales."""
        src = _read("static/i18n.js")
        locale_count = len(self._locale_names(src))
        # Guard: zero detected locales would make the >= check pass vacuously.
        assert locale_count > 0, "no locales detected in i18n.js"
        count = self._count_key(src, "provider_mismatch_warning")
        assert count >= locale_count, (
            f"provider_mismatch_warning found {count} times, expected >= {locale_count} "
            f"(one per locale)"
        )

    def test_all_locales_have_label_key(self):
        """provider_mismatch_label must appear in all locales."""
        src = _read("static/i18n.js")
        locale_count = len(self._locale_names(src))
        assert locale_count > 0, "no locales detected in i18n.js"
        count = self._count_key(src, "provider_mismatch_label")
        assert count >= locale_count, (
            f"provider_mismatch_label found {count} times, expected >= {locale_count}"
        )

    def test_warning_is_function_in_en(self):
        """English provider_mismatch_warning must be a function (m, p) => ..."""
        src = _read("static/i18n.js")
        en_block = self._locale_block(src, "\n  en: {", "\n  es: {")
        assert "provider_mismatch_warning" in en_block, "Key not in en block"
        idx = en_block.find("provider_mismatch_warning")
        line = en_block[idx:idx + 200]
        # Must be a function, not a plain string
        assert "=>" in line, (
            "provider_mismatch_warning in en locale must be an arrow function "
            "that takes (m, p) parameters for model and provider interpolation"
        )

    def test_spanish_locale_key_coverage(self):
        """Spanish locale must have the new keys (parity with English)."""
        src = _read("static/i18n.js")
        es_block = self._locale_block(src, "\n  es: {", "\n  de: {")
        for key in self.REQUIRED_KEYS:
            assert key in es_block, f"Key '{key}' missing from Spanish locale"
# ── 5. static/boot.js: dropdown change handler ──────────────────────────────
class TestBootModelSelectChange:
    """boot.js modelSelect.onchange must call _checkProviderMismatch."""

    def test_onchange_calls_check_function(self):
        """modelSelect.onchange must invoke _checkProviderMismatch."""
        src = _read("static/boot.js")
        check_idx = src.find("_checkProviderMismatch")
        assert check_idx != -1, (
            "boot.js modelSelect.onchange must call _checkProviderMismatch "
            "to warn users about provider/model mismatches"
        )
        # Verify it's called in the context of model selection.
        # NOTE: the previous version used `find(a) or find(b)`, which is wrong:
        # find() returns -1 (truthy) on a miss, so the fallback never ran, and
        # the result was dead code anyway.  Anchor directly on the call site.
        block_start = src.rfind("\n", 0, check_idx)
        if block_start == -1:
            # Call site is on the first line of the file.
            block_start = 0
        surrounding = src[max(0, block_start - 200):block_start + 400]
        assert "modelSelect" in surrounding or "selectedModel" in surrounding, (
            "_checkProviderMismatch must be called in the context of model selection"
        )

    def test_onchange_shows_toast_on_mismatch(self):
        """The warning must be shown via showToast, not alert()."""
        src = _read("static/boot.js")
        # Both _checkProviderMismatch call and showToast must be near each other
        idx = src.find("_checkProviderMismatch")
        assert idx != -1, "_checkProviderMismatch not found in boot.js"
        block = src[idx:idx + 300]
        assert "showToast" in block, (
            "Provider mismatch warning must be shown via showToast(), not alert()"
        )
# ── 6. /api/models: active_provider in response ──────────────────────────────
def test_api_models_includes_active_provider():
    """/api/models must include 'active_provider' key in response."""
    with urllib.request.urlopen(BASE + "/api/models", timeout=10) as resp:
        payload = json.loads(resp.read())
    # active_provider can be None/null but the key must exist
    assert "active_provider" in payload, (
        "/api/models response missing 'active_provider' field — "
        "frontend needs this to detect provider mismatches"
    )
# ── Model switch toast (#419) ─────────────────────────────────────────────────
class TestModelSwitchToast:
    """Toast appears when user switches model during an active session."""

    # Exact user-facing toast text the frontend must emit (checked verbatim).
    TOAST_TEXT = "Model change takes effect in your next conversation"

    def test_toast_in_model_select_onchange(self):
        """modelSelect.onchange must show a toast when S.messages is non-empty."""
        boot_src = _read("static/boot.js")
        # Locate the onchange handler block
        onchange_at = boot_src.find("modelSelect').onchange")
        assert onchange_at != -1, "modelSelect.onchange not found in boot.js"
        handler = boot_src[onchange_at:onchange_at + 1100]
        assert self.TOAST_TEXT in handler, (
            "modelSelect.onchange must show a toast when switching model mid-session"
        )

    def test_toast_guards_on_messages_length(self):
        """Toast must only fire when there are existing messages (active session)."""
        boot_src = _read("static/boot.js")
        toast_at = boot_src.find(self.TOAST_TEXT)
        assert toast_at != -1
        # Look back 200 chars for the S.messages guard
        context = boot_src[max(0, toast_at - 200):toast_at + 50]
        assert "S.messages" in context and ".length" in context, (
            "Model switch toast must be gated on S.messages.length > 0"
        )

    def test_toast_uses_show_toast_not_alert(self):
        """Toast must use showToast(), not alert()."""
        boot_src = _read("static/boot.js")
        toast_at = boot_src.find(self.TOAST_TEXT)
        assert toast_at != -1
        context = boot_src[max(0, toast_at - 50):toast_at + 100]
        assert "showToast" in context, "Must use showToast() not alert()"
        assert "alert(" not in context, "Must not use alert()"

    def test_toast_has_typeof_showtoast_guard(self):
        """Toast call must guard typeof showToast to be safe during boot."""
        boot_src = _read("static/boot.js")
        toast_at = boot_src.find(self.TOAST_TEXT)
        assert toast_at != -1
        context = boot_src[max(0, toast_at - 100):toast_at + 50]
        assert "typeof showToast" in context, (
            "showToast call must be guarded with typeof check"
        )

766
tests/test_regressions.py Normal file
View File

@@ -0,0 +1,766 @@
"""
Regression tests -- one test per bug that was introduced and fixed.
These tests exist specifically to prevent those bugs from silently returning.
Each test is tagged with the sprint/commit where the bug was found and fixed.
"""
import json
import os
import pathlib
import time
import urllib.error
import urllib.request
import urllib.parse
REPO_ROOT = pathlib.Path(__file__).parent.parent.resolve()
from tests._pytest_port import BASE
def get(path):
    """GET *path* from the test server; return (parsed JSON body, HTTP status)."""
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        return json.loads(resp.read()), resp.status
def get_raw(path):
    """GET *path*; return (raw body bytes, Content-Type header, HTTP status)."""
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        body = resp.read()
        content_type = resp.headers.get("Content-Type", "")
        return body, content_type, resp.status
def post(path, body=None):
    """POST *body* (default ``{}``) as JSON to *path*; return (parsed JSON, status).

    HTTP error responses are decoded the same way, so callers can assert on
    error payloads without catching exceptions themselves.
    """
    payload = json.dumps(body or {}).encode()
    request = urllib.request.Request(
        BASE + path, data=payload, headers={"Content-Type": "application/json"}
    )
    try:
        with urllib.request.urlopen(request, timeout=10) as resp:
            return json.loads(resp.read()), resp.status
    except urllib.error.HTTPError as err:
        return json.loads(err.read()), err.code
def make_session(created_list):
    """Create a fresh session, record its id in *created_list*, and return the id."""
    payload, _status = post("/api/session/new", {})
    session_id = payload["session"]["session_id"]
    created_list.append(session_id)
    return session_id
# ── R1: uuid not imported in server.py (Sprint 10 split regression) ──────────
def test_chat_start_returns_stream_id(cleanup_test_sessions):
    """R1: chat/start must return stream_id -- catches missing uuid import.
    When uuid was missing, this returned 500 (NameError).
    """
    sid = make_session(cleanup_test_sessions)
    payload, status = post("/api/chat/start", {
        "session_id": sid,
        "message": "ping",
        "model": "openai/gpt-5.4-mini",
    })
    # Must return 200 with a stream_id -- not 500
    assert status == 200, f"chat/start failed with {status}: {payload}"
    assert "stream_id" in payload, "stream_id missing from chat/start response"
    assert len(payload["stream_id"]) > 8, "stream_id looks invalid"
    post("/api/session/delete", {"session_id": sid})
    cleanup_test_sessions.clear()
# ── R2: AIAgent not imported in api/streaming.py (Sprint 10 split regression) ─
def test_chat_stream_opens_successfully(cleanup_test_sessions):
    """R2: After chat/start, GET /api/chat/stream must return 200 (SSE opens).

    When AIAgent was missing, the thread crashed immediately, popped STREAMS,
    and the SSE GET returned 404.
    """
    sid = make_session(cleanup_test_sessions)
    data, status = post("/api/chat/start", {
        "session_id": sid,
        "message": "say: hello",
        "model": "openai/gpt-5.4-mini",
    })
    assert status == 200, f"chat/start failed: {data}"
    stream_id = data["stream_id"]
    # Open the SSE stream -- must return 200, not 404.
    # We only check headers (don't read the full stream body).
    req = urllib.request.Request(BASE + f"/api/chat/stream?stream_id={stream_id}")
    try:
        r = urllib.request.urlopen(req, timeout=3)
        try:
            assert r.status == 200, f"SSE stream returned {r.status} (expected 200)"
            ct = r.headers.get("Content-Type", "")
            assert "text/event-stream" in ct, f"Wrong Content-Type: {ct}"
        finally:
            # Close even when an assertion fails so the socket isn't leaked.
            r.close()
    except urllib.error.HTTPError as e:
        assert False, f"SSE stream returned {e.code} -- AIAgent may not be imported"
    except (urllib.error.URLError, TimeoutError):
        # A timeout or connection close after the headers is acceptable.
        # NOTE: the previous bare `except Exception: pass` also swallowed the
        # AssertionErrors above, which made this test impossible to fail.
        pass
    post("/api/session/delete", {"session_id": sid})
    cleanup_test_sessions.clear()
# ── R3: Session.__init__ missing tool_calls param (Sprint 10 split regression) ─
def test_session_with_tool_calls_in_json_loads_ok(cleanup_test_sessions):
    """R3: Sessions that have tool_calls in their JSON must load without 500.
    When tool_calls=None was missing from Session.__init__, loading such sessions
    threw TypeError: unexpected keyword argument.
    """
    sid = make_session(cleanup_test_sessions)
    # Manually inject tool_calls into the session's JSON file
    default_state = pathlib.Path.home() / ".hermes" / "webui-mvp-test"
    state_dir = pathlib.Path(
        os.environ.get("HERMES_WEBUI_TEST_STATE_DIR", str(default_state))
    )
    session_file = state_dir / "sessions" / f"{sid}.json"
    if session_file.exists():
        payload = json.loads(session_file.read_text())
        payload["tool_calls"] = [
            {"name": "terminal", "snippet": "test output", "tid": "test_tid_001", "assistant_msg_idx": 1}
        ]
        session_file.write_text(json.dumps(payload))
    # Loading the session must return 200, not 500
    data, status = get(f"/api/session?session_id={urllib.parse.quote(sid)}")
    assert status == 200, f"Session with tool_calls returned {status}: {data}"
    assert data["session"]["session_id"] == sid
    post("/api/session/delete", {"session_id": sid})
    cleanup_test_sessions.clear()
# ── R4: has_pending not imported in streaming.py (Sprint 10 split regression) ─
def test_streaming_py_imports_has_pending(cleanup_test_sessions):
    """R4: api/streaming.py must import or define has_pending.

    When missing, the approval check mid-stream caused NameError.
    """
    import re
    src = (REPO_ROOT / "api/streaming.py").read_text()
    assert "has_pending" in src, "has_pending not found in api/streaming.py"
    # The previous second assertion ("import" in src and "has_pending" in src)
    # was vacuous: any module contains the word "import" somewhere.  Require
    # has_pending to actually appear on an import line or be defined locally.
    imported = re.search(r"^\s*from\s+\S+\s+import\s+[^\n]*\bhas_pending\b",
                         src, re.MULTILINE)
    defined = re.search(r"^\s*def\s+has_pending\b", src, re.MULTILINE)
    assert imported or defined, \
        "has_pending must be imported or defined in api/streaming.py"
def test_aiagent_imported_in_streaming(cleanup_test_sessions):
    """R2b: api/streaming.py must import AIAgent.
    When missing, the streaming thread crashed immediately after being spawned.
    """
    streaming_src = (REPO_ROOT / "api/streaming.py").read_text()
    assert "AIAgent" in streaming_src, "AIAgent not referenced in api/streaming.py"
    has_import = (
        "from run_agent import AIAgent" in streaming_src
        or "import AIAgent" in streaming_src
    )
    assert has_import, "AIAgent must be imported in api/streaming.py"
# ── R5: SSE loop did not break on cancel event (Sprint 10 bug) ───────────────
def test_cancel_nonexistent_stream_returns_not_cancelled(cleanup_test_sessions):
    """R5a: Cancel endpoint works and returns cancelled:false for unknown stream."""
    payload, status = get("/api/chat/cancel?stream_id=nonexistent_test_xyz")
    assert status == 200
    assert payload["ok"] is True
    assert payload["cancelled"] is False
def test_server_py_sse_loop_breaks_on_cancel(cleanup_test_sessions):
    """R5b: SSE loop must include 'cancel' in the break condition.
    When missing, the connection hung after the cancel event was processed.
    Sprint 11: logic moved from server.py to api/routes.py -- check both.
    """
    import re
    # Check server.py first, then api/routes.py (Sprint 11 extracted routes)
    server_src = (REPO_ROOT / "server.py").read_text()
    routes_path = REPO_ROOT / "api" / "routes.py"
    routes_src = routes_path.read_text() if routes_path.exists() else ""
    match = re.search(r"if event in \([^)]+\):\s*break", server_src + routes_src)
    assert match, "SSE break condition not found in server.py or api/routes.py"
    assert "cancel" in match.group(), \
        f"'cancel' missing from SSE break condition: {match.group()}"
# ── R6: Test cron isolation (Sprint 10) ──────────────────────────────────────
def test_real_jobs_json_not_polluted_by_tests(cleanup_test_sessions):
    """R6: Test runs must not write to the real ~/.hermes/cron/jobs.json.
    When HERMES_HOME isolation was missing, every test run added test-job-* entries.
    """
    real_jobs_path = pathlib.Path.home() / ".hermes" / "cron" / "jobs.json"
    if not real_jobs_path.exists():
        return  # no jobs file at all -- fine
    raw = json.loads(real_jobs_path.read_text())
    jobs = raw.get("jobs", []) if isinstance(raw, dict) else raw
    leaked = [j for j in jobs if j.get("name", "").startswith("test-job-")]
    assert len(leaked) == 0, \
        f"Real jobs.json contains {len(leaked)} test-job-* entries: " \
        f"{[j['name'] for j in leaked]}"
# ── General: api modules all importable ──────────────────────────────────────
def test_all_api_modules_importable(cleanup_test_sessions):
    """All api/ modules must parse cleanly after any module split.

    NOTE: this is a static ast.parse() check -- it catches SyntaxError only,
    not missing imports (NameError/ImportError are covered by the targeted
    R2b/R4 tests above).
    """
    import ast  # pathlib is already imported at module level
    api_dir = REPO_ROOT / "api"
    # sorted() makes failures deterministic across filesystems
    for module_file in sorted(api_dir.glob("*.py")):
        src = module_file.read_text()
        try:
            ast.parse(src)
        except SyntaxError as e:
            assert False, f"{module_file.name} has syntax error: {e}"
def test_server_py_importable(cleanup_test_sessions):
    """server.py must parse without syntax errors after any split."""
    import ast  # pathlib is already imported at module level; no local re-import
    src = (REPO_ROOT / "server.py").read_text()
    try:
        ast.parse(src)
    except SyntaxError as e:
        assert False, f"server.py has syntax error: {e}"
# ── R7: Cross-session busy state bleed ───────────────────────────────────────
def test_loadSession_resets_busy_state_for_idle_session(cleanup_test_sessions):
    """R7: sessions.js loadSession for a non-inflight session must reset S.busy to false.
    When missing, switching from a busy session to an idle one left the Send button
    disabled, showed the wrong activity bar, and pointed Cancel at the wrong stream.
    """
    sessions_src = (REPO_ROOT / "static/sessions.js").read_text()
    # The fix adds explicit S.busy=false in the non-inflight else branch
    assert "S.busy=false;" in sessions_src, "sessions.js loadSession must set S.busy=false when loading a non-inflight session"
    # btnSend state must be refreshed via updateSendBtn
    assert "updateSendBtn()" in sessions_src, "sessions.js loadSession must call updateSendBtn for non-inflight sessions"
def test_done_handler_guards_setbusy_with_inflight_check(cleanup_test_sessions):
    """R7b: messages.js done/error handlers must not call setBusy(false) if the
    currently viewed session is itself still in-flight.
    When missing, finishing session A while viewing in-flight session B would
    disable B's Send button.
    """
    messages_src = (REPO_ROOT / "static/messages.js").read_text()
    # The fix wraps setBusy(false) in an INFLIGHT-keyed guard
    assert "INFLIGHT[S.session.session_id]" in messages_src, "messages.js must guard setBusy(false) with INFLIGHT check for current session"
def test_refresh_handler_does_not_drop_tool_messages_needed_by_todos(cleanup_test_sessions):
    """Todo panel state must survive session reload/refresh.
    The UI can hide tool-role messages from the visible transcript, but it must not
    destroy the raw session messages because loadTodos reconstructs state from the
    latest todo tool output.
    """
    sessions_src = (REPO_ROOT / "static/sessions.js").read_text()
    ui_src = (REPO_ROOT / "static/ui.js").read_text()
    panels_src = (REPO_ROOT / "static/panels.js").read_text()
    # Negative checks: these exact statements were the regression (filtering
    # tool messages out of the raw session payload) and must stay absent.
    assert "data.session.messages=(data.session.messages||[]).filter(" not in sessions_src, \
        "sessions.js must not overwrite raw session.messages when filtering transcript display"
    assert "S.messages = (data.session.messages || []).filter(" not in ui_src, \
        "ui.js refreshSession must not rebuild S.messages by discarding tool messages from the raw session payload"
    # Positive check: loadTodos must read from the raw session messages.
    assert "const sourceMessages = (S.session && Array.isArray(S.session.messages) && S.session.messages.length) ? S.session.messages : S.messages;" in panels_src, \
        "loadTodos must prefer raw S.session.messages so todo state survives reloads"
def test_cancel_button_not_cleared_across_sessions(cleanup_test_sessions):
    """R7c: The Cancel button and activeStreamId must only be cleared when the
    done/error event belongs to the currently viewed session.
    """
    messages_src = (REPO_ROOT / "static/messages.js").read_text()
    # Both clear operations must be inside the activeSid === S.session guard;
    # this is the exact pattern the fix introduced.
    assert "S.session.session_id===activeSid" in messages_src, "messages.js must guard activeStreamId/Cancel clearing with session identity check"
# ── R8: Session delete does not invalidate index (ghost sessions) ─────────────
def test_deleted_session_does_not_appear_in_list(cleanup_test_sessions):
    """R8: After deleting a session, it must not appear in /api/sessions.
    When _index.json was not invalidated on delete, the session reappeared
    in the list even after the JSON file was removed.
    """
    # Create a session with a title so it shows in the list
    created, _ = post("/api/session/new", {})
    sid = created["session"]["session_id"]
    post("/api/session/rename", {"session_id": sid, "title": "regression-test-delete-R8"})
    # Verify it appears
    listing, _ = get("/api/sessions")
    ids_before = [s["session_id"] for s in listing["sessions"]]
    assert sid in ids_before, "Session must appear in list before delete"
    # Delete it
    result, status = post("/api/session/delete", {"session_id": sid})
    assert status == 200 and result.get("ok") is True
    # Verify it no longer appears -- even after a second fetch (index rebuild)
    listing_after, _ = get("/api/sessions")
    ids_after = [s["session_id"] for s in listing_after["sessions"]]
    assert sid not in ids_after, f"Deleted session {sid} still appears in list -- index not invalidated on delete"
def test_server_delete_invalidates_index(cleanup_test_sessions):
    """R8b: session/delete handler must unlink _index.json.
    Static check that the fix is in place.
    Sprint 11: handler moved from server.py to api/routes.py -- check both.
    """
    server_src = (REPO_ROOT / "server.py").read_text()
    routes_path = REPO_ROOT / "api" / "routes.py"
    routes_src = routes_path.read_text() if routes_path.exists() else ""
    # Find the delete handler in either file
    for label, text in (("server.py", server_src), ("api/routes.py", routes_src)):
        # Accept both single-quote and double-quote style (formatting varies by contributor)
        delete_idx = max(
            text.find("if parsed.path == '/api/session/delete':"),
            text.find('if parsed.path == "/api/session/delete":'),
        )
        if delete_idx < 0:
            continue
        # Use 1200 chars to accommodate any validation/guard code added
        # before the SESSION_INDEX_FILE.unlink() call (e.g. session_id
        # character checks, path traversal guards).
        handler_block = text[delete_idx:delete_idx + 1200]
        assert "SESSION_INDEX_FILE" in handler_block, \
            f"{label} session/delete must invalidate SESSION_INDEX_FILE"
        return
    assert False, "session/delete handler not found in server.py or api/routes.py"
# ── R9: Token/tool SSE events write to wrong session after switch ─────────────
def test_token_handler_guards_session_id(cleanup_test_sessions):
    """R9a: The SSE token event handler must check activeSid before writing to DOM.
    When missing, tokens from session A would render into session B's message area
    if the user switched sessions mid-stream.
    Sprint 12: handler moved into _wireSSE(source), so search source.addEventListener.
    """
    messages_src = (REPO_ROOT / "static/messages.js").read_text()
    # Sprint 12 refactored es.addEventListener -> source.addEventListener inside _wireSSE()
    handler_at = messages_src.find("source.addEventListener('token'")
    if handler_at < 0:
        handler_at = messages_src.find("es.addEventListener('token'")
    assert handler_at >= 0, "token event handler not found"
    handler = messages_src[handler_at:handler_at + 300]
    assert "activeSid" in handler, \
        "token handler must check activeSid before writing to DOM"
    guarded = (
        "S.session.session_id!==activeSid" in handler
        or "S.session.session_id===activeSid" in handler
    )
    assert guarded, "token handler must compare current session to activeSid"
def test_tool_handler_guards_session_id(cleanup_test_sessions):
    """R9b: The SSE tool event handler must check activeSid before writing to DOM.
    When missing, tool cards from session A would render into session B's message area.
    Sprint 12: handler moved into _wireSSE(source), so search source.addEventListener.
    """
    messages_src = (REPO_ROOT / "static/messages.js").read_text()
    handler_at = messages_src.find("source.addEventListener('tool'")
    if handler_at < 0:
        handler_at = messages_src.find("es.addEventListener('tool'")
    assert handler_at >= 0, "tool event handler not found"
    handler = messages_src[handler_at:handler_at + 400]
    assert "activeSid" in handler, \
        "tool handler must check activeSid before writing to DOM"
# ── R10: respondApproval uses wrong session_id after switch (multi-session) ─
def test_respond_approval_uses_approval_session_id(cleanup_test_sessions):
    """R10: respondApproval must use the session_id of the session that triggered
    the approval, not S.session.session_id (which may be a different session
    if the user switched while approval was pending).
    """
    messages_src = (REPO_ROOT / "static/messages.js").read_text()
    # The fix introduces _approvalSessionId to track the correct session
    assert "_approvalSessionId" in messages_src, "messages.js must use _approvalSessionId in respondApproval"
    # respondApproval must use _approvalSessionId, not S.session.session_id directly
    fn_at = messages_src.find("async function respondApproval(")
    assert fn_at >= 0, "respondApproval not found"
    fn_head = messages_src[fn_at:fn_at + 300]
    assert "_approvalSessionId" in fn_head, "respondApproval must read _approvalSessionId, not S.session.session_id"
# ── R11: Tool progress must not use shared status chrome ──────────────────
def test_tool_status_only_shown_for_current_session(cleanup_test_sessions):
    """R11: Tool progress should not drive the global status bar or composer
    status. Live tool cards in the current conversation are the authoritative
    progress UI, which avoids cross-session status leakage entirely.
    """
    messages_src = (REPO_ROOT / "static/messages.js").read_text()
    # Sprint 12: handler moved into _wireSSE(source)
    handler_at = messages_src.find("source.addEventListener('tool'")
    if handler_at < 0:
        handler_at = messages_src.find("es.addEventListener('tool'")
    assert handler_at >= 0
    handler = messages_src[handler_at:handler_at + 400]
    assert "setStatus(" not in handler, \
        "tool handler should not use the global activity/status bar"
    assert "setComposerStatus(" not in handler, \
        "tool handler should not use composer status for tool progress"
# ── R12: Live tool cards lost on switch-away and switch-back ──────────────
def test_loadSession_inflight_restores_live_tool_cards(cleanup_test_sessions):
    """R12: When switching back to an in-flight session, live tool cards in
    #liveToolCards must be restored from S.toolCalls.
    When missing, tool cards disappeared on switch-away even though the session
    was still processing.
    """
    sessions_src = (REPO_ROOT / "static/sessions.js").read_text()
    # INFLIGHT branch must call appendLiveToolCard
    branch_at = sessions_src.find("if(INFLIGHT[sid]){")
    assert branch_at >= 0, "INFLIGHT branch not found in loadSession"
    branch = sessions_src[branch_at:branch_at + 500]
    assert "appendLiveToolCard" in branch, "loadSession INFLIGHT branch must restore live tool cards via appendLiveToolCard"
    assert "clearLiveToolCards" in branch, "loadSession INFLIGHT branch must clear old live cards before restoring"
# ── R13: renderMessages() called before S.busy=false in done handler ────────
def test_done_handler_sets_busy_false_before_renderMessages(cleanup_test_sessions):
    """R13: In the done handler, S.busy must be set to false BEFORE renderMessages()
    is called for the active session. The !S.busy guard in renderMessages() controls
    whether settled tool cards are rendered. When S.busy=true during renderMessages(),
    tool cards are skipped entirely after a response completes.
    """
    src = (REPO_ROOT / "static/messages.js").read_text()
    # Sprint 12: handler moved into _wireSSE(source)
    done_idx = src.find("source.addEventListener('done'")
    if done_idx < 0:
        done_idx = src.find("es.addEventListener('done'")
    assert done_idx >= 0
    done_block = src[done_idx:done_idx + 2500]
    # S.busy=false must appear before renderMessages() within the done handler
    busy_pos = done_block.find("S.busy=false;")
    render_pos = done_block.find("renderMessages()")
    assert busy_pos >= 0, "done handler must set S.busy=false before renderMessages()"
    # Guard: without this, a missing renderMessages() call (render_pos == -1)
    # would trip the ordering assertion below with a misleading message.
    assert render_pos >= 0, "done handler must call renderMessages()"
    assert busy_pos < render_pos, f"S.busy=false (pos {busy_pos}) must come before renderMessages() (pos {render_pos})"
# ── R14: send() uses stale modelSelect.value instead of session model ────────
def test_send_uses_session_model_as_authoritative_source(cleanup_test_sessions):
    """R14: send() must use S.session.model as the authoritative model, not just
    $('modelSelect').value. When a session was created with a model not in the
    current dropdown list, the select value would be stale after switching sessions,
    causing the wrong model to be sent.
    """
    messages_src = (REPO_ROOT / "static/messages.js").read_text()
    # The model field in the chat/start payload must prefer S.session.model
    start_at = messages_src.find("/api/chat/start")
    assert start_at >= 0
    payload_block = messages_src[start_at:start_at + 300]
    assert "S.session.model" in payload_block, "send() must use S.session.model in the chat/start payload"
# ── R15: newSession does not clear live tool cards ────────────────────────────
def test_newSession_clears_live_tool_cards(cleanup_test_sessions):
    """R15: newSession() must call clearLiveToolCards() so live cards from a
    previous in-flight session don't persist when starting a fresh conversation.
    """
    sessions_src = (REPO_ROOT / "static/sessions.js").read_text()
    fn_at = sessions_src.find("async function newSession(")
    assert fn_at >= 0
    # Bound the body at the next async function definition
    fn_end = sessions_src.find("async function ", fn_at + 10)
    fn_body = sessions_src[fn_at:fn_end]
    assert "clearLiveToolCards" in fn_body, "newSession() must call clearLiveToolCards() to clear stale live cards"
def test_newSession_resets_busy_state_for_fresh_chat(cleanup_test_sessions):
    """R15b: newSession() must reset the viewed chat to idle state.
    Without this, starting a second chat while another session is streaming leaves
    S.busy=true, so the first send in the new chat gets incorrectly queued.
    """
    src = (REPO_ROOT / "static/sessions.js").read_text()
    new_sess_idx = src.find("async function newSession(")
    assert new_sess_idx >= 0
    # Slice newSession's body: from its definition up to the next async function.
    next_fn = src.find("async function ", new_sess_idx + 10)
    new_sess_body = src[new_sess_idx:next_fn]
    # Exact statements introduced by the fix -- checked verbatim.
    assert "S.busy=false;" in new_sess_body, \
        "newSession() must clear S.busy so a fresh chat is immediately sendable"
    assert "S.activeStreamId=null;" in new_sess_body, \
        "newSession() must clear the active stream id for the newly viewed chat"
    assert "updateQueueBadge(S.session.session_id);" in new_sess_body, \
        "newSession() must refresh the badge for the new session rather than leaving the old session's queue badge visible"
def test_session_scoped_message_queue_frontend_wiring(cleanup_test_sessions):
    """R15bb: queued follow-ups must stay attached to their originating session.
    The frontend should use a session-keyed queue store and drain only the active
    session's queued messages when that session becomes idle.
    """
    ui_src = (REPO_ROOT / "static/ui.js").read_text()
    messages_src = (REPO_ROOT / "static/messages.js").read_text()
    sessions_src = (REPO_ROOT / "static/sessions.js").read_text()
    # ui.js: session-keyed queue store and its accessors must exist.
    assert "const SESSION_QUEUES" in ui_src
    assert "function queueSessionMessage" in ui_src
    assert "function shiftQueuedSessionMessage" in ui_src
    # ui.js drain path: only the active session's queue is drained (verbatim).
    assert "const sid=S.session&&S.session.session_id;" in ui_src
    assert "const next=sid?shiftQueuedSessionMessage(sid):null;" in ui_src
    # messages.js: enqueue + badge refresh are keyed by the current session.
    assert "queueSessionMessage(S.session.session_id" in messages_src
    assert "updateQueueBadge(S.session.session_id);" in messages_src
    # sessions.js: badge refresh on session switch uses the switched-to sid.
    assert "updateQueueBadge(sid);" in sessions_src
def test_chat_start_persists_pending_turn_metadata_for_reload_recovery(cleanup_test_sessions):
    """R15c: chat/start must expose enough pending-turn metadata for a reload to
    rebuild the in-flight conversation instead of showing a blank session.
    """
    routes_src = (REPO_ROOT / "api/routes.py").read_text()
    # The session must both record the pending turn and surface it in payloads.
    for marker in (
        's.active_stream_id = stream_id',
        's.pending_user_message = msg',
        's.pending_attachments = attachments',
        '"active_stream_id": getattr(s, "active_stream_id", None)',
        '"pending_user_message": getattr(s, "pending_user_message", None)',
    ):
        assert marker in routes_src
def test_reload_path_restores_pending_message_and_reattaches_live_stream(cleanup_test_sessions):
    """R15d: the frontend reload path must show the pending user turn and
    reattach to the live SSE stream after loadSession().
    """
    expectations = [
        ("static/ui.js", 'getPendingSessionMessage'),
        ("static/ui.js", 'pending_user_message'),
        ("static/messages.js", 'function attachLiveStream'),
        ("static/sessions.js", 'const pendingMsg=typeof getPendingSessionMessage'),
        ("static/sessions.js", 'const activeStreamId=data.session.active_stream_id||null;'),
        ("static/sessions.js", 'attachLiveStream(sid, activeStreamId'),
        ("static/ui.js", 'if (S.activeStreamId && S.activeStreamId === streamId) return;'),
    ]
    cache = {}  # read each source file at most once
    for rel_path, needle in expectations:
        if rel_path not in cache:
            cache[rel_path] = (REPO_ROOT / rel_path).read_text()
        assert needle in cache[rel_path]
# ── R16: Switching away/back must preserve live partial assistant output ─────
def test_live_stream_tokens_persist_partial_assistant_for_session_switch(cleanup_test_sessions):
    """R16: in-flight assistant text must be mirrored into INFLIGHT session state,
    and the live stream must rebind to the rebuilt DOM after switching away and back.
    Without this, partial assistant output disappears until the final done payload lands.
    """
    messages_src = (REPO_ROOT / "static/messages.js").read_text()
    ui_src = (REPO_ROOT / "static/ui.js").read_text()
    message_checks = [
        ("content:assistantText",
         "messages.js must persist the partial assistant text into INFLIGHT state"),
        ("_live:true",
         "messages.js must mark the persisted in-flight assistant row so renderMessages can re-anchor it"),
        ("syncInflightAssistantMessage();",
         "token handler must update INFLIGHT state before checking the active session"),
        ("assistantRow&&!assistantRow.isConnected",
         "live stream must drop stale detached assistant DOM references after session switches"),
    ]
    for needle, reason in message_checks:
        assert needle in messages_src, reason
    assert "data-live-assistant" in ui_src, \
        "renderMessages must preserve a live-assistant DOM anchor when rebuilding the thread"
def test_inflight_session_state_tracks_live_tool_cards_per_session(cleanup_test_sessions):
    """R16b: live tool cards must be stored on the in-flight session, not only in the
    global S.toolCalls array, so switching chats does not lose or misattach them.
    """
    msg_js = (REPO_ROOT / "static" / "messages.js").read_text()
    sess_js = (REPO_ROOT / "static" / "sessions.js").read_text()
    assert "INFLIGHT[activeSid].toolCalls.push(tc);" in msg_js, \
        "tool SSE handler must persist live tool calls onto the in-flight session"
    assert "S.toolCalls=(INFLIGHT[sid].toolCalls||[]);" in sess_js, \
        "loadSession() must restore live tool calls from the in-flight session state"
def test_loadSession_inflight_sets_busy_before_renderMessages(cleanup_test_sessions):
    """R16c: loading an in-flight session must mark it busy before renderMessages().
    Otherwise renderMessages() treats S.toolCalls as settled history cards and the
    same tool call appears once inline and once in the live tool host after a
    session switch.
    """
    sessions_js = (REPO_ROOT / "static/sessions.js").read_text()
    anchor = sessions_js.find("if(INFLIGHT[sid]){")
    assert anchor >= 0, "INFLIGHT branch not found in loadSession"
    # Inspect the same 700-character window after the branch as before.
    window = sessions_js[anchor:anchor + 700]
    set_busy_at = window.find("S.busy=true;")
    render_at = window.find("renderMessages();appendThinking();")
    assert set_busy_at >= 0, "loadSession INFLIGHT branch must set S.busy=true"
    assert render_at >= 0, "loadSession INFLIGHT branch must call renderMessages()"
    assert set_busy_at < render_at, \
        "loadSession must set S.busy=true before renderMessages() to avoid duplicate tool cards"
def test_streaming_bridge_accepts_current_tool_progress_callback_signature(cleanup_test_sessions):
    """R17: api/streaming.py must accept the current Hermes agent callback contract.
    The agent now calls tool_progress_callback(event_type, name, preview, args, **kwargs).
    If the WebUI bridge only accepts (name, preview, args), live tool updates silently vanish.
    """
    bridge = (REPO_ROOT / "api/streaming.py").read_text()
    assert "def on_tool(*cb_args, **cb_kwargs):" in bridge, \
        "streaming.py must accept variable callback args for tool progress events"
    assert "reasoning_callback=on_reasoning" in bridge, \
        "streaming.py must wire the agent's reasoning callback into the SSE bridge"
    # The SSE emit may use either quote style for the event name.
    emits_completion = any(v in bridge for v in ("put('tool_complete'", 'put("tool_complete"'))
    assert emits_completion, \
        "streaming.py must emit live tool completion SSE events"
def test_messages_js_supports_live_reasoning_and_tool_completion(cleanup_test_sessions):
    """R18: messages.js must render live reasoning and react to tool completion events.
    Without these handlers, the operator only sees generic Thinking… or nothing
    until the final done snapshot redraws the whole turn.
    """
    stream_src = (REPO_ROOT / "static/messages.js").read_text()
    assert "let reasoningText=''" in stream_src, \
        "messages.js must track streamed reasoning text separately from assistant text"
    # Either spacing/quote style of the declaration is acceptable.
    assert any(v in stream_src for v in ("let liveReasoningText=''", 'let liveReasoningText = ""')), \
        "messages.js must track the currently active reasoning segment separately from cumulative reasoning"
    assert any(v in stream_src for v in ("source.addEventListener('reasoning'",
                                         'source.addEventListener("reasoning"')), \
        "messages.js must listen for live reasoning SSE events"
    assert any(v in stream_src for v in ("source.addEventListener('tool_complete'",
                                         'source.addEventListener("tool_complete"')), \
        "messages.js must listen for live tool completion SSE events"
    assert "function _parseStreamState()" in stream_src, \
        "messages.js must parse live stream state into reasoning + visible answer"
def test_ui_js_can_upgrade_thinking_spinner_into_live_reasoning_card(cleanup_test_sessions):
    """R19: ui.js must be able to replace the placeholder thinking spinner with
    streamed reasoning text while a turn is in progress.
    """
    ui_source = (REPO_ROOT / "static/ui.js").read_text()
    markup_variants = ("function _thinkingMarkup(text='')",
                       'function _thinkingMarkup(text="")')
    assert any(v in ui_source for v in markup_variants), \
        "ui.js must centralize thinking row markup so it can switch between spinner and live text"
    update_variants = ("function updateThinking(text=''){appendThinking(text);}",
                       'function updateThinking(text=""){appendThinking(text);}')
    assert any(v in ui_source for v in update_variants), \
        "ui.js must expose an updateThinking helper for live reasoning rendering"
    assert "function finalizeThinkingCard()" in ui_source, \
        "ui.js must expose a helper to finalize one live thinking card before starting another"
def test_ui_js_keeps_split_thinking_cards_and_assistant_header(cleanup_test_sessions):
    """R19b: settled render should keep distinct thinking cards for split assistant
    turns inside a single assistant turn container, preserving one assistant header
    for the whole response while keeping multiple thinking cards distinct.
    """
    ui_source = (REPO_ROOT / "static" / "ui.js").read_text()
    # Absence check: the old merge path must be gone entirely.
    assert "pendingTurnThinking" not in ui_source, \
        "renderMessages must not merge distinct thinking blocks into one settled card"
    assert "_createAssistantTurn(" in ui_source, \
        "renderMessages must build a shared assistant turn wrapper instead of separate top-level rows"
    assert "assistant-segment" in ui_source, \
        "settled assistant turns must preserve per-message segments for multiple thinking/tool/result blocks"
def test_ui_js_keeps_reasoning_only_assistant_messages_visible(cleanup_test_sessions):
    """R19c: assistant messages that only contain reasoning must still survive
    rerenders, otherwise prior thinking cards disappear on the next turn.
    """
    ui_source = (REPO_ROOT / "static" / "ui.js").read_text()
    assert "function _messageHasReasoningPayload(m)" in ui_source, \
        "ui.js must detect reasoning-only assistant messages"
    # Compare with all spaces stripped so formatting differences don't matter.
    no_spaces = ui_source.replace(' ', '')
    assert "hasTc||hasTu||_messageHasReasoningPayload(m)" in no_spaces, \
        "renderMessages visibility filter must preserve reasoning-only assistant messages"
def test_ui_js_does_not_hide_anchor_segments_that_contain_thinking(cleanup_test_sessions):
    """R19c2: assistant anchor segments that contain a thinking card must remain
    visible; only truly empty tool-call anchor segments should be hidden.
    """
    # Strip all spaces and newlines so the structural check ignores formatting.
    squeezed = (REPO_ROOT / "static" / "ui.js").read_text().replace(' ', '').replace('\n', '')
    assert "}elseif(!thinkingText){" in squeezed, \
        "renderMessages must only hide assistant anchor segments when they have no thinking content"
def test_messages_js_live_assistant_segment_reuses_live_turn_wrapper(cleanup_test_sessions):
    """R19d: live streaming must reuse the existing live assistant turn wrapper created
    by appendThinking(), otherwise the header gets recreated when answer tokens start.
    """
    stream_src = (REPO_ROOT / "static" / "messages.js").read_text()
    signature_variants = ("function ensureAssistantRow(force=false)",
                          'function ensureAssistantRow(force = false)')
    assert any(v in stream_src for v in signature_variants), \
        "ensureAssistantRow should manage the live assistant content segment"
    assert "let turn=$('liveAssistantTurn');" in stream_src, \
        "ensureAssistantRow must bind to the existing live assistant turn wrapper"
    assert "appendThinking();" in stream_src, \
        "ensureAssistantRow should create the live turn via appendThinking() when needed"
    class_variants = ("assistantRow.className='assistant-segment';",
                      "assistantRow.className = 'assistant-segment';")
    assert any(v in stream_src for v in class_variants), \
        "live answer content should be appended as a segment inside the live turn wrapper"
    assert "if(!force&&!assistantRow){" in stream_src.replace(' ', ''), \
        "ensureAssistantRow must still avoid creating the live answer segment when no display text exists yet"
    assert "if(String((parsed&&parsed.displayText)||'').trim()||assistantRow) ensureAssistantRow();" in stream_src, \
        "token handler must only create the live answer segment once visible answer text starts"
def test_messages_js_finalizes_thinking_card_before_tool_card(cleanup_test_sessions):
    """R19e: later reasoning after a tool call must render in a fresh card."""
    stream_src = (REPO_ROOT / "static/messages.js").read_text()
    assert "finalizeThinkingCard" in stream_src, \
        "tool handler must finalize the current live thinking card before appending a tool card"
    reset_variants = ("liveReasoningText='';", 'liveReasoningText = "";')
    assert any(v in stream_src for v in reset_variants), \
        "tool handler must reset the active reasoning segment before post-tool reasoning arrives"
# ── R17: Stack traces must not leak to clients in 500 responses ────────────
def test_500_response_has_no_trace_field():
    """R16: HTTP 500 responses must not include a 'trace' field.
    Leaking tracebacks exposes file paths, module names, and potentially
    secret values from local variables.
    """
    # Posting an empty body to /api/chat/start is missing required fields,
    # which drives the request through the error-response path (4xx/5xx).
    body, _status = post("/api/chat/start", {})
    assert "trace" not in body, \
        "Server must not leak stack traces to clients"
def test_upload_error_has_no_trace_field():
    """R16b: Upload 500 responses must not include a 'trace' field."""
    # A non-multipart body forces /api/upload down its error-handling path.
    payload = b"not-multipart-data"
    req = urllib.request.Request(
        BASE + "/api/upload",
        data=payload,
        headers={"Content-Type": "text/plain", "Content-Length": str(len(payload))},
    )
    try:
        with urllib.request.urlopen(req, timeout=10) as resp:
            code, body = resp.status, json.loads(resp.read())
    except urllib.error.HTTPError as err:
        code, body = err.code, json.loads(err.read())
    assert code >= 400, "Invalid upload should return an error status"
    assert "trace" not in body, \
        "Upload errors must not leak stack traces to clients"
    assert "error" in body, "Error responses must include an 'error' key"
# ── #248: /skills slash command ───────────────────────────────────────────────
def test_skills_slash_command_defined():
    """#248: /skills slash command must be wired up.
    Pre-Task 2 (slash-command-parity batch 1) this checked for the
    hardcoded ``name:'skills'`` entry in the COMMANDS array. The COMMANDS
    array is now sourced from hermes-agent's ``COMMAND_REGISTRY`` at boot
    via ``GET /api/commands``, so the literal string is gone. The handler
    must still exist and be registered, otherwise ``/skills`` would fall
    through to \"not yet supported\".
    """
    commands_src = (REPO_ROOT / "static/commands.js").read_text()
    # 1. The cmdSkills handler function must be defined (async or plain).
    handler_defined = ("async function cmdSkills" in commands_src
                       or "function cmdSkills" in commands_src)
    assert handler_defined, "cmdSkills function missing from commands.js"
    # 2. HANDLERS.skills must be registered to dispatch /skills to cmdSkills.
    assert "HANDLERS.skills" in commands_src, \
        "HANDLERS.skills registration missing from commands.js"
def test_reload_recovery_persists_durable_inflight_state(cleanup_test_sessions):
    """Reload recovery must persist a durable per-session inflight snapshot.
    Without these helpers, loadSession() references loadInflightState() but a full
    browser reload has no saved state to hydrate, so recovery silently no-ops.
    """
    ui_src = (REPO_ROOT / "static/ui.js").read_text()
    messages_src = (REPO_ROOT / "static/messages.js").read_text()
    sessions_src = (REPO_ROOT / "static/sessions.js").read_text()
    # ui.js must provide the storage key and the save/load/clear helpers.
    for helper in (
        "const INFLIGHT_STATE_KEY = 'hermes-webui-inflight-state'",
        "function saveInflightState(sid, state)",
        "function loadInflightState(sid, streamId)",
        "function clearInflightState(sid)",
    ):
        assert helper in ui_src
    assert "saveInflightState(activeSid" in messages_src, \
        "messages.js must persist live stream snapshots while a turn is in flight"
    assert "clearInflightState(activeSid)" in messages_src, \
        "messages.js must clear durable inflight snapshots when the run ends/errors/cancels"
    assert "const stored=loadInflightState(sid, activeStreamId);" in sessions_src, \
        "loadSession() must hydrate in-flight state from durable browser storage on reload"

View File

@@ -0,0 +1,116 @@
from collections import Counter
from pathlib import Path
import re
REPO = Path(__file__).resolve().parent.parent
def read(path: Path) -> str:
    """Return the UTF-8 decoded contents of *path*."""
    with path.open(encoding="utf-8") as handle:
        return handle.read()
def test_russian_locale_block_exists():
    """The i18n bundle must declare a top-level `ru` locale with its own
    display label and speech-synthesis language tag."""
    bundle = read(REPO / "static" / "i18n.js")
    for marker in ("\n ru: {", "_label: 'Русский'", "_speech: 'ru-RU'"):
        assert marker in bundle
def extract_locale_block(src: str, locale_key: str) -> str:
    """Return the text between the braces of the ``locale_key: { ... }``
    object literal in *src*, excluding the outer braces themselves.

    Implements a small brace-matching scanner that tracks JS single-,
    double-, and backtick-quoted strings (with backslash escapes) so that
    braces appearing inside string values do not affect the depth count.
    NOTE(review): the scanner does not handle // or /* */ comments or regex
    literals, and braces inside template-literal ``${...}`` interpolations
    are skipped wholesale -- confirm the locale blocks stay plain key/value
    pairs.

    Raises AssertionError if the block is missing or its braces never
    re-balance before the end of *src*.
    """
    # Anchor on `<locale_key> : {` at a word boundary; end() - 1 is the
    # index of the opening "{" itself.
    start_match = re.search(rf"\b{re.escape(locale_key)}\s*:\s*\{{", src)
    assert start_match, f"{locale_key} locale block not found"
    start = start_match.end() - 1
    depth = 0            # current brace nesting relative to the block start
    in_single = False    # inside a '...' string
    in_double = False    # inside a "..." string
    in_backtick = False  # inside a `...` template literal
    escape = False       # previous char was a backslash inside a string
    for i in range(start, len(src)):
        ch = src[i]
        if escape:
            # Consume the character following a backslash (e.g. \' or \\).
            escape = False
            continue
        if in_single:
            if ch == "\\":
                escape = True
            elif ch == "'":
                in_single = False
            continue
        if in_double:
            if ch == "\\":
                escape = True
            elif ch == '"':
                in_double = False
            continue
        if in_backtick:
            if ch == "\\":
                escape = True
            elif ch == "`":
                in_backtick = False
            continue
        # Outside any string: quotes open a string, braces adjust depth.
        if ch == "'":
            in_single = True
            continue
        if ch == '"':
            in_double = True
            continue
        if ch == "`":
            in_backtick = True
            continue
        if ch == "{":
            depth += 1
            continue
        if ch == "}":
            depth -= 1
            if depth == 0:
                # Matched the opening brace at `start`: return the interior.
                return src[start + 1 : i]
    raise AssertionError(f"{locale_key} locale block braces are not balanced")
def test_russian_locale_includes_representative_translations():
    """A representative sample of translated entries must be present verbatim
    (entries are spelled with \\u escapes so the file stays ASCII-safe)."""
    bundle = read(REPO / "static" / "i18n.js")
    required = [
        "settings_title: '\u041d\u0430\u0441\u0442\u0440\u043e\u0439\u043a\u0438'",
        "login_title: '\u0412\u0445\u043e\u0434'",
        "approval_heading: '\u0422\u0440\u0435\u0431\u0443\u0435\u0442\u0441\u044f \u043f\u043e\u0434\u0442\u0432\u0435\u0440\u0436\u0434\u0435\u043d\u0438\u0435'",
        "tab_tasks: '\u0417\u0430\u0434\u0430\u0447\u0438'",
        "tab_profiles: '\u041f\u0440\u043e\u0444\u0438\u043b\u0438'",
        "session_time_just_now: '\u0442\u043e\u043b\u044c\u043a\u043e \u0447\u0442\u043e'",
        "onboarding_title: '\u0414\u043e\u0431\u0440\u043e \u043f\u043e\u0436\u0430\u043b\u043e\u0432\u0430\u0442\u044c \u0432 Hermes Web UI'",
        "onboarding_complete: '\u041f\u0435\u0440\u0432\u0438\u0447\u043d\u0430\u044f \u043d\u0430\u0441\u0442\u0440\u043e\u0439\u043a\u0430 \u0437\u0430\u0432\u0435\u0440\u0448\u0435\u043d\u0430'",
        "profile_default_label: '\u0028\u043f\u043e \u0443\u043c\u043e\u043b\u0447\u0430\u043d\u0438\u044e\u0029'",
        "profile_name_placeholder: '\u041d\u0430\u0437\u0432\u0430\u043d\u0438\u0435 \u043f\u0440\u043e\u0444\u0438\u043b\u044f \u0028\u0441\u0442\u0440\u043e\u0447\u043d\u044b\u0435 \u0431\u0443\u043a\u0432\u044b, a-z, 0-9, \u0434\u0435\u0444\u0438\u0441\u044b\u0029'",
        "profile_clone_label: '\u0421\u043a\u043e\u043f\u0438\u0440\u043e\u0432\u0430\u0442\u044c \u043a\u043e\u043d\u0444\u0438\u0433\u0443\u0440\u0430\u0446\u0438\u044e \u0438\u0437 \u0430\u043a\u0442\u0438\u0432\u043d\u043e\u0433\u043e \u043f\u0440\u043e\u0444\u0438\u043b\u044f'",
        "profile_base_url_placeholder: '\u0411\u0430\u0437\u043e\u0432\u044b\u0439 URL \u0028\u043d\u0435\u043e\u0431\u044f\u0437\u0430\u0442\u0435\u043b\u044c\u043d\u043e, \u043d\u0430\u043f\u0440\u0438\u043c\u0435\u0440 http://localhost:11434\u0029'",
        "profile_api_key_placeholder: 'API-\u043a\u043b\u044e\u0447 \u0028\u043d\u0435\u043e\u0431\u044f\u0437\u0430\u0442\u0435\u043b\u044c\u043d\u043e\u0029'",
    ]
    for needle in required:
        assert needle in bundle
def test_russian_locale_covers_english_keys():
    """Every key defined in the `en` locale must also exist in `ru`."""
    bundle = read(REPO / "static" / "i18n.js")
    pattern = re.compile(r"^\s{4}([a-zA-Z0-9_]+):", re.MULTILINE)

    def keys_of(locale):
        # One-line purpose: collect the key names declared in a locale block.
        return set(pattern.findall(extract_locale_block(bundle, locale)))

    missing = sorted(keys_of("en") - keys_of("ru"))
    assert not missing, f"Russian locale missing keys: {missing}"
def test_russian_locale_has_no_duplicate_keys():
    """The `ru` locale block must not declare the same key more than once."""
    bundle = read(REPO / "static" / "i18n.js")
    pattern = re.compile(r"^\s{4}([a-zA-Z0-9_]+):", re.MULTILINE)
    counts = Counter(pattern.findall(extract_locale_block(bundle, "ru")))
    duplicates = sorted(key for key, seen in counts.items() if seen > 1)
    assert not duplicates, f"Russian locale has duplicate keys: {duplicates}"

View File

@@ -0,0 +1,310 @@
"""
Security tests: credential redaction in API responses.
Verifies that credentials (GitHub PATs, API keys, etc.) are masked in:
- GET /api/session (messages and tool_calls)
- GET /api/memory (MEMORY.md and USER.md content)
- GET /api/session/export (downloaded JSON)
- SSE done event (session payload in stream)
Tests run against the isolated test server on port 8788.
"""
import json
import pathlib
import sys
import urllib.request
import urllib.error
import pytest
sys.path.insert(0, str(pathlib.Path(__file__).parent.parent.parent))
def _server_is_up(port: int = 8788) -> bool:
"""Return True if the test server is accepting connections."""
try:
urllib.request.urlopen(f"http://127.0.0.1:{port}/health", timeout=2)
return True
except Exception:
return False
# _needs_server: these tests require the conftest test_server fixture (port 8788).
# The skipif is evaluated lazily via the fixture, not at collection time.
_needs_server = pytest.mark.usefixtures("test_server")
# BASE resolves to the per-worktree test server URL (see tests/_pytest_port.py).
from tests._pytest_port import BASE
# Sample credentials that should be masked in every API response
# (fake values shaped like real token formats so redaction patterns match them).
_FAKE_GITHUB_PAT = "ghp_TestFakeCredential1234567890ab"  # GitHub PAT style
_FAKE_SK_KEY = "sk-TestFakeOpenAIKey1234567890abcdef"  # OpenAI-style API key
_FAKE_HF_TOKEN = "hf_TestFakeHuggingFaceToken12345"  # Hugging Face token style
_FAKE_AWS_KEY = "AKIATESTFAKEKEY12345"  # AWS access key id style
# ── HTTP helpers ──────────────────────────────────────────────────────────────
def _get(path):
    """GET *path* from the test server and return the decoded JSON body."""
    url = BASE + path
    with urllib.request.urlopen(url, timeout=10) as resp:
        return json.loads(resp.read())
def _post(path, body=None):
    """POST *body* as JSON to *path*; return (parsed_json, status_code).

    HTTP error responses are not raised -- their body and code are returned
    the same way as successful ones.
    """
    payload = json.dumps(body or {}).encode()
    req = urllib.request.Request(
        BASE + path, data=payload,
        headers={"Content-Type": "application/json"},
    )
    try:
        with urllib.request.urlopen(req, timeout=10) as resp:
            return json.loads(resp.read()), resp.status
    except urllib.error.HTTPError as err:
        return json.loads(err.read()), err.code
def _get_raw(path):
    """Fetch *path* and return the undecoded response bytes (export endpoint)."""
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        return resp.read()
def _assert_no_plaintext_credentials(text: str, label: str = ""):
    """Fail if any known fake credential appears verbatim in *text*."""
    for secret in (_FAKE_GITHUB_PAT, _FAKE_SK_KEY, _FAKE_HF_TOKEN, _FAKE_AWS_KEY):
        message = (
            f"{label}: credential '{secret[:12]}...' found in plaintext. "
            "Redaction is not working."
        )
        assert secret not in text, message
# ── helpers.py unit tests (import-level, no test_server needed) ───────────────────
def test_redact_value_str():
    """_redact_value masks a plaintext GitHub PAT embedded in a string."""
    from api.helpers import _redact_value
    redacted = _redact_value(f"my token is {_FAKE_GITHUB_PAT} bye")
    assert _FAKE_GITHUB_PAT not in redacted
    assert "ghp_Te" in redacted  # the identifying prefix survives masking
def test_redact_value_dict():
    """_redact_value recurses into dicts."""
    from api.helpers import _redact_value
    payload = {"content": f"key={_FAKE_SK_KEY}", "role": "user"}
    redacted = _redact_value(payload)
    assert _FAKE_SK_KEY not in redacted["content"]
    assert redacted["role"] == "user"  # innocent values untouched
def test_redact_value_list():
    """_redact_value recurses into lists."""
    from api.helpers import _redact_value
    payload = [{"content": _FAKE_GITHUB_PAT}, {"content": "safe text"}]
    redacted = _redact_value(payload)
    assert _FAKE_GITHUB_PAT not in redacted[0]["content"]
    assert redacted[1]["content"] == "safe text"  # clean entries unchanged
def test_redact_session_data_messages():
    """redact_session_data masks credentials in messages[] and tool_calls[]
    while leaving safe fields intact."""
    from api.helpers import redact_session_data
    raw = {
        "session_id": "abc123",
        "title": f"my token {_FAKE_GITHUB_PAT}",
        "messages": [
            {"role": "user", "content": f"token: {_FAKE_GITHUB_PAT}"},
            {"role": "assistant", "content": "sure"},
        ],
        "tool_calls": [
            {"name": "terminal", "args": {"command": f"gh auth login --token {_FAKE_GITHUB_PAT}"},
             "snippet": "ok"},
        ],
    }
    clean = redact_session_data(raw)
    _assert_no_plaintext_credentials(json.dumps(clean), "redact_session_data")
    # Safe fields remain intact
    assert clean["session_id"] == "abc123"
    assert clean["messages"][1]["content"] == "sure"
def test_redact_session_data_multiple_cred_types():
    """redact_session_data handles sk-, ghp_, hf_, and AKIA keys in one pass."""
    from api.helpers import redact_session_data
    mixed_secrets = (
        f"openai={_FAKE_SK_KEY} "
        f"github={_FAKE_GITHUB_PAT} "
        f"hf={_FAKE_HF_TOKEN} "
        f"aws={_FAKE_AWS_KEY}"
    )
    raw = {
        "title": "test",
        "messages": [{"role": "user", "content": mixed_secrets}],
        "tool_calls": [],
    }
    clean = redact_session_data(raw)
    _assert_no_plaintext_credentials(json.dumps(clean), "multi-type redaction")
def test_redact_session_data_non_sensitive_unchanged():
    """redact_session_data does not corrupt innocent content."""
    from api.helpers import redact_session_data
    raw = {
        "title": "Hello world",
        "messages": [{"role": "user", "content": "What is 2+2?"}],
        "tool_calls": [{"name": "terminal", "snippet": "4"}],
    }
    clean = redact_session_data(raw)
    # Nothing here matches a credential pattern, so every field passes through.
    assert clean["title"] == "Hello world"
    assert clean["messages"][0]["content"] == "What is 2+2?"
    assert clean["tool_calls"][0]["snippet"] == "4"
# ── API-level tests (require running test server started by conftest.py) ─────
# Run via `start.sh && pytest tests/test_security_redaction.py -v`
def _create_session_with_credentials() -> str:
    """Write a session file with credential-containing messages directly to disk.
    Bypasses the server's in-memory cache so the GET endpoint is forced to read
    from disk, exercising the redaction code path on load.
    Uses TEST_STATE_DIR from conftest.py (the isolated test server state directory).

    Returns:
        str: the session id of the newly written session file.
    """
    import time, uuid
    try:
        from conftest import TEST_STATE_DIR
        sessions_dir = TEST_STATE_DIR / "sessions"
    except ImportError:
        # Fallback when conftest isn't importable (e.g. run outside pytest):
        # use the configured production session directory instead.
        from api.config import SESSION_DIR as sessions_dir
        sessions_dir = pathlib.Path(sessions_dir)
    sessions_dir.mkdir(parents=True, exist_ok=True)
    # Use a unique session ID that is NOT in the server's LRU cache
    sid = "sec_test_" + uuid.uuid4().hex[:8]
    now = time.time()
    session_file = sessions_dir / f"{sid}.json"
    # Full session schema written in one shot; the title, messages, and
    # tool_calls all deliberately embed fake credentials so any redaction
    # failure is visible in every API surface that returns session data.
    session_file.write_text(json.dumps({
        "session_id": sid,
        "title": f"session with {_FAKE_GITHUB_PAT}",
        "workspace": "/tmp",
        "model": "test",
        "created_at": now,
        "updated_at": now,
        "pinned": False, "archived": False, "project_id": None,
        "profile": "default", "input_tokens": 0, "output_tokens": 0,
        "estimated_cost": None, "personality": None,
        "messages": [
            {"role": "user", "content": f"my PAT is {_FAKE_GITHUB_PAT}"},
            {"role": "assistant", "content": f"sk key is {_FAKE_SK_KEY}"},
            {"role": "tool", "content": "result ok", "name": "terminal"},
        ],
        "tool_calls": [
            {"name": "terminal",
             "args": {"command": f"gh auth login --token {_FAKE_GITHUB_PAT}"},
             "snippet": "blocked"}
        ],
    }))
    return sid
def test_api_session_redacts_messages():
    """GET /api/session route must call redact_session_data() before returning."""
    import inspect
    import api.routes as routes
    handler_source = inspect.getsource(routes.handle_get)
    # Source-level check: the redaction helper must appear in the GET handler.
    assert "redact_session_data" in handler_source, (
        "api/routes.py handle_get must call redact_session_data() on /api/session response"
    )
def test_api_session_redacts_title():
    """redact_session_data must redact credentials from the session title field.

    Fix: the assertion message was an f-string with no placeholders
    (ruff F541) -- a plain string literal is equivalent and clearer.
    """
    from api.helpers import redact_session_data
    session = {
        "session_id": "abc123",
        "title": f"session with {_FAKE_GITHUB_PAT}",
        "messages": [],
        "tool_calls": [],
    }
    result = redact_session_data(session)
    assert _FAKE_GITHUB_PAT not in result["title"], (
        "redact_session_data must mask credentials in title field"
    )
    assert result["session_id"] == "abc123"  # safe fields preserved
@_needs_server
def test_api_sessions_list_redacts_titles(test_server):
    """GET /api/sessions must not return session titles containing credentials."""
    # Seed a credential-laden session file on disk, then list all sessions.
    _create_session_with_credentials()
    listing = _get("/api/sessions")
    _assert_no_plaintext_credentials(json.dumps(listing), "GET /api/sessions titles")
def test_api_session_export_redacts():
    """GET /api/session/export must call redact_session_data() in _handle_session_export."""
    import inspect
    import api.routes as routes
    # The export handler is a separate function (_handle_session_export).
    export_source = inspect.getsource(routes._handle_session_export)
    assert "redact_session_data" in export_source, (
        "_handle_session_export must call redact_session_data() before serving download"
    )
@_needs_server
def test_api_memory_redacts_via_write_read(test_server):
    """Credential written to MEMORY.md must be masked in GET /api/memory response."""
    saved = _get("/api/memory").get("memory", "")
    tainted = f"GitHub PAT: {_FAKE_GITHUB_PAT}\nNormal note: hello world"
    reply, code = _post("/api/memory/write", {"section": "memory", "content": tainted})
    assert code == 200, f"memory/write failed: {reply}"
    try:
        read_back = _get("/api/memory")
        _assert_no_plaintext_credentials(json.dumps(read_back), "GET /api/memory")
        assert "hello world" in read_back["memory"]  # non-sensitive content preserved
    finally:
        # Always restore the operator's original memory contents.
        _post("/api/memory/write", {"section": "memory", "content": saved})
# ── startup: fix_credential_permissions ──────────────────────────────────────
def test_fix_credential_permissions_corrects_loose_files(tmp_path, monkeypatch):
    """fix_credential_permissions() tightens group/other read bits.

    Seeds a fake HERMES_HOME containing a world-readable .env and a
    group-readable google_token.json, then verifies both are chmod'ed to 0o600.

    Fixes: removed the unused ``import os`` and hoisted ``import stat`` to the
    top of the function instead of mid-body, after the behavior under test.
    """
    import stat
    from api.startup import fix_credential_permissions
    env_file = tmp_path / ".env"
    env_file.write_text("SECRET=abc")
    env_file.chmod(0o644)  # world-readable -- should be fixed
    google_file = tmp_path / "google_token.json"
    google_file.write_text("{}")
    google_file.chmod(0o664)  # group-readable -- should be fixed
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    fix_credential_permissions()
    assert stat.S_IMODE(env_file.stat().st_mode) == 0o600, ".env not fixed to 600"
    assert stat.S_IMODE(google_file.stat().st_mode) == 0o600, "google_token.json not fixed to 600"
def test_fix_credential_permissions_skips_correct_files(tmp_path, monkeypatch):
    """fix_credential_permissions() does not alter already-strict files."""
    import stat
    from api.startup import fix_credential_permissions
    env_file = tmp_path / ".env"
    env_file.write_text("SECRET=abc")
    env_file.chmod(0o600)  # already strict -- must stay 600
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    fix_credential_permissions()
    assert stat.S_IMODE(env_file.stat().st_mode) == 0o600

251
tests/test_session_ops.py Normal file
View File

@@ -0,0 +1,251 @@
"""End-to-end tests for /api/session/retry, /api/session/undo,
/api/session/status, /api/session/usage.
Tests run against the live test subprocess server (see tests/conftest.py).
We seed transcripts via POST /api/session/import (ignores incoming
session_id; returns a fresh one we register for cleanup).
"""
import json
import urllib.request
import urllib.error
import pytest
from tests.conftest import TEST_BASE, _post, make_session_tracked
def _get(path):
    """Fetch *path* from the test server and parse the JSON body.

    Raises urllib.error.HTTPError on any non-2xx status.
    """
    with urllib.request.urlopen(TEST_BASE + path, timeout=10) as resp:
        return json.loads(resp.read())
def _import_session_with_messages(cleanup_list, messages, model='openai/gpt-5.4-mini'):
    """Create a session pre-populated with *messages* via /api/session/import.

    Returns the server-assigned session_id (registered on *cleanup_list*).

    The import endpoint (api/routes.py:2588) accepts {title, messages, model,
    workspace, tool_calls, pinned} and IGNORES any incoming session_id --
    it always generates a fresh one via Session(...). We therefore use the
    id the server hands back, never a self-generated one.
    """
    payload = {
        'title': 'test',
        'messages': messages,
        'model': model,
    }
    reply = _post(TEST_BASE, '/api/session/import', payload)
    assert reply.get('ok') is True and 'session' in reply, f"Import failed: {reply}"
    sid = reply['session']['session_id']
    cleanup_list.append(sid)
    return sid
# -- /api/session/retry ----------------------------------------------------
def test_retry_returns_last_user_text(cleanup_test_sessions):
    """Retry reports the resent user text and how many trailing messages it dropped."""
    transcript = [
        {'role': 'user', 'content': 'first user msg'},
        {'role': 'assistant', 'content': 'first reply'},
        {'role': 'user', 'content': 'second user msg'},
        {'role': 'assistant', 'content': 'second reply'},
        {'role': 'tool', 'content': 'tool output'},
    ]
    sid = _import_session_with_messages(cleanup_test_sessions, transcript)
    reply = _post(TEST_BASE, '/api/session/retry', {'session_id': sid})
    assert reply.get('ok') is True, reply
    assert reply.get('last_user_text') == 'second user msg'
    # 'second user msg' + 'second reply' + 'tool output' are removed.
    assert reply.get('removed_count') == 3
def test_retry_truncates_transcript(cleanup_test_sessions):
    """After retry, the transcript is cut back to the exchange before the resent message."""
    sid = _import_session_with_messages(cleanup_test_sessions, [
        {'role': 'user', 'content': 'first user msg'},
        {'role': 'assistant', 'content': 'first reply'},
        {'role': 'user', 'content': 'second user msg'},
        {'role': 'assistant', 'content': 'second reply'},
    ])
    _post(TEST_BASE, '/api/session/retry', {'session_id': sid})
    transcript = _get(f'/api/session?session_id={sid}')['session']['messages']
    # Only the first exchange (2 messages) survives the truncation.
    assert len(transcript) == 2
    assert transcript[-1]['content'] == 'first reply'
def test_retry_no_user_returns_error(cleanup_test_sessions):
    """A transcript with no user message cannot be retried."""
    sid = _import_session_with_messages(cleanup_test_sessions, [
        {'role': 'assistant', 'content': 'orphan reply'},
    ])
    reply = _post(TEST_BASE, '/api/session/retry', {'session_id': sid})
    assert 'error' in reply
    assert 'no previous message' in reply['error'].lower()
def test_retry_unknown_session_returns_404():
    """An unknown session id yields the 404 error payload."""
    # _post catches HTTPError and returns the body as JSON;
    # bad(handler, ..., 404) sends 404 + {error: "..."}.
    reply = _post(TEST_BASE, '/api/session/retry', {'session_id': 'nonexistent_zzz'})
    assert 'error' in reply
    assert 'not found' in reply['error'].lower()
def test_retry_missing_session_id_returns_error():
    """Omitting session_id entirely yields an error payload."""
    assert 'error' in _post(TEST_BASE, '/api/session/retry', {})
def test_retry_does_not_double_append(cleanup_test_sessions):
    """After /api/session/retry, the truncated transcript must end at the
    message BEFORE the last user message. Critical assertion: no duplicate
    of the resent user message gets left behind in the truncated transcript.
    """
    sid = _import_session_with_messages(cleanup_test_sessions, [
        {'role': 'user', 'content': 'msg A'},
        {'role': 'assistant', 'content': 'reply A'},
        {'role': 'user', 'content': 'msg B'},
        {'role': 'assistant', 'content': 'reply B'},
    ])
    r = _post(TEST_BASE, '/api/session/retry', {'session_id': sid})
    assert r['removed_count'] == 2  # msg B + reply B
    sess = _get(f'/api/session?session_id={sid}')['session']
    msgs = sess['messages']
    # Only msg A + reply A remain. Critically: there is NO 'msg B' anywhere.
    assert len(msgs) == 2
    assert msgs[0]['content'] == 'msg A'
    assert msgs[1]['content'] == 'reply A'


def test_retry_concurrent_requests_are_safe(cleanup_test_sessions):
    """Two concurrent /api/session/retry calls on the same session must not
    leave the transcript in a torn or doubly-truncated state.
    Pre-fix race: get_session() outside `with LOCK:` could return a stale
    (non-cached) Session instance to one thread; both threads then mutated
    different in-memory objects, and the second s.save() overwrote the
    first with stale data. The fix re-binds `s = SESSIONS.get(sid, s)`
    inside the lock so both threads converge on the canonical instance.
    """
    from concurrent.futures import ThreadPoolExecutor
    sid = _import_session_with_messages(cleanup_test_sessions, [
        {'role': 'user', 'content': 'msg A'},
        {'role': 'assistant', 'content': 'reply A'},
        {'role': 'user', 'content': 'msg B'},
        {'role': 'assistant', 'content': 'reply B'},
    ])

    def _do_retry():
        # Each worker issues an independent retry against the same session.
        return _post(TEST_BASE, '/api/session/retry', {'session_id': sid})

    with ThreadPoolExecutor(max_workers=4) as ex:
        futures = [ex.submit(_do_retry) for _ in range(4)]
        # Collect results so any worker exception propagates here.
        results = [f.result() for f in futures]
    # Each call either succeeds (truncating further) or raises 'no previous
    # message to retry' once nothing is left. After the dust settles, the
    # transcript must be a strict prefix of the original — never have a
    # phantom duplicate of the resent message.
    sess = _get(f'/api/session?session_id={sid}')['session']
    msgs = sess['messages']
    valid_prefixes = (
        [],
        [{'role': 'user', 'content': 'msg A'}, {'role': 'assistant', 'content': 'reply A'}],
        [{'role': 'user', 'content': 'msg A'}],
    )
    msg_pairs = [(m['role'], m.get('content', '')) for m in msgs]
    valid_pairs = [[(m['role'], m['content']) for m in p] for p in valid_prefixes]
    assert msg_pairs in valid_pairs, (
        f"Concurrent retries left transcript in unexpected state: {msg_pairs}. "
        "TOCTOU race in get_session/save likely re-introduced."
    )
# ── /api/session/undo ─────────────────────────────────────────────────────
def test_undo_returns_removed_preview(cleanup_test_sessions):
    """Undo drops the last exchange and echoes a preview of what was removed."""
    sid = _import_session_with_messages(cleanup_test_sessions, [
        {'role': 'user', 'content': 'first user msg'},
        {'role': 'assistant', 'content': 'first reply'},
        {'role': 'user', 'content': 'second user msg'},
        {'role': 'assistant', 'content': 'second reply'},
        {'role': 'tool', 'content': 'tool output'},
    ])
    r = _post(TEST_BASE, '/api/session/undo', {'session_id': sid})
    assert r.get('ok') is True
    # Last user message + its reply + trailing tool output = 3 removed.
    assert r.get('removed_count') == 3
    assert 'second user msg' in r.get('removed_preview', '')


def test_undo_truncates_transcript(cleanup_test_sessions):
    """After undo, only the first user/assistant exchange remains stored."""
    sid = _import_session_with_messages(cleanup_test_sessions, [
        {'role': 'user', 'content': 'first user msg'},
        {'role': 'assistant', 'content': 'first reply'},
        {'role': 'user', 'content': 'second user msg'},
        {'role': 'assistant', 'content': 'second reply'},
    ])
    _post(TEST_BASE, '/api/session/undo', {'session_id': sid})
    sess = _get(f'/api/session?session_id={sid}')['session']
    assert len(sess['messages']) == 2
    assert sess['messages'][-1]['content'] == 'first reply'


def test_undo_repeated_until_empty(cleanup_test_sessions):
    """A second undo on an emptied transcript reports 'nothing to undo'."""
    sid = _import_session_with_messages(cleanup_test_sessions, [
        {'role': 'user', 'content': 'msg A'},
        {'role': 'assistant', 'content': 'reply A'},
    ])
    _post(TEST_BASE, '/api/session/undo', {'session_id': sid})
    r = _post(TEST_BASE, '/api/session/undo', {'session_id': sid})
    assert 'error' in r
    assert 'nothing to undo' in r['error'].lower()


def test_undo_unknown_session_returns_404():
    """Unknown session id yields an error body (404 caught by _post)."""
    r = _post(TEST_BASE, '/api/session/undo', {'session_id': 'nonexistent_zzz'})
    assert 'error' in r
    assert 'not found' in r['error'].lower()
# ── /api/session/status ───────────────────────────────────────────────────
def test_status_returns_summary(cleanup_test_sessions):
    """Status endpoint returns id, title, counts, timestamps and run state."""
    sid = _import_session_with_messages(cleanup_test_sessions, [
        {'role': 'user', 'content': 'a'},
        {'role': 'assistant', 'content': 'b'},
        {'role': 'user', 'content': 'c'},
    ])
    r = _get(f'/api/session/status?session_id={sid}')
    assert r['session_id'] == sid
    assert r['title'] == 'test'
    assert r['message_count'] == 3
    assert 'model' in r
    assert 'workspace' in r
    assert 'created_at' in r
    assert 'updated_at' in r
    assert r['agent_running'] is False  # no active stream


def test_status_unknown_returns_404():
    """Unknown session id must surface as a raw HTTP 404 from _get."""
    try:
        _get('/api/session/status?session_id=nonexistent_zzz')
        pytest.fail('Expected HTTPError')
    except urllib.error.HTTPError as e:
        assert e.code == 404


def test_status_missing_param():
    """Missing session_id query parameter must return HTTP 400."""
    try:
        _get('/api/session/status')
        pytest.fail('Expected HTTPError')
    except urllib.error.HTTPError as e:
        assert e.code == 400


# ── /api/session/usage ────────────────────────────────────────────────────
def test_usage_returns_token_counts(cleanup_test_sessions):
    """A brand-new session reports zero input/output/total tokens."""
    sid, _ws = make_session_tracked(cleanup_test_sessions)
    # Usage on a new session: zero everything.
    r = _get(f'/api/session/usage?session_id={sid}')
    assert r['input_tokens'] == 0
    assert r['output_tokens'] == 0
    assert r['total_tokens'] == 0

View File

@@ -0,0 +1,155 @@
import json
import pathlib
import subprocess
import textwrap
REPO_ROOT = pathlib.Path(__file__).parent.parent.resolve()
# Static front-end bundles are read once at import time; the tests below
# assert on (and extract functions from) their raw text.
SESSIONS_JS = (REPO_ROOT / "static" / "sessions.js").read_text(encoding="utf-8")
STYLE_CSS = (REPO_ROOT / "static" / "style.css").read_text(encoding="utf-8")
I18N_JS = (REPO_ROOT / "static" / "i18n.js").read_text(encoding="utf-8")
def _extract_function(source: str, name: str) -> str:
marker = f"function {name}"
start = source.index(marker)
brace_start = source.index("{", start)
depth = 0
for idx in range(brace_start, len(source)):
ch = source[idx]
if ch == "{":
depth += 1
elif ch == "}":
depth -= 1
if depth == 0:
return source[start : idx + 1]
raise AssertionError(f"Could not extract {name}")
def _run_session_time_case(script_body: str) -> dict:
    """Run *script_body* under Node with the extracted sessions.js time helpers.

    Extracts the relative-time helper functions out of the real sessions.js
    bundle, prepends a stub `t()` translation function, and executes the
    whole thing with `node -e` in a forced-UTC timezone. The script is
    expected to write a single JSON object to stdout, which is returned.
    """
    functions = "\n\n".join(
        _extract_function(SESSIONS_JS, name)
        for name in (
            "_localDayOrdinal",
            "_sessionCalendarBoundaries",
            "_formatSessionDate",
            "_formatRelativeSessionTime",
            "_sessionTimeBucketLabel",
        )
    )
    # NOTE: doubled braces ({{ }}) below are f-string escapes producing
    # literal JS braces; ${{...}} produces a JS template-literal ${...}.
    script = textwrap.dedent(
        f"""
        process.env.TZ = 'UTC';
        const translations = {{
          session_time_unknown: 'Unknown',
          session_time_just_now: 'just now',
          session_time_minutes_ago: (n) => `${{n}} minute${{n === 1 ? '' : 's'}} ago`,
          session_time_hours_ago: (n) => `${{n}} hour${{n === 1 ? '' : 's'}} ago`,
          session_time_days_ago: (n) => `${{n}} day${{n === 1 ? '' : 's'}} ago`,
          session_time_last_week: 'last week',
          session_time_bucket_today: 'Today',
          session_time_bucket_yesterday: 'Yesterday',
          session_time_bucket_this_week: 'This week',
          session_time_bucket_last_week: 'Last week',
          session_time_bucket_older: 'Older',
        }};
        function t(key, ...args) {{
          const val = translations[key];
          return typeof val === 'function' ? val(...args) : val;
        }}
        {functions}
        {script_body}
        """
    )
    proc = subprocess.run(["node", "-e", script], check=True, capture_output=True, text=True)
    return json.loads(proc.stdout)
def test_session_sidebar_js_has_dynamic_relative_time_helpers():
    """sessions.js must define the relative-time helpers and bucket keys."""
    assert "function _sessionCalendarBoundaries" in SESSIONS_JS
    assert "function _formatRelativeSessionTime" in SESSIONS_JS
    assert "function _sessionTimeBucketLabel" in SESSIONS_JS
    assert "session_time_bucket_last_week" in SESSIONS_JS
    assert "session_time_bucket_this_week" in SESSIONS_JS
    assert "session_time_bucket_older" in SESSIONS_JS


def test_session_sidebar_renders_relative_time_and_meta_rows():
    """Sidebar markup/CSS hooks exist; removed legacy constructs stay removed."""
    # session-time element was removed from sessions.js in v0.50.40 to
    # give session titles full width — the CSS class is kept but set to display:none.
    # session-meta / metaBits were removed when we dropped message-count, model, and
    # source-tag badges from the sidebar (design round 2).
    assert "orderedSessions" in SESSIONS_JS
    assert ".session-time" in STYLE_CSS
    assert ".session-title-row" in STYLE_CSS
    assert ".session-item.active .session-title" in STYLE_CSS
    assert "|| _sessionTimeBucketLabel" not in SESSIONS_JS
    assert "const ONE_DAY=86400000;" not in SESSIONS_JS


def test_relative_time_uses_calendar_boundaries_and_year_for_old_sessions():
    """2-day-old session says '2 days ago'/'This week'; old ones show the year."""
    result = _run_session_time_case(
        """
        const now = Date.UTC(2026, 3, 15, 1, 0, 0);
        const mondayLate = Date.UTC(2026, 3, 13, 23, 0, 0);
        const oldSession = Date.UTC(2024, 2, 5, 12, 0, 0);
        process.stdout.write(JSON.stringify({
            relative: _formatRelativeSessionTime(mondayLate, now),
            bucket: _sessionTimeBucketLabel(mondayLate, now),
            oldDate: _formatRelativeSessionTime(oldSession, now),
        }));
        """
    )
    assert result["relative"] == "2 days ago"
    assert result["bucket"] == "This week"
    assert "2024" in result["oldDate"]


def test_relative_time_today_bucket():
    """Session from 2 hours ago should bucket as 'Today'."""
    result = _run_session_time_case(
        """
        const now = Date.UTC(2026, 3, 15, 14, 0, 0);
        const twoHoursAgo = now - 2 * 60 * 60 * 1000;
        process.stdout.write(JSON.stringify({
            relative: _formatRelativeSessionTime(twoHoursAgo, now),
            bucket: _sessionTimeBucketLabel(twoHoursAgo, now),
        }));
        """
    )
    assert result["relative"] == "2 hours ago"
    assert result["bucket"] == "Today"


def test_relative_time_handles_just_now_and_dst_safe_yesterday_boundary():
    """Sub-minute ages say 'just now'; late-evening yesterday stays 'Yesterday'."""
    result = _run_session_time_case(
        """
        const now = Date.UTC(2026, 2, 9, 12, 0, 0);
        const justNow = now - 30 * 1000;
        const yesterday = Date.UTC(2026, 2, 8, 23, 30, 0);
        process.stdout.write(JSON.stringify({
            justNow: _formatRelativeSessionTime(justNow, now),
            yesterday: _formatRelativeSessionTime(yesterday, now),
            yesterdayBucket: _sessionTimeBucketLabel(yesterday, now),
        }));
        """
    )
    assert result["justNow"] == "just now"
    assert result["yesterday"] == "Yesterday"
    assert result["yesterdayBucket"] == "Yesterday"


def test_relative_time_strings_are_localized_in_english_and_spanish_bundles():
    """Every relative-time i18n key must appear in the i18n.js bundle."""
    for key in (
        "session_time_unknown",
        "session_time_just_now",
        "session_time_minutes_ago",
        "session_time_hours_ago",
        "session_time_days_ago",
        "session_time_last_week",
        "session_time_bucket_today",
        "session_time_bucket_yesterday",
        "session_time_bucket_this_week",
        "session_time_bucket_last_week",
        "session_time_bucket_older",
    ):
        assert key in I18N_JS

View File

@@ -0,0 +1,66 @@
import json
import pathlib
import sys
import time
import urllib.parse
import urllib.request
import uuid
import pytest
sys.path.insert(0, str(pathlib.Path(__file__).parent.parent.parent))
# Every test in this file talks to the isolated test server fixture.
_needs_server = pytest.mark.usefixtures("test_server")
from tests._pytest_port import BASE
# Fake API key used to verify the server redacts secrets from responses.
_FULL_SECRET = "sk-" + ("B" * 24)
def _get(path):
    """GET ``BASE + path`` and return the decoded JSON response body."""
    response = urllib.request.urlopen(BASE + path, timeout=10)
    try:
        return json.loads(response.read())
    finally:
        response.close()
def _write_session_with_secret_title():
    """Write a minimal session JSON whose title embeds _FULL_SECRET.

    Bypasses the API and writes straight into the test server's state dir,
    so the server will pick it up from disk. Returns the new session id.
    """
    from tests.conftest import TEST_STATE_DIR
    sid = "sec_summary_" + uuid.uuid4().hex[:8]
    sessions_dir = TEST_STATE_DIR / "sessions"
    sessions_dir.mkdir(parents=True, exist_ok=True)
    now = time.time()
    # Minimal but complete session schema as expected by the server loader.
    (sessions_dir / f"{sid}.json").write_text(json.dumps({
        "session_id": sid,
        "title": f"session with {_FULL_SECRET}",
        "workspace": "/tmp",
        "model": "test",
        "created_at": now,
        "updated_at": now,
        "pinned": False,
        "archived": False,
        "project_id": None,
        "profile": "default",
        "input_tokens": 0,
        "output_tokens": 0,
        "estimated_cost": None,
        "personality": None,
        "messages": [],
        "tool_calls": [],
    }))
    return sid
@_needs_server
def test_api_sessions_search_redacts_titles(test_server):
    """Search results must include the session but never the raw secret."""
    sid = _write_session_with_secret_title()
    data = _get("/api/sessions/search?q=" + urllib.parse.quote("B" * 24))
    dump = json.dumps(data)
    assert sid in dump
    assert _FULL_SECRET not in dump


@_needs_server
def test_api_sessions_list_redacts_secret_titles(test_server):
    """The plain sessions listing must also redact secrets from titles."""
    sid = _write_session_with_secret_title()
    data = _get("/api/sessions")
    dump = json.dumps(data)
    assert sid in dump
    assert _FULL_SECRET not in dump

View File

@@ -0,0 +1,45 @@
from pathlib import Path
import re
REPO = Path(__file__).resolve().parent.parent
def read(path: Path) -> str:
    """Return the UTF-8 decoded contents of *path*."""
    with path.open(encoding="utf-8") as handle:
        return handle.read()
def test_spanish_locale_block_exists():
    """i18n.js must contain an `es` locale block with label and speech tag."""
    src = read(REPO / "static" / "i18n.js")
    assert "\n  es: {" in src
    assert "_label: 'Español'" in src
    assert "_speech: 'es-ES'" in src


def test_spanish_locale_includes_representative_translations():
    """Spot-check a handful of translated strings in the Spanish block."""
    src = read(REPO / "static" / "i18n.js")
    expected = [
        "settings_title: 'Configuración'",
        "login_title: 'Iniciar sesión'",
        "approval_heading: 'Se requiere aprobación'",
        "tab_tasks: 'Tareas'",
        "tab_skills: 'Habilidades'",
        "tab_memory: 'Memoria'",
    ]
    for entry in expected:
        assert entry in src


def test_spanish_locale_covers_english_keys():
    """Every key in the English block must also exist in the Spanish block."""
    src = read(REPO / "static" / "i18n.js")
    # Slice out each locale block by its literal `  xx: {` ... `  },` framing;
    # relies on en preceding es, and es preceding de, in the bundle.
    en_match = re.search(r"\n  en: \{([\s\S]*?)\n  \},\n\n  es: \{", src)
    es_match = re.search(r"\n  es: \{([\s\S]*?)\n  \},\n\n  de: \{", src)
    assert en_match, "English locale block not found"
    assert es_match, "Spanish locale block not found"
    # Keys are 4-space-indented `identifier:` lines inside each block.
    key_pattern = re.compile(r"^\s{4}([a-zA-Z0-9_]+):", re.MULTILINE)
    en_keys = set(key_pattern.findall(en_match.group(1)))
    es_keys = set(key_pattern.findall(es_match.group(1)))
    missing = sorted(en_keys - es_keys)
    assert not missing, f"Spanish locale missing keys: {missing}"

440
tests/test_sprint1.py Normal file
View File

@@ -0,0 +1,440 @@
"""
Sprint 1 test suite for the Hermes Web UI.
Tests use the ISOLATED test server. Port is auto-derived per worktree (see conftest.py).
Production server (port 8787) and your real conversations are never touched.
Start the server before running:
<repo>/start.sh
# wait 2 seconds
pytest webui-mvp/tests/test_sprint1.py -v
All tests are HTTP-level: they call real API endpoints and verify responses.
No mocking required for session CRUD, upload parser, or approval API.
"""
import io
import json
import os
import sys
import time
import uuid
import urllib.request
import urllib.parse
import urllib.error
import tempfile
import pathlib
# Allow importing server modules directly for unit tests
sys.path.insert(0, str(pathlib.Path(__file__).parent.parent.parent))
from tests._pytest_port import BASE
# ──────────────────────────────────────────────
# HTTP helpers
# ──────────────────────────────────────────────
def get(path):
    """GET ``BASE + path``; return the decoded JSON body (raises on HTTP errors)."""
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        return json.loads(resp.read())
def post(path, body=None):
    """POST *body* as JSON to ``BASE + path``.

    Returns ``(decoded_json, status)`` for both success and HTTP error
    responses (error bodies are parsed instead of raising).
    """
    payload = json.dumps(body if body else {}).encode()
    request = urllib.request.Request(
        BASE + path,
        data=payload,
        headers={"Content-Type": "application/json"},
    )
    try:
        response = urllib.request.urlopen(request, timeout=10)
    except urllib.error.HTTPError as err:
        return json.loads(err.read()), err.code
    with response:
        return json.loads(response.read()), response.status
def post_multipart(path, fields, files):
    """Post a multipart/form-data request. files: {name: (filename, bytes)}

    Returns (decoded_json, status) for both success and HTTP error responses.
    """
    boundary = uuid.uuid4().hex.encode()
    body = b""
    for name, value in fields.items():
        body += b"--" + boundary + b"\r\n"
        body += f"Content-Disposition: form-data; name=\"{name}\"\r\n\r\n".encode()
        body += value.encode() + b"\r\n"
    for name, (filename, data) in files.items():
        body += b"--" + boundary + b"\r\n"
        # Bug fix: the filename was hardcoded as "(unknown)" instead of
        # interpolating the caller-supplied name; tests assert the server
        # echoes the uploaded filename, so it must actually be sent.
        body += f"Content-Disposition: form-data; name=\"{name}\"; filename=\"{filename}\"\r\n".encode()
        body += b"Content-Type: application/octet-stream\r\n\r\n"
        body += data + b"\r\n"
    body += b"--" + boundary + b"--\r\n"
    req = urllib.request.Request(BASE + path, data=body,
                                 headers={"Content-Type": f"multipart/form-data; boundary={boundary.decode()}"})
    try:
        with urllib.request.urlopen(req, timeout=10) as r:
            return json.loads(r.read()), r.status
    except urllib.error.HTTPError as e:
        return json.loads(e.read()), e.code
def make_session_tracked(created_list, ws=None):
    """Create a session and register it with the cleanup fixture.

    Returns (session_id, workspace_path). Appending to *created_list*
    lets the cleanup_test_sessions fixture delete it on teardown.
    """
    body = {}
    if ws: body["workspace"] = str(ws)
    d, _ = post("/api/session/new", body)
    sid = d["session"]["session_id"]
    created_list.append(sid)
    return sid, pathlib.Path(d["session"]["workspace"])
# ──────────────────────────────────────────────
# Health check (prerequisite for all tests)
# ──────────────────────────────────────────────
def test_health():
    """Server must be running and healthy."""
    data = get("/health")
    assert data["status"] == "ok", f"health not ok: {data}"


# ──────────────────────────────────────────────
# B11: /api/session GET footgun fix
# ──────────────────────────────────────────────
def test_session_get_no_id_returns_400():
    """B11: GET /api/session with no session_id must return 400, not silently create."""
    try:
        data = get("/api/session")
        # If we get here, the server returned 200 (old broken behavior)
        assert False, f"Expected 400 but got 200: {data}"
    except urllib.error.HTTPError as e:
        assert e.code == 400, f"Expected 400, got {e.code}"
        body = json.loads(e.read())
        assert "error" in body
# ──────────────────────────────────────────────
# Session CRUD
# ──────────────────────────────────────────────
def test_session_create_and_load():
    """Create a session, verify it appears in /api/sessions, load it."""
    data, status = post("/api/session/new", {"model": "openai/gpt-5.4-mini"})
    assert status == 200, f"Expected 200, got {status}: {data}"
    assert "session" in data
    sid = data["session"]["session_id"]
    assert len(sid) == 12  # uuid4().hex[:12]
    # Give it a title so it's visible in the session list (empty Untitled sessions are filtered)
    post("/api/session/rename", {"session_id": sid, "title": "test-create-verify"})
    # Verify it appears in /api/sessions list
    sessions = get("/api/sessions")
    sids = [s["session_id"] for s in sessions["sessions"]]
    assert sid in sids, f"New session {sid} not in sessions list"
    # Load it directly
    loaded = get(f"/api/session?session_id={sid}")
    assert loaded["session"]["session_id"] == sid
    assert loaded["session"]["messages"] == []
    # Cleanup
    post("/api/session/delete", {"session_id": sid})
def test_session_update():
    """Create session, update workspace and model, verify persisted."""
    data, _ = post("/api/session/new", {})
    sid = data["session"]["session_id"]
    current_ws = pathlib.Path(data["session"]["workspace"])
    child_ws = current_ws / f"session-update-{uuid.uuid4().hex[:6]}"
    child_ws.mkdir(parents=True, exist_ok=True)
    try:
        updated, status = post("/api/session/update", {
            "session_id": sid,
            "workspace": str(child_ws),
            "model": "anthropic/claude-sonnet-4.6"
        })
        assert status == 200
        assert updated["session"]["model"] == "anthropic/claude-sonnet-4.6"
        # Reload and verify persistence
        reloaded = get(f"/api/session?session_id={sid}")
        assert reloaded["session"]["model"] == "anthropic/claude-sonnet-4.6"
    finally:
        # Fix: sibling tests delete their sessions; this one leaked its
        # session into the test state dir. Clean up even on failure.
        post("/api/session/delete", {"session_id": sid})
def test_session_delete():
    """Create session, delete it, verify it no longer loads."""
    data, _ = post("/api/session/new", {})
    sid = data["session"]["session_id"]
    result, status = post("/api/session/delete", {"session_id": sid})
    assert status == 200
    assert result.get("ok") is True
    # Trying to load it should now 404/500 (KeyError -> 500 in current handler)
    try:
        get(f"/api/session?session_id={sid}")
        assert False, "Expected error loading deleted session"
    except urllib.error.HTTPError as e:
        assert e.code in (404, 500), f"Expected 404 or 500, got {e.code}"


def test_session_delete_nonexistent():
    """Deleting a nonexistent session should return ok:True (idempotent)."""
    result, status = post("/api/session/delete", {"session_id": "doesnotexist"})
    assert status == 200
    assert result.get("ok") is True


def test_sessions_list_sorted():
    """Sessions list should be sorted most-recently-updated first."""
    # Create two sessions with a title so they're visible (empty Untitled sessions are filtered)
    a, _ = post("/api/session/new", {})
    time.sleep(0.05)
    b, _ = post("/api/session/new", {})
    sid_a = a["session"]["session_id"]
    sid_b = b["session"]["session_id"]
    post("/api/session/rename", {"session_id": sid_a, "title": "test-sort-a"})
    time.sleep(0.05)
    post("/api/session/rename", {"session_id": sid_b, "title": "test-sort-b"})
    sessions = get("/api/sessions")
    sids = [s["session_id"] for s in sessions["sessions"]]
    # b was updated more recently, should appear before a
    assert sids.index(sid_b) < sids.index(sid_a), \
        "Sessions not sorted by updated_at desc"
    # Cleanup
    post("/api/session/delete", {"session_id": sid_a})
    post("/api/session/delete", {"session_id": sid_b})
# ──────────────────────────────────────────────
# Upload parser unit tests (pure function, no HTTP)
# ──────────────────────────────────────────────
def test_parse_multipart_text_file():
    """parse_multipart correctly parses a text file field.

    The function lives in api/upload.py; rather than importing the module
    (which pulls in server wiring), extract just the function source and
    exec it in an isolated namespace.
    """
    # Cleanup: dropped the unused importlib.util.spec_from_file_location()
    # call (its result was never used) and the redundant sys.path.insert
    # already done at module import time; also corrected the assert message,
    # which claimed the source was server.py while api/upload.py is read.
    src = pathlib.Path(__file__).parent.parent.joinpath("api/upload.py").read_text()
    import re
    # Grab the full body of parse_multipart up to the next top-level def.
    m = re.search(r"(def parse_multipart\(.*?)(?=\ndef )", src, re.DOTALL)
    assert m, "Could not find parse_multipart in api/upload.py"
    ns = {}
    exec("import re as _re, email.parser as _ep\n" + m.group(1), ns)
    parse_multipart = ns["parse_multipart"]
    # Build a minimal multipart body
    boundary = b"testboundary"
    body = (
        b"--testboundary\r\n"
        b"Content-Disposition: form-data; name=\"session_id\"\r\n\r\n"
        b"abc123\r\n"
        b"--testboundary\r\n"
        b"Content-Disposition: form-data; name=\"file\"; filename=\"hello.txt\"\r\n"
        b"Content-Type: text/plain\r\n\r\n"
        b"hello world\r\n"
        b"--testboundary--\r\n"
    )
    fields, files = parse_multipart(
        io.BytesIO(body),
        "multipart/form-data; boundary=testboundary",
        len(body)
    )
    assert fields.get("session_id") == "abc123", f"fields: {fields}"
    assert "file" in files, f"files: {files}"
    filename, content = files["file"]
    assert filename == "hello.txt"
    assert content == b"hello world"
def test_parse_multipart_binary_file():
    """parse_multipart handles binary (PNG header bytes) without corruption."""
    src = pathlib.Path(__file__).parent.parent.joinpath("api/upload.py").read_text()
    import re
    # Extract just the parse_multipart function and exec it in isolation.
    m = re.search(r"(def parse_multipart\(.*?)(?=\ndef )", src, re.DOTALL)
    ns = {}
    exec("import re as _re, email.parser as _ep\n" + m.group(1), ns)
    parse_multipart = ns["parse_multipart"]
    # Fake PNG: first 8 bytes of PNG magic
    png_magic = b"\x89PNG\r\n\x1a\n"
    boundary = b"binboundary"
    body = (
        b"--binboundary\r\n"
        b"Content-Disposition: form-data; name=\"session_id\"\r\n\r\n"
        b"sess1\r\n"
        b"--binboundary\r\n"
        b"Content-Disposition: form-data; name=\"file\"; filename=\"test.png\"\r\n"
        b"Content-Type: image/png\r\n\r\n" + png_magic + b"\r\n"
        b"--binboundary--\r\n"
    )
    fields, files = parse_multipart(
        io.BytesIO(body),
        "multipart/form-data; boundary=binboundary",
        len(body)
    )
    assert "file" in files
    filename, content = files["file"]
    assert filename == "test.png"
    # The PNG magic contains \r\n and \x1a; any text-mode mangling would alter it.
    assert content == png_magic, f"Binary content corrupted: {content!r}"
# ──────────────────────────────────────────────
# File upload via HTTP
# ──────────────────────────────────────────────
def test_upload_text_file(cleanup_test_sessions):
    """Upload a text file to a session workspace, verify it appears in /api/list."""
    sid, ws = make_session_tracked(cleanup_test_sessions)
    result, status = post_multipart("/api/upload", {"session_id": sid}, {
        "file": ("test_upload.txt", b"sprint1 test content")
    })
    assert status == 200, f"Upload failed {status}: {result}"
    assert "filename" in result
    assert result["size"] == len(b"sprint1 test content")
    # Verify file appears in listing
    listing = get(f"/api/list?session_id={sid}&path=.")
    names = [e["name"] for e in listing["entries"]]
    assert result["filename"] in names, f"{result['filename']} not in {names}"
    # Cleanup the uploaded file
    post("/api/file/delete", {"session_id": sid, "path": result["filename"]})


def test_upload_too_large(cleanup_test_sessions):
    """Uploading a file over MAX_UPLOAD_BYTES is rejected (413 or connection closed)."""
    sid, _ = make_session_tracked(cleanup_test_sessions)
    # 21MB > 20MB limit
    big = b"x" * (21 * 1024 * 1024)
    try:
        result, status = post_multipart("/api/upload", {"session_id": sid}, {
            "file": ("big.bin", big)
        })
        # If we get a response it should be 413
        assert status == 413, f"Expected 413, got {status}: {result}"
    except (urllib.error.URLError, ConnectionResetError, BrokenPipeError):
        # Server closed connection after reading Content-Length > limit before body
        # This is also valid rejection behavior
        pass


def test_upload_no_file_field(cleanup_test_sessions):
    """Upload with no file field returns 400."""
    sid, _ = make_session_tracked(cleanup_test_sessions)
    result, status = post_multipart("/api/upload", {"session_id": sid}, {})
    assert status == 400, f"Expected 400, got {status}: {result}"


def test_upload_bad_session():
    """Upload to nonexistent session returns 404."""
    result, status = post_multipart("/api/upload", {"session_id": "nosuchsession"}, {
        "file": ("x.txt", b"data")
    })
    assert status == 404, f"Expected 404, got {status}: {result}"
# ──────────────────────────────────────────────
# Approval API
# ──────────────────────────────────────────────
def test_approval_pending_none():
    """GET /api/approval/pending for a session with no pending entry returns null."""
    data = get("/api/approval/pending?session_id=no_such_session")
    assert data["pending"] is None


def test_approval_submit_and_respond():
    """Inject a pending approval via server endpoint, retrieve it, respond with deny."""
    test_sid = f"test-approval-{uuid.uuid4().hex[:6]}"
    cmd = "rm -rf /tmp/testdir"
    key = "recursive_delete"
    # Inject into server process via test endpoint (shared module state)
    inject = get(f"/api/approval/inject_test?session_id={urllib.parse.quote(test_sid)}&pattern_key={key}&command={urllib.parse.quote(cmd)}")
    assert inject["ok"] is True
    # Poll should now show the pending entry
    data = get(f"/api/approval/pending?session_id={urllib.parse.quote(test_sid)}")
    assert data["pending"] is not None, "Pending entry not visible after inject"
    assert data["pending"]["command"] == cmd
    # Respond with deny
    result, status = post("/api/approval/respond", {
        "session_id": test_sid,
        "choice": "deny"
    })
    assert status == 200
    assert result["ok"] is True
    assert result["choice"] == "deny"
    # Pending should be gone
    data2 = get(f"/api/approval/pending?session_id={urllib.parse.quote(test_sid)}")
    assert data2["pending"] is None, "Pending entry should be cleared after respond"


def test_approval_respond_allow_session():
    """Inject pending entry, respond with session choice, verify cleared (approved)."""
    test_sid = f"test-approval-sess-{uuid.uuid4().hex[:6]}"
    inject = get(f"/api/approval/inject_test?session_id={urllib.parse.quote(test_sid)}&pattern_key=force_kill&command=pkill+-9+someproc")
    assert inject["ok"] is True
    result, status = post("/api/approval/respond", {
        "session_id": test_sid,
        "choice": "session"
    })
    assert status == 200
    assert result["ok"] is True
    assert result["choice"] == "session"
    # After session approval, pending should be cleared
    data = get(f"/api/approval/pending?session_id={urllib.parse.quote(test_sid)}")
    assert data["pending"] is None, "Pending entry should be cleared after session approval"
# ──────────────────────────────────────────────
# Stream status endpoint (B4/B5)
# ──────────────────────────────────────────────
def test_stream_status_unknown_id():
    """GET /api/chat/stream/status for unknown stream_id returns active:false."""
    data = get("/api/chat/stream/status?stream_id=doesnotexist")
    assert data["active"] is False


# ──────────────────────────────────────────────
# File browser
# ──────────────────────────────────────────────
def test_list_dir(cleanup_test_sessions):
    """List workspace directory for a session."""
    sid, _ = make_session_tracked(cleanup_test_sessions)
    listing = get(f"/api/list?session_id={sid}&path=.")
    assert "entries" in listing
    assert isinstance(listing["entries"], list)


def test_list_dir_path_traversal(cleanup_test_sessions):
    """Path traversal via ../.. should be blocked (500 or 400)."""
    sid, _ = make_session_tracked(cleanup_test_sessions)
    try:
        listing = get(f"/api/list?session_id={sid}&path=../../etc")
        # If server returns entries outside workspace root, that is a bug
        # (safe_resolve should raise ValueError)
        assert False, f"Expected error for path traversal, got: {listing}"
    except urllib.error.HTTPError as e:
        assert e.code in (400, 404, 500), f"Expected 400/404/500 for traversal, got {e.code}"

139
tests/test_sprint10.py Normal file
View File

@@ -0,0 +1,139 @@
"""
Sprint 10 Tests: server.py split, cancel endpoint, cron history, tool card polish.
"""
import json, pathlib, urllib.error, urllib.request, urllib.parse
REPO_ROOT = pathlib.Path(__file__).parent.parent.resolve()
from tests._pytest_port import BASE
def get(path):
    # GET BASE+path; returns (json-body, status). Raises HTTPError on 4xx/5xx.
    with urllib.request.urlopen(BASE + path, timeout=10) as r:
        return json.loads(r.read()), r.status


def get_text(path):
    # GET BASE+path; returns (decoded-text-body, status) for non-JSON assets.
    with urllib.request.urlopen(BASE + path, timeout=10) as r:
        return r.read().decode(), r.status


def post(path, body=None):
    # POST JSON to BASE+path; returns (json-body, status) even on HTTP errors.
    data = json.dumps(body or {}).encode()
    req = urllib.request.Request(BASE + path, data=data,
                                 headers={"Content-Type": "application/json"})
    try:
        with urllib.request.urlopen(req, timeout=10) as r:
            return json.loads(r.read()), r.status
    except urllib.error.HTTPError as e:
        return json.loads(e.read()), e.code


def make_session(created_list):
    # Create a session, register it for fixture cleanup, return its id.
    d, _ = post("/api/session/new", {})
    sid = d["session"]["session_id"]
    created_list.append(sid)
    return sid
# ── server.py split: api/ modules served / importable ─────────────────────
def test_health_still_works(cleanup_test_sessions):
    """/health survives the server.py split and still reports uptime/streams."""
    data, status = get("/health")
    assert status == 200
    assert data["status"] == "ok"
    assert "uptime_seconds" in data
    assert "active_streams" in data


def test_api_modules_exist(cleanup_test_sessions):
    """All api/ module files must exist on disk."""
    base = REPO_ROOT / "api"
    for mod in ["__init__.py", "config.py", "helpers.py", "models.py",
                "workspace.py", "upload.py", "streaming.py"]:
        assert (base / mod).exists(), f"Missing api/{mod}"


def test_server_py_under_750_lines(cleanup_test_sessions):
    """server.py should be under 750 lines after the split."""
    lines = len((REPO_ROOT / "server.py").read_text().splitlines())
    assert lines < 750, f"server.py is {lines} lines -- split may not have landed"


def test_api_config_has_cancel_flags(cleanup_test_sessions):
    """api/config.py must own the shared CANCEL_FLAGS / STREAMS state."""
    src = (REPO_ROOT / "api/config.py").read_text()
    assert "CANCEL_FLAGS" in src
    assert "STREAMS" in src


def test_session_crud_still_works(cleanup_test_sessions):
    """Full session lifecycle works after split."""
    created = []
    sid = make_session(created)
    data, status = get(f"/api/session?session_id={urllib.parse.quote(sid)}")
    assert status == 200
    assert data["session"]["session_id"] == sid
    post("/api/session/delete", {"session_id": sid})


def test_static_files_still_served(cleanup_test_sessions):
    """Every split-out JS bundle must still be served with real content."""
    for f in ["ui.js", "workspace.js", "sessions.js", "messages.js", "panels.js", "boot.js"]:
        src, status = get_text(f"/static/{f}")
        assert status == 200, f"/static/{f} returned {status}"
        assert len(src) > 100
# ── Cancel endpoint ────────────────────────────────────────────────────────
def test_cancel_requires_stream_id(cleanup_test_sessions):
    """/api/chat/cancel without stream_id must 400 (either return or raise)."""
    try:
        data, status = get("/api/chat/cancel")
        assert status == 400
    except urllib.error.HTTPError as e:
        assert e.code == 400


def test_cancel_nonexistent_stream(cleanup_test_sessions):
    """Cancelling an unknown stream succeeds but reports cancelled:false."""
    data, status = get("/api/chat/cancel?stream_id=nonexistent_xyz")
    assert status == 200
    assert data["ok"] is True
    assert data["cancelled"] is False


def test_cancel_button_in_html(cleanup_test_sessions):
    """The index page must wire up the cancel button and handler."""
    src, _ = get_text("/")
    assert "btnCancel" in src
    assert "cancelStream" in src


def test_cancel_function_in_boot_js(cleanup_test_sessions):
    """boot.js must define cancelStream() calling the cancel endpoint."""
    src, _ = get_text("/static/boot.js")
    assert "async function cancelStream(" in src
    assert "api/chat/cancel" in src


# ── Cron history ───────────────────────────────────────────────────────────
def test_crons_output_limit_param(cleanup_test_sessions):
    """Server accepts limit parameter > 1."""
    data, status = get("/api/crons/output?job_id=nonexistent&limit=20")
    # 404 or 200 with empty -- both acceptable for nonexistent job
    assert status in (200, 404)


def test_cron_history_button_in_panels_js(cleanup_test_sessions):
    """panels.js must expose the cron history loader and its i18n key."""
    src, _ = get_text("/static/panels.js")
    assert "loadCronHistory" in src
    assert "cron_all_runs" in src  # i18n key (was hardcoded 'All runs' before i18n hardening)


def test_cron_output_snippet_helper(cleanup_test_sessions):
    """panels.js must keep the _cronOutputSnippet truncation helper."""
    src, _ = get_text("/static/panels.js")
    assert "_cronOutputSnippet" in src


# ── Tool card polish ───────────────────────────────────────────────────────
def test_tool_card_running_dot_in_css(cleanup_test_sessions):
    """style.css must style the running-indicator dot on tool cards."""
    src, _ = get_text("/static/style.css")
    assert "tool-card-running-dot" in src


def test_tool_card_show_more_in_ui_js(cleanup_test_sessions):
    """ui.js must render the 'Show more' expander for tool cards."""
    src, _ = get_text("/static/ui.js")
    assert "Show more" in src
    assert "tool-card-more" in src


def test_tool_card_smart_truncation_in_ui_js(cleanup_test_sessions):
    """ui.js must keep the word-boundary-aware snippet truncation."""
    src, _ = get_text("/static/ui.js")
    assert "displaySnippet" in src
    assert "lastBreak" in src


def test_cancel_sse_event_handler_in_messages_js(cleanup_test_sessions):
    """messages.js must handle the SSE 'cancel' event and show the notice."""
    src, _ = get_text("/static/messages.js")
    assert "addEventListener('cancel'" in src
    assert "Task cancelled" in src


def test_active_stream_id_tracked(cleanup_test_sessions):
    """messages.js must track the active stream id in shared state S."""
    src, _ = get_text("/static/messages.js")
    assert "S.activeStreamId" in src

101
tests/test_sprint11.py Normal file
View File

@@ -0,0 +1,101 @@
"""
Sprint 11 Tests: multi-provider model support, streaming smoothness, routes extraction.
"""
import json, pathlib, urllib.error, urllib.request, urllib.parse
REPO_ROOT = pathlib.Path(__file__).parent.parent.resolve()
from tests._pytest_port import BASE
def get(path):
with urllib.request.urlopen(BASE + path, timeout=10) as r:
return json.loads(r.read()), r.status
def post(path, body=None):
data = json.dumps(body or {}).encode()
req = urllib.request.Request(BASE + path, data=data,
headers={"Content-Type": "application/json"})
try:
with urllib.request.urlopen(req, timeout=10) as r:
return json.loads(r.read()), r.status
except urllib.error.HTTPError as e:
return json.loads(e.read()), e.code
# ── /api/models endpoint ──────────────────────────────────────────────────
def test_models_endpoint_returns_200():
"""GET /api/models returns a valid response."""
d, status = get("/api/models")
assert status == 200
def test_models_has_required_fields():
"""Response includes groups, default_model, and active_provider."""
d, _ = get("/api/models")
assert 'groups' in d
assert 'default_model' in d
assert 'active_provider' in d
def test_models_groups_structure():
"""Each group has provider name and models list."""
d, _ = get("/api/models")
assert isinstance(d['groups'], list)
assert len(d['groups']) > 0
for group in d['groups']:
assert 'provider' in group
assert 'models' in group
assert isinstance(group['models'], list)
assert len(group['models']) > 0
def test_models_model_structure():
"""Each model has id and label."""
d, _ = get("/api/models")
for group in d['groups']:
for model in group['models']:
assert 'id' in model
assert 'label' in model
assert isinstance(model['id'], str)
assert isinstance(model['label'], str)
assert len(model['id']) > 0
assert len(model['label']) > 0
def test_models_default_model_not_empty():
"""When HERMES_WEBUI_DEFAULT_MODEL env var is set (as in conftest), the
/api/models response includes a non-empty default_model string."""
d, _ = get("/api/models")
assert isinstance(d['default_model'], str)
# conftest sets HERMES_WEBUI_DEFAULT_MODEL to "openai/gpt-5.4-mini", so
# this value should be non-empty in the test environment.
# When no env var is set (production with empty default), default_model
# can be "" — that is intentional (see PR #649).
assert len(d['default_model']) > 0 # only holds because conftest sets the env var
def test_models_at_least_one_provider():
"""At least one provider group should exist (fallback list at minimum)."""
d, _ = get("/api/models")
providers = [g['provider'] for g in d['groups']]
assert len(providers) >= 1
def test_models_no_duplicate_ids():
"""Model IDs should not be duplicated within a single group."""
d, _ = get("/api/models")
for group in d['groups']:
ids = [m['id'] for m in group['models']]
assert len(ids) == len(set(ids)), f"Duplicate model IDs in {group['provider']}: {ids}"
def test_session_preserves_unlisted_model():
"""A session with a model not in the dropdown should still load correctly."""
# Create a session with a custom model string
d, _ = post("/api/session/new", {})
sid = d['session']['session_id']
try:
custom_model = 'custom-provider/test-model-999'
post("/api/session/update", {
'session_id': sid,
'model': custom_model,
'workspace': d['session']['workspace']
})
# Reload and verify model persisted
d2, _ = get(f"/api/session?session_id={sid}")
assert d2['session']['model'] == custom_model
finally:
post("/api/session/delete", {'session_id': sid})

179
tests/test_sprint12.py Normal file
View File

@@ -0,0 +1,179 @@
"""
Sprint 12 Tests: settings panel, session pinning, session import, SSE reconnect.
"""
import json, pathlib, urllib.error, urllib.request, urllib.parse
from tests._pytest_port import BASE
def get(path):
with urllib.request.urlopen(BASE + path, timeout=10) as r:
return json.loads(r.read()), r.status
def post(path, body=None):
data = json.dumps(body or {}).encode()
req = urllib.request.Request(BASE + path, data=data,
headers={"Content-Type": "application/json"})
try:
with urllib.request.urlopen(req, timeout=10) as r:
return json.loads(r.read()), r.status
except urllib.error.HTTPError as e:
return json.loads(e.read()), e.code
def make_session(created_list):
d, _ = post("/api/session/new", {})
sid = d["session"]["session_id"]
created_list.append(sid)
return sid
# ── Settings API ──────────────────────────────────────────────────────────
def test_settings_get_returns_defaults():
"""GET /api/settings returns default settings."""
d, status = get("/api/settings")
assert status == 200
assert 'default_model' in d
assert 'default_workspace' in d
def test_settings_post_persists():
"""POST /api/settings saves and returns merged settings."""
d, status = post("/api/settings", {"default_model": "test/model-123"})
assert status == 200
assert d['default_model'] == 'test/model-123'
# Verify it persisted
d2, _ = get("/api/settings")
assert d2['default_model'] == 'test/model-123'
# Restore
post("/api/settings", {"default_model": "openai/gpt-5.4-mini"})
def test_settings_partial_update():
"""POST /api/settings with partial data doesn't clobber other fields."""
d1, _ = get("/api/settings")
original_ws = d1['default_workspace']
post("/api/settings", {"default_model": "anthropic/claude-sonnet-4.6"})
d2, _ = get("/api/settings")
assert d2['default_model'] == 'anthropic/claude-sonnet-4.6'
assert d2['default_workspace'] == original_ws
# Restore
post("/api/settings", {"default_model": "openai/gpt-5.4-mini"})
# ── Session Pinning ───────────────────────────────────────────────────────
def test_pin_session():
"""POST /api/session/pin sets pinned=true."""
created = []
try:
sid = make_session(created)
d, status = post("/api/session/pin", {"session_id": sid, "pinned": True})
assert status == 200
assert d['ok'] is True
assert d['session']['pinned'] is True
finally:
for sid in created:
post("/api/session/delete", {"session_id": sid})
def test_unpin_session():
"""POST /api/session/pin with pinned=false unpins."""
created = []
try:
sid = make_session(created)
post("/api/session/pin", {"session_id": sid, "pinned": True})
d, status = post("/api/session/pin", {"session_id": sid, "pinned": False})
assert status == 200
assert d['session']['pinned'] is False
finally:
for sid in created:
post("/api/session/delete", {"session_id": sid})
def test_pinned_in_session_list():
"""Pinned sessions include pinned field in session list."""
created = []
try:
sid = make_session(created)
# Pin it and give it a title so it shows in the list
post("/api/session/rename", {"session_id": sid, "title": "Pinned Test"})
post("/api/session/pin", {"session_id": sid, "pinned": True})
d, _ = get("/api/sessions")
match = [s for s in d['sessions'] if s['session_id'] == sid]
assert len(match) == 1
assert match[0]['pinned'] is True
finally:
for sid in created:
post("/api/session/delete", {"session_id": sid})
def test_pinned_persists_on_reload():
"""Pin status survives session reload from disk."""
created = []
try:
sid = make_session(created)
post("/api/session/pin", {"session_id": sid, "pinned": True})
d, _ = get(f"/api/session?session_id={sid}")
assert d['session']['pinned'] is True
finally:
for sid in created:
post("/api/session/delete", {"session_id": sid})
# ── Session Import ────────────────────────────────────────────────────────
def test_import_session_basic():
"""POST /api/session/import creates a new session from JSON."""
payload = {
"title": "Imported Test",
"messages": [
{"role": "user", "content": "Hello from import"},
{"role": "assistant", "content": "Hi there!"},
],
"model": "test/import-model",
}
d, status = post("/api/session/import", payload)
assert status == 200
assert d['ok'] is True
sid = d['session']['session_id']
try:
assert d['session']['title'] == 'Imported Test'
assert len(d['session']['messages']) == 2
# Verify it loads correctly
d2, _ = get(f"/api/session?session_id={sid}")
assert d2['session']['model'] == 'test/import-model'
finally:
post("/api/session/delete", {"session_id": sid})
def test_import_requires_messages():
"""Import fails without a messages array."""
d, status = post("/api/session/import", {"title": "No messages"})
assert status == 400
def test_import_creates_new_id():
"""Imported session gets a new session_id, not reusing any from the payload."""
payload = {
"session_id": "should_be_ignored",
"title": "ID Test",
"messages": [{"role": "user", "content": "test"}],
}
d, _ = post("/api/session/import", payload)
sid = d['session']['session_id']
try:
# The import should create a new ID, not use the one from the payload
assert sid != "should_be_ignored"
finally:
post("/api/session/delete", {"session_id": sid})
def test_import_with_pinned():
"""Imported session can be pinned."""
payload = {
"title": "Pinned Import",
"messages": [{"role": "user", "content": "test"}],
"pinned": True,
}
d, _ = post("/api/session/import", payload)
sid = d['session']['session_id']
try:
d2, _ = get(f"/api/session?session_id={sid}")
assert d2['session']['pinned'] is True
finally:
post("/api/session/delete", {"session_id": sid})

122
tests/test_sprint13.py Normal file
View File

@@ -0,0 +1,122 @@
"""
Sprint 13 Tests: cron recent endpoint, session duplicate, background alerts.
"""
import json, pathlib, urllib.error, urllib.request
from tests._pytest_port import BASE
def get(path):
with urllib.request.urlopen(BASE + path, timeout=10) as r:
return json.loads(r.read()), r.status
def post(path, body=None):
data = json.dumps(body or {}).encode()
req = urllib.request.Request(BASE + path, data=data,
headers={"Content-Type": "application/json"})
try:
with urllib.request.urlopen(req, timeout=10) as r:
return json.loads(r.read()), r.status
except urllib.error.HTTPError as e:
return json.loads(e.read()), e.code
def make_session(created_list):
d, _ = post("/api/session/new", {})
sid = d["session"]["session_id"]
created_list.append(sid)
return sid, d["session"]
# ── Cron recent endpoint ──────────────────────────────────────────────────
def test_crons_recent_returns_200():
"""GET /api/crons/recent returns completions list."""
d, status = get("/api/crons/recent?since=0")
assert status == 200
assert 'completions' in d
assert isinstance(d['completions'], list)
assert 'since' in d
def test_crons_recent_with_future_since():
"""Completions list is empty when since is in the future."""
import time
d, _ = get(f"/api/crons/recent?since={time.time() + 99999}")
assert d['completions'] == []
def test_crons_recent_default_since():
"""Default since=0 returns all completions."""
d, status = get("/api/crons/recent")
assert status == 200
assert 'completions' in d
# ── Session duplicate ─────────────────────────────────────────────────────
def test_duplicate_session():
"""Duplicating a session creates a new one with same workspace/model."""
created = []
try:
sid, sess = make_session(created)
# Set a specific model on the session
post("/api/session/update", {
"session_id": sid, "model": "test/dup-model",
"workspace": sess["workspace"]
})
# Duplicate: create new session with same workspace/model
d2, status = post("/api/session/new", {
"workspace": sess["workspace"], "model": "test/dup-model"
})
assert status == 200
new_sid = d2["session"]["session_id"]
created.append(new_sid)
assert new_sid != sid
assert d2["session"]["model"] == "test/dup-model"
assert d2["session"]["workspace"] == sess["workspace"]
finally:
for s in created:
post("/api/session/delete", {"session_id": s})
# ── Session pinned field preserved across operations ──────────────────────
def test_pinned_survives_update():
"""Pinned status survives session update."""
created = []
try:
sid, sess = make_session(created)
post("/api/session/pin", {"session_id": sid, "pinned": True})
# Update workspace/model
post("/api/session/update", {
"session_id": sid, "model": "test/other",
"workspace": sess["workspace"]
})
d, _ = get(f"/api/session?session_id={sid}")
assert d["session"]["pinned"] is True
finally:
for s in created:
post("/api/session/delete", {"session_id": s})
# ── Workspace symlink validation ──────────────────────────────────────────
def test_workspace_add_rejects_nonexistent():
"""Adding a non-existent path returns 400."""
d, status = post("/api/workspaces/add", {"path": "/nonexistent/path/12345"})
assert status == 400
def test_workspace_add_accepts_real_dir():
"""Adding a real directory under the trusted workspace root succeeds."""
d, _ = post("/api/session/new", {})
root = pathlib.Path(d["session"]["workspace"])
tmp = root / "trusted-add-test"
tmp.mkdir(parents=True, exist_ok=True)
try:
d, status = post("/api/workspaces/add", {"path": str(tmp), "name": "test-ws"})
assert status == 200
assert d["ok"] is True
finally:
post("/api/workspaces/remove", {"path": str(tmp)})
import shutil
shutil.rmtree(tmp, ignore_errors=True)

153
tests/test_sprint14.py Normal file
View File

@@ -0,0 +1,153 @@
"""
Sprint 14 Tests: file rename, folder create, session archive, session tags, mermaid, timestamps.
"""
import json, os, pathlib, shutil, tempfile, urllib.error, urllib.request
from tests._pytest_port import BASE
def get(path):
with urllib.request.urlopen(BASE + path, timeout=10) as r:
return json.loads(r.read()), r.status
def post(path, body=None):
data = json.dumps(body or {}).encode()
req = urllib.request.Request(BASE + path, data=data,
headers={"Content-Type": "application/json"})
try:
with urllib.request.urlopen(req, timeout=10) as r:
return json.loads(r.read()), r.status
except urllib.error.HTTPError as e:
return json.loads(e.read()), e.code
def make_session(created_list):
d, _ = post("/api/session/new", {})
sid = d["session"]["session_id"]
created_list.append(sid)
return sid, d["session"]
# ── File rename ───────────────────────────────────────────────────────────
def test_file_rename():
"""Renaming a file changes its name on disk."""
created = []
try:
sid, sess = make_session(created)
# Create a file first
post("/api/file/create", {"session_id": sid, "path": "rename_test.txt", "content": "hello"})
d, status = post("/api/file/rename", {
"session_id": sid, "path": "rename_test.txt", "new_name": "renamed.txt"
})
assert status == 200
assert d["ok"] is True
assert "renamed.txt" in d["new_path"]
finally:
for s in created:
post("/api/session/delete", {"session_id": s})
def test_file_rename_rejects_path_traversal():
"""Rename rejects names with path separators."""
created = []
try:
sid, sess = make_session(created)
post("/api/file/create", {"session_id": sid, "path": "safe.txt", "content": ""})
d, status = post("/api/file/rename", {
"session_id": sid, "path": "safe.txt", "new_name": "../evil.txt"
})
assert status == 400
finally:
for s in created:
post("/api/session/delete", {"session_id": s})
def test_file_rename_rejects_existing():
"""Rename fails if target name already exists."""
created = []
try:
sid, sess = make_session(created)
post("/api/file/create", {"session_id": sid, "path": "a.txt", "content": "a"})
post("/api/file/create", {"session_id": sid, "path": "b.txt", "content": "b"})
d, status = post("/api/file/rename", {
"session_id": sid, "path": "a.txt", "new_name": "b.txt"
})
assert status == 400
finally:
for s in created:
post("/api/session/delete", {"session_id": s})
# ── Folder create ─────────────────────────────────────────────────────────
def test_create_dir():
"""Creating a folder succeeds."""
created = []
try:
sid, sess = make_session(created)
d, status = post("/api/file/create-dir", {
"session_id": sid, "path": "test_folder"
})
assert status == 200
assert d["ok"] is True
finally:
for s in created:
post("/api/session/delete", {"session_id": s})
def test_create_dir_rejects_existing():
"""Creating a folder that already exists fails."""
created = []
try:
sid, sess = make_session(created)
post("/api/file/create-dir", {"session_id": sid, "path": "dup_folder"})
d, status = post("/api/file/create-dir", {"session_id": sid, "path": "dup_folder"})
assert status == 400
finally:
for s in created:
post("/api/session/delete", {"session_id": s})
# ── Session archive ───────────────────────────────────────────────────────
def test_archive_session():
"""Archiving a session sets archived=true."""
created = []
try:
sid, _ = make_session(created)
d, status = post("/api/session/archive", {"session_id": sid, "archived": True})
assert status == 200
assert d["session"]["archived"] is True
finally:
for s in created:
post("/api/session/delete", {"session_id": s})
def test_unarchive_session():
"""Unarchiving a session sets archived=false."""
created = []
try:
sid, _ = make_session(created)
post("/api/session/archive", {"session_id": sid, "archived": True})
d, status = post("/api/session/archive", {"session_id": sid, "archived": False})
assert status == 200
assert d["session"]["archived"] is False
finally:
for s in created:
post("/api/session/delete", {"session_id": s})
def test_archived_in_compact():
"""Archived field appears in session list."""
created = []
try:
sid, _ = make_session(created)
post("/api/session/rename", {"session_id": sid, "title": "Archive Test"})
post("/api/session/archive", {"session_id": sid, "archived": True})
d, _ = get(f"/api/session?session_id={sid}")
assert d["session"]["archived"] is True
finally:
for s in created:
post("/api/session/delete", {"session_id": s})

234
tests/test_sprint15.py Normal file
View File

@@ -0,0 +1,234 @@
"""
Sprint 15 Tests: session projects (CRUD, move, backward compat).
"""
import json, urllib.error, urllib.request
from tests._pytest_port import BASE
def get(path):
with urllib.request.urlopen(BASE + path, timeout=10) as r:
return json.loads(r.read()), r.status
def post(path, body=None):
data = json.dumps(body or {}).encode()
req = urllib.request.Request(BASE + path, data=data,
headers={"Content-Type": "application/json"})
try:
with urllib.request.urlopen(req, timeout=10) as r:
return json.loads(r.read()), r.status
except urllib.error.HTTPError as e:
return json.loads(e.read()), e.code
def make_session(created_list):
d, _ = post("/api/session/new", {})
sid = d["session"]["session_id"]
created_list.append(sid)
return sid, d["session"]
def make_project(created_list, name="Test Project", color=None):
body = {"name": name}
if color:
body["color"] = color
d, status = post("/api/projects/create", body)
assert status == 200
pid = d["project"]["project_id"]
created_list.append(pid)
return pid, d["project"]
def cleanup_projects(project_ids):
for pid in project_ids:
try:
post("/api/projects/delete", {"project_id": pid})
except Exception:
pass
# ── Project CRUD ─────────────────────────────────────────────────────────
def test_create_project():
"""Creating a project returns a valid project dict."""
pids = []
try:
pid, proj = make_project(pids, "My Project", "#7cb9ff")
assert pid and len(pid) == 12
assert proj["name"] == "My Project"
assert proj["color"] == "#7cb9ff"
assert "created_at" in proj
finally:
cleanup_projects(pids)
def test_list_projects_empty():
"""Listing projects when none exist returns empty list."""
d, status = get("/api/projects")
assert status == 200
assert isinstance(d["projects"], list)
def test_list_projects():
"""Listing projects returns created projects."""
pids = []
try:
make_project(pids, "Alpha")
make_project(pids, "Beta")
d, status = get("/api/projects")
assert status == 200
names = [p["name"] for p in d["projects"]]
assert "Alpha" in names
assert "Beta" in names
finally:
cleanup_projects(pids)
def test_rename_project():
"""Renaming a project updates its name."""
pids = []
try:
pid, _ = make_project(pids, "Old Name")
d, status = post("/api/projects/rename", {"project_id": pid, "name": "New Name"})
assert status == 200
assert d["project"]["name"] == "New Name"
# Verify via list
dl, _ = get("/api/projects")
names = [p["name"] for p in dl["projects"]]
assert "New Name" in names
assert "Old Name" not in names
finally:
cleanup_projects(pids)
def test_delete_project():
"""Deleting a project removes it from the list."""
pids = []
try:
pid, _ = make_project(pids, "Doomed")
d, status = post("/api/projects/delete", {"project_id": pid})
assert status == 200
assert d["ok"] is True
dl, _ = get("/api/projects")
assert all(p["project_id"] != pid for p in dl["projects"])
pids.clear() # already deleted
finally:
cleanup_projects(pids)
def test_delete_project_unassigns_sessions():
"""Deleting a project unassigns all sessions that belonged to it."""
pids = []
sids = []
try:
pid, _ = make_project(pids, "Temp Project")
sid, _ = make_session(sids)
# Assign session to project
post("/api/session/move", {"session_id": sid, "project_id": pid})
# Verify assigned
sd, _ = get(f"/api/session?session_id={sid}")
assert sd["session"].get("project_id") == pid
# Delete project
post("/api/projects/delete", {"project_id": pid})
pids.clear()
# Verify session is unassigned
sd2, _ = get(f"/api/session?session_id={sid}")
assert sd2["session"].get("project_id") is None
finally:
cleanup_projects(pids)
for s in sids:
post("/api/session/delete", {"session_id": s})
def test_create_project_requires_name():
"""Creating a project without a name returns 400."""
d, status = post("/api/projects/create", {})
assert status == 400
def test_delete_nonexistent_project():
"""Deleting a project that doesn't exist returns 404."""
d, status = post("/api/projects/delete", {"project_id": "nonexistent99"})
assert status == 404
# ── Session move ─────────────────────────────────────────────────────────
def test_session_move_to_project():
"""Moving a session to a project sets its project_id."""
pids = []
sids = []
try:
pid, _ = make_project(pids, "Work")
sid, _ = make_session(sids)
d, status = post("/api/session/move", {"session_id": sid, "project_id": pid})
assert status == 200
assert d["session"]["project_id"] == pid
finally:
cleanup_projects(pids)
for s in sids:
post("/api/session/delete", {"session_id": s})
def test_session_move_to_unassigned():
"""Moving a session to null project unassigns it."""
pids = []
sids = []
try:
pid, _ = make_project(pids, "Temp")
sid, _ = make_session(sids)
# Assign then unassign
post("/api/session/move", {"session_id": sid, "project_id": pid})
d, status = post("/api/session/move", {"session_id": sid, "project_id": None})
assert status == 200
assert d["session"]["project_id"] is None
finally:
cleanup_projects(pids)
for s in sids:
post("/api/session/delete", {"session_id": s})
def test_session_project_in_list():
"""Session list includes project_id for assigned sessions."""
pids = []
sids = []
try:
pid, _ = make_project(pids, "Listed")
sid, _ = make_session(sids)
# Give it a title so it shows in list (non-empty Untitled sessions are hidden)
post("/api/session/rename", {"session_id": sid, "title": "Project Test Session"})
post("/api/session/move", {"session_id": sid, "project_id": pid})
dl, _ = get("/api/sessions")
match = [s for s in dl["sessions"] if s["session_id"] == sid]
assert len(match) == 1
assert match[0]["project_id"] == pid
finally:
cleanup_projects(pids)
for s in sids:
post("/api/session/delete", {"session_id": s})
# ── Backward compat ──────────────────────────────────────────────────────
def test_compact_includes_project_id():
"""New session compact dict includes project_id as null."""
sids = []
try:
sid, sess = make_session(sids)
# Give it a title so it appears in the list
post("/api/session/rename", {"session_id": sid, "title": "Compat Test"})
dl, _ = get("/api/sessions")
match = [s for s in dl["sessions"] if s["session_id"] == sid]
assert len(match) == 1
assert "project_id" in match[0]
assert match[0]["project_id"] is None
finally:
for s in sids:
post("/api/session/delete", {"session_id": s})
def test_session_move_requires_session_id():
"""Moving without session_id returns 400."""
d, status = post("/api/session/move", {"project_id": "abc"})
assert status == 400

721
tests/test_sprint16.py Normal file
View File

@@ -0,0 +1,721 @@
"""
Sprint 16 Tests: safe HTML rendering in renderMd(), active session styling,
session sidebar polish (SVG icons, dropdown actions).
"""
import html as _html
import pathlib
import re
import urllib.request
from tests._pytest_port import BASE
REPO_ROOT = pathlib.Path(__file__).parent.parent
# ── Helpers ──────────────────────────────────────────────────────────────────
def get_text(path):
    """Fetch BASE+path and return (body decoded as UTF-8, HTTP status)."""
    url = BASE + path
    with urllib.request.urlopen(url, timeout=10) as resp:
        body = resp.read().decode("utf-8")
        return body, resp.status
def esc(s):
    """Mirror of esc() in ui.js — HTML-escapes a string (including quotes)."""
    return _html.escape(str(s), quote=True)
# Tags the renderer is allowed to emit at block level; anything else is escaped.
SAFE_TAGS = re.compile(
    r"^<\/?(strong|em|code|pre|h[1-6]|ul|ol|li|table|thead|tbody|tr|th|td"
    r"|hr|blockquote|p|br|a|div)([\s>]|$)",
    re.I,
)
# Smaller allowlist used inside list items / blockquotes.
SAFE_INLINE = re.compile(r"^<\/?(strong|em|code|a)([\s>]|$)", re.I)
def inline_md(t):
    """Mirror of inlineMd() in ui.js — for use inside list items / blockquotes."""
    def bold_italic(m):
        return "<strong><em>" + esc(m.group(1)) + "</em></strong>"
    def bold(m):
        return "<strong>" + esc(m.group(1)) + "</strong>"
    def italic(m):
        return "<em>" + esc(m.group(1)) + "</em>"
    def code_span(m):
        return "<code>" + esc(m.group(1)) + "</code>"
    def link(m):
        return f'<a href="{esc(m.group(2))}" target="_blank" rel="noopener">{esc(m.group(1))}</a>'
    def tag_guard(m):
        # Escape any HTML tag that is not on the inline allowlist.
        tag = m.group()
        return tag if SAFE_INLINE.match(tag) else esc(tag)
    out = t
    out = re.sub(r"\*\*\*(.+?)\*\*\*", bold_italic, out)
    out = re.sub(r"\*\*(.+?)\*\*", bold, out)
    out = re.sub(r"\*([^*\n]+)\*", italic, out)
    out = re.sub(r"`([^`\n]+)`", code_span, out)
    out = re.sub(
        r"\[([^\]]+)\]\((https?://[^\)]+)\)",
        link,
        out,
    )
    out = re.sub(r"</?[a-zA-Z][^>]*>", tag_guard, out)
    return out
def render_md(raw):
    """
    Python mirror of renderMd() in static/ui.js.
    Kept in sync with the JS implementation so tests catch regressions
    if the JS logic drifts from the documented behaviour.
    """
    s = raw or ""
    # Pre-pass: stash code blocks/spans, convert safe HTML → markdown equivalents
    fence_stash = []
    def stash(m):
        # Swap each code block/span for a "\x00F<index>\x00" placeholder so the
        # HTML→markdown conversions below cannot touch code contents.
        fence_stash.append(m.group())
        return "\x00F" + str(len(fence_stash) - 1) + "\x00"
    s = re.sub(r"(```[\s\S]*?```|`[^`\n]+`)", stash, s)
    s = re.sub(r"<strong>([\s\S]*?)</strong>", lambda m: "**" + m.group(1) + "**", s, flags=re.I)
    s = re.sub(r"<b>([\s\S]*?)</b>", lambda m: "**" + m.group(1) + "**", s, flags=re.I)
    s = re.sub(r"<em>([\s\S]*?)</em>", lambda m: "*" + m.group(1) + "*", s, flags=re.I)
    s = re.sub(r"<i>([\s\S]*?)</i>", lambda m: "*" + m.group(1) + "*", s, flags=re.I)
    s = re.sub(r"<code>([^<]*?)</code>", lambda m: "`" + m.group(1) + "`", s, flags=re.I)
    s = re.sub(r"<br\s*/?>", "\n", s, flags=re.I)
    # Restore the stashed code spans now that safe-HTML conversion is done.
    s = re.sub(r"\x00F(\d+)\x00", lambda m: fence_stash[int(m.group(1))], s)
    # Fenced code blocks
    def fenced(m):
        # Optional language label becomes a pre-header div above the <pre>.
        lang, code = m.group(1), m.group(2).rstrip("\n")
        h = f'<div class="pre-header">{esc(lang)}</div>' if lang else ""
        return h + "<pre><code>" + esc(code) + "</code></pre>"
    s = re.sub(r"```([\w+-]*)\n?([\s\S]*?)```", fenced, s)
    s = re.sub(r"`([^`\n]+)`", lambda m: "<code>" + esc(m.group(1)) + "</code>", s)
    # Inline formatting (top-level, outside list items)
    s = re.sub(r"\*\*\*(.+?)\*\*\*", lambda m: "<strong><em>" + esc(m.group(1)) + "</em></strong>", s)
    s = re.sub(r"\*\*(.+?)\*\*", lambda m: "<strong>" + esc(m.group(1)) + "</strong>", s)
    s = re.sub(r"\*([^*\n]+)\*", lambda m: "<em>" + esc(m.group(1)) + "</em>", s)
    # Block elements using inlineMd for their content
    s = re.sub(r"^### (.+)$", lambda m: "<h3>" + inline_md(m.group(1)) + "</h3>", s, flags=re.M)
    s = re.sub(r"^## (.+)$", lambda m: "<h2>" + inline_md(m.group(1)) + "</h2>", s, flags=re.M)
    s = re.sub(r"^# (.+)$", lambda m: "<h1>" + inline_md(m.group(1)) + "</h1>", s, flags=re.M)
    s = re.sub(r"^---+$", "<hr>", s, flags=re.M)
    s = re.sub(r"^> (.+)$", lambda m: "<blockquote>" + inline_md(m.group(1)) + "</blockquote>", s, flags=re.M)
    def handle_ul(block):
        # NOTE(review): the capture regex below only admits 0–1 leading spaces
        # per bullet line, yet this checks for 2+ — the indent branch looks
        # unreachable from here; confirm against the JS regex in ui.js.
        lines = block.strip().split("\n")
        out = "<ul>"
        for l in lines:
            indent = bool(re.match(r"^ {2,}", l))
            text = re.sub(r"^ {0,4}[-*+] ", "", l)
            style = ' style="margin-left:16px"' if indent else ""
            out += f"<li{style}>{inline_md(text)}</li>"
        return out + "</ul>"
    s = re.sub(r"((?:^(?: )?[-*+] .+\n?)+)", lambda m: handle_ul(m.group()), s, flags=re.M)
    def handle_ol(block):
        # Numbered lists: strip the "N. " marker and run inline markdown.
        lines = block.strip().split("\n")
        out = "<ol>"
        for l in lines:
            text = re.sub(r"^ {0,4}\d+\. ", "", l)
            out += f"<li>{inline_md(text)}</li>"
        return out + "</ol>"
    s = re.sub(r"((?:^(?: )?\d+\. .+\n?)+)", lambda m: handle_ol(m.group()), s, flags=re.M)
    # Safety net: escape unknown tags in remaining text
    s = re.sub(r"</?[a-zA-Z][^>]*>", lambda m: m.group() if SAFE_TAGS.match(m.group()) else esc(m.group()), s)
    # Paragraph wrap
    parts = s.split("\n\n")
    def wrap(p):
        # Known block-level output passes through untouched; anything else
        # becomes a <p> with single newlines rendered as <br>.
        p = p.strip()
        if not p: return ""
        if re.match(r"^<(h[1-6]|ul|ol|pre|hr|blockquote)", p): return p
        return "<p>" + p.replace("\n", "<br>") + "</p>"
    s = "\n".join(wrap(p) for p in parts)
    return s
# ── Static analysis: verify key structures exist in ui.js ────────────────────
def test_render_md_pre_pass_converts_strong(cleanup_test_sessions):
    """ui.js renderMd() must have pre-pass that converts <strong> to **."""
    src = REPO_ROOT / "static" / "ui.js"
    code = src.read_text()
    assert "<strong>" in code and "**" in code, "pre-pass for <strong> not found"
    # Verify the specific conversion pattern
    assert re.search(r"<strong>.*?\*\*", code, re.S), \
        "renderMd pre-pass should convert <strong>...</strong> to **...**"
def test_render_md_has_safety_net(cleanup_test_sessions):
    """ui.js must have a safety-net that escapes unknown HTML tags after the pipeline."""
    src = REPO_ROOT / "static" / "ui.js"
    code = src.read_text()
    assert "SAFE_TAGS" in code, "SAFE_TAGS allowlist regex not found in ui.js"
    assert "esc(tag)" in code, "safety-net esc(tag) call not found in ui.js"
def test_render_md_stashes_code_blocks(cleanup_test_sessions):
    """ui.js pre-pass must stash code blocks before replacing safe HTML tags."""
    src = REPO_ROOT / "static" / "ui.js"
    code = src.read_text()
    assert "fence_stash" in code, "fence_stash not found in renderMd pre-pass"
def test_render_md_handles_br_tag(cleanup_test_sessions):
    """ui.js must convert <br> to newline in pre-pass."""
    src = REPO_ROOT / "static" / "ui.js"
    code = src.read_text()
    # The second clause makes this a loose check: any "<br" occurrence passes.
    assert re.search(r"<br\\s\*", code) or "<br" in code, "<br> handling not found"
def test_render_md_no_placeholder_remnants(cleanup_test_sessions):
    """Old Unicode placeholder approach (\\uE001-\\uE005) must be gone."""
    src = REPO_ROOT / "static" / "ui.js"
    code = src.read_text()
    for old_ph in ["\\uE001", "\\uE002", "\\uE003", "\\uE004", "\\uE005"]:
        assert old_ph not in code, \
            f"Old placeholder {old_ph} still present — broken implementation not cleaned up"
def test_render_md_safe_tag_allowlist_complete(cleanup_test_sessions):
    """SAFE_TAGS allowlist must include all tags the pipeline emits."""
    src = REPO_ROOT / "static" / "ui.js"
    code = src.read_text()
    required = ["strong", "em", "code", "pre", "ul", "ol", "li",
                "table", "blockquote", "hr", "br", "a", "div"]
    # Extract the JS regex literal body between "/.../i".
    safe_tags_match = re.search(r"SAFE_TAGS\s*=\s*/(.+?)/i", code)
    assert safe_tags_match, "SAFE_TAGS regex not found"
    pattern = safe_tags_match.group(1)
    for tag in required:
        assert tag in pattern, f"Tag '{tag}' missing from SAFE_TAGS allowlist"
# ── Behavioural: renderMd logic via Python mirror ─────────────────────────────
# These tests exercise the render_md() mirror defined above, not the JS itself.
def test_render_md_markdown_bold(cleanup_test_sessions):
    """**word** markdown renders as <strong>word</strong>."""
    out = render_md("Hello **world**")
    assert "<strong>world</strong>" in out
def test_render_md_html_strong_passthrough(cleanup_test_sessions):
    """<strong>word</strong> in AI output renders as bold."""
    out = render_md("Hello <strong>world</strong>")
    assert "<strong>world</strong>" in out
def test_render_md_html_b_tag(cleanup_test_sessions):
    """<b>word</b> renders as <strong>word</strong>."""
    out = render_md("Hello <b>world</b>")
    assert "<strong>world</strong>" in out
def test_render_md_html_em_passthrough(cleanup_test_sessions):
    """<em>word</em> renders as italic."""
    out = render_md("Hello <em>world</em>")
    assert "<em>world</em>" in out
def test_render_md_html_i_tag(cleanup_test_sessions):
    """<i>word</i> renders as <em>word</em>."""
    out = render_md("Hello <i>word</i>")
    assert "<em>word</em>" in out
def test_render_md_html_code_passthrough(cleanup_test_sessions):
    """<code>text</code> renders as inline code."""
    out = render_md("use <code>print()</code>")
    assert "<code>print()</code>" in out
def test_render_md_html_br_becomes_newline(cleanup_test_sessions):
    """<br> in AI output becomes a newline (rendered as <br> inside <p> later)."""
    out = render_md("line one<br>line two")
    assert "line one\nline two" in out or "line one<br>line two" in out
def test_render_md_mixed_markdown_and_html(cleanup_test_sessions):
    """Markdown and HTML formatting can coexist in the same response."""
    out = render_md("**markdown** and <strong>html</strong>")
    assert "<strong>markdown</strong>" in out
    assert "<strong>html</strong>" in out
def test_render_md_html_strong_in_list_item(cleanup_test_sessions):
"""THE SCREENSHOT BUG: <strong> tags inside list items must render as bold,
not as escaped literal text like &lt;strong&gt;."""
out = render_md(
"- <strong>All items</strong> get `border-radius: 0 8px 8px 0`\n"
"- <strong>Active item</strong> uses <code>#e8a030</code>\n"
"- <strong>Project items</strong> show their color\n"
"- <strong>Regular items</strong> stay muted"
)
assert "&lt;strong&gt;" not in out, \
"Escaped <strong> literal found in list output — bold not rendering"
assert "<strong>All items</strong>" in out
assert "<strong>Active item</strong>" in out
assert "<code>border-radius: 0 8px 8px 0</code>" in out
assert "<code>#e8a030</code>" in out
def test_render_md_exact_screenshot_content(cleanup_test_sessions):
"""Exact text from the ui-changes-unrendered-html-tags.png screenshot.
This is the canonical regression test for the inlineMd fix.
All four bullet points must render <strong> and <code> as HTML, not literal text."""
out = render_md(
"- <strong>All items</strong> now have <code>border-radius: 0 8px 8px 0</code>"
" \u2014 straight left edge everywhere, rounded on the right\n"
"- <strong>Active item</strong> is now gold/amber (<code>#e8a030</code>)"
" \u2014 same warm gold used in the logo \u2014 instead of blue,"
" so it stands out distinctly from everything else\n"
"- <strong>Project items</strong> still show their project color on the left"
" border, but only when they're not the active item (active always wins with gold)\n"
"- <strong>Regular items</strong> (no project) still have no left border color"
)
# None of the safe tags should appear as literal escaped text
assert "&lt;strong&gt;" not in out, \
"Literal &lt;strong&gt; found — <strong> is not rendering as bold"
assert "&lt;/strong&gt;" not in out, \
"Literal &lt;/strong&gt; found — closing tag is not rendering"
assert "&lt;code&gt;" not in out, \
"Literal &lt;code&gt; found — <code> is not rendering as inline code"
# Each item's bold label must render correctly
assert "<strong>All items</strong>" in out
assert "<strong>Active item</strong>" in out
assert "<strong>Project items</strong>" in out
assert "<strong>Regular items</strong>" in out
# The code spans in items 1 and 2 must render correctly
assert "<code>border-radius: 0 8px 8px 0</code>" in out
assert "<code>#e8a030</code>" in out
# The surrounding prose text must be preserved
assert "straight left edge everywhere" in out
assert "same warm gold used in the logo" in out
assert "active always wins with gold" in out
def test_render_md_markdown_bold_in_list_item(cleanup_test_sessions):
"""**bold** markdown inside list items must render as <strong>."""
out = render_md("- **First** item\n- **Second** item with `code`")
assert "<strong>First</strong>" in out
assert "<strong>Second</strong>" in out
assert "<code>code</code>" in out
def test_render_md_html_strong_in_blockquote(cleanup_test_sessions):
"""<strong> inside blockquote must render as bold."""
out = render_md("> <strong>Note:</strong> pay attention")
assert "&lt;strong&gt;" not in out
assert "<strong>Note:</strong>" in out
def test_render_md_html_strong_in_heading(cleanup_test_sessions):
"""<strong> inside a heading must render as bold."""
out = render_md("## <strong>Important</strong> Section")
assert "&lt;strong&gt;" not in out
assert "<strong>Important</strong>" in out
def test_render_md_xss_in_list_still_blocked(cleanup_test_sessions):
    """XSS attempts in list items must still be escaped."""
    rendered = render_md("- <img src=x onerror=alert(1)> bad")
    # The escaped form must be present and the raw tag absent.
    assert "&lt;img" in rendered
    assert "<img" not in rendered
def test_render_md_xss_in_blockquote_still_blocked(cleanup_test_sessions):
"""XSS in blockquote must still be escaped."""
out = render_md("> <script>alert(1)</script>")
assert "<script>" not in out
assert "&lt;script" in out
def test_render_md_code_span_in_list_protected(cleanup_test_sessions):
"""Backtick code span in list item must escape its content."""
out = render_md("- Use `<br>` for breaks")
assert "<code>&lt;br&gt;</code>" in out
def test_render_md_code_block_protects_html(cleanup_test_sessions):
"""HTML inside a backtick code span must NOT be converted — shown as literal."""
out = render_md("keep `<strong>literal</strong>` safe")
assert "&lt;strong&gt;" in out, "HTML inside code span should be escaped"
assert "<strong>literal</strong>" not in out, "HTML inside code span should NOT render as bold"
def test_render_md_fenced_code_protects_html(cleanup_test_sessions):
"""HTML inside a fenced code block must not be converted by the pre-pass.
The fenced block is stashed before tag replacement runs, so the raw HTML
is preserved intact for the pipeline's esc() to escape when rendering
the <pre><code> block. We verify the stash/restore mechanism works by
checking the content is unchanged after the pre-pass (i.e. still contains
the original tag text, not converted to **not bold**)."""
src = "```\n<strong>not bold</strong>\n```"
out = render_md(src)
# Pre-pass stash preserves the raw content -- it should NOT have been
# converted to **not bold** (which would render as bold outside the fence)
assert "**not bold**" not in out, \
"Fenced code content was incorrectly converted to markdown by the pre-pass"
# The raw content should still be present (stash/restore worked)
assert "<strong>not bold</strong>" in out or "&lt;strong&gt;" in out, \
"Fenced code content was lost after stash/restore"
# ── Security: XSS must be blocked ─────────────────────────────────────────────
def test_render_md_xss_img_tag_escaped(cleanup_test_sessions):
"""<img src=x onerror=alert(1)> must be HTML-escaped, not rendered."""
out = render_md("<img src=x onerror=alert(1)>")
assert "<img" not in out, "Raw <img> tag must not appear in output"
assert "&lt;img" in out, "<img> must be HTML-escaped"
def test_render_md_xss_script_tag_escaped(cleanup_test_sessions):
"""<script>alert(1)</script> must be HTML-escaped."""
out = render_md("<script>alert(1)</script>")
assert "<script>" not in out, "Raw <script> tag must not appear in output"
assert "&lt;script" in out, "<script> must be HTML-escaped"
def test_render_md_xss_iframe_escaped(cleanup_test_sessions):
"""<iframe> must be HTML-escaped."""
out = render_md("<iframe src='evil.com'></iframe>")
assert "<iframe" not in out
assert "&lt;iframe" in out
def test_render_md_xss_svg_onerror_escaped(cleanup_test_sessions):
"""<svg onload=...> must be HTML-escaped."""
out = render_md("<svg onload=alert(1)>")
assert "<svg" not in out
assert "&lt;svg" in out
def test_render_md_xss_in_bold_text_escaped(cleanup_test_sessions):
"""**<img onerror=...>** — XSS inside markdown bold must be escaped."""
out = render_md("**<img src=x onerror=alert(1)>**")
assert "<img" not in out, "XSS inside **bold** must be escaped"
assert "&lt;img" in out
def test_render_md_xss_in_html_strong_escaped(cleanup_test_sessions):
"""<strong><img ...></strong> — nested XSS inside HTML strong must be escaped."""
out = render_md("<strong><img src=x onerror=alert(1)></strong>")
# <strong> converts to ** which then escapes the inner content via esc()
assert "<img" not in out, "XSS nested inside <strong> must be escaped"
def test_render_md_xss_object_tag_escaped(cleanup_test_sessions):
"""<object data=...> must be HTML-escaped."""
out = render_md("<object data='evil.swf'></object>")
assert "<object" not in out
assert "&lt;object" in out
# ── Sprint 16 sidebar: static structure checks ───────────────────────────────
# ── Exhaustive inlineMd / renderMd edge-case tests ───────────────────────────
# --- Unordered list variants ---
def test_list_bold_only(cleanup_test_sessions):
"""Single bold word in list item."""
out = render_md("- **bold**")
assert "<strong>bold</strong>" in out
assert "&lt;" not in out
def test_list_italic_only(cleanup_test_sessions):
"""Single italic word in list item."""
out = render_md("- *italic*")
assert "<em>italic</em>" in out
def test_list_code_only(cleanup_test_sessions):
"""Single code span in list item."""
out = render_md("- `code`")
assert "<code>code</code>" in out
def test_list_bold_and_code_mixed(cleanup_test_sessions):
"""Bold and code together in one list item."""
out = render_md("- **run** `pip install foo`")
assert "<strong>run</strong>" in out
assert "<code>pip install foo</code>" in out
def test_list_html_strong_and_code_mixed(cleanup_test_sessions):
"""HTML <strong> and <code> together — the exact screenshot scenario."""
out = render_md("- <strong>Key</strong>: use <code>value</code>")
assert "<strong>Key</strong>" in out
assert "<code>value</code>" in out
assert "&lt;strong&gt;" not in out
assert "&lt;code&gt;" not in out
def test_list_html_em(cleanup_test_sessions):
"""HTML <em> in list item renders as italic."""
out = render_md("- <em>emphasized</em> text")
assert "<em>emphasized</em>" in out
assert "&lt;em&gt;" not in out
def test_list_html_b_tag(cleanup_test_sessions):
"""HTML <b> in list item renders as bold."""
out = render_md("- <b>bold via b tag</b>")
assert "<strong>bold via b tag</strong>" in out
assert "&lt;b&gt;" not in out
def test_list_html_i_tag(cleanup_test_sessions):
"""HTML <i> in list item renders as italic."""
out = render_md("- <i>italic via i tag</i>")
assert "<em>italic via i tag</em>" in out
assert "&lt;i&gt;" not in out
def test_list_multiple_items_each_formatted(cleanup_test_sessions):
"""Multiple list items each with different formatting."""
out = render_md(
"- **bold item**\n"
"- *italic item*\n"
"- `code item`\n"
"- plain item"
)
assert "<strong>bold item</strong>" in out
assert "<em>italic item</em>" in out
assert "<code>code item</code>" in out
assert "<li>plain item</li>" in out
def test_list_item_bold_mid_sentence(cleanup_test_sessions):
"""Bold in middle of a list item sentence."""
out = render_md("- Set the **timeout** to 30 seconds")
assert "<strong>timeout</strong>" in out
assert "Set the" in out
assert "to 30 seconds" in out
def test_list_item_multiple_bold_spans(cleanup_test_sessions):
"""Multiple bold spans in one list item."""
out = render_md("- **A** and **B** are both important")
assert "<strong>A</strong>" in out
assert "<strong>B</strong>" in out
def test_ordered_list_bold(cleanup_test_sessions):
"""Bold text inside ordered list items."""
out = render_md("1. **First** step\n2. **Second** step\n3. Plain step")
assert "<ol>" in out
assert "<strong>First</strong>" in out
assert "<strong>Second</strong>" in out
assert "<li>Plain step</li>" in out
def test_ordered_list_html_strong(cleanup_test_sessions):
"""HTML <strong> inside ordered list items renders correctly."""
out = render_md("1. <strong>Install</strong> the package\n2. <strong>Configure</strong> the settings")
assert "<ol>" in out
assert "<strong>Install</strong>" in out
assert "<strong>Configure</strong>" in out
assert "&lt;strong&gt;" not in out
def test_ordered_list_code_spans(cleanup_test_sessions):
"""Code spans inside ordered list items."""
out = render_md("1. Run `npm install`\n2. Run `npm start`")
assert "<code>npm install</code>" in out
assert "<code>npm start</code>" in out
def test_indented_list_item_bold(cleanup_test_sessions):
"""Bold inside indented (nested) list item."""
out = render_md("- top level\n - **nested bold**")
assert "<strong>nested bold</strong>" in out
assert "margin-left:16px" in out
# --- Blockquote variants ---
def test_blockquote_plain(cleanup_test_sessions):
"""Plain blockquote wraps in <blockquote>."""
out = render_md("> simple quote")
assert "<blockquote>simple quote</blockquote>" in out
def test_blockquote_bold(cleanup_test_sessions):
"""**bold** inside blockquote renders correctly."""
out = render_md("> **important** note")
assert "<strong>important</strong>" in out
def test_blockquote_html_strong(cleanup_test_sessions):
"""<strong> inside blockquote renders as bold."""
out = render_md("> <strong>Warning:</strong> read this")
assert "<strong>Warning:</strong>" in out
assert "&lt;strong&gt;" not in out
def test_blockquote_code_span(cleanup_test_sessions):
"""Code span inside blockquote renders correctly."""
out = render_md("> Use `git commit` to save")
assert "<code>git commit</code>" in out
def test_blockquote_mixed_formatting(cleanup_test_sessions):
"""Mixed bold and code in blockquote."""
out = render_md("> **Note:** run `pip install foo` first")
assert "<strong>Note:</strong>" in out
assert "<code>pip install foo</code>" in out
def test_blockquote_xss_blocked(cleanup_test_sessions):
"""XSS in blockquote content must be escaped."""
out = render_md("> <img src=x onerror=alert(1)>")
assert "&lt;img" in out
assert "<img" not in out
# --- Heading variants ---
def test_heading_h1_bold(cleanup_test_sessions):
"""Bold inside h1 renders correctly."""
out = render_md("# **Main** Title")
assert "<h1><strong>Main</strong> Title</h1>" in out
def test_heading_h2_html_strong(cleanup_test_sessions):
"""HTML <strong> inside h2 renders correctly."""
out = render_md("## <strong>Section</strong> Name")
assert "<h2><strong>Section</strong> Name</h2>" in out
assert "&lt;strong&gt;" not in out
def test_heading_h3_code(cleanup_test_sessions):
"""Code span inside h3 renders correctly."""
out = render_md("### The `renderMd` function")
assert "<h3>The <code>renderMd</code> function</h3>" in out
def test_heading_xss_blocked(cleanup_test_sessions):
"""XSS attempt in heading must be escaped."""
out = render_md("## <script>alert(1)</script>")
assert "<script>" not in out
assert "&lt;script" in out
# --- Paragraph / top-level formatting ---
def test_paragraph_bold_renders(cleanup_test_sessions):
"""Bold in a plain paragraph renders correctly."""
out = render_md("The **quick brown fox** jumps.")
assert "<strong>quick brown fox</strong>" in out
def test_paragraph_html_strong_renders(cleanup_test_sessions):
"""HTML <strong> in a plain paragraph renders correctly."""
out = render_md("The <strong>quick brown fox</strong> jumps.")
assert "<strong>quick brown fox</strong>" in out
assert "&lt;strong&gt;" not in out
def test_paragraph_html_code_renders(cleanup_test_sessions):
"""HTML <code> in a plain paragraph renders correctly."""
out = render_md("Call <code>foo()</code> to start.")
assert "<code>foo()</code>" in out
assert "&lt;code&gt;" not in out
def test_paragraph_br_creates_line_break(cleanup_test_sessions):
"""<br> in paragraph becomes a line break inside <p>."""
out = render_md("Line one<br>Line two")
# br converts to \n which inside <p> becomes <br>
assert "Line one" in out and "Line two" in out
def test_multiple_paragraphs_separated(cleanup_test_sessions):
"""Double newline creates separate <p> elements."""
out = render_md("First paragraph.\n\nSecond paragraph.")
assert out.count("<p>") == 2
# --- Table variants ---
def test_table_structure_in_ui_js(cleanup_test_sessions):
"""ui.js must contain table rendering logic with thead/tbody structure."""
src = (REPO_ROOT / "static" / "ui.js").read_text()
assert "<table>" in src or "table>" in src, "table rendering not found in ui.js"
assert "thead" in src, "thead not found in table renderer"
assert "tbody" in src, "tbody not found in table renderer"
assert "parseRow" in src, "parseRow helper not found in table renderer"
# --- br tag specifically ---
def test_br_in_list_item(cleanup_test_sessions):
"""<br> inside a list item becomes a newline."""
out = render_md("- Line one<br>Line two")
assert "Line one" in out
assert "Line two" in out
def test_br_self_closing_in_paragraph(cleanup_test_sessions):
"""<br/> self-closing form is also handled."""
out = render_md("Before<br/>After")
assert "Before" in out and "After" in out
# --- No double-escaping ---
def test_no_double_escaping_ampersand(cleanup_test_sessions):
"""A literal & in text must become &amp; exactly once, not &amp;amp;."""
out = render_md("foo & bar")
assert "&amp;amp;" not in out
assert "&amp;" in out or "foo & bar" in out # either fine (paragraph wrap may not escape)
def test_no_double_escaping_lt_in_code(cleanup_test_sessions):
"""< inside a code span must become &lt; exactly once."""
out = render_md("`a < b`")
assert "&lt;lt;" not in out
assert "&lt;" in out
def test_strong_text_not_double_escaped(cleanup_test_sessions):
"""Content of <strong> must not be double-escaped."""
out = render_md("<strong>hello & world</strong>")
# The & inside strong content should be escaped once
assert "&amp;amp;" not in out
assert "<strong>" in out
# --- inlineMd helper present in source ---
def test_inline_md_helper_in_ui_js(cleanup_test_sessions):
"""ui.js must define inlineMd() helper function."""
src = (REPO_ROOT / "static" / "ui.js").read_text()
assert "function inlineMd(" in src, "inlineMd() helper not found in ui.js"
def test_inline_md_used_in_list_handler(cleanup_test_sessions):
    """List handler in ui.js must call inlineMd() not esc() for item text."""
    src = (REPO_ROOT / "static" / "ui.js").read_text()
    # Recognise the list-block handler by any of these source patterns.
    # (The old code chained str.find() with `or`, which is broken: find()
    # returns -1 — a truthy value — on a miss, so the chain short-circuited
    # on the first pattern and never tried the alternatives, and a match at
    # index 0 — falsy — would have been treated as a miss.)
    list_markers = ("html+='<ul>'", 'html+=`<ul>`', "let html='<ul>'")
    has_list_handler = any(marker in src for marker in list_markers)
    assert has_list_handler or "inlineMd(text)" in src, "inlineMd not called in list handler"
    # Verify inlineMd is called, not bare esc
    assert "inlineMd(text)" in src, "inlineMd(text) call not found — list items may not render formatting"
def test_inline_md_used_in_blockquote_handler(cleanup_test_sessions):
"""Blockquote handler in ui.js must call inlineMd() not esc() for content."""
src = (REPO_ROOT / "static" / "ui.js").read_text()
assert "inlineMd(t)" in src, "inlineMd not called in blockquote/heading handler"
def test_sessions_js_has_svg_icons(cleanup_test_sessions):
"""sessions.js must define ICONS object with SVG strings for sidebar buttons."""
src = REPO_ROOT / "static" / "sessions.js"
code = src.read_text()
assert "const ICONS=" in code or "const ICONS =" in code, "ICONS constant not found"
for icon in ["pin", "folder", "archive", "trash", "dup"]:
assert icon + ":" in code or f"'{icon}'" in code, f"ICONS.{icon} not found"
assert "<svg" in code, "SVG content not found in ICONS"
def test_sessions_js_has_dropdown_actions(cleanup_test_sessions):
"""sessions.js must use a single trigger button and dropdown for session actions."""
src = REPO_ROOT / "static" / "sessions.js"
code = src.read_text()
assert "session-actions-trigger" in code, "session action trigger button not found in sessions.js"
assert "session-action-menu" in code, "session action dropdown menu not found in sessions.js"
def test_style_css_has_session_actions_dropdown(cleanup_test_sessions):
"""style.css must define trigger and dropdown styles for session actions."""
src = REPO_ROOT / "static" / "style.css"
code = src.read_text()
assert ".session-actions" in code, ".session-actions not found in style.css"
assert ".session-action-menu" in code, ".session-action-menu not found in style.css"
assert "position:fixed" in code or "position: fixed" in code, \
".session-action-menu must use position:fixed to avoid sidebar clipping"
def test_style_css_active_session_uses_accent(cleanup_test_sessions):
"""Active session style should use accent color variable, not hardcoded hex."""
src = REPO_ROOT / "static" / "style.css"
code = src.read_text()
assert "var(--accent" in code and ".session-item.active" in code, \
"Active session must use var(--accent) variables in style.css"
def test_sessions_js_uses_action_menu_not_per_row_buttons(cleanup_test_sessions):
"""sessions.js must use the single ⋯ action menu instead of per-row buttons.
The per-row button overlay was replaced with a single ⋯ trigger that opens a
positioned dropdown (session-action-menu). This removes the borderLeftColor
project colour override that the old code applied, which was the original
concern this test guarded. The new design uses a dot indicator for project
membership instead.
"""
src = REPO_ROOT / "static" / "sessions.js"
code = src.read_text()
assert "session-actions-trigger" in code, "session-actions-trigger not found in sessions.js"
assert "_openSessionActionMenu" in code, "_openSessionActionMenu not found in sessions.js"
assert "closeSessionActionMenu" in code, "closeSessionActionMenu not found in sessions.js"
# The old per-row buttons must not be present (they were replaced by the menu)
assert "act-pin" not in code, "old act-pin per-row button still in sessions.js"
assert "act-archive" not in code, "old act-archive per-row button still in sessions.js"

96
tests/test_sprint17.py Normal file
View File

@@ -0,0 +1,96 @@
"""
Sprint 17 Tests: send_key setting, commands.js static file, workspace subdir listing.
"""
import json, urllib.error, urllib.request
from tests._pytest_port import BASE
def get(path):
    """GET *path* on the test server; return (decoded JSON body, HTTP status)."""
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        payload = json.loads(resp.read())
        status = resp.status
    return payload, status
def post(path, body=None):
    """POST *body* as JSON to *path*; return (decoded JSON, HTTP status).

    On an HTTP error the error document and its status code are returned
    instead of raising, so tests can assert on 4xx/5xx responses.
    """
    payload = json.dumps(body or {}).encode()
    request = urllib.request.Request(
        BASE + path, data=payload,
        headers={"Content-Type": "application/json"})
    try:
        with urllib.request.urlopen(request, timeout=10) as resp:
            return json.loads(resp.read()), resp.status
    except urllib.error.HTTPError as err:
        return json.loads(err.read()), err.code
def make_session(created_list):
    """Create a fresh session, record its id in *created_list* for cleanup,
    and return (session_id, session dict)."""
    resp, _ = post("/api/session/new", {})
    session = resp["session"]
    created_list.append(session["session_id"])
    return session["session_id"], session
# ── Settings: send_key ──────────────────────────────────────────────────────
def test_settings_send_key_default():
"""GET /api/settings returns send_key with default value 'enter'."""
data, status = get("/api/settings")
assert status == 200
assert data.get("send_key") == "enter"
def test_settings_save_send_key():
    """POST /api/settings with send_key persists and round-trips."""
    try:
        # Save a non-default value and confirm it round-trips.
        _, status = post("/api/settings", {"send_key": "ctrl+enter"})
        assert status == 200
        data, _ = get("/api/settings")
        assert data["send_key"] == "ctrl+enter"
    finally:
        # Restore the default unconditionally.  No assertions here: an
        # assert inside `finally` would mask the original test failure
        # with a secondary cleanup failure.
        post("/api/settings", {"send_key": "enter"})
    # Verify the restore took effect (reached only on the success path).
    data, _ = get("/api/settings")
    assert data["send_key"] == "enter"
def test_settings_invalid_send_key_rejected():
"""POST /api/settings with invalid send_key value is silently ignored."""
# Set a known good value first
post("/api/settings", {"send_key": "enter"})
# Try to set an invalid value
data, status = post("/api/settings", {"send_key": "invalid_value"})
assert status == 200
# Should still be 'enter' (invalid value ignored)
assert data["send_key"] == "enter"
def test_settings_unknown_key_ignored():
"""POST /api/settings ignores unknown keys."""
data, status = post("/api/settings", {"unknown_key": "value", "send_key": "enter"})
assert status == 200
assert "unknown_key" not in data
# ── Static file: commands.js ────────────────────────────────────────────────
def test_static_commands_js_served():
"""GET /static/commands.js returns 200 and contains COMMANDS registry."""
req = urllib.request.Request(BASE + "/static/commands.js")
with urllib.request.urlopen(req, timeout=10) as r:
body = r.read().decode()
assert r.status == 200
assert "COMMANDS" in body
assert "executeCommand" in body
# ── Workspace: subdir listing ───────────────────────────────────────────────
def test_list_workspace_root():
"""GET /api/list with path=. returns entries for workspace root."""
created = []
sid, _ = make_session(created)
data, status = get(f"/api/list?session_id={sid}&path=.")
assert status == 200
assert "entries" in data
assert isinstance(data["entries"], list)

128
tests/test_sprint19.py Normal file
View File

@@ -0,0 +1,128 @@
"""
Sprint 19 Tests: auth/login, security headers, request size limit.
"""
import json, urllib.error, urllib.request
from tests._pytest_port import BASE
def get(path, headers=None):
    """GET *path*; return (decoded JSON, HTTP status, response-header dict)."""
    req = urllib.request.Request(BASE + path)
    for name, value in (headers or {}).items():
        req.add_header(name, value)
    with urllib.request.urlopen(req, timeout=10) as resp:
        return json.loads(resp.read()), resp.status, dict(resp.headers)
def post(path, body=None, headers=None):
    """POST JSON *body* to *path*; return (decoded JSON, status, header dict).

    HTTPError responses are decoded and returned rather than raised, so
    tests can assert on error payloads and codes directly.
    """
    req = urllib.request.Request(
        BASE + path,
        data=json.dumps(body or {}).encode(),
        headers={"Content-Type": "application/json"})
    for name, value in (headers or {}).items():
        req.add_header(name, value)
    try:
        with urllib.request.urlopen(req, timeout=10) as resp:
            return json.loads(resp.read()), resp.status, dict(resp.headers)
    except urllib.error.HTTPError as err:
        return json.loads(err.read()), err.code, dict(err.headers)
# ── Auth status (no password configured in test env) ──────────────────────
def test_auth_status_disabled():
"""Auth should be disabled by default (no password set)."""
d, status, _ = get("/api/auth/status")
assert status == 200
assert d["auth_enabled"] is False
def test_login_when_auth_disabled():
"""Login should succeed trivially when auth is not enabled."""
d, status, _ = post("/api/auth/login", {"password": "anything"})
assert status == 200
assert d["ok"] is True
def test_all_routes_accessible_without_auth():
"""When auth is disabled, all routes should work without cookies."""
d, status, _ = get("/api/sessions")
assert status == 200
assert "sessions" in d
def test_login_page_served():
"""GET /login should return the login page HTML."""
req = urllib.request.Request(BASE + "/login")
with urllib.request.urlopen(req, timeout=10) as r:
html = r.read().decode()
assert r.status == 200
assert "Sign in" in html
assert "Hermes" in html
# ── Security headers ─────────────────────────────────────────────────────
def test_security_headers_on_json():
"""JSON responses should include security headers."""
d, status, headers = get("/api/auth/status")
assert status == 200
assert headers.get("X-Content-Type-Options") == "nosniff"
assert headers.get("X-Frame-Options") == "DENY"
assert headers.get("Referrer-Policy") == "same-origin"
def test_security_headers_on_health():
"""Health endpoint should include security headers."""
d, status, headers = get("/health")
assert status == 200
assert headers.get("X-Content-Type-Options") == "nosniff"
def test_permissions_policy_does_not_disable_microphone():
"""Permissions-Policy must not hard-disable microphone access for same-origin voice input."""
_, status, headers = get("/health")
assert status == 200
policy = headers.get("Permissions-Policy", "")
assert policy, "Permissions-Policy header missing"
assert "microphone=()" not in policy, \
"Permissions-Policy must not block microphone access or desktop/mobile voice input cannot work"
def test_cache_control_no_store():
"""API responses should have Cache-Control: no-store."""
d, status, headers = get("/api/sessions")
assert headers.get("Cache-Control") == "no-store"
# ── Settings password field ──────────────────────────────────────────────
def test_settings_password_hash_not_exposed():
"""GET /api/settings must never expose the stored password hash."""
d, status, _ = get("/api/settings")
assert status == 200
assert "password_hash" not in d # security: never send hash to client
def test_settings_save_preserves_other_fields():
"""Saving settings should not break existing fields."""
# Get current settings
current, _, _ = get("/api/settings")
# Save with just send_key
d, status, _ = post("/api/settings", {"send_key": "enter"})
assert status == 200
# Verify other fields still present
updated, _, _ = get("/api/settings")
assert "default_model" in updated
assert "default_workspace" in updated
def test_settings_password_hash_not_directly_settable():
"""POST /api/settings with password_hash must not overwrite the stored hash."""
# Attempt to set a raw hash directly (attack vector)
post("/api/settings", {"password_hash": "deadbeef" * 8})
# Settings response must not expose it regardless
updated, status, _ = get("/api/settings")
assert status == 200
assert "password_hash" not in updated

106
tests/test_sprint2.py Normal file
View File

@@ -0,0 +1,106 @@
"""Sprint 2 tests: image preview, file types, markdown. Uses cleanup_test_sessions fixture."""
import io, json, uuid, urllib.request, urllib.error, pathlib
from tests._pytest_port import BASE
def get(path):
    """GET *path* and return (decoded JSON body, HTTP status)."""
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        status = resp.status
        data = json.loads(resp.read())
    return data, status
def get_raw(path):
    """GET *path*; return (raw body bytes, Content-Type header, HTTP status)."""
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        content_type = resp.headers.get('Content-Type', '')
        return resp.read(), content_type, resp.status
def post(path, body=None):
    """POST *body* as JSON to *path*; return (decoded JSON, HTTP status).

    HTTP error responses are returned (payload + code) instead of raising.
    """
    request = urllib.request.Request(
        BASE + path,
        data=json.dumps(body or {}).encode(),
        headers={"Content-Type": "application/json"})
    try:
        with urllib.request.urlopen(request, timeout=10) as resp:
            return json.loads(resp.read()), resp.status
    except urllib.error.HTTPError as err:
        return json.loads(err.read()), err.code
def make_session_tracked(created_list, ws=None):
    """Create a session (optionally rooted at workspace *ws*), register its
    id in *created_list* so the cleanup fixture removes it, and return
    (session_id, workspace Path).

    Uses the module-level ``pathlib`` import; the former redundant
    function-local ``import pathlib as _pathlib`` was dead weight.
    """
    body = {}
    if ws:
        body["workspace"] = str(ws)
    d, _ = post("/api/session/new", body)
    sid = d["session"]["session_id"]
    created_list.append(sid)
    return sid, pathlib.Path(d["session"]["workspace"])
def test_raw_endpoint_serves_png(cleanup_test_sessions):
sid, ws = make_session_tracked(cleanup_test_sessions)
png = (b"\x89PNG\r\n\x1a\n" b"\x00\x00\x00\rIHDR\x00\x00\x00\x01"
b"\x00\x00\x00\x01\x08\x02\x00\x00\x00"
b"\x90wS\xde\x00\x00\x00\x0cIDATx\x9cc"
b"\xf8\x0f\x00\x00\x01\x01\x00\x05\x18"
b"\xd8N\x00\x00\x00\x00IEND\xaeB`\x82")
(ws / "test.png").write_bytes(png)
raw, ct, status = get_raw(f"/api/file/raw?session_id={sid}&path=test.png")
assert status == 200
assert "image/png" in ct
assert raw == png
def test_raw_endpoint_serves_jpeg(cleanup_test_sessions):
sid, ws = make_session_tracked(cleanup_test_sessions)
jpeg = b"\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00\x00\x01\x00\x01\x00\x00\xff\xd9"
(ws / "photo.jpg").write_bytes(jpeg)
raw, ct, status = get_raw(f"/api/file/raw?session_id={sid}&path=photo.jpg")
assert status == 200
assert "image/jpeg" in ct
def test_raw_endpoint_serves_svg(cleanup_test_sessions):
sid, ws = make_session_tracked(cleanup_test_sessions)
svg = b"<svg xmlns=\"http://www.w3.org/2000/svg\" width=\"100\" height=\"100\"><circle/></svg>"
(ws / "icon.svg").write_bytes(svg)
raw, ct, status = get_raw(f"/api/file/raw?session_id={sid}&path=icon.svg")
assert status == 200
assert "image/svg" in ct
def test_raw_endpoint_path_traversal_blocked(cleanup_test_sessions):
    """Paths escaping the workspace (../../etc/passwd) must be rejected."""
    sid, _ = make_session_tracked(cleanup_test_sessions)
    try:
        get_raw(f"/api/file/raw?session_id={sid}&path=../../etc/passwd")
        # A bare `assert False` gives an opaque failure report; say why.
        assert False, "path-traversal request unexpectedly succeeded"
    except urllib.error.HTTPError as e:
        assert e.code in (400, 500)
def test_raw_endpoint_missing_file_returns_404(cleanup_test_sessions):
    """Requesting a nonexistent file must fail with an HTTP error."""
    sid, _ = make_session_tracked(cleanup_test_sessions)
    try:
        get_raw(f"/api/file/raw?session_id={sid}&path=no_such_file.png")
        # A bare `assert False` gives an opaque failure report; say why.
        assert False, "request for missing file unexpectedly succeeded"
    except urllib.error.HTTPError as e:
        assert e.code in (404, 500)
def test_md_file_returns_text_via_api_file(cleanup_test_sessions):
sid, ws = make_session_tracked(cleanup_test_sessions)
md = "# Hello\n\nThis is **bold**.\n"
(ws / "README.md").write_text(md)
data, status = get(f"/api/file?session_id={sid}&path=README.md")
assert status == 200
assert data["content"] == md
def test_md_file_with_table(cleanup_test_sessions):
sid, ws = make_session_tracked(cleanup_test_sessions)
md = "| Name | Value |\n|------|-------|\n| foo | bar |\n"
(ws / "table.md").write_text(md)
data, status = get(f"/api/file?session_id={sid}&path=table.md")
assert status == 200
assert "| Name | Value |" in data["content"]
def test_file_listing_includes_images(cleanup_test_sessions):
sid, ws = make_session_tracked(cleanup_test_sessions)
(ws / "photo.png").write_bytes(b"fake png")
(ws / "notes.md").write_text("# Notes")
(ws / "script.py").write_text("print('hello')")
data, status = get(f"/api/list?session_id={sid}&path=.")
assert status == 200
names = {e["name"]: e for e in data["entries"]}
assert "photo.png" in names
assert "notes.md" in names
assert "script.py" in names

444
tests/test_sprint20.py Normal file
View File

@@ -0,0 +1,444 @@
"""
Sprint 20 Tests: Voice input (mic button) via Web Speech API.
These tests verify the static assets contain the correct HTML structure,
CSS rules, and JS logic for the mic feature — all of which runs purely in
the browser with no server-side component.
"""
import re
import urllib.request
import json
import pathlib
from tests._pytest_port import BASE
def get_text(path):
    """Fetch *path* from the test server; return (decoded body, HTTP status)."""
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        body = resp.read().decode()
        code = resp.status
    return body, code
# ── index.html ────────────────────────────────────────────────────────────
def test_mic_button_present_in_html():
    """index.html must contain the mic button with id='btnMic'."""
    html, status = get_text("/")
    assert status == 200
    assert 'id="btnMic"' in html

def test_mic_button_has_mic_btn_class():
    """btnMic must carry the mic-btn CSS class for styling hooks."""
    html, _ = get_text("/")
    # Exact attribute match — pins the markup's class list.
    assert 'class="icon-btn mic-btn"' in html

def test_mic_button_hidden_by_default():
    """btnMic starts hidden (display:none) — JS shows it only if supported."""
    html, _ = get_text("/")
    # The button element should have display:none in its style attribute
    assert 'id="btnMic"' in html
    btn_match = re.search(r'id="btnMic"[^>]*>', html)
    assert btn_match, "btnMic element not found"
    assert 'display:none' in btn_match.group(0)

def test_mic_button_has_title():
    """btnMic should have a descriptive title for accessibility."""
    html, _ = get_text("/")
    # The regex captures only the opening tag, so 'title=' must be an attribute.
    btn_match = re.search(r'id="btnMic"[^>]*>', html)
    assert btn_match
    assert 'title=' in btn_match.group(0)

def test_mic_status_div_present():
    """index.html must contain the #micStatus listening indicator."""
    html, _ = get_text("/")
    assert 'id="micStatus"' in html

def test_mic_status_hidden_by_default():
    """#micStatus starts hidden — only shown during active recording."""
    html, _ = get_text("/")
    status_match = re.search(r'id="micStatus"[^>]*>', html)
    assert status_match, "#micStatus element not found"
    assert 'display:none' in status_match.group(0)

def test_mic_status_has_mic_dot():
    """#micStatus must contain a .mic-dot element for the pulse animation."""
    html, _ = get_text("/")
    # mic-dot should appear after micStatus
    idx_status = html.find('id="micStatus"')
    idx_dot = html.find('mic-dot', idx_status)
    assert idx_status != -1 and idx_dot != -1
    assert idx_dot > idx_status

def test_mic_status_has_listening_text():
    """#micStatus should display a 'Listening' label."""
    html, _ = get_text("/")
    # NOTE(review): checked page-wide, not scoped to #micStatus — any
    # 'Listening' text anywhere on the page satisfies this assertion.
    assert 'Listening' in html

def test_mic_button_svg_microphone_shape():
    """btnMic SVG must include the rect (mic body) and path (mic arc)."""
    html, _ = get_text("/")
    # Find mic button section
    btn_start = html.find('id="btnMic"')
    btn_end = html.find('</button>', btn_start) + len('</button>')
    btn_html = html[btn_start:btn_end]
    assert '<rect' in btn_html, "mic SVG missing rect (mic body)"
    assert '<path' in btn_html, "mic SVG missing path (arc)"
    assert '<line' in btn_html, "mic SVG missing line (stand)"

def test_mic_button_inside_composer_left():
    """btnMic must be inside .composer-left, next to the attach button."""
    html, _ = get_text("/")
    # NOTE(review): find('</div>') stops at the first close tag after the
    # container opens, so only markup up to the first nested </div> is
    # inspected — confirm both buttons precede any nested div.
    composer_left_start = html.find('class="composer-left"')
    composer_left_end = html.find('</div>', composer_left_start)
    section = html[composer_left_start:composer_left_end]
    assert 'btnAttach' in section
    assert 'btnMic' in section
# ── style.css ────────────────────────────────────────────────────────────
def test_mic_btn_css_rule_exists():
"""style.css must define .mic-btn rule."""
css, status = get_text("/static/style.css")
assert status == 200
assert '.mic-btn' in css
def test_mic_btn_recording_state_css():
""".mic-btn.recording must be defined for active recording visual state."""
css, _ = get_text("/static/style.css")
assert '.mic-btn.recording' in css
def test_mic_recording_color_error():
""".mic-btn.recording must use the error color variable or red."""
css, _ = get_text("/static/style.css")
recording_idx = css.find('.mic-btn.recording')
# Find the rule block after the selector
brace_open = css.find('{', recording_idx)
brace_close = css.find('}', brace_open)
rule = css[brace_open:brace_close]
assert 'var(--error)' in rule or '#e94560' in rule
def test_mic_recording_has_animation():
""".mic-btn.recording must use an animation for the pulse effect."""
css, _ = get_text("/static/style.css")
recording_idx = css.find('.mic-btn.recording')
brace_open = css.find('{', recording_idx)
brace_close = css.find('}', brace_open)
rule = css[brace_open:brace_close]
assert 'animation' in rule
def test_mic_pulse_keyframes_defined():
"""@keyframes mic-pulse must be defined for the pulsing animation."""
css, _ = get_text("/static/style.css")
assert 'mic-pulse' in css
assert '@keyframes' in css
def test_mic_status_css_rule_exists():
"""style.css must define .mic-status rule."""
css, _ = get_text("/static/style.css")
assert '.mic-status' in css
def test_mic_dot_css_rule_exists():
"""style.css must define .mic-dot rule with animation."""
css, _ = get_text("/static/style.css")
assert '.mic-dot' in css
dot_idx = css.find('.mic-dot')
brace_open = css.find('{', dot_idx)
brace_close = css.find('}', brace_open)
rule = css[brace_open:brace_close]
assert 'animation' in rule
def test_mic_btn_has_transition():
""".mic-btn must define a transition for smooth state changes."""
css, _ = get_text("/static/style.css")
mic_btn_idx = css.find('.mic-btn{')
if mic_btn_idx == -1:
mic_btn_idx = css.find('.mic-btn ')
brace_open = css.find('{', mic_btn_idx)
brace_close = css.find('}', brace_open)
rule = css[brace_open:brace_close]
assert 'transition' in rule
# ── boot.js ──────────────────────────────────────────────────────────────
def test_boot_js_serves_ok():
    """boot.js must be served successfully."""
    _, status = get_text("/static/boot.js")
    assert status == 200

def test_boot_js_speech_recognition_check():
    """boot.js must check for SpeechRecognition (with webkit fallback)."""
    js, _ = get_text("/static/boot.js")
    assert 'SpeechRecognition' in js
    assert 'webkitSpeechRecognition' in js

def test_boot_js_recognition_config():
    """boot.js must configure recognition.continuous, interimResults, and lang."""
    js, _ = get_text("/static/boot.js")
    assert 'recognition.continuous' in js
    assert 'recognition.interimResults' in js
    assert 'recognition.lang' in js

def test_boot_js_recognition_not_continuous():
    """recognition.continuous must be false (auto-stop after silence)."""
    js, _ = get_text("/static/boot.js")
    # Accept both minified and spaced assignment styles.
    assert 'recognition.continuous=false' in js or 'recognition.continuous = false' in js

def test_boot_js_recognition_interim_results():
    """recognition.interimResults must be true (live transcription preview)."""
    js, _ = get_text("/static/boot.js")
    assert 'recognition.interimResults=true' in js or 'recognition.interimResults = true' in js

def test_boot_js_recognition_lang_en():
    """recognition.lang must be set (static en-US or dynamic via _locale._speech)."""
    js, _ = get_text("/static/boot.js")
    # Accept either the old static value or the new locale-driven assignment
    assert (
        "recognition.lang='en-US'" in js
        or 'recognition.lang = "en-US"' in js
        or "recognition.lang=" in js  # dynamic: recognition.lang=(_locale._speech)||'en-US'
    )

def test_boot_js_onresult_handler():
    """boot.js must define recognition.onresult to handle transcription."""
    js, _ = get_text("/static/boot.js")
    assert 'recognition.onresult' in js

def test_boot_js_onend_handler():
    """boot.js must define recognition.onend to reset state when recording stops."""
    js, _ = get_text("/static/boot.js")
    assert 'recognition.onend' in js

def test_boot_js_onerror_handler():
    """boot.js must define recognition.onerror for graceful error handling."""
    js, _ = get_text("/static/boot.js")
    assert 'recognition.onerror' in js

def test_boot_js_not_allowed_error_message():
    """onerror must handle 'not-allowed' with a user-friendly message."""
    js, _ = get_text("/static/boot.js")
    assert 'not-allowed' in js
    # Loose wording check — any of the common phrasings counts as friendly.
    assert 'permission' in js.lower() or 'denied' in js.lower() or 'access' in js.lower()

def test_boot_js_no_speech_error_message():
    """onerror must handle 'no-speech' with a user-friendly message."""
    js, _ = get_text("/static/boot.js")
    assert 'no-speech' in js

def test_boot_js_network_error_message():
    """onerror must handle 'network' error."""
    js, _ = get_text("/static/boot.js")
    # Quoted needle so unrelated words like "networking" don't match.
    assert "'network'" in js or '"network"' in js

def test_boot_js_mic_active_flag():
    """boot.js must track recording state via _micActive flag."""
    js, _ = get_text("/static/boot.js")
    assert '_micActive' in js

def test_boot_js_mic_recording_class_toggle():
    """boot.js must toggle 'recording' CSS class on the mic button."""
    js, _ = get_text("/static/boot.js")
    assert "'recording'" in js or '"recording"' in js

def test_boot_js_mic_status_toggle():
    """boot.js must show/hide #micStatus during recording."""
    js, _ = get_text("/static/boot.js")
    assert 'micStatus' in js

def test_boot_js_send_stops_mic():
    """btnSend onclick must stop mic before sending (send guard)."""
    js, _ = get_text("/static/boot.js")
    # The send button onclick should check _micActive and stop recording
    send_onclick_idx = js.find("$('btnSend').onclick")
    assert send_onclick_idx != -1
    # Find the handler code — check that _micActive check appears near send assignment
    # NOTE(review): scanning only up to the first ';' assumes a compact
    # single-statement handler — confirm against boot.js formatting.
    handler_end = js.find(';', send_onclick_idx)
    handler = js[send_onclick_idx:handler_end + 1]
    assert '_micActive' in handler or 'stopMic' in handler.lower()

def test_boot_js_btn_mic_onclick():
    """boot.js must attach an onclick handler to btnMic."""
    js, _ = get_text("/static/boot.js")
    assert 'btn.onclick' in js or "btnMic.onclick" in js or "$('btnMic').onclick" in js

def test_boot_js_recognition_start():
    """boot.js must call recognition.start() to begin recording."""
    js, _ = get_text("/static/boot.js")
    assert 'recognition.start()' in js

def test_boot_js_recognition_stop():
    """boot.js must call recognition.stop() to end recording."""
    js, _ = get_text("/static/boot.js")
    assert 'recognition.stop()' in js

def test_boot_js_iife_guard():
    """Mic logic must be wrapped in an IIFE so it doesn't pollute global scope."""
    js, _ = get_text("/static/boot.js")
    # IIFE pattern: (function(){...})() or (() => {...})()
    assert '(function(){' in js or '(function () {' in js

def test_boot_js_browser_unsupported_guard_uses_fallback_capabilities():
    """boot.js must keep the mic available when either speech recognition OR recorder capture exists."""
    js, _ = get_text("/static/boot.js")
    assert 'navigator.mediaDevices' in js
    assert 'getUserMedia' in js
    assert 'MediaRecorder' in js
    assert '_canRecordAudio' in js or 'canRecordAudio' in js, \
        "boot.js should compute a recorder fallback instead of bailing only on SpeechRecognition"

def test_boot_js_media_recorder_fallback_posts_to_transcribe_api():
    """Desktop fallback must send recorded audio to /api/transcribe for transcription."""
    js, _ = get_text("/static/boot.js")
    assert 'api/transcribe' in js
    assert 'fetch(' in js

def test_routes_define_transcribe_endpoint():
    """Server routes must expose /api/transcribe for MediaRecorder fallback uploads."""
    # Static source check against the repo file (tests/.. -> repo root),
    # not an HTTP request to the running server.
    routes = pathlib.Path(__file__).parent.parent.joinpath("api/routes.py").read_text(encoding="utf-8")
    assert '"/api/transcribe"' in routes

def test_boot_js_shows_mic_button_when_any_voice_path_is_supported():
    """boot.js must reveal btnMic when speech recognition or recorder fallback is available."""
    js, _ = get_text("/static/boot.js")
    assert "btn.style.display=''" in js or 'btn.style.display = ""' in js

def test_boot_js_show_toast_on_error():
    """boot.js must call showToast() for mic errors."""
    js, _ = get_text("/static/boot.js")
    assert 'showToast' in js

def test_boot_js_autoresize_called():
    """boot.js must call autoResize() after updating textarea from transcript."""
    js, _ = get_text("/static/boot.js")
    assert 'autoResize()' in js

# ── Append behaviour (fix: mic appends to existing text, not replace) ────
def test_boot_js_prefix_variable_declared():
    """boot.js must declare _prefix variable to snapshot pre-existing textarea content."""
    js, _ = get_text("/static/boot.js")
    assert "_prefix" in js

def test_boot_js_prefix_captured_on_start():
    """_prefix must be set from ta.value when the user starts recording."""
    js, _ = get_text("/static/boot.js")
    # _prefix assignment must happen in the btn.onclick else branch (before recognition.start)
    btn_onclick_idx = js.find("btn.onclick")
    btn_onclick_end = js.find("};", btn_onclick_idx)
    onclick_body = js[btn_onclick_idx:btn_onclick_end]
    assert "_prefix=ta.value" in onclick_body or "_prefix = ta.value" in onclick_body

def test_boot_js_onresult_prepends_prefix():
    """onresult must include _prefix when writing to textarea (append, not replace)."""
    js, _ = get_text("/static/boot.js")
    onresult_idx = js.find("recognition.onresult")
    onresult_end = js.find("};", onresult_idx)
    onresult_body = js[onresult_idx:onresult_end]
    # ta.value must be set to _prefix + something, not just the transcript alone
    assert "_prefix" in onresult_body

def test_boot_js_onend_commits_with_prefix():
    """onend must commit _prefix + _finalText so appended text survives after recognition ends."""
    js, _ = get_text("/static/boot.js")
    onend_idx = js.find("recognition.onend")
    onend_end = js.find("};", onend_idx)
    onend_body = js[onend_idx:onend_end]
    assert "_prefix" in onend_body

def test_boot_js_prefix_reset_on_stop():
    """_prefix must be reset when recording stops so next session starts clean."""
    js, _ = get_text("/static/boot.js")
    # _setRecording(false) clears both _finalText and _prefix
    # NOTE(review): find("}") stops at the FIRST closing brace after the
    # function head, so only code before any nested block is scanned.
    set_rec_idx = js.find("function _setRecording")
    set_rec_end = js.find("}", set_rec_idx) + 1
    fn_body = js[set_rec_idx:set_rec_end]
    assert "_prefix" in fn_body

def test_boot_js_auto_space_between_prefix_and_transcript():
    """onend must insert a space between existing text and new transcript when needed."""
    js, _ = get_text("/static/boot.js")
    onend_idx = js.find("recognition.onend")
    onend_end = js.find("};", onend_idx)
    onend_body = js[onend_idx:onend_end]
    # Should handle spacing — look for trimStart or endsWith(' ') check
    has_spacing = ("trimStart" in onend_body or "endsWith(' ')" in onend_body
                   or "endsWith(\" \")" in onend_body or "endsWith('\\n')" in onend_body)
    assert has_spacing, "onend should handle spacing between prefix and new transcript"
# ── Regression: existing behaviour unchanged ──────────────────────────────
def test_attach_button_still_wired():
    """Regression guard: the attach-button click handler is still registered."""
    source, _ = get_text("/static/boot.js")
    assert "$('btnAttach').onclick" in source

def test_file_input_onchange_still_wired():
    """Regression guard: the file input's change handler is still registered."""
    source, _ = get_text("/static/boot.js")
    assert "$('fileInput').onchange" in source

def test_index_html_still_has_send_button():
    """Regression guard: the send button markup is still present."""
    page, _ = get_text("/")
    assert 'id="btnSend"' in page

def test_index_html_still_has_attach_button():
    """Regression guard: the attach button markup is still present."""
    page, _ = get_text("/")
    assert 'id="btnAttach"' in page

341
tests/test_sprint20b.py Normal file
View File

@@ -0,0 +1,341 @@
"""
Sprint 21 Tests: Send button polish — hidden until content, pop-in animation,
icon-only circle design.
"""
import re
import urllib.request
from tests._pytest_port import BASE
def get_text(path):
    """GET *path* on the test server; return (decoded response body, status)."""
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        text = resp.read().decode()
        status = resp.status
    return text, status
# ── index.html ────────────────────────────────────────────────────────────
def test_send_button_present():
    """btnSend must still exist in the DOM."""
    html, status = get_text("/")
    assert status == 200
    assert 'id="btnSend"' in html

def test_send_button_disabled_by_default():
    """btnSend must start disabled — enabled only when there is content."""
    html, _ = get_text("/")
    # Only the opening tag is inspected, so any form of the attribute matches.
    btn_match = re.search(r'id="btnSend"[^>]*>', html)
    assert btn_match, "btnSend element not found"
    assert 'disabled' in btn_match.group(0)

def test_send_button_no_text_label():
    """Send button must be icon-only — no visible 'Send' text label."""
    html, _ = get_text("/")
    # Find the full button element (from opening tag to closing tag)
    btn_open_end = html.find('>', html.find('id="btnSend"')) + 1
    btn_end = html.find('</button>', btn_open_end) + len('</button>')
    btn_inner = html[btn_open_end:btn_end]
    # Strip SVG content and any remaining tags; check visible text
    no_svg = re.sub(r'<svg[^>]*>.*?</svg>', '', btn_inner, flags=re.DOTALL)
    visible_text = re.sub(r'<[^>]+>', '', no_svg).strip()
    assert visible_text == '', f"Send button has visible text: {visible_text!r}"

def test_send_button_has_svg_icon():
    """Send button must have an SVG icon."""
    html, _ = get_text("/")
    btn_start = html.find('id="btnSend"')
    btn_end = html.find('</button>', btn_start) + len('</button>')
    btn_html = html[btn_start:btn_end]
    assert '<svg' in btn_html

def test_send_button_has_title_attribute():
    """btnSend must have a title attribute for accessibility (replaces text label)."""
    html, _ = get_text("/")
    btn_match = re.search(r'id="btnSend"[^>]*>', html)
    assert btn_match
    assert 'title=' in btn_match.group(0)

def test_send_button_svg_arrow_up():
    """Send button SVG should use an upward arrow (line + polyline or path)."""
    html, _ = get_text("/")
    btn_start = html.find('id="btnSend"')
    btn_end = html.find('</button>', btn_start) + len('</button>')
    btn_html = html[btn_start:btn_end]
    # Must have some directional shape element
    has_shape = ('<line' in btn_html or '<polyline' in btn_html or
                 '<polygon' in btn_html or '<path' in btn_html)
    assert has_shape, "Send button SVG missing directional shape"
# ── style.css ────────────────────────────────────────────────────────────
def test_send_btn_is_circle():
    """send-btn must use border-radius:50% for the circle shape."""
    css, status = get_text("/static/style.css")
    assert status == 200
    # NOTE(review): the rule-extraction pattern below assumes '.send-btn{'
    # appears verbatim; if the selector is missing, find() returns -1 and
    # the later find('{', -1)/slice degrades to an empty-string assertion
    # with a confusing failure message. Same applies to all sibling tests.
    send_idx = css.find('.send-btn{')
    brace_open = css.find('{', send_idx)
    brace_close = css.find('}', brace_open)
    rule = css[brace_open:brace_close]
    assert 'border-radius:50%' in rule or 'border-radius: 50%' in rule

def test_send_btn_fixed_dimensions():
    """send-btn must have explicit width and height (icon-circle, not text-padded)."""
    css, _ = get_text("/static/style.css")
    send_idx = css.find('.send-btn{')
    brace_open = css.find('{', send_idx)
    brace_close = css.find('}', brace_open)
    rule = css[brace_open:brace_close]
    assert 'width:' in rule or 'width :' in rule
    assert 'height:' in rule or 'height :' in rule

def test_send_btn_no_old_padding():
    """send-btn must not use text padding layout (old pill style removed)."""
    css, _ = get_text("/static/style.css")
    send_idx = css.find('.send-btn{')
    brace_open = css.find('{', send_idx)
    brace_close = css.find('}', brace_open)
    rule = css[brace_open:brace_close]
    # Old style used padding:7px 18px — should be gone
    assert 'padding:7px' not in rule and 'padding: 7px' not in rule

def test_send_btn_accent_background():
    """send-btn background must use the accent color variable."""
    css, _ = get_text("/static/style.css")
    send_idx = css.find('.send-btn{')
    brace_open = css.find('{', send_idx)
    brace_close = css.find('}', brace_open)
    rule = css[brace_open:brace_close]
    assert 'var(--accent)' in rule or 'var(--blue)' in rule or '7cb9ff' in rule

def test_send_btn_has_transition():
    """send-btn must have transition for smooth hover/active states."""
    css, _ = get_text("/static/style.css")
    send_idx = css.find('.send-btn{')
    brace_open = css.find('{', send_idx)
    brace_close = css.find('}', brace_open)
    rule = css[brace_open:brace_close]
    assert 'transition' in rule

def test_send_btn_has_box_shadow():
    """send-btn must have a box-shadow glow effect."""
    css, _ = get_text("/static/style.css")
    send_idx = css.find('.send-btn{')
    brace_open = css.find('{', send_idx)
    brace_close = css.find('}', brace_open)
    rule = css[brace_open:brace_close]
    assert 'box-shadow' in rule

def test_send_btn_hover_has_scale():
    """send-btn:hover must use transform:scale for a satisfying hover effect."""
    css, _ = get_text("/static/style.css")
    hover_idx = css.find('.send-btn:hover{')
    brace_open = css.find('{', hover_idx)
    brace_close = css.find('}', brace_open)
    rule = css[brace_open:brace_close]
    assert 'scale' in rule

def test_send_btn_active_shrinks():
    """send-btn:active must scale down slightly for tactile press feedback."""
    css, _ = get_text("/static/style.css")
    active_idx = css.find('.send-btn:active{')
    brace_open = css.find('{', active_idx)
    brace_close = css.find('}', brace_open)
    rule = css[brace_open:brace_close]
    assert 'scale' in rule

def test_send_btn_disabled_rule_exists():
    """send-btn:disabled must still be styled."""
    css, _ = get_text("/static/style.css")
    assert '.send-btn:disabled' in css

def test_send_btn_visible_class_defined():
    """.send-btn.visible class must be defined for the pop-in animation."""
    css, _ = get_text("/static/style.css")
    assert '.send-btn.visible' in css

def test_send_pop_in_keyframes_defined():
    """@keyframes send-pop-in must be defined."""
    css, _ = get_text("/static/style.css")
    assert 'send-pop-in' in css
    assert '@keyframes' in css
def _extract_keyframe(css, name):
"""Extract the full @keyframes block for the given animation name."""
# Find '@keyframes <name>' directly (forward search) to avoid hitting
# an earlier keyframe when multiple are defined on the same line.
kf_start = css.find('@keyframes ' + name)
assert kf_start != -1, f"@keyframes {name} not found in CSS"
depth = 0
kf_end = kf_start
for i, ch in enumerate(css[kf_start:], kf_start):
if ch == '{':
depth += 1
elif ch == '}':
depth -= 1
if depth == 0:
kf_end = i
break
return css[kf_start:kf_end]
def test_send_pop_in_uses_scale():
    """send-pop-in keyframe must animate from a scaled-down state."""
    css, _ = get_text("/static/style.css")
    kf_rule = _extract_keyframe(css, 'send-pop-in')
    assert 'scale' in kf_rule

def test_send_pop_in_uses_opacity():
    """send-pop-in keyframe must fade in (opacity transition)."""
    css, _ = get_text("/static/style.css")
    kf_rule = _extract_keyframe(css, 'send-pop-in')
    assert 'opacity' in kf_rule

def test_send_btn_mobile_override_no_padding():
    """Mobile override for send-btn must not add text padding (keeps circle shape)."""
    css, _ = get_text("/static/style.css")
    # Find the @media block
    # NOTE(review): only the FIRST @media block is inspected — a send-btn
    # override in a later media query would be missed.
    media_idx = css.find('@media')
    send_mobile_idx = css.find('.send-btn', media_idx)
    if send_mobile_idx == -1:
        return  # No mobile override, fine
    brace_open = css.find('{', send_mobile_idx)
    brace_close = css.find('}', brace_open)
    rule = css[brace_open:brace_close]
    assert 'padding:' not in rule and 'font-size' not in rule
# ── ui.js ─────────────────────────────────────────────────────────────────
def test_ui_js_update_send_btn_function():
    """ui.js must define updateSendBtn() function."""
    js, status = get_text("/static/ui.js")
    assert status == 200
    assert 'function updateSendBtn' in js

def test_update_send_btn_checks_content():
    """updateSendBtn must check textarea value length."""
    js, _ = get_text("/static/ui.js")
    # Slice out the function body: from the 'function' keyword up to the
    # first closing brace at column 0 ('\n}').
    fn_idx = js.find('function updateSendBtn')
    fn_end = js.find('\n}', fn_idx) + 2
    fn_body = js[fn_idx:fn_end]
    assert 'msg' in fn_body
    assert '.value' in fn_body
    assert '.length' in fn_body or '.trim()' in fn_body

def test_update_send_btn_checks_pending_files():
    """updateSendBtn must also show send button when files are attached."""
    js, _ = get_text("/static/ui.js")
    fn_idx = js.find('function updateSendBtn')
    fn_end = js.find('\n}', fn_idx) + 2
    fn_body = js[fn_idx:fn_end]
    assert 'pendingFiles' in fn_body

def test_update_send_btn_uses_visible_class():
    """updateSendBtn must add .visible class to trigger the pop-in animation."""
    js, _ = get_text("/static/ui.js")
    fn_idx = js.find('function updateSendBtn')
    fn_end = js.find('\n}', fn_idx) + 2
    fn_body = js[fn_idx:fn_end]
    assert 'visible' in fn_body

def test_update_send_btn_uses_disabled():
    """updateSendBtn must disable the button when no content or busy."""
    js, _ = get_text("/static/ui.js")
    fn_idx = js.find('function updateSendBtn')
    fn_end = js.find('\n}', fn_idx) + 2
    fn_body = js[fn_idx:fn_end]
    assert 'disabled' in fn_body

def test_set_busy_calls_update_send_btn():
    """setBusy must call updateSendBtn() so button hides while agent is responding."""
    js, _ = get_text("/static/ui.js")
    busy_idx = js.find('function setBusy')
    busy_end = js.find('\n}', busy_idx) + 2
    busy_body = js[busy_idx:busy_end]
    assert 'updateSendBtn' in busy_body

def test_render_tray_calls_update_send_btn():
    """renderTray must call updateSendBtn() so button appears when files are attached."""
    js, _ = get_text("/static/ui.js")
    tray_idx = js.find('function renderTray')
    tray_end = js.find('\n}', tray_idx) + 2
    tray_body = js[tray_idx:tray_end]
    assert 'updateSendBtn' in tray_body

# ── boot.js ──────────────────────────────────────────────────────────────
def test_boot_js_input_calls_update_send_btn():
    """boot.js input event listener must call updateSendBtn()."""
    js, status = get_text("/static/boot.js")
    assert status == 200
    assert 'updateSendBtn' in js

# ── messages.js ───────────────────────────────────────────────────────────
def test_auto_resize_calls_update_send_btn():
    """autoResize() must call updateSendBtn() so button hides after send clears textarea."""
    js, status = get_text("/static/messages.js")
    assert status == 200
    assert 'updateSendBtn' in js

# ── Regression: existing behaviour unchanged ──────────────────────────────
def test_send_button_still_has_send_btn_class():
    """btnSend must still carry class='send-btn' for CSS targeting."""
    html, _ = get_text("/")
    assert 'class="send-btn"' in html

def test_ui_js_set_busy_calls_update_send_btn():
    """setBusy must call updateSendBtn to manage button disabled state."""
    # NOTE(review): duplicates test_set_busy_calls_update_send_btn above —
    # consider removing one of the two.
    js, _ = get_text("/static/ui.js")
    busy_idx = js.find('function setBusy')
    busy_end = js.find('\n}', busy_idx) + 2
    busy_body = js[busy_idx:busy_end]
    assert 'updateSendBtn' in busy_body

def test_index_html_attach_button_unchanged():
    """btnAttach must still be present (no regression)."""
    html, _ = get_text("/")
    assert 'id="btnAttach"' in html

def test_send_function_still_exists():
    """send() function must still be defined in messages.js."""
    js, _ = get_text("/static/messages.js")
    assert 'async function send()' in js

196
tests/test_sprint23.py Normal file
View File

@@ -0,0 +1,196 @@
"""
Sprint 23 Tests: agentic transparency — token/cost display, session usage fields,
subagent card names, skill picker in cron, skill linked files.
"""
import json, urllib.error, urllib.request
from tests._pytest_port import BASE
def get(path):
    """GET *path* on the test server; return (decoded JSON body, HTTP status)."""
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        payload = json.loads(resp.read())
        code = resp.status
    return payload, code

def post(path, body=None):
    """POST *body* (default {}) as JSON to *path*; return (decoded JSON, status).

    HTTP error responses are not raised — their JSON body and status code
    are returned just like a success, so tests can assert on error payloads.
    """
    encoded = json.dumps(body or {}).encode()
    request = urllib.request.Request(
        BASE + path, data=encoded,
        headers={"Content-Type": "application/json"})
    try:
        with urllib.request.urlopen(request, timeout=10) as resp:
            return json.loads(resp.read()), resp.status
    except urllib.error.HTTPError as err:
        return json.loads(err.read()), err.code
def make_session(created_list):
    """Create a fresh session, record its id in *created_list* for cleanup,
    and return (session_id, session dict from the creation response)."""
    resp, _ = post("/api/session/new", {})
    session = resp["session"]
    sid = session["session_id"]
    created_list.append(sid)
    return sid, session
# ── Session usage fields ─────────────────────────────────────────────────
def test_new_session_has_usage_fields():
"""New session should include input_tokens, output_tokens, estimated_cost."""
created = []
try:
sid, sess = make_session(created)
post("/api/session/rename", {"session_id": sid, "title": "Usage Test"})
d, status = get(f"/api/session?session_id={sid}")
assert status == 200
sess = d["session"]
assert "input_tokens" in sess, "input_tokens field missing from session"
assert "output_tokens" in sess, "output_tokens field missing from session"
assert "estimated_cost" in sess, "estimated_cost field missing from session"
assert sess["input_tokens"] == 0
assert sess["output_tokens"] == 0
finally:
for s in created:
post("/api/session/delete", {"session_id": s})
def test_session_compact_has_usage_fields():
"""Session list should include usage fields in compact form."""
created = []
try:
sid, _ = make_session(created)
post("/api/session/rename", {"session_id": sid, "title": "Compact Usage"})
d, status = get("/api/sessions")
assert status == 200
match = [s for s in d["sessions"] if s["session_id"] == sid]
assert len(match) == 1
assert "input_tokens" in match[0], "input_tokens missing from session list"
assert "output_tokens" in match[0], "output_tokens missing from session list"
assert match[0]["input_tokens"] == 0
assert match[0]["output_tokens"] == 0
finally:
for s in created:
post("/api/session/delete", {"session_id": s})
def test_session_usage_defaults_zero():
"""New session usage fields should default to 0/None in creation response."""
created = []
try:
sid, sess = make_session(created)
assert "input_tokens" in sess, "input_tokens missing from new session response"
assert "output_tokens" in sess, "output_tokens missing from new session response"
assert sess["input_tokens"] == 0
assert sess["output_tokens"] == 0
finally:
for s in created:
post("/api/session/delete", {"session_id": s})
# ── Skills content linked_files ──────────────────────────────────────────
def test_skills_content_requires_name():
"""GET /api/skills/content without name should return 400 (or 500 if skills module unavailable)."""
try:
d, status = get("/api/skills/content")
assert status in (400, 500), f"Expected 400/500 for missing name, got {status}"
except urllib.error.HTTPError as e:
assert e.code in (400, 500), f"Expected 400/500 for missing name, got {e.code}"
def test_skills_content_has_linked_files_key():
"""GET /api/skills/content should always return a linked_files key."""
try:
d, status = get("/api/skills")
if not d.get("skills"):
return # no skills in test env, skip
name = d["skills"][0]["name"]
d2, status2 = get(f"/api/skills/content?name={name}")
assert status2 == 200
assert "linked_files" in d2, "linked_files key missing from skills/content response"
# linked_files must be a dict (possibly empty), not None
assert isinstance(d2["linked_files"], dict), "linked_files must be a dict"
except urllib.error.HTTPError:
pass # skills module unavailable in this env
def test_skills_content_file_path_traversal_rejected():
"""GET /api/skills/content with traversal path should be rejected."""
from urllib.parse import quote as _quote
try:
d, status = get("/api/skills")
if not d.get("skills"):
return # no skills in test env, skip
name = d["skills"][0]["name"]
traversal = _quote("../../etc/passwd", safe="")
try:
d2, status2 = get(f"/api/skills/content?name={name}&file={traversal}")
assert status2 in (400, 404, 500), f"Path traversal should be rejected, got {status2}"
except urllib.error.HTTPError as e:
assert e.code in (400, 404, 500), f"Path traversal should be rejected, got {e.code}"
except urllib.error.HTTPError:
pass # skills module unavailable in test env
def test_skills_content_wildcard_name_rejected():
"""GET /api/skills/content with glob wildcard in name should be rejected when file param present."""
try:
try:
d2, status2 = get("/api/skills/content?name=*&file=SKILL.md")
assert status2 == 400, f"Wildcard name should return 400, got {status2}"
except urllib.error.HTTPError as e:
assert e.code in (400, 404), f"Wildcard name should be rejected, got {e.code}"
except Exception:
pass
# ── Cron create with skills ───────────────────────────────────────────────
def test_cron_create_accepts_skills():
    """POST /api/crons/create should accept and store a skills array (or 500 if cron module unavailable)."""
    # Track job IDs we created so the finally-block can clean them up even
    # when an assertion fails midway.
    created_jobs = []
    try:
        body = {
            "name": "test-sprint23-skills",
            "schedule": "0 9 * * *",
            "prompt": "test prompt",
            "deliver": "local",
            "skills": ["some-skill"]
        }
        d, status = post("/api/crons/create", body)
        # Heuristic skip: if the server reports a module/cron availability
        # error, the cron feature isn't present in this environment.
        if status in (400, 500) and ('module' in str(d.get('error','')) or 'cron' in str(d.get('error',''))):
            return # cron module not available in test env
        assert status == 200, f"Expected 200 from cron create, got {status}: {d}"
        assert d.get("ok"), f"Cron create did not return ok: {d}"
        # The job ID may live under "job.id" or top-level "id" depending on
        # the server version — accept either.
        job_id = d.get("job", {}).get("id") or d.get("id")
        if job_id:
            created_jobs.append(job_id)
        # Verify job appears in list
        jobs_d, _ = get("/api/crons")
        job = next((j for j in jobs_d.get("jobs", []) if j.get("name") == "test-sprint23-skills"), None)
        assert job is not None, "Created cron job not found in job list"
        # Either plural "skills" list or legacy singular "skill" is acceptable.
        assert job.get("skills") == ["some-skill"] or job.get("skill") == "some-skill", \
            f"skills not stored on job: {job}"
    finally:
        try:
            # Delete by captured ID first, then sweep by name in case the
            # create succeeded but no ID was returned.
            for jid in created_jobs:
                post("/api/crons/delete", {"id": jid})
            jobs_d, _ = get("/api/crons")
            for j in jobs_d.get("jobs", []):
                if j.get("name") == "test-sprint23-skills":
                    post("/api/crons/delete", {"id": j["id"]})
        except Exception:
            pass # cron module may not be available
# ── Tool call integrity ──────────────────────────────────────────────────
def test_tool_calls_have_real_names():
    """Tool calls in session JSON should not have unresolved 'tool' name."""
    created = []
    try:
        sid, _ = make_session(created)
        payload, status = get(f"/api/session?session_id={sid}")
        assert status == 200
        tool_calls = payload["session"].get("tool_calls", [])
        for tc in tool_calls:
            assert tc.get("name") not in ("tool", "", None), f"Unresolved tool name: {tc}"
    finally:
        for s in created:
            post("/api/session/delete", {"session_id": s})

175
tests/test_sprint26.py Normal file
View File

@@ -0,0 +1,175 @@
"""
Sprint 26 Tests: canonical appearance settings persist and legacy theme names
map onto the new theme + skin system.
"""
import json, urllib.error, urllib.request
import pathlib
import sys
from tests._pytest_port import BASE
REPO_ROOT = pathlib.Path(__file__).resolve().parent.parent
if str(REPO_ROOT) not in sys.path:
sys.path.insert(0, str(REPO_ROOT))
from api import config
def get(path):
    """GET ``BASE + path``; return (parsed JSON body, HTTP status)."""
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        payload = resp.read()
        return json.loads(payload), resp.status
def post(path, body=None):
    """POST a JSON body; return (parsed JSON, status) even for HTTP error codes."""
    payload = json.dumps(body or {}).encode()
    request = urllib.request.Request(
        BASE + path, data=payload,
        headers={"Content-Type": "application/json"})
    try:
        with urllib.request.urlopen(request, timeout=10) as resp:
            return json.loads(resp.read()), resp.status
    except urllib.error.HTTPError as err:
        # Error responses still carry a JSON body — surface it to the caller.
        return json.loads(err.read()), err.code
# ── Theme settings ───────────────────────────────────────────────────────
def test_settings_default_theme():
    """Default theme should be 'dark'."""
    payload, status = get("/api/settings")
    assert status == 200
    assert payload.get("theme") == "dark"
def test_settings_set_theme_light_persists():
    """Setting theme to 'light' should persist and round-trip."""
    try:
        _, status = post("/api/settings", {"theme": "light"})
        assert status == 200
        fetched, _ = get("/api/settings")
        assert fetched.get("theme") == "light"
    finally:
        # Restore the default so later tests see a clean state.
        post("/api/settings", {"theme": "dark"})
def test_settings_set_theme_light():
    """Setting theme to 'light' should persist.

    NOTE(review): near-duplicate of test_settings_set_theme_light_persists;
    kept for coverage continuity.
    """
    try:
        post("/api/settings", {"theme": "light"})
        current, _ = get("/api/settings")
        assert current.get("theme") == "light"
    finally:
        post("/api/settings", {"theme": "dark"})
def test_settings_set_theme_system():
    """Setting theme to 'system' should persist."""
    try:
        post("/api/settings", {"theme": "system"})
        current, _ = get("/api/settings")
        assert current.get("theme") == "system"
    finally:
        post("/api/settings", {"theme": "dark"})
def test_settings_set_skin():
    """Setting skin should persist."""
    try:
        post("/api/settings", {"skin": "ares"})
        current, _ = get("/api/settings")
        assert current.get("skin") == "ares"
    finally:
        post("/api/settings", {"skin": "default"})
def test_settings_set_skin_poseidon():
    """Setting skin to 'poseidon' should persist."""
    try:
        post("/api/settings", {"skin": "poseidon"})
        current, _ = get("/api/settings")
        assert current.get("skin") == "poseidon"
    finally:
        post("/api/settings", {"skin": "default"})
def test_settings_legacy_theme_maps_to_dark_skin_pair():
    """Legacy theme names should map to the closest supported theme + skin."""
    try:
        _, status = post("/api/settings", {"theme": "slate"})
        assert status == 200
        current, _ = get("/api/settings")
        # Legacy 'slate' splits into dark theme + slate accent skin.
        assert current.get("theme") == "dark"
        assert current.get("skin") == "slate"
    finally:
        post("/api/settings", {"theme": "dark", "skin": "default"})
def test_settings_legacy_monokai_maps_to_sisyphus_skin():
    """Monokai should migrate onto the closest supported accent skin."""
    try:
        _, status = post("/api/settings", {"theme": "monokai"})
        assert status == 200
        current, _ = get("/api/settings")
        assert current.get("theme") == "dark"
        assert current.get("skin") == "sisyphus"
    finally:
        post("/api/settings", {"theme": "dark", "skin": "default"})
def test_settings_unknown_theme_falls_back_to_dark_default():
    """Unknown themes should normalize to a safe canonical appearance."""
    try:
        _, status = post("/api/settings", {"theme": "my-custom-theme"})
        assert status == 200
        current, _ = get("/api/settings")
        assert current.get("theme") == "dark"
        assert current.get("skin") == "default"
    finally:
        post("/api/settings", {"theme": "dark", "skin": "default"})
def test_settings_invalid_skin_falls_back_to_default():
    """Unknown skin names should normalize back to the default accent."""
    try:
        _, status = post("/api/settings", {"skin": "not-a-skin"})
        assert status == 200
        current, _ = get("/api/settings")
        assert current.get("skin") == "default"
    finally:
        post("/api/settings", {"skin": "default"})
def test_load_settings_normalizes_legacy_theme_from_file(monkeypatch, tmp_path):
    """Existing settings.json files with legacy theme names should normalize on load."""
    legacy = {"theme": "solarized"}
    settings_path = tmp_path / "settings.json"
    settings_path.write_text(json.dumps(legacy), encoding="utf-8")
    # Point the config module at the temporary file for this test only.
    monkeypatch.setattr(config, "SETTINGS_FILE", settings_path)
    loaded = config.load_settings()
    assert loaded["theme"] == "dark"
    assert loaded["skin"] == "poseidon"
def test_theme_does_not_break_other_settings():
    """Setting theme should not disturb other settings."""
    before, _ = get("/api/settings")
    send_key_before = before.get("send_key")
    try:
        post("/api/settings", {"theme": "light"})
        after, _ = get("/api/settings")
        # Unrelated keys survive a theme change.
        assert after.get("send_key") == send_key_before
        assert after.get("theme") == "light"
    finally:
        post("/api/settings", {"theme": "dark"})
def test_theme_survives_round_trip():
    """Theme set via POST should appear in subsequent GET."""
    try:
        post("/api/settings", {"theme": "light"})
        fetched, status = get("/api/settings")
        assert status == 200
        assert fetched["theme"] == "light"
    finally:
        post("/api/settings", {"theme": "dark"})

136
tests/test_sprint27.py Normal file
View File

@@ -0,0 +1,136 @@
"""
Sprint 27 Tests: configurable assistant display name (bot_name).
Tests cover settings API round-trip, empty/missing input defaults,
login page rendering, and server-side sanitization.
"""
import json
import urllib.error
import urllib.request
from tests._pytest_port import BASE
def get(path):
    """GET ``BASE + path``; return (parsed JSON body, HTTP status)."""
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        return json.loads(resp.read()), resp.status
def get_raw(path):
    """GET ``BASE + path``; return (decoded text body, HTTP status)."""
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        text = resp.read().decode()
        return text, resp.status
def post(path, body=None):
    """POST a JSON body; return (parsed JSON, status) even for HTTP error codes."""
    payload = json.dumps(body or {}).encode()
    request = urllib.request.Request(
        BASE + path, data=payload,
        headers={"Content-Type": "application/json"})
    try:
        with urllib.request.urlopen(request, timeout=10) as resp:
            return json.loads(resp.read()), resp.status
    except urllib.error.HTTPError as err:
        # Error responses still carry a JSON body — surface it to the caller.
        return json.loads(err.read()), err.code
# ── Default value ─────────────────────────────────────────────────────────
def test_settings_default_bot_name():
    """GET /api/settings should return bot_name defaulting to 'Hermes'."""
    payload, status = get("/api/settings")
    assert status == 200
    assert "bot_name" in payload
    assert payload["bot_name"] == "Hermes"
# ── Round-trip ────────────────────────────────────────────────────────────
def test_settings_set_bot_name():
    """POST /api/settings with bot_name should persist and round-trip."""
    try:
        posted, status = post("/api/settings", {"bot_name": "TestBot"})
        assert status == 200
        assert posted.get("bot_name") == "TestBot"
        fetched, _ = get("/api/settings")
        assert fetched.get("bot_name") == "TestBot"
    finally:
        post("/api/settings", {"bot_name": "Hermes"})
def test_settings_bot_name_special_chars():
    """bot_name with safe special characters should persist correctly."""
    try:
        _, status = post("/api/settings", {"bot_name": "My Assistant 2.0"})
        assert status == 200
        fetched, _ = get("/api/settings")
        assert fetched.get("bot_name") == "My Assistant 2.0"
    finally:
        post("/api/settings", {"bot_name": "Hermes"})
# ── Server-side sanitization ──────────────────────────────────────────────
def test_settings_empty_bot_name_defaults_to_hermes():
    """Posting an empty bot_name should default to 'Hermes' server-side."""
    try:
        posted, status = post("/api/settings", {"bot_name": ""})
        assert status == 200
        assert posted.get("bot_name") == "Hermes"
        fetched, _ = get("/api/settings")
        assert fetched.get("bot_name") == "Hermes"
    finally:
        post("/api/settings", {"bot_name": "Hermes"})
def test_settings_whitespace_bot_name_defaults_to_hermes():
    """Posting a whitespace-only bot_name should default to 'Hermes'."""
    try:
        posted, status = post("/api/settings", {"bot_name": "   "})
        assert status == 200
        assert posted.get("bot_name") == "Hermes"
    finally:
        post("/api/settings", {"bot_name": "Hermes"})
# ── Login page rendering ──────────────────────────────────────────────────
def test_login_page_shows_default_bot_name():
    """GET /login should contain 'Hermes' in title and h1 when default."""
    page, status = get_raw("/login")
    assert status == 200
    assert "<title>Hermes" in page
    assert "<h1>Hermes</h1>" in page
def test_login_page_shows_custom_bot_name():
    """GET /login should reflect the configured bot_name."""
    try:
        post("/api/settings", {"bot_name": "Aria"})
        page, status = get_raw("/login")
        assert status == 200
        assert "<title>Aria" in page
        assert "<h1>Aria</h1>" in page
    finally:
        post("/api/settings", {"bot_name": "Hermes"})
def test_login_page_empty_name_does_not_crash():
    """Login page must not 500 even if somehow bot_name is empty in settings."""
    # Forcing an empty stored value would require patching the settings file
    # directly, and the guard in POST /api/settings prevents storing empty —
    # so we settle for verifying that /login renders reliably.
    page, status = get_raw("/login")
    assert status == 200
    assert "Sign in" in page
def test_login_page_xss_escaped():
    """bot_name with HTML special chars should be escaped in the login page."""
    try:
        post("/api/settings", {"bot_name": "<script>alert(1)</script>"})
        page, status = get_raw("/login")
        assert status == 200
        # The raw tag must never render unescaped...
        assert "<script>alert(1)</script>" not in page
        # ...while the HTML-entity form should be present.
        assert "&lt;script&gt;" in page
    finally:
        post("/api/settings", {"bot_name": "Hermes"})

224
tests/test_sprint28.py Normal file
View File

@@ -0,0 +1,224 @@
"""
Sprint 28 Tests: /personality slash command — backend API coverage.
Tests: GET /api/personalities, POST /api/personality/set, Session.compact(),
path traversal defence, size cap, clear personality.
"""
import json
import pathlib
import shutil
import sys
import urllib.error
import urllib.request
# Import test constants from conftest (same process — these are module-level values)
sys.path.insert(0, str(pathlib.Path(__file__).parent))
from conftest import TEST_STATE_DIR
from tests._pytest_port import BASE
def get(path):
    """GET ``BASE + path``; return (parsed JSON body, HTTP status)."""
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        return json.loads(resp.read()), resp.status
def post(path, body=None):
    """POST a JSON body; return (parsed JSON, status) even for HTTP error codes."""
    payload = json.dumps(body or {}).encode()
    request = urllib.request.Request(
        BASE + path, data=payload,
        headers={"Content-Type": "application/json"})
    try:
        with urllib.request.urlopen(request, timeout=10) as resp:
            return json.loads(resp.read()), resp.status
    except urllib.error.HTTPError as err:
        return json.loads(err.read()), err.code
def _personalities_dir():
    """Return (creating if needed) the personalities dir the test server uses.

    conftest sets HERMES_HOME=TEST_STATE_DIR in the server's environment, so
    api/profiles resolves its home to TEST_STATE_DIR and personalities live
    at TEST_STATE_DIR/personalities.
    """
    path = TEST_STATE_DIR / 'personalities'
    path.mkdir(parents=True, exist_ok=True)
    return path
def _make_personality(name, content="# Test Bot\nA test personality."):
    """Create a personality directory containing a SOUL.md and return it."""
    target = _personalities_dir() / name
    target.mkdir(parents=True, exist_ok=True)
    soul = target / "SOUL.md"
    soul.write_text(content)
    return target
def _make_session():
    """Create a fresh session via the API and return its session_id."""
    payload, status = post("/api/session/new", {})
    assert status == 200, f"Failed to create session: {payload}"
    return payload["session"]["session_id"]
def _cleanup_session(sid):
    """Best-effort delete of a test session; failures are ignored."""
    try:
        post("/api/session/delete", {"session_id": sid})
    except Exception:
        pass
# ── GET /api/personalities ────────────────────────────────────────────────────
def test_personalities_empty_when_none_exist():
    """GET /api/personalities returns empty list when no personalities exist."""
    base = _personalities_dir()
    # Remove every real personality directory; leave symlinks untouched.
    for entry in list(base.iterdir()):
        if entry.is_dir() and not entry.is_symlink():
            shutil.rmtree(entry)
    payload, status = get("/api/personalities")
    assert status == 200
    assert payload.get("personalities") == []
def test_personalities_lists_from_config():
    """GET /api/personalities returns personalities from config.yaml agent.personalities.

    Effectively a no-op if no personalities are configured in the test env.
    """
    payload, status = get("/api/personalities")
    assert status == 200
    assert isinstance(payload.get("personalities"), list)
    # When entries exist, each must carry the expected keys.
    for entry in payload.get("personalities", []):
        assert "name" in entry
        assert "description" in entry
def test_personalities_returns_empty_when_none_configured():
    """GET /api/personalities returns empty list when no personalities in config."""
    # The test server starts from a clean state dir (no config.yaml), so
    # agent.personalities defaults to empty — but the real ~/.hermes config
    # may still be picked up, so we only assert the response structure.
    payload, status = get("/api/personalities")
    assert status == 200
    assert isinstance(payload.get("personalities"), list)
def test_personalities_skips_non_dict_config():
    """GET /api/personalities handles non-dict agent config gracefully."""
    payload, status = get("/api/personalities")
    assert status == 200
    assert isinstance(payload.get("personalities"), list)
# ── POST /api/personality/set ─────────────────────────────────────────────────
_test_personalities = {}
def _inject_personality(name, value):
    """Register *value* under *name* in the test config.yaml so the server sees it."""
    _test_personalities.update({name: value})
    _write_test_config()
def _remove_personality(name):
    """Drop *name* from the test config.yaml (no-op when absent)."""
    if name in _test_personalities:
        del _test_personalities[name]
    _write_test_config()
def _write_test_config():
    """Write config.yaml with test personalities using simple YAML format.

    Values are emitted as double-quoted YAML scalars. Backslashes and double
    quotes inside a value are escaped so the generated file stays parseable
    even when a test injects a value containing those characters (the
    previous f'"{v}"' form produced invalid YAML in that case). Output is
    unchanged for plain values.
    """
    def _quoted(value):
        # YAML double-quoted scalar: escape backslash first, then the quote.
        text = str(value).replace('\\', '\\\\').replace('"', '\\"')
        return f'"{text}"'
    TEST_STATE_DIR.mkdir(parents=True, exist_ok=True)
    config_path = TEST_STATE_DIR / 'config.yaml'
    lines = ['agent:', '  personalities:']
    for pname, pval in _test_personalities.items():
        if isinstance(pval, dict):
            # Nested mapping form: name -> {key: value, ...}
            lines.append(f'    {pname}:')
            for k, v in pval.items():
                lines.append(f'      {k}: {_quoted(v)}')
        else:
            # Shorthand form: name -> scalar prompt string
            lines.append(f'    {pname}: {_quoted(pval)}')
    config_path.write_text('\n'.join(lines) + '\n')
def test_set_personality_valid():
    """Setting a personality that exists in config stores name and returns prompt.

    Effectively skipped if config.yaml has no personalities (common in test
    environments).
    """
    listing, _ = get("/api/personalities")
    if not listing.get("personalities"):
        return # skip — no personalities in test server config
    name = listing["personalities"][0]["name"]
    sid = _make_session()
    try:
        result, code = post("/api/personality/set", {"session_id": sid, "name": name})
        assert code == 200
        assert result.get("ok") is True
        assert result.get("personality") == name
    finally:
        _cleanup_session(sid)
def test_set_personality_persists_in_compact():
    """After setting personality, GET /api/session returns personality in compact.

    Effectively skipped if config.yaml has no personalities.
    """
    listing, _ = get("/api/personalities")
    if not listing.get("personalities"):
        return # skip
    name = listing["personalities"][0]["name"]
    sid = _make_session()
    try:
        post("/api/personality/set", {"session_id": sid, "name": name})
        fetched, fetch_status = get(f"/api/session?session_id={sid}")
        assert fetch_status == 200
        session = fetched.get("session", {})
        assert session.get("personality") == name
    finally:
        _cleanup_session(sid)
def test_clear_personality_sets_null():
    """Clearing personality with name='' sets it to None (null in JSON)."""
    sid = _make_session()
    try:
        # Empty name means "unset" — no config validation needed for a clear.
        cleared, code = post("/api/personality/set", {"session_id": sid, "name": ""})
        assert code == 200
        assert cleared.get("personality") is None
        # Confirm the cleared value was persisted.
        fetched, fetch_code = get(f"/api/session?session_id={sid}")
        assert fetch_code == 200
        assert fetched.get("session", {}).get("personality") is None
    finally:
        _cleanup_session(sid)
def test_set_personality_not_found_returns_404():
    """Setting a non-existent personality returns 404."""
    sid = _make_session()
    try:
        _, status = post("/api/personality/set",
                         {"session_id": sid, "name": "doesnotexist"})
        assert status == 404
    finally:
        _cleanup_session(sid)
def test_set_personality_nonexistent_returns_404():
    """Names not in config.yaml agent.personalities return 404.

    NOTE(review): near-duplicate of test_set_personality_not_found_returns_404;
    kept for coverage continuity.
    """
    sid = _make_session()
    try:
        payload, status = post("/api/personality/set",
                               {"session_id": sid, "name": "doesnotexist"})
        assert status == 404, f"Expected 404, got {status}: {payload}"
    finally:
        _cleanup_session(sid)
def test_set_personality_missing_session_returns_404():
    """Setting personality on non-existent session returns 404."""
    _, status = post("/api/personality/set",
                     {"session_id": "nonexistent000", "name": "x"})
    assert status == 404

731
tests/test_sprint29.py Normal file
View File

@@ -0,0 +1,731 @@
"""
Sprint 29 Tests: Security hardening — 12 fixes from PR #171.
Covers:
1. CSRF protection — cross-origin POST rejected, same-origin allowed
2. Login rate limiting — 5th attempt 429, 6th rejected, still works after burst
3. Session ID validation — non-hex chars rejected in Session.load()
4. Error path sanitization — _sanitize_error() strips filesystem paths
5. Secure cookie detection — getattr used safely on plain socket
6. HMAC signature length — 32-char hex (128-bit), not 16
7. Skills path traversal — path outside SKILLS_DIR rejected
8. Content-Disposition for dangerous MIME types — HTML/SVG force download
9. PBKDF2 password hashing — save_settings uses auth._hash_password
10. Non-loopback startup warning (manual / integration test)
11. SSRF DNS check logic (unit test on helper function)
12. ENV_LOCK export — _ENV_LOCK importable from streaming module
"""
import importlib
import json
import pathlib
import sys
import time
import urllib.error
import urllib.parse
import urllib.request
sys.path.insert(0, str(pathlib.Path(__file__).parent))
from conftest import TEST_STATE_DIR
from tests._pytest_port import BASE
def get(path, headers=None):
    """GET ``BASE + path`` with optional headers; return (json, status) even on HTTP errors."""
    request = urllib.request.Request(BASE + path, headers=headers or {})
    try:
        with urllib.request.urlopen(request, timeout=10) as resp:
            return json.loads(resp.read()), resp.status
    except urllib.error.HTTPError as err:
        return json.loads(err.read()), err.code
def post(path, body=None, headers=None):
    """POST JSON with optional extra headers; return (json, status) even on HTTP errors."""
    payload = json.dumps(body or {}).encode()
    merged = {"Content-Type": "application/json"}
    merged.update(headers or {})
    request = urllib.request.Request(BASE + path, data=payload, headers=merged)
    try:
        with urllib.request.urlopen(request, timeout=10) as resp:
            return json.loads(resp.read()), resp.status
    except urllib.error.HTTPError as err:
        return json.loads(err.read()), err.code
def get_raw_with_headers(path):
    """GET ``BASE + path``; return (raw bytes, response-header dict, status)."""
    request = urllib.request.Request(BASE + path)
    with urllib.request.urlopen(request, timeout=10) as resp:
        return resp.read(), dict(resp.headers.items()), resp.status
# ── 1. CSRF Protection ─────────────────────────────────────────────────────
class TestCSRF:
    """CSRF protection coverage: live-endpoint probes against the test server
    plus direct unit calls into api.routes._check_csrf via _csrf_allowed."""
    @staticmethod
    def _csrf_allowed(headers):
        # Build a minimal request-like object — _check_csrf only reads
        # .headers — and return its boolean verdict.
        from types import SimpleNamespace
        from api.routes import _check_csrf
        return _check_csrf(SimpleNamespace(headers=headers))
    def test_no_origin_no_referer_allowed(self):
        """Curl-style request with no Origin/Referer must pass CSRF check."""
        body, status = post("/api/sessions/new", {})
        # Should succeed (200 or 404) but NOT 403
        assert status != 403, f"Expected non-403 for no-origin request, got {status}"
    def test_cross_origin_post_rejected(self):
        """Cross-origin POST (Origin != Host) must be rejected with 403."""
        # NOTE(review): Host is hardcoded to :8788 while conftest may choose a
        # different live port — presumably the check compares Origin against
        # the Host *header*, so the mismatch path is still exercised; confirm.
        body, status = post(
            "/api/sessions/new",
            {},
            headers={"Origin": "http://evil.com", "Host": "127.0.0.1:8788"},
        )
        assert status == 403, f"Expected 403 for cross-origin request, got {status}: {body}"
        assert "cross-origin" in body.get("error", "").lower() or "rejected" in body.get("error", "").lower()
    def test_same_origin_post_allowed(self):
        """Same-origin POST (Origin matches Host) must be allowed."""
        body, status = post(
            "/api/sessions/new",
            {},
            headers={"Origin": "http://127.0.0.1:8788", "Host": "127.0.0.1:8788"},
        )
        assert status != 403, f"Expected non-403 for same-origin request, got {status}: {body}"
    def test_same_origin_referer_allowed(self):
        """Same-origin Referer (matching Host) must be allowed."""
        body, status = post(
            "/api/sessions/new",
            {},
            headers={"Referer": "http://127.0.0.1:8788/", "Host": "127.0.0.1:8788"},
        )
        assert status != 403, f"Expected non-403 for same-referer request, got {status}"
    def test_proxy_host_default_https_port_matches_http_origin(self):
        """http:// origin without port must NOT match X-Forwarded-Host with :443.
        After the scheme-aware _ports_match fix: http:// absent port = :80,
        which is not equal to :443. These are different protocols/ports and
        should be rejected. In real reverse proxy scenarios where the external
        URL is HTTPS, the browser sends Origin: https://... not http://...
        See test_proxy_host_default_https_port_matches_https_origin for the
        real-world proxy case that should pass.
        """
        assert not self._csrf_allowed({
            "Origin": "http://example.com",
            "X-Forwarded-Host": "example.com:443",
        }), 'http origin (port :80) must not match https host (:443)'
    def test_proxy_host_default_https_port_matches_https_origin(self):
        """HTTPS Origin without port should match X-Forwarded-Host with explicit :443."""
        assert self._csrf_allowed({
            "Origin": "https://example.com",
            "X-Forwarded-Host": "example.com:443",
        })
    def test_proxy_host_port_normalization_still_rejects_other_host(self):
        """Port normalization must not allow different hosts through."""
        assert not self._csrf_allowed({
            "Origin": "https://evil.com",
            "X-Forwarded-Host": "example.com:443",
        })
    def test_allowed_public_origin_bypasses_missing_proxy_port(self, monkeypatch):
        """Explicitly configured public origins should pass even if proxy strips :port from Host."""
        monkeypatch.setenv('HERMES_WEBUI_ALLOWED_ORIGINS', 'https://myapp.example.com:8000')
        assert self._csrf_allowed({
            'Origin': 'https://myapp.example.com:8000',
            'Host': 'myapp.example.com',
            'X-Forwarded-Proto': 'https',
        })
    def test_other_origin_not_allowed_by_public_origin_allowlist(self, monkeypatch):
        """Allowlist must stay exact; unrelated origins must still be rejected."""
        monkeypatch.setenv('HERMES_WEBUI_ALLOWED_ORIGINS', 'https://myapp.example.com:8000')
        assert not self._csrf_allowed({
            'Origin': 'https://evil.com:8000',
            'Host': 'myapp.example.com',
            'X-Forwarded-Proto': 'https',
        })
    # ── Port normalization: scheme-aware (M-1 fix) ────────────────────────────
    def test_cross_protocol_port_not_confused_http_origin_https_host(self):
        """http:// origin must NOT match a host with :443 (HTTPS default).
        Before M-1 fix, _ports_match treated both 80 and 443 as equivalent to
        absent port, allowing http://host to match https://host:443 servers.
        """
        assert not self._csrf_allowed({
            'Origin': 'http://example.com',  # http, no port = :80
            'X-Forwarded-Host': 'example.com:443',  # HTTPS port
        }), 'http origin should NOT match host advertising port 443'
    def test_cross_protocol_port_not_confused_https_origin_http_host(self):
        """https:// origin must NOT match a host with :80 (HTTP default)."""
        assert not self._csrf_allowed({
            'Origin': 'https://example.com',  # https, no port = :443
            'X-Forwarded-Host': 'example.com:80',  # HTTP port
        }), 'https origin should NOT match host advertising port 80'
    def test_http_explicit_port_80_matches_host_without_port(self):
        """http://example.com:80 is the same origin as http://example.com."""
        assert self._csrf_allowed({
            'Origin': 'http://example.com:80',
            'Host': 'example.com',
        })
    def test_https_explicit_port_443_matches_host_without_port(self):
        """https://example.com:443 is the same origin as https://example.com."""
        assert self._csrf_allowed({
            'Origin': 'https://example.com:443',
            'Host': 'example.com',
        })
    def test_non_default_port_not_waived(self):
        """Non-default ports (e.g. :8000) must not be treated as equivalent to absent."""
        assert not self._csrf_allowed({
            'Origin': 'https://example.com:8000',
            'Host': 'example.com',
        })
    # ── Bug scenario: proxy strips non-standard port ──────────────────────────
    def test_bug_origin_8000_host_without_port_rejected_without_allowlist(self, monkeypatch):
        """Without the allowlist, origin with :8000 must be rejected when proxy strips port.
        This documents the original bug: Origin: https://app.com:8000 with
        Host: app.com (proxy stripped the port). Before this PR that returned 403.
        The fix (HERMES_WEBUI_ALLOWED_ORIGINS) handles it; without the env var
        the request is still rejected, which is the safe default.
        """
        monkeypatch.delenv('HERMES_WEBUI_ALLOWED_ORIGINS', raising=False)
        assert not self._csrf_allowed({
            'Origin': 'https://myapp.example.com:8000',
            'Host': 'myapp.example.com',
        }), 'without allowlist, port mismatch must be rejected (safe default)'
    def test_allowed_origins_comma_separated(self, monkeypatch):
        """HERMES_WEBUI_ALLOWED_ORIGINS accepts multiple comma-separated origins."""
        monkeypatch.setenv(
            'HERMES_WEBUI_ALLOWED_ORIGINS',
            'https://app1.example.com:8000, https://app2.example.com:9000',
        )
        assert self._csrf_allowed({'Origin': 'https://app1.example.com:8000', 'Host': 'proxy.internal'})
        assert self._csrf_allowed({'Origin': 'https://app2.example.com:9000', 'Host': 'proxy.internal'})
        assert not self._csrf_allowed({'Origin': 'https://evil.com:8000', 'Host': 'proxy.internal'})
    def test_allowed_origins_without_scheme_ignored(self, monkeypatch, capsys):
        """Allowlist entries missing the scheme are skipped and a warning is printed."""
        monkeypatch.setenv('HERMES_WEBUI_ALLOWED_ORIGINS', 'myapp.example.com:8000')
        from api.routes import _allowed_public_origins
        result = _allowed_public_origins()
        assert len(result) == 0, 'entry without scheme must be ignored'
        # The warning goes to stderr per the assertion below.
        captured = capsys.readouterr()
        assert 'WARNING' in captured.err and 'scheme' in captured.err.lower()
    def test_allowed_origins_trailing_slash_normalized(self, monkeypatch):
        """Trailing slash in allowlist entry is stripped before comparison."""
        monkeypatch.setenv('HERMES_WEBUI_ALLOWED_ORIGINS', 'https://myapp.example.com:8000/')
        assert self._csrf_allowed({
            'Origin': 'https://myapp.example.com:8000',
            'Host': 'proxy.internal',
        })
# ── CSRF helpers: unit tests ─────────────────────────────────────────────────
class TestCSRFHelpers:
    """Direct unit tests for _normalize_host_port and _ports_match."""
    # _normalize_host_port: splits "host[:port]" (incl. bracketed IPv6) into
    # a (lowercased-host, port-or-None) tuple.
    def test_normalize_host_only(self):
        from api.routes import _normalize_host_port
        assert _normalize_host_port('example.com') == ('example.com', None)
    def test_normalize_host_with_port(self):
        from api.routes import _normalize_host_port
        assert _normalize_host_port('example.com:8000') == ('example.com', '8000')
    def test_normalize_ipv6_no_port(self):
        from api.routes import _normalize_host_port
        assert _normalize_host_port('[::1]') == ('::1', None)
    def test_normalize_ipv6_with_port(self):
        from api.routes import _normalize_host_port
        assert _normalize_host_port('[::1]:8080') == ('::1', '8080')
    def test_normalize_empty(self):
        from api.routes import _normalize_host_port
        assert _normalize_host_port('') == ('', None)
    def test_normalize_whitespace_stripped(self):
        from api.routes import _normalize_host_port
        assert _normalize_host_port('  example.com  ') == ('example.com', None)
    def test_normalize_lowercases(self):
        from api.routes import _normalize_host_port
        assert _normalize_host_port('EXAMPLE.COM:80') == ('example.com', '80')
    # _ports_match(scheme, a, b): scheme-aware equality where an absent port
    # means the scheme's default (http→80, https→443); non-default ports are
    # never waived.
    def test_ports_match_identical(self):
        from api.routes import _ports_match
        assert _ports_match('https', '8000', '8000') is True
    def test_ports_match_both_absent(self):
        from api.routes import _ports_match
        assert _ports_match('https', None, None) is True
    def test_ports_match_https_absent_vs_443(self):
        from api.routes import _ports_match
        assert _ports_match('https', None, '443') is True
        assert _ports_match('https', '443', None) is True
    def test_ports_match_http_absent_vs_80(self):
        from api.routes import _ports_match
        assert _ports_match('http', None, '80') is True
        assert _ports_match('http', '80', None) is True
    def test_ports_match_http_absent_vs_443_rejected(self):
        """http:// scheme: absent port is :80, not :443."""
        from api.routes import _ports_match
        assert _ports_match('http', None, '443') is False
        assert _ports_match('http', '443', None) is False
    def test_ports_match_https_absent_vs_80_rejected(self):
        """https:// scheme: absent port is :443, not :80."""
        from api.routes import _ports_match
        assert _ports_match('https', None, '80') is False
        assert _ports_match('https', '80', None) is False
    def test_ports_match_non_default_never_waived(self):
        from api.routes import _ports_match
        assert _ports_match('https', None, '8000') is False
        assert _ports_match('https', '8000', None) is False
        assert _ports_match('http', None, '8080') is False
    def test_ports_match_different_non_default(self):
        from api.routes import _ports_match
        assert _ports_match('https', '8000', '9000') is False
# ── 2. Login Rate Limiting ─────────────────────────────────────────────────
class TestLoginRateLimit:
    """Login rate limiting — exercises the private helpers in api.auth
    (_login_attempts bucket, _record_login_attempt, _check_login_rate)
    with synthetic IPs so no real login endpoint traffic is needed."""
    def test_rate_limit_triggers_429(self):
        """More than 5 failed login attempts from same IP must yield 429."""
        # NOTE(review): _login_attempts/_LOGIN_WINDOW and the local
        # `import time` below are unused leftovers from an earlier draft.
        from api.auth import _login_attempts, _LOGIN_WINDOW
        # Force the rate limiter state: inject 5 stale-now timestamps so next call is fresh
        # Actually easier: just hit the endpoint 6 times with wrong password
        # But we can't set a password in a test without config file.
        # Instead test the helper directly.
        import time
        from api import auth as _auth
        # Reset state for a fake IP
        fake_ip = "10.255.254.253"
        _auth._login_attempts[fake_ip] = []
        # Record 5 attempts — should still be allowed
        for _ in range(5):
            _auth._record_login_attempt(fake_ip)
        assert not _auth._check_login_rate(fake_ip), \
            "After 5 attempts, _check_login_rate should return False (blocked)"
    def test_rate_limit_resets_after_window(self):
        """After window expires, rate limit resets."""
        import time
        from api import auth as _auth
        fake_ip = "10.255.254.252"
        # Inject 5 old timestamps (outside window)
        old_ts = time.time() - 70  # 70s ago, outside 60s window
        _auth._login_attempts[fake_ip] = [old_ts] * 5
        assert _auth._check_login_rate(fake_ip), \
            "After window expires, IP should be allowed again"
    def test_rate_limit_endpoint_returns_429(self, webui_server):
        """Live endpoint: 6th bad attempt returns 429 (auth enabled required)."""
        # This test only runs meaningfully when auth is enabled.
        # We can still verify the helper returns 429 from the unit test above.
        # If auth not enabled, endpoint returns 200 OK with 'Auth not enabled'.
        from api import auth as _auth
        fake_ip = "10.255.254.251"
        # Fill the bucket
        _auth._login_attempts[fake_ip] = [time.time()] * 5
        assert not _auth._check_login_rate(fake_ip)
# ── 3. Session ID Validation ───────────────────────────────────────────────
class TestSessionIDValidation:
    """Session.load() input validation: well-formed IDs pass the validator
    (returning None only because no file exists), dangerous IDs are rejected
    outright (also None, but without ever touching the filesystem)."""
    def test_hex_session_id_loads(self, tmp_path):
        """A valid hex session ID gets past the validation check."""
        # NOTE(review): tmp_path and SESSION_DIR are unused leftovers; the
        # sys.path insert mirrors the module-level one and is redundant.
        import sys
        sys.path.insert(0, str(pathlib.Path(__file__).parent.parent))
        from api.models import Session, SESSION_DIR
        valid_hex = "deadbeef" * 8  # 64 hex chars
        # Should not raise — returns None only if file doesn't exist (it won't)
        result = Session.load(valid_hex)
        assert result is None  # No file, but no error
    def test_new_format_session_id_passes_validation(self):
        """New hermes-agent session IDs (YYYYMMDD_HHMMSS_xxxxxx) must pass validation."""
        from api.models import Session
        # Should pass the validator (returns None only because the file doesn't exist)
        result = Session.load("20260406_164014_74b2d1")
        assert result is None  # file doesn't exist, but validator passed
    def test_non_hex_session_id_rejected(self):
        """A session ID with dangerous chars must be rejected."""
        from api.models import Session
        # Traversal sequences, shell metacharacters, whitespace, NUL bytes,
        # Windows-style separators, and stray extensions — all must fail.
        evil_ids = [
            "../../../etc/passwd",
            "../../../../root/.ssh/id_rsa",
            "session; rm -rf /",
            "hello world",
            "ZZZZZZZZZZZZZZZZ",
            "session\x00evil",
            "..\\..\\windows\\system32",
            "session/../../etc/passwd",
            "valid_looking.json",
        ]
        for sid in evil_ids:
            result = Session.load(sid)
            assert result is None, \
                f"Session.load should reject dangerous ID '{sid}', got {result}"
    def test_empty_session_id_rejected(self):
        """An empty session ID must be rejected."""
        from api.models import Session
        assert Session.load("") is None
        assert Session.load(None) is None
# ── 4. Error Path Sanitization ────────────────────────────────────────────
class TestSanitizeError:
    """_sanitize_error must strip filesystem paths from error messages."""

    def test_unix_path_stripped(self):
        from api.helpers import _sanitize_error
        msg = _sanitize_error(
            FileNotFoundError("/home/hermes/.hermes/sessions/abc123.json"))
        assert "/home/hermes" not in msg
        assert "<path>" in msg

    def test_nested_unix_path_stripped(self):
        from api.helpers import _sanitize_error
        msg = _sanitize_error(
            ValueError("cannot read /var/lib/hermes/data.db: permission denied"))
        assert "/var/lib/hermes" not in msg
        assert "<path>" in msg

    def test_no_path_unchanged(self):
        from api.helpers import _sanitize_error
        # Messages without paths pass through untouched.
        assert _sanitize_error(ValueError("session not found")) == "session not found"

    def test_windows_path_stripped(self):
        from api.helpers import _sanitize_error
        msg = _sanitize_error(
            FileNotFoundError("C:\\Users\\hermes\\AppData\\sessions\\x.json not found"))
        assert "C:\\Users\\hermes" not in msg

    def test_live_404_does_not_leak_path(self, webui_server):
        """Live server: file-not-found errors must not expose filesystem paths."""
        body, status = post("/api/file/read", {"path": "../../etc/passwd"})
        err = body.get("error", "")
        assert "/home" not in err and "/var" not in err and "/etc" not in err, \
            f"Error message leaks filesystem path: {err}"
# ── 5. Secure Cookie Flag ─────────────────────────────────────────────────
class TestSecureCookieFlag:
def test_getattr_safe_on_plain_socket(self):
"""getattr(handler.request, 'getpeercert', None) must not raise on plain socket."""
import socket
# Plain socket has no getpeercert attribute
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
try:
result = getattr(s, 'getpeercert', None)
assert result is None, \
f"Expected None on plain socket, got {result}"
finally:
s.close()
def test_secure_flag_not_set_for_plain_http(self, webui_server):
"""Login endpoint over plain HTTP must NOT set Secure cookie flag."""
# Auth is disabled in tests, so this just checks no crash
body, status = post("/api/auth/login", {"password": "test"})
# Either 200 (auth not enabled) or 401 (auth enabled, wrong pw)
assert status in (200, 401, 429), f"Unexpected status {status}"
# ── 6. HMAC Signature Length ──────────────────────────────────────────────
class TestHMACLength:
    """Session-cookie HMAC signatures must be 32 hex chars (128-bit)."""

    def test_session_token_sig_is_32_chars(self):
        """Session cookie signature must be 32 hex chars (128-bit), not 16."""
        from api.auth import create_session
        _, sig = create_session().rsplit('.', 1)
        assert len(sig) == 32, \
            f"Expected 32-char signature (128-bit), got {len(sig)}: {sig}"

    def test_verify_session_rejects_old_16char_sig(self):
        """A cookie with a 16-char sig must fail verification."""
        import hashlib
        import hmac as _hmac
        import secrets
        import time
        from api.auth import _signing_key, verify_session, _sessions
        token = secrets.token_hex(32)
        _sessions[token] = time.time() + 3600  # valid session
        truncated = _hmac.new(_signing_key(), token.encode(),
                              hashlib.sha256).hexdigest()[:16]
        # Should fail: sig length wrong
        assert not verify_session(f"{token}.{truncated}"), \
            "Old 16-char sig cookie must not verify (sig mismatch)"
# ── 7. Skills Path Traversal ──────────────────────────────────────────────
class TestSkillsPathTraversal:
    """Skill names must not escape the skills directory."""

    def test_traversal_rejected(self, webui_server):
        """Saving a skill with a traversal path must return 400."""
        payload = {"name": "../../evil", "content": "# evil"}
        body, status = post("/api/skills/save", payload)
        assert status in (400, 403), \
            f"Expected 400/403 for traversal skill path, got {status}: {body}"

    def test_valid_skill_accepted(self, webui_server):
        """Saving a skill with a valid name must succeed."""
        payload = {
            "name": "test-security-skill",
            "content": "---\nname: test-security-skill\ndescription: test\n---\n# test",
        }
        body, status = post("/api/skills/save", payload)
        if status == 500:
            # 500 = skills module not available (hermes-agent not installed) — skip
            import pytest
            pytest.skip("skills module requires hermes-agent")
        # Should succeed (200) or need auth (401/403) — not path error (400)
        assert status in (200, 401, 403, 404), \
            f"Valid skill save got unexpected status {status}: {body}"
# ── 8. Content-Disposition for Dangerous MIME Types ───────────────────────
class TestContentDisposition:
    """Dangerous MIME types must be forced to download, and filename headers
    must stay latin-1 encodable even for unicode filenames."""

    def test_html_file_forced_download(self, webui_server, tmp_path):
        """HTML files served via /api/file/raw must have Content-Disposition: attachment."""
        # The raw-file handler must exist…
        from api.routes import _handle_file_raw  # noqa: F401
        # …and its source must force 'attachment' for the dangerous MIME set.
        # (The previous version compared the local set against itself — a
        # tautology that could never fail — and created a session it never used.)
        import pathlib
        routes_src = pathlib.Path(__file__).parent.parent / "api" / "routes.py"
        src = routes_src.read_text()
        assert "dangerous_types" in src, "routes.py must define dangerous_types"
        assert "attachment" in src, \
            "routes.py must force Content-Disposition: attachment for dangerous types"

    def test_dangerous_mime_types_set_complete(self):
        """The set of dangerous MIME types must include html, xhtml, and svg."""
        import pathlib
        routes_src = pathlib.Path(__file__).parent.parent / "api" / "routes.py"
        src = routes_src.read_text()
        assert "text/html" in src
        assert "application/xhtml+xml" in src
        assert "image/svg+xml" in src
        assert "dangerous_types" in src

    def test_unicode_filename_download_header_is_latin1_safe(self, cleanup_test_sessions):
        """Unicode filenames must not crash download responses."""
        # Explicit import: `import urllib.request` does not guarantee
        # urllib.parse is bound — TODO confirm against this file's header.
        import urllib.parse
        import pathlib
        body, status = post("/api/session/new", {})
        assert status == 200, body
        sid = body["session"]["session_id"]
        cleanup_test_sessions.append(sid)
        ws = pathlib.Path(body["session"]["workspace"])
        filename = "中文对照表.pdf"
        pdf_bytes = b"%PDF-1.3\n1 0 obj\n<<>>\nendobj\ntrailer\n<<>>\n%%EOF\n"
        (ws / filename).write_bytes(pdf_bytes)
        encoded = urllib.parse.quote(filename)
        raw, headers, raw_status = get_raw_with_headers(
            f"/api/file/raw?session_id={sid}&path={encoded}&download=1"
        )
        assert raw_status == 200
        assert raw == pdf_bytes
        disp = headers["Content-Disposition"]
        assert disp.startswith("attachment; ")
        assert "filename*=UTF-8''" in disp
        # Header values must be latin-1 encodable (http.server requirement).
        disp.encode("latin-1")

    def test_unicode_filename_inline_header_is_latin1_safe(self, cleanup_test_sessions):
        """Inline responses must also work for unicode filenames."""
        import urllib.parse
        import pathlib
        body, status = post("/api/session/new", {})
        assert status == 200, body
        sid = body["session"]["session_id"]
        cleanup_test_sessions.append(sid)
        ws = pathlib.Path(body["session"]["workspace"])
        filename = "预览.pdf"
        pdf_bytes = b"%PDF-1.3\n1 0 obj\n<<>>\nendobj\ntrailer\n<<>>\n%%EOF\n"
        (ws / filename).write_bytes(pdf_bytes)
        encoded = urllib.parse.quote(filename)
        raw, headers, raw_status = get_raw_with_headers(
            f"/api/file/raw?session_id={sid}&path={encoded}"
        )
        assert raw_status == 200
        assert raw == pdf_bytes
        disp = headers["Content-Disposition"]
        assert disp.startswith("inline; ")
        assert "filename*=UTF-8''" in disp
        disp.encode("latin-1")
# ── 9. PBKDF2 Password Hashing ───────────────────────────────────────────
class TestPasswordHashing:
    """Password hashing must use PBKDF2-SHA256 with a high iteration count."""

    def test_hash_password_is_hex(self):
        """_hash_password must produce a non-empty hex string (PBKDF2-SHA256)."""
        from api.auth import _hash_password
        result = _hash_password("mysecretpassword")
        assert isinstance(result, str) and len(result) == 64, \
            f"Expected 64-char hex hash (SHA-256 output), got len={len(result)}: {result}"
        hex_digits = set("0123456789abcdef")
        assert all(c in hex_digits for c in result), \
            f"Hash must be hex string, got: {result}"

    def test_hash_password_is_deterministic_with_same_salt(self):
        """_hash_password must return the same hash for same input (signing key is stable)."""
        from api.auth import _hash_password
        assert _hash_password("consistent_password") == _hash_password("consistent_password"), \
            "Same password must produce same hash (stable signing key)"

    def test_hash_password_different_inputs_differ(self):
        """Different passwords must produce different hashes."""
        from api.auth import _hash_password
        assert _hash_password("password_a") != _hash_password("password_b"), \
            "Different passwords must produce different hashes"

    def test_hash_password_longer_than_sha256(self):
        """PBKDF2 with 600k iterations is much stronger than single SHA-256.
        We verify indirectly: the code must call pbkdf2_hmac, not sha256 directly."""
        import inspect
        from api import auth as _auth
        src = inspect.getsource(_auth._hash_password)
        assert "pbkdf2_hmac" in src, \
            "_hash_password must use pbkdf2_hmac, not raw sha256"
        assert "600_000" in src or "600000" in src, \
            "_hash_password must use 600,000 iterations"

    def test_save_settings_stores_64char_hex_hash(self):
        """save_settings with _set_password must store a 64-char hex hash (PBKDF2)."""
        from api.config import save_settings, load_settings, SETTINGS_FILE
        # Snapshot current settings so the test leaves no trace behind.
        original = SETTINGS_FILE.read_text() if SETTINGS_FILE.exists() else None
        try:
            save_settings({"_set_password": "test_pbkdf2_pw"})
            ph = load_settings().get("password_hash", "")
            assert len(ph) == 64 and all(c in "0123456789abcdef" for c in ph), \
                f"save_settings must store 64-char hex PBKDF2 hash, got: {ph!r}"
        finally:
            # Restore whatever was there before (or clear if nothing was).
            if original is not None:
                SETTINGS_FILE.write_text(original)
            else:
                save_settings({"_clear_password": True})
# ── 10. Non-loopback Startup Warning ─────────────────────────────────────
class TestStartupWarning:
    """server.py must warn when binding non-loopback without auth enabled."""

    def test_warning_code_present_in_server(self):
        """server.py must contain non-loopback warning code."""
        text = (pathlib.Path(__file__).parent.parent / "server.py").read_text()
        has_warning = ("0.0.0.0" in text
                       or "non-loopback" in text.lower()
                       or "WARNING" in text)
        assert has_warning, \
            "server.py must contain non-loopback warning logic"
        assert "is_auth_enabled" in text, \
            "server.py must check is_auth_enabled() before warning"
# ── 11. SSRF DNS Check ─────────────────────────────────────────────────────
class TestSSRFCheck:
    """config.py must guard outbound endpoints against SSRF via DNS checks."""

    def test_ssrf_guard_code_present_in_config(self):
        """config.py must contain SSRF DNS resolution guard."""
        cfg = pathlib.Path(__file__).parent.parent / "api" / "config.py"
        text = cfg.read_text()
        assert "getaddrinfo" in text, "SSRF guard must resolve DNS with getaddrinfo"
        assert "is_private" in text, "SSRF guard must check is_private IP"
        assert "is_loopback" in text, "SSRF guard must check is_loopback IP"

    def test_known_local_providers_whitelisted(self):
        """Ollama and localhost endpoints should NOT be blocked by SSRF guard."""
        cfg = pathlib.Path(__file__).parent.parent / "api" / "config.py"
        lowered = cfg.read_text().lower()
        assert "ollama" in lowered
        assert "localhost" in lowered
        assert "lmstudio" in lowered or "lm-studio" in lowered
# ── 12. ENV_LOCK Export ────────────────────────────────────────────────────
class TestENVLock:
    """_ENV_LOCK must be shared between api.streaming and api.routes."""

    def test_env_lock_importable_from_streaming(self):
        """_ENV_LOCK must be importable from api.streaming."""
        import threading
        from api.streaming import _ENV_LOCK
        lock_type = type(threading.Lock())
        assert isinstance(_ENV_LOCK, lock_type), \
            "_ENV_LOCK must be a threading.Lock"

    def test_env_lock_importable_in_routes(self):
        """api.routes must be able to import _ENV_LOCK from api.streaming."""
        # A circular-import regression would make this import raise;
        # a clean import is the assertion.
        import importlib  # noqa: F401
        import api.routes  # noqa: F401
# ── Fixture ────────────────────────────────────────────────────────────────
import pytest
# Fixture alias: conftest.py owns the test-server lifecycle for this module;
# tests only need the base URL to talk to it.
@pytest.fixture(scope="module")
def webui_server():
    """Reuse the module-scoped server started by conftest.py."""
    return BASE

199
tests/test_sprint3.py Normal file
View File

@@ -0,0 +1,199 @@
"""Sprint 3 tests: cron API, skills API, memory API, input validation."""
import json, uuid, urllib.request, urllib.error
from tests._pytest_port import BASE
def get(path):
    """GET ``path`` from the test server; return (json_body, http_status).

    Unlike post(), HTTP error responses propagate as urllib.error.HTTPError.
    """
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        payload = resp.read()
        return json.loads(payload), resp.status
def post(path, body=None):
    """POST ``body`` as JSON to ``path``; return (json_body, http_status).

    4xx/5xx responses are decoded and returned too, so callers can assert
    on error statuses without try/except.
    """
    payload = json.dumps(body or {}).encode()
    req = urllib.request.Request(
        BASE + path, data=payload,
        headers={"Content-Type": "application/json"})
    try:
        with urllib.request.urlopen(req, timeout=10) as resp:
            return json.loads(resp.read()), resp.status
    except urllib.error.HTTPError as err:
        return json.loads(err.read()), err.code
def make_session_tracked(created_list, ws=None):
    """Create a session and register it with the cleanup fixture."""
    import pathlib as _pathlib
    payload = {"workspace": str(ws)} if ws else {}
    data, _ = post("/api/session/new", payload)
    session = data["session"]
    created_list.append(session["session_id"])
    return session["session_id"], _pathlib.Path(session["workspace"])
# ── Cron API ──
def test_crons_list():
    body, code = get("/api/crons")
    assert code == 200
    assert "jobs" in body


def test_crons_list_has_required_fields():
    body, _ = get("/api/crons")
    if not body["jobs"]:
        return  # nothing scheduled on this install — nothing to check
    first = body["jobs"][0]
    for field in ("id", "name", "prompt", "enabled", "schedule_display"):
        assert field in first


def test_crons_output_requires_job_id():
    # get() propagates HTTP errors, so a missing job_id surfaces as HTTPError.
    try:
        get("/api/crons/output")
        assert False
    except urllib.error.HTTPError as err:
        assert err.code == 400


def test_crons_output_real_job():
    body, _ = get("/api/crons")
    if not body["jobs"]:
        return
    jid = body["jobs"][0]["id"]
    out, code = get(f"/api/crons/output?job_id={jid}&limit=3")
    assert code == 200
    assert "outputs" in out


def test_crons_pause_requires_job_id():
    _, code = post("/api/crons/pause", {})
    assert code in (400, 404)


def test_crons_resume_requires_job_id():
    _, code = post("/api/crons/resume", {})
    assert code in (400, 404)


def test_crons_run_nonexistent():
    _, code = post("/api/crons/run", {"job_id": "doesnotexist999"})
    assert code == 404
# ── Skills API ──
def test_skills_list():
    body, code = get("/api/skills")
    assert code == 200
    assert len(body["skills"]) > 0


def test_skills_list_has_required_fields():
    body, _ = get("/api/skills")
    first = body["skills"][0]
    assert "name" in first and "description" in first


def test_skills_content_known():
    body, code = get("/api/skills/content?name=dogfood")
    assert code == 200
    assert len(body["content"]) > 0


def test_skills_content_requires_name():
    # Missing ?name= must 400; get() raises on HTTP errors.
    try:
        get("/api/skills/content")
        assert False
    except urllib.error.HTTPError as err:
        assert err.code == 400


def test_skills_search_returns_subset():
    body, _ = get("/api/skills")
    assert len(body["skills"]) > 5
# ── Memory API ──
def test_memory_returns_both_files():
    body, code = get("/api/memory")
    assert code == 200
    assert "memory" in body and "user" in body


def test_memory_content_is_string():
    body, _ = get("/api/memory")
    assert isinstance(body["memory"], str)
    assert isinstance(body["user"], str)


def test_memory_has_mtime():
    body, _ = get("/api/memory")
    assert "memory_mtime" in body and "user_mtime" in body
# ── Input validation: session update/delete ──
def test_session_update_requires_session_id():
    _, code = post("/api/session/update", {"model": "openai/gpt-5.4-mini"})
    assert code == 400


def test_session_delete_requires_session_id():
    _, code = post("/api/session/delete", {})
    assert code == 400


def test_session_delete_rejects_absolute_path_payload(tmp_path):
    victim = tmp_path / "victim.json"
    victim.write_text("TOPSECRET", encoding="utf-8")
    _, code = post("/api/session/delete",
                   {"session_id": str(victim.with_suffix(""))})
    assert code == 400
    assert victim.exists(), "absolute-path payload must not delete arbitrary files"


def test_session_delete_rejects_traversal_payload(tmp_path):
    victim = tmp_path / "outside.json"
    victim.write_text("TOPSECRET", encoding="utf-8")
    relative = victim.with_suffix('').as_posix().lstrip('/')
    _, code = post("/api/session/delete",
                   {"session_id": f"../../../../{relative}"})
    assert code == 400
    assert victim.exists(), "traversal payload must not delete arbitrary files"
# ── Input validation: chat start ──
def test_chat_start_requires_session_id():
    _, code = post("/api/chat/start", {"message": "hello"})
    assert code == 400


def test_chat_start_requires_message(cleanup_test_sessions):
    sid, _ = make_session_tracked(cleanup_test_sessions)
    _, code = post("/api/chat/start", {"session_id": sid, "message": ""})
    assert code == 400


def test_session_update_unknown_id_returns_404():
    _, code = post("/api/session/update",
                   {"session_id": "nosuchsession", "model": "openai/gpt-5.4-mini"})
    assert code == 404
# ── Trusted-root enforcement ──
def test_session_update_rejects_workspace_outside_trusted_root(
        tmp_path, cleanup_test_sessions):
    """Updating a session's workspace to a dir outside the trusted root → 400."""
    # Track the created session so conftest cleans it up
    # (previously this test created a session and leaked it).
    sid, _ = make_session_tracked(cleanup_test_sessions)
    outside = tmp_path / "outside"
    outside.mkdir(parents=True, exist_ok=True)
    result, status = post("/api/session/update",
                          {"session_id": sid, "workspace": str(outside)})
    assert status == 400
    assert "outside" in result.get("error", "").lower()


def test_chat_start_rejects_workspace_outside_trusted_root(
        tmp_path, cleanup_test_sessions):
    """Starting a chat with a workspace outside the trusted root → 400."""
    sid, _ = make_session_tracked(cleanup_test_sessions)
    outside = tmp_path / "outside-chat"
    outside.mkdir(parents=True, exist_ok=True)
    result, status = post("/api/chat/start",
                          {"session_id": sid, "message": "hello",
                           "workspace": str(outside)})
    assert status == 400
    assert "outside" in result.get("error", "").lower()


def test_workspace_add_rejects_path_outside_trusted_root(tmp_path):
    """Registering a workspace outside the trusted root → 400."""
    outside = tmp_path / "outside-add"
    outside.mkdir(parents=True, exist_ok=True)
    result, status = post("/api/workspaces/add",
                          {"path": str(outside), "name": "Outside"})
    assert status == 400
    assert "outside" in result.get("error", "").lower()


def test_session_new_rejects_workspace_outside_trusted_root(tmp_path):
    """Creating a session with a workspace outside the trusted root → 400."""
    outside = tmp_path / "outside-new"
    outside.mkdir(parents=True, exist_ok=True)
    result, status = post("/api/session/new", {"workspace": str(outside)})
    assert status == 400
    assert "outside" in result.get("error", "").lower()
# ── Session search ──
def test_session_search_returns_matches(cleanup_test_sessions):
    sid, _ = make_session_tracked(cleanup_test_sessions)
    post("/api/session/rename", {"session_id": sid, "title": f"unique-s3-{sid}"})
    body, code = get(f"/api/sessions/search?q=unique-s3-{sid}")
    assert code == 200
    found = [s["session_id"] for s in body["sessions"]]
    assert sid in found


def test_session_search_empty_query_returns_all():
    body, code = get("/api/sessions/search?q=")
    assert code == 200 and "sessions" in body


def test_session_search_no_results():
    body, code = get("/api/sessions/search?q=zzznomatchzzz9999")
    assert code == 200 and body["sessions"] == []

576
tests/test_sprint30.py Normal file
View File

@@ -0,0 +1,576 @@
"""
Sprint 30: Approval card UI, i18n coverage, and approval flow polish.
Tests for:
- Approval card HTML structure (all 4 buttons, IDs, data-i18n attrs)
- Keyboard shortcut handler presence in boot.js
- i18n keys for approval card in both locales
- CSS for approval-btn states (loading, disabled, kbd badge)
- respondApproval loading/disable pattern in messages.js
- streaming.py scoping fix (_unreg_notify=None initialisation)
- Approval respond HTTP endpoint (existing + new behaviour)
"""
import json
import re
import urllib.request
import urllib.error
import urllib.parse
import pytest
from tests._pytest_port import BASE
def get(path):
    """GET ``path`` from the test server and return the decoded JSON body."""
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        return json.loads(resp.read())
def post(path, body=None):
    """POST ``body`` as JSON to ``path``; return (json_body, http_status).

    HTTP error responses are decoded and returned rather than raised.
    """
    payload = json.dumps(body or {}).encode()
    req = urllib.request.Request(
        BASE + path, data=payload,
        headers={"Content-Type": "application/json"})
    try:
        with urllib.request.urlopen(req, timeout=10) as resp:
            return json.loads(resp.read()), resp.status
    except urllib.error.HTTPError as err:
        return json.loads(err.read()), err.code
def read(path):
    """Return the full text of ``path`` decoded as UTF-8."""
    with open(path, encoding="utf-8") as handle:
        return handle.read()
import pathlib
# Repository root (tests/ lives one level below it); used to read static assets.
REPO = pathlib.Path(__file__).parent.parent
# ── HTML structure ───────────────────────────────────────────────────────────
class TestApprovalCardHTML:
    """Static markup checks for the approval card in index.html."""

    def _html(self):
        # Load the page under test.
        return read(REPO / "static/index.html")

    def test_approval_card_has_four_buttons(self):
        html = self._html()
        for choice in ("once", "session", "always", "deny"):
            assert f"respondApproval('{choice}')" in html, \
                f"approval button for '{choice}' missing from index.html"

    def test_approval_buttons_have_ids(self):
        html = self._html()
        expected_ids = ("approvalBtnOnce", "approvalBtnSession",
                        "approvalBtnAlways", "approvalBtnDeny")
        for btn_id in expected_ids:
            assert f'id="{btn_id}"' in html, \
                f"button id '{btn_id}' missing from approval card"

    def test_approval_heading_has_data_i18n(self):
        assert 'data-i18n="approval_heading"' in self._html(), \
            "approval heading missing data-i18n attribute"

    def test_approval_buttons_have_data_i18n_labels(self):
        html = self._html()
        label_keys = ("approval_btn_once", "approval_btn_session",
                      "approval_btn_always", "approval_btn_deny")
        for key in label_keys:
            assert f'data-i18n="{key}"' in html, \
                f"button label data-i18n='{key}' missing"

    def test_approval_once_button_has_kbd_badge(self):
        assert '<kbd class="approval-kbd">' in self._html(), \
            "kbd badge missing from Allow once button"

    def test_approval_card_has_aria_roles(self):
        html = self._html()
        assert 'role="alertdialog"' in html, \
            "approval card missing role=alertdialog for accessibility"
        assert 'aria-labelledby="approvalHeading"' in html, \
            "approval card missing aria-labelledby"
class TestClarifyCardHTML:
    """Static markup checks for the clarify card in index.html."""

    def _html(self):
        return read(REPO / "static/index.html")

    def test_clarify_card_markup_present(self):
        html = self._html()
        assert 'id="clarifyCard"' in html, "clarify card missing from index.html"
        assert 'id="clarifyHeading"' in html, "clarify heading missing"
        assert 'id="clarifyQuestion"' in html, "clarify question text missing"
        assert 'id="clarifyChoices"' in html, "clarify choices container missing"
        assert 'id="clarifyInput"' in html, "clarify input missing"
        assert 'id="clarifySubmit"' in html, "clarify submit button missing"

    def test_clarify_card_has_data_i18n(self):
        html = self._html()
        for attr in ('data-i18n="clarify_heading"',
                     'data-i18n="clarify_send"',
                     'data-i18n-placeholder="clarify_input_placeholder"'):
            assert attr in html

    def test_clarify_card_has_aria_roles(self):
        html = self._html()
        assert 'role="dialog"' in html, \
            "clarify card missing role=dialog for accessibility"
        assert 'aria-labelledby="clarifyHeading"' in html, \
            "clarify card missing aria-labelledby"
# ── CSS ──────────────────────────────────────────────────────────────────────
class TestApprovalCardCSS:
    """CSS checks for approval button states and the kbd badge."""

    def _css(self):
        return read(REPO / "static/style.css")

    def test_btn_disabled_style_present(self):
        assert ".approval-btn:disabled" in self._css(), \
            "disabled state style missing for approval buttons"

    def test_btn_loading_class_present(self):
        assert ".approval-btn.loading" in self._css(), \
            "loading class style missing for approval buttons"

    def test_approval_kbd_style_present(self):
        assert ".approval-kbd" in self._css(), \
            ".approval-kbd style missing from style.css"

    def test_approval_kbd_hidden_on_mobile(self):
        css = self._css()
        # Should be display:none inside the mobile media query
        hidden = (".approval-kbd{display:none;}" in css
                  or ".approval-kbd { display: none; }" in css
                  or re.search(r'\.approval-kbd\s*\{[^}]*display\s*:\s*none', css))
        assert hidden, ".approval-kbd should be hidden on mobile"

    def test_btn_transform_on_hover(self):
        assert "translateY(-1px)" in self._css(), \
            "hover lift effect missing from approval buttons"

    def test_four_choice_styles_present(self):
        css = self._css()
        for cls in (".approval-btn.once", ".approval-btn.session",
                    ".approval-btn.always", ".approval-btn.deny"):
            assert cls in css, f"CSS class '{cls}' missing"
class TestClarifyCardCSS:
    """CSS checks for the clarify card."""

    def _css(self):
        return read(REPO / "static/style.css")

    def test_clarify_styles_present(self):
        css = self._css()
        selectors = (
            ".clarify-card",
            ".clarify-card.visible",
            ".clarify-inner",
            ".clarify-header",
            ".clarify-question",
            ".clarify-choices",
            ".clarify-choice",
            ".clarify-response",
            ".clarify-input",
            ".clarify-submit",
            ".clarify-hint",
        )
        for cls in selectors:
            assert cls in css, f"CSS class '{cls}' missing"

    def test_clarify_mobile_styles_present(self):
        css = self._css()
        # NOTE(review): the final fallback makes this check very lenient —
        # "clarify-card" is already guaranteed by the test above; confirm intent.
        present = (".clarify-card{padding:0 10px 8px;}" in css
                   or ".clarify-card { padding:0 10px 8px; }" in css
                   or "clarify-card" in css)
        assert present, "clarify mobile styles missing"

    def test_clarify_focus_styles_present(self):
        css = self._css()
        assert ".clarify-choice:focus" in css and ".clarify-submit:focus" in css, \
            "clarify focus styles missing"
# ── i18n keys ────────────────────────────────────────────────────────────────
class TestApprovalI18nKeys:
    """Both locales must define every approval-card i18n key."""

    REQUIRED_KEYS = [
        "approval_heading",
        "approval_btn_once",
        "approval_btn_session",
        "approval_btn_always",
        "approval_btn_deny",
        "approval_responding",
    ]

    def _i18n(self):
        return read(REPO / "static/i18n.js")

    def test_english_locale_has_all_approval_keys(self):
        src = self._i18n()
        # The en locale block is everything before the first closing `};`.
        en_block = src[:src.find("\n};")]
        for key in self.REQUIRED_KEYS:
            assert f"{key}:" in en_block, \
                f"English locale missing i18n key: {key}"

    def test_chinese_locale_has_all_approval_keys(self):
        src = self._i18n()
        # The zh locale block runs from ` zh: {` to end of file.
        zh_start = src.find("\n zh: {")
        assert zh_start != -1, "zh locale block not found in i18n.js"
        zh_block = src[zh_start:]
        for key in self.REQUIRED_KEYS:
            assert f"{key}:" in zh_block, \
                f"Chinese locale missing i18n key: {key}"

    def test_approval_heading_english_value(self):
        assert "approval_heading: 'Approval required'" in self._i18n(), \
            "English approval_heading value incorrect"

    def test_approval_btn_once_english_value(self):
        assert "approval_btn_once: 'Allow once'" in self._i18n(), \
            "English approval_btn_once value incorrect"

    def test_approval_btn_deny_english_value(self):
        assert "approval_btn_deny: 'Deny'" in self._i18n(), \
            "English approval_btn_deny value incorrect"
class TestClarifyI18nKeys:
    """Both locales must define every clarify-card i18n key."""

    REQUIRED_KEYS = [
        "clarify_heading",
        "clarify_hint",
        "clarify_other",
        "clarify_send",
        "clarify_input_placeholder",
        "clarify_responding",
    ]

    def _i18n(self):
        return read(REPO / "static/i18n.js")

    def test_english_locale_has_all_clarify_keys(self):
        src = self._i18n()
        en_block = src[:src.find("\n};")]
        for key in self.REQUIRED_KEYS:
            assert f"{key}:" in en_block, f"English locale missing i18n key: {key}"

    def test_chinese_locale_has_all_clarify_keys(self):
        src = self._i18n()
        zh_start = src.find("\n zh: {")
        assert zh_start != -1, "zh locale block not found in i18n.js"
        zh_block = src[zh_start:]
        for key in self.REQUIRED_KEYS:
            assert f"{key}:" in zh_block, f"Chinese locale missing i18n key: {key}"

    def test_clarify_heading_english_value(self):
        assert "clarify_heading: 'Clarification needed'" in self._i18n(), \
            "English clarify_heading value incorrect"
# ── messages.js behaviour ────────────────────────────────────────────────────
class TestApprovalMessagesJS:
    """Behavioural markers in messages.js for the approval card."""

    def _js(self):
        return read(REPO / "static/messages.js")

    def test_show_approval_card_re_enables_buttons(self):
        src = self._js()
        assert "b.disabled = false" in src and "loading" in src, \
            "showApprovalCard should re-enable buttons on each show"

    def test_respond_disables_buttons_immediately(self):
        assert "b.disabled = true" in self._js(), \
            "respondApproval should disable buttons immediately to prevent double-submit"

    def test_respond_uses_i18n_for_error(self):
        src = self._js()
        # Should use t('approval_responding') not a hardcoded string
        assert "t(\"approval_responding\")" in src or "t('approval_responding')" in src, \
            "respondApproval error message should use t('approval_responding')"

    def test_show_card_applies_locale_to_dom(self):
        assert "applyLocaleToDOM" in self._js(), \
            "showApprovalCard should call applyLocaleToDOM to translate data-i18n labels"

    def test_show_card_focuses_once_button(self):
        src = self._js()
        assert "approvalBtnOnce" in src and "focus()" in src, \
            "showApprovalCard should focus the Allow once button"
class TestClarifyMessagesJS:
    """Behavioural markers in messages.js for the clarify card."""

    def _js(self):
        return read(REPO / "static/messages.js")

    def test_clarify_event_listener_present(self):
        assert "addEventListener('clarify'" in self._js(), \
            "clarify SSE listener missing from messages.js"

    def test_show_clarify_card_present(self):
        src = self._js()
        assert "function showClarifyCard" in src, "showClarifyCard missing"
        assert "clarifyChoices" in src and "clarifyInput" in src, \
            "showClarifyCard should manage clarify DOM elements"

    def test_respond_clarify_uses_api_endpoint(self):
        assert '/api/clarify/respond' in self._js(), \
            "respondClarify should POST to /api/clarify/respond"

    def test_clarify_polling_helpers_present(self):
        src = self._js()
        helpers = ("startClarifyPolling", "stopClarifyPolling",
                   "hideClarifyCard", "_clarifySessionId")
        for token in helpers:
            assert token in src, f"{token} missing from messages.js"
# ── boot.js keyboard shortcut ────────────────────────────────────────────────
class TestApprovalKeyboardShortcut:
    """boot.js must wire Enter to 'Allow once' while the card is visible."""

    def _boot(self):
        return read(REPO / "static/boot.js")

    def test_enter_shortcut_present_in_boot_js(self):
        src = self._boot()
        assert "respondApproval('once')" in src or 'respondApproval("once")' in src, \
            "Enter shortcut calling respondApproval('once') missing from boot.js"

    def test_enter_shortcut_checks_card_visible(self):
        src = self._boot()
        assert "approvalCard" in src and "visible" in src, \
            "Enter shortcut should check if approval card is visible"

    def test_enter_shortcut_guards_input_elements(self):
        src = self._boot()
        assert "TEXTAREA" in src and "INPUT" in src, \
            "Enter shortcut should not fire when focus is on TEXTAREA or INPUT"
# ── streaming.py scoping fix ─────────────────────────────────────────────────
class TestStreamingApprovalScoping:
    """streaming.py must pre-initialise its unregister callbacks and flags."""

    def _streaming(self):
        return read(REPO / "api/streaming.py")

    def test_unreg_notify_initialised_to_none(self):
        assert "_unreg_notify = None" in self._streaming(), \
            "_unreg_notify must be initialised to None before the try block"

    def test_finally_checks_unreg_notify_not_none(self):
        assert "_unreg_notify is not None" in self._streaming(), \
            "finally block must check '_unreg_notify is not None' before calling it"

    def test_approval_registered_flag_present(self):
        assert "_approval_registered = False" in self._streaming(), \
            "_approval_registered flag must be initialised to False"

    def test_clarify_registered_flag_present(self):
        assert "_clarify_registered = False" in self._streaming(), \
            "_clarify_registered flag must be initialised to False"

    def test_clarify_unreg_notify_initialised_to_none(self):
        assert "_unreg_clarify_notify = None" in self._streaming(), \
            "_unreg_clarify_notify must be initialised to None before the try block"

    def test_finally_checks_clarify_unreg_notify_not_none(self):
        assert "_unreg_clarify_notify is not None" in self._streaming(), \
            "finally block must check '_unreg_clarify_notify is not None' before calling it"
# ── HTTP regression: approval respond ────────────────────────────────────────
class TestApprovalRespondHTTP:
    """HTTP regression tests for POST /api/approval/respond."""

    def test_respond_ok_with_all_choices(self):
        """Every valid choice is accepted and echoed back with ok=True."""
        import uuid  # hoisted: importing once per loop iteration was wasteful
        for choice in ("once", "session", "always", "deny"):
            sid = f"sprint30-{uuid.uuid4().hex[:8]}"
            result, status = post("/api/approval/respond",
                                  {"session_id": sid, "choice": choice})
            assert status == 200, f"choice={choice} should return 200"
            assert result["ok"] is True
            assert result["choice"] == choice

    def test_respond_rejects_bad_choice(self):
        """An unknown choice value is rejected with HTTP 400."""
        _, status = post("/api/approval/respond",
                         {"session_id": "x", "choice": "HACKED"})
        assert status == 400

    def test_respond_requires_session_id(self):
        """A missing session_id is rejected with HTTP 400."""
        _, status = post("/api/approval/respond", {"choice": "deny"})
        assert status == 400

    def test_respond_returns_choice_field(self):
        """The response body must echo the submitted choice."""
        import uuid
        sid = f"sprint30-choice-{uuid.uuid4().hex[:8]}"
        result, status = post("/api/approval/respond",
                              {"session_id": sid, "choice": "always"})
        assert status == 200
        assert "choice" in result
        assert result["choice"] == "always"
class TestApprovalCardTimerLogic:
    """Tests for the 30s minimum visibility guard introduced in PR #225."""

    def _get_js(self):
        return pathlib.Path(__file__).parent.parent / 'static' / 'messages.js'

    def _src(self):
        # All assertions below inspect the same messages.js source text.
        return self._get_js().read_text()

    def test_approval_min_visible_ms_constant_present(self):
        """APPROVAL_MIN_VISIBLE_MS constant exists and is 30000."""
        import re
        code = self._src()
        assert 'APPROVAL_MIN_VISIBLE_MS' in code
        match = re.search(r'APPROVAL_MIN_VISIBLE_MS\s*=\s*(\d+)', code)
        assert match is not None, 'APPROVAL_MIN_VISIBLE_MS not assigned'
        assert int(match.group(1)) == 30000, f'Expected 30000, got {match.group(1)}'

    def test_hide_approval_card_has_force_parameter(self):
        """hideApprovalCard() accepts a force parameter."""
        code = self._src()
        has_default = 'hideApprovalCard(force=false)' in code or \
                      'hideApprovalCard(force = false)' in code
        assert has_default, \
            'hideApprovalCard must have force=false default parameter'

    def test_hide_approval_card_checks_force_flag(self):
        """hideApprovalCard body has a conditional on force."""
        # The guard: if (!force && _approvalVisibleSince)
        assert '!force' in self._src(), \
            'hideApprovalCard must check !force before deferred hide'

    def test_approval_hide_timer_variable_present(self):
        """Module-level _approvalHideTimer variable is declared."""
        assert '_approvalHideTimer' in self._src()

    def test_approval_visible_since_variable_present(self):
        """Module-level _approvalVisibleSince variable is declared."""
        assert '_approvalVisibleSince' in self._src()

    def test_approval_signature_variable_present(self):
        """Module-level _approvalSignature variable is declared."""
        assert '_approvalSignature' in self._src()

    def test_respond_approval_calls_hide_with_force(self):
        """respondApproval must call hideApprovalCard(true) — not no-arg."""
        import re
        code = self._src()
        # Extract respondApproval function body
        match = re.search(r'async function respondApproval.*?(?=\nasync function|\nfunction |\Z)',
                          code, re.DOTALL)
        assert match, 'respondApproval function not found'
        fn_body = match.group(0)
        # Must call hideApprovalCard(true), not the bare hideApprovalCard()
        assert 'hideApprovalCard(true)' in fn_body, \
            'respondApproval must call hideApprovalCard(true) so card hides immediately after user clicks'
        # Must NOT have bare hideApprovalCard() without force
        bare_calls = re.findall(r'hideApprovalCard\((?!true)', fn_body)
        assert not bare_calls, \
            f'respondApproval has bare hideApprovalCard() calls (no force=true): {bare_calls}'

    def test_stream_done_calls_hide_with_force(self):
        """Done SSE event handler must call hideApprovalCard(true)."""
        import re
        code = self._src()
        # Find the done event handler section (stopApprovalPolling followed by hideApprovalCard)
        # Look for pattern: stopApprovalPolling();\n + hideApprovalCard
        force_args = re.findall(
            r'stopApprovalPolling\(\);\s*\n\s*if\(!_approvalSessionId[^)]*\)\s*hideApprovalCard\((\w*)\)',
            code
        )
        # All stopApprovalPolling paths that call hideApprovalCard should use force=true
        for arg in force_args:
            assert arg == 'true', \
                f'After stopApprovalPolling(), hideApprovalCard called without force=true (got: {arg!r})'

    def test_poll_loop_still_uses_no_force(self):
        """Poll loop hideApprovalCard() (when pending gone) keeps no-force — correct behavior."""
        code = self._src()
        # Line 446: else { hideApprovalCard(); } — this is the poll-loop path
        # The 30s guard should protect this call (don't force from poll ticks)
        variants = ('else { hideApprovalCard(); }',
                    'else {hideApprovalCard();}',
                    'else { hideApprovalCard() }')
        assert any(v in code for v in variants), \
            'Poll loop should still call hideApprovalCard() without force=true'

    def test_show_approval_card_signature_dedup(self):
        """showApprovalCard uses a signature to avoid resetting timer on repeat polls."""
        import re
        code = self._src()
        # The sig computation must use JSON.stringify on card content
        match = re.search(r'function showApprovalCard.*?(?=\nfunction |\nasync function |\Z)',
                          code, re.DOTALL)
        assert match, 'showApprovalCard function not found'
        fn_body = match.group(0)
        assert 'JSON.stringify' in fn_body, 'showApprovalCard must compute a signature via JSON.stringify'
        assert '_approvalSignature' in fn_body, 'showApprovalCard must check/set _approvalSignature'

    def test_clear_approval_hide_timer_helper_present(self):
        """_clearApprovalHideTimer helper exists to cancel deferred hides."""
        assert '_clearApprovalHideTimer' in self._src(), \
            '_clearApprovalHideTimer helper must exist to cancel deferred setTimeout'
class TestClarifyCardTimerLogic:
    """Mirror of the approval-card timer tests for the clarify card."""

    def _get_js(self):
        return pathlib.Path(__file__).parent.parent / 'static' / 'messages.js'

    def _src(self):
        # All assertions below inspect the same messages.js source text.
        return self._get_js().read_text()

    def test_clarify_min_visible_ms_constant_present(self):
        import re
        code = self._src()
        assert 'CLARIFY_MIN_VISIBLE_MS' in code
        match = re.search(r'CLARIFY_MIN_VISIBLE_MS\s*=\s*(\d+)', code)
        assert match is not None, 'CLARIFY_MIN_VISIBLE_MS not assigned'
        assert int(match.group(1)) == 30000, f'Expected 30000, got {match.group(1)}'

    def test_hide_clarify_card_has_force_parameter(self):
        code = self._src()
        has_default = 'hideClarifyCard(force=false)' in code or \
                      'hideClarifyCard(force = false)' in code
        assert has_default, \
            'hideClarifyCard must have force=false default parameter'

    def test_hide_clarify_card_checks_force_flag(self):
        assert '!force' in self._src(), 'hideClarifyCard must check !force before deferred hide'

    def test_clarify_hide_timer_variable_present(self):
        assert '_clarifyHideTimer' in self._src()

    def test_clarify_visible_since_variable_present(self):
        assert '_clarifyVisibleSince' in self._src()

    def test_clarify_signature_variable_present(self):
        assert '_clarifySignature' in self._src()

    def test_respond_clarify_calls_hide_with_force(self):
        import re
        code = self._src()
        match = re.search(r'async function respondClarify.*?(?=\nasync function|\nfunction |\Z)',
                          code, re.DOTALL)
        assert match, 'respondClarify function not found'
        fn_body = match.group(0)
        assert 'hideClarifyCard(true)' in fn_body, \
            'respondClarify must call hideClarifyCard(true) so card hides immediately after user clicks'

    def test_clarify_poll_loop_uses_no_force(self):
        code = self._src()
        variants = ('else { hideClarifyCard(); }',
                    'else {hideClarifyCard();}',
                    'else { hideClarifyCard() }')
        assert any(v in code for v in variants), \
            'Clarify poll loop should hide without force=true'

    def test_show_clarify_card_signature_dedup(self):
        import re
        code = self._src()
        match = re.search(r'function showClarifyCard.*?(?=\nfunction |\nasync function |\Z)',
                          code, re.DOTALL)
        assert match, 'showClarifyCard function not found'
        fn_body = match.group(0)
        assert 'JSON.stringify' in fn_body, 'showClarifyCard must compute a signature via JSON.stringify'
        assert '_clarifySignature' in fn_body, 'showClarifyCard must check/set _clarifySignature'

143
tests/test_sprint31.py Normal file
View File

@@ -0,0 +1,143 @@
"""
Tests for issue #170: new profile form with optional custom endpoint fields.
Tests cover:
1. _write_endpoint_to_config writes base_url into config.yaml
2. _write_endpoint_to_config writes api_key into config.yaml
3. _write_endpoint_to_config writes both together
4. _write_endpoint_to_config merges with existing config (does not clobber)
5. _write_endpoint_to_config is a no-op when both args are None/empty
6. API route accepts base_url and api_key in POST body
7. Profile created via API has base_url in config.yaml
"""
import json
import pathlib
import shutil
import os
import pytest
yaml = pytest.importorskip("yaml", reason="PyYAML required for config write tests")
# ── 1-5: _write_endpoint_to_config unit tests ─────────────────────────────────
class TestWriteEndpointToConfig:
    """Unit tests for api.profiles._write_endpoint_to_config."""

    @staticmethod
    def _write(profile_dir, **endpoint_kwargs):
        # Imported at call time so collection does not require the api package.
        from api.profiles import _write_endpoint_to_config
        _write_endpoint_to_config(profile_dir, **endpoint_kwargs)

    @staticmethod
    def _read_config(profile_dir):
        return yaml.safe_load((profile_dir / "config.yaml").read_text())

    def test_writes_base_url(self, tmp_path):
        self._write(tmp_path, base_url="http://localhost:11434")
        assert self._read_config(tmp_path)["model"]["base_url"] == "http://localhost:11434"

    def test_writes_api_key(self, tmp_path):
        self._write(tmp_path, api_key="sk-local-test")
        assert self._read_config(tmp_path)["model"]["api_key"] == "sk-local-test"

    def test_writes_both(self, tmp_path):
        self._write(tmp_path, base_url="http://localhost:8080", api_key="mykey")
        config = self._read_config(tmp_path)
        assert config["model"]["base_url"] == "http://localhost:8080"
        assert config["model"]["api_key"] == "mykey"

    def test_merges_with_existing_config(self, tmp_path):
        """Does not clobber other top-level config keys."""
        existing = {"model": {"default": "gpt-4o", "provider": "openai"}, "agent": {"max_turns": 90}}
        (tmp_path / "config.yaml").write_text(yaml.dump(existing))
        self._write(tmp_path, base_url="http://localhost:1234")
        config = self._read_config(tmp_path)
        # Existing keys preserved
        assert config["model"]["default"] == "gpt-4o"
        assert config["model"]["provider"] == "openai"
        assert config["agent"]["max_turns"] == 90
        # New key added
        assert config["model"]["base_url"] == "http://localhost:1234"

    def test_noop_when_both_none(self, tmp_path):
        self._write(tmp_path, base_url=None, api_key=None)
        assert not (tmp_path / "config.yaml").exists()

    def test_noop_when_both_empty_strings(self, tmp_path):
        self._write(tmp_path, base_url="", api_key="")
        assert not (tmp_path / "config.yaml").exists()
# ── 6-7: API integration tests ────────────────────────────────────────────────
from tests._pytest_port import BASE as _TEST_BASE
def _post(path, body=None):
    """POST *body* (a dict, default ``{}``) as JSON to the test server.

    Returns ``(parsed_json, None)`` on success, or
    ``(parsed_error_json_or_empty_dict, http_status)`` when the server
    answers with an HTTP error.
    """
    import urllib.request
    # Explicit import: previously urllib.error was only resolvable because
    # importing urllib.request happens to import it as a side effect — an
    # implementation detail, not a guarantee.
    import urllib.error
    data = json.dumps(body or {}).encode()
    req = urllib.request.Request(
        _TEST_BASE + path, data=data, headers={"Content-Type": "application/json"}
    )
    try:
        with urllib.request.urlopen(req, timeout=10) as r:
            return json.loads(r.read()), None
    except urllib.error.HTTPError as e:
        try:
            # Error responses may carry a JSON body; fall back to {} if not.
            return json.loads(e.read()), e.code
        except Exception:
            return {}, e.code
class TestProfileCreateAPIWithEndpoint:
    _PROFILE_NAME = "test-ep-sprint31"

    def _cleanup(self):
        """Remove the test profile from wherever hermes_cli placed it."""
        home_hermes = pathlib.Path.home() / ".hermes"
        # Walk all profile roots: real ~/.hermes, and any subdirs that might be HERMES_HOME
        candidate_roots = {home_hermes}
        for root, dirs, _ in os.walk(str(home_hermes)):
            if "profiles" in dirs:
                candidate_roots.add(pathlib.Path(root))
            if root.count(os.sep) - str(home_hermes).count(os.sep) > 4:
                break  # don't recurse too deep
        for search_root in candidate_roots:
            leftover = search_root / "profiles" / self._PROFILE_NAME
            if leftover.exists():
                shutil.rmtree(leftover)

    def setup_method(self, _):
        self._cleanup()

    def teardown_method(self, _):
        self._cleanup()

    def test_api_route_accepts_base_url(self, test_server):
        """POST /api/profile/create with base_url returns ok:True."""
        data, err = _post("/api/profile/create", {
            "name": self._PROFILE_NAME,
            "base_url": "http://localhost:11434",
        })
        assert err is None, f"Expected 200, got {err}: {data}"
        assert data.get("ok") is True

    def test_api_route_writes_base_url_to_config(self, test_server):
        """Route accepts base_url and returns profile metadata.
        The actual config.yaml write is covered by the unit tests above.
        """
        data, err = _post("/api/profile/create", {
            "name": self._PROFILE_NAME,
            "base_url": "http://localhost:9999",
        })
        assert err is None, f"Expected 200, got {err}: {data}"
        assert data.get("ok") is True
        assert data.get("profile", {}).get("path"), f"API response missing profile.path: {data}"

    def test_api_route_rejects_invalid_base_url(self, test_server):
        """POST /api/profile/create with a non-http base_url returns 400."""
        data, err = _post("/api/profile/create", {
            "name": self._PROFILE_NAME,
            "base_url": "ftp://localhost:11434",
        })
        assert err == 400, f"Expected 400, got {err}: {data}"

72
tests/test_sprint32.py Normal file
View File

@@ -0,0 +1,72 @@
from pathlib import Path
from unittest.mock import MagicMock, patch
import subprocess
import os
from api.startup import auto_install_agent_deps
class TestAutoInstallAgentDeps:
    """Unit tests for api.startup.auto_install_agent_deps.

    subprocess.run is always mocked, so no pip process is ever spawned.
    """

    def test_installs_from_requirements_txt(self, tmp_path):
        """A requirements.txt in the agent dir triggers `pip install -r <file>`."""
        agent_dir = tmp_path / 'hermes-agent'
        agent_dir.mkdir()
        req = agent_dir / 'requirements.txt'
        req.write_text('pyyaml\n')
        with patch.dict('os.environ', {'HERMES_WEBUI_AGENT_DIR': str(agent_dir)}, clear=False):
            with patch('subprocess.run') as mock_run:
                mock_run.return_value = MagicMock(returncode=0, stderr='')
                assert auto_install_agent_deps() is True
                args = mock_run.call_args[0][0]
                assert '-r' in args and str(req) in args

    def test_falls_back_to_pyproject(self, tmp_path):
        """Without requirements.txt, a pyproject.toml installs the dir itself."""
        agent_dir = tmp_path / 'hermes-agent'
        agent_dir.mkdir()
        (agent_dir / 'pyproject.toml').write_text('[project]\nname="hermes-agent"\n')
        with patch.dict('os.environ', {'HERMES_WEBUI_AGENT_DIR': str(agent_dir)}, clear=False):
            with patch('subprocess.run') as mock_run:
                mock_run.return_value = MagicMock(returncode=0, stderr='')
                assert auto_install_agent_deps() is True
                args = mock_run.call_args[0][0]
                assert str(agent_dir) in args and '-r' not in args

    def test_skips_when_agent_dir_missing(self, tmp_path, capsys):
        """A nonexistent agent dir skips the install without running pip."""
        missing = tmp_path / 'nonexistent-agent'
        # Patch both HERMES_WEBUI_AGENT_DIR and HERMES_HOME so the fallback
        # path (HERMES_HOME/hermes-agent) also resolves to a nonexistent dir,
        # preventing the real agent dir from being found in the test environment.
        env_overrides = {
            'HERMES_WEBUI_AGENT_DIR': str(missing),
            'HERMES_HOME': str(tmp_path / 'no-hermes-home'),
        }
        with patch.dict('os.environ', env_overrides, clear=False):
            with patch('subprocess.run') as mock_run:
                assert auto_install_agent_deps() is False
                assert not mock_run.called
        assert 'skipped' in capsys.readouterr().out.lower()

    def test_skips_when_no_install_file(self, tmp_path, capsys):
        """An agent dir with neither requirements.txt nor pyproject.toml skips."""
        agent_dir = tmp_path / 'hermes-agent'
        agent_dir.mkdir()
        with patch.dict('os.environ', {'HERMES_WEBUI_AGENT_DIR': str(agent_dir)}, clear=False):
            with patch('subprocess.run') as mock_run:
                assert auto_install_agent_deps() is False
                assert not mock_run.called
        assert 'skipped' in capsys.readouterr().out.lower()

    def test_tolerates_pip_failure(self, tmp_path, capsys):
        """A non-zero pip exit code is reported, not raised."""
        agent_dir = tmp_path / 'hermes-agent'
        agent_dir.mkdir()
        (agent_dir / 'requirements.txt').write_text('somepkg\n')
        with patch.dict('os.environ', {'HERMES_WEBUI_AGENT_DIR': str(agent_dir)}, clear=False):
            with patch('subprocess.run') as mock_run:
                mock_run.return_value = MagicMock(returncode=1, stderr='ERROR: could not find package')
                assert auto_install_agent_deps() is False
        # BUG FIX: capsys.readouterr() drains the capture buffer, so the old
        # `'failed' in capsys.readouterr()... or 'pip' in capsys.readouterr()...`
        # made the second check always run against an empty string.
        # Capture once and test both substrings against the same snapshot.
        out = capsys.readouterr().out.lower()
        assert 'failed' in out or 'pip' in out

    def test_tolerates_timeout(self, tmp_path, capsys):
        """A pip timeout is caught and reported, not propagated."""
        agent_dir = tmp_path / 'hermes-agent'
        agent_dir.mkdir()
        (agent_dir / 'requirements.txt').write_text('somepkg\n')
        with patch.dict('os.environ', {'HERMES_WEBUI_AGENT_DIR': str(agent_dir)}, clear=False):
            with patch('subprocess.run', side_effect=subprocess.TimeoutExpired('pip', 120)):
                assert auto_install_agent_deps() is False
        assert 'timed out' in capsys.readouterr().out.lower()

59
tests/test_sprint33.py Normal file
View File

@@ -0,0 +1,59 @@
"""
Sprint 33 Tests: Shared app dialogs replace native confirm/prompt usage.
These tests verify the static assets expose the reusable confirm/input modal
and that browser-native confirm/prompt calls are no longer used in the Web UI.
"""
import pathlib
import re
REPO = pathlib.Path(__file__).parent.parent


def read(path):
    """Return the UTF-8 text of *path*, resolved relative to the repo root."""
    target = REPO / path
    return target.read_text(encoding="utf-8")
def test_index_has_shared_app_dialog_markup():
    """index.html ships every element id of the shared dialog component."""
    html = read("static/index.html")
    for element_id in (
        "appDialogOverlay",
        "appDialog",
        "appDialogTitle",
        "appDialogDesc",
        "appDialogInput",
        "appDialogCancel",
        "appDialogConfirm",
    ):
        assert f'id="{element_id}"' in html
def test_app_dialog_css_rules_exist():
    """style.css defines every selector the shared dialog relies on."""
    css = read("static/style.css")
    required_selectors = (
        ".app-dialog-overlay",
        ".app-dialog",
        ".app-dialog-input",
        ".app-dialog-actions",
        ".app-dialog-btn.confirm",
        ".app-dialog-btn.confirm.danger",
    )
    for selector in required_selectors:
        assert selector in css, f"missing CSS selector: {selector}"
def test_ui_js_exposes_shared_dialog_helpers():
    """ui.js defines the confirm/prompt helpers and a keydown listener."""
    ui_src = read("static/ui.js")
    for snippet in (
        "function showConfirmDialog(opts={})",
        "function showPromptDialog(opts={})",
        "document.addEventListener('keydown'",
    ):
        assert snippet in ui_src
def test_no_native_confirm_calls_remain_in_static_js():
    """No Web UI script may still call the browser-native confirm()."""
    native_confirm = re.compile(r"\bconfirm\s*\(")
    for path in (REPO / "static").glob("*.js"):
        contents = path.read_text(encoding="utf-8")
        assert not native_confirm.search(contents), f"native confirm() remains in {path.name}"
def test_no_native_prompt_calls_remain_in_static_js():
    """No Web UI script may still call the browser-native prompt()."""
    native_prompt = re.compile(r"\bprompt\s*\(")
    for path in (REPO / "static").glob("*.js"):
        contents = path.read_text(encoding="utf-8")
        assert not native_prompt.search(contents), f"native prompt() remains in {path.name}"

300
tests/test_sprint34.py Normal file
View File

@@ -0,0 +1,300 @@
"""
Sprint 34 Tests: OAuth provider support in onboarding (issues #303, #304).
Covers:
1. _provider_oauth_authenticated() returns True for known OAuth providers
with valid tokens in auth.json
2. _provider_oauth_authenticated() returns False when auth.json is absent,
empty, or has no token data
3. _provider_oauth_authenticated() returns False for unknown/API-key providers
4. _status_from_runtime() marks copilot/openai-codex as provider_ready when
credentials exist
5. _status_from_runtime() gives a helpful "hermes auth" note (not "API key")
for OAuth providers that have no credentials yet
6. API route /api/onboarding/status reflects OAuth-ready state
"""
import json
import pathlib
import tempfile
import unittest.mock
import pytest
REPO = pathlib.Path(__file__).parent.parent
from tests._pytest_port import BASE
# ── Helpers ──────────────────────────────────────────────────────────────────
def _make_auth_json(provider_id: str, tokens: dict, tmp_dir: pathlib.Path) -> pathlib.Path:
    """Write an auth.json with the given tokens for provider_id into tmp_dir."""
    auth_path = tmp_dir / "auth.json"
    payload = {"providers": {provider_id: tokens}}
    auth_path.write_text(json.dumps(payload), encoding="utf-8")
    return auth_path
# ── 1-3. _provider_oauth_authenticated unit tests ────────────────────────
class TestProviderOAuthAuthenticated:
    """Unit tests for the new _provider_oauth_authenticated() helper."""

    def _call(self, provider: str, hermes_home: pathlib.Path) -> bool:
        # Import fresh so we don't get a stale module reference
        from api.onboarding import _provider_oauth_authenticated
        return _provider_oauth_authenticated(provider, hermes_home)

    def test_returns_false_when_auth_json_absent(self, tmp_path):
        """No auth.json -> not authenticated."""
        assert self._call("openai-codex", tmp_path) is False

    def test_openai_codex_with_access_token(self, tmp_path):
        """openai-codex with a valid access_token -> authenticated."""
        tokens = {"access_token": "ey.test.token", "refresh_token": "ref123"}
        _make_auth_json("openai-codex", tokens, tmp_path)
        assert self._call("openai-codex", tmp_path) is True

    def test_openai_codex_with_refresh_token_only(self, tmp_path):
        """openai-codex with only a refresh_token -> still authenticated."""
        tokens = {"access_token": "", "refresh_token": "ref_only_token"}
        _make_auth_json("openai-codex", tokens, tmp_path)
        assert self._call("openai-codex", tmp_path) is True

    def test_copilot_with_api_key(self, tmp_path):
        """copilot with an api_key (GitHub token) -> authenticated."""
        _make_auth_json("copilot", {"api_key": "ghu_test_token_123"}, tmp_path)
        assert self._call("copilot", tmp_path) is True

    def test_empty_tokens_returns_false(self, tmp_path):
        """All token fields empty -> not authenticated."""
        tokens = {"access_token": "", "refresh_token": "", "api_key": ""}
        _make_auth_json("openai-codex", tokens, tmp_path)
        assert self._call("openai-codex", tmp_path) is False

    def test_missing_provider_key_in_auth_json(self, tmp_path):
        """auth.json present but provider key absent -> not authenticated."""
        store = {"providers": {"some-other-provider": {"access_token": "tok"}}}
        (tmp_path / "auth.json").write_text(json.dumps(store), encoding="utf-8")
        assert self._call("openai-codex", tmp_path) is False

    def test_unknown_provider_not_in_oauth_list(self, tmp_path):
        """A provider that is not a known OAuth provider -> always False."""
        _make_auth_json("some-random-provider", {"access_token": "tok"}, tmp_path)
        assert self._call("some-random-provider", tmp_path) is False

    def test_nous_provider_recognized(self, tmp_path):
        """nous is in the known OAuth set."""
        _make_auth_json("nous", {"access_token": "nous_tok"}, tmp_path)
        assert self._call("nous", tmp_path) is True

    def test_qwen_oauth_provider_recognized(self, tmp_path):
        """qwen-oauth is in the known OAuth set."""
        _make_auth_json("qwen-oauth", {"access_token": "qwen_tok"}, tmp_path)
        assert self._call("qwen-oauth", tmp_path) is True

    def test_empty_provider_string_returns_false(self, tmp_path):
        """Empty provider string -> False, no crash."""
        assert self._call("", tmp_path) is False
        assert self._call(" ", tmp_path) is False
# ── 4-5. _status_from_runtime integration ────────────────────────────────
class TestStatusFromRuntimeOAuth:
    """_status_from_runtime should treat OAuth providers with tokens as ready."""

    def _call(self, provider: str, model: str, hermes_home: pathlib.Path) -> dict:
        from api.onboarding import _status_from_runtime
        import api.onboarding as onboarding_mod
        saved_home_fn = onboarding_mod._get_active_hermes_home
        saved_found = onboarding_mod._HERMES_FOUND
        onboarding_mod._get_active_hermes_home = lambda: hermes_home
        # Simulate hermes-agent being available so we reach the provider logic
        # (without this, _status_from_runtime short-circuits to agent_unavailable)
        onboarding_mod._HERMES_FOUND = True
        try:
            config = {"model": {"provider": provider, "default": model}}
            return _status_from_runtime(config, True)
        finally:
            onboarding_mod._get_active_hermes_home = saved_home_fn
            onboarding_mod._HERMES_FOUND = saved_found

    def test_copilot_ready_when_api_key_in_auth_json(self, tmp_path):
        """copilot configured + api_key in auth.json -> provider_ready True."""
        _make_auth_json("copilot", {"api_key": "ghu_abc123"}, tmp_path)
        status = self._call("copilot", "gpt-5.4", tmp_path)
        assert status["provider_configured"] is True
        assert status["provider_ready"] is True
        assert status["setup_state"] == "ready"

    def test_openai_codex_ready_when_token_in_auth_json(self, tmp_path):
        """openai-codex configured + access_token -> provider_ready True."""
        tokens = {"access_token": "ey.test", "refresh_token": "ref"}
        _make_auth_json("openai-codex", tokens, tmp_path)
        status = self._call("openai-codex", "codex-mini-latest", tmp_path)
        assert status["provider_configured"] is True
        assert status["provider_ready"] is True
        assert status["setup_state"] == "ready"

    def test_copilot_not_ready_without_credentials(self, tmp_path):
        """copilot configured but no credentials -> provider_ready False.
        We mock hermes_cli.auth to be unavailable so the function falls through
        to the auth.json path. With no auth.json the result must be False.
        """
        # Prevent the hermes_cli fast path from finding real credentials
        with unittest.mock.patch(
            "api.onboarding._provider_oauth_authenticated",
            return_value=False,
        ):
            status = self._call("copilot", "gpt-5.4", tmp_path)
        assert status["provider_configured"] is True
        assert status["provider_ready"] is False
        assert status["setup_state"] == "provider_incomplete"

    def test_oauth_incomplete_note_mentions_hermes_auth(self, tmp_path):
        """When OAuth provider is incomplete, note should mention hermes auth/model."""
        status = self._call("openai-codex", "codex-mini-latest", tmp_path)
        note = status["provider_note"]
        assert "hermes auth" in note or "hermes model" in note, (
            f"Expected 'hermes auth' or 'hermes model' in note, got: {note!r}"
        )

    def test_oauth_incomplete_note_does_not_say_api_key(self, tmp_path):
        """OAuth provider incomplete note must not say 'API key' — that's misleading."""
        status = self._call("copilot", "gpt-5.4", tmp_path)
        note = status["provider_note"]
        assert "API key" not in note, (
            f"Note misleadingly mentions 'API key' for OAuth provider: {note!r}"
        )

    def test_standard_provider_incomplete_note_still_says_api_key(self, tmp_path):
        """For a standard API-key provider (openrouter), note should still say API key."""
        # openrouter with no .env
        status = self._call("openrouter", "anthropic/claude-sonnet-4.6", tmp_path)
        assert status["provider_ready"] is False
        note = status["provider_note"]
        assert "API key" in note, (
            f"Expected 'API key' in note for openrouter, got: {note!r}"
        )
# ── 6. API endpoint reflects OAuth-ready state ───────────────────────────────
class TestOnboardingStatusApiOAuth:
    """
    The /api/onboarding/status endpoint should report provider_ready=True
    when an OAuth provider is configured and has valid credentials.
    """

    _STATUS_URL = "/api/onboarding/status"

    def test_status_endpoint_returns_200(self):
        import urllib.request
        with urllib.request.urlopen(BASE + self._STATUS_URL, timeout=10) as resp:
            assert resp.status == 200
            payload = json.loads(resp.read())
        assert "system" in payload
        assert "provider_ready" in payload["system"]

    def test_onboarding_status_has_chat_ready_field(self):
        import urllib.request
        with urllib.request.urlopen(BASE + self._STATUS_URL, timeout=10) as resp:
            payload = json.loads(resp.read())
        assert "chat_ready" in payload["system"]

    def test_status_setup_state_valid_values(self):
        """setup_state must be one of the known string values."""
        import urllib.request
        with urllib.request.urlopen(BASE + self._STATUS_URL, timeout=10) as resp:
            payload = json.loads(resp.read())
        valid = {"ready", "provider_incomplete", "needs_provider", "agent_unavailable"}
        assert payload["system"]["setup_state"] in valid, (
            f"Unexpected setup_state: {payload['system']['setup_state']!r}"
        )
# ── Control Center: section reset on close ─────────────────────────────────
def test_control_center_resets_active_section_on_close():
    """Closing the control center must reset _settingsSection to 'conversation'."""
    # read_text() instead of bare open(...).read(): the old form never closed
    # the file handle, leaking it until GC (ResourceWarning under pytest).
    src = (pathlib.Path(__file__).parent.parent / 'static' / 'panels.js').read_text()
    assert '_settingsSection' in src, '_settingsSection state variable missing from panels.js'
    assert "_settingsSection = 'conversation'" in src or "_settingsSection='conversation'" in src, \
        'Control center does not reset section to conversation on close'
def test_control_center_tab_highlight_on_open():
    """Opening the control center must use settings-tabs for section navigation."""
    # read_text() instead of bare open(...).read() so the handle is closed promptly.
    css = (pathlib.Path(__file__).parent.parent / 'static' / 'style.css').read_text()
    assert 'settings-tabs' in css, 'settings-tabs CSS class for control center tabs missing from style.css'
# ── apply_onboarding_setup: unsupported/OAuth providers complete gracefully ──
class TestApplyOnboardingSetupUnsupportedProvider:
    """PR #323 / Issue #322: apply_onboarding_setup must not raise ValueError for
    providers already configured via CLI (openai-codex, copilot, nous, etc.).
    Instead it marks onboarding complete and returns current status.
    """

    def _call(self, provider: str) -> tuple:
        """Run apply_onboarding_setup for *provider* against a throwaway HERMES home.

        Returns ``(result, mock_save)``: the function's return value plus the
        save_settings mock so tests can inspect recorded calls.
        (The previous ``-> dict`` annotation was wrong — this always returns a
        tuple; the unused ``os`` import is also dropped.)
        """
        import sys, pathlib, unittest.mock, tempfile
        repo = pathlib.Path(__file__).parent.parent
        if str(repo) not in sys.path:
            sys.path.insert(0, str(repo))
        from api.onboarding import apply_onboarding_setup
        with tempfile.TemporaryDirectory() as tmp:
            with unittest.mock.patch("api.onboarding._get_active_hermes_home",
                                     return_value=pathlib.Path(tmp)), \
                 unittest.mock.patch("api.onboarding._get_config_path",
                                     return_value=pathlib.Path(tmp) / "config.yaml"), \
                 unittest.mock.patch("api.onboarding.save_settings") as mock_save, \
                 unittest.mock.patch("api.onboarding.get_onboarding_status",
                                     return_value={"completed": True, "system": {}}):
                result = apply_onboarding_setup({"provider": provider, "model": "", "api_key": ""})
            return result, mock_save

    def test_openai_codex_does_not_raise(self):
        """apply_onboarding_setup with openai-codex must not raise ValueError."""
        result, _ = self._call("openai-codex")
        assert result is not None

    def test_copilot_does_not_raise(self):
        """apply_onboarding_setup with copilot must not raise ValueError."""
        result, _ = self._call("copilot")
        assert result is not None

    def test_nous_does_not_raise(self):
        """apply_onboarding_setup with nous must not raise ValueError."""
        result, _ = self._call("nous")
        assert result is not None

    def test_unsupported_provider_marks_onboarding_complete(self):
        """apply_onboarding_setup with an unsupported provider must save onboarding_completed=True."""
        _, mock_save = self._call("openai-codex")
        calls = [str(c) for c in mock_save.call_args_list]
        assert any("onboarding_completed" in c for c in calls), \
            "save_settings must be called with onboarding_completed=True for unsupported providers"

    def test_unsupported_provider_returns_status_dict(self):
        """apply_onboarding_setup with an unsupported provider must return a status dict (not raise)."""
        result, _ = self._call("openai-codex")
        assert isinstance(result, dict), \
            "apply_onboarding_setup must return a dict for unsupported providers, not raise"

146
tests/test_sprint35.py Normal file
View File

@@ -0,0 +1,146 @@
"""
Sprint 35 Tests: Breadcrumb nav + wider panel + responsive message width (PR #302).
Covers:
1. PANEL_MAX raised from 500 to 1200 in boot.js
2. Responsive .messages-inner breakpoints in style.css (no hardcoded 800px)
3. renderFileBreadcrumb() function exists in workspace.js
4. renderFileBreadcrumb() is called from openFile()
5. clearPreview() calls renderBreadcrumb() to restore dir breadcrumb
6. Breadcrumb segments use correct CSS classes
7. breadcrumbBar element exists in index.html
8. Breadcrumb CSS rules exist in style.css
"""
import pathlib
import re
REPO = pathlib.Path(__file__).parent.parent


def read(path):
    """Return the UTF-8 text of *path*, resolved relative to the repo root."""
    target = REPO / path
    return target.read_text(encoding="utf-8")
# ── 1. PANEL_MAX raised ──────────────────────────────────────────────────────
def test_panel_max_raised_to_1200():
    """PANEL_MAX must be 1200 (raised from 500) for wider right panel."""
    boot = read("static/boot.js")
    widened = "PANEL_MAX=1200" in boot or "PANEL_MAX = 1200" in boot
    assert widened, (
        "PANEL_MAX was not raised to 1200 — right panel cannot be widened on ultrawide screens"
    )
def test_panel_max_is_not_500():
    """Old PANEL_MAX=500 must no longer be present."""
    boot = read("static/boot.js")
    stale = "PANEL_MAX=500" in boot or "PANEL_MAX = 500" in boot
    assert not stale, (
        "Old PANEL_MAX=500 still present — right panel width not updated"
    )
# ── 2. Responsive messages-inner ─────────────────────────────────────────────
def test_messages_inner_has_responsive_breakpoints():
    """style.css must have @media breakpoints for .messages-inner."""
    css = read("static/style.css")
    for compact, spaced, width in (
        ("min-width:1400px", "min-width: 1400px", "1400px"),
        ("min-width:1800px", "min-width: 1800px", "1800px"),
    ):
        assert compact in css or spaced in css, (
            f"Missing @media(min-width:{width}) breakpoint for .messages-inner"
        )
def test_messages_inner_no_hardcoded_800px():
    """The base .messages-inner rule must not hardcode max-width:800px."""
    css = read("static/style.css")
    # Scan line-by-line: a base .messages-inner rule (not inside a @media
    # block) must not pin max-width:800px on the same line.
    offenders = [
        ln for ln in css.splitlines()
        if ".messages-inner{" in ln and "max-width:800px" in ln
    ]
    if offenders:
        raise AssertionError(
            "Base .messages-inner still has hardcoded max-width:800px — "
            "responsive breakpoints not applied"
        )
def test_messages_inner_breakpoint_values():
    """The breakpoints should expand max-width at 1400px and 1800px."""
    css = read("static/style.css")
    checks = (
        ("max-width:1100px", "max-width: 1100px",
         "Expected max-width:1100px at 1400px breakpoint"),
        ("max-width:1200px", "max-width: 1200px",
         "Expected max-width:1200px at 1800px breakpoint"),
    )
    for compact, spaced, msg in checks:
        assert compact in css or spaced in css, msg
# ── 3–6. Breadcrumb navigation ──────────────────────────────────────────────
def test_render_file_breadcrumb_function_exists():
    """workspace.js must expose renderFileBreadcrumb()."""
    src = read("static/workspace.js")
    needle = "function renderFileBreadcrumb"
    assert needle in src, (
        "renderFileBreadcrumb() not defined in workspace.js"
    )
def test_render_file_breadcrumb_called_from_open_file():
    """openFile() must call renderFileBreadcrumb(path) to show path segments."""
    src = read("static/workspace.js")
    call = "renderFileBreadcrumb(path)"
    assert call in src, (
        "openFile() does not call renderFileBreadcrumb(path)"
    )
def test_breadcrumb_has_root_segment():
    """renderFileBreadcrumb must add a root '~' segment."""
    src = read("static/workspace.js")
    idx = src.find("function renderFileBreadcrumb")
    # BUG FIX: without this guard, idx == -1 silently slices from the END of
    # the file (src[-1:799]) and fails with a misleading "missing '~'" message.
    assert idx != -1, "renderFileBreadcrumb not found in workspace.js"
    block = src[idx:idx + 800]
    assert "'~'" in block or '"~"' in block, (
        "renderFileBreadcrumb missing root '~' segment"
    )
def test_breadcrumb_segments_use_correct_classes():
    """Breadcrumb segments must use breadcrumb-seg breadcrumb-link/current classes."""
    src = read("static/workspace.js")
    for css_class in ("breadcrumb-seg", "breadcrumb-link", "breadcrumb-current"):
        assert css_class in src, f"{css_class} class not used"
def test_clear_preview_calls_render_breadcrumb():
    """clearPreview() in boot.js must call renderBreadcrumb() to restore dir view."""
    src = read("static/boot.js")
    # Locate clearPreview and inspect its immediate body for the restore call.
    idx = src.find("function clearPreview")
    assert idx != -1, "clearPreview not found in boot.js"
    body = src[idx:idx + 600]
    assert "renderBreadcrumb" in body, (
        "clearPreview() does not call renderBreadcrumb() — "
        "directory breadcrumb won't restore after closing file preview"
    )
# ── 7. HTML markup ───────────────────────────────────────────────────────────
def test_breadcrumb_bar_in_index_html():
    """index.html must have the breadcrumbBar element."""
    markup = read("static/index.html")
    assert 'id="breadcrumbBar"' in markup, (
        "breadcrumbBar element missing from index.html — "
        "renderFileBreadcrumb() has nowhere to render"
    )
# ── 8. Breadcrumb CSS ────────────────────────────────────────────────────────
def test_breadcrumb_css_rules_exist():
    """style.css must have breadcrumb CSS rules."""
    css = read("static/style.css")
    selectors = (".breadcrumb-seg", ".breadcrumb-link", ".breadcrumb-current")
    for sel in selectors:
        if sel not in css:
            raise AssertionError(f"Missing CSS rule: {sel}")

182
tests/test_sprint36.py Normal file
View File

@@ -0,0 +1,182 @@
"""
Sprint 36 Tests: cancelStream cleanup no longer depends on SSE event (PR #309 / issue #299).
The old cancelStream() set "Cancelling..." status and then relied on the SSE cancel
event to clear it. If the SSE connection was already closed, the event never arrived
and "Cancelling..." lingered indefinitely.
The fix: cancelStream() now clears status, busy state, activeStreamId, and the cancel
button directly after the cancel API request completes — regardless of whether the SSE
cancel event fires. The SSE handler still runs if it arrives (all operations idempotent).
Covers:
1. cancelStream() clears activeStreamId unconditionally after the fetch
2. cancelStream() calls setBusy(false) unconditionally
3. cancelStream() calls setStatus('') unconditionally
4. cancelStream() hides the cancel button unconditionally
5. The catch block no longer calls setStatus(cancel_failed) — cleanup runs even on error
6. The SSE cancel handler is still present (idempotent path)
7. cancel_failed i18n key is still defined in all locales (key exists, just not used in
the catch-path anymore — kept for potential future use)
"""
import pathlib
import re
REPO = pathlib.Path(__file__).parent.parent
def read(path):
    """Read *path* (relative to the repo root) as UTF-8 text."""
    target = REPO / path
    return target.read_text(encoding="utf-8")
def _locale_count(src: str) -> int:
pattern = re.compile(
r"^\s{2}(?:'(?P<quoted>[A-Za-z0-9-]+)'|(?P<plain>[A-Za-z0-9-]+))\s*:\s*\{",
re.MULTILINE,
)
return sum(1 for _ in pattern.finditer(src))
# ── 1–4. cancelStream() cleanup is unconditional ────────────────────────────
class TestCancelStreamCleanup:
    """cancelStream() must clear all busy state regardless of SSE connection state."""
    def _get_cancel_block(self):
        """Extract the cancelStream function body from boot.js."""
        src = read("static/boot.js")
        idx = src.find("async function cancelStream()")
        assert idx != -1, "cancelStream not found in boot.js"
        # Find the closing brace — scan for the matching }.
        # NOTE(review): braces inside JS string literals would skew the count;
        # acceptable for this source as written today.
        depth = 0
        end = idx
        for i, ch in enumerate(src[idx:]):
            if ch == '{':
                depth += 1
            elif ch == '}':
                depth -= 1
                if depth == 0:
                    end = idx + i + 1
                    break
        return src[idx:end]
    def test_clears_active_stream_id(self):
        """cancelStream() must null out S.activeStreamId after the request."""
        block = self._get_cancel_block()
        assert "S.activeStreamId=null" in block or "S.activeStreamId = null" in block, (
            "cancelStream() does not clear S.activeStreamId — "
            "subsequent calls could re-cancel an already-finished stream"
        )
    def test_calls_set_busy_false(self):
        """cancelStream() must call setBusy(false) directly."""
        block = self._get_cancel_block()
        assert "setBusy(false)" in block, (
            "cancelStream() does not call setBusy(false) — "
            "spinner may linger if SSE connection is already closed"
        )
    def test_calls_set_status_empty(self):
        """cancelStream() must call setStatus('') to clear 'Cancelling...' text."""
        block = self._get_cancel_block()
        assert "setStatus('')" in block or 'setStatus("")' in block, (
            "cancelStream() does not clear status text — "
            "'Cancelling...' can linger if SSE cancel event never arrives"
        )
    def test_hides_cancel_button(self):
        """cancelStream() must hide the cancel button unconditionally."""
        block = self._get_cancel_block()
        assert "btnCancel" in block, (
            "cancelStream() does not reference btnCancel — cancel button may stay visible"
        )
    def test_cleanup_not_inside_try_block(self):
        """Cleanup must happen outside the try block so it runs even if fetch fails."""
        block = self._get_cancel_block()
        # The cleanup must appear AFTER the catch clause; otherwise it sits
        # inside the try and won't run when the fetch throws.
        catch_idx = block.find("}catch(")
        if catch_idx == -1:
            catch_idx = block.find("} catch (")
        # BUG FIX: previously catch_idx could remain -1 (catch spelled with
        # spaces was not searched), which made the position comparison below
        # pass vacuously; also removed the unused try_idx local.
        assert catch_idx != -1, "No catch block found in cancelStream"
        cleanup_idx = block.find("S.activeStreamId=null")
        if cleanup_idx == -1:
            cleanup_idx = block.find("S.activeStreamId = null")
        assert cleanup_idx > catch_idx, (
            "S.activeStreamId cleanup appears to be inside the try block — "
            "it won't run if the fetch throws"
        )
# ── 5. Error path behavior ────────────────────────────────────────────────────
class TestCancelStreamErrorPath:
    """The catch block should not prevent cleanup from running."""
    def test_catch_block_does_not_call_set_status_cancel_failed(self):
        """The catch block must not call setStatus(cancel_failed) on its own.
        Previously: catch(e){setStatus(t('cancel_failed')+e.message)}
        After fix: catch swallows the error; cleanup runs in the outer scope.
        The status is cleared by setStatus('') unconditionally.
        """
        src = read("static/boot.js")
        start = src.find("async function cancelStream()")
        snippet = src[start:start + 400]
        # Locate the catch clause in either compact or spaced form.
        catch_pos = snippet.find("}catch(")
        if catch_pos == -1:
            catch_pos = snippet.find("} catch (")
        assert catch_pos != -1, "No catch block found in cancelStream"
        # Isolate just the catch body between its braces.
        open_brace = snippet.find("{", catch_pos)
        close_brace = snippet.find("}", open_brace)
        catch_body = snippet[open_brace:close_brace + 1]
        assert "cancel_failed" not in catch_body, (
            "catch block still calls setStatus(cancel_failed) — "
            "this means a failed cancel shows an error instead of cleaning up silently"
        )
# ── 6. SSE cancel handler still present ──────────────────────────────────────
def test_sse_cancel_handler_still_present():
    """The SSE 'cancel' event handler must still exist in messages.js.
    The new cancelStream() cleanup is not a replacement — the SSE handler
    provides additional cleanup (removes 'Task cancelled.' message, clears
    tool cards, etc.) when the connection is still alive.
    """
    src = read("static/messages.js")
    spellings = ("addEventListener('cancel'", 'addEventListener("cancel"')
    assert any(s in src for s in spellings), (
        "SSE cancel event handler missing from messages.js — "
        "live cancellation cleanup path is broken"
    )
def test_sse_cancel_handler_calls_set_busy():
    """The SSE cancel handler must still call setBusy(false)."""
    src = read("static/messages.js")
    pos = src.find("addEventListener('cancel'")
    if pos == -1:
        pos = src.find('addEventListener("cancel"')
    assert pos != -1
    handler = src[pos:pos + 1000]
    assert "setBusy(false)" in handler, (
        "SSE cancel handler no longer calls setBusy(false)"
    )
# ── 7. i18n key preserved ─────────────────────────────────────────────────────
def test_cancel_failed_i18n_key_exists_in_all_locales():
    """cancel_failed key must still exist in i18n.js for all locales."""
    src = read("static/i18n.js")
    # One occurrence expected per locale (en, es, de, ru, zh, zh-Hant).
    expected = _locale_count(src)
    actual = src.count("cancel_failed:")
    assert actual >= expected, (
        f"cancel_failed key only found {actual} times in i18n.js — "
        f"expected at least {expected} (one per locale)"
    )

103
tests/test_sprint37.py Normal file
View File

@@ -0,0 +1,103 @@
"""
Sprint 37 Tests: Workspace panel open/closed state persists across refreshes via localStorage.
"""
import pathlib
import re
REPO_ROOT = pathlib.Path(__file__).parent.parent
# Read the assets once at import time.  CONSISTENCY FIX: explicit UTF-8
# matches the read() helpers in the sibling sprint test modules and avoids
# locale-dependent decoding failures (e.g. cp1252 on Windows).
BOOT_JS = (REPO_ROOT / "static" / "boot.js").read_text(encoding="utf-8")
HTML = (REPO_ROOT / "static" / "index.html").read_text(encoding="utf-8")
# ── Persistence: save on change ───────────────────────────────────────────────
def test_workspace_panel_saves_to_localstorage():
    """_setWorkspacePanelMode must call localStorage.setItem with hermes-webui-workspace-panel."""
    storage_key = "hermes-webui-workspace-panel"
    assert storage_key in BOOT_JS, \
        "boot.js must use localStorage key 'hermes-webui-workspace-panel' to persist panel state"
def test_workspace_panel_save_inside_set_mode():
    """localStorage.setItem for panel state must live inside _setWorkspacePanelMode."""
    fn_idx = BOOT_JS.find("function _setWorkspacePanelMode(")
    # BUG FIX: guard against a missing function — a -1 index would slice
    # nonsense from the end of the file and fail with a misleading message.
    assert fn_idx != -1, "_setWorkspacePanelMode not found in boot.js"
    fn_end = BOOT_JS.find("\n}", fn_idx) + 2
    fn_body = BOOT_JS[fn_idx:fn_end]
    assert "hermes-webui-workspace-panel" in fn_body, \
        "localStorage save must be inside _setWorkspacePanelMode so every state change is captured"
def test_workspace_panel_saves_open_value():
    """When the panel is open, localStorage must be set to 'open'."""
    fn_idx = BOOT_JS.find("function _setWorkspacePanelMode(")
    # BUG FIX: guard against a missing function (see sibling tests).
    assert fn_idx != -1, "_setWorkspacePanelMode not found in boot.js"
    fn_end = BOOT_JS.find("\n}", fn_idx) + 2
    fn_body = BOOT_JS[fn_idx:fn_end]
    assert "'open'" in fn_body or '"open"' in fn_body, \
        "_setWorkspacePanelMode must store 'open' for an open panel state"
def test_workspace_panel_saves_closed_value():
    """When the panel is closed, localStorage must be set to 'closed'."""
    fn_idx = BOOT_JS.find("function _setWorkspacePanelMode(")
    # BUG FIX: guard against a missing function (see sibling tests).
    assert fn_idx != -1, "_setWorkspacePanelMode not found in boot.js"
    fn_end = BOOT_JS.find("\n}", fn_idx) + 2
    fn_body = BOOT_JS[fn_idx:fn_end]
    assert "'closed'" in fn_body or '"closed"' in fn_body, \
        "_setWorkspacePanelMode must store 'closed' for a closed panel state"
# ── Persistence: restore on boot ─────────────────────────────────────────────
def test_workspace_panel_restored_on_boot():
    """Boot IIFE must read hermes-webui-workspace-panel from localStorage and restore the mode."""
    # Find the boot IIFE (the async IIFE at the bottom of boot.js).
    iife_idx = BOOT_JS.rfind("(async function")
    if iife_idx < 0:
        iife_idx = BOOT_JS.rfind("(async()=>{")
    # BUG FIX: without this guard, a missing IIFE means iife_idx == -1 and
    # BOOT_JS[-1:] is just the last character — a misleading failure.
    assert iife_idx >= 0, "boot IIFE not found in boot.js"
    iife_body = BOOT_JS[iife_idx:]
    assert "hermes-webui-workspace-panel" in iife_body, \
        "Boot IIFE must read 'hermes-webui-workspace-panel' from localStorage to restore panel state on load"
def test_workspace_panel_restore_sets_browse_mode():
    """When localStorage says 'open', boot must set _workspacePanelMode to 'browse' before syncing."""
    iife_idx = BOOT_JS.rfind("(async function")
    if iife_idx < 0:
        iife_idx = BOOT_JS.rfind("(async()=>{")
    # BUG FIX: guard against a missing IIFE (see sibling tests).
    assert iife_idx >= 0, "boot IIFE not found in boot.js"
    iife_body = BOOT_JS[iife_idx:]
    # The restore block must assign _workspacePanelMode = 'browse'
    assert "_workspacePanelMode='browse'" in iife_body or "_workspacePanelMode = 'browse'" in iife_body, \
        "Boot must set _workspacePanelMode='browse' when restoring an open panel"
def test_workspace_panel_restore_before_sync():
    """Restore must happen before syncWorkspacePanelState() so the state drives the initial render."""
    iife_idx = BOOT_JS.rfind("(async function")
    if iife_idx < 0:
        iife_idx = BOOT_JS.rfind("(async()=>{")
    # BUG FIX: guard against a missing IIFE (see sibling tests).
    assert iife_idx >= 0, "boot IIFE not found in boot.js"
    iife_body = BOOT_JS[iife_idx:]
    restore_pos = iife_body.find("hermes-webui-workspace-panel")
    sync_pos = iife_body.find("syncWorkspacePanelState()")
    assert restore_pos >= 0, "restore read must be present in boot IIFE"
    assert sync_pos >= 0, "syncWorkspacePanelState call must be present in boot IIFE"
    assert restore_pos < sync_pos, \
        "Workspace panel restore must happen BEFORE syncWorkspacePanelState() so the correct mode is applied"
def test_workspace_panel_preload_marker_restored_in_head():
    """index.html must preload the workspace panel state before the main stylesheet paints."""
    marker = "document.documentElement.dataset.workspacePanel"
    css_link = '<link rel="stylesheet" href="static/style.css">'
    marker_at = HTML.find(marker)
    css_at = HTML.find(css_link)
    assert marker_at >= 0, "index.html must preload documentElement.dataset.workspacePanel from localStorage"
    assert css_at >= 0, "main stylesheet link missing from index.html"
    assert marker_at < css_at, \
        "workspace panel preload marker must be set before style.css loads to avoid first-paint flash"
def test_workspace_panel_mode_syncs_document_dataset():
    """_setWorkspacePanelMode must update documentElement.dataset.workspacePanel for runtime parity."""
    fn_idx = BOOT_JS.find("function _setWorkspacePanelMode(")
    # BUG FIX: guard against a missing function (see sibling tests).
    assert fn_idx != -1, "_setWorkspacePanelMode not found in boot.js"
    fn_end = BOOT_JS.find("\n}", fn_idx) + 2
    fn_body = BOOT_JS[fn_idx:fn_end]
    assert "document.documentElement.dataset.workspacePanel" in fn_body, \
        "_setWorkspacePanelMode must keep documentElement.dataset.workspacePanel in sync with the panel state"

140
tests/test_sprint38.py Normal file
View File

@@ -0,0 +1,140 @@
"""
Sprint 38 Tests: Think-tag stripping with leading whitespace (PR #327).
Covers the static render path (ui.js regex logic, verified against the JS source)
and the streaming render path (messages.js _streamDisplay logic).
"""
import pathlib
import re
REPO_ROOT = pathlib.Path(__file__).parent.parent
# CONSISTENCY FIX: explicit UTF-8 matches the read() helpers in the sibling
# sprint test modules and keeps decoding deterministic across platforms.
UI_JS = (REPO_ROOT / "static" / "ui.js").read_text(encoding="utf-8")
MSG_JS = (REPO_ROOT / "static" / "messages.js").read_text(encoding="utf-8")
# ── ui.js: static render path ────────────────────────────────────────────────
def test_think_regex_has_no_anchor():
    """The <think> regex in ui.js must not use a ^ anchor so leading whitespace is allowed."""
    # Locate the thinkMatch line via its .match( call.
    idx = UI_JS.find("const thinkMatch=content.match(")
    assert idx >= 0, "thinkMatch line not found in ui.js"
    line = UI_JS[idx:idx + 100]
    # The regex must NOT start with ^ right after the opening /.
    anchored = "/^<think>" in line or "(/^" in line
    assert not anchored, \
        f"thinkMatch regex must not use ^ anchor — found: {line.strip()}"
def test_gemma_regex_has_no_anchor():
    """The Gemma channel-token regex in ui.js must not use a ^ anchor."""
    found = re.search(r'const gemmaMatch=content\.match\((/[^/]+/)\)', UI_JS)
    assert found, "gemmaMatch line not found in ui.js"
    literal = found.group(1)
    assert not literal.startswith('/^'), \
        f"gemmaMatch regex must not use ^ anchor — got {literal}"
def test_think_content_removal_uses_replace_not_slice():
    """After extracting thinkingText, content must use .replace() not .slice() to remove the tag."""
    # Find the block that handles thinkMatch.
    handler_at = UI_JS.find("if(thinkMatch){")
    assert handler_at >= 0, "thinkMatch handler block not found"
    handler = UI_JS[handler_at:handler_at + 200]
    assert "content.replace(" in handler, \
        "ui.js must use content.replace() to remove <think> block (not .slice())"
    assert ".trimStart()" in handler, \
        "ui.js must call .trimStart() on content after removing the <think> block"
def test_gemma_content_removal_uses_replace_not_slice():
    """Gemma channel token removal must also use .replace() not .slice()."""
    handler_at = UI_JS.find("if(gemmaMatch){")
    assert handler_at >= 0, "gemmaMatch handler block not found"
    handler = UI_JS[handler_at:handler_at + 200]
    assert "content.replace(" in handler, \
        "ui.js must use content.replace() to remove Gemma channel block (not .slice())"
    assert ".trimStart()" in handler, \
        "ui.js must call .trimStart() on content after removing the Gemma channel block"
def test_gemma_turn_regex_in_ui_js():
    """The Gemma 4 <|turn|>thinking\\n...<turn|> pattern must be extracted from persisted content."""
    # Detection in _messageHasReasoningPayload (correct double-pipe format).
    assert "<\\|turn\\|>thinking" in UI_JS, (
        "ui.js _messageHasReasoningPayload must detect Gemma 4 <|turn|>thinking\\n...<turn|> pattern"
        " (note: double-pipe: <|turn|> not <|turn>)"
    )
    # Extraction block.
    found = re.search(r'const gemmaTurnMatch=content\.match\((/[^/]+/)\)', UI_JS)
    assert found, "gemmaTurnMatch line not found in ui.js"
    literal = found.group(1)
    assert not literal.startswith('/^'), (
        f"gemmaTurnMatch regex must not use ^ anchor — got {literal}"
    )
def test_gemma_turn_content_removal_uses_replace_not_slice():
    """Gemma 4 turn token removal must use .replace() not .slice()."""
    handler_at = UI_JS.find("if(gemmaTurnMatch){")
    assert handler_at >= 0, "gemmaTurnMatch handler block not found in ui.js"
    handler = UI_JS[handler_at:handler_at + 240]
    assert "content.replace(" in handler, (
        "ui.js must use content.replace() to remove Gemma 4 turn block (not .slice())"
    )
    assert ".trimStart()" in handler, (
        "ui.js must call .trimStart() on content after removing the Gemma 4 turn block"
    )
# ── messages.js: streaming render path ───────────────────────────────────────
def test_stream_display_trims_before_startswith():
    """_streamDisplay in messages.js must call .trimStart() before .startsWith() check."""
    fn_at = MSG_JS.find("function _streamDisplay()")
    assert fn_at >= 0, "_streamDisplay function not found in messages.js"
    fn_body = MSG_JS[fn_at:MSG_JS.find("\n }", fn_at) + 4]
    assert "trimStart()" in fn_body, \
        "_streamDisplay must call trimStart() to handle models that emit leading whitespace before <think>"
def test_stream_display_uses_trimmed_for_startswith():
    """_streamDisplay must check trimmed.startsWith(open), not raw.startsWith(open)."""
    fn_idx = MSG_JS.find("function _streamDisplay()")
    # BUG FIX: each test must guard independently — a -1 here would slice
    # from the end of the file and fail with a misleading message.
    assert fn_idx >= 0, "_streamDisplay function not found in messages.js"
    fn_end = MSG_JS.find("\n }", fn_idx) + 4
    fn_body = MSG_JS[fn_idx:fn_end]
    assert "trimmed.startsWith(open)" in fn_body, \
        "_streamDisplay must use trimmed.startsWith(open) not raw.startsWith(open)"
def test_stream_display_partial_tag_uses_trimmed():
    """The partial-tag guard in _streamDisplay must also use trimmed, not raw."""
    fn_idx = MSG_JS.find("function _streamDisplay()")
    # BUG FIX: guard against a missing function (see sibling tests).
    assert fn_idx >= 0, "_streamDisplay function not found in messages.js"
    fn_end = MSG_JS.find("\n }", fn_idx) + 4
    fn_body = MSG_JS[fn_idx:fn_end]
    assert "open.startsWith(trimmed)" in fn_body, \
        "Partial-tag guard must use open.startsWith(trimmed) not open.startsWith(raw)"
def test_stream_display_trims_return_after_close():
    """After stripping a completed think block, _streamDisplay must trim leading whitespace from the result."""
    fn_idx = MSG_JS.find("function _streamDisplay()")
    # BUG FIX: guard against a missing function (see sibling tests).
    assert fn_idx >= 0, "_streamDisplay function not found in messages.js"
    fn_end = MSG_JS.find("\n }", fn_idx) + 4
    fn_body = MSG_JS[fn_idx:fn_end]
    # The return after finding close must strip whitespace from the result
    assert ".replace(/^" in fn_body and "s+/,'')" in fn_body, \
        "_streamDisplay must strip leading whitespace from content after the closing think tag"
# ── Regression: existing anchored patterns must be gone ──────────────────────
def test_no_anchored_think_regex_in_ui_js():
    """The old anchored regex /^<think>/ must not exist in ui.js."""
    stale = "/^<think>"
    assert stale not in UI_JS, \
        "Old anchored /^<think>/ regex still present in ui.js — fix not applied"
def test_no_anchored_gemma_regex_in_ui_js():
    """The old anchored Gemma regex must not exist in ui.js."""
    stale = "/^<|channel>"
    assert stale not in UI_JS, \
        "Old anchored /^<|channel>/ regex still present in ui.js — fix not applied"

235
tests/test_sprint39.py Normal file
View File

@@ -0,0 +1,235 @@
"""
Sprint 39 Tests: Skip-onboarding env var + onboarding key reload fix (PR A of issue #329).
Covers:
- HERMES_WEBUI_SKIP_ONBOARDING=1 bypasses the wizard unconditionally (chat_ready not required)
- HERMES_WEBUI_SKIP_ONBOARDING unset leaves default behaviour unchanged
- apply_onboarding_setup sets os.environ synchronously when an API key is saved
- apply_onboarding_setup refuses to write config/env files when SKIP_ONBOARDING is set
"""
import os
import unittest
import unittest.mock
from unittest.mock import patch
import api.onboarding as mod
# Canned runtime snapshot handed to the mocked _status_from_runtime:
# a fully configured provider whose chat path is ready.
_READY_RUNTIME = {
    "chat_ready": True,
    "provider_configured": True,
    "provider_ready": True,
    "setup_state": "ready",
    "provider_note": "Ready",
    "current_provider": "openai",
    "current_model": "gpt-4o",
    "current_base_url": None,
    "env_path": "/tmp/test.env",
}
# Counterpart snapshot: a blank install that still needs provider setup.
_NOT_READY_RUNTIME = {
    "chat_ready": False,
    "provider_configured": False,
    "provider_ready": False,
    "setup_state": "needs_provider",
    "provider_note": "Needs setup",
    "current_provider": None,
    "current_model": None,
    "current_base_url": None,
    "env_path": "/tmp/test.env",
}
# (target, side_effect) pairs patched for every get_onboarding_status() call
# so the module under test never touches real settings, config, or disk.
_COMMON_PATCHES = [
    ("api.onboarding.load_settings", lambda: {}),
    ("api.onboarding.get_config", lambda: {}),
    ("api.onboarding.verify_hermes_imports", lambda: (True, [], [])),
    ("api.onboarding.load_workspaces", lambda: []),
    ("api.onboarding.get_last_workspace", lambda: "/tmp"),
    ("api.onboarding.get_available_models", lambda: []),
    ("api.onboarding.is_auth_enabled", lambda: False),
    ("api.onboarding._build_setup_catalog", lambda cfg: {}),
    ("api.onboarding._get_config_path", lambda: __import__("pathlib").Path("/tmp/fake.yaml")),
]
def _apply_patches(extra_patches=()):
    """Build (but do not start) mock patchers for the common targets plus *extra_patches*."""
    specs = list(_COMMON_PATCHES)
    specs.extend(extra_patches)
    return [patch(target, side_effect=fn) for target, fn in specs]
class TestSkipOnboardingEnvVar(unittest.TestCase):
    """get_onboarding_status() must honour HERMES_WEBUI_SKIP_ONBOARDING."""
    def _run_status(self, runtime, env_override):
        """Call get_onboarding_status() with *runtime* mocked in and *env_override* applied.

        BUG FIX: patchers are now started inside the try and only the ones
        that actually started are stopped, so an exception while starting
        (or inside the call) cannot leak active patches into later tests.
        """
        runtime_patches = [("api.onboarding._status_from_runtime", lambda cfg, ok: runtime)]
        all_patches = _apply_patches(runtime_patches)
        started = []
        with patch.dict(os.environ, env_override, clear=False):
            try:
                for p in all_patches:
                    p.start()
                    started.append(p)
                return mod.get_onboarding_status()
            finally:
                for p in started:
                    p.stop()
    def test_skip_env_1_and_chat_ready_marks_completed(self):
        """HERMES_WEBUI_SKIP_ONBOARDING=1 + chat_ready=True → completed=True."""
        status = self._run_status(_READY_RUNTIME, {"HERMES_WEBUI_SKIP_ONBOARDING": "1"})
        self.assertTrue(status["completed"],
                        "completed must be True when skip env var is 1 and chat_ready")
    def test_skip_env_true_and_chat_ready_marks_completed(self):
        """HERMES_WEBUI_SKIP_ONBOARDING=true also accepted."""
        status = self._run_status(_READY_RUNTIME, {"HERMES_WEBUI_SKIP_ONBOARDING": "true"})
        self.assertTrue(status["completed"])
    def test_skip_env_yes_and_chat_ready_marks_completed(self):
        """HERMES_WEBUI_SKIP_ONBOARDING=yes also accepted."""
        status = self._run_status(_READY_RUNTIME, {"HERMES_WEBUI_SKIP_ONBOARDING": "yes"})
        self.assertTrue(status["completed"])
    def test_skip_env_1_works_even_when_not_chat_ready(self):
        """HERMES_WEBUI_SKIP_ONBOARDING=1 skips unconditionally — chat_ready is NOT required."""
        status = self._run_status(_NOT_READY_RUNTIME, {"HERMES_WEBUI_SKIP_ONBOARDING": "1"})
        self.assertTrue(status["completed"],
                        "completed must be True when skip env var is set, regardless of chat_ready")
    def test_skip_env_unset_leaves_default_false(self):
        """Without the env var, completed is False when settings are empty."""
        env = {k: v for k, v in os.environ.items() if k != "HERMES_WEBUI_SKIP_ONBOARDING"}
        with patch.dict(os.environ, env, clear=True):
            status = self._run_status(_READY_RUNTIME, {})
        self.assertFalse(status["completed"],
                         "completed must be False when env var absent and settings empty")
    def test_settings_completed_still_works_without_env_var(self):
        """onboarding_completed in settings → completed=True regardless of env var."""
        runtime_patches = [("api.onboarding._status_from_runtime", lambda cfg, ok: _READY_RUNTIME)]
        settings_patch = [("api.onboarding.load_settings", lambda: {"onboarding_completed": True})]
        all_patches = _apply_patches(runtime_patches + settings_patch)
        env = {k: v for k, v in os.environ.items() if k != "HERMES_WEBUI_SKIP_ONBOARDING"}
        started = []
        with patch.dict(os.environ, env, clear=True):
            try:
                for p in all_patches:
                    p.start()
                    started.append(p)
                status = mod.get_onboarding_status()
            finally:
                for p in started:
                    p.stop()
        self.assertTrue(status["completed"])
class TestApplyOnboardingKeySync(unittest.TestCase):
    """Verify that apply_onboarding_setup sets os.environ synchronously."""
    def test_api_key_set_in_os_environ_after_apply(self):
        """After apply_onboarding_setup with a key, os.environ must have the key."""
        import pathlib
        os.environ.pop("OPENAI_API_KEY", None)
        mock_cfg = {"model": {"provider": "openai", "default": "gpt-4o"}}
        try:
            with patch("api.onboarding._load_yaml_config", return_value=mock_cfg), \
                 patch("api.onboarding._save_yaml_config"), \
                 patch("api.onboarding._write_env_file"), \
                 patch("api.onboarding.reload_config"), \
                 patch("api.onboarding.get_onboarding_status", return_value={"completed": True}), \
                 patch("api.onboarding._get_config_path", return_value=pathlib.Path("/tmp/fake.yaml")), \
                 patch("api.onboarding._load_env_file", return_value={}), \
                 patch("api.onboarding._provider_api_key_present", return_value=False), \
                 patch("api.onboarding._get_active_hermes_home", return_value=pathlib.Path("/tmp")):
                mod.apply_onboarding_setup({
                    "provider": "openai",
                    "model": "gpt-4o",
                    "api_key": "sk-test-key-123",
                })
            self.assertEqual(os.environ.get("OPENAI_API_KEY"), "sk-test-key-123",
                             "OPENAI_API_KEY must be set directly on os.environ after apply")
        finally:
            # BUG FIX: cleanup previously ran only on success, leaking the
            # test key into os.environ for later tests when the assert failed.
            os.environ.pop("OPENAI_API_KEY", None)
    def test_no_key_provided_does_not_set_environ(self):
        """If no api_key is given (key already present), os.environ is not clobbered."""
        import pathlib
        os.environ["OPENAI_API_KEY"] = "sk-existing-key"
        mock_cfg = {"model": {"provider": "openai", "default": "gpt-4o"}}
        try:
            with patch("api.onboarding._load_yaml_config", return_value=mock_cfg), \
                 patch("api.onboarding._save_yaml_config"), \
                 patch("api.onboarding._write_env_file"), \
                 patch("api.onboarding.reload_config"), \
                 patch("api.onboarding.get_onboarding_status", return_value={"completed": True}), \
                 patch("api.onboarding._get_config_path", return_value=pathlib.Path("/tmp/fake.yaml")), \
                 patch("api.onboarding._load_env_file", return_value={"OPENAI_API_KEY": "sk-existing-key"}), \
                 patch("api.onboarding._provider_api_key_present", return_value=True), \
                 patch("api.onboarding._get_active_hermes_home", return_value=pathlib.Path("/tmp")):
                mod.apply_onboarding_setup({
                    "provider": "openai",
                    "model": "gpt-4o",
                })
            # Key must be unchanged
            self.assertEqual(os.environ.get("OPENAI_API_KEY"), "sk-existing-key")
        finally:
            # BUG FIX: same leak as above — always drop the test key.
            os.environ.pop("OPENAI_API_KEY", None)
class TestApplyOnboardingSkipGuard(unittest.TestCase):
    """apply_onboarding_setup must not write config/env when SKIP_ONBOARDING is set."""
    def test_apply_setup_blocked_when_skip_env_set(self):
        """SKIP_ONBOARDING=1 → apply_onboarding_setup never touches disk."""
        save_yaml_mock = unittest.mock.MagicMock()
        write_env_mock = unittest.mock.MagicMock()
        # With the skip flag active, apply must return without persisting
        # anything — the writer mocks record any disk-touching attempt.
        with patch.dict(os.environ, {"HERMES_WEBUI_SKIP_ONBOARDING": "1"}, clear=False), \
            patch("api.onboarding._save_yaml_config", save_yaml_mock), \
            patch("api.onboarding._write_env_file", write_env_mock), \
            patch("api.onboarding.save_settings"), \
            patch("api.onboarding.get_onboarding_status", return_value={"completed": True}):
            mod.apply_onboarding_setup({
                "provider": "openai",
                "model": "gpt-4o",
                "api_key": "should-not-be-saved",
            })
        # Neither the YAML config nor the .env file may have been written.
        save_yaml_mock.assert_not_called()
        write_env_mock.assert_not_called()
    def test_apply_setup_proceeds_normally_without_skip_env(self):
        """Without SKIP_ONBOARDING, apply_onboarding_setup writes config as usual."""
        import pathlib
        save_yaml_mock = unittest.mock.MagicMock()
        mock_cfg = {"model": {"provider": "openai", "default": "gpt-4o"}}
        # Build a copy of the environment with the skip flag removed so the
        # normal write path is exercised.
        env = {k: v for k, v in os.environ.items() if k != "HERMES_WEBUI_SKIP_ONBOARDING"}
        with patch.dict(os.environ, env, clear=True), \
            patch("api.onboarding._load_yaml_config", return_value=mock_cfg), \
            patch("api.onboarding._save_yaml_config", save_yaml_mock), \
            patch("api.onboarding._write_env_file"), \
            patch("api.onboarding.reload_config"), \
            patch("api.onboarding.get_onboarding_status", return_value={"completed": True}), \
            patch("api.onboarding._get_config_path", return_value=pathlib.Path("/tmp/fake.yaml")), \
            patch("api.onboarding._load_env_file", return_value={"OPENAI_API_KEY": "existing"}), \
            patch("api.onboarding._provider_api_key_present", return_value=True), \
            patch("api.onboarding._get_active_hermes_home", return_value=pathlib.Path("/tmp")):
            mod.apply_onboarding_setup({
                "provider": "openai",
                "model": "gpt-4o",
            })
        # The normal path must persist the YAML config exactly once.
        save_yaml_mock.assert_called_once()
if __name__ == "__main__":
unittest.main()

158
tests/test_sprint4.py Normal file
View File

@@ -0,0 +1,158 @@
"""Sprint 4 tests: relocation, session rename, search, file ops, validation."""
import json, pathlib, uuid, urllib.request, urllib.error
from tests._pytest_port import BASE
def get(path):
    """GET *path* from the test server; return (parsed_json, status)."""
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        return json.loads(resp.read()), resp.status
def get_raw(path):
    """GET *path* without JSON parsing; return (body_bytes, content_type, status)."""
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        return resp.read(), resp.headers.get("Content-Type",""), resp.status
def post(path, body=None):
    """POST *body* as JSON to *path*; return (parsed_json, status) even on HTTP errors."""
    payload = json.dumps(body or {}).encode()
    req = urllib.request.Request(
        BASE + path,
        data=payload,
        headers={"Content-Type": "application/json"},
    )
    try:
        with urllib.request.urlopen(req, timeout=10) as resp:
            return json.loads(resp.read()), resp.status
    except urllib.error.HTTPError as err:
        # Error responses also carry a JSON body — surface it with the code.
        return json.loads(err.read()), err.code
def make_session_tracked(created_list, ws=None):
    """Create a session and register it with the cleanup fixture.

    Returns (session_id, workspace_path); the new id is appended to
    *created_list* so the cleanup fixture deletes it after the test.
    """
    body = {}
    if ws:
        body["workspace"] = str(ws)
    d, _ = post("/api/session/new", body)
    sid = d["session"]["session_id"]
    created_list.append(sid)
    # IDIOM FIX: dropped the redundant local "import pathlib as _pathlib" —
    # pathlib is already imported at module scope.
    return sid, pathlib.Path(d["session"]["workspace"])
def test_server_running_from_new_location():
    """Health endpoint must answer from the relocated server."""
    payload, status = get("/health")
    assert status == 200
    assert payload["status"] == "ok"
def test_static_css_served():
    """Stylesheet must be served with a CSS content type and real content."""
    body, ctype, status = get_raw("/static/style.css")
    assert status == 200
    assert "text/css" in ctype
    assert b"--bg" in body
def test_static_unknown_file_404():
    """Unknown static assets must 404 (not fall through to index or 500)."""
    try:
        get_raw("/static/doesnotexist.xyz")
    except urllib.error.HTTPError as e:
        assert e.code == 404
    else:
        # BUG FIX: the old body used "assert False", which is a no-op under
        # "python -O"; raise explicitly so the failure survives optimization.
        raise AssertionError("expected HTTP 404 for unknown static file")
def test_session_rename(cleanup_test_sessions):
    """Renaming a session must succeed and return the new title."""
    sid, _ = make_session_tracked(cleanup_test_sessions)
    payload, status = post("/api/session/rename", {"session_id": sid, "title": "Renamed Session"})
    assert status == 200
    assert payload["session"]["title"] == "Renamed Session"
def test_session_rename_persists(cleanup_test_sessions):
    """A renamed title must survive a fresh session load."""
    sid, _ = make_session_tracked(cleanup_test_sessions)
    post("/api/session/rename", {"session_id": sid, "title": "Persisted"})
    fetched, _ = get(f"/api/session?session_id={sid}")
    assert fetched["session"]["title"] == "Persisted"
def test_session_rename_truncates(cleanup_test_sessions):
    """Overlong titles must be truncated to at most 80 characters."""
    sid, _ = make_session_tracked(cleanup_test_sessions)
    long_title = "A" * 200
    payload, status = post("/api/session/rename", {"session_id": sid, "title": long_title})
    assert status == 200
    assert len(payload["session"]["title"]) <= 80
def test_session_rename_requires_fields():
    """Rename requires both session_id and title; each alone is a 400."""
    for partial in ({"session_id": "x"}, {"title": "hi"}):
        _body, code = post("/api/session/rename", partial)
        assert code == 400
def test_session_rename_unknown_id():
    """Renaming a nonexistent session returns 404."""
    _body, code = post("/api/session/rename", {"session_id": "nosuchid", "title": "hi"})
    assert code == 404
def test_session_search_returns_matches(cleanup_test_sessions):
    """Searching for a unique title substring finds the renamed session."""
    sid, _ws = make_session_tracked(cleanup_test_sessions)
    uid = uuid.uuid4().hex[:8]
    post("/api/session/rename", {"session_id": sid, "title": f"s4-search-{uid}"})
    body, code = get(f"/api/sessions/search?q=s4-search-{uid}")
    assert code == 200
    assert sid in [s["session_id"] for s in body["sessions"]]
def test_session_search_empty_query_returns_all():
    """An empty query is valid and returns the full session list."""
    body, code = get("/api/sessions/search?q=")
    assert code == 200
    assert "sessions" in body
def test_session_search_no_results():
    """A query matching nothing returns an empty list, not an error."""
    body, code = get("/api/sessions/search?q=zzznomatchzzz9999")
    assert code == 200
    assert body["sessions"] == []
def test_file_create(cleanup_test_sessions):
    """/api/file/create writes the file into the session workspace."""
    sid, ws = make_session_tracked(cleanup_test_sessions)
    fname = f"test_{uuid.uuid4().hex[:6]}.txt"
    body, code = post("/api/file/create", {"session_id": sid, "path": fname, "content": "hello sprint4"})
    assert code == 200
    assert body["ok"] is True
    assert (ws / fname).read_text() == "hello sprint4"
def test_file_create_requires_fields(cleanup_test_sessions):
    """Both session_id and path are mandatory for file creation."""
    sid, _ws = make_session_tracked(cleanup_test_sessions)
    _body, code = post("/api/file/create", {"session_id": sid})
    assert code == 400
    _body2, code2 = post("/api/file/create", {"path": "x.txt"})
    assert code2 == 400
def test_file_create_duplicate_rejected(cleanup_test_sessions):
    """Creating the same path twice is rejected with 400."""
    sid, _ws = make_session_tracked(cleanup_test_sessions)
    fname = f"dup_{uuid.uuid4().hex[:6]}.txt"
    post("/api/file/create", {"session_id": sid, "path": fname, "content": ""})
    _body, code = post("/api/file/create", {"session_id": sid, "path": fname, "content": ""})
    assert code == 400
def test_file_delete(cleanup_test_sessions):
    """/api/file/delete removes an existing workspace file."""
    sid, ws = make_session_tracked(cleanup_test_sessions)
    target = ws / "to_delete.txt"
    target.write_text("bye")
    _body, code = post("/api/file/delete", {"session_id": sid, "path": "to_delete.txt"})
    assert code == 200
    assert not target.exists()
def test_file_delete_missing_returns_404(cleanup_test_sessions):
    """Deleting a file that does not exist yields 404."""
    sid, _ws = make_session_tracked(cleanup_test_sessions)
    _body, code = post("/api/file/delete", {"session_id": sid, "path": "nosuchfile.txt"})
    assert code == 404
def test_file_delete_path_traversal_blocked(cleanup_test_sessions):
    """Path traversal outside the workspace must be rejected, not executed."""
    sid, _ws = make_session_tracked(cleanup_test_sessions)
    _body, code = post("/api/file/delete", {"session_id": sid, "path": "../../etc/passwd"})
    assert code in (400, 500)
def test_list_requires_session_id():
    """/api/list without a session_id is a 400 error."""
    try:
        get("/api/list?path=.")
    except urllib.error.HTTPError as e:
        assert e.code == 400
    else:
        assert False
def test_file_requires_session_id():
    """/api/file without a session_id is a 400 error."""
    try:
        get("/api/file?path=readme.txt")
    except urllib.error.HTTPError as e:
        assert e.code == 400
    else:
        assert False
def test_file_requires_path(cleanup_test_sessions):
    """/api/file with a session but no path is a 400 error."""
    sid, _ws = make_session_tracked(cleanup_test_sessions)
    try:
        get(f"/api/file?session_id={sid}")
    except urllib.error.HTTPError as e:
        assert e.code == 400
    else:
        assert False
def test_new_session_inherits_workspace(cleanup_test_sessions):
    """A new session defaults to the workspace of the previous session."""
    sid, ws = make_session_tracked(cleanup_test_sessions)
    child = ws / f"workspace-inherit-{uuid.uuid4().hex[:6]}"
    child.mkdir(parents=True, exist_ok=True)
    post("/api/session/update", {"session_id": sid, "workspace": str(child), "model": "openai/gpt-5.4-mini"})
    sid2, _ws2 = make_session_tracked(cleanup_test_sessions)
    body, _status = get(f"/api/session?session_id={sid2}")
    assert body["session"]["workspace"] == str(child)

162
tests/test_sprint40.py Normal file
View File

@@ -0,0 +1,162 @@
"""
Sprint 40 Tests: OAuth provider onboarding path (PR B of issue #329).
Covers:
- _build_setup_catalog sets current_is_oauth=True for OAuth providers
- _build_setup_catalog sets current_is_oauth=False for API-key providers
- _build_setup_catalog sets current_is_oauth=False when no provider configured
- apply_onboarding_setup with unsupported provider marks onboarding complete directly
- i18n.js contains all required OAuth onboarding keys in both English and Spanish
"""
import pathlib
import re
import unittest
from unittest.mock import patch
import api.onboarding as mod
# Repository root (tests/ lives one level below it).
REPO_ROOT = pathlib.Path(__file__).parent.parent
# Static frontend sources inspected by the string-presence tests below.
I18N_JS = (REPO_ROOT / "static" / "i18n.js").read_text()
ONBOARDING_JS = (REPO_ROOT / "static" / "onboarding.js").read_text()
# ── Backend: _build_setup_catalog ──────────────────────────────────────────
class TestBuildSetupCatalog(unittest.TestCase):
    """_build_setup_catalog must flag OAuth-only providers via current_is_oauth."""

    def _catalog(self, provider, model="gpt-4o", base_url=""):
        """Build a minimal config for *provider* and run _build_setup_catalog."""
        if provider:
            cfg = {"model": {"provider": provider, "default": model, "base_url": base_url}}
        else:
            cfg = {}
        with patch.object(mod, "get_config", return_value=cfg):
            return mod._build_setup_catalog(cfg)

    def test_oauth_provider_sets_current_is_oauth_true(self):
        """openai-codex is not in _SUPPORTED_PROVIDER_SETUPS → current_is_oauth=True."""
        result = self._catalog("openai-codex", "gpt-5.4")
        self.assertTrue(result["current_is_oauth"],
                        "current_is_oauth must be True for openai-codex")

    def test_copilot_provider_sets_current_is_oauth_true(self):
        """copilot is also OAuth."""
        result = self._catalog("copilot")
        self.assertTrue(result["current_is_oauth"])

    def test_openai_provider_sets_current_is_oauth_false(self):
        """openai is in _SUPPORTED_PROVIDER_SETUPS → current_is_oauth=False."""
        result = self._catalog("openai", "gpt-4o")
        self.assertFalse(result["current_is_oauth"],
                         "current_is_oauth must be False for API-key provider openai")

    def test_anthropic_provider_sets_current_is_oauth_false(self):
        """anthropic is an API-key provider as well."""
        result = self._catalog("anthropic", "claude-sonnet-4.6")
        self.assertFalse(result["current_is_oauth"])

    def test_no_provider_sets_current_is_oauth_false(self):
        """Empty config → current_is_oauth=False."""
        result = self._catalog("")
        self.assertFalse(result["current_is_oauth"])

    def test_catalog_includes_current_is_oauth_key(self):
        """current_is_oauth must always be present in the catalog dict."""
        result = self._catalog("openrouter")
        self.assertIn("current_is_oauth", result)
# ── Backend: apply_onboarding_setup for OAuth providers ────────────────────
class TestApplyOnboardingOAuthPath(unittest.TestCase):
    """OAuth providers bypass key entry: onboarding completes immediately."""

    def test_unsupported_provider_skips_to_complete(self):
        """apply_onboarding_setup with an OAuth provider just marks onboarding done."""
        captured = {}
        mock_status = {"completed": True, "system": {"chat_ready": True}}
        with patch.object(mod, "save_settings", side_effect=captured.update), \
             patch.object(mod, "get_onboarding_status", return_value=mock_status):
            result = mod.apply_onboarding_setup({"provider": "openai-codex", "model": "gpt-5.4"})
        self.assertTrue(captured.get("onboarding_completed"),
                        "save_settings must set onboarding_completed=True for OAuth provider")
        self.assertEqual(result, mock_status)

    def test_unsupported_provider_does_not_write_config_yaml(self):
        """OAuth path must not call _save_yaml_config — no config mutation."""
        with patch.object(mod, "save_settings"), \
             patch.object(mod, "get_onboarding_status", return_value={}), \
             patch.object(mod, "_save_yaml_config") as save_yaml:
            mod.apply_onboarding_setup({"provider": "copilot", "model": "gpt-4o"})
        save_yaml.assert_not_called()
# ── Frontend: i18n keys ────────────────────────────────────────────────────
# i18n keys introduced for the OAuth onboarding flow; each must exist in
# every locale block of static/i18n.js (checked by TestOAuthI18nKeys).
_REQUIRED_OAUTH_KEYS = [
    "onboarding_oauth_provider_ready_title",
    "onboarding_oauth_provider_ready_body",
    "onboarding_oauth_provider_not_ready_title",
    "onboarding_oauth_provider_not_ready_body",
    "onboarding_oauth_switch_hint",
]
class TestOAuthI18nKeys(unittest.TestCase):
    """All OAuth onboarding strings must exist in i18n.js for every locale."""

    def test_english_locale_has_all_oauth_keys(self):
        """All OAuth onboarding i18n keys must be present in the English locale."""
        missing = [k for k in _REQUIRED_OAUTH_KEYS if k not in I18N_JS]
        self.assertFalse(missing,
                         f"English locale missing OAuth keys: {missing}")

    def test_spanish_locale_has_all_oauth_keys(self):
        """All OAuth onboarding i18n keys must be present in the Spanish locale."""
        # Spanish locale is the second occurrence of each key
        counts = {k: I18N_JS.count(k) for k in _REQUIRED_OAUTH_KEYS}
        under = [k for k, c in counts.items() if c < 2]
        self.assertFalse(under,
                         f"Spanish locale missing OAuth keys (need 2 occurrences each): {under}")

    def test_oauth_body_strings_contain_provider_placeholder(self):
        """Body strings must contain {provider} so JS can substitute the provider name.

        Fixed: the previous version only asserted that "{provider}" appeared
        somewhere in the whole of i18n.js, so a placeholder missing from one
        body string was masked by any other string containing it. Now every
        occurrence of each body key (one per locale) is checked for the
        placeholder in its own definition.
        """
        for key in ["onboarding_oauth_provider_ready_body",
                    "onboarding_oauth_provider_not_ready_body"]:
            starts = [m.start() for m in re.finditer(re.escape(key), I18N_JS)]
            self.assertTrue(starts, f"{key} not found in i18n.js")
            for start in starts:
                # The string value follows the key; a 400-char window is ample
                # for these short messages.
                snippet = I18N_JS[start:start + 400]
                self.assertIn("{provider}", snippet,
                              f"{key} must contain {{provider}} placeholder")
# ── Frontend: onboarding.js uses current_is_oauth ─────────────────────────
class TestOAuthOnboardingJs(unittest.TestCase):
    """Frontend wiring: onboarding.js and style.css must support OAuth cards."""

    def test_onboarding_js_reads_current_is_oauth(self):
        """onboarding.js must check current_is_oauth from the status payload."""
        self.assertIn("current_is_oauth", ONBOARDING_JS,
                      "onboarding.js must read current_is_oauth from ONBOARDING.status.setup")

    def test_onboarding_js_renders_oauth_ready_card(self):
        """onboarding.js must render the oauth-ready card class."""
        self.assertIn("onboarding-oauth-ready", ONBOARDING_JS)

    def test_onboarding_js_renders_oauth_pending_card(self):
        """onboarding.js must render the oauth-pending card class."""
        self.assertIn("onboarding-oauth-pending", ONBOARDING_JS)

    def test_style_css_has_oauth_card_rules(self):
        """style.css must contain the .onboarding-oauth-card rules."""
        stylesheet = (REPO_ROOT / "static" / "style.css").read_text()
        for selector in ("onboarding-oauth-card",
                         "onboarding-oauth-ready",
                         "onboarding-oauth-pending"):
            self.assertIn(selector, stylesheet)
# Allow running this test module directly (pytest is the normal entry point).
if __name__ == "__main__":
    unittest.main()

View File

@@ -0,0 +1,267 @@
"""
Sprint 40 UI Polish Tests: Active session title uses CSS theme variable (issue #440).
Covers:
- .session-item.active .session-title uses var(--gold) instead of hardcoded #e8a030
- The hardcoded amber color #e8a030 is NOT present in the active session title rule
"""
import os
import pathlib
import re
import sys
import unittest
from unittest import mock
# Ensure repo is on sys.path so api.config can be imported
_REPO_ROOT = pathlib.Path(__file__).parent.parent
if str(_REPO_ROOT) not in sys.path:
    sys.path.insert(0, str(_REPO_ROOT))
REPO_ROOT = _REPO_ROOT
# Static frontend sources checked by the string/regex assertions below.
STYLE_CSS = (REPO_ROOT / "static" / "style.css").read_text()
SESSIONS_JS = (REPO_ROOT / "static" / "sessions.js").read_text()
PANELS_JS = (REPO_ROOT / "static" / "panels.js").read_text()
# api.config may be unimportable in stripped-down environments; the tests
# that need it are skipped via _config_available instead of failing at import.
try:
    from api import config as _api_config
    _config_available = True
except Exception:
    _api_config = None
    _config_available = False
# Combined tests for Sprint 40 — Session + UI Polish
# Covers: active title color, unknown model, Telegram badge,
# custom endpoint model routing, workspace chip
# ── #451 active title ─────────────────────────────────────────────
class TestActiveSessionTitleThemeColor(unittest.TestCase):
    """Issue #440: the active session title color must come from the theme."""

    def test_active_session_title_uses_theme_variable(self):
        """
        .session-item.active .session-title must use var(--gold) not a hardcoded hex.
        The light-mode override line (:not(.dark)) is allowed to keep its own
        hardcoded color; we only check the base/dark rule.
        """
        selector = ".session-item.active .session-title"
        base_rule_lines = []
        for css_line in STYLE_CSS.splitlines():
            if selector in css_line and ':not(.dark)' not in css_line:
                base_rule_lines.append(css_line)
        self.assertTrue(
            len(base_rule_lines) >= 1,
            "Could not find .session-item.active .session-title base rule in style.css"
        )
        for css_line in base_rule_lines:
            self.assertTrue(
                "var(--gold)" in css_line or "var(--accent-text)" in css_line,
                f"Expected var(--gold) or var(--accent-text) in active session title rule, got: {css_line.strip()}"
            )
            self.assertNotIn(
                "#e8a030",
                css_line,
                f"Hardcoded #e8a030 must be removed from active session title rule: {css_line.strip()}"
            )
class TestDarkTopbarSelector(unittest.TestCase):
    """Dark-mode topbar border must use the :root.dark class selector."""

    def test_topbar_dark_border_uses_root_dark_selector(self):
        expected = ":root.dark .topbar{border-bottom:1px solid rgba(255,255,255,.07);}"
        removed = '[data-theme="dark"] .topbar'
        self.assertIn(
            expected,
            STYLE_CSS,
            "Topbar dark border override must target :root.dark after the theme-class migration",
        )
        self.assertNotIn(
            removed,
            STYLE_CSS,
            "Topbar dark border override must not keep the removed data-theme selector",
        )
# NOTE(review): a stray mid-file `if __name__ == "__main__": unittest.main()`
# guard was removed here. When this module is executed directly,
# unittest.main() raises SystemExit at this point, so every test class defined
# further down the file would never be registered or run. The guard at the end
# of the file is sufficient; pytest collection is unaffected either way.
# ── #452 unknown model ─────────────────────────────────────────────
class TestGatewaySessionNullModel(unittest.TestCase):
    """Verify that api/models.py and api/gateway_watcher.py do not
    fall back to the string 'unknown' for missing model values."""

    @staticmethod
    def _sources():
        """Return the source text of (api/models.py, api/gateway_watcher.py)."""
        return ((REPO_ROOT / "api" / "models.py").read_text(),
                (REPO_ROOT / "api" / "gateway_watcher.py").read_text())

    def test_gateway_session_null_model_returns_none_not_unknown(self):
        """api/models.py must not use `or 'unknown'` for the model field
        so that a NULL model in state.db is returned as None (falsy) to
        the frontend rather than the truthy string 'unknown'."""
        models_source, _ = self._sources()
        self.assertNotIn(
            "'model': row['model'] or 'unknown'",
            models_source,
            "api/models.py must not use `or 'unknown'` for the model field "
            "(fixes #443: gateway sessions showed 'telegram · unknown')",
        )

    def test_gateway_watcher_null_model_returns_none_not_unknown(self):
        """api/gateway_watcher.py must not use `or 'unknown'` for the model
        field so that a NULL model in state.db is returned as None (falsy)."""
        _, watcher_source = self._sources()
        self.assertNotIn(
            "'model': row['model'] or 'unknown'",
            watcher_source,
            "api/gateway_watcher.py must not use `or 'unknown'` for the model "
            "field (fixes #443: gateway sessions showed 'telegram · unknown')",
        )

    def test_gateway_session_model_uses_none_fallback(self):
        """Both source files must use `row['model'] or None` (explicit None
        fallback) for the model field assignment."""
        models_source, watcher_source = self._sources()
        self.assertIn(
            "'model': row['model'] or None,",
            models_source,
            "api/models.py should assign `row['model'] or None` for the model field",
        )
        self.assertIn(
            "'model': row['model'] or None,",
            watcher_source,
            "api/gateway_watcher.py should assign `row['model'] or None` for the model field",
        )
# NOTE(review): a second stray mid-file `if __name__ == "__main__":
# unittest.main()` guard was removed here for the same reason as above —
# direct script execution would exit before the remaining test classes in
# this file are defined. The end-of-file guard is the only one needed.
# ── #454 model routing ─────────────────────────────────────────────
@unittest.skipUnless(_config_available, "api.config not importable")
class TestCustomEndpointModelStripping(unittest.TestCase):
    """Tests for fix #433: strip provider prefix when custom base_url is set.

    Fixed: this class previously did not inherit from unittest.TestCase, so
    the @unittest.skipUnless decorator had no effect under pytest (unittest
    skip markers are only honored on TestCase subclasses) and the methods
    would crash on `_api_config` being None instead of being skipped when
    api.config is unavailable. Plain `assert` statements still work inside a
    TestCase method, so the test bodies are unchanged.
    """
    def _resolve(self, model_id, provider=None, base_url=None):
        """Helper: set cfg directly (same pattern as test_model_resolver.py)."""
        old_cfg = dict(_api_config.cfg)
        model_cfg = {}
        if provider:
            model_cfg['provider'] = provider
        if base_url:
            model_cfg['base_url'] = base_url
        _api_config.cfg['model'] = model_cfg
        try:
            return _api_config.resolve_model_provider(model_id)
        finally:
            # Restore the shared module-level config no matter what happened.
            _api_config.cfg.clear()
            _api_config.cfg.update(old_cfg)

    def test_prefixed_model_stripped_for_custom_endpoint(self):
        """Issue #433: 'openai/gpt-5.4' with custom base_url returns bare 'gpt-5.4'."""
        model, provider, base_url = self._resolve(
            'openai/gpt-5.4',
            provider='custom',
            base_url='http://my-proxy.local:8080/v1',
        )
        assert model == 'gpt-5.4', (
            "Expected bare 'gpt-5.4' for custom endpoint, got '{}'."
            " Stale provider-prefix must be stripped.".format(model)
        )
        assert base_url == 'http://my-proxy.local:8080/v1'
        assert provider == 'custom'

    def test_bare_model_unchanged_for_custom_endpoint(self):
        """Bare model ID (no slash) must pass through untouched with custom base_url."""
        model, provider, base_url = self._resolve(
            'gpt-4o',
            provider='custom',
            base_url='http://my-proxy.local:8080/v1',
        )
        assert model == 'gpt-4o', (
            "Bare model 'gpt-4o' should not be modified, got '{}'.".format(model)
        )
        assert base_url == 'http://my-proxy.local:8080/v1'
        assert provider == 'custom'

    def test_prefixed_model_kept_for_openrouter(self):
        """When NO custom base_url (openrouter route), prefixed model must stay prefixed."""
        model, provider, base_url = self._resolve(
            'openai/gpt-5.4',
            provider='anthropic',  # cross-provider pick triggers openrouter routing
        )
        # Cross-provider model with openrouter routing must keep full provider/model path
        assert 'openai/gpt-5.4' in model or provider == 'openrouter', (
            "Expected prefixed model or openrouter routing for non-custom endpoint, "
            "got model='{}', provider='{}'.".format(model, provider)
        )
        assert base_url is None, (
            "OpenRouter routing must not set a base_url, got '{}'.".format(base_url)
        )
# ── #455 workspace chip ─────────────────────────────────────────────
class TestWorkspaceChipAfterProfileSwitch(unittest.TestCase):
    """Verify that switchToProfile() applies the profile default workspace
    to the new session when a conversation is in progress (fixes #424)."""
    def test_workspace_chip_updated_after_profile_switch(self):
        """After await newSession(false) in the sessionInProgress branch,
        the code must call updateWorkspaceChip() so the chip reflects the
        new profile's default workspace instead of showing 'No active workspace'."""
        # Find the sessionInProgress block
        idx = PANELS_JS.find('if (sessionInProgress)')
        self.assertGreater(idx, -1, "sessionInProgress branch must exist in panels.js")
        # Slice from that point to cover the relevant block
        # NOTE(review): the 1000-char window assumes the branch body stays
        # short — widen it if the panels.js branch grows.
        block = PANELS_JS[idx:idx + 1000]
        # newSession(false) must be called first
        self.assertIn('await newSession(false)', block,
            "sessionInProgress branch must call await newSession(false)")
        # The fix: updateWorkspaceChip() must be called after newSession(false)
        pos_new_session = block.find('await newSession(false)')
        pos_update_chip = block.find('updateWorkspaceChip()')
        self.assertGreater(pos_update_chip, -1,
            "updateWorkspaceChip() must be called in the sessionInProgress branch")
        self.assertGreater(pos_update_chip, pos_new_session,
            "updateWorkspaceChip() must be called AFTER newSession(false)")
    def test_profile_default_workspace_applied_to_new_session(self):
        """After newSession(false) the code must assign S._profileDefaultWorkspace
        to S.session.workspace so the session is correctly tagged."""
        idx = PANELS_JS.find('if (sessionInProgress)')
        self.assertGreater(idx, -1)
        block = PANELS_JS[idx:idx + 1000]
        # The fix block must set S.session.workspace from S._profileDefaultWorkspace
        self.assertIn('S.session.workspace = S._profileDefaultWorkspace', block,
            "S.session.workspace must be set from S._profileDefaultWorkspace "
            "in the sessionInProgress branch after newSession(false)")
    def test_api_session_update_called_for_new_session_workspace(self):
        """The fix must call /api/session/update to persist the workspace on the server."""
        idx = PANELS_JS.find('if (sessionInProgress)')
        self.assertGreater(idx, -1)
        block = PANELS_JS[idx:idx + 1000]
        # Must patch the session on the backend too
        self.assertIn('/api/session/update', block,
            "The sessionInProgress branch must call /api/session/update "
            "to persist the new workspace after newSession(false)")
    def test_update_workspace_chip_before_render_session_list(self):
        """updateWorkspaceChip() should be called before renderSessionList()
        so the chip is correct when the UI re-renders."""
        idx = PANELS_JS.find('if (sessionInProgress)')
        self.assertGreater(idx, -1)
        block = PANELS_JS[idx:idx + 1000]
        # Ordering check: find() positions within the same sliced block.
        pos_chip = block.find('updateWorkspaceChip()')
        pos_render = block.find('await renderSessionList()')
        self.assertGreater(pos_chip, -1, "updateWorkspaceChip() must exist in block")
        self.assertGreater(pos_render, -1, "renderSessionList() must exist in block")
        self.assertLess(pos_chip, pos_render,
            "updateWorkspaceChip() must be called before renderSessionList()")
# Allow running this test module directly (pytest is the normal entry point).
if __name__ == '__main__':
    unittest.main()

381
tests/test_sprint41.py Normal file
View File

@@ -0,0 +1,381 @@
"""
Sprint 41 Tests: Title auto-generation fix + mobile close button CSS (PR #333).
Covers:
- streaming.py: sessions titled 'New Chat' trigger auto-title generation
- streaming.py: sessions with empty/falsy title trigger auto-title generation
- streaming.py: sessions titled 'Untitled' (original guard) still trigger
- streaming.py: sessions with a user-set title do NOT trigger auto-title
- style.css: .mobile-close-btn is hidden by default (desktop rule present)
- style.css: .mobile-close-btn shown in <=900px media query
- style.css: #btnCollapseWorkspacePanel hidden in <=900px media query
- index.html: both .mobile-close-btn and #btnCollapseWorkspacePanel buttons exist
"""
import pathlib
import re
import unittest
# Repository root; sources under test are read once at import time.
REPO_ROOT = pathlib.Path(__file__).parent.parent
CSS = (REPO_ROOT / "static" / "style.css").read_text()
HTML = (REPO_ROOT / "static" / "index.html").read_text()
MESSAGES_JS = (REPO_ROOT / "static" / "messages.js").read_text()
STREAMING_PY = (REPO_ROOT / "api" / "streaming.py").read_text()
# ── streaming.py: title auto-generation condition ─────────────────────────
class TestTitleAutoGenerationCondition(unittest.TestCase):
    """Verify the guarded condition in streaming.py covers all default title cases."""
    def _titles_that_trigger(self):
        """Extract the condition from the source so tests stay in sync with code."""
        # Find the if-condition that calls title_from
        # re.DOTALL lets `.*?` span a condition that wraps across lines.
        m = re.search(
            r'if\s+(s\.title\s*==.*?):\s*\n\s*s\.title\s*=\s*title_from',
            STREAMING_PY,
            re.DOTALL,
        )
        self.assertIsNotNone(m, "Could not find title auto-generation condition in streaming.py")
        return m.group(1)
    def test_untitled_in_condition(self):
        """The original default title 'Untitled' must still trigger generation."""
        cond = self._titles_that_trigger()
        self.assertIn("'Untitled'", cond, "Original 'Untitled' guard must be present")
    def test_new_chat_in_condition(self):
        """'New Chat' (the newer default) must also trigger generation."""
        cond = self._titles_that_trigger()
        self.assertIn("'New Chat'", cond, "'New Chat' guard must be present (PR #333)")
    def test_empty_title_guard_in_condition(self):
        """An empty/falsy title must also trigger generation."""
        cond = self._titles_that_trigger()
        self.assertIn("not s.title", cond, "Empty/falsy title guard must be present (PR #333)")
    def test_condition_logic_covers_all_defaults(self):
        """The condition uses OR so any one default title triggers generation."""
        cond = self._titles_that_trigger()
        # All three guards must be joined by 'or'
        parts = re.split(r'\bor\b', cond)
        self.assertGreaterEqual(len(parts), 3,
            "Expected at least 3 OR-joined sub-conditions (Untitled, New Chat, not s.title)")
# ── style.css: mobile close button visibility ─────────────────────────────
class TestMobileCloseButtonCSS(unittest.TestCase):
    """Verify CSS rules that control the duplicate close button on mobile.

    Fixed: the regex that extracts the @media(max-width:900px) block was
    copy-pasted into three test methods; it is now deduplicated into the
    _media_900_block helper so all four tests stay in sync.
    """

    # Matches the whole @media(max-width:900px){...} body, tolerating one
    # level of nested rule braces inside it.
    _MEDIA_900_RE = r'@media\s*\(max-width\s*:\s*900px\)\s*\{([^{}]*(?:\{[^{}]*\}[^{}]*)*)\}'

    def _media_900_block(self):
        """Return the raw body of the 900px media query, failing if absent."""
        m = re.search(self._MEDIA_900_RE, CSS)
        self.assertIsNotNone(m, "@media(max-width:900px) block not found in style.css")
        return m.group(1)

    def test_mobile_close_btn_hidden_by_default(self):
        """Desktop default: .mobile-close-btn must be display:none outside any media query."""
        # Space-stripped search so formatting differences don't matter.
        self.assertIn(
            ".mobile-close-btn{display:none;}",
            CSS.replace(" ", ""),
            ".mobile-close-btn should be hidden by default (desktop) — rule missing or wrong"
        )

    def test_mobile_close_btn_shown_in_900px_query(self):
        """Inside max-width:900px media query, .mobile-close-btn must be display:flex."""
        block = self._media_900_block().replace(" ", "")
        self.assertIn(".mobile-close-btn{display:flex;}",
                      block,
                      ".mobile-close-btn must be display:flex inside the 900px media query")

    def test_desktop_collapse_btn_hidden_in_900px_query(self):
        """Inside max-width:900px media query, #btnCollapseWorkspacePanel must be display:none."""
        block = self._media_900_block().replace(" ", "")
        self.assertIn("#btnCollapseWorkspacePanel{display:none;}",
                      block,
                      "#btnCollapseWorkspacePanel must be display:none in 900px media query")

    def test_900px_query_retains_existing_rules(self):
        """Ensure the PR didn't accidentally drop existing rules from the 900px block."""
        block = self._media_900_block()
        self.assertIn("rightpanel", block, ".rightpanel rule missing from 900px block")
        self.assertIn("mobile-files-btn", block, ".mobile-files-btn rule missing from 900px block")
# ── index.html: button presence ───────────────────────────────────────────
class TestWorkspacePanelButtons(unittest.TestCase):
    """Verify both panel buttons are present in the HTML so CSS rules have targets."""

    def test_desktop_collapse_button_exists(self):
        self.assertIn("btnCollapseWorkspacePanel", HTML,
                      "#btnCollapseWorkspacePanel button must exist in index.html")

    def test_mobile_close_button_exists(self):
        self.assertIn("mobile-close-btn", HTML,
                      ".mobile-close-btn button must exist in index.html")

    def test_mobile_close_button_has_aria_label(self):
        """Accessibility: mobile close button must have an aria-label."""
        match = re.search(r'class="[^"]*mobile-close-btn[^"]*"[^>]*>', HTML)
        self.assertIsNotNone(match, "Could not find mobile-close-btn element")
        self.assertIn("aria-label", match.group(0),
                      "mobile-close-btn must have aria-label for accessibility")
class TestIssue495TitleStreaming(unittest.TestCase):
    """Regression checks for issue #495 title SSE behavior.

    Mix of source-presence checks (exact probe strings against streaming.py /
    messages.js — keep them byte-identical to the real sources) and
    behavioral tests that import helpers from api.streaming directly.
    """
    # ── Backend: streaming.py source-presence checks ──────────────────────
    def test_streaming_has_llm_title_helper(self):
        self.assertIn(
            "def _generate_llm_session_title_for_agent(",
            STREAMING_PY,
            "streaming.py should define an agent-backed LLM title helper for session titles",
        )
    def test_streaming_rejects_generic_completion_titles(self):
        # Both a Chinese and an English generic completion phrase must be filtered.
        self.assertIn(
            "测试完成",
            STREAMING_PY,
            "streaming.py should reject generic completion phrases as session titles",
        )
        self.assertIn(
            "all set",
            STREAMING_PY,
            "streaming.py should reject generic English completion phrases as session titles",
        )
    def test_streaming_uses_reasoning_split_for_minimax_titles(self):
        self.assertIn(
            "reasoning_split",
            STREAMING_PY,
            "streaming.py should request MiniMax title calls with reasoning_split so final text is separated from thinking",
        )
    def test_streaming_emits_title_sse_event(self):
        self.assertIn(
            "put_event('title', {'session_id': s.session_id, 'title': s.title})",
            STREAMING_PY,
            "streaming.py should emit a title SSE event when title is updated",
        )
    def test_streaming_emits_title_status_sse_event(self):
        self.assertIn(
            "put_event('title_status', payload)",
            STREAMING_PY,
            "streaming.py should emit a title_status SSE event for title generation diagnostics",
        )
    def test_streaming_emits_stream_end_event(self):
        self.assertIn(
            "put_event('stream_end', {'session_id': session_id})",
            STREAMING_PY,
            "background title path should end the SSE stream with stream_end",
        )
    # ── Frontend: messages.js listener checks ─────────────────────────────
    def test_frontend_listens_for_title_event(self):
        self.assertIn(
            "addEventListener('title'",
            MESSAGES_JS,
            "messages.js should listen for title SSE events",
        )
    def test_frontend_listens_for_title_status_event(self):
        self.assertIn(
            "addEventListener('title_status'",
            MESSAGES_JS,
            "messages.js should listen for title_status SSE events",
        )
        self.assertIn(
            "console.info('[title]'",
            MESSAGES_JS,
            "messages.js should log title generation diagnostics to the browser console",
        )
    def test_frontend_refreshes_title_ui_after_title_event(self):
        self.assertIn(
            "syncTopbar()",
            MESSAGES_JS,
            "messages.js title listener should sync top bar title",
        )
        self.assertTrue(
            ("renderSessionListFromCache()" in MESSAGES_JS) or ("renderSessionList()" in MESSAGES_JS),
            "messages.js title listener should refresh session list UI",
        )
    def test_frontend_waits_for_stream_end_before_closing(self):
        self.assertIn(
            "addEventListener('stream_end'",
            MESSAGES_JS,
            "messages.js should close SSE connection on stream_end (not immediately on done)",
        )
    # ── Behavioral: api.streaming helper functions ────────────────────────
    def test_title_snippet_uses_visible_assistant_reply_after_tools(self):
        """Tool-heavy opening turns should use the final visible assistant reply."""
        from api.streaming import _first_exchange_snippets
        user_msg = {
            "role": "user",
            "content": "Please look up the earlier context and then summarize it.",
        }
        preamble_asst = {
            "role": "assistant",
            "content": "Let me check my memory first.",
            "tool_calls": [
                {
                    "id": "call-1",
                    "function": {
                        "name": "memory",
                        "arguments": '{"action":"search"}',
                    },
                }
            ],
        }
        tool_result = {
            "role": "tool",
            "tool_call_id": "call-1",
            "content": '{"result":"background info"}',
        }
        final_asst = {
            "role": "assistant",
            "content": "Here is the substantive answer after the tool work.",
        }
        user_text, assistant_text = _first_exchange_snippets(
            [user_msg, preamble_asst, tool_result, final_asst]
        )
        # Snippets are capped at 500 chars by the helper.
        self.assertEqual(user_text, user_msg["content"][:500])
        self.assertEqual(assistant_text, final_asst["content"][:500])
    def test_title_snippet_keeps_short_substantive_assistant_reply(self):
        """Short but real assistant answers should still be eligible for titles."""
        from api.streaming import _first_exchange_snippets
        messages = [
            {"role": "user", "content": "Can you help me rename this session?"},
            {"role": "assistant", "content": "Sure."},
        ]
        user_text, assistant_text = _first_exchange_snippets(messages)
        self.assertEqual(user_text, "Can you help me rename this session?")
        self.assertEqual(assistant_text, "Sure.")
    def test_provisional_title_detection_ignores_whitespace_noise(self):
        """Temporary first-message titles should still match with whitespace normalization."""
        from api.streaming import _is_provisional_title, title_from
        messages = [
            {
                "role": "user",
                "content": "过去两个礼拜发生了一些事情。最重要的一点就是我加入了一个 Hermes Web UI 的项目。\n\n因为我开始使用 Hermes 这个 agent 以后,就逐渐不再使用 OpenClaw了。",
            },
            {"role": "assistant", "content": "Sure, let me help."},
        ]
        derived = title_from(messages, "")
        current = derived[:63]  # Simulate the provisional title the UI writes immediately.
        # The 63-char cut lands mid-title, proving the comparison is not exact.
        self.assertNotEqual(current, derived[:64])
        self.assertTrue(
            _is_provisional_title(current, messages),
            "Whitespace-normalized provisional titles should still be recognized",
        )
    def test_title_snippet_keeps_tool_call_with_substantive_text(self):
        """An assistant row with tool_calls AND a substantive answer text
        must still be used as the first-exchange snippet — it's not a
        preamble, it's an agentic first-turn plan."""
        from api.streaming import _first_exchange_snippets
        user_msg = {
            "role": "user",
            "content": "Can you schedule a reminder for the Q3 kickoff meeting?",
        }
        # Assistant row with both a real answer AND a tool_call
        agentic_asst = {
            "role": "assistant",
            "content": "I'll schedule the Q3 kickoff reminder for next Monday at 9am.",
            "tool_calls": [
                {
                    "id": "call-1",
                    "function": {
                        "name": "cronjob",
                        "arguments": '{"action":"create","when":"mon 9am"}',
                    },
                }
            ],
        }
        user_text, assistant_text = _first_exchange_snippets([user_msg, agentic_asst])
        self.assertEqual(user_text, user_msg["content"][:500])
        self.assertEqual(
            assistant_text,
            agentic_asst["content"][:500],
            "Substantive answer text on a tool_call row must be preserved",
        )
    def test_title_snippet_skips_tool_call_preamble_only_rows(self):
        """Tool-call rows whose content is empty or meta-reasoning preamble
        ('Let me check my memory first.') must still be skipped — those are
        orchestration scaffolding, not title material."""
        from api.streaming import _first_exchange_snippets
        user_msg = {
            "role": "user",
            "content": "Summarize my notes from last week.",
        }
        empty_preamble = {
            "role": "assistant",
            "content": "",
            "tool_calls": [
                {
                    "id": "call-1",
                    "function": {
                        "name": "memory",
                        "arguments": '{"action":"search"}',
                    },
                }
            ],
        }
        meta_preamble = {
            "role": "assistant",
            "content": "Let me check my memory first.",
            "tool_calls": [
                {
                    "id": "call-2",
                    "function": {
                        "name": "memory",
                        "arguments": '{"action":"search","q":"last week"}',
                    },
                }
            ],
        }
        tool_result = {
            "role": "tool",
            "tool_call_id": "call-2",
            "content": '{"result":"background info"}',
        }
        final_asst = {
            "role": "assistant",
            "content": "Here's a summary of your notes from last week.",
        }
        _, assistant_text = _first_exchange_snippets(
            [user_msg, empty_preamble, meta_preamble, tool_result, final_asst]
        )
        self.assertEqual(
            assistant_text,
            final_asst["content"][:500],
            "Empty and meta-reasoning preamble rows must be skipped",
        )
# Allow running this test module directly (pytest is the normal entry point).
if __name__ == "__main__":
    unittest.main()

456
tests/test_sprint42.py Normal file
View File

@@ -0,0 +1,456 @@
"""
Sprint 42 Tests: SessionDB injection into AIAgent for WebUI sessions (PR #356).
Covers:
- streaming.py: SessionDB is initialized inside _run_agent_streaming (import present)
- streaming.py: try/except guards SessionDB init so failures are non-fatal
- streaming.py: session_db= kwarg is passed to AIAgent constructor
- streaming.py: SessionDB init failure prints a WARNING (not silently swallowed)
- streaming.py: SessionDB init is placed before AIAgent construction
"""
import ast
import pathlib
import re
import queue
import sys
import types
import unittest
from unittest import mock
REPO_ROOT = pathlib.Path(__file__).parent.parent
STREAMING_PY = (REPO_ROOT / "api" / "streaming.py").read_text()
# ── Shared helpers for sprint-42 additional tests ────────────────────────────
REPO = REPO_ROOT # alias used by #427 tests
_SESSIONS_JS = REPO_ROOT / 'static' / 'sessions.js'
_STREAMING_PY = REPO_ROOT / 'api' / 'streaming.py'
_MESSAGES_JS = REPO_ROOT / 'static' / 'messages.js'
_UI_JS = REPO_ROOT / 'static' / 'ui.js'
def _read_sessions_js():
    """Load and return the full text of static/sessions.js."""
    with _SESSIONS_JS.open(encoding='utf-8') as handle:
        return handle.read()
# ─────────────────────────────────────────────────────────────────────────────
class TestSessionDBInjection(unittest.TestCase):
    """Static source checks: streaming.py must wire a SessionDB into AIAgent."""

    def test_hermes_state_import_present(self):
        """The SessionDB import from hermes_state must exist in streaming.py."""
        expected_import = "from hermes_state import SessionDB"
        self.assertIn(
            expected_import,
            STREAMING_PY,
            "SessionDB import missing from streaming.py (PR #356)",
        )

    def test_session_db_kwarg_passed_to_agent(self):
        """The AIAgent constructor call must receive the session_db= kwarg."""
        expected_kwarg = "session_db=_session_db"
        self.assertIn(
            expected_kwarg,
            STREAMING_PY,
            "session_db kwarg not passed to AIAgent (PR #356)",
        )

    def test_sessiondb_init_in_try_except(self):
        """The import + SessionDB() construction must sit directly under a try:."""
        expected_pattern = (
            r"try:\s*\n\s*from hermes_state import SessionDB\s*\n\s*"
            r"_session_db\s*=\s*SessionDB\(\)"
        )
        self.assertRegex(
            STREAMING_PY,
            expected_pattern,
            "SessionDB() init must be inside a try block for non-fatal error handling (PR #356)",
        )

    def test_sessiondb_failure_logs_warning(self):
        """An init failure must surface as a printed WARNING, not be swallowed."""
        self.assertIn(
            "WARNING: SessionDB init failed",
            STREAMING_PY,
            "SessionDB init failure must log a WARNING message (PR #356)",
        )

    def test_session_db_initialized_before_agent_construction(self):
        """The SessionDB import must precede the session_db= kwarg in the source."""
        import_offset = STREAMING_PY.find("from hermes_state import SessionDB")
        kwarg_offset = STREAMING_PY.find("session_db=_session_db")
        self.assertGreater(
            kwarg_offset,
            import_offset,
            "SessionDB init must appear before AIAgent construction (PR #356)",
        )

    def test_session_db_default_is_none(self):
        """_session_db must default to None immediately before the try block."""
        self.assertRegex(
            STREAMING_PY,
            r"_session_db\s*=\s*None\s*\n\s*try:",
            "_session_db must default to None before try/except block (PR #356)",
        )
class TestRuntimeRouteInjection(unittest.TestCase):
    """Verify WebUI forwards the resolved runtime route into AIAgent."""

    def test_runtime_provider_keys_are_forwarded_to_agent(self):
        """WebUI must pass the runtime route fields that CLI already uses."""
        # Static source scan: each kwarg below must appear verbatim in the
        # AIAgent constructor call inside api/streaming.py.
        for snippet in (
            "api_mode=_rt.get('api_mode')",
            "acp_command=_rt.get('command')",
            "acp_args=_rt.get('args')",
            "credential_pool=_rt.get('credential_pool')",
        ):
            self.assertIn(
                snippet,
                STREAMING_PY,
                f"Missing runtime route forwarding in AIAgent constructor: {snippet}",
            )

    def test_runtime_route_is_forwarded_from_resolver_into_agent_init(self):
        """The resolved ACP route should be passed through to AIAgent kwargs."""
        import api.streaming as streaming

        captured = {}  # filled in by CapturingAgent below
        fake_session_db = object()  # sentinel, identity-checked at the end
        # Stub resolver returning a fully populated runtime route dict.
        resolve_runtime_provider = mock.Mock(
            return_value={
                "provider": "openai-codex",
                "base_url": "https://api.openai.com/v1",
                "api_key": "rt-key",
                "api_mode": "codex_responses",
                "command": "codex",
                "args": ["exec", "--json"],
                "credential_pool": "openai-codex",
            }
        )

        class FakeSession:
            """Minimal stand-in for the object streaming.get_session returns."""

            def __init__(self):
                self.session_id = "sess-runtime-route"
                self.title = "Existing title"
                self.workspace = "/tmp"
                self.model = "gpt-5.4"
                self.messages = []
                self.personality = None
                self.input_tokens = 0
                self.output_tokens = 0
                self.estimated_cost = None
                self.tool_calls = []
                self.active_stream_id = None
                self.pending_user_message = None
                self.pending_attachments = []
                self.pending_started_at = None

            def save(self, touch_updated_at=True):
                # Record that the streaming path persisted the session.
                self._saved = True

            def compact(self):
                # Shape mirrors the real Session.compact() payload.
                return {
                    "session_id": self.session_id,
                    "title": self.title,
                    "workspace": self.workspace,
                    "model": self.model,
                    "created_at": 0,
                    "updated_at": 0,
                    "pinned": False,
                    "archived": False,
                    "project_id": None,
                    "profile": None,
                    "input_tokens": self.input_tokens,
                    "output_tokens": self.output_tokens,
                    "estimated_cost": self.estimated_cost,
                    "personality": self.personality,
                }

        class CapturingAgent:
            """AIAgent replacement that records its constructor kwargs."""

            def __init__(self, **kwargs):
                captured["init_kwargs"] = kwargs
                self.session_id = kwargs["session_id"]
                self.context_compressor = None
                self.session_prompt_tokens = 0
                self.session_completion_tokens = 0
                self.session_estimated_cost_usd = None
                self.reasoning_config = None
                self.ephemeral_system_prompt = None
                self._last_error = None

            def run_conversation(self, **kwargs):
                # Record the run kwargs and return a minimal valid result.
                captured["run_kwargs"] = kwargs
                return {
                    "messages": [
                        {"role": "user", "content": kwargs["persist_user_message"]},
                        {"role": "assistant", "content": "ok"},
                    ]
                }

            def interrupt(self, _message):
                captured["interrupted"] = True

        fake_session = FakeSession()
        fake_stream_id = "stream-runtime-route"
        fake_queue = queue.Queue()
        # Fake module tree so streaming's deferred imports of
        # hermes_cli.runtime_provider / hermes_state resolve to our stubs.
        fake_runtime_module = types.ModuleType("hermes_cli.runtime_provider")
        fake_runtime_module.resolve_runtime_provider = resolve_runtime_provider
        fake_hermes_cli = types.ModuleType("hermes_cli")
        fake_hermes_cli.runtime_provider = fake_runtime_module
        fake_hermes_state = types.ModuleType("hermes_state")
        fake_hermes_state.SessionDB = mock.Mock(return_value=fake_session_db)
        with mock.patch.object(streaming, "get_session", return_value=fake_session), \
             mock.patch.object(streaming, "_get_ai_agent", return_value=CapturingAgent), \
             mock.patch.object(streaming, "resolve_model_provider", return_value=("gpt-5.4", "openai-codex", None)), \
             mock.patch("api.config.get_config", return_value={}), \
             mock.patch("api.config._resolve_cli_toolsets", return_value=[]), \
             mock.patch.dict(
                 sys.modules,
                 {
                     "hermes_cli": fake_hermes_cli,
                     "hermes_cli.runtime_provider": fake_runtime_module,
                     "hermes_state": fake_hermes_state,
                 },
             ):
            # The stream queue must be registered before the run starts.
            streaming.STREAMS[fake_stream_id] = fake_queue
            streaming._run_agent_streaming(
                session_id=fake_session.session_id,
                msg_text="hello from webui",
                model="gpt-5.4",
                workspace="/tmp",
                stream_id=fake_stream_id,
            )
        # Mocks retain their call records after the patch context exits.
        resolve_runtime_provider.assert_called_once_with(requested="openai-codex")
        init_kwargs = captured["init_kwargs"]
        self.assertEqual(init_kwargs["api_mode"], "codex_responses")
        self.assertEqual(init_kwargs["acp_command"], "codex")
        self.assertEqual(init_kwargs["acp_args"], ["exec", "--json"])
        self.assertEqual(init_kwargs["credential_pool"], "openai-codex")
        self.assertEqual(init_kwargs["api_key"], "rt-key")
        self.assertIs(init_kwargs["session_db"], fake_session_db)
class TestSessionDBAST(unittest.TestCase):
    """AST-level checks: the SessionDB try/except must live outside _ENV_LOCK."""

    def setUp(self):
        self.tree = ast.parse(STREAMING_PY)

    def test_sessiondb_try_not_inside_env_lock(self):
        """No 'with _ENV_LOCK:' body may contain a Try that imports hermes_state.

        A try/except nested inside _ENV_LOCK is the deadlock pattern caught
        by test_sprint34; the SessionDB init belongs outside the lock scope.
        """
        for with_node in ast.walk(self.tree):
            if not isinstance(with_node, ast.With):
                continue
            ctx_names = {getattr(item.context_expr, "id", "") for item in with_node.items}
            if "_ENV_LOCK" not in ctx_names:
                continue
            # Inspect only direct Try statements in the with-body.
            for try_stmt in (s for s in with_node.body if isinstance(s, ast.Try)):
                self.assertNotIn(
                    "hermes_state",
                    ast.unparse(try_stmt),
                    "SessionDB try/except must NOT be inside _ENV_LOCK body (deadlock risk)",
                )
class TestModelCustomInput(unittest.TestCase):
    """Tests for issue #444 — custom model ID input in model dropdown."""

    # Directory holding the static frontend assets under test.
    STATIC = pathlib.Path(__file__).parent.parent / 'static'

    def _read(self, filename):
        """Return the UTF-8 text of a file under static/."""
        path = self.STATIC / filename
        with open(path, 'r', encoding='utf-8') as f:
            return f.read()

    def _renderModelDropdown_body(self):
        """Extract the source of renderModelDropdown() from ui.js.

        Fix: previously a missing marker made str.find() return -1, which
        was silently used as a slice bound and produced a nonsense slice
        (and confusing downstream assertion failures). Fail loudly instead.
        """
        src = self._read('ui.js')
        start = src.find('function renderModelDropdown()')
        self.assertNotEqual(
            start, -1, 'renderModelDropdown() not found in ui.js')
        end = src.find('\nasync function selectModelFromDropdown', start)
        self.assertNotEqual(
            end, -1,
            'selectModelFromDropdown not found after renderModelDropdown in ui.js')
        return src[start:end]

    def test_model_custom_input_in_dropdown(self):
        """The custom-input element must be rendered by renderModelDropdown."""
        body = self._renderModelDropdown_body()
        self.assertIn('model-custom-input', body,
                      'model-custom-input class must be in renderModelDropdown')

    def test_model_custom_enter_handler(self):
        """The Enter-key apply handler must be defined in renderModelDropdown."""
        body = self._renderModelDropdown_body()
        self.assertIn('_applyCustom', body,
                      '_applyCustom function must be defined in renderModelDropdown')

    def test_model_custom_css_defined(self):
        """Both custom-input CSS classes must exist in style.css."""
        css = self._read('style.css')
        self.assertIn('.model-custom-row', css,
                      '.model-custom-row must be defined in style.css')
        self.assertIn('.model-custom-input', css,
                      '.model-custom-input must be defined in style.css')

    def test_model_custom_i18n_keys(self):
        """The en locale block must define both custom-model i18n keys."""
        i18n = self._read('i18n.js')
        # Find en locale block (appears first before es)
        en_block_start = i18n.find("'en'")
        es_block_start = i18n.find("'es'")
        # Fix: guard against missing locale markers — a -1 from find() would
        # otherwise silently produce a meaningless slice below.
        self.assertNotEqual(en_block_start, -1,
                            "'en' locale block not found in i18n.js")
        self.assertNotEqual(es_block_start, -1,
                            "'es' locale block not found in i18n.js")
        en_block = i18n[en_block_start:es_block_start]
        self.assertIn('model_custom_label', en_block,
                      'model_custom_label must be in en locale')
        self.assertIn('model_custom_placeholder', en_block,
                      'model_custom_placeholder must be in en locale')
# ── Sprint 42 additional tests: context indicator (#437) ─────────────────
def test_context_indicator_uses_pick_helper():
    """sessions.js must use the _pick helper to prefer fresh over stale values."""
    sessions_js = _read_sessions_js()
    assert '_pick' in sessions_js, "_pick helper not found in static/sessions.js"
def test_context_indicator_old_pattern_removed():
    """The old || fallback that preferred stale session data must be gone."""
    sessions_js = _read_sessions_js()
    assert '_s.input_tokens||u.input_tokens' not in sessions_js, \
        "Old stale-data-first pattern '_s.input_tokens||u.input_tokens' still present in static/sessions.js"
def test_context_indicator_all_six_fields():
    """All six token/cost fields must appear in the _syncCtxIndicator call."""
    sessions_js = _read_sessions_js()
    expected_fields = (
        'input_tokens',
        'output_tokens',
        'estimated_cost',
        'context_length',
        'last_prompt_tokens',
        'threshold_tokens',
    )
    for field in expected_fields:
        assert field in sessions_js, \
            f"Field '{field}' not found in static/sessions.js _syncCtxIndicator call"
# ── Sprint 42 additional tests: system prompt title (#441) ──────────────
def test_system_prompt_title_guard_exists():
    """sessions.js must guard against titles that begin with a [SYSTEM: prefix."""
    sessions_js = _read_sessions_js()
    assert '[SYSTEM:' in sessions_js, \
        "sessions.js must contain the [SYSTEM: guard to intercept system-prompt titles"
    # The marker alone could be a comment — the actual guard expression used
    # in the if-condition must also be present.
    assert "cleanTitle.startsWith('[SYSTEM:')" in sessions_js, \
        "sessions.js must have: cleanTitle.startsWith('[SYSTEM:') guard expression"
def test_cleanTitle_is_let_not_const():
    """cleanTitle must be a `let` binding so the [SYSTEM: guard can reassign it."""
    sessions_js = _read_sessions_js()
    assert 'let cleanTitle' in sessions_js, \
        "cleanTitle must be declared with 'let' (not 'const') to allow reassignment"
    # The previous const declaration pattern must no longer exist.
    assert "const cleanTitle=tags.length" not in sessions_js, \
        "Old 'const cleanTitle=tags.length...' must be replaced by 'let cleanTitle=...'"
# ── Sprint 42 additional tests: thinking panel persistence (#427) ────────
def test_streaming_persists_reasoning_in_session():
    """streaming.py must accumulate reasoning text and patch the last assistant row."""
    streaming_src = (REPO / 'api' / 'streaming.py').read_text()
    # Each required snippet maps to its failure message.
    required_snippets = {
        "_reasoning_text = ''":
            "_reasoning_text variable not initialised in streaming.py",
        '_reasoning_text += str(text)':
            "on_reasoning callback does not accumulate into _reasoning_text",
        "Persist reasoning trace in the session so it survives reload":
            "Reasoning persistence comment not found in streaming.py",
        "_rm['reasoning'] = _reasoning_text":
            "Code to set _rm['reasoning'] not found in streaming.py",
    }
    for snippet, failure_message in required_snippets.items():
        assert snippet in streaming_src, failure_message
    # Ordering: persistence must happen before the compacted session is built.
    persist_idx = streaming_src.index("Persist reasoning trace in the session")
    raw_session_idx = streaming_src.index("raw_session = s.compact()")
    assert persist_idx < raw_session_idx, \
        "Reasoning persistence block must appear before raw_session assignment"
def test_done_handler_patches_reasoning_field():
    """messages.js done SSE handler must patch reasoningText onto the last assistant message."""
    messages_src = (REPO / 'static' / 'messages.js').read_text()
    assert "Persist reasoning trace so thinking card survives page reload" in messages_src, \
        "Reasoning persistence comment not found in messages.js done handler"
    assert "if(reasoningText){" in messages_src, \
        "reasoningText guard not found in messages.js"
    assert "lastAsst.reasoning=reasoningText" in messages_src, \
        "lastAsst.reasoning assignment not found in messages.js"
    # Positional check: the persistence block must live inside the done handler.
    done_handler_idx = messages_src.index("source.addEventListener('done'")
    persist_idx = messages_src.index("Persist reasoning trace so thinking card survives page reload")
    assert done_handler_idx < persist_idx, \
        "Reasoning persistence patch must be inside the done SSE handler"
    # The guard must not clobber a value the server already persisted.
    assert "!lastAsst.reasoning" in messages_src, \
        "Guard '!lastAsst.reasoning' missing — would overwrite server-persisted reasoning"
def test_rendermessages_reads_reasoning_from_messages():
    """ui.js renderMessages must read m.reasoning so the thinking card renders on reload."""
    ui_src = (REPO / 'static' / 'ui.js').read_text()
    assert 'm.reasoning' in ui_src, \
        "m.reasoning not referenced in ui.js — thinking card won't render on reload"
    assert 'thinking-card' in ui_src, \
        "thinking-card CSS class not found in ui.js"
    # Whitespace-insensitive check for the fallback assignment.
    compact_src = ui_src.replace(' ', '')
    assert 'thinkingText=m.reasoning' in compact_src, \
        "thinkingText=m.reasoning assignment not found in ui.js renderMessages"
def test_streaming_restores_prior_reasoning_metadata_after_followup():
    """Previous-turn thinking must survive later turns.

    The provider-facing history strips WebUI-only `reasoning` fields, so the
    streaming path has to merge that metadata back onto the returned message
    history before saving, including reinserting any reasoning-only assistant
    segments the provider dropped.
    """
    streaming_src = (REPO / 'api' / 'streaming.py').read_text()
    assert "def _restore_reasoning_metadata(" in streaming_src, \
        "streaming.py must define a helper to restore prior reasoning metadata"
    assert "s.messages = _restore_reasoning_metadata(" in streaming_src, \
        "streaming.py must merge prior reasoning metadata back after run_conversation()"
    assert "updated_messages.insert(safe_pos, copy.deepcopy(prev_msg))" in streaming_src, \
        "streaming.py must reinsert dropped reasoning-only assistant messages"
def test_routes_restores_prior_reasoning_metadata_after_followup():
    """The non-streaming route path must preserve prior reasoning metadata too."""
    routes_src = (REPO / 'api' / 'routes.py').read_text()
    assert "_restore_reasoning_metadata" in routes_src, \
        "routes.py must import reasoning metadata restoration helper"
    assert 's.messages = _restore_reasoning_metadata(' in routes_src, \
        "routes.py must merge prior reasoning metadata back after run_conversation()"

253
tests/test_sprint43.py Normal file
View File

@@ -0,0 +1,253 @@
"""
Sprint 43 Tests: Bandit security fixes — B310, B324, B110 + QuietHTTPServer (PR #354).
Covers:
- gateway_watcher.py: MD5 uses usedforsecurity=False (B324)
- config.py: URL scheme validation before urlopen (B310)
- bootstrap.py: URL scheme validation in wait_for_health (B310)
- server.py: QuietHTTPServer class exists and extends ThreadingHTTPServer
- server.py: QuietHTTPServer.handle_error suppresses client disconnect errors
- server.py: QuietHTTPServer uses sys.exc_info() not traceback.sys.exc_info()
- Logging: at least 5 modules add a module-level logger (B110 remediation)
- routes.py: session titles redacted in /api/sessions list response
"""
import ast
import pathlib
import re
import sys
import unittest
REPO_ROOT = pathlib.Path(__file__).parent.parent
GATEWAY_WATCHER_PY = (REPO_ROOT / "api" / "gateway_watcher.py").read_text()
CONFIG_PY = (REPO_ROOT / "api" / "config.py").read_text()
BOOTSTRAP_PY = (REPO_ROOT / "bootstrap.py").read_text()
SERVER_PY = (REPO_ROOT / "server.py").read_text()
ROUTES_PY = (REPO_ROOT / "api" / "routes.py").read_text()
AUTH_PY = (REPO_ROOT / "api" / "auth.py").read_text()
PROFILES_PY = (REPO_ROOT / "api" / "profiles.py").read_text()
STREAMING_PY = (REPO_ROOT / "api" / "streaming.py").read_text()
WORKSPACE_PY = (REPO_ROOT / "api" / "workspace.py").read_text()
STATE_SYNC_PY = (REPO_ROOT / "api" / "state_sync.py").read_text()
# ── B324: MD5 usedforsecurity=False ─────────────────────────────────────────
class TestMD5SecurityFix(unittest.TestCase):
    """B324: hashlib.md5 must use usedforsecurity=False for non-crypto hashes."""

    def test_gateway_watcher_md5_usedforsecurity_false(self):
        """_snapshot_hash must pass usedforsecurity=False to hashlib.md5 (PR #354)."""
        self.assertIn(
            "usedforsecurity=False",
            GATEWAY_WATCHER_PY,
            "gateway_watcher.py: MD5 must use usedforsecurity=False (B324)",
        )

    def test_gateway_watcher_md5_pattern(self):
        """Exact pattern: hashlib.md5(..., usedforsecurity=False)."""
        # Fix: dropped the redundant function-local `import re`; this module
        # already imports re at the top.
        # DOTALL because the kwarg may be separated from the call by newlines.
        self.assertIsNotNone(
            re.search(r"hashlib\.md5\(.*?usedforsecurity=False\)", GATEWAY_WATCHER_PY, re.DOTALL),
            "MD5 call must include usedforsecurity=False kwarg",
        )
# ── B310: URL scheme validation ──────────────────────────────────────────────
class TestUrlSchemeValidation(unittest.TestCase):
    """B310: urllib.request.urlopen must not be called with arbitrary schemes."""

    def test_config_scheme_validation_present(self):
        """config.py must check parsed_url.scheme against an allow-list before urlopen."""
        self.assertIn(
            "parsed_url.scheme",
            CONFIG_PY,
            "config.py: URL scheme validation missing (B310)",
        )
        self.assertRegex(
            CONFIG_PY,
            r'parsed_url\.scheme\s+not\s+in\s+\(',
            "config.py: scheme check must use 'not in (...)' pattern",
        )

    def test_config_urlopen_has_nosec(self):
        """config.py's urlopen must carry a # nosec B310 suppression comment."""
        self.assertIn(
            "nosec B310",
            CONFIG_PY,
            "config.py: urlopen must have # nosec B310 after scheme validation",
        )

    def test_bootstrap_scheme_validation_present(self):
        """bootstrap.py wait_for_health must reject non-http(s) URLs before urlopen."""
        self.assertIn(
            "Invalid health check URL",
            BOOTSTRAP_PY,
            "bootstrap.py: URL scheme validation missing in wait_for_health (B310)",
        )
        self.assertRegex(
            BOOTSTRAP_PY,
            r'url\.startswith\([^)]+http',
            "bootstrap.py: must check url starts with http:// or https://",
        )

    def test_bootstrap_urlopen_has_nosec(self):
        """bootstrap.py's urlopen must carry a # nosec B310 suppression comment."""
        self.assertIn(
            "nosec B310",
            BOOTSTRAP_PY,
            "bootstrap.py: urlopen must have # nosec B310 after scheme validation",
        )

    def test_config_allows_http_and_https(self):
        """The allow-list in config.py must permit both http and https."""
        self.assertIn('"http"', CONFIG_PY, "config.py: http must be in allowed schemes")
        self.assertIn('"https"', CONFIG_PY, "config.py: https must be in allowed schemes")
# ── B110: Bare except/pass → logger.debug() ─────────────────────────────────
class TestBareExceptLogging(unittest.TestCase):
    """B110: silent except/pass blocks must be replaced with logged handlers."""

    # (module name, module source text) pairs touched by the B110 sweep.
    MODULES_REQUIRING_LOGGER = [
        ("api/auth.py", AUTH_PY),
        ("api/config.py", CONFIG_PY),
        ("api/gateway_watcher.py", GATEWAY_WATCHER_PY),
        ("api/profiles.py", PROFILES_PY),
        ("api/streaming.py", STREAMING_PY),
        ("api/workspace.py", WORKSPACE_PY),
        ("api/state_sync.py", STATE_SYNC_PY),
        ("api/routes.py", ROUTES_PY),
    ]

    def test_module_level_loggers_present(self):
        """Each fixed module must declare logger = logging.getLogger(__name__)."""
        for module_name, module_src in self.MODULES_REQUIRING_LOGGER:
            with self.subTest(module=module_name):
                self.assertIn(
                    "logger = logging.getLogger(__name__)",
                    module_src,
                    f"{module_name}: module-level logger missing (B110 fix requires logger)",
                )

    def test_gateway_watcher_no_bare_pass_in_except(self):
        """The poll-loop except block must log via logger.debug, not bare pass."""
        self.assertIn(
            "logger.debug",
            GATEWAY_WATCHER_PY,
            "gateway_watcher.py: must use logger.debug not bare pass (B110)",
        )

    def test_profiles_reload_dotenv_logs_on_error(self):
        """_reload_dotenv's except must both reset state and log a warning."""
        self.assertIn(
            "_loaded_profile_env_keys = set()",
            PROFILES_PY,
            "profiles.py: _reload_dotenv except must reset _loaded_profile_env_keys",
        )
        self.assertIn(
            "Failed to reload dotenv",
            PROFILES_PY,
            "profiles.py: _reload_dotenv except must log a warning",
        )
# ── QuietHTTPServer ──────────────────────────────────────────────────────────
class TestQuietHTTPServer(unittest.TestCase):
    """server.py: QuietHTTPServer suppresses client disconnect noise."""

    def test_quiet_http_server_class_exists(self):
        """QuietHTTPServer must be defined in server.py."""
        self.assertIn(
            "class QuietHTTPServer",
            SERVER_PY,
            "server.py: QuietHTTPServer class missing (PR #354)",
        )

    def test_quiet_http_server_extends_threading_http_server(self):
        """QuietHTTPServer must extend ThreadingHTTPServer."""
        self.assertRegex(
            SERVER_PY,
            r"class QuietHTTPServer\(ThreadingHTTPServer\)",
            "QuietHTTPServer must extend ThreadingHTTPServer",
        )

    def test_quiet_http_server_used_as_server(self):
        """main() must instantiate QuietHTTPServer not raw ThreadingHTTPServer."""
        # Only scan source after the class definition so the class's own
        # header cannot satisfy the check.
        after_class = SERVER_PY[SERVER_PY.find("class QuietHTTPServer"):]
        self.assertIn(
            "QuietHTTPServer(",
            after_class,
            "main() must use QuietHTTPServer, not ThreadingHTTPServer directly",
        )

    def test_handle_error_suppresses_connection_reset(self):
        """handle_error must suppress ConnectionResetError and BrokenPipeError."""
        self.assertIn(
            "ConnectionResetError",
            SERVER_PY,
            "QuietHTTPServer.handle_error must handle ConnectionResetError",
        )
        self.assertIn(
            "BrokenPipeError",
            SERVER_PY,
            "QuietHTTPServer.handle_error must handle BrokenPipeError",
        )

    def test_uses_sys_exc_info_not_traceback_sys(self):
        """handle_error must use sys.exc_info() not traceback.sys.exc_info() (implementation detail)."""
        self.assertNotIn(
            "traceback.sys.exc_info()",
            SERVER_PY,
            "server.py: must use sys.exc_info() not traceback.sys.exc_info()",
        )
        self.assertIn(
            "sys.exc_info()",
            SERVER_PY,
            "server.py: handle_error must call sys.exc_info()",
        )

    def test_sys_imported_in_server(self):
        """server.py must import sys (needed for sys.exc_info)."""
        # Fix: dropped the redundant function-local `import re`; this module
        # already imports re at the top.
        self.assertIsNotNone(
            re.search(r"^import sys", SERVER_PY, re.MULTILINE),
            "server.py: sys must be imported",
        )

    def test_handle_error_calls_super(self):
        """handle_error must call super().handle_error for non-client-disconnect errors."""
        self.assertIn(
            "super().handle_error(request, client_address)",
            SERVER_PY,
            "QuietHTTPServer.handle_error must delegate to super for real errors",
        )
# ── Session title redaction in /api/sessions ────────────────────────────────
class TestSessionTitleRedaction(unittest.TestCase):
    """routes.py: the /api/sessions list endpoint must redact session titles."""

    def test_redact_text_called_on_session_titles(self):
        """A _redact_text(...title...) call must appear in routes.py."""
        self.assertRegex(
            ROUTES_PY,
            r'_redact_text\([^)]*\btitle\b[^)]*\)',
            "routes.py: session titles must be redacted via _redact_text in /api/sessions",
        )

    def test_redact_text_imported_in_routes(self):
        """routes.py must reference the _redact_text helper."""
        self.assertIn(
            "_redact_text",
            ROUTES_PY,
            "routes.py: _redact_text must be imported from api.helpers",
        )

134
tests/test_sprint44.py Normal file
View File

@@ -0,0 +1,134 @@
"""
Sprint 44 Tests: Workspace panel close button fixes (PR #413).
Covers:
- index.html: mobile-close-btn now calls handleWorkspaceClose() instead of
closeWorkspacePanel(), so hitting X while a file is open returns you to the
file browser rather than collapsing the whole panel.
- boot.js: syncWorkspacePanelUI() hides #btnClearPreview (the X icon) on
desktop when no file preview is open, eliminating the duplicate X that
appeared alongside the chevron collapse button.
- boot.js: handleWorkspaceClose() logic — clears preview when one is visible,
closes panel otherwise (existing function, confirmed wired to both buttons).
"""
import pathlib
import re
import unittest
REPO = pathlib.Path(__file__).parent.parent
HTML = (REPO / "static" / "index.html").read_text(encoding="utf-8")
BOOT_JS = (REPO / "static" / "boot.js").read_text(encoding="utf-8")
class TestMobileCloseButtonBehavior(unittest.TestCase):
    """mobile-close-btn must call handleWorkspaceClose(), not closeWorkspacePanel()."""

    def _mobile_close_btn_tag(self):
        """Locate and return the opening tag of the mobile-close-btn element."""
        match = re.search(r'class="[^"]*mobile-close-btn[^"]*"[^>]*>', HTML)
        self.assertIsNotNone(match, "mobile-close-btn element not found in index.html")
        return match.group(0)

    def test_mobile_close_btn_calls_handle_workspace_close(self):
        """The X button's onclick must be handleWorkspaceClose()."""
        self.assertIn(
            'onclick="handleWorkspaceClose()"',
            self._mobile_close_btn_tag(),
            "mobile-close-btn must call handleWorkspaceClose() so that hitting X "
            "while a file is open closes the file first, not the whole panel",
        )

    def test_mobile_close_btn_does_not_call_close_workspace_panel_directly(self):
        """The X button must not wire closeWorkspacePanel() directly."""
        self.assertNotIn(
            'onclick="closeWorkspacePanel()"',
            self._mobile_close_btn_tag(),
            "mobile-close-btn must not call closeWorkspacePanel() directly — "
            "it would bypass the two-step close logic and collapse the panel even "
            "when a file is being viewed",
        )

    def test_handle_workspace_close_defined_in_boot_js(self):
        """handleWorkspaceClose() must be defined in boot.js."""
        self.assertIn(
            "function handleWorkspaceClose()",
            BOOT_JS,
            "handleWorkspaceClose() is missing from boot.js",
        )

    def test_handle_workspace_close_clears_preview_first(self):
        """handleWorkspaceClose() must call clearPreview() when a preview is visible."""
        self.assertIn(
            "clearPreview()",
            BOOT_JS,
            "handleWorkspaceClose() must call clearPreview() when preview is visible",
        )

    def test_handle_workspace_close_falls_back_to_close_panel(self):
        """handleWorkspaceClose() must fall back to closeWorkspacePanel()."""
        fn_start = BOOT_JS.find("function handleWorkspaceClose()")
        self.assertNotEqual(fn_start, -1, "handleWorkspaceClose() not found in boot.js")
        # A generous fixed-size window after the signature covers the body.
        fn_window = BOOT_JS[fn_start : fn_start + 400]
        self.assertIn(
            "closeWorkspacePanel()",
            fn_window,
            "handleWorkspaceClose() must call closeWorkspacePanel() as its fallback path",
        )
class TestDesktopNoDuplicateXButton(unittest.TestCase):
    """On desktop, only one X/close control should appear at a time."""

    def test_sync_workspace_panel_ui_hides_clear_preview_on_desktop(self):
        """syncWorkspacePanelUI() must toggle clearBtn.style.display."""
        self.assertIn(
            "clearBtn.style.display",
            BOOT_JS,
            "syncWorkspacePanelUI() must control clearBtn.style.display to hide it "
            "on desktop when no file preview is open",
        )

    def test_clear_preview_hidden_when_no_preview(self):
        """The display toggle for btnClearPreview must key off hasPreview."""
        expected_pattern = r"clearBtn\.style\.display\s*=\s*hasPreview"
        self.assertRegex(
            BOOT_JS,
            expected_pattern,
            "btnClearPreview display must be conditioned on hasPreview in "
            "syncWorkspacePanelUI() to avoid a duplicate X on desktop",
        )

    def test_clear_preview_toggle_only_applied_on_desktop(self):
        """The display toggle must be guarded by !isCompact so mobile is unaffected."""
        # Either operand order is acceptable, as long as both appear together.
        expected_pattern = r"isCompact.*clearBtn\.style\.display|clearBtn\.style\.display.*isCompact"
        self.assertRegex(
            BOOT_JS,
            expected_pattern,
            "btnClearPreview display toggle must be guarded by isCompact so the "
            "mobile X button visibility is not accidentally affected",
        )

    def test_btnclearpreview_exists_in_html(self):
        """#btnClearPreview must still exist in the HTML (not removed)."""
        self.assertIn(
            'id="btnClearPreview"',
            HTML,
            "#btnClearPreview must remain in index.html",
        )

    def test_btncollapseWorkspacepanel_exists_in_html(self):
        """#btnCollapseWorkspacePanel (chevron) must still exist in the HTML."""
        self.assertIn(
            'id="btnCollapseWorkspacePanel"',
            HTML,
            "#btnCollapseWorkspacePanel must remain in index.html",
        )
# Allow running this test file directly with `python`, outside the pytest runner.
if __name__ == "__main__":
    unittest.main()

157
tests/test_sprint45.py Normal file
View File

@@ -0,0 +1,157 @@
"""
Sprint 45 Tests: v0.50.36 upstream sync with minimal local patch retention.
Covers:
- First password enablement via POST /api/settings keeps the current browser logged in
- The returned auth metadata is present and onboarding can continue with the issued cookie
- Legacy assistant_language is no longer exposed and is removed on the next save
- The local reply-language UI/runtime enhancement is gone from the synced codebase
"""
import json
import pathlib
import urllib.error
import urllib.request
import os
from tests._pytest_port import BASE
REPO = pathlib.Path(__file__).parent.parent
# Use HERMES_WEBUI_TEST_STATE_DIR if available (set by conftest for the test process),
# falling back to the conventional webui-mvp-test path.
def _get_settings_file() -> pathlib.Path:
"""Resolve SETTINGS_FILE at call time (env var set by conftest after module import)."""
state_dir = pathlib.Path(
os.environ.get("HERMES_WEBUI_TEST_STATE_DIR",
str(pathlib.Path.home() / ".hermes" / "webui-mvp-test"))
)
return state_dir / "settings.json"
def get(path, headers=None):
    """GET BASE+path; return (parsed_json, status, headers), even for HTTP errors."""
    request = urllib.request.Request(BASE + path, headers=headers or {})
    try:
        response = urllib.request.urlopen(request, timeout=10)
    except urllib.error.HTTPError as err:
        return json.loads(err.read()), err.code, dict(err.headers)
    with response:
        return json.loads(response.read()), response.status, dict(response.headers)
def post(path, body=None, headers=None):
    """POST a JSON body to BASE+path; return (parsed_json, status, headers)."""
    payload = json.dumps(body or {}).encode()
    request = urllib.request.Request(
        BASE + path,
        data=payload,
        headers={"Content-Type": "application/json", **(headers or {})},
    )
    try:
        response = urllib.request.urlopen(request, timeout=10)
    except urllib.error.HTTPError as err:
        return json.loads(err.read()), err.code, dict(err.headers)
    with response:
        return json.loads(response.read()), response.status, dict(response.headers)
def read(path):
    """Return the UTF-8 text of a file given its repo-relative path."""
    target = REPO / path
    return target.read_text(encoding="utf-8")
def _snapshot_settings_file():
    """Return the current settings.json text, or None if the file is absent.

    Fix: the original resolved _get_settings_file() twice and used an
    exists()/read() pair, which both re-resolves the env-dependent path and
    races with concurrent deletion; a single EAFP read avoids both.
    """
    try:
        return _get_settings_file().read_text(encoding="utf-8")
    except FileNotFoundError:
        return None
def _restore_settings_file(original_text):
if original_text is None:
_get_settings_file().unlink(missing_ok=True)
return
_get_settings_file().write_text(original_text, encoding="utf-8")
def test_first_password_enablement_returns_cookie_and_keeps_browser_logged_in():
    """Enabling the first password must keep the enabling browser logged in
    and allow onboarding to continue with the freshly issued cookie."""
    original_settings = _snapshot_settings_file()
    cookie_header = None  # captured so teardown can authenticate against the server

    try:
        saved, status, response_headers = post(
            "/api/settings", {"_set_password": "sprint45-secret"}
        )
        assert status == 200
        assert saved["auth_enabled"] is True
        assert saved["logged_in"] is True
        assert saved["auth_just_enabled"] is True

        set_cookie = response_headers.get("Set-Cookie", "")
        assert "hermes_session=" in set_cookie
        cookie_header = set_cookie.split(";", 1)[0]

        auth, auth_status, _ = get(
            "/api/auth/status", headers={"Cookie": cookie_header}
        )
        assert auth_status == 200
        assert auth["auth_enabled"] is True
        assert auth["logged_in"] is True

        done, done_status, _ = post(
            "/api/onboarding/complete", {}, headers={"Cookie": cookie_header}
        )
        assert done_status == 200
        assert done["completed"] is True
    finally:
        # Step 1: write a clean settings file (no password_hash) straight to disk.
        try:
            import json as _json
            clean = _json.loads(original_settings) if original_settings else {}
            clean.pop("password_hash", None)
            settings_file = _get_settings_file()
            settings_file.parent.mkdir(parents=True, exist_ok=True)
            settings_file.write_text(_json.dumps(clean, indent=2), encoding="utf-8")
        except Exception:
            pass
        # Step 2: ask the server to drop auth via the API — this requires the
        # session cookie issued above, when we managed to capture one.
        try:
            auth_headers = {"Cookie": cookie_header} if cookie_header else {}
            post("/api/settings", {"_clear_password": True}, headers=auth_headers)
        except Exception:
            pass
        _restore_settings_file(original_settings)
def test_legacy_assistant_language_is_hidden_and_removed_on_next_save():
    """A legacy assistant_language key on disk is never surfaced by the API
    and is purged from the file by the next settings save."""
    original_settings = _snapshot_settings_file()
    try:
        legacy_payload = {
            "assistant_language": "zh",
            "send_key": "enter",
            "onboarding_completed": False,
        }
        settings_file = _get_settings_file()
        settings_file.parent.mkdir(parents=True, exist_ok=True)
        settings_file.write_text(
            json.dumps(legacy_payload, ensure_ascii=False, indent=2),
            encoding="utf-8",
        )

        loaded, status, _ = get("/api/settings")
        assert status == 200
        assert "assistant_language" not in loaded

        saved, save_status, _ = post("/api/settings", {"send_key": "ctrl+enter"})
        assert save_status == 200
        assert "assistant_language" not in saved
        assert saved["send_key"] == "ctrl+enter"

        persisted = json.loads(settings_file.read_text(encoding="utf-8"))
        assert "assistant_language" not in persisted
    finally:
        _restore_settings_file(original_settings)
def test_reply_language_customization_ui_and_runtime_are_removed():
    """The local reply-language UI/runtime patch must be fully gone
    from the synced codebase."""
    forbidden = {
        "static/index.html": ("settingsAssistantLanguage",),
        "static/panels.js": ("assistant_language", "settingsAssistantLanguage"),
        "api/streaming.py": ("assistant_language", "Default reply language:"),
    }
    for rel_path, needles in forbidden.items():
        content = read(rel_path)
        for needle in needles:
            assert needle not in content

167
tests/test_sprint46.py Normal file
View File

@@ -0,0 +1,167 @@
"""
Sprint 46 Tests: manual session compression with optional focus topic.
"""
import contextlib
import io
import json
import sys
import types
from api.models import Session
from api.config import SESSION_DIR
from api.routes import _handle_session_compress
from tests._pytest_port import BASE
class _FakeHandler:
def __init__(self):
self.wfile = io.BytesIO()
self.status = None
self.sent_headers = {}
def send_response(self, status):
self.status = status
def send_header(self, key, value):
self.sent_headers[key] = value
def end_headers(self):
pass
def payload(self):
return json.loads(self.wfile.getvalue().decode("utf-8"))
class _FakeCompressor:
def __init__(self):
self.calls = []
def compress(self, messages, current_tokens=None, focus_topic=None):
self.calls.append(
{
"messages": list(messages),
"current_tokens": current_tokens,
"focus_topic": focus_topic,
}
)
if len(messages) >= 2:
return [messages[0], messages[-1]]
return list(messages)
class _FakeAgent:
last_instance = None
def __init__(self, **kwargs):
self.kwargs = kwargs
self.context_compressor = _FakeCompressor()
_FakeAgent.last_instance = self
def _make_session(messages=None):
    """Persist a throwaway session to SESSION_DIR and return its id.

    When *messages* is falsy, a four-message user/assistant transcript is
    used so compression has something meaningful to shrink.
    """
    SESSION_DIR.mkdir(parents=True, exist_ok=True)
    messages = messages or [
        {"role": "user", "content": "one"},
        {"role": "assistant", "content": "two"},
        {"role": "user", "content": "three"},
        {"role": "assistant", "content": "four"},
    ]
    session = Session(
        session_id="compress_test_001",
        title="Untitled",
        workspace="/tmp/hermes-webui-test",
        model="openai/gpt-5.4-mini",
        messages=messages,
    )
    session.save(touch_updated_at=False)
    return session.session_id
def test_session_compress_requires_session_id(cleanup_test_sessions):
    """A compress request without session_id is rejected with 400."""
    handler = _FakeHandler()
    _handle_session_compress(handler, {})
    assert handler.status == 400
    assert handler.payload()["error"] == "Missing required field(s): session_id"


def test_session_compress_roundtrip(monkeypatch, cleanup_test_sessions):
    """Full compress round trip against a fake agent: the focus topic is
    forwarded to the compressor and the trimmed transcript is persisted."""
    created = cleanup_test_sessions
    sid = _make_session()
    created.append(sid)

    # Stand in for the real run_agent module.
    fake_run_agent = types.ModuleType("run_agent")
    fake_run_agent.AIAgent = _FakeAgent
    monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent)

    import api.config as _cfg

    # Stub the hermes_cli package tree so provider resolution never touches
    # real credentials.
    fake_runtime_provider = types.ModuleType("hermes_cli.runtime_provider")
    fake_runtime_provider.resolve_runtime_provider = lambda requested=None: {
        "api_key": "fake-key",
        "provider": requested or "openai",
        "base_url": "https://api.openai.com/v1",
    }
    fake_hermes_cli = types.ModuleType("hermes_cli")
    fake_hermes_cli.__path__ = []
    fake_hermes_cli.runtime_provider = fake_runtime_provider
    monkeypatch.setitem(sys.modules, "hermes_cli", fake_hermes_cli)
    monkeypatch.setitem(
        sys.modules, "hermes_cli.runtime_provider", fake_runtime_provider
    )
    import hermes_cli.runtime_provider as _rtp

    monkeypatch.setattr(
        _cfg,
        "resolve_model_provider",
        lambda model: ("openai/gpt-5.4-mini", "openai", "https://api.openai.com/v1"),
    )
    monkeypatch.setattr(
        _cfg,
        "_get_session_agent_lock",
        lambda sid: contextlib.nullcontext(),
    )
    monkeypatch.setattr(
        _rtp,
        "resolve_runtime_provider",
        lambda requested=None: {
            "api_key": "fake-key",
            "provider": requested or "openai",
            "base_url": "https://api.openai.com/v1",
        },
    )

    handler = _FakeHandler()
    _handle_session_compress(
        handler, {"session_id": sid, "focus_topic": "database schema"}
    )
    assert handler.status == 200

    payload = handler.payload()
    assert payload["ok"] is True
    assert payload["focus_topic"] == "database schema"
    assert payload["summary"]["headline"] == "Compressed: 4 → 2 messages"
    assert payload["session"]["session_id"] == sid
    assert payload["session"]["messages"] == [
        {"role": "user", "content": "one"},
        {"role": "assistant", "content": "four"},
    ]

    assert _FakeAgent.last_instance is not None
    first_call = _FakeAgent.last_instance.context_compressor.calls[0]
    assert first_call["focus_topic"] == "database schema"
def test_static_commands_js_registers_compress_alias(cleanup_test_sessions):
    """commands.js must expose both /compress and its /compact alias."""
    from pathlib import Path
    commands_js = Path(__file__).resolve().parents[1] / "static" / "commands.js"
    src = commands_js.read_text(encoding="utf-8")
    for needle in (
        "name:'compress'",
        "name:'compact'",
        "/api/session/compress",
        "cmdCompress",
        "cmdCompact",
    ):
        assert needle in src


def test_static_commands_js_prefers_persisted_reference_message(cleanup_test_sessions):
    """The persisted reference message must win over the summary reference."""
    from pathlib import Path
    commands_js = Path(__file__).resolve().parents[1] / "static" / "commands.js"
    src = commands_js.read_text(encoding="utf-8")
    assert "const messageRef=referenceMsg?msgContent(referenceMsg)||String(referenceMsg.content||''):'';" in src
    assert "const referenceText=messageRef || summaryRef;" in src

39
tests/test_sprint47.py Normal file
View File

@@ -0,0 +1,39 @@
"""
Sprint 47 tests: skill-backed slash commands appear in the Web UI autocomplete.
Covers:
- commands.js lazily loads /api/skills for slash autocomplete
- built-in commands still win over skill name collisions
- boot.js primes the async skill load when typing '/'
- the dropdown marks skill-backed entries visually
"""
import pathlib
REPO_ROOT = pathlib.Path(__file__).parent.parent
COMMANDS_JS = (REPO_ROOT / "static" / "commands.js").read_text(encoding="utf-8")
BOOT_JS = (REPO_ROOT / "static" / "boot.js").read_text(encoding="utf-8")
STYLE_CSS = (REPO_ROOT / "static" / "style.css").read_text(encoding="utf-8")
def test_skill_commands_are_loaded_from_api_skills_for_autocomplete():
    """commands.js lazily fetches /api/skills and tags entries as skills."""
    for needle in ("loadSkillCommands", "api('/api/skills')", "source:'skill'"):
        assert needle in COMMANDS_JS


def test_builtin_commands_take_precedence_over_skill_slug_collisions():
    # In the combined implementation, REGISTRY (agent registry + WEBUI_ONLY)
    # wins over skills, so either guard form is acceptable.
    guards = (
        "if(COMMANDS.some(c=>c.name===slug)) return null;",
        "if(REGISTRY.some(c=>c.name===slug)) return null;",
    )
    assert any(guard in COMMANDS_JS for guard in guards), \
        "Built-in commands must block skill slug collisions"


def test_typing_slash_primes_async_skill_command_loading():
    """boot.js must kick off the async skill load when '/' is typed."""
    assert "ensureSkillCommandsLoadedForAutocomplete" in BOOT_JS
    assert "ensureSkillCommandsLoadedForAutocomplete();" in BOOT_JS


def test_dropdown_has_visual_badge_for_skill_backed_entries():
    """Skill-backed dropdown entries carry a visual badge."""
    assert "cmd-item-badge-skill" in STYLE_CSS
    assert "slash_skill_badge" in COMMANDS_JS

209
tests/test_sprint48.py Normal file
View File

@@ -0,0 +1,209 @@
"""Tests for sprint 48 UX bug fixes — v0.50.92.
Covers:
- #702: XML tool-call syntax (<function_calls>) stripped from assistant
message content before rendering (server-side + client-side).
- #703: Workspace file panel shows an empty-state message when no workspace
is configured or the directory is empty.
- #704: Notification settings description uses "app" instead of "tab".
"""
import pathlib
import re
REPO = pathlib.Path(__file__).parent.parent
def read(rel):
    """Return the text of *rel* (relative to the repo root) decoded as UTF-8.

    The encoding is explicit: the default read_text() uses the locale
    encoding, which breaks on non-ASCII i18n strings under e.g. a C locale.
    The sibling sprint test files already pass encoding="utf-8".
    """
    return (REPO / rel).read_text(encoding="utf-8")
# ── Bug #702 — XML tool-call leak on DeepSeek ────────────────────────────────
class TestXmlToolCallStrip:
    """_strip_xml_tool_calls() is defined in api/streaming.py and must remove
    <function_calls>...</function_calls> blocks from assistant content."""

    def _load_fn(self):
        """Import the helper from streaming.py without triggering full server
        initialisation (which would fail in unit-test contexts).

        Returns the extracted ``_strip_xml_tool_calls`` callable.
        """
        import importlib, sys, types
        # Stub heavy transitive imports so we can import the module cleanly.
        for mod in ('api.config', 'api.helpers', 'api.models', 'api.workspace'):
            if mod not in sys.modules:
                sys.modules[mod] = types.ModuleType(mod)
        # Provide minimal symbols that streaming.py needs at import time.
        # setdefault() returns the stub created above (or a pre-existing real
        # module, which already has these attributes).
        cfg = sys.modules.setdefault('api.config', types.ModuleType('api.config'))
        for attr in ('STREAMS', 'STREAMS_LOCK', 'CANCEL_FLAGS', 'AGENT_INSTANCES',
                     'LOCK', 'SESSIONS', 'SESSION_DIR',
                     '_get_session_agent_lock', '_set_thread_env',
                     '_clear_thread_env', 'resolve_model_provider'):
            if not hasattr(cfg, attr):
                setattr(cfg, attr, None)
        # Fall back to reading the source and exec-ing just the function.
        src = read('api/streaming.py')
        ns: dict = {}
        # Extract the function definition with regex so we don't need to import
        # the whole module (avoids all the heavy deps).  The lookahead stops at
        # the next top-level def/class, so the helper must not be the last
        # top-level definition in the file.
        match = re.search(
            r'(def _strip_xml_tool_calls\(.*?)\n(?=\ndef |\nclass )',
            src, re.DOTALL
        )
        assert match, "_strip_xml_tool_calls not found in api/streaming.py"
        # 'import re' is prepended because the extracted body uses the re module
        # but is exec'd in a fresh namespace.
        exec(compile('import re\n' + match.group(1), '<streaming_extract>', 'exec'), ns)
        return ns['_strip_xml_tool_calls']

    def test_complete_block_removed(self):
        """A well-formed <function_calls> block is removed, text kept."""
        fn = self._load_fn()
        text = "Hello <function_calls><invoke>foo</invoke></function_calls> world"
        result = fn(text)
        assert '<function_calls>' not in result
        assert 'Hello' in result
        assert 'world' in result

    def test_orphaned_opening_tag_removed(self):
        """An unterminated opening tag (stream cut off) is also stripped."""
        fn = self._load_fn()
        text = "Some answer text\n<function_calls>\n<invoke>tool</invoke>"
        result = fn(text)
        assert '<function_calls>' not in result
        assert 'Some answer text' in result

    def test_no_tag_unchanged(self):
        """Content without tool-call markup passes through untouched."""
        fn = self._load_fn()
        text = "This is a normal response with no tool calls."
        assert fn(text) == text

    def test_multiple_blocks_removed(self):
        """Every block is removed, not just the first occurrence."""
        fn = self._load_fn()
        text = (
            "Part one <function_calls><invoke>a</invoke></function_calls> "
            "middle <function_calls><invoke>b</invoke></function_calls> end"
        )
        result = fn(text)
        assert '<function_calls>' not in result
        assert 'Part one' in result
        assert 'middle' in result
        assert 'end' in result

    def test_function_defined_in_streaming_py(self):
        src = read('api/streaming.py')
        assert 'def _strip_xml_tool_calls(' in src, (
            "_strip_xml_tool_calls must be defined in api/streaming.py"
        )

    def test_strip_applied_to_assistant_messages(self):
        """Verify the strip call is applied to assistant message content after
        the agent run completes (server-side persistence fix)."""
        src = read('api/streaming.py')
        assert '_strip_xml_tool_calls' in src, (
            "_strip_xml_tool_calls must be referenced in api/streaming.py"
        )
        # Confirm it is called on message content, not just defined
        assert src.count('_strip_xml_tool_calls') >= 2, (
            "_strip_xml_tool_calls must be both defined and called"
        )

    def test_client_side_strip_in_messages_js(self):
        """The client mirrors the strip for already-persisted messages."""
        src = read('static/messages.js')
        assert '_stripXmlToolCalls' in src, (
            "Client-side _stripXmlToolCalls must exist in static/messages.js"
        )
        assert 'function_calls' in src.lower(), (
            "Client-side strip must reference 'function_calls'"
        )

    def test_client_side_strip_in_ui_js(self):
        """ui.js strips tool-call XML from the live display path too."""
        src = read('static/ui.js')
        assert '_stripXmlToolCallsDisplay' in src, (
            "_stripXmlToolCallsDisplay must exist in static/ui.js"
        )
# ── Bug #703 — Workspace file panel empty state ───────────────────────────────
class TestWorkspaceEmptyState:
    """Bug #703: the workspace file panel needs explicit empty-state UI."""

    def test_i18n_no_path_string_present(self):
        assert 'workspace_empty_no_path' in read('static/i18n.js'), (
            "i18n key workspace_empty_no_path must be defined in i18n.js"
        )

    def test_i18n_no_path_mentions_settings(self):
        # The empty-state copy should direct the user to Settings.
        m = re.search(r"workspace_empty_no_path:\s*'([^']+)'", read('static/i18n.js'))
        assert m, "workspace_empty_no_path value not found in i18n.js"
        assert 'Settings' in m.group(1), (
            "workspace_empty_no_path should mention Settings"
        )

    def test_i18n_empty_dir_string_present(self):
        assert 'workspace_empty_dir' in read('static/i18n.js'), (
            "i18n key workspace_empty_dir must be defined in i18n.js"
        )

    def test_empty_state_element_in_html(self):
        assert 'wsEmptyState' in read('static/index.html'), (
            "id=\"wsEmptyState\" empty-state element must exist in index.html"
        )

    def test_render_file_tree_shows_empty_state(self):
        ui_src = read('static/ui.js')
        assert 'wsEmptyState' in ui_src, (
            "renderFileTree in ui.js must reference wsEmptyState"
        )
        assert 'workspace_empty_no_path' in ui_src, (
            "renderFileTree must use workspace_empty_no_path i18n key"
        )
        assert 'workspace_empty_dir' in ui_src, (
            "renderFileTree must use workspace_empty_dir i18n key"
        )
# ── Bug #704 — Notification description says "tab" ───────────────────────────
class TestNotificationDescriptionText:
    """Bug #704: the notification setting should say "app", not "tab"."""

    @staticmethod
    def _english_section(src):
        # The English locale block is first in i18n.js; it ends where the
        # Spanish (es) description begins.  When the marker is missing we
        # fall back to the whole file.
        es_marker = "settings_desc_notifications: 'Muestra"
        end = src.index(es_marker) if es_marker in src else len(src)
        return src[:end]

    def test_english_uses_app_not_tab(self):
        en_section = self._english_section(read('static/i18n.js'))
        m = re.search(r"settings_desc_notifications:\s*'([^']+)'", en_section)
        assert m, "English settings_desc_notifications not found"
        desc = m.group(1)
        assert 'tab' not in desc.lower(), (
            f"English notification description must not say 'tab', got: {desc!r}"
        )
        assert 'app' in desc.lower(), (
            f"English notification description must say 'app', got: {desc!r}"
        )

    def test_new_wording_exact(self):
        expected = 'while the app is in the background'
        assert expected in read('static/i18n.js'), (
            f"Exact phrase {expected!r} must appear in i18n.js"
        )

    def test_old_wording_removed_from_english(self):
        old_phrase = 'while the tab is in the background'
        en_section = self._english_section(read('static/i18n.js'))
        assert old_phrase not in en_section, (
            "Old English notification description with 'tab' must be removed"
        )

157
tests/test_sprint5.py Normal file
View File

@@ -0,0 +1,157 @@
"""Sprint 5 tests: workspace CRUD, file save, session index, JS serving."""
import json, pathlib, uuid, urllib.request, urllib.error
import os
from tests._pytest_port import BASE
def get(path):
    """GET ``BASE + path``; return (json_body, status)."""
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        return json.loads(resp.read()), resp.status


def get_raw(path):
    """GET ``BASE + path``; return (raw_bytes, content_type, status)."""
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        content_type = resp.headers.get("Content-Type", "")
        return resp.read(), content_type, resp.status


def post(path, body=None):
    """POST a JSON body; return (json_body, status), even on HTTP errors."""
    request = urllib.request.Request(
        BASE + path,
        data=json.dumps(body or {}).encode(),
        headers={"Content-Type": "application/json"},
    )
    try:
        with urllib.request.urlopen(request, timeout=10) as resp:
            return json.loads(resp.read()), resp.status
    except urllib.error.HTTPError as err:
        return json.loads(err.read()), err.code
def make_session_tracked(created_list, ws=None):
    """Create a session via the API and register it with the cleanup fixture.

    Returns (session_id, workspace_path).  Uses the module-level ``pathlib``
    import directly; the previous function-local ``import pathlib as
    _pathlib`` was a redundant shadow of an import already at the top of
    this file.
    """
    body = {}
    if ws:
        body["workspace"] = str(ws)
    d, _ = post("/api/session/new", body)
    sid = d["session"]["session_id"]
    created_list.append(sid)
    return sid, pathlib.Path(d["session"]["workspace"])
def make_workspace_child(base: pathlib.Path, name: str) -> pathlib.Path:
    """Create (if needed) and return the child directory *name* under *base*."""
    child = base / name
    child.mkdir(parents=True, exist_ok=True)
    return child
def test_server_running_from_new_location():
    """The test server answers /health."""
    payload, status = get("/health")
    assert status == 200 and payload["status"] == "ok"


def test_app_js_served():
    """Sprint 9: app.js replaced by modules. Verify ui.js (contains renderMd) is served."""
    raw, content_type, status = get_raw("/static/ui.js")
    assert status == 200 and "javascript" in content_type and b"renderMd" in raw


def test_workspaces_list():
    payload, status = get("/api/workspaces")
    assert status == 200 and "workspaces" in payload and "last" in payload


def test_workspace_add_valid(cleanup_test_sessions):
    _, ws = make_session_tracked(cleanup_test_sessions)
    child = make_workspace_child(ws, f"workspace-add-{uuid.uuid4().hex[:6]}")
    post("/api/workspaces/remove", {"path": str(child)})  # ensure a clean slate
    result, status = post("/api/workspaces/add", {"path": str(child), "name": "Temp"})
    assert status == 200 and any(w["path"] == str(child) for w in result["workspaces"])
    post("/api/workspaces/remove", {"path": str(child)})


def test_workspace_add_validates_existence():
    """Adding a nonexistent directory is rejected."""
    _, status = post("/api/workspaces/add", {"path": "/tmp/does_not_exist_xyz_999"})
    assert status == 400


def test_workspace_add_validates_is_dir():
    """Adding a regular file is rejected."""
    _, status = post("/api/workspaces/add", {"path": "/etc/hostname"})
    assert status == 400


def test_workspace_add_no_duplicate(cleanup_test_sessions):
    _, ws = make_session_tracked(cleanup_test_sessions)
    child = make_workspace_child(ws, f"workspace-dup-{uuid.uuid4().hex[:6]}")
    post("/api/workspaces/remove", {"path": str(child)})
    post("/api/workspaces/add", {"path": str(child)})
    result, status = post("/api/workspaces/add", {"path": str(child)})
    assert status == 400 and "already" in result.get("error", "").lower()
    post("/api/workspaces/remove", {"path": str(child)})


def test_workspace_add_requires_path():
    _, status = post("/api/workspaces/add", {})
    assert status == 400
def test_workspace_remove(cleanup_test_sessions):
    _, ws = make_session_tracked(cleanup_test_sessions)
    child = make_workspace_child(ws, f"workspace-remove-{uuid.uuid4().hex[:6]}")
    post("/api/workspaces/remove", {"path": str(child)})  # idempotent pre-clean
    post("/api/workspaces/add", {"path": str(child), "name": "Temp"})
    result, status = post("/api/workspaces/remove", {"path": str(child)})
    remaining = [w["path"] for w in result["workspaces"]]
    assert status == 200 and str(child) not in remaining


def test_workspace_rename(cleanup_test_sessions):
    _, ws = make_session_tracked(cleanup_test_sessions)
    child = make_workspace_child(ws, f"workspace-rename-{uuid.uuid4().hex[:6]}")
    post("/api/workspaces/remove", {"path": str(child)})
    post("/api/workspaces/add", {"path": str(child), "name": "Temp"})
    result, status = post("/api/workspaces/rename", {"path": str(child), "name": "My Temp"})
    assert status == 200
    names_by_path = {w["path"]: w["name"] for w in result["workspaces"]}
    assert names_by_path.get(str(child)) == "My Temp"
    post("/api/workspaces/remove", {"path": str(child)})


def test_workspace_rename_unknown():
    """Renaming an unregistered path is a 404."""
    _, status = post("/api/workspaces/rename", {"path": "/no/such/path", "name": "X"})
    assert status == 404


def test_last_workspace_updates_on_session_update(cleanup_test_sessions):
    sid, ws = make_session_tracked(cleanup_test_sessions)
    child = make_workspace_child(ws, f"workspace-last-{uuid.uuid4().hex[:6]}")
    post("/api/session/update", {
        "session_id": sid,
        "workspace": str(child),
        "model": "openai/gpt-5.4-mini",
    })
    payload, _ = get("/api/workspaces")
    assert payload["last"] == str(child)
def test_file_save(cleanup_test_sessions):
    """Saving overwrites an existing workspace file."""
    sid, ws = make_session_tracked(cleanup_test_sessions)
    fname = f"save_{uuid.uuid4().hex[:6]}.txt"
    (ws / fname).write_text("original content")
    _, status = post("/api/file/save", {"session_id": sid, "path": fname, "content": "updated"})
    assert status == 200 and (ws / fname).read_text() == "updated"


def test_file_save_requires_fields(cleanup_test_sessions):
    sid, _ = make_session_tracked(cleanup_test_sessions)
    _, status = post("/api/file/save", {"session_id": sid})
    assert status == 400


def test_file_save_nonexistent_returns_404(cleanup_test_sessions):
    sid, _ = make_session_tracked(cleanup_test_sessions)
    _, status = post("/api/file/save", {"session_id": sid, "path": "no_such.txt", "content": ""})
    assert status == 404


def test_file_save_path_traversal_blocked(cleanup_test_sessions):
    """Path traversal outside the workspace must be rejected."""
    sid, _ = make_session_tracked(cleanup_test_sessions)
    _, status = post(
        "/api/file/save",
        {"session_id": sid, "path": "../../etc/passwd", "content": ""},
    )
    assert status in (400, 500)
def test_session_index_created_after_save(cleanup_test_sessions):
    """Creating a session keeps /api/sessions serving a list.

    The index lives in the TEST state dir, not the production dir, and
    cleanup may have already wiped the _index.json file — so we only
    assert the endpoint behaves, not that the file exists.  (The previous
    version computed the index path into two locals and never used them;
    that dead code is removed.)
    """
    make_session_tracked(cleanup_test_sessions)
    data, status = get("/api/sessions")
    assert status == 200
    assert isinstance(data["sessions"], list)


def test_sessions_endpoint_returns_sorted():
    """Sessions are returned newest-first when there are at least two."""
    data, status = get("/api/sessions")
    assert status == 200
    sessions = data["sessions"]
    if len(sessions) >= 2:
        assert sessions[0]["updated_at"] >= sessions[1]["updated_at"]


def test_new_session_inherits_last_workspace(cleanup_test_sessions):
    """A fresh session defaults to the most recently used workspace."""
    sid, ws = make_session_tracked(cleanup_test_sessions)
    child = make_workspace_child(ws, f"workspace-inherit-{uuid.uuid4().hex[:6]}")
    post("/api/session/update", {
        "session_id": sid,
        "workspace": str(child),
        "model": "openai/gpt-5.4-mini",
    })
    sid2, _ = make_session_tracked(cleanup_test_sessions)
    d, _ = get(f"/api/session?session_id={sid2}")
    assert d["session"]["workspace"] == str(child)

152
tests/test_sprint6.py Normal file
View File

@@ -0,0 +1,152 @@
"""Sprint 6 tests: Escape from editor, Phase D validation, HTML extraction, cron create, session export."""
import json, uuid, pathlib, urllib.request, urllib.error
REPO_ROOT = pathlib.Path(__file__).parent.parent.resolve()
from tests._pytest_port import BASE
def get(path):
    """GET ``BASE + path``; return (json_body, status)."""
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        return json.loads(resp.read()), resp.status


def get_raw(path):
    """GET ``BASE + path``; return (raw_bytes, headers, status)."""
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        return resp.read(), resp.headers, resp.status


def post(path, body=None):
    """POST a JSON body; return (json_body, status), even on HTTP errors."""
    request = urllib.request.Request(
        BASE + path,
        data=json.dumps(body or {}).encode(),
        headers={"Content-Type": "application/json"},
    )
    try:
        with urllib.request.urlopen(request, timeout=10) as resp:
            return json.loads(resp.read()), resp.status
    except urllib.error.HTTPError as err:
        return json.loads(err.read()), err.code
def make_session_tracked(created_list, ws=None):
    """Create a session via POST /api/session/new and track its id for cleanup.

    Returns (session_id, workspace_path).
    """
    payload = {}
    if ws:
        payload["workspace"] = str(ws)
    data, _ = post("/api/session/new", payload)
    session = data["session"]
    created_list.append(session["session_id"])
    return session["session_id"], pathlib.Path(session["workspace"])
# ── Phase E: HTML served from static/index.html ──
def test_index_html_served():
    """The root page is served with all Phase-E UI anchors present."""
    raw, headers, status = get_raw("/")
    assert status == 200
    assert b"sidebarResize" in raw, "Resize handle not found in HTML"
    assert b"cronCreateForm" in raw, "Cron create form not found in HTML"
    assert b"btnHermesPanel" in raw, "Hermes control center trigger not found in HTML"
    assert b"btnExportJSON" in raw, "Export JSON button not found in HTML"


def test_index_html_file_exists():
    """static/index.html exists on disk and is non-trivial in size."""
    p = REPO_ROOT / "static/index.html"
    assert p.exists(), "static/index.html does not exist"
    assert p.stat().st_size > 5000, "index.html seems too small"


def test_server_py_has_no_html_string():
    """server.py no longer embeds the UI as an inline HTML string.

    read_text() is given an explicit encoding: the default is the locale
    encoding, which can fail on non-ASCII source under e.g. a C locale.
    """
    txt = (REPO_ROOT / "server.py").read_text(encoding="utf-8")
    assert 'HTML = r"""' not in txt, "server.py still contains inline HTML string"
    assert "doctype html" not in txt.lower(), "server.py still contains raw HTML"
# ── Phase D: remaining endpoint validation ──
def test_approval_respond_requires_session_id():
    _, status = post("/api/approval/respond", {"choice": "deny"})
    assert status == 400


def test_approval_respond_rejects_invalid_choice(cleanup_test_sessions):
    sid, _ = make_session_tracked(cleanup_test_sessions)
    _, status = post("/api/approval/respond", {"session_id": sid, "choice": "INVALID"})
    assert status == 400


def test_file_raw_requires_session_id():
    """/api/file/raw without a session_id is a 400."""
    try:
        get_raw("/api/file/raw?path=test.png")
    except urllib.error.HTTPError as e:
        assert e.code == 400
    else:
        assert False, "Expected 400"


def test_file_raw_unknown_session():
    """/api/file/raw with an unknown session is a 404."""
    try:
        get_raw("/api/file/raw?session_id=nosuchsession&path=test.png")
    except urllib.error.HTTPError as e:
        assert e.code == 404
    else:
        assert False, "Expected 404"
# ── Cron create ──
def test_cron_create_requires_prompt():
    result, status = post("/api/crons/create", {"schedule": "0 9 * * *"})
    assert status == 400
    assert "prompt" in result.get("error", "").lower()


def test_cron_create_requires_schedule():
    result, status = post("/api/crons/create", {"prompt": "Say hello"})
    assert status == 400
    assert "schedule" in result.get("error", "").lower()


def test_cron_create_invalid_schedule():
    """A garbage schedule string is rejected."""
    _, status = post("/api/crons/create", {
        "prompt": "Say hello", "schedule": "not_a_valid_schedule_xyz"
    })
    assert status == 400


def test_cron_create_success():
    uid = uuid.uuid4().hex[:6]
    result, status = post("/api/crons/create", {
        "name": f"test-job-{uid}",
        "prompt": "Just say 'hello' and nothing else.",
        "schedule": "every 999h",  # far future -- won't actually run during test
        "deliver": "local",
    })
    assert status == 200, f"Expected 200 got {status}: {result}"
    assert result["ok"] is True
    assert "job" in result
    job_id = result["job"]["id"]
    # Verify it appears in the cron list
    jobs, _ = get("/api/crons")
    listed_ids = [j["id"] for j in jobs["jobs"]]
    assert job_id in listed_ids, f"Created job {job_id} not in list"
# ── Session export ──
def test_session_export_requires_session_id():
    """Export without session_id is a 400."""
    try:
        get_raw("/api/session/export")
    except urllib.error.HTTPError as e:
        assert e.code == 400
    else:
        assert False


def test_session_export_unknown_session():
    """Export of an unknown session is a 404."""
    try:
        get_raw("/api/session/export?session_id=nosuchsession")
    except urllib.error.HTTPError as e:
        assert e.code == 404
    else:
        assert False


def test_session_export_returns_json(cleanup_test_sessions):
    sid, _ = make_session_tracked(cleanup_test_sessions)
    raw, headers, status = get_raw(f"/api/session/export?session_id={sid}")
    assert status == 200
    assert "application/json" in headers.get("Content-Type", "")
    exported = json.loads(raw)
    assert exported["session_id"] == sid
    assert "messages" in exported
    assert "title" in exported
# ── Resizable panels: static files present ──
def test_static_index_has_resize_handles():
    """Both panel resize handles are present in the served HTML."""
    raw, _, status = get_raw("/")
    assert status == 200
    assert b"sidebarResize" in raw
    assert b"rightpanelResize" in raw


def test_app_js_has_resize_logic():
    """Sprint 9: app.js replaced by modules. Resize logic lives in boot.js."""
    raw, _, status = get_raw("/static/boot.js")
    assert status == 200
    for marker in (b"_initResizePanels", b"hermes-sidebar-w", b"hermes-panel-w"):
        assert marker in raw

Some files were not shown because too many files have changed in this diff Show More