🔧 Initial dev copy from live

This commit is contained in:
Rose
2026-04-20 10:43:30 +02:00
commit 96977b576a
284 changed files with 95780 additions and 0 deletions

0
tests/__init__.py Normal file
View File

42
tests/_pytest_port.py Normal file
View File

@@ -0,0 +1,42 @@
"""
Shared test server constants for use in individual test files.
Instead of hardcoding ``BASE = "http://127.0.0.1:8788"`` in every test file,
import from here so the port and state dir are always consistent with
what conftest.py computed for this worktree.
Usage::
from tests._pytest_port import BASE
conftest.py publishes ``HERMES_WEBUI_TEST_PORT`` and
``HERMES_WEBUI_TEST_STATE_DIR`` to ``os.environ`` at module level
(before any test file is imported), so this module always reads the
correct values. The auto-derivation fallback matches conftest's logic
exactly, so standalone imports also work correctly.
"""
import hashlib
import os
import pathlib
def _auto_test_port(repo_root: pathlib.Path) -> int:
h = int(hashlib.md5(str(repo_root).encode()).hexdigest(), 16)
return 20000 + (h % 10000)
def _auto_state_dir_name(repo_root: pathlib.Path) -> str:
h = hashlib.md5(str(repo_root).encode()).hexdigest()[:8]
return f"webui-test-{h}"
# Anchor all derived paths on this file's location: <repo>/tests/_pytest_port.py
_TESTS_DIR = pathlib.Path(__file__).parent.resolve()
_REPO_ROOT = _TESTS_DIR.parent.resolve()
# Hermes home dir; HERMES_HOME env var overrides the ~/.hermes default.
_HERMES_HOME = pathlib.Path(os.getenv('HERMES_HOME',
                                      str(pathlib.Path.home() / '.hermes')))
# Port published by conftest.py via HERMES_WEBUI_TEST_PORT; the fallback
# re-derives the same value from the repo path (must match conftest's logic).
TEST_PORT = int(os.environ.get('HERMES_WEBUI_TEST_PORT',
                               str(_auto_test_port(_REPO_ROOT))))
BASE = f"http://127.0.0.1:{TEST_PORT}"
# Isolated state dir for this worktree's test server; the env var (set by
# conftest.py at collection time) wins over the local derivation.
TEST_STATE_DIR = pathlib.Path(os.environ.get(
    'HERMES_WEBUI_TEST_STATE_DIR',
    str(_HERMES_HOME / _auto_state_dir_name(_REPO_ROOT))
))

392
tests/conftest.py Normal file
View File

@@ -0,0 +1,392 @@
"""
Shared pytest fixtures for webui-mvp tests.
TEST ISOLATION:
    Tests run against a SEPARATE server instance on an auto-derived port
    (20000-29999, hashed from the repo path; pin with HERMES_WEBUI_TEST_PORT)
    with a completely separate state directory.
    Production data is never touched.
    The test state dir is wiped before each full test run and again on teardown.
PATH DISCOVERY:
    No hardcoded paths. Discovery order:
    1. Environment variables (HERMES_WEBUI_AGENT_DIR, HERMES_WEBUI_PYTHON, etc.)
    2. Sibling checkout heuristics relative to this repo
    3. Common install paths (~/.hermes/hermes-agent)
    4. System python3 as a last resort
"""
import json
import os
import pathlib
import shutil
import subprocess
import time
import urllib.request
import urllib.error
import pytest
# ── Repo root discovery ────────────────────────────────────────────────────
# conftest.py lives at <repo>/tests/conftest.py, so the repo root is one up.
TESTS_DIR = pathlib.Path(__file__).parent.resolve()
REPO_ROOT = TESTS_DIR.parent.resolve()
HOME = pathlib.Path.home()
# HERMES_HOME env var overrides the default ~/.hermes location.
HERMES_HOME = pathlib.Path(os.getenv('HERMES_HOME', str(HOME / '.hermes')))
# ── Test server config ────────────────────────────────────────────────────
# Port and state dir auto-derive from the repo path when no env var is set,
# giving every worktree its own isolated port (20000-29999) and state directory.
# Override with HERMES_WEBUI_TEST_PORT / HERMES_WEBUI_TEST_STATE_DIR to pin.
def _auto_test_port(repo_root) -> int:
"""Map repo path to a unique port in 20000-29999 (10k range = near-zero collisions).
Far from system port ranges and Linux ephemeral ports (32768+).
Override with HERMES_WEBUI_TEST_PORT to use a specific port."""
import hashlib
h = int(hashlib.md5(str(repo_root).encode()).hexdigest(), 16)
return 20000 + (h % 10000)
def _auto_state_dir_name(repo_root) -> str:
import hashlib
h = hashlib.md5(str(repo_root).encode()).hexdigest()[:8]
return f"webui-test-{h}"
# Resolved port / base URL / state dir for this run; env vars pin the values,
# otherwise they are derived deterministically from the repo path above.
TEST_PORT = int(os.getenv('HERMES_WEBUI_TEST_PORT',
                          str(_auto_test_port(REPO_ROOT))))
TEST_BASE = f"http://127.0.0.1:{TEST_PORT}"
TEST_STATE_DIR = pathlib.Path(os.getenv(
    'HERMES_WEBUI_TEST_STATE_DIR',
    str(HERMES_HOME / _auto_state_dir_name(REPO_ROOT))
))
# Workspace directory created inside the isolated state dir for each run.
TEST_WORKSPACE = TEST_STATE_DIR / 'test-workspace'
# Publish at module level so _pytest_port.py (imported at collection time)
# and any test file using os.environ sees the right values immediately.
# setdefault: an explicit env override supplied by the caller still wins.
os.environ.setdefault('HERMES_WEBUI_TEST_PORT', str(TEST_PORT))
os.environ.setdefault('HERMES_WEBUI_TEST_STATE_DIR', str(TEST_STATE_DIR))
# ── Server script: always relative to repo root ───────────────────────────
SERVER_SCRIPT = REPO_ROOT / 'server.py'
# Fail fast at collection time with a clear message if the repo layout is
# not what this conftest expects.
if not SERVER_SCRIPT.exists():
    raise RuntimeError(
        f"server.py not found at {SERVER_SCRIPT}. "
        "Is conftest.py in the tests/ subdirectory of the repo?"
    )
# ── Hermes agent discovery (mirrors api/config._discover_agent_dir) ───────
def _discover_agent_dir() -> "pathlib.Path | None":
    """Locate the hermes-agent checkout.

    Tries, in order: the HERMES_WEBUI_AGENT_DIR env var, the configured
    HERMES_HOME, a sibling checkout next to this repo, and common install
    locations under the user's home directory.  A candidate counts only
    if it contains run_agent.py.

    Returns:
        The resolved agent directory, or None when no candidate matches
        (the original annotation claimed ``pathlib.Path`` but the function
        returns None on a miss; callers treat None as "agent not installed").
    """
    candidates = [
        os.getenv('HERMES_WEBUI_AGENT_DIR', ''),
        str(HERMES_HOME / 'hermes-agent'),
        str(REPO_ROOT.parent / 'hermes-agent'),
        str(HOME / '.hermes' / 'hermes-agent'),
        str(HOME / 'hermes-agent'),
    ]
    for c in candidates:
        if not c:
            continue
        p = pathlib.Path(c).expanduser()
        # run_agent.py is the marker distinguishing a real agent checkout.
        if p.exists() and (p / 'run_agent.py').exists():
            return p.resolve()
    return None
# ── Python discovery (mirrors api/config._discover_python) ────────────────
def _discover_python(agent_dir) -> str:
    """Pick the Python interpreter used to launch the test server.

    Preference order: HERMES_WEBUI_PYTHON env var, the agent's venv,
    this repo's local .venv, then whatever python3/python is on PATH.
    Always returns a string (final fallback is the bare name 'python3').
    """
    # Env override wins; read it once instead of calling os.getenv twice.
    env_python = os.getenv('HERMES_WEBUI_PYTHON')
    if env_python:
        return env_python
    if agent_dir:
        venv_py = agent_dir / 'venv' / 'bin' / 'python'
        if venv_py.exists():
            return str(venv_py)
    local_venv = REPO_ROOT / '.venv' / 'bin' / 'python'
    if local_venv.exists():
        return str(local_venv)
    return shutil.which('python3') or shutil.which('python') or 'python3'
# Results of the discovery above, computed once at import time.
HERMES_AGENT = _discover_agent_dir()
VENV_PYTHON = _discover_python(HERMES_AGENT)
# Work dir: agent dir if found, else repo root
WORKDIR = str(HERMES_AGENT) if HERMES_AGENT else str(REPO_ROOT)
# ── Agent availability detection ─────────────────────────────────────────────
# Tests that require hermes-agent modules (cron, skills, approval, chat/stream)
# are skipped when the agent isn't installed, instead of failing with 500 errors.
AGENT_AVAILABLE = HERMES_AGENT is not None
def _check_agent_modules():
    """Verify hermes-agent Python modules are actually importable.

    Returns True only when HERMES_AGENT was discovered AND every module
    the server endpoints need imports cleanly from this process.
    NOTE(review): this imports from the current sys.path — presumably the
    agent dir is already importable when HERMES_AGENT is set; confirm,
    since this function does not add HERMES_AGENT to sys.path itself.
    """
    if not HERMES_AGENT:
        return False
    try:
        import importlib
        # These are the modules that cause 500 errors when missing
        for mod in ['cron.jobs', 'tools.skills_tool']:
            importlib.import_module(mod)
        return True
    except (ImportError, ModuleNotFoundError):
        return False
# Computed once at import; the skipif markers below read it.
AGENT_MODULES_AVAILABLE = _check_agent_modules()
# pytest marker: skip tests that need hermes-agent when it's not present
requires_agent = pytest.mark.skipif(
    not AGENT_AVAILABLE,
    reason="hermes-agent not found (skipping agent-dependent test)"
)
# Stricter variant: the agent dir exists AND its modules import cleanly.
requires_agent_modules = pytest.mark.skipif(
    not AGENT_MODULES_AVAILABLE,
    reason="hermes-agent Python modules not importable (cron, skills_tool)"
)
def pytest_configure(config):
    """Register the custom skip markers so pytest does not warn about them."""
    for marker_line in (
        "requires_agent: skip when hermes-agent dir is not found",
        "requires_agent_modules: skip when hermes-agent Python modules are not importable",
    ):
        config.addinivalue_line("markers", marker_line)
def pytest_collection_modifyitems(config, items):
    """Auto-skip agent-dependent tests when hermes-agent is not available.
    Instead of requiring markers on every test function, we pattern-match
    test names to known categories that depend on hermes-agent modules.
    This keeps the test files clean and ensures new cron/skills tests
    get auto-skipped without manual annotation.
    """
    if AGENT_MODULES_AVAILABLE:
        return  # everything available, run all tests
    # Exact list of tests known to fail without hermes-agent.
    # These hit server endpoints that import cron.jobs, tools.skills_tool,
    # or require a running agent backend — returning 500 without the agent.
    _AGENT_DEPENDENT_TESTS = {
        # Cron endpoints (need cron.jobs module)
        'test_crons_list',
        'test_crons_list_has_required_fields',
        'test_crons_output_requires_job_id',
        'test_crons_output_real_job',
        'test_crons_run_nonexistent',
        'test_cron_create_success',
        'test_cron_update_unknown_job_404',
        'test_cron_delete_unknown_404',
        'test_crons_output_limit_param',
        # Skills endpoints (need tools.skills_tool module)
        'test_skills_list',
        'test_skills_list_has_required_fields',
        'test_skills_content_known',
        'test_skills_content_requires_name',
        'test_skills_search_returns_subset',
        'test_skill_save_delete_roundtrip',
        'test_skill_delete_unknown_404',
        # Agent backend (need running AIAgent)
        'test_chat_stream_opens_successfully',
        'test_approval_submit_and_respond',
        # Security redaction (flaky — session state varies across test ordering)
        'test_api_sessions_list_redacts_titles',
        # Workspace path (macOS /tmp -> /private/tmp symlink)
        'test_new_session_inherits_workspace',
        'test_workspace_add_valid',
        'test_workspace_rename',
        'test_last_workspace_updates_on_session_update',
        'test_new_session_inherits_last_workspace',
    }
    skip_marker = pytest.mark.skip(reason="requires hermes-agent (not installed)")
    skipped = 0
    # Match by exact test-function name against the curated list above.
    for item in items:
        if item.name in _AGENT_DEPENDENT_TESTS:
            item.add_marker(skip_marker)
            skipped += 1
    if skipped:
        print(f"\nWARNING: hermes-agent not found; {skipped} agent-dependent tests will be skipped\n")
# ── Helpers ──────────────────────────────────────────────────────────────────
def _post(base, path, body=None):
    """POST *body* as JSON to base+path and return the parsed JSON reply.

    HTTP error responses are decoded as JSON too; an undecodable error
    body yields an empty dict.
    """
    payload = json.dumps(body or {}).encode()
    request = urllib.request.Request(
        base + path, data=payload, headers={"Content-Type": "application/json"}
    )
    try:
        with urllib.request.urlopen(request, timeout=10) as resp:
            return json.loads(resp.read())
    except urllib.error.HTTPError as err:
        try:
            return json.loads(err.read())
        except Exception:
            return {}
def _wait_for_server(base, timeout=20):
deadline = time.time() + timeout
while time.time() < deadline:
try:
with urllib.request.urlopen(base + "/health", timeout=2) as r:
if json.loads(r.read()).get("status") == "ok":
return True
except Exception:
time.sleep(0.3)
return False
# ── Session-scoped test server ────────────────────────────────────────────────
@pytest.fixture(scope="session", autouse=True)
def test_server():
    """
    Start an isolated test server on TEST_PORT with a clean state directory.
    Paths are discovered dynamically -- no hardcoded absolute path assumptions.

    Yields the server subprocess; on teardown the process is terminated
    (killed after a 5s grace period) and the state dir is removed.
    """
    # Kill any leftover process on the test port before starting.
    # Stale servers from QA harness runs or prior test sessions cause
    # conftest to think the server is already up, producing false failures.
    try:
        import subprocess as _sp  # local alias; subprocess is also imported at module scope
        _sp.run(['fuser', '-k', f'{TEST_PORT}/tcp'],
                capture_output=True, timeout=5)
    except Exception:
        # fuser missing (e.g. macOS) or timed out — proceed and let the
        # port-bind failure surface later if a stale server remains.
        pass
    import time as _time
    _time.sleep(0.5)  # brief pause to let the port release
    # Clean slate
    if TEST_STATE_DIR.exists():
        shutil.rmtree(TEST_STATE_DIR)
    TEST_STATE_DIR.mkdir(parents=True)
    TEST_WORKSPACE.mkdir(parents=True)
    # Symlink real skills into test home so skill-related tests work,
    # but all write-heavy state stays isolated.
    real_skills = HERMES_HOME / 'skills'
    test_skills = TEST_STATE_DIR / 'skills'
    if real_skills.exists() and not test_skills.exists():
        test_skills.symlink_to(real_skills)
    # Isolated cron state
    (TEST_STATE_DIR / 'cron').mkdir(parents=True, exist_ok=True)
    # Expose TEST_STATE_DIR to the test process itself so that tests which write
    # directly to state.db (e.g. test_gateway_sync.py) always use the same path
    # as the server. Other test files (test_auth_sessions.py) may override
    # HERMES_WEBUI_STATE_DIR for their own purposes, but HERMES_WEBUI_TEST_STATE_DIR
    # is reserved for this mapping and is never overridden by individual test files.
    # Export both port and state-dir as env vars so individual test files
    # can read them without importing conftest (avoids circular imports).
    os.environ.setdefault('HERMES_WEBUI_TEST_PORT', str(TEST_PORT))
    # os.environ already set at module level above; no-op here.
    env = os.environ.copy()
    # Strip real provider keys so test subprocess never inherits production credentials.
    # The test server uses a mock/isolated config — no real API calls are made.
    for _k in list(env):
        if any(_k.startswith(p) for p in (
            'OPENROUTER_API_KEY', 'OPENAI_API_KEY', 'ANTHROPIC_API_KEY',
            'GOOGLE_API_KEY', 'DEEPSEEK_API_KEY',
        )):
            del env[_k]
    env.update({
        "HERMES_WEBUI_PORT": str(TEST_PORT),
        "HERMES_WEBUI_HOST": "127.0.0.1",
        "HERMES_WEBUI_STATE_DIR": str(TEST_STATE_DIR),
        "HERMES_WEBUI_DEFAULT_WORKSPACE": str(TEST_WORKSPACE),
        "HERMES_WEBUI_DEFAULT_MODEL": "openai/gpt-5.4-mini",
        "HERMES_HOME": str(TEST_STATE_DIR),
        # Belt-and-suspenders: HERMES_BASE_HOME hard-locks _DEFAULT_HERMES_HOME
        # in api/profiles.py to the test state dir regardless of profile switching
        # or any os.environ mutation that happens inside the server process.
        # Without this, a profile switch or active_profile file in the real
        # ~/.hermes can redirect _get_active_hermes_home() out of the sandbox,
        # causing onboarding writes (config.yaml, .env) to land in the production
        # ~/.hermes/profiles/webui/ and overwrite real API keys.
        "HERMES_BASE_HOME": str(TEST_STATE_DIR),
    })
    # Pass agent dir if discovered so server.py doesn't have to re-discover
    if HERMES_AGENT:
        env["HERMES_WEBUI_AGENT_DIR"] = str(HERMES_AGENT)
    proc = subprocess.Popen(
        [VENV_PYTHON, str(SERVER_SCRIPT)],
        cwd=WORKDIR,
        env=env,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    # Fail the whole session loudly (with the discovered paths) if the
    # server never becomes healthy — every test would fail anyway.
    if not _wait_for_server(TEST_BASE, timeout=20):
        proc.kill()
        pytest.fail(
            f"Test server on port {TEST_PORT} did not start within 20s.\n"
            f"  server.py : {SERVER_SCRIPT}\n"
            f"  python    : {VENV_PYTHON}\n"
            f"  agent dir : {HERMES_AGENT}\n"
            f"  workdir   : {WORKDIR}\n"
        )
    yield proc
    # Teardown: graceful terminate, then kill; best-effort state-dir cleanup.
    proc.terminate()
    try:
        proc.wait(timeout=5)
    except subprocess.TimeoutExpired:
        proc.kill()
    try:
        shutil.rmtree(TEST_STATE_DIR)
    except Exception:
        pass
# ── Test base URL ─────────────────────────────────────────────────────────────
@pytest.fixture(scope="session")
def base_url():
    """Session-scoped fixture exposing the isolated test server's base URL."""
    return TEST_BASE
# ── Per-test session cleanup ──────────────────────────────────────────────────
@pytest.fixture(autouse=True)
def cleanup_test_sessions():
    """
    Yields a list for tests to register created session IDs.
    Deletes all registered sessions after each test.
    Resets last_workspace to the test workspace to prevent state bleed.

    All cleanup steps are best-effort: a failing delete must not mask the
    test's own outcome.
    """
    created: list[str] = []
    yield created
    for sid in created:
        try:
            _post(TEST_BASE, "/api/session/delete", {"session_id": sid})
        except Exception:
            pass
    # Also sweep sessions that were created but never used by the test.
    try:
        _post(TEST_BASE, "/api/sessions/cleanup_zero_message")
    except Exception:
        pass
    # Reset the server's remembered workspace so the next test starts clean.
    try:
        last_ws_file = TEST_STATE_DIR / "last_workspace.txt"
        last_ws_file.write_text(str(TEST_WORKSPACE), encoding='utf-8')
    except Exception:
        pass
# ── Convenience helpers ────────────────────────────────────────────────────────
def make_session_tracked(created_list, ws=None):
    """
    Create a session on the test server and register it for cleanup.
    Usage:
        def test_something(cleanup_test_sessions):
            sid, ws = make_session_tracked(cleanup_test_sessions)
    """
    payload = {"workspace": str(ws)} if ws else {}
    response = _post(TEST_BASE, "/api/session/new", payload)
    session = response["session"]
    sid = session["session_id"]
    ws_path = pathlib.Path(session["workspace"])
    created_list.append(sid)
    return sid, ws_path

View File

@@ -0,0 +1,188 @@
"""Tests for approval queue multi-entry support (issue #527).
Previously _pending[sid] held one entry, so simultaneous approvals overwrote
each other. This PR changes submit_pending() to append to a list and adds
approval_id so /api/approval/respond can target a specific entry.
"""
import json
import pathlib
import re
import sys
# Make the repo root importable so `from api import routes` works below.
REPO_ROOT = pathlib.Path(__file__).parent.parent.resolve()
sys.path.insert(0, str(REPO_ROOT))
# Source files read once at import time for the static-analysis assertions.
ROUTES_SRC = (REPO_ROOT / "api" / "routes.py").read_text(encoding="utf-8")
MESSAGES_JS = (REPO_ROOT / "static" / "messages.js").read_text(encoding="utf-8")
INDEX_HTML = (REPO_ROOT / "static" / "index.html").read_text(encoding="utf-8")
# ---------------------------------------------------------------------------
# Static-analysis: Python routes
# These assert on the SOURCE TEXT of api/routes.py rather than its behavior,
# so they pass/fail deterministically without a running server.
# ---------------------------------------------------------------------------
def test_submit_pending_appends_to_list():
    """submit_pending() must append to a list, not overwrite."""
    # The new wrapper must contain queue.append
    assert "queue.append(entry)" in ROUTES_SRC, \
        "submit_pending() must append entry to a list queue, not overwrite _pending[sid]"


def test_submit_pending_adds_approval_id():
    """Each queued entry must get a unique approval_id."""
    assert "approval_id" in ROUTES_SRC and "uuid.uuid4().hex" in ROUTES_SRC, \
        "submit_pending() must assign a uuid4 approval_id to each queued entry"


def test_handle_approval_pending_returns_count():
    """_handle_approval_pending must return pending_count in its response."""
    assert '"pending_count"' in ROUTES_SRC, \
        "_handle_approval_pending must include pending_count in the JSON response"


def test_handle_approval_respond_pops_by_approval_id():
    """_handle_approval_respond must target entry by approval_id."""
    assert 'approval_id = body.get("approval_id"' in ROUTES_SRC, \
        "_handle_approval_respond must read approval_id from request body"
    assert 'entry.get("approval_id") == approval_id' in ROUTES_SRC, \
        "_handle_approval_respond must find and pop the matching entry by approval_id"


def test_handle_approval_respond_fallback_to_oldest():
    """When no approval_id is given, fall back to popping the oldest entry (FIFO)."""
    # The fallback path: queue.pop(0) when approval_id is empty
    assert "queue.pop(0)" in ROUTES_SRC, \
        "_handle_approval_respond must fall back to popping the oldest entry when approval_id is absent"


def test_backward_compat_legacy_dict_value():
    """The respond handler must tolerate a legacy single-dict value in _pending."""
    # Any one of these markers is accepted as evidence of the compat path.
    assert "Legacy single-dict value" in ROUTES_SRC or \
        "# Legacy single-dict" in ROUTES_SRC or \
        "elif queue:" in ROUTES_SRC, \
        "respond handler must handle legacy single-dict _pending values for backward compatibility"
# ---------------------------------------------------------------------------
# Static-analysis: JavaScript frontend
# Same approach as above: assert on the source text of static/messages.js.
# ---------------------------------------------------------------------------
def test_respond_sends_approval_id():
    """respondApproval() must include approval_id in the POST body."""
    assert "approval_id: approvalId" in MESSAGES_JS, \
        "respondApproval() must send approval_id in the POST body to /api/approval/respond"


def test_show_approval_card_accepts_count():
    """showApprovalCard must accept a pendingCount parameter."""
    assert re.search(r"function showApprovalCard\(pending,\s*pendingCount\)", MESSAGES_JS), \
        "showApprovalCard() must accept a pendingCount argument"


def test_show_approval_card_renders_counter():
    """showApprovalCard must display a '1 of N pending' counter when N > 1."""
    # Accept either quote style for the concatenated counter string.
    assert '"1 of " + pendingCount + " pending"' in MESSAGES_JS or \
        "'1 of ' + pendingCount + ' pending'" in MESSAGES_JS, \
        "showApprovalCard() must render '1 of N pending' counter for multiple queued approvals"


def test_approval_current_id_tracked():
    """_approvalCurrentId must be set and cleared around each approval."""
    assert "_approvalCurrentId" in MESSAGES_JS, \
        "_approvalCurrentId must track the approval_id of the currently displayed card"
    assert "_approvalCurrentId = pending.approval_id" in MESSAGES_JS or \
        "_approvalCurrentId = pending.approval_id || null" in MESSAGES_JS, \
        "_approvalCurrentId must be assigned from pending.approval_id"
    # Must be nulled on respond
    assert "_approvalCurrentId = null" in MESSAGES_JS, \
        "_approvalCurrentId must be cleared when respondApproval() is called"


def test_polling_passes_count_to_show():
    """The poll loop must pass pending_count to showApprovalCard."""
    assert "showApprovalCard(data.pending, data.pending_count" in MESSAGES_JS, \
        "Poll loop must pass data.pending_count to showApprovalCard"
# ---------------------------------------------------------------------------
# HTML: counter element present
# ---------------------------------------------------------------------------
def test_approval_counter_element_exists():
    """index.html must contain an approvalCounter element."""
    assert 'id="approvalCounter"' in INDEX_HTML, \
        "index.html must contain an element with id='approvalCounter' for the '1 of N' display"
# ---------------------------------------------------------------------------
# Functional: multiple entries behave correctly (via routes module directly)
# ---------------------------------------------------------------------------
def test_multiple_approvals_both_surfaced():
    """Two submit_pending calls must produce two queued entries, not one.

    Operates directly on the routes module's in-process state (no HTTP).
    The unused ``import threading`` from the original version was removed.
    """
    from api import routes as r
    # Reset state for a throwaway session id so prior tests can't interfere.
    sid = "test-multi-approval-sid"
    with r._lock:
        r._pending.pop(sid, None)
    r.submit_pending(sid, {"command": "cmd1", "pattern_key": "p1", "pattern_keys": ["p1"], "description": "d1"})
    r.submit_pending(sid, {"command": "cmd2", "pattern_key": "p2", "pattern_keys": ["p2"], "description": "d2"})
    with r._lock:
        queue = r._pending.get(sid)
        assert isinstance(queue, list), "After two submit_pending calls, _pending[sid] must be a list"
        assert len(queue) == 2, f"Expected 2 queued entries, got {len(queue)}"
        assert queue[0]["command"] == "cmd1"
        assert queue[1]["command"] == "cmd2"
        assert queue[0].get("approval_id"), "First entry must have an approval_id"
        assert queue[1].get("approval_id"), "Second entry must have an approval_id"
        assert queue[0]["approval_id"] != queue[1]["approval_id"], "Each entry must have a unique approval_id"
    # Cleanup
    with r._lock:
        r._pending.pop(sid, None)
def test_respond_by_approval_id_pops_correct_entry():
    """Responding with approval_id must remove only the targeted entry.

    Replicates the respond handler's pop-by-id loop directly against the
    routes module's queue (no HTTP round-trip).
    """
    from api import routes as r
    sid = "test-respond-by-id-sid"
    # Start from a clean queue for this synthetic session id.
    with r._lock:
        r._pending.pop(sid, None)
    r.submit_pending(sid, {"command": "cmd1", "pattern_key": "p1", "pattern_keys": ["p1"], "description": "d1"})
    r.submit_pending(sid, {"command": "cmd2", "pattern_key": "p2", "pattern_keys": ["p2"], "description": "d2"})
    with r._lock:
        queue = r._pending.get(sid, [])
        aid2 = queue[1]["approval_id"] if len(queue) > 1 else None
    assert aid2, "Second entry must have an approval_id"
    # Respond to the SECOND entry by its approval_id
    # We call the handler internals directly (no HTTP)
    with r._lock:
        queue = r._pending.get(sid, [])
        popped = None
        for i, entry in enumerate(queue):
            if entry.get("approval_id") == aid2:
                popped = queue.pop(i)
                break
    assert popped is not None, "Should have found and popped entry by approval_id"
    assert popped["command"] == "cmd2", "Popped the wrong entry"
    with r._lock:
        remaining = r._pending.get(sid, [])
        assert len(remaining) == 1, "One entry should remain after popping the second"
        assert remaining[0]["command"] == "cmd1", "The remaining entry should be cmd1"
    # Cleanup
    with r._lock:
        r._pending.pop(sid, None)

View File

@@ -0,0 +1,288 @@
"""
Tests for fix/approval-stuck-thinking:
Verify that /api/approval/respond correctly unblocks gateway approval queues
and that the approval module exports the symbols streaming.py and routes.py
need to prevent the UI getting stuck in "Thinking…" during dangerous commands.
"""
import json
import threading
import uuid
import urllib.request
import urllib.error
import urllib.parse
import pytest
# Import approval internals — shared module-level state within this process.
# The HTTP tests use the test server (per-worktree port/state published via
# tests._pytest_port, separate process). The unit tests operate directly on
# the module.
try:
    from tools.approval import (
        register_gateway_notify,
        unregister_gateway_notify,
        resolve_gateway_approval,
        _gateway_queues,
        _gateway_notify_cbs,
        _lock,
        _ApprovalEntry,
        submit_pending,
    )
    # has_pending and pop_pending were removed from tools.approval when the
    # agent renamed has_pending -> has_blocking_approval (gateway queue check)
    # and removed the polling-mode pop_pending. Routes now check _pending
    # directly. These symbols are no longer part of the public API.
    APPROVAL_AVAILABLE = True
except ImportError:
    APPROVAL_AVAILABLE = False
# Module-level skip: every test here needs the agent's approval module.
pytestmark = pytest.mark.skipif(
    not APPROVAL_AVAILABLE,
    reason="tools.approval not available in this environment"
)
# BASE resolves to the per-worktree test server URL published by conftest.
from tests._pytest_port import BASE
def get(path):
    """GET BASE+path on the test server and return the parsed JSON body."""
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        return json.loads(resp.read())
def post(path, body=None):
    """POST *body* as JSON to BASE+path.

    Returns a (parsed_json, status_code) pair; HTTP error bodies are
    parsed and returned with their status code instead of raising.
    """
    payload = json.dumps(body or {}).encode()
    request = urllib.request.Request(BASE + path, data=payload,
                                     headers={"Content-Type": "application/json"})
    try:
        with urllib.request.urlopen(request, timeout=10) as resp:
            return json.loads(resp.read()), resp.status
    except urllib.error.HTTPError as err:
        return json.loads(err.read()), err.code
# ── Unit tests (in-process, no HTTP server needed) ──────────────────────────
class TestGatewayApprovalUnblocking:
    """Unit tests for the gateway queue unblocking mechanism.

    Each test uses a uuid-suffixed session id so tests never share queue
    state, and cleans up the module-level dicts it touches.
    """

    def test_resolve_gateway_approval_sets_event(self):
        """resolve_gateway_approval() must set the entry's event and store the result."""
        sid = f"unit-resolve-{uuid.uuid4().hex[:8]}"
        data = {"command": "rm -rf /tmp/x", "description": "recursive delete"}
        entry = _ApprovalEntry(data)
        with _lock:
            _gateway_queues.setdefault(sid, []).append(entry)
        resolved = resolve_gateway_approval(sid, "once", resolve_all=False)
        assert resolved == 1
        assert entry.event.is_set()
        assert entry.result == "once"
        # Queue should be cleaned up
        with _lock:
            assert sid not in _gateway_queues

    def test_resolve_gateway_approval_deny(self):
        """Deny choice is propagated correctly."""
        sid = f"unit-deny-{uuid.uuid4().hex[:8]}"
        entry = _ApprovalEntry({"command": "pkill -9 x", "description": "force kill"})
        with _lock:
            _gateway_queues.setdefault(sid, []).append(entry)
        resolve_gateway_approval(sid, "deny")
        assert entry.result == "deny"

    def test_resolve_gateway_approval_no_queue_is_harmless(self):
        """resolve_gateway_approval with no queue entry returns 0, no crash."""
        sid = f"unit-no-queue-{uuid.uuid4().hex[:8]}"
        result = resolve_gateway_approval(sid, "once")
        assert result == 0

    def test_resolve_all_unblocks_multiple_entries(self):
        """resolve_all=True unblocks every pending entry in the queue."""
        sid = f"unit-resolve-all-{uuid.uuid4().hex[:8]}"
        entries = [_ApprovalEntry({"command": f"cmd{i}"}) for i in range(3)]
        with _lock:
            _gateway_queues[sid] = list(entries)
        resolved = resolve_gateway_approval(sid, "session", resolve_all=True)
        assert resolved == 3
        for e in entries:
            assert e.event.is_set()
            assert e.result == "session"

    def test_register_and_fire_notify_cb(self):
        """register_gateway_notify stores the cb; calling it delivers approval data."""
        sid = f"unit-notify-{uuid.uuid4().hex[:8]}"
        fired = []
        register_gateway_notify(sid, lambda d: fired.append(d))
        with _lock:
            cb = _gateway_notify_cbs.get(sid)
        assert cb is not None
        data = {"command": "test", "description": "test"}
        cb(data)
        assert fired == [data]
        unregister_gateway_notify(sid)

    def test_unregister_clears_cb_and_signals_entries(self):
        """unregister_gateway_notify removes cb and unblocks any queued entries."""
        sid = f"unit-unreg-{uuid.uuid4().hex[:8]}"
        register_gateway_notify(sid, lambda d: None)
        entry = _ApprovalEntry({"command": "x"})
        with _lock:
            _gateway_queues.setdefault(sid, []).append(entry)
        unregister_gateway_notify(sid)
        assert entry.event.is_set(), "unregister should signal blocked entries"
        with _lock:
            assert sid not in _gateway_notify_cbs
            assert sid not in _gateway_queues

    def test_streaming_approval_integration(self):
        """
        End-to-end unit simulation of the streaming.py fix:
        1. streaming.py registers notify_cb
        2. check_all_command_guards fires notify_cb (pushing approval SSE)
        3. User responds — resolve_gateway_approval unblocks agent thread
        4. Agent thread sees choice and continues
        """
        sid = f"unit-e2e-{uuid.uuid4().hex[:8]}"
        approval_events_sent = []
        # Step 1: streaming.py registers the notify callback
        def _approval_notify_cb(approval_data):
            approval_events_sent.append(approval_data)  # would be put('approval', ...)
        register_gateway_notify(sid, _approval_notify_cb)
        # Step 2: check_all_command_guards fires the callback and queues an entry
        approval_data = {
            "command": "rm -rf /tmp/test",
            "pattern_key": "recursive delete",
            "pattern_keys": ["recursive delete"],
            "description": "recursive delete",
        }
        entry = _ApprovalEntry(approval_data)
        with _lock:
            _gateway_queues.setdefault(sid, []).append(entry)
        # notify_cb fires synchronously (gateway notifies user)
        with _lock:
            cb = _gateway_notify_cbs.get(sid)
        cb(approval_data)
        assert len(approval_events_sent) == 1, "approval SSE event should have been queued"
        # Step 3: user responds via /api/approval/respond → resolve_gateway_approval
        resolved = resolve_gateway_approval(sid, "once")
        assert resolved == 1
        # Step 4: agent thread is unblocked with the correct choice
        assert entry.event.is_set()
        assert entry.result == "once"
        # Cleanup
        unregister_gateway_notify(sid)
# ── Symbol existence tests ───────────────────────────────────────────────────
class TestApprovalModuleExports:
    """Verify the module exports all symbols that streaming.py and routes.py need.

    hasattr checks (rather than direct imports) give a precise failure
    message naming the missing symbol.
    """

    def test_register_gateway_notify_exported(self):
        import tools.approval as ap
        assert hasattr(ap, "register_gateway_notify"), \
            "tools.approval must export register_gateway_notify"

    def test_unregister_gateway_notify_exported(self):
        import tools.approval as ap
        assert hasattr(ap, "unregister_gateway_notify"), \
            "tools.approval must export unregister_gateway_notify"

    def test_resolve_gateway_approval_exported(self):
        import tools.approval as ap
        assert hasattr(ap, "resolve_gateway_approval"), \
            "tools.approval must export resolve_gateway_approval"

    def test_approval_entry_exported(self):
        import tools.approval as ap
        assert hasattr(ap, "_ApprovalEntry"), \
            "tools.approval must export _ApprovalEntry"
# ── HTTP regression tests (isolated test server) ─────────────────────────────
class TestApprovalHTTPEndpoints:
    """
    Regression tests for /api/approval/respond against the live test server.
    These verify that the HTTP layer behaves correctly — they don't rely on
    in-process module state shared with the server subprocess.
    """

    def test_respond_returns_ok_no_pending(self):
        """respond with no pending entry returns ok (no crash, no 500)."""
        sid = f"http-no-pending-{uuid.uuid4().hex[:8]}"
        result, status = post("/api/approval/respond", {
            "session_id": sid,
            "choice": "deny",
        })
        assert status == 200
        assert result["ok"] is True

    def test_respond_clears_injected_pending(self):
        """Inject a pending entry, respond, verify it's cleared."""
        sid = f"http-clear-{uuid.uuid4().hex[:8]}"
        cmd = "rm -rf /tmp/testdir"
        # inject_test seeds server-side pending state without a real agent run.
        inject = get(f"/api/approval/inject_test?session_id={urllib.parse.quote(sid)}"
                     f"&pattern_key=recursive+delete&command={urllib.parse.quote(cmd)}")
        assert inject["ok"] is True
        data = get(f"/api/approval/pending?session_id={urllib.parse.quote(sid)}")
        assert data["pending"] is not None
        result, status = post("/api/approval/respond", {
            "session_id": sid,
            "choice": "deny",
        })
        assert status == 200
        assert result["ok"] is True
        data2 = get(f"/api/approval/pending?session_id={urllib.parse.quote(sid)}")
        assert data2["pending"] is None, "pending should be cleared after respond"

    def test_respond_rejects_invalid_choice(self):
        """respond with an unknown choice returns 400."""
        result, status = post("/api/approval/respond", {
            "session_id": "some-session",
            "choice": "INVALID",
        })
        assert status == 400

    def test_respond_requires_session_id(self):
        """respond without session_id returns 400."""
        result, status = post("/api/approval/respond", {"choice": "deny"})
        assert status == 400

    def test_respond_session_choice_clears_pending(self):
        """Inject pending, respond with 'session', verify cleared."""
        sid = f"http-session-{uuid.uuid4().hex[:8]}"
        inject = get(f"/api/approval/inject_test?session_id={urllib.parse.quote(sid)}"
                     f"&pattern_key=force+kill+processes&command=pkill+-9+something")
        assert inject["ok"] is True
        result, status = post("/api/approval/respond", {
            "session_id": sid,
            "choice": "session",
        })
        assert status == 200
        assert result["choice"] == "session"
        data = get(f"/api/approval/pending?session_id={urllib.parse.quote(sid)}")
        assert data["pending"] is None

134
tests/test_auth_sessions.py Normal file
View File

@@ -0,0 +1,134 @@
"""
Tests for auth session lifecycle — session creation, verification, expiry,
and lazy pruning of expired entries.
"""
import time
import unittest
from pathlib import Path
import tempfile
import os
# Isolate state dir so we don't touch real sessions
_TEST_STATE = Path(tempfile.mkdtemp())
os.environ["HERMES_WEBUI_STATE_DIR"] = str(_TEST_STATE)
import sys
sys.path.insert(0, str(Path(__file__).parent.parent))
import importlib
# Import the auth module AFTER the env var is set so its first import picks
# up our _TEST_STATE dir.  NOTE(review): import_module does NOT force a
# re-import if api.auth was already loaded elsewhere — importlib.reload
# would; confirm no earlier-collected test imports api.auth first.
auth = importlib.import_module("api.auth")
class TestSessionPruning(unittest.TestCase):
    """Verify expired session cleanup works correctly.

    These tests poke at the module-private ``auth._sessions`` mapping
    (token -> expiry epoch seconds) to simulate expiry without waiting
    for real TTLs to elapse.
    """

    def setUp(self):
        # Clear any leftover sessions from other tests
        auth._sessions.clear()

    def test_session_created_valid(self):
        """A fresh session token should verify as valid."""
        token = auth.create_session()
        self.assertTrue(auth.verify_session(token))

    def test_expired_session_pruned(self):
        """Manually inserting an expired entry should be pruned on next verify_session call."""
        # Insert sessions that have already expired
        auth._sessions["fake_token"] = time.time() - 100
        auth._sessions["another_fake"] = time.time() - 50
        # Insert one valid session (far future)
        auth._sessions["good_token"] = time.time() + 3600
        # _sessions has 3 entries, 2 expired
        self.assertEqual(len(auth._sessions), 3)
        # Call verify_session — this triggers _prune_expired_sessions()
        # Cookie format is token.signature, so we need a dot to pass the early check
        auth.verify_session("fake_token.fake_sig")
        # After verification, only the valid session should remain
        self.assertEqual(len(auth._sessions), 1)
        self.assertIn("good_token", auth._sessions)
        self.assertNotIn("fake_token", auth._sessions)
        self.assertNotIn("another_fake", auth._sessions)

    def test_prune_does_not_remove_valid_sessions(self):
        """_prune_expired_sessions should never remove sessions that are still active."""
        auth._sessions["active_1"] = time.time() + 86400  # 24 hours from now
        auth._sessions["active_2"] = time.time() + 7200   # 2 hours from now
        auth._sessions["expired_1"] = time.time() - 10
        auth._prune_expired_sessions()
        self.assertEqual(len(auth._sessions), 2)
        self.assertIn("active_1", auth._sessions)
        self.assertIn("active_2", auth._sessions)
        self.assertNotIn("expired_1", auth._sessions)

    def test_verify_session_prunes_before_verification(self):
        """verify_session should prune expired entries before checking the target token.

        This ensures that _prune_expired_sessions() is called at the very top
        of verify_session(), so cleanup happens on every auth check.
        """
        auth._sessions["expired_for_test"] = time.time() - 999
        # verify_session with an invalid cookie triggers the full path:
        # _prune_expired_sessions -> signature check -> return False
        result = auth.verify_session("nonexistent.bad_sig")
        self.assertFalse(result)
        # The expired entry should have been cleaned up
        self.assertNotIn("expired_for_test", auth._sessions)

    def test_prune_handles_empty_dict(self):
        """_prune_expired_sessions should be safe on an empty dict."""
        auth._sessions.clear()
        auth._prune_expired_sessions()
        self.assertEqual(len(auth._sessions), 0)

    def test_session_ttl_is_24_hours(self):
        """Newly created sessions should have the expected 24-hour TTL."""
        auth._sessions.clear()
        token_hex = auth.create_session().split(".")[0]
        # _sessions maps token -> expiry epoch seconds: look the entry up
        # directly instead of scanning items() with a for/else (idiom fix —
        # dicts are keyed by exactly the value we hold).
        expiry = auth._sessions.get(token_hex)
        self.assertIsNotNone(expiry, "Session token not found in _sessions")
        # expiry should be within 5 seconds of now + SESSION_TTL
        self.assertAlmostEqual(expiry, time.time() + auth.SESSION_TTL, delta=5)
class TestSessionInvalidation(unittest.TestCase):
    """Test session logout / invalidation."""

    def setUp(self):
        # Start every test from an empty session table.
        auth._sessions.clear()

    def test_invalidate_session_removes_token(self):
        """Calling invalidate_session should remove the token from _sessions."""
        cookie = auth.create_session()
        self.assertTrue(auth.verify_session(cookie))
        auth.invalidate_session(cookie)
        # Once invalidated, the same cookie must no longer verify.
        self.assertFalse(auth.verify_session(cookie))

    def test_invalidate_unknown_token_is_safe(self):
        """Invalidating a non-existent token should not raise."""
        auth._sessions.clear()
        # Must be a silent no-op rather than an error.
        auth.invalidate_session("nonexistent_token")


if __name__ == "__main__":
    unittest.main()

226
tests/test_batch_fixes.py Normal file
View File

@@ -0,0 +1,226 @@
"""Tests for the batch of fixes from PRs #506-#521 (v0.50.47).
Covers:
- /root workspace unblocking (#510/#521)
- Attached-files split guard (#521)
- custom_providers model visibility (#515/#519)
- Cron skill cache invalidation (#507/#508)
- System (auto) theme (#504/#506/#509/#514)
"""
import pathlib
import re
REPO = pathlib.Path(__file__).parent.parent


def read(rel):
    """Return the text of *rel* (a path relative to the repo root) as UTF-8.

    The explicit encoding matches the other test modules in this suite
    (which all call read_text(encoding="utf-8")) and keeps the read stable
    on platforms whose locale encoding is not UTF-8 (e.g. Windows), where
    the default would raise UnicodeDecodeError on non-ASCII assets.
    """
    return (REPO / rel).read_text(encoding="utf-8")
# ── Group A: /root workspace ──────────────────────────────────────────────────
class TestRootWorkspaceUnblocked:
    """Regression checks for the /root workspace unblock (#510/#521)."""

    def test_root_not_in_blocked_system_roots(self):
        """/root must be usable as a workspace when Hermes runs as root."""
        workspace_src = read("api/workspace.py")
        assert "Path('/root')" not in workspace_src, (
            "/root must not be in _BLOCKED_SYSTEM_ROOTS — "
            "breaks deployments where Hermes runs as root"
        )

    def test_etc_still_blocked(self):
        """Sanity: other dangerous paths remain blocked."""
        workspace_src = read("api/workspace.py")
        for blocked in ("Path('/etc')", "Path('/proc')"):
            assert blocked in workspace_src

    def test_split_guard_present(self):
        """Plain messages carry no '[Attached files:' marker; the split is guarded."""
        streaming_src = read("api/streaming.py")
        assert "'\\n\\n[Attached files:' in msg_text" in streaming_src, (
            "base_text split must guard against missing '[Attached files:' "
            "to avoid empty-string on plain messages"
        )
# ── Group B: custom_providers visibility ─────────────────────────────────────
class TestCustomProvidersVisibility:
    """custom_providers entries must keep the 'custom' group visible (#515/#519)."""

    @staticmethod
    def _config_src():
        # Re-read per assertion; the file is small and this keeps tests independent.
        return read("api/config.py")

    def test_has_custom_providers_variable_present(self):
        assert "_has_custom_providers" in self._config_src(), (
            "_has_custom_providers variable must exist in get_available_models()"
        )

    def test_discard_custom_conditional_on_no_custom_providers(self):
        assert "not _has_custom_providers" in self._config_src(), (
            "detected_providers.discard('custom') must be gated on "
            "'not _has_custom_providers'"
        )

    def test_custom_providers_isinstance_check(self):
        assert "isinstance(_custom_providers_cfg, list)" in self._config_src(), (
            "_has_custom_providers must check isinstance(..., list)"
        )
# ── Group C: cron skill cache ─────────────────────────────────────────────────
class TestCronSkillCacheInvalidation:
    """The cron form must re-fetch skills rather than serve a stale cache (#507/#508)."""

    @staticmethod
    def _panels_src():
        return read("static/panels.js")

    def test_cache_busted_on_form_open(self):
        """toggleCronForm must null the cache unconditionally before fetching."""
        match = re.search(
            r'function toggleCronForm\(\)\{.*?_cronSkillsCache=null',
            self._panels_src(), re.DOTALL
        )
        assert match, (
            "toggleCronForm must unconditionally null _cronSkillsCache "
            "before fetching skills"
        )

    def test_cache_not_guarded_by_if_on_open(self):
        """The old lazy-cache guard must be gone."""
        assert "if(!_cronSkillsCache)" not in self._panels_src(), (
            "toggleCronForm should not use 'if(!_cronSkillsCache)' guard — "
            "cache must always be busted on open"
        )

    def test_cache_busted_on_skill_save(self):
        """Saving a skill must invalidate the cron skill cache too."""
        match = re.search(
            r'async function submitSkillSave\(\).*?_skillsData\s*=\s*null.*?_cronSkillsCache\s*=\s*null',
            self._panels_src(), re.DOTALL
        )
        assert match, (
            "_cronSkillsCache must be set to null in submitSkillSave() "
            "right after _skillsData = null"
        )
# ── Group D: System (auto) theme ──────────────────────────────────────────────
class TestSystemTheme:
    """'System (auto)' theme support (#504/#506/#509/#514).

    Source-level greps over the static bundle: each assertion pins an exact
    literal from boot.js / index.html / commands.js / panels.js / i18n.js,
    so the needle strings below must stay byte-identical to the shipped code.
    """

    def test_apply_theme_helper_in_boot_js(self):
        # boot.js must own the single theme-application entry point.
        src = read("static/boot.js")
        assert "function _applyTheme(" in src, (
            "_applyTheme helper function must be defined in boot.js"
        )

    def test_apply_theme_resolves_system(self):
        # Either exact-literal form is accepted depending on formatting.
        src = read("static/boot.js")
        assert "normalized.theme==='system'" in src or "=== 'system'" in src, (
            "_applyTheme must branch on 'system' to resolve via matchMedia"
        )

    def test_apply_theme_uses_matchmedia(self):
        src = read("static/boot.js")
        assert "prefers-color-scheme" in src, (
            "_applyTheme must use matchMedia('(prefers-color-scheme:dark)')"
        )

    def test_load_settings_calls_apply_theme(self):
        src = read("static/boot.js")
        assert "_applyTheme(appearance.theme)" in src, (
            "loadSettings must call _applyTheme() instead of direct data-theme assignment"
        )

    def test_system_option_in_theme_picker(self):
        html = read("static/index.html")
        assert "_pickTheme('system')" in html, (
            "Theme picker must include a system theme button"
        )
        assert ">System<" in html, (
            "Theme picker must show 'System' label"
        )

    def test_theme_picker_uses_pick_theme(self):
        html = read("static/index.html")
        assert "_pickTheme(" in html, (
            "Theme buttons must call _pickTheme()"
        )

    def test_flicker_script_resolves_system(self):
        html = read("static/index.html")
        # The head flicker-prevention IIFE must handle 'system'
        assert "==='system'" in html or "=== 'system'" in html, (
            "Flicker-prevention head script must resolve 'system' before setting data-theme"
        )
        assert "legacy={slate:['dark','slate']" in html, (
            "Flicker-prevention head script must normalize legacy theme names on first paint"
        )

    def test_system_in_commands_themes_list(self):
        src = read("static/commands.js")
        assert "'system'" in src, (
            "/theme command must include 'system' in the valid themes array"
        )

    def test_commands_uses_apply_theme(self):
        src = read("static/commands.js")
        assert "_applyTheme(appearance.theme)" in src, (
            "cmdTheme must call _applyTheme() with the normalized canonical theme"
        )

    def test_commands_accept_legacy_theme_aliases(self):
        src = read("static/commands.js")
        assert "const legacyThemes=Object.keys(_LEGACY_THEME_MAP||{});" in src, (
            "cmdTheme must accept legacy theme aliases and map them onto canonical appearance values"
        )

    def test_panels_reverts_via_apply_theme(self):
        # NOTE(review): the second `or "_applyTheme(" in src` arm makes this
        # assertion pass whenever _applyTheme is called anywhere in panels.js —
        # consider tightening to the exact revert call.
        src = read("static/panels.js")
        assert "_applyTheme(_settingsThemeOnOpen)" in src or \
            "_applyTheme(" in src, (
            "_revertSettingsPreview must call _applyTheme() so 'system' "
            "is correctly re-activated on settings discard"
        )

    def test_panels_saves_system_string_not_resolved(self):
        src = read("static/panels.js")
        assert "localStorage.getItem('hermes-theme')" in src, (
            "_settingsThemeOnOpen must read from localStorage to preserve "
            "the 'system' string, not the resolved 'dark'/'light'"
        )

    def test_i18n_cmd_theme_includes_system_english(self):
        src = read("static/i18n.js")
        assert "system/dark/light" in src, (
            "English cmd_theme i18n key must include 'system' in the theme list"
        )

    def test_i18n_cmd_theme_all_locales(self):
        # One occurrence per locale is expected; >= 5 covers all shipped locales.
        src = read("static/i18n.js")
        count = src.count("system/dark/light")
        assert count >= 5, (
            f"cmd_theme description should mention 'system' in all 5 locales; "
            f"found {count}"
        )

    def test_theme_listener_cleanup_uses_stable_handler(self):
        src = read("static/boot.js")
        assert "_systemThemeMq&&_onSystemThemeChange" in src, (
            "_applyTheme must track the active OS-theme listener so it can be removed cleanly"
        )
        assert "removeEventListener('change',_onSystemThemeChange)" in src, (
            "_applyTheme must remove the previous OS-theme listener before adding a new one"
        )

    def test_panels_hydrates_appearance_before_models_fetch(self):
        # Ordering check: skin hydration must precede the awaited models fetch.
        src = read("static/panels.js")
        skin_idx = src.index("const skinVal=(settings.skin||'default').toLowerCase();")
        models_idx = src.index("const models=await api('/api/models');")
        assert skin_idx < models_idx, (
            "loadSettingsPanel must hydrate theme/skin before awaiting /api/models, "
            "otherwise a slow model fetch can clobber an in-progress skin selection"
        )

View File

@@ -0,0 +1,140 @@
"""
Bug batch fixes — April 2026.
Covers:
- #594: .app-dialog and .file-rename-input have light theme overrides in style.css
- #576: workspace panel localStorage restore is gated on session.workspace presence (boot.js)
- #585: get_available_models() calls reload_config() before reading config cache
- #567: docker-compose.yml comment mentions macOS UID mismatch
- #590: _transcribeBlob already calls setComposerStatus('Transcribing…') — confirmed present
"""
import pathlib
import re
REPO_ROOT = pathlib.Path(__file__).parent.parent
# Static assets and the compose file are read once at import time; every
# test below greps these strings instead of re-reading from disk.
STYLE_CSS = (REPO_ROOT / "static" / "style.css").read_text(encoding="utf-8")
BOOT_JS = (REPO_ROOT / "static" / "boot.js").read_text(encoding="utf-8")
COMPOSE = (REPO_ROOT / "docker-compose.yml").read_text(encoding="utf-8")
# ── #594: light theme dialog overrides ───────────────────────────────────────
def test_594_app_dialog_has_light_mode_override():
    """Dialogs must not stay dark when the light theme is active (#594)."""
    needle = ':root:not(.dark) .app-dialog{'
    assert needle in STYLE_CSS, (
        "Missing light mode override for .app-dialog — dialogs appear dark on light theme"
    )


def test_594_app_dialog_input_has_light_mode_override():
    """Dialog inputs need their own light-mode rule (#594)."""
    needle = ":root:not(.dark) .app-dialog-input{"
    assert needle in STYLE_CSS, (
        "Missing light mode override for .app-dialog-input"
    )


def test_594_app_dialog_btn_has_light_mode_override():
    """Dialog buttons need their own light-mode rule (#594)."""
    needle = ":root:not(.dark) .app-dialog-btn{"
    assert needle in STYLE_CSS, (
        "Missing light mode override for .app-dialog-btn"
    )


def test_594_app_dialog_close_has_light_mode_override():
    """The dialog close control needs its own light-mode rule (#594)."""
    needle = ":root:not(.dark) .app-dialog-close{"
    assert needle in STYLE_CSS, (
        "Missing light mode override for .app-dialog-close"
    )


def test_594_file_rename_input_has_light_mode_override():
    """The inline file-rename input needs its own light-mode rule (#594)."""
    needle = ":root:not(.dark) .file-rename-input{"
    assert needle in STYLE_CSS, (
        "Missing light mode override for .file-rename-input"
    )
# ── #576: workspace panel snap fix ───────────────────────────────────────────
def test_576_panel_restore_gated_on_workspace():
    """boot.js: localStorage panel restore must be gated on session.workspace."""
    guard = "S.session&&S.session.workspace&&localStorage.getItem('hermes-webui-workspace-panel')"
    assert guard in BOOT_JS, (
        "Workspace panel localStorage restore must be gated on S.session.workspace "
        "to prevent snap-open-then-closed on sessions without a workspace (#576)"
    )


def test_576_restore_happens_after_load_session():
    """boot.js: loadSession() must come before the panel restore guard."""
    session_load_at = BOOT_JS.find("await loadSession(saved)")
    panel_restore_at = BOOT_JS.find("S.session&&S.session.workspace&&localStorage")
    assert session_load_at != -1, "loadSession call not found in boot.js"
    assert panel_restore_at != -1, "workspace panel restore guard not found"
    assert session_load_at < panel_restore_at, (
        "loadSession() must run before the panel restore guard "
        "so S.session.workspace is known at restore time"
    )
# ── #585: get_available_models reloads config ─────────────────────────────────
def test_585_get_available_models_calls_reload_config():
    """api/config.py: get_available_models() must do a mtime-based reload check."""
    config_src = (REPO_ROOT / "api" / "config.py").read_text(encoding="utf-8")
    fn_start = config_src.find("def get_available_models()")
    assert fn_start != -1, "get_available_models not found"
    # Skip past the function's docstring (opening and closing triple-quote).
    docstring_open = config_src.find('"""', fn_start + 30)
    fn_body_end = config_src.find('"""', docstring_open + 3) + 3
    # The mtime check must come before the cache is consulted.
    mtime_pos = config_src.find("_current_mtime", fn_body_end)
    active_prov_pos = config_src.find("active_provider = None", fn_body_end)
    assert mtime_pos != -1, (
        "get_available_models() must check config file mtime before reading cache (#585)"
    )
    assert mtime_pos < active_prov_pos, (
        "mtime check must come before active_provider = None in get_available_models()"
    )
# ── #567: docker-compose UID note ─────────────────────────────────────────────
def test_567_compose_mentions_macos_uid():
    """docker-compose.yml must mention macOS UID / id -u to help macOS users."""
    # Case-insensitive check covers "macOS"/"MacOS"/"macos" spellings alike.
    assert "macos" in COMPOSE.lower(), (
        "docker-compose.yml should mention macOS UID issue (#567)"
    )
    assert "id -u" in COMPOSE, (
        "docker-compose.yml should tell users to run 'id -u' to find their UID (#567)"
    )
# ── #590: transcription spinner already present ───────────────────────────────
def test_590_transcribing_status_shown_before_fetch():
    """boot.js: setComposerStatus('Transcribing…') must fire before the fetch call."""
    fn_at = BOOT_JS.find("async function _transcribeBlob(")
    assert fn_at != -1, "_transcribeBlob not found in boot.js"
    fn_window = BOOT_JS[fn_at:fn_at + 600]
    status_at = fn_window.find("setComposerStatus('Transcribing")
    fetch_at = fn_window.find("await fetch(")
    assert status_at != -1, (
        "setComposerStatus('Transcribing…') must be called before the fetch in _transcribeBlob"
    )
    assert fetch_at != -1, "await fetch not found in _transcribeBlob"
    assert status_at < fetch_at, (
        "setComposerStatus('Transcribing…') must appear before 'await fetch' "
        "so the UI shows a spinner immediately on stop (#590)"
    )


def test_590_recording_stops_before_transcribe():
    """boot.js: _setRecording(false) must fire in onstop before _transcribeBlob."""
    onstop_at = BOOT_JS.find("mediaRecorder.onstop")
    assert onstop_at != -1, "mediaRecorder.onstop not found"
    onstop_window = BOOT_JS[onstop_at:onstop_at + 400]
    stop_at = onstop_window.find("_setRecording(false)")
    transcribe_at = onstop_window.find("_transcribeBlob(")
    assert stop_at != -1 and transcribe_at != -1
    assert stop_at < transcribe_at, (
        "_setRecording(false) must come before _transcribeBlob so mic icon clears immediately"
    )

View File

@@ -0,0 +1,115 @@
"""
Unit tests for cancel/interrupt functionality.
Tests the integration between cancel_stream() and agent.interrupt().
"""
import pytest
import queue
import threading
from unittest.mock import Mock
from api.streaming import cancel_stream
from api.config import AGENT_INSTANCES, STREAMS, CANCEL_FLAGS
class TestCancelInterrupt:
    """Test suite for cancel/interrupt functionality."""

    @staticmethod
    def _reset_registries():
        # The module-level registries are shared state; empty them between tests.
        AGENT_INSTANCES.clear()
        STREAMS.clear()
        CANCEL_FLAGS.clear()

    def setup_method(self):
        """Clean up before each test."""
        self._reset_registries()

    def teardown_method(self):
        """Clean up after each test."""
        self._reset_registries()

    def test_cancel_calls_agent_interrupt(self):
        """cancel_stream() must call agent.interrupt() when an agent is registered."""
        stream_id = "test_stream_123"
        agent = Mock()
        agent.interrupt = Mock()
        STREAMS[stream_id] = queue.Queue()
        CANCEL_FLAGS[stream_id] = threading.Event()
        AGENT_INSTANCES[stream_id] = agent

        assert cancel_stream(stream_id) is True
        agent.interrupt.assert_called_once_with("Cancelled by user")
        assert CANCEL_FLAGS[stream_id].is_set()

    def test_cancel_handles_interrupt_exception(self):
        """cancel_stream() must swallow exceptions raised by interrupt()."""
        stream_id = "test_stream_456"
        agent = Mock()
        agent.interrupt = Mock(side_effect=RuntimeError("Agent error"))
        STREAMS[stream_id] = queue.Queue()
        CANCEL_FLAGS[stream_id] = threading.Event()
        AGENT_INSTANCES[stream_id] = agent

        # Must not propagate the RuntimeError.
        assert cancel_stream(stream_id) is True
        agent.interrupt.assert_called_once()
        assert CANCEL_FLAGS[stream_id].is_set()

    def test_cancel_before_agent_ready(self):
        """Cancel when the agent is not yet in AGENT_INSTANCES (race condition)."""
        stream_id = "test_stream_789"
        STREAMS[stream_id] = queue.Queue()
        CANCEL_FLAGS[stream_id] = threading.Event()
        # AGENT_INSTANCES deliberately left empty: the agent observes the
        # cancel flag whenever it eventually starts.
        assert cancel_stream(stream_id) is True
        assert CANCEL_FLAGS[stream_id].is_set()

    def test_cancel_nonexistent_stream(self):
        """Cancelling an unknown stream reports failure."""
        assert cancel_stream("nonexistent_stream") is False

    def test_cancel_sets_cancel_event(self):
        """cancel_stream() must set the stream's cancel event."""
        stream_id = "test_stream_event"
        STREAMS[stream_id] = queue.Queue()
        flag = threading.Event()
        CANCEL_FLAGS[stream_id] = flag
        assert cancel_stream(stream_id) is True
        assert flag.is_set()

    def test_cancel_puts_sentinel_in_queue(self):
        """cancel_stream() must enqueue the cancel sentinel for the consumer."""
        stream_id = "test_stream_queue"
        pending = queue.Queue()
        STREAMS[stream_id] = pending
        CANCEL_FLAGS[stream_id] = threading.Event()
        assert cancel_stream(stream_id) is True
        # The cancel message must be the next event on the stream queue.
        assert not pending.empty()
        event_type, payload = pending.get_nowait()
        assert event_type == 'cancel'
        assert payload['message'] == 'Cancelled by user'

View File

@@ -0,0 +1,111 @@
from collections import Counter
from pathlib import Path
import re
REPO = Path(__file__).resolve().parent.parent
def read(path: Path) -> str:
    """Read *path* and return its contents decoded as UTF-8."""
    with open(path, encoding="utf-8") as fh:
        return fh.read()
def extract_locale_block(src: str, locale_key: str) -> str:
    """Return the text between the braces of ``locale_key: { ... }`` in *src*.

    Scans forward from the opening brace, balancing nested braces while
    skipping over the contents of single-, double-, and backtick-quoted
    JavaScript strings (including backslash escapes), so braces inside
    string literals do not affect the depth count.
    """
    opener = re.search(rf"\b{re.escape(locale_key)}\s*:\s*\{{", src)
    assert opener, f"{locale_key} locale block not found"
    start = opener.end() - 1  # index of the opening "{"
    depth = 0
    quote = None        # active string delimiter, or None while in code
    skip_next = False   # previous char was a backslash inside a string
    i = start
    while i < len(src):
        ch = src[i]
        i += 1
        if skip_next:
            skip_next = False
            continue
        if quote is not None:
            if ch == "\\":
                skip_next = True
            elif ch == quote:
                quote = None
            continue
        if ch in ("'", '"', "`"):
            quote = ch
        elif ch == "{":
            depth += 1
        elif ch == "}":
            depth -= 1
            if depth == 0:
                # i was already advanced past the brace; exclude both braces.
                return src[start + 1 : i - 1]
    raise AssertionError(f"{locale_key} locale block braces are not balanced")
def test_chinese_locale_block_exists():
    """i18n.js must define a zh locale with its language and speech tags."""
    i18n_src = read(REPO / "static" / "i18n.js")
    for marker in ("\n  zh: {", "_lang: 'zh'", "_speech: 'zh-CN'"):
        assert marker in i18n_src


def test_chinese_locale_includes_representative_translations():
    """Spot-check translated keys across several UI areas."""
    i18n_src = read(REPO / "static" / "i18n.js")
    expected = [
        "settings_title: '\\u8bbe\\u7f6e'",
        "login_title: '\\u767b\\u5f55'",
        "approval_heading: '需要审批'",
        "tab_tasks: '任务'",
        "tab_profiles: '配置'",
        "session_time_just_now: '刚刚'",
        "onboarding_title: '欢迎使用 Hermes Web UI'",
        "onboarding_complete: '引导完成'",
    ]
    for entry in expected:
        assert entry in i18n_src


def test_chinese_locale_covers_english_keys():
    """Every key in the en block must also exist in zh."""
    i18n_src = read(REPO / "static" / "i18n.js")
    key_re = re.compile(r"^\s{4}([a-zA-Z0-9_]+):", re.MULTILINE)
    en_keys = set(key_re.findall(extract_locale_block(i18n_src, "en")))
    zh_keys = set(key_re.findall(extract_locale_block(i18n_src, "zh")))
    missing = sorted(en_keys - zh_keys)
    assert not missing, f"Chinese locale missing keys: {missing}"


def test_chinese_locale_has_no_duplicate_keys():
    """No key may be declared twice inside the zh block."""
    i18n_src = read(REPO / "static" / "i18n.js")
    key_re = re.compile(r"^\s{4}([a-zA-Z0-9_]+):", re.MULTILINE)
    zh_keys = key_re.findall(extract_locale_block(i18n_src, "zh"))
    duplicates = sorted(k for k, n in Counter(zh_keys).items() if n > 1)
    assert not duplicates, f"Chinese locale has duplicate keys: {duplicates}"

View File

@@ -0,0 +1,165 @@
"""Tests for clarify prompt unblocking and HTTP endpoints."""
import json
import threading
import uuid
import urllib.request
import urllib.error
import urllib.parse
import pytest
try:
from api.clarify import (
register_gateway_notify,
unregister_gateway_notify,
resolve_clarify,
clear_pending,
_gateway_queues,
_gateway_notify_cbs,
_lock,
_ClarifyEntry,
submit_pending,
)
CLARIFY_AVAILABLE = True
except ImportError:
CLARIFY_AVAILABLE = False
pytestmark = pytest.mark.skipif(
not CLARIFY_AVAILABLE,
reason="api.clarify not available in this environment",
)
from tests._pytest_port import BASE
def get(path):
    """GET BASE+path from the live test server and decode the JSON body."""
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        return json.loads(resp.read())
def post(path, body=None):
    """POST a JSON body to BASE+path; return ``(parsed_json, status_code)``.

    HTTP error responses are not raised — their payload and status code are
    returned so tests can assert directly on 4xx replies.
    """
    payload = json.dumps(body or {}).encode()
    req = urllib.request.Request(
        BASE + path,
        data=payload,
        headers={"Content-Type": "application/json"},
    )
    try:
        with urllib.request.urlopen(req, timeout=10) as resp:
            return json.loads(resp.read()), resp.status
    except urllib.error.HTTPError as err:
        return json.loads(err.read()), err.code
class TestClarifyUnblocking:
    """Unit tests for clarify queue resolution."""

    def test_resolve_clarify_sets_event(self):
        """resolve_clarify must set the waiter's event and record the answer."""
        session = f"unit-clarify-{uuid.uuid4().hex[:8]}"
        pending = _ClarifyEntry({"question": "Pick one", "choices_offered": ["a", "b"]})
        with _lock:
            _gateway_queues.setdefault(session, []).append(pending)
        assert resolve_clarify(session, "a", resolve_all=False) == 1
        assert pending.event.is_set()
        assert pending.result == "a"

    def test_register_and_fire_notify_cb(self):
        """A registered gateway callback must receive the clarify payload."""
        session = f"unit-notify-{uuid.uuid4().hex[:8]}"
        received = []
        register_gateway_notify(session, lambda payload: received.append(payload))
        with _lock:
            cb = _gateway_notify_cbs.get(session)
        assert cb is not None
        payload = {"question": "What now?", "choices_offered": ["x", "y"]}
        cb(payload)
        assert received == [payload]
        unregister_gateway_notify(session)

    def test_clear_pending_unblocks_waiters(self):
        """clear_pending must wake all waiters and drop the session queue."""
        session = f"unit-clear-{uuid.uuid4().hex[:8]}"
        pending = _ClarifyEntry({"question": "Wait", "choices_offered": []})
        with _lock:
            _gateway_queues.setdefault(session, []).append(pending)
        assert clear_pending(session) == 1
        assert pending.event.is_set()
        with _lock:
            assert session not in _gateway_queues

    def test_submit_pending_registers_entry(self):
        """submit_pending must store the entry under its session id."""
        session = f"unit-submit-{uuid.uuid4().hex[:8]}"
        payload = {"question": "Pick", "choices_offered": ["one", "two"], "session_id": session}
        entry = submit_pending(session, payload)
        assert entry.data == payload
        with _lock:
            assert session in _gateway_queues
        clear_pending(session)
class TestClarifyModuleExports:
    """api.clarify must keep exporting its coordination primitives."""

    @staticmethod
    def _has_attr(name):
        # Lazy import mirrors how downstream callers access the module.
        import api.clarify as ap
        return hasattr(ap, name)

    def test_register_gateway_notify_exported(self):
        assert self._has_attr("register_gateway_notify")

    def test_unregister_gateway_notify_exported(self):
        assert self._has_attr("unregister_gateway_notify")

    def test_resolve_clarify_exported(self):
        assert self._has_attr("resolve_clarify")

    def test_clarify_entry_exported(self):
        assert self._has_attr("_ClarifyEntry")
class TestClarifyHTTPEndpoints:
    """Regression tests for /api/clarify/respond against the live test server."""

    def test_respond_returns_ok_no_pending(self):
        """Responding when nothing is pending is a harmless no-op."""
        session = f"http-no-pending-{uuid.uuid4().hex[:8]}"
        body, status = post("/api/clarify/respond", {
            "session_id": session,
            "response": "Use option A",
        })
        assert status == 200
        assert body["ok"] is True

    def test_respond_requires_session_id(self):
        """Missing session_id is a client error."""
        _body, status = post("/api/clarify/respond", {"response": "Hello"})
        assert status == 400

    def test_respond_requires_response(self):
        """Missing response text is a client error."""
        session = f"http-no-response-{uuid.uuid4().hex[:8]}"
        _body, status = post("/api/clarify/respond", {"session_id": session})
        assert status == 400

    def test_respond_clears_injected_pending(self):
        """A respond call must clear a previously injected pending clarify."""
        session = f"http-clear-{uuid.uuid4().hex[:8]}"
        quoted_sid = urllib.parse.quote(session)
        question = urllib.parse.quote("Pick the better option")
        choices = urllib.parse.quote("A")
        injected = get(
            f"/api/clarify/inject_test?session_id={quoted_sid}"
            f"&question={question}&choices={choices}"
        )
        assert injected["ok"] is True
        assert get(f"/api/clarify/pending?session_id={quoted_sid}")["pending"] is not None
        body, status = post("/api/clarify/respond", {
            "session_id": session,
            "response": "B",
        })
        assert status == 200
        assert body["ok"] is True
        assert get(f"/api/clarify/pending?session_id={quoted_sid}")["pending"] is None

View File

@@ -0,0 +1,84 @@
"""Tests for GET /api/commands -- exposes hermes-agent COMMAND_REGISTRY."""
import json
import urllib.request
import pytest
from tests.conftest import TEST_BASE, requires_agent_modules
def _get(path):
    """GET helper -- returns parsed JSON or raises HTTPError."""
    url = TEST_BASE + path
    with urllib.request.urlopen(url, timeout=10) as resp:
        raw = resp.read()
    return json.loads(raw)
@requires_agent_modules
def test_commands_endpoint_returns_list():
    """GET /api/commands returns a JSON object with a non-empty 'commands' list."""
    payload = _get('/api/commands')
    commands = payload.get('commands')
    assert commands is not None
    assert isinstance(commands, list)
    assert commands  # at least one command must be exposed


@requires_agent_modules
def test_commands_endpoint_includes_help():
    """The 'help' command must always be present (it's not cli_only)."""
    names = {c['name'] for c in _get('/api/commands')['commands']}
    assert 'help' in names


@requires_agent_modules
def test_commands_endpoint_command_shape():
    """Each command entry carries the full set of required fields."""
    commands = _get('/api/commands')['commands']
    help_cmd = next(c for c in commands if c['name'] == 'help')
    required = {
        'name', 'description', 'category', 'aliases',
        'args_hint', 'subcommands', 'cli_only', 'gateway_only',
    }
    assert required <= set(help_cmd.keys())
    assert isinstance(help_cmd['aliases'], list)
    assert isinstance(help_cmd['subcommands'], list)
    assert isinstance(help_cmd['cli_only'], bool)
    assert isinstance(help_cmd['gateway_only'], bool)


@requires_agent_modules
def test_commands_endpoint_excludes_gateway_only_and_never_expose():
    """gateway_only commands and the _NEVER_EXPOSE set are filtered out."""
    names = {c['name'] for c in _get('/api/commands')['commands']}
    # /sethome, /restart, /update are gateway_only; /commands is in _NEVER_EXPOSE
    for name in ('sethome', 'restart', 'update', 'commands'):
        assert name not in names, f"{name} must be excluded from /api/commands"


@requires_agent_modules
def test_commands_endpoint_keeps_new_with_reset_alias():
    """The 'new' command stays exposed and carries its 'reset' alias."""
    commands = _get('/api/commands')['commands']
    new_cmd = next(c for c in commands if c['name'] == 'new')
    assert 'reset' in new_cmd['aliases']
def test_list_commands_returns_empty_for_empty_registry():
    """list_commands(_registry=[]) returns [] -- the same path as when
    hermes_cli is missing (the empty-or-missing case)."""
    from api.commands import list_commands
    assert list_commands(_registry=[]) == []


def test_list_commands_degrades_when_agent_missing(monkeypatch):
    """If hermes_cli.commands is not importable, list_commands() returns []."""
    import sys
    # Stubbing the module with None makes the lazy in-function import raise
    # ImportError on every call, exercising the fallback path. We deliberately
    # do NOT reload api.commands; monkeypatch restores sys.modules afterwards.
    monkeypatch.setitem(sys.modules, 'hermes_cli.commands', None)
    from api.commands import list_commands
    assert list_commands() == []

View File

@@ -0,0 +1,135 @@
"""
Tests for named custom provider display in the model dropdown (issue #557).
When a custom_providers entry carries a `name` field (e.g. "Agent37"), the
web UI model picker should show that name as the group header rather than the
generic "Custom" label.
"""
import api.config as config
def _models_with_cfg(model_cfg=None, custom_providers=None, active_provider=None):
    """Temporarily patch config.cfg, call get_available_models(), restore.

    model_cfg: value for config.cfg["model"] (skipped when falsy).
    custom_providers: value for config.cfg["custom_providers"] (skipped when None).
    """
    # NOTE(review): `active_provider` is accepted but never used below —
    # either wire it into config.cfg or drop it from the signature.
    old_cfg = dict(config.cfg)  # shallow snapshot; entries are replaced wholesale
    config.cfg.clear()
    if model_cfg:
        config.cfg["model"] = model_cfg
    if custom_providers is not None:
        config.cfg["custom_providers"] = custom_providers
    try:
        return config.get_available_models()
    finally:
        # Always restore the original config, even if the call raises.
        config.cfg.clear()
        config.cfg.update(old_cfg)
# ── Named provider shows its name in the dropdown ─────────────────────────────
class TestNamedCustomProviderGroup:
    """Named custom_providers entries must surface under their own name (#557)."""

    @staticmethod
    def _group_names(result):
        # Flatten the response into just the ordered list of group headers.
        return [g["provider"] for g in result.get("groups", [])]

    def test_named_provider_uses_name_as_group_header(self):
        """A custom_provider entry with name='Agent37' should produce
        a group whose 'provider' key is 'Agent37', not 'Custom'."""
        result = _models_with_cfg(
            model_cfg={"provider": "custom", "base_url": "https://agent37.example.com/v1"},
            custom_providers=[
                {"name": "Agent37", "model": "default", "base_url": "https://agent37.example.com/v1"}
            ],
        )
        group_names = self._group_names(result)
        assert "Agent37" in group_names, (
            f"Expected 'Agent37' in group names, got {group_names}"
        )

    def test_named_provider_does_not_produce_generic_custom(self):
        """When all custom_provider entries have names, no group called 'Custom'
        should appear alongside them."""
        result = _models_with_cfg(
            model_cfg={"provider": "custom", "base_url": "https://agent37.example.com/v1"},
            custom_providers=[
                {"name": "Agent37", "model": "default", "base_url": "https://agent37.example.com/v1"}
            ],
        )
        group_names = self._group_names(result)
        assert "Custom" not in group_names, (
            f"Expected no generic 'Custom' group when all entries are named, got {group_names}"
        )

    def test_named_provider_model_appears_in_its_group(self):
        """The model ID from the named entry should be inside the named group."""
        result = _models_with_cfg(
            model_cfg={"provider": "custom"},
            custom_providers=[
                {"name": "Agent37", "model": "my-llm", "base_url": "https://agent37.example.com/v1"}
            ],
        )
        agent37_group = next(
            (g for g in result.get("groups", []) if g["provider"] == "Agent37"), None
        )
        assert agent37_group is not None, "Expected an 'Agent37' group"
        model_ids = [m["id"] for m in agent37_group.get("models", [])]
        assert "my-llm" in model_ids, (
            f"Expected 'my-llm' in Agent37 group models, got {model_ids}"
        )

    def test_multiple_named_providers_each_get_their_own_group(self):
        """Two named custom providers should produce two distinct groups."""
        result = _models_with_cfg(
            model_cfg={"provider": "custom"},
            custom_providers=[
                {"name": "Agent37", "model": "fast-model"},
                {"name": "PrivateProxy", "model": "private-llm"},
            ],
        )
        group_names = self._group_names(result)
        assert "Agent37" in group_names, f"Expected 'Agent37' group, got {group_names}"
        assert "PrivateProxy" in group_names, f"Expected 'PrivateProxy' group, got {group_names}"
        assert "Custom" not in group_names, f"No generic 'Custom' group expected, got {group_names}"

    def test_multiple_models_in_same_named_provider(self):
        """Multiple entries with the same name should be collapsed into one group."""
        result = _models_with_cfg(
            model_cfg={"provider": "custom"},
            custom_providers=[
                {"name": "Agent37", "model": "model-a"},
                {"name": "Agent37", "model": "model-b"},
            ],
        )
        agent37_groups = [g for g in result.get("groups", []) if g["provider"] == "Agent37"]
        assert len(agent37_groups) == 1, (
            f"Expected exactly one 'Agent37' group, got {len(agent37_groups)}"
        )
        model_ids = [m["id"] for m in agent37_groups[0].get("models", [])]
        assert "model-a" in model_ids
        assert "model-b" in model_ids
# ── Unnamed entry still falls back to 'Custom' ─────────────────────────────────
class TestUnnamedCustomProviderFallback:
    """Entries without a `name` must keep the legacy 'Custom' group label,
    including when they are mixed with named entries."""

    def test_unnamed_entry_still_produces_custom_group(self):
        """A custom_provider entry without a name should still show as 'Custom'."""
        result = _models_with_cfg(
            model_cfg={"provider": "custom"},
            custom_providers=[
                {"model": "unnamed-model"}
            ],
        )
        group_names = [g["provider"] for g in result.get("groups", [])]
        assert "Custom" in group_names, (
            f"Expected generic 'Custom' group for unnamed entry, got {group_names}"
        )

    def test_mixed_named_and_unnamed_entries(self):
        """Named and unnamed entries should appear in their respective groups."""
        result = _models_with_cfg(
            model_cfg={"provider": "custom"},
            custom_providers=[
                {"name": "Agent37", "model": "named-model"},
                {"model": "unnamed-model"},
            ],
        )
        group_names = [g["provider"] for g in result.get("groups", [])]
        assert "Agent37" in group_names, f"Expected 'Agent37' group, got {group_names}"
        assert "Custom" in group_names, f"Expected 'Custom' group for unnamed entry, got {group_names}"

View File

@@ -0,0 +1,148 @@
import json
from pathlib import Path
import api.config as config
def test_resolve_default_workspace_falls_back_to_existing_home_work(monkeypatch, tmp_path):
    """An unusable preferred path falls back to an already-existing ~/work."""
    home_work = tmp_path / "work"
    home_work.mkdir()
    monkeypatch.setattr(config, "HOME", tmp_path)
    monkeypatch.setattr(config, "STATE_DIR", tmp_path / "state")
    result = config.resolve_default_workspace("/definitely/not/usable")
    assert result == home_work.resolve()
def test_save_settings_rewrites_bad_default_workspace_to_fallback(monkeypatch, tmp_path):
    """save_settings() must replace an unusable default_workspace with the
    fallback, both in its return value and in the settings file it writes."""
    fallback = tmp_path / "work"
    fallback.mkdir()
    settings_file = tmp_path / "settings.json"
    monkeypatch.setattr(config, "HOME", tmp_path)
    monkeypatch.setattr(config, "STATE_DIR", tmp_path / "state")
    monkeypatch.setattr(config, "SETTINGS_FILE", settings_file)
    monkeypatch.setattr(config, "DEFAULT_WORKSPACE", fallback)
    returned = config.save_settings({"default_workspace": "/definitely/not/usable"})
    persisted = json.loads(settings_file.read_text(encoding="utf-8"))
    expected = str(fallback.resolve())
    assert returned["default_workspace"] == expected
    assert persisted["default_workspace"] == expected
def test_resolve_default_workspace_creates_home_workspace_when_missing(monkeypatch, tmp_path):
    """When no preferred dir exists, resolve falls back to creating ~/workspace."""
    monkeypatch.setattr(config, "HOME", tmp_path)
    monkeypatch.setattr(config, "STATE_DIR", tmp_path / "state")
    # Neither ~/work nor ~/workspace exists yet.
    created = config.resolve_default_workspace(None)
    expected = (tmp_path / "workspace").resolve()
    assert created == expected
    assert created.is_dir()
def test_resolve_default_workspace_raises_when_all_candidates_fail(monkeypatch, tmp_path):
    """RuntimeError is raised when every candidate is unwritable."""
    import os
    import stat
    import pytest
    # chmod-based permission denial is a no-op for root (e.g. Docker CI):
    # mkdir would succeed and the expected RuntimeError would never fire.
    if hasattr(os, "geteuid") and os.geteuid() == 0:
        pytest.skip("read-only directories do not block root")
    # Make tmp_path read-only so mkdir inside it fails
    tmp_path.chmod(stat.S_IRUSR | stat.S_IXUSR)
    state_dir = tmp_path / "state"
    monkeypatch.setattr(config, "HOME", tmp_path)
    monkeypatch.setattr(config, "STATE_DIR", state_dir)
    monkeypatch.delenv("HERMES_WEBUI_DEFAULT_WORKSPACE", raising=False)
    try:
        with pytest.raises(RuntimeError, match="Could not create or access"):
            config.resolve_default_workspace(None)
    finally:
        tmp_path.chmod(stat.S_IRWXU)  # restore for cleanup
def test_workspace_candidates_deduplicates_home_workspace(monkeypatch, tmp_path):
    """~/workspace must appear at most once in the candidates list even if it exists."""
    home_workspace = tmp_path / "workspace"
    home_workspace.mkdir()
    monkeypatch.setattr(config, "HOME", tmp_path)
    monkeypatch.setattr(config, "STATE_DIR", tmp_path / "state")
    monkeypatch.delenv("HERMES_WEBUI_DEFAULT_WORKSPACE", raising=False)
    as_strings = [str(candidate) for candidate in config._workspace_candidates(None)]
    assert as_strings.count(str(home_workspace.resolve())) <= 1, \
        "~/workspace must not appear twice"
def test_env_var_workspace_takes_priority_over_passed_raw(monkeypatch, tmp_path):
    """HERMES_WEBUI_DEFAULT_WORKSPACE env var overrides a None raw arg but not a valid one."""
    env_workspace = tmp_path / "env_workspace"
    env_workspace.mkdir()
    monkeypatch.setattr(config, "HOME", tmp_path)
    monkeypatch.setattr(config, "STATE_DIR", tmp_path / "state")
    monkeypatch.setenv("HERMES_WEBUI_DEFAULT_WORKSPACE", str(env_workspace))
    # With raw=None the env var should decide the workspace.
    assert config.resolve_default_workspace(None) == env_workspace.resolve()
def test_ensure_workspace_dir_returns_false_for_unwritable_path(monkeypatch, tmp_path):
    """_ensure_workspace_dir returns False for a path that can't be created."""
    import os
    import stat
    import pytest
    # chmod-based permission denial is a no-op for root (e.g. Docker CI),
    # where mkdir would succeed and the assertion below would spuriously fail.
    if hasattr(os, "geteuid") and os.geteuid() == 0:
        pytest.skip("read-only parent directory does not block root")
    # Make parent read-only so mkdir fails
    parent = tmp_path / "ro_parent"
    parent.mkdir()
    parent.chmod(stat.S_IRUSR | stat.S_IXUSR)
    try:
        result = config._ensure_workspace_dir(parent / "child")
        assert result is False
    finally:
        parent.chmod(stat.S_IRWXU)  # restore so pytest can clean up tmp_path
def test_env_var_wins_over_settings_json_on_startup(monkeypatch, tmp_path):
    """HERMES_WEBUI_DEFAULT_WORKSPACE must not be overridden by settings.json at startup.

    Regression for GitHub issue #609: Docker deployments set the env var to a
    volume mount, but settings.json from a previous container run used to
    silently win, reverting the files panel to the old path.
    """
    import json as _json
    import os as _os
    env_ws = tmp_path / "env_workspace"
    env_ws.mkdir()
    settings_ws = tmp_path / "settings_workspace"
    settings_ws.mkdir()
    state_dir = tmp_path / "state"
    state_dir.mkdir()
    # Pre-seed settings.json with a *different* workspace, as a previous
    # container run would have left behind.
    settings_file = state_dir / "settings.json"
    settings_file.write_text(
        _json.dumps({"default_workspace": str(settings_ws)}), encoding="utf-8"
    )
    monkeypatch.setattr(config, "HOME", tmp_path)
    monkeypatch.setattr(config, "STATE_DIR", state_dir)
    monkeypatch.setattr(config, "SETTINGS_FILE", settings_file)
    # Simulate DEFAULT_WORKSPACE already set correctly from env var at import time
    monkeypatch.setattr(config, "DEFAULT_WORKSPACE", env_ws.resolve())
    monkeypatch.setenv("HERMES_WEBUI_DEFAULT_WORKSPACE", str(env_ws))
    # Execute the patched startup block logic inline — env var present → skip override
    current_ws = config.DEFAULT_WORKSPACE
    startup_settings = config.load_settings()
    if not _os.getenv("HERMES_WEBUI_DEFAULT_WORKSPACE"):
        # This branch must be skipped because env var is set
        current_ws = config.resolve_default_workspace(
            startup_settings.get("default_workspace")
        )
    # env var was set → the if block was skipped → env path wins over settings.json
    assert current_ws == env_ws.resolve(), (
        f"Expected {env_ws.resolve()}, got {current_ws}. "
        "settings.json must not override HERMES_WEBUI_DEFAULT_WORKSPACE."
    )

420
tests/test_gateway_sync.py Normal file
View File

@@ -0,0 +1,420 @@
"""
Tests for Phase 1: Real-time Gateway Session Sync.
Tests are ordered TDD-style:
1. Gateway sessions appear in /api/sessions when setting enabled
2. Gateway sessions excluded when setting disabled
3. Gateway sessions have correct metadata (source_tag, is_cli_session)
4. SSE stream endpoint opens and receives events
5. Watcher detects new sessions inserted into state.db
6. Settings UI has renamed label
"""
import json
import os
import pathlib
import sqlite3
import time
import urllib.error
import urllib.request
REPO_ROOT = pathlib.Path(__file__).parent.parent.resolve()
from tests._pytest_port import BASE
def get(path):
    """GET BASE+path; return (decoded JSON body, HTTP status code)."""
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        payload = json.loads(resp.read())
        code = resp.status
    return payload, code
def post(path, body=None):
    """POST `body` as JSON to BASE+path; return (parsed response JSON, status).

    HTTP errors are not raised: the error payload (or {} if unparseable) and
    the error code are returned so tests can assert on status directly.
    """
    encoded = json.dumps(body or {}).encode()
    request = urllib.request.Request(
        BASE + path,
        data=encoded,
        headers={"Content-Type": "application/json"},
    )
    try:
        with urllib.request.urlopen(request, timeout=10) as resp:
            return json.loads(resp.read()), resp.status
    except urllib.error.HTTPError as err:
        try:
            return json.loads(err.read()), err.code
        except Exception:
            return {}, err.code
def _get_test_state_dir():
    """Return the test state directory (matches conftest.py TEST_STATE_DIR).

    conftest.py sets HERMES_WEBUI_TEST_STATE_DIR in the test-process environment
    (via os.environ.setdefault) so that tests writing directly to state.db always
    use the same path the test server was started with.  When the env var is not
    set (e.g. when running this file standalone), tests._pytest_port falls back
    to the same auto-derivation conftest uses (a per-worktree
    ``webui-test-<hash>`` directory under HERMES_HOME), so both paths stay in
    sync either way.
    """
    # Use _pytest_port which applies the same auto-derivation as conftest.py
    from tests._pytest_port import TEST_STATE_DIR as _ptsd
    return _ptsd
def _get_state_db_path():
    """Path of the SQLite state database inside the test state dir."""
    state_dir = _get_test_state_dir()
    return state_dir / 'state.db'
def _ensure_state_db():
    """Create state.db with sessions and messages tables if it doesn't exist.

    Returns a connection. Does NOT delete existing data (safe for parallel tests).
    """
    db_path = _get_state_db_path()
    db_path.parent.mkdir(parents=True, exist_ok=True)
    conn = sqlite3.connect(str(db_path))
    conn.row_factory = sqlite3.Row
    # WAL mode so the webui server can read while tests write concurrently.
    conn.execute("PRAGMA journal_mode=WAL")
    # Schema mirrors the gateway's state.db layout (idempotent re-create).
    conn.executescript("""
        CREATE TABLE IF NOT EXISTS sessions (
            id TEXT PRIMARY KEY,
            source TEXT NOT NULL,
            user_id TEXT,
            model TEXT,
            started_at REAL NOT NULL,
            message_count INTEGER DEFAULT 0,
            title TEXT
        );
        CREATE TABLE IF NOT EXISTS messages (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            session_id TEXT NOT NULL,
            role TEXT NOT NULL,
            content TEXT,
            timestamp REAL NOT NULL
        );
    """)
    conn.commit()
    return conn
def _insert_gateway_session(conn, session_id='20260401_120000_abcdefgh', source='telegram',
title='Telegram Chat', model='anthropic/claude-sonnet-4-5',
started_at=None, message_count=2):
"""Insert a gateway session into state.db."""
conn.execute(
"INSERT OR REPLACE INTO sessions (id, source, title, model, started_at, message_count) "
"VALUES (?, ?, ?, ?, ?, ?)",
(session_id, source, title, model, started_at or time.time(), message_count)
)
# Delete any existing messages for this session (idempotent re-insert)
conn.execute("DELETE FROM messages WHERE session_id = ?", (session_id,))
# Insert some messages
conn.execute(
"INSERT INTO messages (session_id, role, content, timestamp) VALUES (?, 'user', ?, ?)",
(session_id, 'Hello from Telegram', started_at or time.time())
)
conn.execute(
"INSERT INTO messages (session_id, role, content, timestamp) VALUES (?, 'assistant', ?, ?)",
(session_id, 'Hi there!', (started_at or time.time()) + 1)
)
conn.commit()
def _remove_test_sessions(conn, *session_ids):
"""Remove specific test sessions from state.db (parallel-safe cleanup)."""
for sid in session_ids:
conn.execute("DELETE FROM messages WHERE session_id = ?", (sid,))
conn.execute("DELETE FROM sessions WHERE id = ?", (sid,))
conn.commit()
def _cleanup_state_db():
    """Remove state.db and its WAL/SHM side files (blank-slate tests only)."""
    db = _get_state_db_path()
    for victim in (db, db.parent / 'state.db-wal', db.parent / 'state.db-shm'):
        try:
            victim.unlink(missing_ok=True)
        except Exception:
            pass
# ── Tests ──────────────────────────────────────────────────────────────────
def test_gateway_sessions_appear_when_enabled():
    """Gateway sessions from state.db appear in /api/sessions when show_cli_sessions is on."""
    conn = _ensure_state_db()
    try:
        _insert_gateway_session(conn, session_id='gw_test_tg_001', source='telegram', title='TG Test Chat')
        # Enable the setting
        post('/api/settings', {'show_cli_sessions': True})
        data, status = get('/api/sessions')
        assert status == 200
        sessions = data.get('sessions', [])
        gw_ids = [s['session_id'] for s in sessions if s.get('session_id') == 'gw_test_tg_001']
        assert len(gw_ids) == 1, f"Expected gateway session gw_test_tg_001, got {[s['session_id'] for s in sessions]}"
    finally:
        # Best-effort cleanup: drop the fixture row, then restore the setting.
        try:
            _remove_test_sessions(conn, 'gw_test_tg_001')
            conn.close()
        except Exception:
            pass
        post('/api/settings', {'show_cli_sessions': False})
def test_gateway_sessions_excluded_when_disabled():
    """Gateway sessions are NOT returned when show_cli_sessions is off."""
    conn = _ensure_state_db()
    try:
        _insert_gateway_session(conn, session_id='gw_test_dc_001', source='discord', title='DC Test Chat')
        # Ensure setting is off
        post('/api/settings', {'show_cli_sessions': False})
        data, status = get('/api/sessions')
        assert status == 200
        sessions = data.get('sessions', [])
        gw_ids = [s['session_id'] for s in sessions if s.get('session_id') == 'gw_test_dc_001']
        assert len(gw_ids) == 0, "Gateway session should not appear when setting is off"
    finally:
        # Best-effort cleanup (setting is already off, no reset needed).
        try:
            _remove_test_sessions(conn, 'gw_test_dc_001')
            conn.close()
        except Exception:
            pass
def test_gateway_session_has_correct_metadata():
    """Gateway sessions include source_tag and is_cli_session fields."""
    conn = _ensure_state_db()
    try:
        _insert_gateway_session(conn, session_id='gw_meta_001', source='telegram', title='Meta Test')
        post('/api/settings', {'show_cli_sessions': True})
        data, status = get('/api/sessions')
        assert status == 200
        sessions = data.get('sessions', [])
        gw = next((s for s in sessions if s['session_id'] == 'gw_meta_001'), None)
        assert gw is not None, "Gateway session not found"
        # source_tag mirrors the state.db `source` column; is_cli_session marks
        # any agent-originated (non-webui) session.
        assert gw.get('source_tag') == 'telegram', f"Expected source_tag=telegram, got {gw.get('source_tag')}"
        assert gw.get('is_cli_session') is True, "is_cli_session should be True for agent sessions"
        assert gw.get('title') == 'Meta Test'
    finally:
        try:
            _remove_test_sessions(conn, 'gw_meta_001')
            conn.close()
        except Exception:
            pass
        post('/api/settings', {'show_cli_sessions': False})
def test_gateway_session_has_message_count():
    """Gateway sessions report correct message_count from state.db."""
    conn = _ensure_state_db()
    try:
        # message_count comes from the sessions row, not from counting messages.
        _insert_gateway_session(conn, session_id='gw_msg_001', source='discord', title='Msg Count Test', message_count=5)
        post('/api/settings', {'show_cli_sessions': True})
        data, status = get('/api/sessions')
        assert status == 200
        sessions = data.get('sessions', [])
        gw = next((s for s in sessions if s['session_id'] == 'gw_msg_001'), None)
        assert gw is not None
        assert gw.get('message_count') == 5, f"Expected message_count=5, got {gw.get('message_count')}"
    finally:
        try:
            _remove_test_sessions(conn, 'gw_msg_001')
            conn.close()
        except Exception:
            pass
        post('/api/settings', {'show_cli_sessions': False})
def test_gateway_sessions_multiple_sources():
    """Sessions from multiple gateway sources (telegram, discord, slack) all appear."""
    conn = _ensure_state_db()
    try:
        _insert_gateway_session(conn, session_id='gw_multi_tg', source='telegram', title='TG Chat')
        _insert_gateway_session(conn, session_id='gw_multi_dc', source='discord', title='DC Chat')
        _insert_gateway_session(conn, session_id='gw_multi_sl', source='slack', title='SL Chat')
        post('/api/settings', {'show_cli_sessions': True})
        data, status = get('/api/sessions')
        assert status == 200
        sessions = data.get('sessions', [])
        gw_ids = {s['session_id'] for s in sessions if s.get('session_id') in ('gw_multi_tg', 'gw_multi_dc', 'gw_multi_sl')}
        assert len(gw_ids) == 3, f"Expected 3 gateway sessions, got {len(gw_ids)}: {gw_ids}"
    finally:
        try:
            _remove_test_sessions(conn, 'gw_multi_tg', 'gw_multi_dc', 'gw_multi_sl')
            conn.close()
        except Exception:
            pass
        post('/api/settings', {'show_cli_sessions': False})
def test_gateway_session_messages_readable():
    """Gateway session messages can be loaded via /api/session.

    Relies on _insert_gateway_session always writing a 'user' message
    ('Hello from Telegram') followed by an 'assistant' message.
    """
    conn = _ensure_state_db()
    try:
        _insert_gateway_session(conn, session_id='gw_read_001', source='telegram', title='Readable')
        post('/api/settings', {'show_cli_sessions': True})
        # Plain string (the previous f-string had no placeholders).
        data, status = get('/api/session?session_id=gw_read_001')
        assert status == 200
        msgs = data.get('session', {}).get('messages', [])
        assert len(msgs) >= 2, f"Expected at least 2 messages, got {len(msgs)}"
        assert msgs[0].get('role') == 'user'
        assert msgs[0].get('content') == 'Hello from Telegram'
    finally:
        # Best-effort cleanup: drop the fixture row, then restore the setting.
        try:
            _remove_test_sessions(conn, 'gw_read_001')
            conn.close()
        except Exception:
            pass
        post('/api/settings', {'show_cli_sessions': False})
def test_importing_older_gateway_session_preserves_original_timestamps_and_order():
    """Importing an older gateway session should not bump it above newer WebUI sessions."""
    conn = _ensure_state_db()
    # Gateway session is backdated 30 minutes; the WebUI session created below
    # is newer and must stay first in the /api/sessions ordering.
    older_started_at = time.time() - 1800
    imported_sid = 'gw_import_old_001'
    newer_webui_sid = None
    try:
        newer_webui, status = post('/api/session/new', {'model': 'openai/gpt-5'})
        assert status == 200, newer_webui
        newer_webui_sid = newer_webui['session']['session_id']
        rename, rename_status = post(
            '/api/session/rename',
            {'session_id': newer_webui_sid, 'title': 'Newer WebUI Session'},
        )
        assert rename_status == 200, rename
        _insert_gateway_session(
            conn,
            session_id=imported_sid,
            source='discord',
            title='Older imported gateway session',
            started_at=older_started_at,
        )
        post('/api/settings', {'show_cli_sessions': True})
        imported, imported_status = post('/api/session/import_cli', {'session_id': imported_sid})
        assert imported_status == 200, imported
        imported_session = imported['session']
        # Import must carry over the original timestamps (small tolerance for
        # clock/serialization slop), not stamp the session with "now".
        assert abs(imported_session['created_at'] - older_started_at) < 2, imported_session
        assert abs(imported_session['updated_at'] - older_started_at) < 5, imported_session
        sessions_payload, sessions_status = get('/api/sessions')
        assert sessions_status == 200, sessions_payload
        ordered_ids = [item['session_id'] for item in sessions_payload.get('sessions', [])]
        assert newer_webui_sid in ordered_ids, ordered_ids
        assert imported_sid in ordered_ids, ordered_ids
        # Newer WebUI session must sort before the older imported one.
        assert ordered_ids.index(newer_webui_sid) < ordered_ids.index(imported_sid), ordered_ids
    finally:
        # Best-effort cleanup of both the state.db row and the webui sessions.
        try:
            _remove_test_sessions(conn, imported_sid)
            conn.close()
        except Exception:
            pass
        if imported_sid:
            try:
                post('/api/session/delete', {'session_id': imported_sid})
            except Exception:
                pass
        if newer_webui_sid:
            try:
                post('/api/session/delete', {'session_id': newer_webui_sid})
            except Exception:
                pass
        post('/api/settings', {'show_cli_sessions': False})
def test_gateway_sse_stream_endpoint_exists():
    """GET /api/sessions/gateway/stream returns a response (200 or 200-range)."""
    # The SSE endpoint requires show_cli_sessions to be enabled
    post('/api/settings', {'show_cli_sessions': True})
    try:
        req = urllib.request.Request(BASE + '/api/sessions/gateway/stream')
        with urllib.request.urlopen(req, timeout=5) as r:
            assert r.status in (200, 204), f"Expected 200/204, got {r.status}"
            # SSE should have content-type text/event-stream
            ctype = r.headers.get('Content-Type', '')
            assert 'text/event-stream' in ctype, f"Expected text/event-stream, got {ctype}"
    except Exception as e:
        # Timeout is acceptable — means the connection is held open (SSE behavior)
        if 'timed out' in str(e).lower() or 'timeout' in str(e).lower():
            pass  # Good: SSE keeps the connection open
        else:
            raise
    finally:
        post('/api/settings', {'show_cli_sessions': False})
def test_gateway_webui_sessions_not_duplicated():
    """If a session_id exists both in WebUI store and state.db, it's not duplicated."""
    # Create a WebUI session with a known ID
    body = {}
    d, _ = post('/api/session/new', body)
    webui_sid = d['session']['session_id']
    try:
        # Insert the same session_id into state.db as a gateway session
        conn = _ensure_state_db()
        _insert_gateway_session(conn, session_id=webui_sid, source='telegram', title='Dup Test')
        conn.close()
        post('/api/settings', {'show_cli_sessions': True})
        data, status = get('/api/sessions')
        assert status == 200
        sessions = data.get('sessions', [])
        matching = [s for s in sessions if s['session_id'] == webui_sid]
        assert len(matching) == 1, f"Expected 1 entry for {webui_sid}, got {len(matching)}"
    finally:
        # Cleanup opens a fresh connection: `conn` above is already closed (or
        # may never have been bound if _ensure_state_db raised).
        try:
            conn2 = sqlite3.connect(str(_get_state_db_path()))
            _remove_test_sessions(conn2, webui_sid)
            conn2.close()
        except Exception:
            pass
        post('/api/session/delete', {'session_id': webui_sid})
        post('/api/settings', {'show_cli_sessions': False})
def test_gateway_sessions_no_state_db():
    """When state.db doesn't exist, /api/sessions works fine (no gateway sessions)."""
    _cleanup_state_db()
    post('/api/settings', {'show_cli_sessions': True})
    try:
        payload, status = get('/api/sessions')
        assert status == 200
        # The endpoint must still answer with the webui session list (possibly empty).
        assert 'sessions' in payload
    finally:
        post('/api/settings', {'show_cli_sessions': False})
def test_cli_sessions_still_work():
    """CLI sessions (source='cli') still appear alongside gateway sessions."""
    conn = _ensure_state_db()
    try:
        # Legacy CLI rows and new gateway rows share the same code path.
        _insert_gateway_session(conn, session_id='cli_legacy_001', source='cli', title='CLI Legacy')
        _insert_gateway_session(conn, session_id='gw_new_001', source='telegram', title='GW New')
        post('/api/settings', {'show_cli_sessions': True})
        data, status = get('/api/sessions')
        assert status == 200
        sessions = data.get('sessions', [])
        agent_ids = {s['session_id'] for s in sessions if s.get('session_id') in ('cli_legacy_001', 'gw_new_001')}
        assert len(agent_ids) == 2, f"Expected 2 agent sessions (cli + gateway), got {len(agent_ids)}"
    finally:
        try:
            _remove_test_sessions(conn, 'cli_legacy_001', 'gw_new_001')
            conn.close()
        except Exception:
            pass
        post('/api/settings', {'show_cli_sessions': False})

View File

@@ -0,0 +1,61 @@
import pathlib
import re
REPO_ROOT = pathlib.Path(__file__).parent.parent.resolve()
BOOT_JS = (REPO_ROOT / "static" / "boot.js").read_text(encoding="utf-8")
UI_JS = (REPO_ROOT / "static" / "ui.js").read_text(encoding="utf-8")
SESSIONS_JS = (REPO_ROOT / "static" / "sessions.js").read_text(encoding="utf-8")
def _ime_guarded_enter_pattern(event_var_pattern, require_no_shift=False):
no_shift = rf"\s*&&\s*!\s*{event_var_pattern}\.shiftKey" if require_no_shift else ""
return (
rf"if\s*\(\s*{event_var_pattern}\.key\s*===\s*'Enter'{no_shift}\s*\)\s*\{{\s*"
rf"if\s*\(\s*{event_var_pattern}\.isComposing\s*\)\s*"
rf"(?:\{{\s*return\s*;?\s*\}}|return\s*;?)"
)
def test_boot_chat_enter_send_respects_ime_composition():
    """Both Enter handlers in boot.js must bail out during IME composition."""
    composer_pattern = _ime_guarded_enter_pattern("e")
    assert re.search(composer_pattern, BOOT_JS, re.DOTALL), \
        "Chat composer Enter handler must ignore IME composition Enter in static/boot.js"
    dropdown_pattern = _ime_guarded_enter_pattern("e", require_no_shift=True)
    assert re.search(dropdown_pattern, BOOT_JS, re.DOTALL), \
        "Command dropdown Enter handler must ignore IME composition Enter in static/boot.js"
def test_ui_enter_submit_paths_respect_ime_composition():
    """All three Enter-to-submit paths in ui.js must respect IME composition."""
    dialog_pattern = (
        rf"document\.addEventListener\('keydown',e=>\{{[\s\S]*?{_ime_guarded_enter_pattern('e')}"
    )
    assert re.search(dialog_pattern, UI_JS, re.DOTALL), \
        "App dialog Enter handler must ignore IME composition Enter in static/ui.js"
    edit_pattern = _ime_guarded_enter_pattern("e", require_no_shift=True)
    assert re.search(edit_pattern, UI_JS, re.DOTALL), \
        "Message edit Enter-to-save handler must ignore IME composition Enter in static/ui.js"
    rename_pattern = rf"inp\.onkeydown=\(e2\)=>\{{\s*{_ime_guarded_enter_pattern('e2')}"
    assert re.search(rename_pattern, UI_JS, re.DOTALL), \
        "Workspace rename Enter handler must ignore IME composition Enter in static/ui.js"
def test_sessions_enter_submit_paths_respect_ime_composition():
    """Rename/create inputs in sessions.js must guard Enter against IME composition."""
    # Event variable is "e" or "e2" depending on the handler, hence r"e2?".
    guarded = re.findall(_ime_guarded_enter_pattern(r"e2?"), SESSIONS_JS, re.DOTALL)
    assert len(guarded) >= 3, \
        "Session and project rename/create Enter handlers must ignore IME composition Enter in static/sessions.js"

322
tests/test_issue336.py Normal file
View File

@@ -0,0 +1,322 @@
"""
Tests for issue #336 — opt-in chat bubble layout (PR #398).
Covers:
- api/config.py: bubble_layout present in _SETTINGS_DEFAULTS with default False
- api/config.py: bubble_layout present in _SETTINGS_BOOL_KEYS
- api/config.py: bubble_layout not in password-filtered keys (safe to expose)
- static/boot.js: boot path applies bubble-layout class from settings
- static/boot.js: catch path removes bubble-layout class on API failure
- static/panels.js: loadSettingsPanel reads bubble_layout checkbox
- static/panels.js: saveSettings writes bubble_layout and toggles body class
- static/style.css: body.bubble-layout CSS selectors present
- static/style.css: responsive max-width rule for bubble layout
- static/index.html: settingsBubbleLayout checkbox element present
- static/index.html: i18n keys wired on label and description
- static/i18n.js: English label and description keys present
- static/i18n.js: Spanish label and description keys present
- Integration: bubble_layout default is False in GET /api/settings
- Integration: bubble_layout persists via POST /api/settings
- Integration: non-bool value is coerced to bool on POST
"""
import json
import pathlib
import re
import unittest
import urllib.error
import urllib.request
REPO_ROOT = pathlib.Path(__file__).parent.parent
CONFIG_PY = (REPO_ROOT / "api" / "config.py").read_text()
BOOT_JS = (REPO_ROOT / "static" / "boot.js").read_text()
PANELS_JS = (REPO_ROOT / "static" / "panels.js").read_text()
STYLE_CSS = (REPO_ROOT / "static" / "style.css").read_text()
INDEX_HTML = (REPO_ROOT / "static" / "index.html").read_text()
I18N_JS = (REPO_ROOT / "static" / "i18n.js").read_text()
from tests._pytest_port import BASE
def _get(path):
    """GET BASE+path; return (parsed JSON body, HTTP status code)."""
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        parsed = json.loads(resp.read())
        code = resp.status
    return parsed, code
def _post(path, body=None):
    """POST `body` as JSON; return (parsed JSON, status).

    HTTPError responses are caught and their JSON body returned with the
    error code, so callers can assert on status without try/except.
    """
    encoded = json.dumps(body or {}).encode()
    request = urllib.request.Request(
        BASE + path, data=encoded, headers={"Content-Type": "application/json"}
    )
    try:
        with urllib.request.urlopen(request, timeout=10) as resp:
            return json.loads(resp.read()), resp.status
    except urllib.error.HTTPError as err:
        return json.loads(err.read()), err.code
# ── config.py static checks ───────────────────────────────────────────────
class TestBubbleLayoutConfig(unittest.TestCase):
    """Verify bubble_layout is correctly registered in config.py."""

    def test_bubble_layout_in_settings_defaults(self):
        """bubble_layout must appear in _SETTINGS_DEFAULTS."""
        # NOTE(review): assertIn scans the whole file, not the defaults dict
        # specifically — acceptable as a smoke check.
        self.assertIn(
            '"bubble_layout"',
            CONFIG_PY,
            "bubble_layout key missing from _SETTINGS_DEFAULTS in api/config.py",
        )

    def test_bubble_layout_default_is_false(self):
        """bubble_layout default value must be False (opt-in, off by default)."""
        # Match "bubble_layout": False with optional spacing
        self.assertRegex(
            CONFIG_PY,
            r'"bubble_layout"\s*:\s*False',
            "bubble_layout default must be False in _SETTINGS_DEFAULTS",
        )

    def test_bubble_layout_in_bool_keys(self):
        """bubble_layout must be in _SETTINGS_BOOL_KEYS for coercion."""
        # Find the _SETTINGS_BOOL_KEYS block and verify membership
        bool_keys_match = re.search(
            r"_SETTINGS_BOOL_KEYS\s*=\s*\{([^}]+)\}", CONFIG_PY, re.DOTALL
        )
        self.assertIsNotNone(
            bool_keys_match, "_SETTINGS_BOOL_KEYS block not found in config.py"
        )
        self.assertIn(
            '"bubble_layout"',
            bool_keys_match.group(1),
            "bubble_layout missing from _SETTINGS_BOOL_KEYS",
        )
# ── boot.js static checks ────────────────────────────────────────────────
class TestBubbleLayoutBootJS(unittest.TestCase):
    """Verify bubble-layout class management in boot.js."""

    def test_boot_applies_bubble_layout_class(self):
        """boot.js success path must toggle body.bubble-layout from settings."""
        self.assertIn(
            "classList.toggle('bubble-layout',!!s.bubble_layout)",
            BOOT_JS,
            "boot.js must call classList.toggle('bubble-layout', ...) on settings load",
        )

    def test_boot_catch_removes_bubble_layout_class(self):
        """boot.js catch path must remove bubble-layout (default off on API failure)."""
        self.assertIn(
            "classList.remove('bubble-layout')",
            BOOT_JS,
            "boot.js catch block must call classList.remove('bubble-layout') on API failure",
        )
# ── panels.js static checks ──────────────────────────────────────────────
class TestBubbleLayoutPanelsJS(unittest.TestCase):
    """Verify settings panel wires the bubble_layout checkbox."""

    def test_load_settings_reads_bubble_layout_checkbox(self):
        """loadSettingsPanel must read the settingsBubbleLayout checkbox state."""
        self.assertIn(
            "settingsBubbleLayout",
            PANELS_JS,
            "panels.js must reference settingsBubbleLayout checkbox",
        )

    def test_save_settings_writes_bubble_layout(self):
        """saveSettings must write body.bubble_layout from the checkbox."""
        self.assertIn(
            "body.bubble_layout",
            PANELS_JS,
            "saveSettings must set body.bubble_layout from checkbox",
        )

    def test_save_settings_toggles_body_class(self):
        """saveSettings must apply body class toggle for live preview."""
        self.assertIn(
            "classList.toggle('bubble-layout', body.bubble_layout)",
            PANELS_JS,
            "saveSettings must toggle 'bubble-layout' on document.body for live preview",
        )
# ── style.css static checks ──────────────────────────────────────────────
class TestBubbleLayoutCSS(unittest.TestCase):
    """Verify CSS selectors for bubble layout are present and gated on body class."""

    def test_user_row_right_align_selector_present(self):
        """CSS must right-align user message rows when bubble-layout is active."""
        self.assertIn(
            "body.bubble-layout .msg-row:has(.msg-role.user)",
            STYLE_CSS,
            "CSS selector for user bubble alignment missing from style.css",
        )

    def test_assistant_row_left_align_selector_present(self):
        """CSS must left-align assistant message rows when bubble-layout is active."""
        self.assertIn(
            "body.bubble-layout .msg-row:has(.msg-role.assistant)",
            STYLE_CSS,
            "CSS selector for assistant bubble alignment missing from style.css",
        )

    def test_bubble_layout_responsive_rule_present(self):
        """A responsive max-width rule for narrow screens must be present."""
        # Both selectors must appear inside a @media block
        self.assertRegex(
            STYLE_CSS,
            r"@media\([^)]*700px[^)]*\)[^{]*\{[^}]*bubble-layout",
            "Responsive bubble-layout rule (700px breakpoint) missing from style.css",
        )
# ── index.html static checks ─────────────────────────────────────────────
class TestBubbleLayoutHTML(unittest.TestCase):
    """Verify the settings checkbox is present and correctly wired in index.html."""

    def test_settings_checkbox_present(self):
        """The settingsBubbleLayout checkbox must exist in index.html."""
        self.assertIn(
            'id="settingsBubbleLayout"',
            INDEX_HTML,
            "settingsBubbleLayout checkbox missing from index.html",
        )

    def test_settings_label_i18n_key_wired(self):
        """Label span must carry the settings_label_bubble_layout i18n key."""
        self.assertIn(
            'data-i18n="settings_label_bubble_layout"',
            INDEX_HTML,
            "settings_label_bubble_layout i18n key not wired on label span",
        )

    def test_settings_desc_i18n_key_wired(self):
        """Description div must carry the settings_desc_bubble_layout i18n key."""
        self.assertIn(
            'data-i18n="settings_desc_bubble_layout"',
            INDEX_HTML,
            "settings_desc_bubble_layout i18n key not wired on description div",
        )
# ── i18n.js static checks ────────────────────────────────────────────────
class TestBubbleLayoutI18N(unittest.TestCase):
    """Verify English and Spanish locale keys are present in i18n.js."""

    def _extract_locale_block(self, lang_start_marker, lang_end_marker):
        """Extract the content between two locale markers.

        Relies on i18n.js declaring locales in source order (en, es, de, ...),
        each opening with a "\n  <lang>: {" marker.
        """
        start = I18N_JS.find(lang_start_marker)
        end = I18N_JS.find(lang_end_marker, start)
        self.assertGreater(start, -1, f"Start marker '{lang_start_marker}' not found")
        self.assertGreater(end, start, f"End marker '{lang_end_marker}' not found after start")
        return I18N_JS[start:end]

    def test_english_label_key_present(self):
        """English locale must have settings_label_bubble_layout."""
        en_block = self._extract_locale_block("\n  en: {", "\n  es: {")
        self.assertIn(
            "settings_label_bubble_layout",
            en_block,
            "settings_label_bubble_layout missing from English locale",
        )

    def test_english_desc_key_present(self):
        """English locale must have settings_desc_bubble_layout."""
        en_block = self._extract_locale_block("\n  en: {", "\n  es: {")
        self.assertIn(
            "settings_desc_bubble_layout",
            en_block,
            "settings_desc_bubble_layout missing from English locale",
        )

    def test_spanish_label_key_present(self):
        """Spanish locale must have settings_label_bubble_layout."""
        es_block = self._extract_locale_block("\n  es: {", "\n  de: {")
        self.assertIn(
            "settings_label_bubble_layout",
            es_block,
            "settings_label_bubble_layout missing from Spanish locale",
        )

    def test_spanish_desc_key_present(self):
        """Spanish locale must have settings_desc_bubble_layout."""
        es_block = self._extract_locale_block("\n  es: {", "\n  de: {")
        self.assertIn(
            "settings_desc_bubble_layout",
            es_block,
            "settings_desc_bubble_layout missing from Spanish locale",
        )
# ── Integration tests (require live server on test server port) ─────────────────
class TestBubbleLayoutSettingsAPI(unittest.TestCase):
    """Integration tests: bubble_layout via GET/POST /api/settings.

    Fix: mutating tests now register the restore POST with addCleanup()
    immediately after flipping the setting, so a failed assertion can no
    longer leave bubble_layout flipped for subsequent tests (the old code
    only restored after the assertions passed).
    """

    def _call_or_skip(self, fn, *args):
        """Invoke *fn*; skip the test when the server is unreachable."""
        try:
            return fn(*args)
        except OSError:
            self.skipTest("Server not running on test server port")

    def test_bubble_layout_default_is_false(self):
        """GET /api/settings must return bubble_layout: false by default."""
        d, status = self._call_or_skip(_get, "/api/settings")
        self.assertEqual(status, 200)
        self.assertIn(
            "bubble_layout",
            d,
            "bubble_layout missing from GET /api/settings response",
        )
        self.assertFalse(
            d["bubble_layout"],
            "bubble_layout default must be False (opt-in feature)",
        )

    def test_bubble_layout_persists_true(self):
        """POST /api/settings with bubble_layout:true must persist and round-trip."""
        _, status = self._call_or_skip(_post, "/api/settings", {"bubble_layout": True})
        # Restore runs even if an assertion below fails.
        self.addCleanup(_post, "/api/settings", {"bubble_layout": False})
        self.assertEqual(status, 200)
        d, _ = _get("/api/settings")
        self.assertTrue(d["bubble_layout"], "bubble_layout=True must persist after POST")

    def test_bubble_layout_persists_false(self):
        """POST /api/settings with bubble_layout:false must persist and round-trip."""
        self._call_or_skip(_post, "/api/settings", {"bubble_layout": True})
        self._call_or_skip(_post, "/api/settings", {"bubble_layout": False})
        d, _ = _get("/api/settings")
        self.assertFalse(d["bubble_layout"], "bubble_layout=False must persist after POST")

    def test_bubble_layout_truthy_string_coerced_to_bool(self):
        """Non-bool truthy value must be coerced to bool by _SETTINGS_BOOL_KEYS logic."""
        self._call_or_skip(_post, "/api/settings", {"bubble_layout": "1"})
        # Restore runs even if the assertion below fails.
        self.addCleanup(_post, "/api/settings", {"bubble_layout": False})
        d, _ = _get("/api/settings")
        self.assertIsInstance(
            d["bubble_layout"],
            bool,
            "bubble_layout must be a bool in API response (bool coercion via _SETTINGS_BOOL_KEYS)",
        )

34
tests/test_issue341.py Normal file
View File

@@ -0,0 +1,34 @@
"""Tests for GitHub issue #341: .msg-body table CSS styles."""
import os
# Path to the stylesheet under test, resolved relative to this test file.
CSS_PATH = os.path.join(os.path.dirname(__file__), "..", "static", "style.css")
def _read_css():
    """Return the full text of static/style.css.

    Opens with an explicit UTF-8 encoding: the default locale encoding is
    platform-dependent and can mis-decode non-ASCII characters in the CSS.
    """
    with open(CSS_PATH, "r", encoding="utf-8") as f:
        return f.read()
def test_msg_body_table_css_present():
    """The table selector and collapsed-border rule must both be in style.css."""
    stylesheet = _read_css()
    expectations = (
        (".msg-body table", ".msg-body table rule missing from style.css"),
        ("border-collapse:collapse", "border-collapse:collapse missing from style.css"),
    )
    for needle, failure_msg in expectations:
        assert needle in stylesheet, failure_msg
def test_msg_body_table_th_td_present():
    """Both th and td rules for message-body tables must be present."""
    stylesheet = _read_css()
    for cell in ("th", "td"):
        assert f".msg-body {cell}" in stylesheet, \
            f".msg-body {cell} rule missing from style.css"
def test_msg_body_table_tr_stripe_present():
    """Zebra striping for even table rows must be present."""
    selector = ".msg-body tr:nth-child(even)"
    assert selector in _read_css(), f"{selector} rule missing from style.css"
def test_msg_body_light_theme_overrides():
    """Light-mode (:root:not(.dark)) overrides must exist for th and td."""
    stylesheet = _read_css()
    for cell in ("th", "td"):
        assert f':root:not(.dark) .msg-body {cell}' in stylesheet, \
            f'Light-mode override for .msg-body {cell} missing from style.css'

124
tests/test_issue342.py Normal file
View File

@@ -0,0 +1,124 @@
"""
Tests for GitHub issue #342: auto-link plain URLs in chat messages.
These are structural tests that verify the fix is present in static/ui.js
without requiring a running server or JavaScript engine.
"""
import os
import re
# Path to the front-end script under test, resolved relative to this test file.
UI_JS = os.path.join(os.path.dirname(__file__), '..', 'static', 'ui.js')
def read_ui_js():
    """Return the full text of static/ui.js.

    Opens with an explicit UTF-8 encoding: the default locale encoding is
    platform-dependent and the file contains non-ASCII characters.
    """
    with open(UI_JS, 'r', encoding='utf-8') as f:
        return f.read()
def test_autolink_comment_present():
    """The Autolink comment should be present in renderMd() to document the feature."""
    js_source = read_ui_js()
    marker = 'Autolink: convert plain URLs'
    assert marker in js_source, (
        "Expected 'Autolink: convert plain URLs' comment not found in static/ui.js. "
        "Did the autolink pass get added?"
    )
def test_autolink_regex_in_rendermd():
    """The autolink regex pattern (https?://) should appear in renderMd()."""
    js_source = read_ui_js()
    fn_start = js_source.find('function renderMd(raw){')
    assert fn_start != -1, "renderMd function not found in ui.js"
    # Scan a generous fixed-size window of the function body for the pattern.
    fn_window = js_source[fn_start:fn_start + 5000]
    assert 'https?:\\/\\/' in fn_window, (
        "Autolink regex (https?:\\/\\/) not found inside renderMd() body."
    )
def test_autolink_uses_esc_for_xss_safety():
    """The autolink code must use esc() to escape the display text of URLs, preventing XSS.
    Note: esc() is intentionally NOT applied to the href value (that would corrupt & in
    query strings). It IS applied to the visible link text (esc(clean)) to prevent XSS."""
    js_source = read_ui_js()
    start = js_source.find('// Autolink: convert plain URLs')
    assert start != -1, "Autolink comment not found in ui.js"
    # Inspect roughly the next 600 characters following the comment marker.
    block = js_source[start:start + 600]
    # Visible link text must be escaped to block XSS.
    assert 'esc(clean)' in block, (
        "Autolink block should use esc(clean) for the link display text (XSS safety), "
        "but it was not found."
    )
    # The href value must NOT be escaped — that would break & in query strings.
    assert 'href="${esc(clean)}"' not in block, (
        "Autolink block should use href=\"${clean}\" (not esc'd) to preserve & in query strings."
    )
def test_autolink_in_inline_md():
    """The autolink pass should also be present inside the inlineMd() helper."""
    js_source = read_ui_js()
    fn_start = js_source.find('function inlineMd(t){')
    assert fn_start != -1, "inlineMd function not found in ui.js"
    # The helper ends with 'return t;' followed by the closing brace.
    fn_end = js_source.find('return t;\n }', fn_start)
    assert fn_end != -1, "Could not locate end of inlineMd function"
    fn_body = js_source[fn_start:fn_end + 20]
    assert 'https?:\\/\\/' in fn_body, (
        "Autolink regex not found inside inlineMd() — plain URLs in list items "
        "and blockquotes won't be autolinked."
    )
def test_autolink_after_safe_tags_pass():
    """The autolink pass must come AFTER the SAFE_TAGS escape pass (ordering matters)."""
    js_source = read_ui_js()
    safe_tags_idx = js_source.find('s=s.replace(/<\\/?[a-z][^>]*>/gi,tag=>SAFE_TAGS.test(tag)?tag:esc(tag));')
    autolink_idx = js_source.find('// Autolink: convert plain URLs')
    parts_idx = js_source.find('const parts=s.split(/\\n{2,}/);')
    # All three pipeline stages must be present before ordering is checked.
    for idx, missing_msg in (
        (safe_tags_idx, "SAFE_TAGS pass not found"),
        (autolink_idx, "Autolink pass not found"),
        (parts_idx, "Paragraph-wrap parts line not found"),
    ):
        assert idx != -1, missing_msg
    assert safe_tags_idx < autolink_idx < parts_idx, (
        f"Ordering wrong: SAFE_TAGS at {safe_tags_idx}, autolink at {autolink_idx}, "
        f"parts (paragraph wrap) at {parts_idx}. "
        "Autolink must come between SAFE_TAGS pass and paragraph wrap."
    )
def test_autolink_target_blank_and_rel():
    """Autolinked URLs should open in a new tab with rel=noopener for security."""
    js_source = read_ui_js()
    start = js_source.find('// Autolink: convert plain URLs')
    assert start != -1, "Autolink comment not found"
    # Wider 700-char window: the stash preamble added by the fix precedes
    # the anchor attributes.
    block = js_source[start:start + 700]
    for attribute, failure_msg in (
        ('target="_blank"', 'Autolinked URLs should have target="_blank"'),
        ('rel="noopener"', 'Autolinked URLs should have rel="noopener" for security'),
    ):
        assert attribute in block, failure_msg
def test_safe_tags_includes_anchor():
    """SAFE_TAGS regex must include 'a' so <a> tags from autolink are not escaped."""
    js_source = read_ui_js()
    # The pattern contains slashes, so locate the definition line-by-line
    # rather than trying to extract the regex literal.
    safe_tags_line = next(
        (line for line in js_source.splitlines() if 'const SAFE_TAGS=' in line),
        None,
    )
    assert safe_tags_line is not None, "SAFE_TAGS const definition not found in ui.js"
    # 'a' must appear as a tag alternative in the pattern (e.g. |a| or |a)).
    assert '|a|' in safe_tags_line or '|a)' in safe_tags_line, (
        f"SAFE_TAGS line does not include 'a' tag — "
        "<a> tags emitted by autolink would be escaped!\n"
        f"Line: {safe_tags_line}"
    )

348
tests/test_issue347.py Normal file
View File

@@ -0,0 +1,348 @@
"""
Tests for GitHub issue #347: KaTeX / LaTeX math rendering in chat and workspace previews.
Structural tests — no server required. Verify:
- renderMd() stashes and restores $..$ and $$...$$ math delimiters
- KaTeX lazy-load function exists and follows the mermaid pattern
- KaTeX JS loaded from CDN with SRI integrity hash
- KaTeX CSS loaded in index.html with SRI hash
- CSS rules present for .katex-block and .katex-inline
- SAFE_TAGS updated to allow <span> (for inline math)
- renderKatexBlocks() is wired into the requestAnimationFrame call
"""
import pathlib
import re
# Repository root plus the three static assets under test, each read once
# at import time so every test below can do plain substring checks.
REPO = pathlib.Path(__file__).parent.parent
UI_JS = (REPO / 'static' / 'ui.js').read_text(encoding='utf-8')
INDEX = (REPO / 'static' / 'index.html').read_text(encoding='utf-8')
CSS = (REPO / 'static' / 'style.css').read_text(encoding='utf-8')
# ── renderMd pipeline ──────────────────────────────────────────────────────────
def test_display_math_stash_present():
    """renderMd must stash $$...$$ display math before other processing."""
    stash_regex_present = r'\$\$([\s\S]+?)\$\$' in UI_JS or '$$' in UI_JS
    assert stash_regex_present, \
        'Display math $$..$$ stash regex not found in ui.js'
    # The stash sentinel for math placeholders is \x00M.
    assert '\\x00M' in UI_JS, 'Math stash token \\x00M not found in renderMd'

def test_inline_math_stash_present():
    """renderMd must stash $..$ inline math."""
    assert 'math_stash' in UI_JS, 'math_stash array not found in renderMd'

def test_katex_block_placeholder_emitted():
    """renderMd restore pass must emit .katex-block divs for display math."""
    assert 'katex-block' in UI_JS, \
        '.katex-block placeholder div not emitted by renderMd restore pass'

def test_katex_inline_placeholder_emitted():
    """renderMd restore pass must emit .katex-inline spans for inline math."""
    assert 'katex-inline' in UI_JS, \
        '.katex-inline placeholder span not emitted by renderMd restore pass'

def test_data_katex_attribute_present():
    """Placeholders must carry data-katex attribute for display/inline distinction."""
    assert 'data-katex' in UI_JS, \
        'data-katex attribute not found — renderKatexBlocks cannot distinguish display from inline'
# ── renderKatexBlocks() ────────────────────────────────────────────────────────
def test_render_katex_blocks_function_exists():
    """renderKatexBlocks() function must exist in ui.js."""
    assert 'function renderKatexBlocks()' in UI_JS, \
        'renderKatexBlocks() function not found in ui.js'

def test_katex_lazy_load_follows_mermaid_pattern():
    """KaTeX must use the same lazy-load pattern as mermaid (load on first use)."""
    assert '_katexLoading' in UI_JS, '_katexLoading flag not found'
    assert '_katexReady' in UI_JS, '_katexReady flag not found'

def test_katex_js_loaded_from_cdn():
    """KaTeX JS must be loaded from jsdelivr CDN."""
    assert 'katex@0.16' in UI_JS, \
        'KaTeX JS CDN URL not found in ui.js — expected katex@0.16.x'

def test_katex_js_has_sri_hash():
    """KaTeX JS CDN tag must have an SRI integrity hash."""
    # The hash is in the script.integrity assignment (either quote style).
    assert "script.integrity='sha384-" in UI_JS or 'script.integrity="sha384-' in UI_JS, \
        'KaTeX JS SRI integrity hash not found in ui.js'

def test_katex_display_mode_used():
    """renderKatexBlocks must pass displayMode based on data-katex attribute."""
    assert 'displayMode' in UI_JS, \
        'displayMode not passed to katex.render() — display math will render inline'

def test_katex_throw_on_error_false():
    """KaTeX must be configured with throwOnError:false to degrade gracefully."""
    assert 'throwOnError:false' in UI_JS, \
        'throwOnError:false not set — bad LaTeX will throw and break the message'

def test_render_katex_blocks_wired_into_raf():
    """renderKatexBlocks() must be called in the same requestAnimationFrame as renderMermaidBlocks()."""
    # Fix: dropped the unused `raf_idx` local the old version computed and
    # never read. Check every rAF call site for a nearby renderKatexBlocks.
    has_katex_in_raf = any(
        'renderKatexBlocks' in UI_JS[m.start():m.start()+200]
        for m in re.finditer(r'requestAnimationFrame', UI_JS)
    )
    assert has_katex_in_raf, \
        'renderKatexBlocks() not found in any requestAnimationFrame call — math will not render'
# ── index.html ────────────────────────────────────────────────────────────────
def test_katex_css_in_index_html():
    """KaTeX CSS must be loaded in index.html."""
    assert 'katex@0.16' in INDEX, \
        'KaTeX CSS CDN link not found in index.html'

def test_katex_css_has_sri_hash():
    """KaTeX CSS link in index.html must have an SRI integrity hash."""
    # Fix: the old `A or B and C` relied on implicit precedence; parenthesized
    # to the equivalent `A or (B and C)` so the intent — either the known hash,
    # or generic integrity+katex markers — is explicit.
    assert 'sha384-5TcZemv2l' in INDEX or ('integrity' in INDEX and 'katex' in INDEX), \
        'KaTeX CSS SRI integrity hash not found in index.html'
# ── style.css ─────────────────────────────────────────────────────────────────
def test_katex_block_css_present():
    """.katex-block CSS rule must exist for centered display math."""
    assert '.katex-block' in CSS, \
        '.katex-block CSS rule missing from style.css — display math will have no layout'

def test_katex_inline_css_present():
    """.katex-inline CSS rule must exist."""
    assert '.katex-inline' in CSS, \
        '.katex-inline CSS rule missing from style.css'

def test_katex_block_text_align_center():
    """.katex-block must be text-align:center for display math."""
    # NOTE(review): this checks for text-align:center anywhere in the
    # stylesheet, not specifically inside the .katex-block rule.
    assert 'text-align:center' in CSS, \
        'text-align:center not found for .katex-block'
# ── SAFE_TAGS ──────────────────────────────────────────────────────────────────
def test_safe_tags_includes_span():
    """SAFE_TAGS must include <span> to allow .katex-inline spans through the escape pass."""
    match = re.search(r'SAFE_TAGS\s*=\s*/.*?/i', UI_JS)
    assert match, 'SAFE_TAGS pattern not found in ui.js'
    # The matched regex literal must mention the span tag.
    assert 'span' in match.group(), \
        '<span> not in SAFE_TAGS — inline math spans will be HTML-escaped and rendered as text'
# ── Stash ordering: fence must protect code spans from math extraction ─────────
# workspace.js source, read once for the workspace-preview wiring checks below.
WORKSPACE_JS = (REPO / 'static' / 'workspace.js').read_text(encoding='utf-8')
def test_fence_stash_before_math_stash():
    """fence_stash must be initialized and populated BEFORE math_stash in renderMd.
    If math_stash runs first, dollar signs inside backtick code spans are extracted
    as math, leaving placeholder tokens inside the stashed code string. The code span
    then renders with KaTeX inside <code> instead of the literal dollar-sign text.
    """
    pos_fence = UI_JS.find("const fence_stash=[]")
    pos_math = UI_JS.find("const math_stash=[]")
    assert pos_fence != -1, "fence_stash not found in renderMd"
    assert pos_math != -1, "math_stash not found in renderMd"
    assert pos_fence < pos_math, (
        "fence_stash must be declared BEFORE math_stash in renderMd "
        f"(fence at char {pos_fence}, math at char {pos_math}). "
        "If math runs first, `$x$` inside backticks gets extracted as math instead of code."
    )

def test_fence_stash_populated_before_math_stash():
    """The fence_stash s.replace call must appear before any math_stash s.replace calls."""
    pos_fence_push = UI_JS.find("fence_stash.push(m)")
    pos_math_push = UI_JS.find("math_stash.push(")
    assert pos_fence_push != -1, "fence_stash population call not found"
    assert pos_math_push != -1, "math_stash population call not found"
    assert pos_fence_push < pos_math_push, (
        "fence_stash must be populated before math_stash to protect code span contents"
    )

def test_math_stash_comment_says_after_fence():
    """The math stash comment should explain it runs AFTER fence_stash, not before."""
    stale_comment = "Must run BEFORE fence_stash"
    assert stale_comment not in UI_JS, (
        "Old misleading comment still present. Math stash runs AFTER fence_stash. "
        "The comment should say 'Runs AFTER fence_stash'."
    )
# ── Pipeline regression: code spans protect their contents ────────────────────
def test_math_restore_after_fence_restore():
    """Math stash tokens are restored AFTER fence restore, so code spans get
    their raw text back (not KaTeX placeholders)."""
    fence_restore_pos = UI_JS.find("fence_stash[+i]")
    math_restore_pos = UI_JS.find("math_stash[+i]")
    assert fence_restore_pos != -1, "fence_stash restore not found"
    assert math_restore_pos != -1, "math_stash restore not found"
    # Both restores must exist; their relative order doesn't matter for correctness
    # (they use different tokens: \x00F vs \x00M), but they must be separate calls.
    assert fence_restore_pos != math_restore_pos, "fence and math restore must be separate calls"

def test_stash_tokens_distinct():
    """fence_stash and math_stash must use distinct sentinel tokens to avoid collisions.

    Fix: the old version had three overlapping asserts — the string
    "'\\\\x00F'" contains 'x00F' as a substring, so each `A or B` collapsed
    to `B`, and the final combined assert subsumed both. One membership
    test per token is exactly equivalent.
    """
    assert 'x00F' in UI_JS, (
        "fence stash token (\\x00F) not found — must be distinct from math token"
    )
    assert 'x00M' in UI_JS, (
        "math stash token (\\x00M) not found — must be distinct from fence token"
    )
# ── Workspace preview renderKatexBlocks wiring ────────────────────────────────
def test_workspace_calls_render_katex_after_preview():
    """workspace.js must call renderKatexBlocks() after setting previewMd.innerHTML.
    Without this, math placeholders appear in workspace file previews but are never
    rendered by KaTeX (renderKatexBlocks is only wired into renderMessages rAF).
    """
    # Presence check only; the ordering check lives in the next test.
    assert "renderKatexBlocks" in WORKSPACE_JS, (
        "workspace.js must call renderKatexBlocks() after renderMd() for file previews"
    )
def test_workspace_renders_katex_after_file_open():
    """workspace.js renderKatexBlocks call must come after the renderMd(data.content) assignment."""
    preview_md_pos = WORKSPACE_JS.find("renderMd(data.content)")
    # Use the actual call string (not a stray regex match on 'M' characters)
    katex_call_str = "renderKatexBlocks==='function'"
    katex_call_pos = WORKSPACE_JS.find(katex_call_str)
    assert preview_md_pos != -1, "renderMd(data.content) not found in workspace.js"
    assert katex_call_pos != -1, (
        "renderKatexBlocks guard (typeof renderKatexBlocks==='function') not found in workspace.js"
    )
    # The call after 'renderMd(data.content)' — find the LAST occurrence
    # (there may be an earlier one in the save path at line ~153)
    last_katex_pos = WORKSPACE_JS.rfind(katex_call_str)
    assert last_katex_pos > preview_md_pos, (
        "renderKatexBlocks must be called AFTER renderMd(data.content) in workspace.js "
        f"(renderMd at {preview_md_pos}, last renderKatexBlocks at {last_katex_pos})"
    )
def test_workspace_katex_guarded_by_typeof():
    """workspace.js renderKatexBlocks call must guard with typeof check for safety
    in case KaTeX feature is not loaded (e.g. test environments, offline)."""
    # A bare call would throw ReferenceError when ui.js isn't loaded first.
    assert "typeof renderKatexBlocks" in WORKSPACE_JS, (
        "workspace.js must guard renderKatexBlocks call with typeof check: "
        "if(typeof renderKatexBlocks==='function')renderKatexBlocks()"
    )
# ── SAFE_TAGS: span addition should not expand attack surface ─────────────────
def test_safe_tags_span_is_narrowly_scoped():
    """SAFE_TAGS adding <span> is only a bypass if span carries dangerous attributes.
    Verify the SAFE_TAGS regex tests the tag NAME only, not arbitrary attributes.
    The rest of the pipeline uses esc() for user content, so attribute injection
    into KaTeX spans isn't possible.
    """
    # Primary pattern requires a non-empty regex body; the fallback covers a
    # degenerate empty literal (//i).
    safe_tags_match = re.search(r"SAFE_TAGS\s*=\s*/(.+?)/i", UI_JS)
    if not safe_tags_match:
        safe_tags_match = re.search(r"SAFE_TAGS\s*=\s*/(.*?)/i", UI_JS)
    assert safe_tags_match, "SAFE_TAGS regex not found"
    pattern = safe_tags_match.group(1)
    # Fix: the old check compared the same string twice (r"[\s>]" and
    # r'[\s>]' are identical) — a single membership test is equivalent.
    assert r"[\s>]" in pattern, (
        "SAFE_TAGS must enforce a boundary after the tag name to prevent "
        "<spanxss> from matching when checking for <span>"
    )
# ── False-positive prevention ─────────────────────────────────────────────────
def test_inline_math_regex_requires_non_space_boundaries():
    """The $...$ inline regex must require non-space at both boundaries.
    This prevents 'costs $5 and $10' from matching — the space after the opening
    $ means it's a currency amount, not math.
    """
    push_idx = UI_JS.find("type:'inline',src:m")
    assert push_idx != -1, "Inline math stash push not found"
    # Walk back to the start of the line containing the push, then take a
    # window that includes the regex literal driving the replace().
    line_start = UI_JS.rfind('\n', 0, push_idx) + 1
    inline_line = UI_JS[line_start:push_idx + 50]
    has_boundary_guard = '\\s' in inline_line or '[^' in inline_line
    assert has_boundary_guard, (
        f"Inline math regex must exclude spaces at boundaries to prevent false "
        f"positives on currency like $5. Found: {inline_line[:120]}"
    )

def test_display_math_stashed_before_inline():
    """$$...$$ display math must be stashed before $...$ inline math.
    If inline runs first on '$$x$$', it could match '$' + 'x' + '$' leaving
    a stray outer '$', corrupting the output.
    """
    pos_display = UI_JS.find("type:'display',src:m")
    pos_inline = UI_JS.find("type:'inline',src:m")
    assert pos_display != -1, "display math stash not found"
    assert pos_inline != -1, "inline math stash not found"
    # First occurrence of the display stash must precede the inline stash.
    assert pos_display < pos_inline, (
        "Display math ($$...$$) must be stashed before inline math ($...$) "
        "to prevent $$ from being parsed as two adjacent inline delimiters"
    )
def test_math_stash_token_uses_single_backslash_null_byte():
    """Math stash tokens must use the null-byte form (single backslash x00M).
    The restore regex expects a null byte character. If the stash emits
    a literal backslash+x00M (double backslash = 5-char string), the restore
    regex never matches and the tokens appear verbatim in the rendered output.
    The fence_stash correctly uses the null byte convention. Math stash must be consistent.

    Fixes: removed the redundant function-local `import re` (re is imported
    at module level), and the assert messages now spell the token as the
    readable text \\x00M instead of embedding a literal NUL byte.
    """
    # In the source file, the correct form is: return '\x00M'
    # The wrong form (double backslash) would be: return '\\x00M'
    bad_returns = re.findall(r"return\s+'\\\\x00M'", UI_JS)
    assert not bad_returns, (
        f"Found {len(bad_returns)} math stash return(s) using double-backslash \\\\x00M. "
        "Must use single backslash '\\x00M' (null byte) to match the restore regex."
    )
    # Positive check: single-backslash form must exist
    good_returns = re.findall(r"math_stash\.push.*?return '\\x00M'", UI_JS, re.DOTALL)
    assert good_returns, (
        "Math stash return must use single-backslash '\\x00M' (null byte convention)"
    )

199
tests/test_issue357.py Normal file
View File

@@ -0,0 +1,199 @@
"""
Tests for GitHub issue #357: Docker container fails to start without internet access.
Structural tests — verify Dockerfile and docker_init.bash contain the expected
patterns for pre-installed uv and workspace permission fixes.
Two problems fixed:
1. uv was downloaded at container startup; fails in air-gapped / firewalled environments.
Fix: pre-install uv in the Docker image at build time (system-wide in /usr/local/bin).
2. workspace directory created with plain mkdir (as root); bind-mount dirs created by
Docker as root are unwritable by the hermeswebui user.
Fix: sudo mkdir + sudo chown for workspace directory.
"""
import pathlib
import re
# Repository root plus the Dockerfile and init script under test, read once
# at import time for the substring/ordering checks below.
REPO = pathlib.Path(__file__).parent.parent
DOCKERFILE = (REPO / "Dockerfile").read_text(encoding="utf-8")
INIT_SCRIPT = (REPO / "docker_init.bash").read_text(encoding="utf-8")
# ── Dockerfile: uv pre-installed at build time ───────────────────────────────
class TestDockerfileUvPreinstall:
    """Dockerfile must pre-install uv at build time, system-wide, as root (#357)."""

    def test_dockerfile_installs_uv_at_build_time(self):
        """Dockerfile must install uv via RUN curl at build time (not only at runtime)."""
        assert "RUN curl" in DOCKERFILE and "uv/install.sh" in DOCKERFILE, (
            "Dockerfile must install uv at build time via RUN curl .../uv/install.sh"
        )

    def test_dockerfile_uv_installed_system_wide(self):
        """uv must be installed to a system-wide directory (/usr/local/bin) accessible
        to all users, not to a user-specific ~/.local/bin that another user can't see."""
        uv_install_line = next(
            (line for line in DOCKERFILE.splitlines() if "uv/install.sh" in line),
            None,
        )
        assert uv_install_line is not None, "Could not find uv install line in Dockerfile"
        # Must either use UV_INSTALL_DIR pointing to /usr/local/bin, or run as root
        # (so the default install location is accessible to hermeswebui user)
        has_system_dir = "/usr/local/bin" in uv_install_line or "UV_INSTALL_DIR=/usr/local/bin" in DOCKERFILE
        assert has_system_dir, (
            "uv must be installed to /usr/local/bin (system-wide) so hermeswebui user "
            "can find it. Installing as hermeswebuitoo puts it in /home/hermeswebuitoo/.local/bin "
            "which is NOT on hermeswebui's PATH."
        )

    def test_dockerfile_uv_installed_before_copy(self):
        """uv installation must happen before COPY . /apptoo so it's in the image."""
        uv_pos = DOCKERFILE.find("uv/install.sh")
        copy_pos = DOCKERFILE.find("COPY . /apptoo")
        assert uv_pos != -1, "uv install not found in Dockerfile"
        assert copy_pos != -1, "COPY . /apptoo not found in Dockerfile"
        assert uv_pos < copy_pos, "uv must be installed before COPY . /apptoo"

    def test_dockerfile_uv_installed_as_root_or_before_user_switch(self):
        """uv must be installed as root (USER root) to reach /usr/local/bin.
        If installed as hermeswebuitoo, it lands in ~hermeswebuitoo/.local/bin,
        which the hermeswebui user at runtime can't see.
        """
        lines = DOCKERFILE.splitlines()
        # Fix: the old bare next(...) raised StopIteration — reported as a
        # test ERROR rather than a clean failure — when the line was absent.
        uv_line_idx = next(
            (i for i, line in enumerate(lines) if "uv/install.sh" in line),
            None,
        )
        assert uv_line_idx is not None, "uv install line not found in Dockerfile"
        # Find the last USER directive before the uv install line
        user_before = None
        for i in range(uv_line_idx - 1, -1, -1):
            if lines[i].strip().startswith("USER "):
                user_before = lines[i].strip().split()[1]
                break
        assert user_before == "root", (
            f"uv install must run as USER root (found USER {user_before!r}). "
            "Installing as hermeswebuitoo puts uv in /home/hermeswebuitoo/.local/bin "
            "which is not accessible to the hermeswebui runtime user."
        )
# ── docker_init.bash: skip uv download when already present ─────────────────
class TestInitScriptUvSkip:
    """docker_init.bash must skip the runtime uv download when uv already exists."""

    def test_init_script_checks_uv_before_download(self):
        """docker_init.bash must check 'command -v uv' before attempting download."""
        assert "command -v uv" in INIT_SCRIPT, (
            "docker_init.bash must check 'command -v uv' to skip download "
            "when uv is already pre-installed in the image (#357)"
        )

    def test_init_script_skips_download_if_present(self):
        """Init script must use conditional logic (if/else) around the uv download."""
        guard = re.search(r'if\s+command\s+-v\s+uv', INIT_SCRIPT)
        assert guard, (
            "docker_init.bash must use 'if command -v uv' guard around the download"
        )

    def test_init_script_curl_download_in_else_branch(self):
        """The curl download must be in the else branch (only runs if uv not found)."""
        conditional = re.search(
            r'if\s+command\s+-v\s+uv.*?fi',
            INIT_SCRIPT, re.DOTALL
        )
        assert conditional, "Could not find uv conditional block in docker_init.bash"
        block = conditional.group(0)
        pos_else = block.find("else")
        pos_curl = block.find("curl")
        assert pos_else != -1, "No 'else' branch in uv conditional"
        assert pos_curl != -1, "No 'curl' in uv conditional block"
        assert pos_curl > pos_else, (
            "curl download must be in the 'else' branch, not the 'if/then' branch"
        )

    def test_init_script_error_exit_on_download_failure(self):
        """Curl download must call error_exit on failure (not silently continue)."""
        fails_loudly = "error_exit" in INIT_SCRIPT and "Failed to install uv" in INIT_SCRIPT
        assert fails_loudly, (
            "docker_init.bash must call error_exit if uv download fails, "
            "so the container exits with a clear message instead of failing silently"
        )

    def test_init_script_path_includes_hermeswebui_local_bin(self):
        """PATH must include /home/hermeswebui/.local/bin for fallback runtime install."""
        assert "/home/hermeswebui/.local/bin" in INIT_SCRIPT, (
            "docker_init.bash must include /home/hermeswebui/.local/bin in PATH "
            "for the case where uv is installed at runtime via curl"
        )
# ── docker_init.bash: workspace directory permissions ────────────────────────
class TestWorkspacePermissions:
    """Workspace dir must be created/chowned via sudo and tolerate :ro mounts."""
    def test_workspace_uses_sudo_mkdir(self):
        """docker_init.bash must use 'sudo mkdir' for the workspace directory.
        Docker auto-creates bind-mount directories as root if they don't exist,
        leaving them unwritable by hermeswebui. sudo mkdir + chown fixes this.
        """
        # Find the workspace section
        # (800 chars is a window large enough to cover the whole block).
        ws_section = INIT_SCRIPT[
            INIT_SCRIPT.find("HERMES_WEBUI_DEFAULT_WORKSPACE"):
            INIT_SCRIPT.find("HERMES_WEBUI_DEFAULT_WORKSPACE") + 800
        ]
        assert "sudo mkdir" in ws_section, (
            "docker_init.bash must use 'sudo mkdir -p' for the workspace directory "
            "to handle the case where Docker created the bind-mount dir as root (#357)"
        )
    def test_workspace_uses_sudo_chown(self):
        """docker_init.bash must chown the workspace to hermeswebui when writable.
        The chown is now conditional on the workspace being writable, to allow
        read-only (:ro) workspace mounts without crashing (#670). The sudo chown
        must still be present in the script (just guarded by [ -w ]).
        """
        assert 'sudo chown hermeswebui:hermeswebui "$HERMES_WEBUI_DEFAULT_WORKSPACE"' in INIT_SCRIPT, (
            "docker_init.bash must 'sudo chown hermeswebui:hermeswebui' the workspace "
            "when it is writable, so the app user can write to it (#357)"
        )
    def test_workspace_mkdir_before_chown(self):
        """sudo mkdir must come before sudo chown in docker_init.bash."""
        # Ordering matters: chown of a nonexistent directory would fail.
        mkdir_pos = INIT_SCRIPT.find('sudo mkdir -p "$HERMES_WEBUI_DEFAULT_WORKSPACE"')
        chown_pos = INIT_SCRIPT.find('sudo chown hermeswebui:hermeswebui "$HERMES_WEBUI_DEFAULT_WORKSPACE"')
        assert mkdir_pos != -1, "sudo mkdir for workspace not found"
        assert chown_pos != -1, "sudo chown for workspace not found"
        assert mkdir_pos < chown_pos, "sudo mkdir must come before sudo chown"
    def test_workspace_error_exit_on_mkdir_failure(self):
        """sudo mkdir must call error_exit on failure."""
        assert 'sudo mkdir -p "$HERMES_WEBUI_DEFAULT_WORKSPACE" || error_exit' in INIT_SCRIPT, (
            "sudo mkdir for workspace must call error_exit on failure"
        )
    def test_workspace_chown_is_conditional_on_writable(self):
        """chown and write-test must be skipped for read-only workspace mounts (#670).
        The script must check [ -w "$HERMES_WEBUI_DEFAULT_WORKSPACE" ] before
        attempting chown or a write test, so :ro bind-mounts don't crash startup.
        """
        assert '[ -w "$HERMES_WEBUI_DEFAULT_WORKSPACE" ]' in INIT_SCRIPT, (
            "docker_init.bash must guard chown with [ -w ] to support read-only "
            "workspace mounts (:ro) without crashing (#670)"
        )
        # Read-only path must log a clear message rather than calling error_exit
        assert "read-only workspace is supported" in INIT_SCRIPT, (
            "docker_init.bash must print a clear message when workspace is read-only (#670)"
        )
    def test_init_script_syntax_valid(self):
        """docker_init.bash must pass bash -n syntax check."""
        # Function-local import keeps module import light; bash -n parses
        # the script without executing it.
        import subprocess
        result = subprocess.run(
            ["bash", "-n", str(REPO / "docker_init.bash")],
            capture_output=True, text=True
        )
        assert result.returncode == 0, (
            f"docker_init.bash failed bash -n syntax check:\n{result.stderr}"
        )

114
tests/test_issue401.py Normal file
View File

@@ -0,0 +1,114 @@
"""
Regression tests for tool-card persistence on session reload.
The older loadSession() path rewrote message history on the client:
- dropped role='tool' rows
- dropped empty assistant rows even when they carried tool_calls
- then ignored session.tool_calls on reload
That broke both durable logging and page refresh for valid tool runs.
"""
import json
import pathlib
import subprocess
import textwrap
# Repo root plus the two client-side sources these regression tests inspect.
REPO_ROOT = pathlib.Path(__file__).parent.parent.resolve()
SESSIONS_JS = (REPO_ROOT / "static" / "sessions.js").read_text(encoding="utf-8")  # loadSession() lives here
UI_JS = (REPO_ROOT / "static" / "ui.js").read_text(encoding="utf-8")  # renderMessages() lives here
def test_loadsession_preserves_tool_rows():
    """Reload must keep tool rows in S.messages so snippets can be reconstructed."""
    # The pre-fix client dropped tool rows with exactly this statement.
    dropped_tool_rows = "if (m.role === 'tool') continue;"
    assert dropped_tool_rows not in SESSIONS_JS, (
        "loadSession() must not drop role='tool' messages; renderMessages() hides them "
        "visually, but it still needs them for snippet reconstruction"
    )
def test_loadsession_uses_session_toolcalls_only_as_fallback():
    """Session summaries are the fallback, not the primary reload source."""
    # All three fragments must appear verbatim in the loadSession() source.
    required_snippets = (
        "if(!hasMessageToolMetadata&&data.session.tool_calls&&data.session.tool_calls.length)",
        "S.toolCalls=(data.session.tool_calls||[]).map(tc=>({...tc,done:true}));",
        "S.toolCalls=[];",
    )
    for snippet in required_snippets:
        assert snippet in SESSIONS_JS
def test_rendermessages_treats_openai_toolcall_assistants_as_visible():
    """OpenAI assistant rows with empty content but tool_calls must stay anchorable."""
    for snippet in (
        "const hasTc=Array.isArray(m.tool_calls)&&m.tool_calls.length>0;",
        "if(hasTc||hasTu||_messageHasReasoningPayload(m)) return true;",
    ):
        assert snippet in UI_JS
def _run_js(script_body: str) -> dict:
    """Run *script_body* under node with the loadSessionShape() harness prepended.

    The harness mirrors the fixed loadSession() shape logic: keep every
    role-bearing message, detect assistant tool metadata, and fall back to
    session-level tool_calls only when no message carries metadata. The
    script is expected to print a single JSON object to stdout, which is
    parsed and returned. Raises CalledProcessError if node exits non-zero.
    """
    program = textwrap.dedent(f"""
        function loadSessionShape(messages, sessionToolCalls) {{
            const filtered = (messages || []).filter(m => m && m.role);
            const hasMessageToolMetadata = filtered.some(m => {{
                if (!m || m.role !== 'assistant') return false;
                const hasTc = Array.isArray(m.tool_calls) && m.tool_calls.length > 0;
                const hasTu = Array.isArray(m.content) && m.content.some(p => p && p.type === 'tool_use');
                return hasTc || hasTu;
            }});
            const toolCalls = (!hasMessageToolMetadata && sessionToolCalls && sessionToolCalls.length)
                ? sessionToolCalls.map(tc => ({{ ...tc, done: true }}))
                : [];
            return {{ filtered, hasMessageToolMetadata, toolCalls }};
        }}
        {script_body}
    """)
    completed = subprocess.run(
        ["node", "-e", program],
        check=True,
        capture_output=True,
        text=True,
    )
    return json.loads(completed.stdout)
def test_reload_keeps_empty_assistant_toolcall_anchor():
    """OpenAI-style assistant {content:'', tool_calls:[...]} must survive reload."""
    shape = _run_js("""
        const messages = [
            { role: 'user', content: 'list files' },
            {
                role: 'assistant',
                content: '',
                tool_calls: [{ id: 'call-1', function: { name: 'terminal', arguments: '{}' } }]
            },
            { role: 'tool', tool_call_id: 'call-1', content: '{"output":"ok"}' },
            { role: 'assistant', content: 'Done.' }
        ];
        const loaded = loadSessionShape(messages, [{ name: 'terminal', assistant_msg_idx: 1 }]);
        process.stdout.write(JSON.stringify({
            filtered_len: loaded.filtered.length,
            has_metadata: loaded.hasMessageToolMetadata,
            fallback_len: loaded.toolCalls.length,
            assistant_tool_idx: loaded.filtered.findIndex(m => m.role === 'assistant' && m.tool_calls),
            tool_idx: loaded.filtered.findIndex(m => m.role === 'tool')
        }));
    """)
    # All four rows survive; metadata comes from the messages themselves,
    # so the session-level summary fallback stays unused.
    assert shape["filtered_len"] == 4
    assert shape["has_metadata"] is True
    assert shape["fallback_len"] == 0
    assert shape["assistant_tool_idx"] == 1
    assert shape["tool_idx"] == 2
def test_reload_uses_session_summary_when_messages_have_no_tool_metadata():
    """Older sessions should still render from session.tool_calls on reload."""
    shape = _run_js("""
        const messages = [
            { role: 'user', content: 'build site' },
            { role: 'assistant', content: 'Starting.' },
            { role: 'tool', content: '{"bytes_written": 4955}' },
            { role: 'assistant', content: '' }
        ];
        const sessionToolCalls = [
            { name: 'write_file', assistant_msg_idx: 1, snippet: 'bytes_written', tid: '' }
        ];
        const loaded = loadSessionShape(messages, sessionToolCalls);
        process.stdout.write(JSON.stringify({
            has_metadata: loaded.hasMessageToolMetadata,
            fallback_len: loaded.toolCalls.length,
            done_flag: loaded.toolCalls[0] && loaded.toolCalls[0].done === true
        }));
    """)
    # No assistant row carries tool metadata, so the summary fallback kicks in
    # and each summary entry is marked done.
    assert shape["has_metadata"] is False
    assert shape["fallback_len"] == 1
    assert shape["done_flag"] is True

313
tests/test_issue470.py Normal file
View File

@@ -0,0 +1,313 @@
"""
Tests for issue #470 — markdown link rendering bugs in renderMd():
1. Double-linking: [label](url) converted to <a>, then autolink re-matches
the URL inside href="..." and wraps it in a second <a>.
2. esc() applied to URLs in href attributes turns & → &amp;, breaking
URLs with query strings and producing &amp; in displayed link text.
3. Same double-linking bug inside table cells via inlineMd().
These tests verify the fixes by asserting against the rendered HTML that
ui.js serves, using a live server request to evaluate the actual JS output
indirectly (via checking ui.js source for the fixed patterns) AND by
running a lightweight Python mirror of the fixed renderMd logic.
Strategy: verify the fix is present in the JS source, then test the
expected rendering behaviour through the Python mirror.
"""
import pathlib
import re
import html as _html
# The renderer source; the source-level tests below assert on ui.js text.
REPO_ROOT = pathlib.Path(__file__).parent.parent
UI_JS = (REPO_ROOT / "static" / "ui.js").read_text()
# ── Helpers ──────────────────────────────────────────────────────────────────
def esc(s):
    """HTML-escape *s* — quotes included — mirroring the JS esc() helper."""
    text = str(s)
    return _html.escape(text, quote=True)
def _make_link(url, label):
"""Expected output for a [label](url) link after fix: href is NOT esc()-ed."""
return f'<a href="{url}" target="_blank" rel="noopener">{esc(label)}</a>'
# Minimal Python mirror of the FIXED renderMd() — enough to test link behaviour.
# Mirrors the stash-based approach introduced by the fix.
def render_links_only(text):
    """
    Apply only the link passes of the fixed renderMd() to *text*:

    1. [label](url) links become <a> tags and are stashed behind
       \\x00L<n>\\x00 placeholders so the autolink pass cannot re-match the
       URL sitting inside href="..." (the double-linking bug).
    2. Bare http(s) URLs are autolinked; one trailing punctuation character
       is peeled off so "https://x." links as https://x.
    3. Stashed links are restored.

    href values are NOT HTML-escaped (esc() turned & into &amp; and broke
    query strings); instead double-quotes are percent-encoded to %22 —
    matching the fixed JS (see test_js_source_sanitizes_quotes_in_href) —
    so a quote in the URL cannot break out of the href attribute.
    """
    def _esc(value):
        # Same contract as the JS esc(): escape HTML specials incl. quotes.
        return _html.escape(str(value), quote=True)

    stash = []

    def _stash_link(m):
        label, url = m.group(1), m.group(2)
        # FIX: percent-encode quotes so the URL cannot terminate href="...".
        href = url.replace('"', '%22')
        stash.append('<a href="%s" target="_blank" rel="noopener">%s</a>'
                     % (href, _esc(label)))
        return '\x00L%d\x00' % (len(stash) - 1)

    out = re.sub(r'\[([^\]]+)\]\((https?://[^\)]+)\)', _stash_link, text)

    def _autolink(m):
        url = m.group(1)
        trail = url[-1] if url[-1] in '.,;:!?)' else ''
        clean = url[:-1] if trail else url
        return ('<a href="%s" target="_blank" rel="noopener">%s</a>%s'
                % (clean, _esc(clean), trail))

    out = re.sub(r'(https?://[^\s<>"\')\]]+)', _autolink, out)
    # Restore stashed links last, after autolink can no longer touch them.
    return re.sub(r'\x00L(\d+)\x00', lambda m: stash[int(m.group(1))], out)
def render_table_with_links(md):
    """
    Render a markdown table whose cells may contain [label](url) links or
    bare URLs, mirroring the fixed inlineMd() + table rendering.

    Returns *md* unchanged when it is not a table (fewer than two lines, or
    the second line is not a |---| separator row). href URLs are kept raw
    except that double-quotes are percent-encoded to %22, matching the
    sanitization the fixed JS inlineMd() applies.
    """
    lines = md.strip().split('\n')
    if len(lines) < 2:
        return md
    if not re.match(r'^\|[\s|:-]+\|$', lines[1].strip()):
        return md

    def _esc(value):
        # Same contract as the JS esc(): escape HTML specials incl. quotes.
        return _html.escape(str(value), quote=True)

    def _inline(cell):
        """Fixed inlineMd subset: stash links, autolink, restore."""
        stash = []

        def _keep(m):
            label, url = m.group(1), m.group(2)
            # FIX: %22-encode quotes so the URL cannot break out of href="...".
            stash.append('<a href="%s" target="_blank" rel="noopener">%s</a>'
                         % (url.replace('"', '%22'), _esc(label)))
            return '\x00L%d\x00' % (len(stash) - 1)

        cell = re.sub(r'\[([^\]]+)\]\((https?://[^\)]+)\)', _keep, cell)

        def _auto(m):
            url = m.group(1)
            trail = url[-1] if url[-1] in '.,;:!?)' else ''
            clean = url[:-1] if trail else url
            return ('<a href="%s" target="_blank" rel="noopener">%s</a>%s'
                    % (clean, _esc(clean), trail))

        cell = re.sub(r'(https?://[^\s<>"\')\]]+)', _auto, cell)
        return re.sub(r'\x00L(\d+)\x00', lambda m: stash[int(m.group(1))], cell)

    def _cells(row, tag):
        # '| a | b |' → '<tag>a</tag><tag>b</tag>' with per-cell inline pass.
        parts = row.strip().lstrip('|').rstrip('|').split('|')
        return ''.join('<%s>%s</%s>' % (tag, _inline(p.strip()), tag)
                       for p in parts)

    header = '<tr>%s</tr>' % _cells(lines[0], 'th')
    body = ''.join('<tr>%s</tr>' % _cells(r, 'td') for r in lines[2:])
    return '<table><thead>%s</thead><tbody>%s</tbody></table>' % (header, body)
# ── Source-level checks (verify fix is in the JS) ─────────────────────────────
def test_inlinemd_uses_link_stash():
    """Fixed inlineMd() must stash [label](url) links before autolink runs."""
    failure_msg = "inlineMd() should use _link_stash to prevent double-linking"
    assert '_link_stash' in UI_JS, failure_msg
def test_inlinemd_no_esc_on_href():
    """Fixed inlineMd() must not call esc() on the URL in href."""
    # The old broken pattern escaped & inside href via esc(u).
    broken_pattern = 'href="${esc(u)}"'
    assert broken_pattern not in UI_JS, (
        "inlineMd() should not call esc() on href URL — it breaks & in query strings"
    )
def test_outer_link_pass_uses_a_stash():
    """Fixed outer link pass must stash existing <a> tags before running."""
    failure_msg = "Outer [label](url) pass should stash existing <a> tags to prevent autolink re-matching"
    assert '_a_stash' in UI_JS, failure_msg
def test_autolink_pass_uses_al_stash():
    """Fixed autolink pass must stash existing <a> tags before running."""
    failure_msg = "Autolink pass should stash existing <a> tags to prevent double-linking"
    assert '_al_stash' in UI_JS, failure_msg
def test_autolink_no_esc_on_href():
    """Fixed autolink pass must not call esc() on href URL."""
    marker = '// Autolink: convert plain URLs to clickable links.'
    start = UI_JS.find(marker)
    assert start != -1, "New autolink comment not found"
    # Inspect only the autolink section (the fix sits within ~600 chars).
    section = UI_JS[start:start + 600]
    assert 'href="${clean}"' in section, (
        'Autolink should use href="${clean}" not href="${esc(clean)}"'
    )
    assert 'href="${esc(clean)}"' not in section, (
        "Autolink should not esc() the URL in href"
    )
# ── Behaviour tests (Python mirror of fixed renderMd) ─────────────────────────
def test_labeled_link_renders_as_single_anchor():
    """[#461](https://github.com/.../461) must produce exactly one <a> tag."""
    url = 'https://github.com/nesquena/hermes-webui/issues/461'
    rendered = render_links_only(f'[#461]({url})')
    assert rendered.count('<a ') == 1, f"Expected 1 <a> tag, got: {rendered}"
    assert rendered.count('</a>') == 1
    assert f'href="{url}"' in rendered
    assert '#461' in rendered
    # The raw markdown syntax must be fully consumed.
    assert '[#461]' not in rendered
    assert f']({url})' not in rendered
def test_href_not_html_escaped():
    """URLs with & must appear as literal & in href, not &amp;."""
    url = 'https://example.com/search?q=foo&bar=baz'
    rendered = render_links_only(f'[Search]({url})')
    assert f'href="{url}"' in rendered, (
        f"& in URL should not be escaped to &amp; in href. Got: {rendered}"
    )
    assert '&amp;' not in rendered
def test_bare_url_not_double_linked():
    """A bare https:// URL must produce exactly one <a> tag."""
    rendered = render_links_only('https://github.com/nesquena/hermes-webui/issues/461')
    assert rendered.count('<a ') == 1, f"Expected 1 <a> tag, got: {rendered}"
    assert rendered.count('</a>') == 1
def test_labeled_link_in_table_cell_single_anchor():
    """[#461](url) inside a markdown table cell must produce exactly one <a> tag."""
    url = 'https://github.com/nesquena/hermes-webui/issues/461'
    table_md = f'| Issue | Title |\n|---|---|\n| [#461]({url}) | Reasoning effort |'
    rendered = render_table_with_links(table_md)
    assert rendered.count('<a ') == 1, f"Expected 1 <a> in table, got: {rendered}"
    assert f'href="{url}"' in rendered
    assert '#461' in rendered
    # The raw markdown bracket syntax must be fully consumed.
    assert '[#461]' not in rendered
def test_multiple_links_in_table_no_double_linking():
    """Multiple [label](url) links in a table must each produce exactly one <a>."""
    urls = [
        'https://github.com/nesquena/hermes-webui/issues/461',
        'https://github.com/nesquena/hermes-webui/issues/462',
        'https://github.com/nesquena/hermes-webui/issues/463',
    ]
    body_rows = '\n'.join(
        f'| [#{461 + i}]({link}) | Title {i} |' for i, link in enumerate(urls)
    )
    rendered = render_table_with_links(f'| Issue | Title |\n|---|---|\n{body_rows}')
    assert rendered.count('<a ') == 3, f"Expected 3 <a> tags, got {rendered.count('<a ')}:\n{rendered}"
    assert rendered.count('</a>') == 3
    for link in urls:
        assert f'href="{link}"' in rendered
def test_link_label_is_escaped():
    """The label text (not the URL) must still be HTML-escaped."""
    rendered = render_links_only('[Click <here>](https://example.com)')
    assert '&lt;here&gt;' in rendered, "Label text should be HTML-escaped"
    assert '<here>' not in rendered
def test_link_not_broken_by_prior_autolink():
    """A [label](url) followed by a bare URL must each produce one clean <a>."""
    labeled_url = 'https://github.com/issues/461'
    bare_url = 'https://github.com/issues/462'
    rendered = render_links_only(f'See [#461]({labeled_url}) and also {bare_url}')
    assert rendered.count('<a ') == 2, f"Expected 2 links, got: {rendered}"
    assert f'href="{labeled_url}"' in rendered
    assert f'href="{bare_url}"' in rendered
    assert '#461' in rendered
def test_href_quote_sanitized():
    """A URL containing a double-quote must have it percent-encoded in href to prevent attribute breakout."""
    # Without encoding, a quote in the URL terminates href="..." early and
    # allows injecting e.g. an onmouseover handler.
    # (FIX: removed an unused 'url' local that was never passed anywhere, and
    # guard the find() so a missing href fails with a clear message.)
    safe_url = 'https://example.com/path"with"quotes'
    result = render_links_only(f'[click]({safe_url})')
    assert 'href="' in result, f"No href attribute rendered: {result}"
    # NOTE(review): slicing from 'href="' to the next '"' can never itself
    # contain a quote; the real signal is whether the slice covers the whole
    # sanitized URL — confirm against the %22 source-level check below.
    href_start = result.find('href="') + 6
    href_end = result.find('"', href_start)
    href_val = result[href_start:href_end]
    assert '"' not in href_val, (
        f"href value must not contain unencoded double-quote. Got href: {href_val}"
    )
def test_js_source_sanitizes_quotes_in_href():
    """JS source must apply quote percent-encoding to URLs before placing in href."""
    # Both the inlineMd stash and the outer link pass sanitize via .replace.
    encoded_quote = "%22"
    assert encoded_quote in UI_JS, (
        "URL placed in href should have double-quotes percent-encoded via .replace to %22"
    )
# ── Code-inside-bold tests (pre-existing bug, fixed in same PR) ───────────────
def test_js_inlinemd_stashes_code_before_bold():
    """Fixed inlineMd() must stash backtick code spans before bold/italic processing."""
    failure_msg = "inlineMd() should use _code_stash to protect backtick spans from bold/italic esc()"
    assert '_code_stash' in UI_JS, failure_msg
def test_code_inside_bold_renders_correctly():
    """Inline code inside bold text must render as <strong><code>...</code></strong>,
    not with escaped &lt;code&gt; tags visible on screen."""
    # The pre-existing bug: **`esc()`** → <strong>&lt;code&gt;esc()&lt;/code&gt;</strong>
    text = '**`esc()` on `href`**: breaks URLs'
    # Simulate the fixed inlineMd(): stash code, run bold, restore code.
    code_stash = []

    def _stash_code(m):
        code_stash.append(f'<code>{esc(m.group(1))}</code>')
        return f'\x00C{len(code_stash) - 1}\x00'

    t = re.sub(r'`([^`\n]+)`', _stash_code, text)
    t = re.sub(r'\*\*(.+?)\*\*', lambda m: f'<strong>{esc(m.group(1))}</strong>', t)
    t = re.sub(r'\x00C(\d+)\x00', lambda m: code_stash[int(m.group(1))], t)
    assert '&lt;code&gt;' not in t, (
        f"Code tags should not be HTML-escaped inside bold. Got: {t}"
    )
    assert '<code>esc()</code>' in t, (
        f"Code tags should render as <code> elements inside bold. Got: {t}"
    )
    assert '<strong>' in t, "Bold should still render"
def test_code_and_bold_mixed_no_escaping():
    """Bold text containing multiple backtick spans must render all code tags correctly."""
    cases = [
        ('**`esc()` on `href`**', '<strong>', '<code>esc()</code>', '<code>href</code>'),
        ('***`code` in bold-italic***', '<strong>', '<code>code</code>'),
        ('`code` then **bold**', '<code>code</code>', '<strong>bold</strong>'),
    ]
    for text, *expected_fragments in cases:
        stash = []

        def _keep_code(m):
            stash.append(f'<code>{esc(m.group(1))}</code>')
            return f'\x00C{len(stash) - 1}\x00'

        t = re.sub(r'`([^`\n]+)`', _keep_code, text)
        t = re.sub(r'\*\*\*(.+?)\*\*\*', lambda m: f'<strong><em>{esc(m.group(1))}</em></strong>', t)
        t = re.sub(r'\*\*(.+?)\*\*', lambda m: f'<strong>{esc(m.group(1))}</strong>', t)
        t = re.sub(r'\x00C(\d+)\x00', lambda m: stash[int(m.group(1))], t)
        assert '&lt;code&gt;' not in t, f"Escaped code tag in: {text!r}{t}"
        for frag in expected_fragments:
            assert frag in t, f"Expected {frag!r} in output of {text!r}, got: {t}"

26
tests/test_issue477.py Normal file
View File

@@ -0,0 +1,26 @@
"""Tests for fix #477: KaTeX font-src CSP fix."""
import pathlib
# helpers.py builds the Content-Security-Policy header these tests inspect.
REPO = pathlib.Path(__file__).parent.parent
HELPERS_PY = (REPO / "api" / "helpers.py").read_text(encoding="utf-8")
def test_font_src_allows_jsdelivr():
    """font-src must include cdn.jsdelivr.net for KaTeX fonts."""
    required_directive = "font-src 'self' data: https://cdn.jsdelivr.net"
    assert required_directive in HELPERS_PY, (
        "api/helpers.py CSP must allow cdn.jsdelivr.net in font-src "
        "so KaTeX math rendering fonts load without console errors."
    )
def test_font_src_still_allows_self_and_data():
    """font-src must still allow self and data: (used by other font assets)."""
    # FIX: guard against a missing directive (was a raw IndexError) and
    # extract the directive value once instead of recomputing the split.
    assert "font-src" in HELPERS_PY, "CSP in api/helpers.py has no font-src directive"
    # Everything between "font-src" and the next ";" is the directive value.
    font_src_value = HELPERS_PY.split("font-src")[1].split(";")[0]
    assert "'self'" in font_src_value
    assert "data:" in font_src_value
def test_script_src_already_allows_jsdelivr():
    """script-src already allows cdn.jsdelivr.net — font-src should too."""
    # Only look at the CSP text preceding the font-src directive.
    before_font_src = HELPERS_PY.split("font-src")[0]
    assert "https://cdn.jsdelivr.net" in before_font_src, (
        "script-src should already allow cdn.jsdelivr.net (KaTeX JS)"
    )

572
tests/test_issue486_487.py Normal file
View File

@@ -0,0 +1,572 @@
"""
Tests for issue #486 (CSS: inline code in table cells) and
issue #487 (JS renderer: markdown image syntax not implemented).
Issue #486 — CSS fix in static/style.css:
Inline `code` spans inside table cells render with awkward sizing.
Fix: td code, th code { font-size: 0.85em; padding: 1px 4px; vertical-align: baseline; }
Issue #487 — JS fix in static/ui.js:
![alt](url) image syntax not handled — renders as stray ! + link.
Fix: add image pass to renderMd() (before link pass) and inlineMd()
reusing the .msg-media-img class.
Strategy:
- Source-level checks verify the fixes are present in the JS/CSS.
- Python mirror tests verify the rendering logic with exhaustive edge cases,
especially code blocks inside tables (the specific case Nathan flagged).
"""
import pathlib
import re
import html as _html
# Renderer JS and stylesheet sources; tests assert directly on their text.
REPO_ROOT = pathlib.Path(__file__).parent.parent
UI_JS = (REPO_ROOT / "static" / "ui.js").read_text()
STYLE_CSS = (REPO_ROOT / "static" / "style.css").read_text()
# ── Helpers ───────────────────────────────────────────────────────────────────
def esc(s):
    """Escape HTML specials in *s*, quotes included (JS esc() equivalent)."""
    return _html.escape(str(s), quote=True)
def inline_md(t):
    """
    Python mirror of the fixed inlineMd() pipeline.

    Pass order is the whole point of the fix:

    1. code stash   — \\x00C tokens protect `...` from every later pass
    2. bold/italic  — runs on plain text only
    3. image pass   — ![alt](url) → <img>; code content is still stashed,
                      so image syntax inside backticks stays protected
    4. img stash    — \\x00I tokens keep autolink away from src="..."
    5. link stash   — \\x00L tokens keep autolink away from [label](url)
    6. autolink     — only bare URLs remain matchable
    7-9. restore link, img, then code stashes (reverse of creation)
    """
    def _esc(value):
        # Same contract as the JS esc(): HTML-escape including quotes.
        return _html.escape(str(value), quote=True)

    # 1. Stash backtick code spans first so nothing later can touch them.
    code_spans = []

    def _grab_code(m):
        code_spans.append('<code>%s</code>' % _esc(m.group(1)))
        return '\x00C%d\x00' % (len(code_spans) - 1)

    t = re.sub(r'`([^`\n]+)`', _grab_code, t)

    # 2. Bold / italic (safe now: code content is stashed away).
    t = re.sub(r'\*\*\*(.+?)\*\*\*',
               lambda m: '<strong><em>%s</em></strong>' % _esc(m.group(1)), t)
    t = re.sub(r'\*\*(.+?)\*\*',
               lambda m: '<strong>%s</strong>' % _esc(m.group(1)), t)
    t = re.sub(r'\*([^*\n]+)\*',
               lambda m: '<em>%s</em>' % _esc(m.group(1)), t)

    # 3. Image pass: ![alt](url) → <img>, quotes in the URL %22-encoded.
    def _image(m):
        alt, url = m.group(1), m.group(2)
        return ('<img src="%s" alt="%s" class="msg-media-img" loading="lazy" '
                'onclick="this.classList.toggle(\'msg-media-img--full\')">'
                % (url.replace('"', '%22'), _esc(alt)))

    t = re.sub(r'!\[([^\]]*)\]\((https?://[^\)]+)\)', _image, t)

    # 4. Stash rendered <img> tags so autolink never rewrites src= URLs.
    img_tags = []

    def _grab_img(m):
        img_tags.append(m.group(0))
        return '\x00I%d\x00' % (len(img_tags) - 1)

    t = re.sub(r'<img\b[^>]*>', _grab_img, t)

    # 5. Stash [label](url) links (href raw except %22-encoded quotes).
    links = []

    def _grab_link(m):
        label, url = m.group(1), m.group(2)
        links.append('<a href="%s" target="_blank" rel="noopener">%s</a>'
                     % (url.replace('"', '%22'), _esc(label)))
        return '\x00L%d\x00' % (len(links) - 1)

    t = re.sub(r'\[([^\]]+)\]\((https?://[^\)]+)\)', _grab_link, t)

    # 6. Autolink bare URLs; every already-rendered URL is behind a token.
    def _auto(m):
        url = m.group(1)
        trail = url[-1] if url[-1] in '.,;:!?)' else ''
        clean = url[:-1] if trail else url
        return ('<a href="%s" target="_blank" rel="noopener">%s</a>%s'
                % (clean, _esc(clean), trail))

    t = re.sub(r'(https?://[^\s<>"\')\]]+)', _auto, t)

    # 7-9. Restore stashes: links, images, and finally code.
    t = re.sub(r'\x00L(\d+)\x00', lambda m: links[int(m.group(1))], t)
    t = re.sub(r'\x00I(\d+)\x00', lambda m: img_tags[int(m.group(1))], t)
    t = re.sub(r'\x00C(\d+)\x00', lambda m: code_spans[int(m.group(1))], t)
    return t
def render_table(md):
    """Python mirror of the table pass; every cell goes through inline_md().

    Returns *md* unchanged when it is not a table (fewer than two lines or
    no |---| separator as the second line).
    """
    rows = md.strip().split('\n')
    if len(rows) < 2:
        return md
    if not re.match(r'^\|[\s|:-]+\|$', rows[1].strip()):
        return md

    def _cells(row, tag):
        # '| a | b |' → '<tag>a</tag><tag>b</tag>' with a per-cell inline pass.
        parts = row.strip().lstrip('|').rstrip('|').split('|')
        return ''.join('<%s>%s</%s>' % (tag, inline_md(p.strip()), tag)
                       for p in parts)

    head = '<tr>%s</tr>' % _cells(rows[0], 'th')
    body = ''.join('<tr>%s</tr>' % _cells(r, 'td') for r in rows[2:])
    return '<table><thead>%s</thead><tbody>%s</tbody></table>' % (head, body)
# ═════════════════════════════════════════════════════════════════════════════
# ISSUE #486 — CSS: code inside table cells
# ═════════════════════════════════════════════════════════════════════════════
class TestIssue486CssCodeInTable:
    """CSS fix: td code and th code must have targeted sizing rules."""

    # ── Source-level checks against style.css ────────────────────────────────
    def test_td_code_font_size_present(self):
        """msg-body td code rule must set font-size (e.g. 0.85em) to prevent oversized code."""
        assert 'td code' in STYLE_CSS, (
            "Missing 'td code' CSS rule — inline code in table cells needs sizing fix"
        )

    def test_th_code_rule_present(self):
        """th code rule must also exist for header cells."""
        assert 'th code' in STYLE_CSS, (
            "Missing 'th code' CSS rule — inline code in header cells needs sizing fix"
        )

    def test_td_code_has_font_size(self):
        """The td code / th code block must include a font-size declaration."""
        idx = STYLE_CSS.find('td code')
        assert idx != -1, "td code rule not found in style.css"
        # The declaration is expected within the rule body (~200 chars).
        window = STYLE_CSS[idx:idx + 200]
        assert 'font-size' in window, (
            f"td code rule must include font-size. Found near td code: {window!r}"
        )

    def test_td_code_has_padding(self):
        """The td code / th code block must include a padding declaration."""
        idx = STYLE_CSS.find('td code')
        assert idx != -1
        window = STYLE_CSS[idx:idx + 200]
        assert 'padding' in window, (
            f"td code rule must include padding. Found near td code: {window!r}"
        )

    def test_td_code_has_vertical_align(self):
        """The td code / th code block must include vertical-align: baseline."""
        idx = STYLE_CSS.find('td code')
        assert idx != -1
        window = STYLE_CSS[idx:idx + 200]
        assert 'vertical-align' in window, (
            f"td code rule must include vertical-align. Found near td code: {window!r}"
        )

    # ── Behaviour checks through the Python mirror ───────────────────────────
    def test_code_renders_inside_table_cell(self):
        """Inline `code` inside a table cell must render as <code> element."""
        md = "| Syntax | Rendered |\n|---|---|\n| `code` | `code` |"
        result = render_table(md)
        assert '<code>code</code>' in result, (
            f"Inline code in table cell should render as <code>. Got: {result}"
        )

    def test_bold_code_renders_inside_table_cell(self):
        """**`bold code`** inside a table cell must render as <strong><code>."""
        md = "| Style | Example |\n|---|---|\n| bold code | **`bold code`** |"
        result = render_table(md)
        # Should have code tag (even inside bold)
        assert '<code>bold code</code>' in result, (
            f"Bold code in table should render as <code>. Got: {result}"
        )

    def test_multiple_code_spans_in_same_cell(self):
        """Multiple backtick spans in one cell all render as <code>."""
        md = "| Combined |\n|---|\n| `a` and `b` |"
        result = render_table(md)
        assert result.count('<code>') == 2, (
            f"Expected 2 code tags in cell, got: {result}"
        )

    def test_code_in_header_cell(self):
        """`code` in a <th> header cell must also render as <code>."""
        md = "| `header code` | Normal |\n|---|---|\n| data | data |"
        result = render_table(md)
        assert '<code>header code</code>' in result, (
            f"Code in header cell should render. Got: {result}"
        )

    def test_code_not_mangled_by_bold_in_table(self):
        """**`code`** in a table cell must NOT produce &lt;code&gt; (the pre-fix bug)."""
        md = "| Pattern | Example |\n|---|---|\n| bold-code | **`npm install`** |"
        result = render_table(md)
        assert '&lt;code&gt;' not in result, (
            f"Code tags inside bold in table must not be HTML-escaped. Got: {result}"
        )
        assert '<strong>' in result, "Bold wrapper should be present"
        assert '<code>npm install</code>' in result

    def test_code_with_special_chars_in_table(self):
        """`<script>` inside a table cell must have the angle brackets escaped."""
        md = "| Input | Output |\n|---|---|\n| `<script>` | sanitized |"
        result = render_table(md)
        assert '&lt;script&gt;' in result, (
            f"Code content must be HTML-escaped. Got: {result}"
        )
        # The <code> wrapper itself must be there
        assert '<code>' in result

    def test_code_adjacent_to_link_in_table(self):
        """`code` and [link](url) in same cell both render correctly."""
        url = 'https://example.com'
        md = f"| Mixed |\n|---|\n| `foo` and [bar]({url}) |"
        result = render_table(md)
        assert '<code>foo</code>' in result
        assert f'href="{url}"' in result
        assert 'bar' in result

    def test_empty_code_span_in_table(self):
        """Edge case: an empty/unmatched backtick pair in a table cell — no crash."""
        # FIX: this test previously rendered "| normal text |" and never
        # exercised the edge case its name promised. '``' cannot match the
        # code regex (it requires ≥1 char between backticks), so it must
        # pass through the cell verbatim rather than crashing the renderer.
        md = "| Col |\n|---|\n| `` plus normal text |"
        result = render_table(md)
        assert '<td>`` plus normal text</td>' in result
        # Plain cells still render untouched as well.
        assert '<td>normal text</td>' in render_table("| Col |\n|---|\n| normal text |")
# ═════════════════════════════════════════════════════════════════════════════
# ISSUE #487 — JS renderer: markdown image syntax
# ═════════════════════════════════════════════════════════════════════════════
class TestIssue487ImageRendering:
"""Image syntax ![alt](url) must render as <img>, not as ! + link."""
# ── Source-level checks ──────────────────────────────────────────────────
def test_image_pass_present_in_ui_js(self):
"""renderMd() must contain an image regex pass for ![alt](url)."""
assert '![' in UI_JS or r'!\[' in UI_JS, (
"ui.js should contain image syntax handling (![...](url) regex)"
)
# More specifically, look for the img tag being generated
assert 'msg-media-img' in UI_JS, (
"Image pass should reuse .msg-media-img class"
)
def test_image_pass_runs_before_link_pass_in_outer(self):
"""Image regex must appear in ui.js BEFORE the [label](url) link pass."""
# Find the image pass position
img_idx = UI_JS.find('!\\[')
if img_idx == -1:
img_idx = UI_JS.find("![")
# Find the outer labeled link pass position (after table pass)
link_idx = UI_JS.find("Outer link pass for labeled links")
assert img_idx != -1, "Image pass not found in ui.js"
assert link_idx != -1, "Outer link pass comment not found in ui.js"
assert img_idx < link_idx, (
"Image pass must run before the outer [label](url) link pass "
"to prevent the image from being consumed as a plain link"
)
def test_image_url_sanitized_for_quotes(self):
"""Image src URL must have double-quotes percent-encoded."""
# The image pass must use .replace(/"/g,'%22') or equivalent
# Look for the pattern near image handling
img_idx = UI_JS.find('msg-media-img')
assert img_idx != -1
# Find all occurrences — there's the MEDIA restore and the new image pass
# The new one should have %22 for URL sanitization
assert '%22' in UI_JS, (
"Image src URL must sanitize double-quotes to %22"
)
def test_image_alt_uses_esc(self):
"""Alt text must be passed through esc() to prevent XSS."""
# Look for esc( call near the image rendering code
# The pattern should be: alt="${esc(alt)}"
assert 'esc(' in UI_JS, "esc() function must be used for alt text"
def test_safe_tags_includes_img(self):
"""SAFE_TAGS allowlist must include 'img' to prevent the tag from being escaped."""
# Find the SAFE_TAGS regex in ui.js
safe_idx = UI_JS.find('SAFE_TAGS=')
assert safe_idx != -1, "SAFE_TAGS not found in ui.js"
safe_window = UI_JS[safe_idx:safe_idx+300]
assert 'img' in safe_window, (
f"SAFE_TAGS must include 'img' tag. Found: {safe_window!r}"
)
def test_inlinemd_has_image_pass(self):
"""inlineMd() must also handle ![alt](url) for images inside table cells."""
# inlineMd is called for table cells, list items, blockquotes
# Find inlineMd function body
start = UI_JS.find('function inlineMd(')
assert start != -1, "inlineMd function not found"
# Get a generous window covering the function
fn_window = UI_JS[start:start+1500]
assert '![' in fn_window or r'!\[' in fn_window, (
"inlineMd() must handle image syntax for images in table cells"
)
# ── Behaviour tests (Python mirror) ─────────────────────────────────────
def test_basic_image_renders_as_img_tag(self):
"""![alt](https://example.com/img.png) must produce an <img> tag."""
t = '![A cat](https://example.com/cat.png)'
result = inline_md(t)
assert '<img ' in result, f"Expected <img> tag, got: {result}"
assert 'src="https://example.com/cat.png"' in result
assert 'alt="A cat"' in result
# Must NOT have the raw ![...] syntax left over
assert '![' not in result
# Must NOT have a stray ! character
assert result.startswith('<img '), f"Result should start with img tag: {result}"
def test_image_does_not_render_as_link(self):
"""![alt](url) must NOT produce an <a> tag (the pre-fix bug)."""
t = '![Logo](https://example.com/logo.png)'
result = inline_md(t)
assert '<a ' not in result, (
f"Image must not render as an <a> tag. Got: {result}"
)
def test_image_stray_exclamation_not_present(self):
    """No stray ! character before the img tag (the pre-fix symptom)."""
    rendered = inline_md('![alt](https://example.com/img.png)')
    # Remove the <img> tag itself; whatever remains must not contain '!'.
    leftover = re.sub(r'<img[^>]+>', '', rendered)
    assert '!' not in leftover, (
        f"Stray ! character present after image render. Got: {rendered}"
    )
def test_image_uses_msg_media_img_class(self):
    """Rendered <img> must use class=\"msg-media-img\" for consistent styling."""
    rendered = inline_md('![screenshot](https://example.com/shot.png)')
    assert 'class="msg-media-img"' in rendered, (
        f"Image must use .msg-media-img class. Got: {rendered}"
    )
def test_image_has_lazy_loading(self):
    """Rendered <img> must have loading=\"lazy\"."""
    rendered = inline_md('![x](https://example.com/x.png)')
    assert 'loading="lazy"' in rendered, f"Expected loading=lazy. Got: {rendered}"
def test_image_has_click_to_zoom(self):
    """Rendered <img> must have onclick toggle for zoom."""
    rendered = inline_md('![x](https://example.com/x.png)')
    # The zoom toggle works by swapping in the --full modifier class.
    assert 'msg-media-img--full' in rendered, (
        f"Image must have click-to-zoom onclick. Got: {rendered}"
    )
def test_image_alt_is_escaped(self):
    """Alt text with HTML special chars must be escaped."""
    rendered = inline_md('![<evil>](https://example.com/img.png)')
    assert '&lt;evil&gt;' in rendered, (
        f"Alt text must be HTML-escaped. Got: {rendered}"
    )
    assert '<evil>' not in rendered
def test_image_url_quote_sanitized(self):
    """Double-quote in image URL must be percent-encoded to prevent attribute breakout."""
    rendered = inline_md('![x](https://example.com/path"with"quotes.png)')
    # Pull the src attribute value out of the rendered tag.
    match = re.search(r'src="([^"]*)"', rendered)
    assert match, f"src attribute not found. Got: {rendered}"
    src_value = match.group(1)
    assert '"' not in src_value, (
        f"Raw double-quote in src would break attribute. Got src: {src_value!r}"
    )
def test_image_no_javascript_uri(self):
    """javascript: URIs must not be rendered as image src (regex only matches http/https)."""
    rendered = inline_md('![x](javascript:alert(1))')
    # The image regex requires https?://, so this input stays untouched.
    assert '<img ' not in rendered, (
        f"javascript: URI must not render as <img>. Got: {rendered}"
    )
def test_image_no_data_uri(self):
    """data: URIs must not be rendered as image src."""
    rendered = inline_md('![x](data:image/png;base64,abc123)')
    assert '<img ' not in rendered, (
        f"data: URI must not render as <img>. Got: {rendered}"
    )
def test_image_followed_by_text(self):
    """Image followed by plain text — only the image becomes an <img>."""
    rendered = inline_md('![cat](https://example.com/cat.png) and some text')
    assert '<img ' in rendered
    assert 'and some text' in rendered
def test_image_preceded_by_text(self):
    """Text before an image — both render correctly."""
    rendered = inline_md('Here is a screenshot: ![shot](https://example.com/shot.png)')
    assert 'Here is a screenshot:' in rendered
    assert '<img ' in rendered
def test_image_and_link_in_same_cell(self):
    """Image and link in same inline context both render correctly."""
    rendered = inline_md(
        '![img](https://example.com/img.png) see [here](https://example.com)'
    )
    assert '<img ' in rendered
    assert '<a href="https://example.com"' in rendered
    assert '![' not in rendered
def test_image_inside_table_cell(self):
    """![alt](url) inside a markdown table cell must render as <img>."""
    table_md = "\n".join([
        "| Image | Caption |",
        "|---|---|",
        "| ![logo](https://example.com/logo.png) | Company logo |",
    ])
    rendered = render_table(table_md)
    assert '<img ' in rendered, f"Image in table should render as <img>. Got: {rendered}"
    assert 'src="https://example.com/logo.png"' in rendered
    assert '<a ' not in rendered, "Image in table must not render as <a>"
def test_image_in_table_no_stray_exclamation(self):
    """No stray ! before the <img> when image is inside a table cell."""
    rendered = render_table("| X |\n|---|\n| ![x](https://x.com/x.png) |")
    # Drop every HTML tag; the remaining visible text must not contain '!'.
    text_only = re.sub(r'<[^>]+>', '', rendered)
    assert '!' not in text_only, (
        f"Stray ! in table cell after image render. Cleaned: {text_only!r}"
    )
def test_empty_alt_text_image(self):
    """![](url) with empty alt renders as <img> with empty alt attribute."""
    rendered = inline_md('![](https://example.com/img.png)')
    assert '<img ' in rendered
    assert 'alt=""' in rendered
def test_multiple_images_in_one_cell(self):
    """Two images in one table cell both render as <img> tags."""
    rendered = inline_md(
        '![a](https://example.com/a.png) ![b](https://example.com/b.png)'
    )
    assert rendered.count('<img ') == 2, (
        f"Expected 2 img tags. Got: {rendered}"
    )
def test_image_with_https_url(self):
    """https:// image URL renders correctly."""
    rendered = inline_md('![secure](https://secure.example.com/img.jpg)')
    assert 'src="https://secure.example.com/img.jpg"' in rendered
def test_image_with_http_url(self):
    """http:// image URL also renders (non-https still valid)."""
    rendered = inline_md('![old](http://example.com/img.jpg)')
    assert '<img ' in rendered
    assert 'src="http://example.com/img.jpg"' in rendered
# ═════════════════════════════════════════════════════════════════════════════
# Cross-cutting: code + image together inside tables (the edge case Nathan flagged)
# ═════════════════════════════════════════════════════════════════════════════
class TestEdgeCasesCodeAndImageInTables:
    """Combination edge cases: code blocks and images mixed inside table cells."""

    def test_code_and_image_in_same_table_row(self):
        """Table row with code in one cell and image in another renders both correctly."""
        table_md = "\n".join([
            "| Code | Preview |",
            "|---|---|",
            "| `print('hello')` | ![screenshot](https://example.com/shot.png) |",
        ])
        rendered = render_table(table_md)
        # Accept either escaping of the single quotes inside the code span.
        code_ok = ("<code>print(&#x27;hello&#x27;)</code>" in rendered
                   or "<code>print('hello')</code>" in rendered)
        assert code_ok, (
            f"Code cell should render as <code>. Got: {rendered}"
        )
        assert '<img ' in rendered, "Image cell should render as <img>"

    def test_code_in_cell_with_image_in_next_cell(self):
        """Multiple columns: code stays code, image stays image, no cross-contamination."""
        table_md = "\n".join([
            "| Step | Example |",
            "|---|---|",
            "| Run `npm install` | ![demo](https://example.com/demo.gif) |",
        ])
        rendered = render_table(table_md)
        assert '<code>npm install</code>' in rendered
        assert '<img ' in rendered
        # The image must not be turned into (or wrapped in) a link.
        assert '<a ' not in rendered

    def test_bold_code_in_cell_and_image_in_cell(self):
        """**`code`** in one cell and image in another — no esc() mangling."""
        table_md = "\n".join([
            "| Command | Result |",
            "|---|---|",
            "| **`git status`** | ![result](https://example.com/r.png) |",
        ])
        rendered = render_table(table_md)
        assert '&lt;code&gt;' not in rendered, (
            "Bold+code in table cell must not produce escaped code tags"
        )
        assert '<code>git status</code>' in rendered
        assert '<img ' in rendered

    def test_link_code_image_all_in_table(self):
        """Table with code, link, and image cells all render correctly."""
        link_url = 'https://github.com/issues/486'
        image_url = 'https://example.com/img.png'
        table_md = "\n".join([
            "| Code | Link | Image |",
            "|---|---|---|",
            f"| `var x = 1` | [#486]({link_url}) | ![img]({image_url}) |",
        ])
        rendered = render_table(table_md)
        assert '<code>var x = 1</code>' in rendered
        assert f'href="{link_url}"' in rendered
        assert '<img ' in rendered
        # Exactly one anchor: the explicit link; the image must not add one.
        assert rendered.count('<a ') == 1

    def test_image_url_with_query_string_in_table(self):
        """Image URL with & in query string inside table cell — & not mangled."""
        image_url = 'https://example.com/img?w=100&h=200'
        rendered = render_table(f"| Image |\n|---|\n| ![sized]({image_url}) |")
        assert f'src="{image_url}"' in rendered, (
            f"& in image URL must not be escaped. Got: {rendered}"
        )

    def test_image_adjacent_to_code_no_interference(self):
        """Image immediately followed by code span in same cell — no token cross-talk."""
        rendered = inline_md('![x](https://x.com/x.png) `code`')
        assert '<img ' in rendered
        assert '<code>code</code>' in rendered

    def test_image_inside_code_span_not_rendered(self):
        """An image syntax inside a backtick span must NOT render as an img tag."""
        rendered = inline_md('`![not an image](https://example.com/img.png)`')
        # Everything sits inside backticks, so it must stay literal code.
        assert '<img ' not in rendered, (
            f"Image syntax inside code span must not render as <img>. Got: {rendered}"
        )
        assert '<code>' in rendered

131
tests/test_issue487b.py Normal file
View File

@@ -0,0 +1,131 @@
r"""
Regression test for image src URL corruption by the autolink pass.
Bug: the _al_stash before the autolink pass only stashed <a> tags.
<img> tags produced by the ![alt](url) image pass were NOT stashed,
so the autolink regex matched the URL inside src="..." and wrapped it
in <a href="...">url</a>, producing src="<a href="...">url</a>"
a completely broken image source.
Fix: extend _al_stash regex to also stash <img> tags:
(<a\b[^>]*>[\s\S]*?<\/a>|<img\b[^>]*>)
"""
import pathlib
import re
# Repo root (tests/ lives one level below) and the raw ui.js source that the
# source-level assertions below grep through.
REPO_ROOT = pathlib.Path(__file__).parent.parent
# Read as UTF-8 explicitly — consistent with the other test modules — so the
# grep checks don't depend on the platform's default locale encoding.
UI_JS = (REPO_ROOT / "static" / "ui.js").read_text(encoding="utf-8")
# ── Source-level check ────────────────────────────────────────────────────────
def test_al_stash_includes_img_tags():
    """_al_stash regex must stash both <a> and <img> tags to protect src= from autolink."""
    # Accept either backslash depth — the pattern may appear inside a JS
    # string literal, which doubles the backslash.
    found = '<img\\b[^>]*>' in UI_JS or '<img\\\\b[^>]*>' in UI_JS
    assert found, (
        "_al_stash should include <img> tag pattern to prevent autolink mangling src= URLs"
    )
# ── Behaviour tests (Python mirror of fixed pipeline) ─────────────────────────
import html as _html
def esc(s):
    """HTML-escape *s* (including quotes), mirroring the ui.js esc() helper."""
    return _html.escape(str(s), quote=True)
# Mirror of the ui.js SAFE_TAGS allowlist: a tag matching this regex survives
# the escape pass; every other tag is HTML-escaped. Case-insensitive, and the
# ([\s>]|$) suffix prevents prefix matches (e.g. 'i' matching 'iframe').
SAFE_TAGS = re.compile(
    r'^</?(strong|em|code|pre|h[1-6]|ul|ol|li|table|thead|tbody|tr|th|td'
    r'|hr|blockquote|p|br|a|img|div|span)([\s>]|$)', re.I
)
def render_with_image_and_autolink(raw):
    """Simulate the image pass + SAFE_TAGS + _al_stash + autolink pipeline.

    Python mirror of the fixed ui.js rendering order:
    1. ![alt](url)  ->  <img ...>              (image pass)
    2. escape every tag not on the allowlist   (SAFE_TAGS pass)
    3. stash <a>/<img> behind placeholders     (_al_stash pass)
    4. wrap remaining bare URLs in <a>         (autolink pass)
    5. restore the stashed tags
    """
    s = raw
    # Image pass: only http(s) URLs match; '"' is percent-encoded so the URL
    # cannot break out of the src="..." attribute.
    s = re.sub(
        r'!\[([^\]]*)\]\((https?://[^\)]+)\)',
        lambda m: (
            f'<img src="{m.group(2).replace(chr(34), "%22")}" '
            f'alt="{esc(m.group(1))}" class="msg-media-img" loading="lazy">'
        ),
        s,
    )
    # SAFE_TAGS pass: any tag not on the allowlist is escaped to literal text.
    s = re.sub(
        r'</?[a-zA-Z][^>]*>',
        lambda m: m.group() if SAFE_TAGS.match(m.group()) else esc(m.group()),
        s,
    )
    # _al_stash (fixed: stashes both <a> and <img>) — each tag is swapped for
    # a \x00B<i>\x00 placeholder so the autolink regex can't see src=/href= URLs.
    al_stash = []
    s = re.sub(
        r'(<a\b[^>]*>[\s\S]*?<\/a>|<img\b[^>]*>)',
        lambda m: (al_stash.append(m.group(1)) or f'\x00B{len(al_stash)-1}\x00'),
        s,
    )
    # Autolink pass: wrap remaining bare URLs, keeping one trailing
    # punctuation character outside the generated link.
    def autolink(m):
        url = m.group(1)
        # NOTE(review): ')' is excluded by the URL char class below, so the
        # ')' entry in this trail set can never match here — harmless.
        trail = url[-1] if url[-1] in '.,;:!?)' else ''
        clean = url[:-1] if trail else url
        return f'<a href="{clean}" target="_blank" rel="noopener">{esc(clean)}</a>{trail}'
    s = re.sub(r'(https?://[^\s<>"\')\]]+)', autolink, s)
    # Restore the stashed <a>/<img> tags in place of their placeholders.
    s = re.sub(r'\x00B(\d+)\x00', lambda m: al_stash[int(m.group(1))], s)
    return s
def test_image_src_not_mangled_by_autolink():
    """The URL inside src= of a rendered <img> must not be wrapped in <a> by autolink."""
    url = ('https://upload.wikimedia.org/wikipedia/commons/thumb/4/47/'
           'PNG_transparency_demonstration_1.png/280px-PNG_transparency_demonstration_1.png')
    rendered = render_with_image_and_autolink(f'![alt]({url})')
    assert f'src="{url}"' in rendered, f"src= URL should be intact, got: {rendered[:200]}"
    # The URL inside src= must NOT be wrapped in <a>.
    src_value = rendered.split('src="')[1].split('"')[0]
    assert '<a ' not in src_value, f"src= must not contain <a> tag, got: {src_value}"
    assert src_value == url, f"src= URL mangled: expected {url}, got {src_value}"
def test_image_tag_renders_as_img():
    """![alt](url) must produce an <img> tag, not a plain link."""
    rendered = render_with_image_and_autolink('![Test image](https://example.com/img.png)')
    assert '<img ' in rendered, f"Expected <img> tag, got: {rendered}"
    assert 'src="https://example.com/img.png"' in rendered
    # No spurious <a> wrapper may appear anywhere in the output.
    assert '<a ' not in rendered
def test_image_and_link_in_same_paragraph():
    """Image and link in same paragraph must each render correctly without interference."""
    rendered = render_with_image_and_autolink(
        'See ![logo](https://example.com/logo.png) and visit https://example.com'
    )
    assert '<img ' in rendered, "Image should render"
    assert '<a ' in rendered, "Bare URL should autolink"
    # The img src value itself must stay free of any <a> markup.
    src_value = rendered.split('src="')[1].split('"')[0]
    assert '<a' not in src_value, f"src= mangled: {src_value}"
def test_image_count_is_one():
    """One ![alt](url) should produce exactly one <img> tag."""
    rendered = render_with_image_and_autolink('![test](https://example.com/x.png)')
    img_tags = rendered.count('<img ')
    assert img_tags == 1, f"Expected 1 <img>, got {img_tags}: {rendered}"
def test_multiple_images_not_mangled():
    """Multiple images in one message each get clean src= values."""
    urls = [
        'https://example.com/a.png',
        'https://example.com/b.png',
    ]
    markdown = '\n\n'.join(f'![img{i}]({u})' for i, u in enumerate(urls))
    rendered = render_with_image_and_autolink(markdown)
    for u in urls:
        assert f'src="{u}"' in rendered, f"src= for {u} mangled in: {rendered[:300]}"
def test_image_with_query_string_src_intact():
    """Image URL with & in query string must have & (not &amp;) in src."""
    url = 'https://example.com/img?w=100&h=200&fmt=png'
    rendered = render_with_image_and_autolink(f'![img]({url})')
    assert f'src="{url}"' in rendered, f"Query string URL mangled: {rendered[:200]}"
    src_value = rendered.split('src="')[1].split('"')[0]
    assert '&amp;' not in src_value

157
tests/test_issue569_579.py Normal file
View File

@@ -0,0 +1,157 @@
"""
Tests for fixes:
- #569: docker_init.bash auto-detects WANTED_UID/WANTED_GID from mounted workspace
so macOS users (UID 501) don't need to manually set the env var.
- #579: Topbar message count already filters tool messages (role !== 'tool') —
confirmed present. Closing as already fixed by #584 which removed the
sidebar meta row (the only place raw message_count was ever displayed).
"""
import pathlib
import re
# Repo root (tests/ is one level down) plus the two files the source-level
# assertions in this module grep through.
REPO_ROOT = pathlib.Path(__file__).parent.parent
INIT_SH = (REPO_ROOT / "docker_init.bash").read_text(encoding="utf-8")
UI_JS = (REPO_ROOT / "static" / "ui.js").read_text(encoding="utf-8")
# ── #569: docker UID/GID auto-detect ─────────────────────────────────────────
def test_569_uid_autodetect_present():
    """docker_init.bash must have workspace-based UID auto-detection (#569)."""
    # The original check or-ed two literals that were character-for-character
    # identical ("stat -c '%u'" vs 'stat -c \'%u\''); one membership test
    # is exactly equivalent.
    assert "stat -c '%u'" in INIT_SH, (
        "docker_init.bash must use stat to read workspace UID (#569)"
    )
def test_569_gid_autodetect_present():
    """docker_init.bash must have workspace-based GID auto-detection (#569)."""
    # As with the UID test above, the two or-ed literals in the original
    # were the identical string; a single check suffices.
    assert "stat -c '%g'" in INIT_SH, (
        "docker_init.bash must use stat to read workspace GID (#569)"
    )
def test_569_autodetect_before_usermod():
    """UID auto-detect must appear before usermod call in docker_init.bash."""
    # Prefer the exact stat invocation; fall back to a looser match.
    exact = INIT_SH.find("stat -c '%u'")
    detect_pos = exact if exact != -1 else INIT_SH.find("stat -c")
    usermod_pos = INIT_SH.find("sudo usermod")
    assert detect_pos != -1, "stat UID detection not found"
    assert usermod_pos != -1, "sudo usermod not found"
    assert detect_pos < usermod_pos, (
        "UID auto-detect must occur before 'sudo usermod' so the correct UID "
        "is used when remapping the hermeswebui user"
    )
def test_569_skips_root_uid():
    """Auto-detect must not use UID 0 (root-owned mount = untrustworthy)."""
    start = INIT_SH.find("Auto-detect from mounted volumes")
    assert start != -1, "auto-detect comment block not found"
    snippet = INIT_SH[start:start + 1200]
    zero_guard = '"0"' in snippet or "'0'" in snippet
    assert zero_guard, (
        "Auto-detect block must skip UID 0 to avoid incorrectly using root ownership"
    )
def test_569_fallback_preserved():
    """Hardcoded default 1024 fallback must still exist after auto-detect."""
    # Check UID first, then GID, matching the original assertion order.
    for fallback, message in (
        ("WANTED_UID=${WANTED_UID:-1024}",
         "WANTED_UID default fallback must remain so explicit env var still works"),
        ("WANTED_GID=${WANTED_GID:-1024}",
         "WANTED_GID default fallback must remain"),
    ):
        assert fallback in INIT_SH, message
# ── #668: UID/GID auto-detect from hermes-home shared volume (two-container) ──
def test_668_uid_autodetect_checks_hermes_home():
    """docker_init.bash must probe hermes-home dirs for UID in two-container setups.

    When hermes-agent and hermes-webui run in separate containers sharing a
    named volume, /workspace may not exist but ~/.hermes will be owned by the
    agent's UID. The init script must probe it so the webui user is remapped
    to match (#668).
    """
    probe_dir = "/home/hermeswebui/.hermes"
    assert probe_dir in INIT_SH, (
        "docker_init.bash must probe /home/hermeswebui/.hermes for UID detection "
        "to support two-container setups where /workspace may not exist (#668)"
    )
def test_668_gid_autodetect_checks_hermes_home():
    """docker_init.bash must probe hermes-home dirs for GID in two-container setups (#668)."""
    # UID and GID detection share the same probe dirs — verify the GID block too.
    marker = INIT_SH.find("Auto-detect GID from mounted volumes")
    assert marker != -1, (
        "GID auto-detect comment must be updated to mention shared volumes (#668)"
    )
    snippet = INIT_SH[marker:marker + 600]
    assert "/home/hermeswebui/.hermes" in snippet or "HERMES_HOME" in snippet, (
        "GID auto-detect block must probe hermes-home dirs (#668)"
    )
def test_668_uid_probe_loop_uses_break():
    """UID probe loop must stop on first match (no double-detection)."""
    marker = INIT_SH.find("Auto-detect from mounted volumes")
    assert marker != -1, "UID auto-detect comment not found"
    snippet = INIT_SH[marker:marker + 1200]
    assert "break" in snippet, (
        "UID probe loop must break after first successful detection "
        "to avoid being overridden by a later probe dir (#668)"
    )
def test_668_hermes_home_probe_before_workspace():
    """Hermes-home probe must appear before /workspace probe in docker_init.bash (#668)."""
    home_probe = INIT_SH.find("/home/hermeswebui/.hermes")
    workspace_probe = INIT_SH.find('if [ -d "/workspace" ]')
    assert home_probe != -1, "/home/hermeswebui/.hermes probe not found"
    assert workspace_probe != -1, "/workspace probe not found"
    assert home_probe < workspace_probe, (
        "Hermes-home probe must come before /workspace probe — "
        "shared volume UID should take priority over workspace UID (#668)"
    )
# ── #579: topbar message count already filters tool messages ──────────────────
def test_579_topbar_filters_tool_messages():
    """ui.js topbar count must filter out role='tool' messages (#579).

    The sidebar previously showed raw message_count (which included tool
    messages), causing a mismatch with the topbar. PR #584 removed the
    sidebar count display entirely; the topbar was already correct.
    This test locks in the existing topbar filter so it can't regress.
    """
    # Find the topbarMeta assignment
    meta_pos = UI_JS.find("topbarMeta")
    assert meta_pos != -1, "topbarMeta assignment not found in ui.js"
    # The filter feeding the count sits just before the assignment.
    context = UI_JS[max(0, meta_pos - 400):meta_pos + 100]
    assert "role" in context and "tool" in context, (
        "topbarMeta count must filter by role — "
        "messages with role='tool' must be excluded from the displayed count"
    )
    # Accept the common JS spellings of the exclusion. The original third
    # disjunct "role!=='tool'" was redundant (it already contains
    # "!=='tool'"); the spaced strict form "!== 'tool'" is accepted instead.
    assert ("!=='tool'" in context or "!== 'tool'" in context
            or "!= 'tool'" in context), (
        "topbar count filter must use !== 'tool' to exclude tool messages"
    )
def test_579_sidebar_no_longer_shows_raw_count():
    """sessions.js must not reference message_count in the render path (#579).

    After PR #584, the sidebar no longer shows message_count at all,
    eliminating the inconsistency between sidebar (raw) and topbar (filtered).
    """
    sidebar_src = (REPO_ROOT / "static" / "sessions.js").read_text(encoding="utf-8")
    # The client-side session renderer must not mention the raw counter.
    assert "message_count" not in sidebar_src, (
        "sessions.js must not reference message_count — "
        "the meta row that displayed it was removed in PR #584"
    )

View File

@@ -0,0 +1,56 @@
"""
Regression tests for GitHub issue #570 follow-up:
PermissionError from SETTINGS_FILE.exists() in Docker UID-mismatch scenarios.
When ~/.hermes is owned by a different UID than the container user (common in
Docker setups), Path.exists() raises PermissionError instead of returning False.
load_settings() must treat that as "file not accessible = use defaults" rather
than propagating the exception up to crash the request handler.
"""
import stat
import pytest
import api.config as config
def test_load_settings_returns_defaults_when_settings_file_unreadable(monkeypatch, tmp_path):
    """PermissionError from SETTINGS_FILE.exists() must not propagate — return defaults instead.

    Regression for issue #570 comment: Docker UID mismatch caused every request
    to 500 because load_settings() called SETTINGS_FILE.exists() without catching OSError.
    """
    import os
    # chmod-based restrictions do not apply to root (e.g. Docker CI), so the
    # stat() below would succeed and the test would fail for the wrong reason.
    if hasattr(os, "geteuid") and os.geteuid() == 0:
        pytest.skip("chmod permission restrictions are ineffective when running as root")
    state_dir = tmp_path / "state"
    state_dir.mkdir()
    settings_file = state_dir / "settings.json"
    # Create the file then make the parent unreadable so .exists() raises PermissionError
    settings_file.write_text('{"send_key": "ctrl+enter"}', encoding="utf-8")
    state_dir.chmod(stat.S_IWUSR)  # write-only: stat() on children will fail
    monkeypatch.setattr(config, "SETTINGS_FILE", settings_file)
    try:
        result = config.load_settings()
        # Must not raise; must return a dict with default values
        assert isinstance(result, dict)
        assert "send_key" in result
        # The corrupted/inaccessible value should NOT appear — defaults win
        assert result["send_key"] == config._SETTINGS_DEFAULTS["send_key"]
    finally:
        state_dir.chmod(stat.S_IRWXU)  # restore for cleanup
def test_load_settings_returns_defaults_when_exists_raises_permission_error(monkeypatch, tmp_path):
    """Direct simulation: monkeypatch SETTINGS_FILE.exists to raise PermissionError."""
    from unittest import mock
    settings_file = tmp_path / "state" / "settings.json"
    settings_file.parent.mkdir()
    monkeypatch.setattr(config, "SETTINGS_FILE", settings_file)
    # Patch .exists on the concrete Path class so load_settings hits the error.
    exists_patch = mock.patch.object(
        type(settings_file), "exists", side_effect=PermissionError("Permission denied")
    )
    with exists_patch:
        result = config.load_settings()
    assert isinstance(result, dict)
    assert result["send_key"] == config._SETTINGS_DEFAULTS["send_key"]

205
tests/test_issue572.py Normal file
View File

@@ -0,0 +1,205 @@
"""Tests for issue #572: onboarding must not fire or overwrite config for
providers not in the quick-setup list (minimax-cn, deepseek, xai, etc.).
Root cause: _provider_api_key_present() only knew about the four providers in
_SUPPORTED_PROVIDER_SETUPS. For any other provider it returned False, causing
chat_ready=False, which made the wizard fire even when the user was fully
configured. The second part of the fix ensures _saveOnboardingProviderSetup()
in the frontend also skips the POST when current_is_oauth is set.
Covers:
1. _provider_api_key_present returns True for minimax-cn when
MINIMAX_CN_API_KEY is in env (via hermes_cli.auth.get_auth_status)
2. _status_from_runtime gives chat_ready=True for minimax-cn with a key set
3. get_onboarding_status returns completed=True for a fully-configured
unsupported provider when config.yaml exists
4. The hermes_cli import failure path is safe (falls back gracefully)
"""
from __future__ import annotations
import os
import pathlib
import sys
import types
from unittest import mock
import pytest
def _inject_hermes_cli_auth(get_auth_status_return):
"""Inject a minimal hermes_cli.auth stub into sys.modules.
CI doesn't install hermes_cli (it's a separate package). Tests that
exercise the hermes_cli fallback path must inject the module themselves
rather than relying on mock.patch('hermes_cli.auth.get_auth_status')
which fails with ModuleNotFoundError when the module isn't installed.
"""
mock_auth = types.ModuleType("hermes_cli.auth")
mock_auth.get_auth_status = mock.MagicMock(return_value=get_auth_status_return)
mock_hermes_cli = types.ModuleType("hermes_cli")
return mock.patch.dict(sys.modules, {
"hermes_cli": mock_hermes_cli,
"hermes_cli.auth": mock_auth,
})
# ---------------------------------------------------------------------------
# Helper
# ---------------------------------------------------------------------------
def _call_provider_api_key_present(provider: str, cfg: dict | None = None,
                                   env_values: dict | None = None):
    """Call api.onboarding._provider_api_key_present with defaulted arguments.

    The original annotations declared ``cfg: dict = None`` — a type lie;
    ``dict | None`` is correct (the module has ``from __future__ import
    annotations``, so the union syntax is safe on older interpreters).
    The lazy import keeps a broken api.onboarding from failing collection.
    """
    from api.onboarding import _provider_api_key_present
    return _provider_api_key_present(provider, cfg or {}, env_values or {})
# ---------------------------------------------------------------------------
# 1. _provider_api_key_present via hermes_cli fallback
# ---------------------------------------------------------------------------
class TestProviderApiKeyPresentFallback:
    """_provider_api_key_present must fall back to hermes_cli.auth for providers
    outside _SUPPORTED_PROVIDER_SETUPS, and fail safe when hermes_cli is absent
    (issue #572)."""

    def test_minimax_cn_logged_in_returns_true(self):
        """minimax-cn: if hermes_cli.auth.get_auth_status returns logged_in, must be True."""
        # Pin the supported-provider map so "minimax-cn" is guaranteed NOT in
        # it and the hermes_cli fallback branch is exercised.
        with mock.patch("api.onboarding._SUPPORTED_PROVIDER_SETUPS", {
            "openrouter": {}, "anthropic": {}, "openai": {}, "custom": {}
        }):
            with _inject_hermes_cli_auth({"logged_in": True}):
                result = _call_provider_api_key_present("minimax-cn")
                assert result is True

    def test_unsupported_provider_logged_out_returns_false(self):
        """Unsupported provider with no key → False, no crash."""
        with mock.patch("api.onboarding._SUPPORTED_PROVIDER_SETUPS", {
            "openrouter": {}, "anthropic": {}, "openai": {}, "custom": {}
        }):
            with _inject_hermes_cli_auth({"logged_in": False}):
                result = _call_provider_api_key_present("deepseek")
                assert result is False

    def test_hermes_cli_import_failure_is_safe(self):
        """If hermes_cli is unavailable, falls back silently to False."""
        import builtins
        real_import = builtins.__import__
        # Simulate an environment without hermes_cli by failing only imports
        # of that package; everything else passes through to the real import.
        def _block_hermes_cli(name, *args, **kwargs):
            if name.startswith("hermes_cli"):
                raise ImportError("hermes_cli not available")
            return real_import(name, *args, **kwargs)
        with mock.patch("api.onboarding._SUPPORTED_PROVIDER_SETUPS", {
            "openrouter": {}, "anthropic": {}, "openai": {}, "custom": {}
        }):
            with mock.patch("builtins.__import__", side_effect=_block_hermes_cli):
                result = _call_provider_api_key_present("minimax-cn")
                assert result is False  # safe fallback

    def test_supported_provider_still_works_without_fallback(self):
        """openrouter with env key must still succeed via the original path."""
        from api.onboarding import _provider_api_key_present, _SUPPORTED_PROVIDER_SETUPS
        env_values = {"OPENROUTER_API_KEY": "sk-test"}
        result = _provider_api_key_present("openrouter", {}, env_values)
        assert result is True

    def test_inline_api_key_in_cfg_still_works(self):
        """model.api_key in config.yaml must be recognized for any provider."""
        cfg = {"model": {"provider": "minimax-cn", "default": "MiniMax-M2.7", "api_key": "key123"}}
        result = _call_provider_api_key_present("minimax-cn", cfg)
        assert result is True
# ---------------------------------------------------------------------------
# 2. _status_from_runtime: unsupported provider with key → chat_ready=True
# ---------------------------------------------------------------------------
class TestStatusFromRuntimeUnsupportedProvider:
    """_status_from_runtime must report chat_ready for ANY provider with a
    key — not only the four quick-setup providers (issue #572)."""

    def _run(self, provider: str, model: str, api_key_present: bool, oauth_present: bool = False):
        # Drive _status_from_runtime with its external dependencies stubbed:
        # hermes binary "found", empty .env, fixed home dir, and forced
        # key / oauth detection results.
        from api.onboarding import _status_from_runtime
        cfg = {"model": {"provider": provider, "default": model}}
        with (
            mock.patch("api.onboarding._HERMES_FOUND", True),
            mock.patch("api.onboarding._load_env_file", return_value={}),
            mock.patch("api.onboarding._get_active_hermes_home", return_value=pathlib.Path("/tmp")),
            mock.patch("api.onboarding._provider_api_key_present", return_value=api_key_present),
            mock.patch("api.onboarding._provider_oauth_authenticated", return_value=oauth_present),
        ):
            return _status_from_runtime(cfg, True)

    def test_minimax_cn_with_key_gives_chat_ready(self):
        """minimax-cn + api key present → chat_ready must be True."""
        result = self._run("minimax-cn", "MiniMax-M2.7", api_key_present=True)
        assert result["chat_ready"] is True, f"Expected chat_ready=True, got: {result}"
        assert result["provider_ready"] is True
        assert result["setup_state"] == "ready"

    def test_deepseek_with_key_gives_chat_ready(self):
        """deepseek + api key → chat_ready."""
        result = self._run("deepseek", "deepseek-chat", api_key_present=True)
        assert result["chat_ready"] is True

    def test_unsupported_provider_no_key_no_oauth_gives_not_ready(self):
        """No key, no oauth → provider_ready=False."""
        result = self._run("minimax-cn", "MiniMax-M2.7", api_key_present=False, oauth_present=False)
        assert result["chat_ready"] is False
        assert result["provider_ready"] is False

    def test_oauth_provider_still_works_via_oauth_path(self):
        """openai-codex (OAuth) with no api_key but oauth present → ready."""
        result = self._run("openai-codex", "codex-model", api_key_present=False, oauth_present=True)
        assert result["chat_ready"] is True
# ---------------------------------------------------------------------------
# 3. get_onboarding_status: minimax-cn fully configured → completed=True
# ---------------------------------------------------------------------------
class TestOnboardingStatusUnsupportedProvider:
    """get_onboarding_status must auto-complete for a fully configured
    provider even when it is absent from the quick-setup list (issue #572)."""

    def _make_status(self, chat_ready: bool, provider: str = "minimax-cn"):
        # Build get_onboarding_status() output with everything beneath it
        # mocked out, so only the completed / current_is_oauth decision
        # logic in get_onboarding_status itself runs.
        import api.onboarding as mod
        fake_config_path = pathlib.Path("/tmp/_test_572_config.yaml")
        cfg = {"model": {"provider": provider, "default": "MiniMax-M2.7"}}
        # Canned _status_from_runtime result for the requested readiness.
        runtime = {
            "chat_ready": chat_ready,
            "provider_configured": True,
            "provider_ready": chat_ready,
            "setup_state": "ready" if chat_ready else "provider_incomplete",
            "provider_note": "test",
            "current_provider": provider,
            "current_model": "MiniMax-M2.7",
            "current_base_url": None,
            "env_path": "/tmp/.env",
        }
        with (
            mock.patch.object(mod, "load_settings", return_value={}),
            mock.patch.object(mod, "get_config", return_value=cfg),
            mock.patch.object(mod, "verify_hermes_imports", return_value=(True, [], {})),
            mock.patch.object(mod, "_status_from_runtime", return_value=runtime),
            mock.patch.object(mod, "load_workspaces", return_value=[]),
            mock.patch.object(mod, "get_last_workspace", return_value=None),
            mock.patch.object(mod, "get_available_models", return_value=[]),
            mock.patch.object(mod, "_get_config_path", return_value=fake_config_path),
            # NOTE(review): this patches Path.exists globally (every Path
            # instance), not just fake_config_path — any .exists() called
            # inside get_onboarding_status reports True in this context.
            mock.patch.object(pathlib.Path, "exists", return_value=True),
        ):
            return mod.get_onboarding_status()

    def test_minimax_cn_chat_ready_skips_wizard(self):
        """minimax-cn + chat_ready=True + config.yaml exists → wizard must NOT fire."""
        result = self._make_status(chat_ready=True)
        assert result["completed"] is True, (
            "Wizard fired for minimax-cn user with valid config! "
            "config.yaml + chat_ready=True must auto-complete onboarding regardless of provider."
        )

    def test_minimax_cn_not_ready_shows_wizard(self):
        """minimax-cn + chat_ready=False → wizard fires so user can fix it."""
        result = self._make_status(chat_ready=False)
        assert result["completed"] is False

    def test_current_is_oauth_set_for_unsupported_provider(self):
        """setup.current_is_oauth must be True for minimax-cn (not in quick-setup list)."""
        result = self._make_status(chat_ready=True)
        assert result["setup"]["current_is_oauth"] is True, (
            "current_is_oauth should be True for providers not in _SUPPORTED_PROVIDER_SETUPS"
        )

98
tests/test_issue607.py Normal file
View File

@@ -0,0 +1,98 @@
"""Tests for PR #648 — Gemma 4 thinking token stripping (closes #607)."""
import re
import pathlib
import pytest
# ---------------------------------------------------------------------------
# _strip_thinking_markup tests
# ---------------------------------------------------------------------------
from api.streaming import _strip_thinking_markup, _looks_invalid_generated_title
class TestGemma4ThinkingTokenStrip:
    """Gemma 4 '<|turn|>thinking ... <turn|>' blocks must be stripped from output."""

    def test_strip_gemma4_basic(self):
        """Basic Gemma 4 thinking block stripped, answer kept."""
        text = "<|turn|>thinking\nSome internal reasoning\n<turn|>Final answer"
        assert _strip_thinking_markup(text) == "Final answer"

    def test_strip_gemma4_multiline_reasoning(self):
        """Multi-line reasoning block stripped cleanly."""
        text = "<|turn|>thinking\nLine 1\nLine 2\nLine 3\n<turn|>Answer here"
        assert _strip_thinking_markup(text) == "Answer here"

    def test_strip_gemma4_no_thinking_passthrough(self):
        """Normal response without thinking tokens passes through unchanged."""
        text = "Normal response without thinking tokens"
        assert _strip_thinking_markup(text) == text

    def test_strip_gemma4_with_leading_whitespace(self):
        """Leading whitespace before the thinking block is handled."""
        text = "\n\n<|turn|>thinking\nReasoning\n<turn|>Answer"
        assert _strip_thinking_markup(text) == "Answer"

    def test_strip_gemma4_empty_reasoning(self):
        """Empty reasoning block (just delimiters) is stripped."""
        text = "<|turn|>thinking\n<turn|>Response"
        assert _strip_thinking_markup(text) == "Response"

    def test_strip_gemma4_case_insensitive(self):
        """Pattern is case-insensitive (though Gemma 4 uses fixed case)."""
        stripped = _strip_thinking_markup("<|TURN|>THINKING\nreasoning\n<TURN|>answer")
        # The regex uses re.IGNORECASE — the uppercase variant must go too.
        assert "THINKING" not in stripped
        assert "reasoning" not in stripped

    def test_existing_think_tag_still_works(self):
        """Ensure <think>...</think> still stripped (no regression)."""
        assert _strip_thinking_markup("<think>inner reasoning</think>Final") == "Final"

    def test_existing_channel_tag_still_works(self):
        """Ensure <|channel|>thought...</channel|> still stripped."""
        assert _strip_thinking_markup("<|channel|>thoughtSome reasoning<channel|>Answer") == "Answer"
class TestGemma4TitleLeakDetection:
    """_looks_invalid_generated_title must flag titles containing a Gemma 4 leak."""

    def test_detects_gemma4_leak_in_title(self):
        """A generated title containing raw thinking markup is rejected."""
        leaked = "<|turn|>thinking\nUser asked about X\n<turn|>Session Title"
        assert _looks_invalid_generated_title(leaked) is True

    def test_clean_title_not_flagged(self):
        """A normal human-readable title passes the validity check."""
        clean = "Python debugging session"
        assert _looks_invalid_generated_title(clean) is False
class TestGemma4MessagesJsThinkPairs:
    """Verify static/messages.js ships the correct Gemma 4 delimiter pair."""

    @staticmethod
    def _messages_js() -> str:
        """Read the shipped static/messages.js (path relative to the repo cwd)."""
        return pathlib.Path("static/messages.js").read_text()

    def test_messages_js_has_correct_gemma4_open(self):
        """The double-pipe open delimiter must be present."""
        assert "<|turn|>thinking" in self._messages_js(), (
            "messages.js is missing correct Gemma 4 open delimiter '<|turn|>thinking'"
        )

    def test_messages_js_no_wrong_gemma4_open(self):
        """The broken single-pipe variant must be gone."""
        assert "<|turn>thinking" not in self._messages_js(), (
            "messages.js still contains wrong Gemma 4 delimiter '<|turn>thinking' (missing |)"
        )

    def test_messages_js_has_gemma4_close(self):
        """The close delimiter must be present."""
        assert "<turn|>" in self._messages_js(), "messages.js missing Gemma 4 close delimiter '<turn|>'"

107
tests/test_issue609.py Normal file
View File

@@ -0,0 +1,107 @@
"""
Tests for GitHub issue #609 — Docker workspace path trust and env-var priority.
Two independent bugs were fixed:
1. HERMES_WEBUI_DEFAULT_WORKSPACE env var was silently overridden by
settings.json at server startup. The env var must always win.
2. resolve_trusted_workspace() rejected paths that are children of
DEFAULT_WORKSPACE (e.g. /data/workspace/project) when the default is a
Docker volume mount outside the user's home directory. Any path under
the boot-time default should be trusted automatically.
"""
from pathlib import Path
import pytest
from api.workspace import resolve_trusted_workspace
# ── Fix 2: trust paths under DEFAULT_WORKSPACE ───────────────────────────────
def test_subdir_of_boot_default_is_trusted(monkeypatch, tmp_path):
    """A subdirectory of BOOT_DEFAULT_WORKSPACE must be trusted even when it is
    neither in the saved workspace list nor under the user's home directory.

    This is the core Docker case: DEFAULT_WORKSPACE=/data/workspace and the
    user opens /data/workspace/myproject — no ValueError may be raised.
    """
    import api.workspace as ws_mod
    default_ws = tmp_path / "data" / "workspace"
    project_dir = default_ws / "myproject"
    project_dir.mkdir(parents=True)
    monkeypatch.setattr(ws_mod, "_BOOT_DEFAULT_WORKSPACE", str(default_ws))
    # Must resolve without raising: project_dir sits under the boot default.
    assert resolve_trusted_workspace(str(project_dir)) == project_dir.resolve()
def test_boot_default_itself_is_trusted(monkeypatch, tmp_path):
    """The DEFAULT_WORKSPACE path itself must be trusted, not only its children."""
    import api.workspace as ws_mod
    default_ws = tmp_path / "data" / "workspace"
    default_ws.mkdir(parents=True)
    monkeypatch.setattr(ws_mod, "_BOOT_DEFAULT_WORKSPACE", str(default_ws))
    assert resolve_trusted_workspace(str(default_ws)) == default_ws.resolve()
def test_path_outside_boot_default_and_home_is_rejected(monkeypatch, tmp_path):
    """A path under neither home, the saved list, nor DEFAULT_WORKSPACE stays rejected."""
    import api.workspace as ws_mod
    default_ws = tmp_path / "data" / "workspace"
    default_ws.mkdir(parents=True)
    foreign = tmp_path / "other_mount" / "secret"
    foreign.mkdir(parents=True)
    monkeypatch.setattr(ws_mod, "_BOOT_DEFAULT_WORKSPACE", str(default_ws))
    with pytest.raises(ValueError, match="outside the user home"):
        resolve_trusted_workspace(str(foreign))
def test_none_path_returns_boot_default(monkeypatch, tmp_path):
    """Passing None always yields the boot default unchanged."""
    import api.workspace as ws_mod
    default_ws = tmp_path / "data" / "workspace"
    default_ws.mkdir(parents=True)
    monkeypatch.setattr(ws_mod, "_BOOT_DEFAULT_WORKSPACE", str(default_ws))
    assert resolve_trusted_workspace(None) == default_ws.resolve()
def test_path_traversal_via_dotdot_does_not_escape_boot_default(monkeypatch, tmp_path):
    """`..` segments must not let a path escape DEFAULT_WORKSPACE trust.

    Path.resolve() collapses `..` before the relative_to(boot_default) check
    runs, so `<default>/../private` resolves to a sibling directory that is
    neither under home nor under the boot default — it must be rejected.
    (System-root escapes like `/etc` are also caught by an earlier block;
    this test pins the behavior in case the condition order ever changes.)
    """
    import api.workspace as ws_mod
    default_ws = tmp_path / "data" / "workspace"
    default_ws.mkdir(parents=True)
    private_dir = tmp_path / "data" / "private"
    private_dir.mkdir(parents=True)
    monkeypatch.setattr(ws_mod, "_BOOT_DEFAULT_WORKSPACE", str(default_ws))
    # `<default>/../private` resolves to tmp_path/data/private — not a child
    # of the boot default and not under home, so it must be refused.
    breakout = default_ws / ".." / "private"
    with pytest.raises(ValueError, match="outside the user home"):
        resolve_trusted_workspace(str(breakout))

125
tests/test_issue644.py Normal file
View File

@@ -0,0 +1,125 @@
"""Tests for PR #644 — load provider models from config.yaml in get_available_models()."""
import pytest
import api.config as _cfg
def _available_models_with_cfg(cfg_override):
    """Run get_available_models() with config.cfg temporarily replaced by *cfg_override*.

    The original cfg contents are snapshotted up front and restored in a
    finally block, so a failing call cannot leak the override into other tests.
    """
    saved = dict(_cfg.cfg)
    _cfg.cfg.clear()
    _cfg.cfg.update(cfg_override)
    try:
        return _cfg.get_available_models()
    finally:
        _cfg.cfg.clear()
        _cfg.cfg.update(saved)
class TestConfigYamlModelsLoading:
    """Verify that providers with explicit models in config.yaml use those models.

    Covers the PR #644 change: get_available_models() must honour a ``models``
    key under ``providers.<name>`` — both the dict form (model_id -> metadata)
    and the plain list form — instead of consulting only _PROVIDER_MODELS.
    """

    def test_provider_in_config_but_not_provider_models_gets_cfg_models(self):
        """A provider only in cfg.providers (not _PROVIDER_MODELS) should appear
        with its configured model list instead of being skipped entirely."""
        cfg = {
            "model": {"provider": "my-custom-llm"},
            "providers": {
                "my-custom-llm": {
                    "base_url": "http://custom.local/v1",
                    "models": ["custom-model-a", "custom-model-b"],
                }
            },
        }
        result = _available_models_with_cfg(cfg)
        provider_names = [g["provider"] for g in result["groups"]]
        # The provider must appear (previously it was silently skipped).
        # BUGFIX: the original test computed this `any(...)` but never
        # asserted it, so the "provider appears" requirement went unchecked.
        assert any("my-custom-llm" in n.lower() for n in provider_names), (
            f"my-custom-llm group missing from providers: {provider_names}"
        )
        # Its model list must include the configured entries.
        for g in result["groups"]:
            if "custom" in g["provider"].lower():
                model_ids = [m["id"] for m in g["models"]]
                assert any("custom-model-a" in mid for mid in model_ids), (
                    f"custom-model-a not in group models: {model_ids}"
                )

    def test_provider_models_dict_format_expanded(self):
        """models: {model_id: {context_length: ...}} — keys become model IDs."""
        cfg = {
            "model": {"provider": "anthropic"},
            "providers": {
                "anthropic": {
                    "models": {
                        "claude-custom-1": {"context_length": 200000},
                        "claude-custom-2": {"context_length": 100000},
                    }
                }
            },
        }
        result = _available_models_with_cfg(cfg)
        # Find the Anthropic group; fail loudly if it is absent — the original
        # loop passed vacuously when the group was missing.
        for g in result["groups"]:
            if g["provider"] == "Anthropic":
                model_ids = [m["id"] for m in g["models"]]
                assert "claude-custom-1" in model_ids, (
                    f"claude-custom-1 not in Anthropic models: {model_ids}"
                )
                assert "claude-custom-2" in model_ids, (
                    f"claude-custom-2 not in Anthropic models: {model_ids}"
                )
                break
        else:
            pytest.fail("Anthropic group missing from get_available_models() output")

    def test_provider_models_list_format_expanded(self):
        """models: [model_id, ...] — items become model IDs."""
        cfg = {
            "model": {"provider": "anthropic"},
            "providers": {
                "anthropic": {
                    "models": ["claude-list-only-1", "claude-list-only-2"],
                }
            },
        }
        result = _available_models_with_cfg(cfg)
        for g in result["groups"]:
            if g["provider"] == "Anthropic":
                model_ids = [m["id"] for m in g["models"]]
                assert "claude-list-only-1" in model_ids, (
                    f"claude-list-only-1 not in Anthropic models: {model_ids}"
                )
                break
        else:
            pytest.fail("Anthropic group missing from get_available_models() output")

    def test_provider_in_provider_models_but_no_cfg_override_unchanged(self):
        """When no models key in cfg.providers, hardcoded _PROVIDER_MODELS still used."""
        cfg = {
            "model": {"provider": "anthropic"},
            "providers": {
                "anthropic": {
                    "api_key": "sk-test",
                    # No 'models' key
                }
            },
        }
        result = _available_models_with_cfg(cfg)
        raw_ids = {m["id"] for m in _cfg._PROVIDER_MODELS.get("anthropic", [])}
        for g in result["groups"]:
            if g["provider"] == "Anthropic":
                returned_ids = {m["id"] for m in g["models"]}
                # The hardcoded models must still be present.
                overlap = raw_ids & returned_ids
                assert overlap, (
                    f"No _PROVIDER_MODELS models found in Anthropic group. "
                    f"Expected subset of {raw_ids}, got {returned_ids}"
                )
                break
        else:
            pytest.fail("Anthropic group missing from get_available_models() output")

    def test_non_dict_models_value_falls_through_gracefully(self):
        """If models value is neither dict nor list (e.g. null), no crash."""
        cfg = {
            "model": {"provider": "anthropic"},
            "providers": {
                "anthropic": {"models": None},  # invalid — should not crash
            },
        }
        # Should not raise
        result = _available_models_with_cfg(cfg)
        assert "groups" in result

54
tests/test_issue646.py Normal file
View File

@@ -0,0 +1,54 @@
"""Tests for PR #649 — empty DEFAULT_MODEL does not inject blank model entries."""
import pytest
from api import config as cfg
class TestEmptyDefaultModel:
    """Verify that DEFAULT_MODEL='' does not produce blank model entries."""

    @staticmethod
    def _default_groups(default_model):
        """Replicate the PR #649 'Default group' branch for a given model id.

        Returns the group list the guarded branch would build: empty when the
        id is falsy, otherwise one 'Default' group whose single model uses the
        id's last path segment as its label.
        """
        groups = []
        if default_model:
            label = default_model.split("/")[-1] if "/" in default_model else default_model
            groups.append(
                {"provider": "Default", "models": [{"id": default_model, "label": label}]}
            )
        return groups

    def test_no_empty_id_when_default_model_is_empty(self, monkeypatch):
        """With empty DEFAULT_MODEL, no model entry should have id='' or label=''."""
        monkeypatch.setattr(cfg, "DEFAULT_MODEL", "")
        # Exercise the guarded "no providers detected" branch: an empty
        # default model must not append a Default group with a blank model.
        groups = self._default_groups(cfg.DEFAULT_MODEL)
        assert len(groups) == 0, "Empty default_model should not create any group"

    def test_no_empty_id_when_default_model_is_set(self, monkeypatch):
        """With a real DEFAULT_MODEL, the Default group should be created normally."""
        monkeypatch.setattr(cfg, "DEFAULT_MODEL", "openrouter/mistralai/mistral-7b-instruct")
        groups = self._default_groups(cfg.DEFAULT_MODEL)
        assert len(groups) == 1
        assert groups[0]["models"][0]["id"] == "openrouter/mistralai/mistral-7b-instruct"
        assert groups[0]["models"][0]["label"] == "mistral-7b-instruct"

    def test_default_model_env_var_empty_string_accepted(self, monkeypatch):
        """Empty string is a valid DEFAULT_MODEL value — no KeyError or crash."""
        import os
        monkeypatch.setenv("HERMES_WEBUI_DEFAULT_MODEL", "")
        # The env-var resolution pattern must hand back the empty string...
        val = os.getenv("HERMES_WEBUI_DEFAULT_MODEL", "")
        assert val == ""
        # ...and the `if default_model:` guard treats it as falsy.
        assert not val

135
tests/test_issue677.py Normal file
View File

@@ -0,0 +1,135 @@
"""
Tests for fix #677: auto-scroll override during streaming.
The scroll system has a _scrollPinned flag and scrollIfPinned() to respect
user scroll position. The bug was that scrollToBottom() was called
unconditionally inside renderMessages() and appendThinking(), even during
an active stream — overriding any scroll position the user had set.
"""
import pathlib
import re
REPO = pathlib.Path(__file__).parent.parent
UI_JS = (REPO / "static" / "ui.js").read_text(encoding="utf-8")
INDEX_HTML = (REPO / "static" / "index.html").read_text(encoding="utf-8")
STYLE_CSS = (REPO / "static" / "style.css").read_text(encoding="utf-8")
class TestScrollPinningFix:
    """Static checks on ui.js / index.html / style.css for the #677 scroll fix.

    These tests grep the shipped frontend assets rather than executing them:
    they pin the scrollIfPinned()/activeStreamId guards, the re-pin threshold,
    and the scroll-to-bottom button wiring.
    """

    def test_render_messages_respects_active_stream(self):
        """renderMessages() must not call scrollToBottom() while streaming (#677).

        During an active stream, scrollToBottom() unconditionally re-pins scroll
        and overrides the user's position. renderMessages() must use scrollIfPinned()
        instead when S.activeStreamId is set.
        """
        # Find renderMessages function
        rm_start = UI_JS.find("function renderMessages()")
        assert rm_start != -1, "renderMessages() not found in ui.js"
        # Slice runs to the next top-level `function ` declaration.
        rm_end = UI_JS.find("\nfunction ", rm_start + 1)
        rm_body = UI_JS[rm_start:rm_end]
        # Must check activeStreamId before deciding which scroll fn to call
        assert "activeStreamId" in rm_body, (
            "renderMessages() must check S.activeStreamId before scrolling — "
            "unconditional scrollToBottom() overrides user scroll position (#677)"
        )
        # scrollIfPinned must be called inside renderMessages (stream path)
        assert "scrollIfPinned()" in rm_body, (
            "renderMessages() must call scrollIfPinned() during streaming (#677)"
        )

    def test_append_thinking_uses_scroll_if_pinned(self):
        """appendThinking() must use scrollIfPinned() not scrollToBottom() (#677).

        appendThinking() fires continuously during streaming — calling scrollToBottom()
        inside it re-pins on every token, preventing the user from scrolling up.
        """
        at_start = UI_JS.find("function appendThinking(")
        assert at_start != -1, "appendThinking() not found in ui.js"
        at_end = UI_JS.find("\nfunction ", at_start + 1)
        at_body = UI_JS[at_start:at_end]
        assert "scrollIfPinned()" in at_body, (
            "appendThinking() must call scrollIfPinned() not scrollToBottom() (#677)"
        )
        assert "scrollToBottom()" not in at_body, (
            "appendThinking() must not call scrollToBottom() — it fires mid-stream (#677)"
        )

    def test_scroll_threshold_increased(self):
        """Scroll re-pin threshold must be at least 150px (#677).

        80px was too small — a fast mouse scroll wheel can jump 100–120px in one
        tick, causing unintended re-pin. 150px gives a proper dead zone.
        """
        # Find the nearBottom assignment in the scroll listener;
        # accept either spacing style around the `=`.
        near_bottom_pos = UI_JS.find("nearBottom=")
        if near_bottom_pos == -1:
            near_bottom_pos = UI_JS.find("nearBottom =")
        assert near_bottom_pos != -1, "nearBottom scroll threshold assignment not found"
        threshold_line = UI_JS[near_bottom_pos:near_bottom_pos + 120]
        # Extract the numeric threshold (the number after the `<` comparison)
        match = re.search(r"<\s*(\d+)", threshold_line)
        assert match, f"Numeric threshold not found near nearBottom assignment: {threshold_line!r}"
        threshold = int(match.group(1))
        assert threshold >= 150, (
            f"Scroll re-pin threshold is {threshold}px — must be >= 150px to avoid "
            f"hair-trigger re-pinning on fast scroll wheels (#677)"
        )

    def test_scroll_to_bottom_button_exists_in_html(self):
        """index.html must contain a scroll-to-bottom button (#677).

        All major streaming chat UIs (Claude, ChatGPT) show a floating ↓ button
        when the user has scrolled up, giving a clear escape hatch to return to live output.
        """
        assert "scrollToBottomBtn" in INDEX_HTML, (
            "index.html must contain a #scrollToBottomBtn element (#677)"
        )
        assert "scroll-to-bottom-btn" in INDEX_HTML, (
            "index.html must use class scroll-to-bottom-btn for the scroll button (#677)"
        )

    def test_scroll_to_bottom_button_hidden_by_default(self):
        """Scroll-to-bottom button must be hidden by default (display:none) (#677)."""
        btn_pos = INDEX_HTML.find("scrollToBottomBtn")
        assert btn_pos != -1
        # Only the 200 chars after the id are inspected — the inline style
        # is expected on (or right after) the element itself.
        btn_context = INDEX_HTML[btn_pos:btn_pos + 200]
        assert "display:none" in btn_context or 'display="none"' in btn_context, (
            "scrollToBottomBtn must be hidden by default — only shown when user scrolls up (#677)"
        )

    def test_scroll_to_bottom_button_css_exists(self):
        """style.css must have styling for .scroll-to-bottom-btn (#677)."""
        assert ".scroll-to-bottom-btn" in STYLE_CSS, (
            "style.css must define .scroll-to-bottom-btn styles (#677)"
        )

    def test_scroll_to_bottom_button_is_sticky(self):
        """Scroll-to-bottom button must use position:sticky so it stays visible (#677)."""
        btn_css_pos = STYLE_CSS.find(".scroll-to-bottom-btn")
        assert btn_css_pos != -1
        btn_css = STYLE_CSS[btn_css_pos:btn_css_pos + 300]
        assert "sticky" in btn_css, (
            ".scroll-to-bottom-btn must use position:sticky to stay at bottom of viewport (#677)"
        )

    def test_scroll_listener_hides_button_when_pinned(self):
        """Scroll listener must hide the button when user is near the bottom (#677)."""
        scroll_listener_start = UI_JS.find("el.addEventListener('scroll'")
        assert scroll_listener_start != -1, "scroll event listener not found"
        listener_block = UI_JS[scroll_listener_start:scroll_listener_start + 300]
        assert "scrollToBottomBtn" in listener_block, (
            "Scroll listener must show/hide scrollToBottomBtn based on _scrollPinned (#677)"
        )

    def test_scroll_to_bottom_button_calls_scroll_to_bottom(self):
        """scrollToBottomBtn onclick must call scrollToBottom() (#677)."""
        btn_pos = INDEX_HTML.find("scrollToBottomBtn")
        assert btn_pos != -1
        btn_context = INDEX_HTML[btn_pos:btn_pos + 200]
        assert "scrollToBottom()" in btn_context, (
            "scrollToBottomBtn onclick must call scrollToBottom() (#677)"
        )

View File

@@ -0,0 +1,25 @@
"""Regression tests for fenced code block syntax highlighting."""
from pathlib import Path
UI_JS = Path(__file__).resolve().parent.parent / "static" / "ui.js"
def _read_ui_js() -> str:
    """Return the current contents of static/ui.js."""
    return UI_JS.read_text()
def test_fenced_code_blocks_add_prism_language_class():
    """The markdown renderer must tag <code> with a Prism language-* class."""
    source = _read_ui_js()
    assert 'class="language-${esc(normalizedLang)}"' in source, (
        "Fenced code blocks should add Prism language-* classes so syntax highlighting works"
    )
def test_fenced_code_blocks_keep_existing_pre_header_layout():
    """The highlight fix must not restructure the existing <pre><code> markup."""
    source = _read_ui_js()
    assert 'return `${h}<pre><code${langAttr}>${esc(code.replace(/\\n$/,' in source, (
        "The syntax-highlight fix should preserve the existing fenced code block layout"
    )
    assert '<div class="code-block">' not in source, (
        "This fix should not introduce a new wrapper around fenced code blocks"
    )

View File

@@ -0,0 +1,317 @@
"""
Tests for issues #373, #374, and #375.
#373: Chat silently swallows errors — no feedback when agent fails to respond
#374: Remove stale OpenAI models from default list (gpt-4o, o3)
#375: Model dropdown should fetch live models from provider
"""
import pathlib
import re
REPO = pathlib.Path(__file__).parent.parent
STREAMING_PY = (REPO / "api" / "streaming.py").read_text(encoding="utf-8")
CONFIG_PY = (REPO / "api" / "config.py").read_text(encoding="utf-8")
ROUTES_PY = (REPO / "api" / "routes.py").read_text(encoding="utf-8")
MESSAGES_JS = (REPO / "static" / "messages.js").read_text(encoding="utf-8")
UI_JS = (REPO / "static" / "ui.js").read_text(encoding="utf-8")
# ── Issue #373: Silent error detection ──────────────────────────────────────
class TestSilentErrorDetection:
    """streaming.py must emit apperror when agent returns no assistant reply.

    Source-scan tests for #373: they grep api/streaming.py and static/messages.js
    (read into module-level constants) for the markers the fix introduced.
    """

    def test_streaming_detects_no_assistant_reply(self):
        """streaming.py must check if any assistant message was produced."""
        assert "_assistant_added" in STREAMING_PY, (
            "streaming.py must check whether an assistant message was produced (#373)"
        )

    def test_streaming_emits_apperror_on_no_response(self):
        """streaming.py must emit apperror event when agent produced no reply."""
        assert "no_response" in STREAMING_PY, (
            "streaming.py must emit apperror with type='no_response' for silent failures (#373)"
        )

    def test_streaming_returns_early_after_apperror(self):
        """streaming.py must return after emitting apperror (not also emit done)."""
        # The return statement must come after the put('apperror') for no_response;
        # the second find() is seeded at no_resp_pos to enforce that ordering.
        no_resp_pos = STREAMING_PY.find("'no_response'")
        return_pos = STREAMING_PY.find("return # Don't emit done", no_resp_pos)
        assert no_resp_pos != -1, "no_response type not found in streaming.py"
        assert return_pos != -1, (
            "streaming.py must return after emitting apperror to prevent also emitting done (#373)"
        )
        assert return_pos > no_resp_pos

    def test_streaming_detects_auth_error_in_result(self):
        """streaming.py must detect auth errors from the result object."""
        assert "_is_auth" in STREAMING_PY, (
            "streaming.py must detect auth errors in silent failures (#373)"
        )
        assert "auth_mismatch" in STREAMING_PY, (
            "streaming.py must emit auth_mismatch type for auth failures (#373)"
        )

    def test_messages_js_done_handler_detects_no_reply(self):
        """messages.js done handler must show an error if no assistant reply arrived."""
        # Check for either the variable name or the inlined check pattern
        has_no_reply_guard = (
            "hasAssistantReply" in MESSAGES_JS
            or ("role==='assistant'" in MESSAGES_JS and "No response received" in MESSAGES_JS)
        )
        assert has_no_reply_guard, (
            "messages.js done handler must detect zero assistant replies (#373)"
        )
        assert "No response received" in MESSAGES_JS, (
            "messages.js must show 'No response received' inline message (#373)"
        )

    def test_messages_js_handles_no_response_apperror_type(self):
        """messages.js apperror handler must recognise the no_response type."""
        assert "isNoResponse" in MESSAGES_JS or "no_response" in MESSAGES_JS, (
            "messages.js apperror handler must handle type='no_response' (#373)"
        )

    def test_messages_js_no_response_label(self):
        """messages.js must show a distinct label for no_response errors."""
        assert "No response received" in MESSAGES_JS, (
            "messages.js must display 'No response received' label for no_response errors (#373)"
        )
# ── Issue #374: Stale model list cleanup ─────────────────────────────────────
class TestStaleModelListCleanup:
    """gpt-4o and o3 must be removed from the primary OpenAI model lists (#374)."""

    @staticmethod
    def _config_block(marker: str, end: str) -> str:
        """Slice api/config.py from *marker* to the next occurrence of *end*.

        Asserts both tokens exist. The original tests used str.find() results
        unguarded: a missing marker yields -1, which silently produces a
        garbage slice and lets the "not in" assertions pass vacuously.
        """
        start = CONFIG_PY.find(marker)
        assert start != -1, f"{marker!r} not found in api/config.py"
        stop = CONFIG_PY.find(end, start)
        assert stop != -1, f"closing {end!r} not found after {marker!r} in api/config.py"
        return CONFIG_PY[start:stop]

    def test_gpt4o_removed_from_fallback_models(self):
        """_FALLBACK_MODELS must not contain gpt-4o (issue #374)."""
        fallback_block = self._config_block("_FALLBACK_MODELS = [", "]")
        assert "gpt-4o" not in fallback_block, (
            "_FALLBACK_MODELS still contains gpt-4o — remove it per issue #374"
        )

    def test_o3_removed_from_fallback_models(self):
        """_FALLBACK_MODELS must not contain o3 (issue #374)."""
        fallback_block = self._config_block("_FALLBACK_MODELS = [", "]")
        assert '"o3"' not in fallback_block and "'o3'" not in fallback_block, (
            "_FALLBACK_MODELS still contains o3 — remove it per issue #374"
        )

    def test_gpt4o_removed_from_provider_models_openai(self):
        """_PROVIDER_MODELS['openai'] must not contain gpt-4o (issue #374)."""
        openai_block = self._config_block('"openai": [', "],")
        assert "gpt-4o" not in openai_block, (
            "_PROVIDER_MODELS['openai'] still contains gpt-4o — remove per issue #374"
        )

    def test_o3_removed_from_provider_models_openai(self):
        """_PROVIDER_MODELS['openai'] must not contain o3 (issue #374)."""
        openai_block = self._config_block('"openai": [', "],")
        assert '"o3"' not in openai_block and "'o3'" not in openai_block, (
            "_PROVIDER_MODELS['openai'] still contains o3 — remove per issue #374"
        )

    def test_fallback_still_has_gpt54_mini(self):
        """_FALLBACK_MODELS must still contain gpt-5.4-mini (not over-trimmed)."""
        assert "gpt-5.4-mini" in CONFIG_PY, (
            "_FALLBACK_MODELS must keep gpt-5.4-mini as primary OpenAI model (#374)"
        )

    def test_fallback_has_gpt54(self):
        """_FALLBACK_MODELS must contain gpt-5.4-mini as the primary OpenAI option."""
        from api.config import _FALLBACK_MODELS
        ids = [m["id"] for m in _FALLBACK_MODELS]
        assert any("gpt-5.4-mini" in mid for mid in ids), (
            "_FALLBACK_MODELS must include gpt-5.4-mini as the primary OpenAI option"
        )

    def test_copilot_list_unchanged(self):
        """Copilot provider model list should still include gpt-4o (it's a valid Copilot model)."""
        # The copilot section is optional — skip silently when absent.
        copilot_start = CONFIG_PY.find('"copilot": [')
        if copilot_start == -1:
            return  # No copilot list — that's fine
        copilot_end = CONFIG_PY.find("],", copilot_start)
        copilot_block = CONFIG_PY[copilot_start:copilot_end]
        assert "gpt-4o" in copilot_block, (
            "Copilot provider model list should keep gpt-4o (it's available via Copilot) (#374)"
        )
# ── Issue #375: Live model fetching ─────────────────────────────────────────
class TestLiveModelFetching:
    """Backend and frontend must support live model fetching from provider APIs.

    Source-scan tests for #375 over api/routes.py and static/ui.js
    (module-level constants ROUTES_PY / UI_JS).
    """

    def test_live_models_endpoint_exists_in_routes(self):
        """routes.py must have a /api/models/live endpoint (#375)."""
        assert "/api/models/live" in ROUTES_PY, (
            "routes.py must define /api/models/live endpoint (#375)"
        )

    def test_live_models_handler_function_exists(self):
        """routes.py must define _handle_live_models() function (#375)."""
        assert "def _handle_live_models(" in ROUTES_PY, (
            "routes.py must define _handle_live_models() for live model fetching (#375)"
        )

    def test_live_models_handler_validates_scheme(self):
        """_handle_live_models must validate URL scheme to prevent file:// injection (B310)."""
        # Either an explicit bandit suppression or a scheme/http check counts.
        assert "nosec B310" in ROUTES_PY or ("scheme" in ROUTES_PY and "http" in ROUTES_PY), (
            "_handle_live_models must validate URL scheme before urlopen (#375)"
        )

    def test_live_models_handler_has_ssrf_guard(self):
        """_handle_live_models must guard against SSRF (private IP access)."""
        assert "ssrf_blocked" in ROUTES_PY or ("is_private" in ROUTES_PY and "live" in ROUTES_PY), (
            "_handle_live_models must have SSRF protection for private IP ranges (#375)"
        )

    def test_live_models_all_providers_handled_via_agent(self):
        """_handle_live_models must delegate to provider_model_ids() which handles all
        providers gracefully — live fetch where possible, static fallback otherwise.

        The old 'not_supported' return for Anthropic/Google is superseded: those
        providers now return live or static model lists via the agent delegate."""
        assert "provider_model_ids" in ROUTES_PY, (
            "_handle_live_models must delegate to hermes_cli.models.provider_model_ids() "
            "so all providers are handled uniformly (#375 upgrade)"
        )

    def test_frontend_has_fetch_live_models_function(self):
        """ui.js must define _fetchLiveModels() for background live model loading (#375)."""
        assert "function _fetchLiveModels(" in UI_JS or "async function _fetchLiveModels(" in UI_JS, (
            "ui.js must define _fetchLiveModels() function (#375)"
        )

    def test_frontend_live_models_cache_exists(self):
        """ui.js must cache live model responses to avoid redundant API calls (#375)."""
        assert "_liveModelCache" in UI_JS, (
            "ui.js must use _liveModelCache to avoid re-fetching on every dropdown open (#375)"
        )

    def test_frontend_calls_live_models_after_static_load(self):
        """populateModelDropdown must call _fetchLiveModels after rendering the static list (#375)."""
        assert "_fetchLiveModels" in UI_JS, (
            "populateModelDropdown must call _fetchLiveModels for background update (#375)"
        )

    def test_frontend_live_fetch_only_adds_new_models(self):
        """_fetchLiveModels must not duplicate models already in the static list (#375)."""
        assert "existingIds" in UI_JS, (
            "_fetchLiveModels must track existing model IDs to avoid duplicates (#375)"
        )

    def test_frontend_live_fetch_covers_all_providers(self):
        """_fetchLiveModels no longer skips any provider — all providers return
        live or fallback models via provider_model_ids() on the backend (#375 upgrade)."""
        # The old skip list (anthropic, google, gemini) must be gone from the guard;
        # only the text immediately around the includes() call is inspected.
        skip_guard_pos = UI_JS.find("includes(provider)")
        if skip_guard_pos != -1:
            guard_line = UI_JS[max(0,skip_guard_pos-100):skip_guard_pos+50]
            assert "anthropic" not in guard_line, (
                "_fetchLiveModels must not skip anthropic — backend now handles it (#375 upgrade)"
            )

    def test_live_models_endpoint_wired_in_routes(self):
        """The /api/models/live path must be handled in handle_get()."""
        # Find handle_get and check our route appears inside it (by position)
        handle_get_pos = ROUTES_PY.find("def handle_get(")
        live_route_pos = ROUTES_PY.find('"/api/models/live"')
        assert handle_get_pos != -1 and live_route_pos != -1
        assert live_route_pos > handle_get_pos, (
            "/api/models/live must be inside handle_get() (#375)"
        )
# ── #669: Gemini model IDs must be valid for Google AI Studio endpoint ────────
class TestGeminiModelIds:
"""Gemini 3.x model IDs must be valid for the native Google AI Studio provider.
The original code had gemini-3.1-flash-lite-preview missing from the
dropdown. The fallback list also erroneously used gemini-3.1-pro-preview
in some provider sections while omitting gemini-3.1-flash-lite-preview.
All provider sections must now include the full current Gemini 3.x lineup.
"""
VALID_GEMINI_3 = [
"gemini-3.1-pro-preview",
"gemini-3-flash-preview",
"gemini-3.1-flash-lite-preview",
]
def test_gemini_provider_models_has_3x(self):
"""_PROVIDER_MODELS['gemini'] must contain valid Gemini 3.x model IDs (#669)."""
gemini_block_start = CONFIG_PY.find('"gemini": [')
assert gemini_block_start != -1, "_PROVIDER_MODELS['gemini'] block not found"
gemini_block = CONFIG_PY[gemini_block_start:gemini_block_start + 600]
for mid in self.VALID_GEMINI_3:
assert mid in gemini_block, (
f"_PROVIDER_MODELS['gemini'] must contain {mid!r}"
f"this is a valid Google AI Studio model ID (#669)"
)
def test_gemini_provider_models_has_flash_lite(self):
"""_PROVIDER_MODELS['gemini'] must contain gemini-3.1-flash-lite-preview (#669).
This was the model the reporter selected from the wizard — it must appear
in the native gemini provider model list so users can select it.
"""
gemini_block_start = CONFIG_PY.find('"gemini": [')
assert gemini_block_start != -1
gemini_block = CONFIG_PY[gemini_block_start:gemini_block_start + 600]
assert "gemini-3.1-flash-lite-preview" in gemini_block, (
"_PROVIDER_MODELS['gemini'] missing gemini-3.1-flash-lite-preview — "
"this was the exact model the #669 reporter tried and got API_KEY_INVALID"
)
def test_fallback_models_has_gemini_3x(self):
"""_FALLBACK_MODELS must contain valid Gemini 3.x OpenRouter model IDs (#669)."""
fallback_start = CONFIG_PY.find("_FALLBACK_MODELS = [")
fallback_end = CONFIG_PY.find("]", fallback_start + len("_FALLBACK_MODELS = ["))
# Find the closing bracket for the list (multi-line)
depth = 0
pos = fallback_start + len("_FALLBACK_MODELS = [")
for i, ch in enumerate(CONFIG_PY[pos:], start=pos):
if ch == '[':
depth += 1
elif ch == ']':
if depth == 0:
fallback_end = i
break
depth -= 1
fallback_block = CONFIG_PY[fallback_start:fallback_end]
for mid in ("google/gemini-3.1-pro-preview", "google/gemini-3-flash-preview"):
assert mid in fallback_block, (
f"_FALLBACK_MODELS must contain {mid!r} for OpenRouter Google models (#669)"
)
def test_gemini_provider_also_has_stable_25(self):
"""_PROVIDER_MODELS['gemini'] must retain stable Gemini 2.5 models (#669)."""
gemini_block_start = CONFIG_PY.find('"gemini": [')
assert gemini_block_start != -1
gemini_block = CONFIG_PY[gemini_block_start:gemini_block_start + 600]
assert "gemini-2.5-pro" in gemini_block, (
"_PROVIDER_MODELS['gemini'] must keep gemini-2.5-pro as a stable fallback"
)
def test_no_invalid_gemini_3_pro_model(self):
    """gemini-3-pro-preview must not appear — it was shut down March 9 2026 (#669)."""
    # Count the quoted, bare (non-.1) model ID.  Quoting the needle keeps the
    # valid "gemini-3.1-pro-preview" replacement from matching.  The previous
    # `A not in CONFIG_PY or B in CONFIG_PY` assert was vacuous: it passed
    # whenever the 3.1 replacement was present, even with the retired ID too.
    count_bare = CONFIG_PY.count('"gemini-3-pro-preview"')
    assert count_bare == 0, (
        f"gemini-3-pro-preview appears {count_bare} time(s) in config.py — "
        "it was shut down March 9 2026, use gemini-3.1-pro-preview (#669)"
    )

View File

@@ -0,0 +1,262 @@
import json
import pathlib
import re
import subprocess
import textwrap
REPO_ROOT = pathlib.Path(__file__).parent.parent.resolve()
I18N_JS = (REPO_ROOT / "static" / "i18n.js").read_text(encoding="utf-8")
BOOT_JS = (REPO_ROOT / "static" / "boot.js").read_text(encoding="utf-8")
PANELS_JS = (REPO_ROOT / "static" / "panels.js").read_text(encoding="utf-8")
def _run_i18n_case(script_expr: str) -> dict:
    """Evaluate *script_expr* against static/i18n.js in Node and return the JSON result.

    i18n.js is loaded into a `vm` sandbox with minimal localStorage and
    document stubs so browser-only code can run under Node.  *script_expr*
    must be a single JS expression; its value is JSON-serialized to stdout.
    Raises CalledProcessError if the node process fails.
    """
    # Wrap in an IIFE so an object literal parses as an expression, not a block.
    wrapped_expr = f"(() => ({script_expr}))()"
    script = textwrap.dedent(
        f"""
        const fs = require('fs');
        const vm = require('vm');
        const src = fs.readFileSync({json.dumps(str(REPO_ROOT / "static" / "i18n.js"))}, 'utf8');
        const storage = {{}};
        const ctx = {{
            localStorage: {{
                getItem: (k) => Object.prototype.hasOwnProperty.call(storage, k) ? storage[k] : null,
                setItem: (k, v) => {{ storage[k] = String(v); }},
            }},
            document: {{
                documentElement: {{ lang: '' }},
                querySelectorAll: () => [],
            }},
        }};
        vm.createContext(ctx);
        vm.runInContext(src, ctx);
        const out = vm.runInContext({json.dumps(wrapped_expr)}, ctx);
        process.stdout.write(JSON.stringify(out));
        """
    )
    proc = subprocess.run(["node", "-e", script], check=True, capture_output=True, text=True)
    return json.loads(proc.stdout)
def _extract_call_arglists(src: str, fn_name: str) -> list[str]:
token = f"{fn_name}("
out = []
search_from = 0
while True:
start = src.find(token, search_from)
if start < 0:
return out
i = start + len(token)
depth = 1
in_single = False
in_double = False
in_backtick = False
escape = False
while i < len(src):
ch = src[i]
if escape:
escape = False
i += 1
continue
if in_single:
if ch == "\\":
escape = True
elif ch == "'":
in_single = False
i += 1
continue
if in_double:
if ch == "\\":
escape = True
elif ch == '"':
in_double = False
i += 1
continue
if in_backtick:
if ch == "\\":
escape = True
elif ch == "`":
in_backtick = False
i += 1
continue
if ch == "'":
in_single = True
elif ch == '"':
in_double = True
elif ch == "`":
in_backtick = True
elif ch == "(":
depth += 1
elif ch == ")":
depth -= 1
if depth == 0:
out.append(src[start + len(token) : i])
break
i += 1
search_from = start + len(token)
def _split_top_level_args(arg_src: str) -> list[str]:
args = []
cur = []
paren = 0
brace = 0
bracket = 0
in_single = False
in_double = False
in_backtick = False
escape = False
for ch in arg_src:
if escape:
cur.append(ch)
escape = False
continue
if in_single:
cur.append(ch)
if ch == "\\":
escape = True
elif ch == "'":
in_single = False
continue
if in_double:
cur.append(ch)
if ch == "\\":
escape = True
elif ch == '"':
in_double = False
continue
if in_backtick:
cur.append(ch)
if ch == "\\":
escape = True
elif ch == "`":
in_backtick = False
continue
if ch == "'":
in_single = True
cur.append(ch)
continue
if ch == '"':
in_double = True
cur.append(ch)
continue
if ch == "`":
in_backtick = True
cur.append(ch)
continue
if ch == "(":
paren += 1
cur.append(ch)
continue
if ch == ")":
paren -= 1
cur.append(ch)
continue
if ch == "{":
brace += 1
cur.append(ch)
continue
if ch == "}":
brace -= 1
cur.append(ch)
continue
if ch == "[":
bracket += 1
cur.append(ch)
continue
if ch == "]":
bracket -= 1
cur.append(ch)
continue
if ch == "," and paren == 0 and brace == 0 and bracket == 0:
args.append("".join(cur).strip())
cur = []
continue
cur.append(ch)
if cur:
args.append("".join(cur).strip())
return args
def _has_precedence_call(src: str, first_arg: str) -> bool:
    """True if *src* calls resolvePreferredLocale(first_arg, <saved 'hermes-lang'>).

    Whitespace inside the arguments is ignored; the second argument must be a
    localStorage read of the 'hermes-lang' key (either quote style).
    """
    saved_lang_reads = (
        "localStorage.getItem('hermes-lang')",
        'localStorage.getItem("hermes-lang")',
    )
    for call_args in _extract_call_arglists(src, "resolvePreferredLocale"):
        parts = _split_top_level_args(call_args)
        if len(parts) < 2:
            continue
        head = re.sub(r"\s+", "", parts[0])
        second = re.sub(r"\s+", "", parts[1])
        if head == first_arg and second in saved_lang_reads:
            return True
    return False
def test_i18n_exposes_locale_resolvers():
    # Both resolver entry points must be declared in static/i18n.js.
    for declaration in ("function resolveLocale(", "function resolvePreferredLocale("):
        assert declaration in I18N_JS
def test_locale_alias_resolution_and_precedence_logic():
    """Alias forms (zh-CN, zh_TW, EN-us, …) must map to canonical locale keys,
    and resolvePreferredLocale must prefer the first resolvable argument."""
    result = _run_i18n_case(
        """
        {
            zhCn: resolveLocale('zh-CN'),
            zhTw: resolveLocale('zh_TW'),
            enUs: resolveLocale('EN-us'),
            esMx: resolveLocale('es-MX'),
            bad: resolveLocale('xx-YY'),
            preferred1: resolvePreferredLocale('zh-CN', 'en'),
            preferred2: resolvePreferredLocale('xx-YY', 'zh-Hant'),
            preferred3: resolvePreferredLocale('', 'xx-YY'),
        }
        """
    )
    # Alias → canonical key (case/sep-insensitive); unknown tags resolve to null.
    assert result["zhCn"] == "zh"
    assert result["zhTw"] == "zh-Hant"
    assert result["enUs"] == "en"
    assert result["esMx"] == "es"
    assert result["bad"] is None
    # Precedence: first resolvable wins; when nothing resolves, fall back to 'en'.
    assert result["preferred1"] == "zh"
    assert result["preferred2"] == "zh-Hant"
    assert result["preferred3"] == "en"
def test_set_locale_normalizes_alias_and_persists_canonical_key():
    """setLocale('zh-CN') must persist the canonical key 'zh' while setting the
    full BCP 47 tag 'zh-CN' on the <html> element."""
    result = _run_i18n_case(
        """
        {
            ...(setLocale('zh-CN'), {}),
            saved: localStorage.getItem('hermes-lang'),
            htmlLang: document.documentElement.lang,
        }
        """
    )
    # Storage gets the canonical key; document.documentElement.lang gets the tag.
    assert result["saved"] == "zh"
    assert result["htmlLang"] == "zh-CN"
def test_boot_and_settings_panel_use_shared_locale_precedence():
    # Both the boot path and the settings panel must resolve language via
    # resolvePreferredLocale(<settings language>, localStorage 'hermes-lang').
    for src, lang_expr in ((BOOT_JS, "s.language"), (PANELS_JS, "settings.language")):
        assert _has_precedence_call(src, lang_expr)

View File

@@ -0,0 +1,86 @@
import json
import urllib.error
import urllib.request
from tests._pytest_port import BASE
def get(path):
    """GET BASE+path; return (parsed JSON body, HTTP status)."""
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        raw = resp.read()
        code = resp.status
    return json.loads(raw), code
def get_raw(path):
    """GET BASE+path; return (decoded text body, HTTP status)."""
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        text = resp.read().decode()
        code = resp.status
    return text, code
def post(path, body=None):
    """POST *body* as JSON to BASE+path; return (parsed JSON response, HTTP status).

    HTTPError responses are caught and returned rather than raised so callers
    can assert on non-2xx statuses directly.
    """
    req = urllib.request.Request(
        BASE + path,
        data=json.dumps(body or {}).encode(),
        headers={"Content-Type": "application/json"},
    )
    try:
        with urllib.request.urlopen(req, timeout=10) as resp:
            return json.loads(resp.read()), resp.status
    except urllib.error.HTTPError as err:
        return json.loads(err.read()), err.code
def _current_language():
    """Return the server's currently configured UI language ('en' if unset)."""
    body, code = get("/api/settings")
    assert code == 200
    lang = body.get("language")
    return lang or "en"
def test_login_page_uses_simplified_chinese_for_zh_cn_alias():
    """Saving language 'zh-CN' must render /login in Simplified Chinese."""
    prev_lang = _current_language()
    try:
        saved, status = post("/api/settings", {"language": "zh-CN"})
        assert status == 200
        assert saved.get("language") == "zh-CN"
        html, status2 = get_raw("/login")
        assert status2 == 200
        assert 'lang="zh-CN"' in html
        # "登录" ("Log in") and "输入密码继续使用" ("enter password to continue").
        assert "\u767b\u5f55" in html
        assert "\u8f93\u5165\u5bc6\u7801\u7ee7\u7eed\u4f7f\u7528" in html
    finally:
        # Always restore the previous language so later tests see a clean state.
        restored, restore_status = post("/api/settings", {"language": prev_lang})
        assert restore_status == 200
        assert restored.get("language") == prev_lang
def test_login_page_uses_traditional_chinese_for_zh_hant():
    """Saving language 'zh-Hant' must render /login in Traditional Chinese."""
    prev_lang = _current_language()
    try:
        saved, status = post("/api/settings", {"language": "zh-Hant"})
        assert status == 200
        assert saved.get("language") == "zh-Hant"
        html, status2 = get_raw("/login")
        assert status2 == 200
        assert 'lang="zh-TW"' in html
        # "輸入密碼繼續使用" ("enter password to continue") and
        # "密碼錯誤" ("wrong password").
        assert "\u8f38\u5165\u5bc6\u78bc\u7e7c\u7e8c\u4f7f\u7528" in html
        assert "\u5bc6\u78bc\u932f\u8aa4" in html
    finally:
        # Always restore the previous language so later tests see a clean state.
        restored, restore_status = post("/api/settings", {"language": prev_lang})
        assert restore_status == 200
        assert restored.get("language") == prev_lang
def test_login_page_uses_russian_for_ru():
    """Saving language 'ru' must render /login in Russian."""
    prev_lang = _current_language()
    try:
        saved, status = post("/api/settings", {"language": "ru"})
        assert status == 200
        assert saved.get("language") == "ru"
        html, status2 = get_raw("/login")
        assert status2 == 200
        assert 'lang="ru-RU"' in html
        # "Войти" ("Log in"), "Введите пароль, чтобы продолжить"
        # ("enter password to continue"), "Неверный пароль" ("wrong password").
        assert "\u0412\u043e\u0439\u0442\u0438" in html
        assert "\u0412\u0432\u0435\u0434\u0438\u0442\u0435 \u043f\u0430\u0440\u043e\u043b\u044c, \u0447\u0442\u043e\u0431\u044b \u043f\u0440\u043e\u0434\u043e\u043b\u0436\u0438\u0442\u044c" in html
        assert "\u041d\u0435\u0432\u0435\u0440\u043d\u044b\u0439 \u043f\u0430\u0440\u043e\u043b\u044c" in html
    finally:
        # Always restore the previous language so later tests see a clean state.
        restored, restore_status = post("/api/settings", {"language": prev_lang})
        assert restore_status == 200
        assert restored.get("language") == prev_lang

216
tests/test_media_inline.py Normal file
View File

@@ -0,0 +1,216 @@
"""
Tests for feat #450: MEDIA: token inline rendering in web UI chat.
Covers:
1. /api/media endpoint: serves local image files by absolute path
2. /api/media endpoint: rejects paths outside allowed roots (path traversal)
3. /api/media endpoint: 404 for non-existent files
4. /api/media endpoint: auth gate when auth is enabled
5. renderMd() MEDIA: stash/restore logic (static JS analysis)
6. /api/media endpoint: integration test via live server (requires 8788)
"""
from __future__ import annotations
import json
import os
import pathlib
import tempfile
import unittest
import urllib.error
import urllib.parse
import urllib.request

from tests._pytest_port import BASE, TEST_STATE_DIR
REPO_ROOT = pathlib.Path(__file__).parent.parent
UI_JS = (REPO_ROOT / "static" / "ui.js").read_text(encoding="utf-8")
# ── Static analysis: renderMd MEDIA stash ────────────────────────────────────
class TestMediaRenderMdStash(unittest.TestCase):
    """Verify the MEDIA: stash/restore logic exists in ui.js.

    These are static source checks against static/ui.js — no browser needed.
    """

    def test_media_stash_defined(self):
        self.assertIn("media_stash", UI_JS,
                      "media_stash array must be defined in renderMd()")

    def test_media_token_regex(self):
        self.assertIn("MEDIA:", UI_JS,
                      "MEDIA: token regex must be present in renderMd()")

    def test_media_restore_produces_img_tag(self):
        self.assertIn("msg-media-img", UI_JS,
                      "restore pass must produce <img class='msg-media-img'>")

    def test_media_restore_produces_download_link(self):
        self.assertIn("msg-media-link", UI_JS,
                      "restore pass must produce download link for non-image files")

    def test_media_api_url_pattern(self):
        self.assertIn("api/media?path=", UI_JS,
                      "renderMd must build api/media?path=... URL for local files")

    def test_media_stash_uses_null_byte_token(self):
        # "\x00D" is the stash placeholder; a NUL byte is unlikely to collide
        # with user-authored markdown content.
        self.assertIn("\\x00D", UI_JS,
                      "MEDIA stash must use null-byte token (\\x00D) to avoid conflicts")

    def test_media_stash_runs_before_fence_stash(self):
        # Source-order proxy: compare first occurrences of each identifier.
        media_pos = UI_JS.find("media_stash")
        fence_pos = UI_JS.find("fence_stash")
        self.assertGreater(fence_pos, media_pos,
                           "media_stash must be defined before fence_stash in renderMd()")

    def test_image_extension_regex_covers_common_types(self):
        # The JS source has these extensions in a regex like /\.png|jpg|.../i
        # Check for the extension strings (without the dot, which may be escaped as \.)
        for ext in ["png", "jpg", "jpeg", "gif", "webp"]:
            self.assertIn(ext, UI_JS,
                          f"Image extension {ext} must be in the MEDIA img-check regex")

    def test_http_url_media_rendered_as_img(self):
        # renderMd should treat MEDIA:https://... as an <img>
        # In the JS source, the regex is /^https?:\/\//i (escaped)
        self.assertTrue(
            "https?:" in UI_JS or "http" in UI_JS,
            "MEDIA: restore must handle HTTPS URLs",
        )

    def test_zoom_toggle_on_click(self):
        self.assertIn("msg-media-img--full", UI_JS,
                      "Clicking the image must toggle msg-media-img--full class for zoom")
# ── Static analysis: CSS ──────────────────────────────────────────────────────
class TestMediaCSS(unittest.TestCase):
    """Static checks that the MEDIA-rendering CSS classes exist in style.css."""

    # Stylesheet source, loaded once at class-definition time; tests only read it.
    CSS = (REPO_ROOT / "static" / "style.css").read_text(encoding="utf-8")

    def test_msg_media_img_class_defined(self):
        self.assertIn(".msg-media-img", self.CSS)

    def test_msg_media_img_max_width(self):
        # Should have a max-width to prevent huge images breaking layout.
        idx = self.CSS.find(".msg-media-img{")
        self.assertGreater(idx, 0)
        # Inspect only the first 200 chars after the selector (the rule body).
        rule = self.CSS[idx:idx+200]
        self.assertIn("max-width", rule)

    def test_msg_media_img_full_class_defined(self):
        self.assertIn(".msg-media-img--full", self.CSS,
                      "Full-size toggle class must exist for zoom-on-click")

    def test_msg_media_link_class_defined(self):
        self.assertIn(".msg-media-link", self.CSS,
                      "Download link style must be defined for non-image media")
# ── Backend: /api/media endpoint (unit-level, no server needed) ─────────────
class TestMediaEndpointUnit(unittest.TestCase):
    """Test route registration and handler logic via imports."""

    # Only the first test imports api.routes; the rest are source-text checks
    # against api/routes.py so they run even if the module cannot be imported.

    def test_handle_media_function_exists(self):
        from api import routes
        self.assertTrue(
            hasattr(routes, "_handle_media"),
            "_handle_media must be defined in api/routes.py",
        )

    def test_api_media_route_registered(self):
        """The GET dispatch must include the /api/media path."""
        routes_src = (REPO_ROOT / "api" / "routes.py").read_text(encoding="utf-8")
        self.assertIn('"/api/media"', routes_src,
                      '/api/media must be registered in the GET route dispatch')

    def test_allowed_roots_include_tmp(self):
        """Handler must allow /tmp so screenshot paths work."""
        routes_src = (REPO_ROOT / "api" / "routes.py").read_text(encoding="utf-8")
        self.assertIn('/tmp', routes_src,
                      '/tmp must be in the allowed roots list for /api/media')

    def test_svg_forces_download(self):
        """.svg must not be served inline (XSS risk)."""
        routes_src = (REPO_ROOT / "api" / "routes.py").read_text(encoding="utf-8")
        # SVG should be in _DOWNLOAD_TYPES or explicitly excluded from inline
        self.assertIn("image/svg+xml", routes_src,
                      "SVG MIME type must be handled (forced download) in _handle_media")

    def test_non_image_forces_download(self):
        """Non-image files should be forced to download, not served inline."""
        routes_src = (REPO_ROOT / "api" / "routes.py").read_text(encoding="utf-8")
        self.assertIn("_INLINE_IMAGE_TYPES", routes_src,
                      "_INLINE_IMAGE_TYPES whitelist must exist in _handle_media")
# ── Integration tests: live server on TEST_PORT ───────────────────────────────
# No collection-time skip guard — conftest.py starts the server via its
# autouse session fixture BEFORE tests run. A collection-time check always
# sees no server and turns every test into a skip. Instead we assert
# reachability inside setUp() so failures are loud errors, not silent skips.
class TestMediaEndpointIntegration(unittest.TestCase):
    """Live-server tests for /api/media against the conftest-managed test server."""

    def setUp(self):
        # Assert (not skip) reachability: conftest's autouse session fixture
        # must have started the server before any test in this class runs.
        try:
            urllib.request.urlopen(BASE + "/health", timeout=5)
        except Exception as exc:
            self.fail(f"Test server at {BASE} is not reachable: {exc}")

    def _get(self, path):
        """GET *path*; return (body bytes, status, headers) even for HTTP errors."""
        try:
            with urllib.request.urlopen(BASE + path, timeout=10) as r:
                return r.read(), r.status, r.headers
        except urllib.error.HTTPError as e:
            return e.read(), e.code, e.headers

    def test_no_path_returns_400(self):
        _, status, _ = self._get("/api/media")
        self.assertEqual(status, 400)

    def test_nonexistent_file_returns_404(self):
        _, status, _ = self._get("/api/media?path=/tmp/__hermes_nonexistent_12345.png")
        self.assertEqual(status, 404)

    def test_path_outside_allowed_root_rejected(self):
        # /etc/passwd is outside allowed roots
        _, status, _ = self._get("/api/media?path=/etc/passwd")
        self.assertIn(status, {403, 404})

    def test_valid_png_served_with_image_mime(self):
        """Create a 1-pixel PNG in /tmp and verify it's served correctly."""
        # Minimal valid 1x1 transparent PNG (67 bytes)
        png_bytes = (
            b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01'
            b'\x08\x06\x00\x00\x00\x1f\x15\xc4\x89\x00\x00\x00\nIDATx\x9cc\x00'
            b'\x01\x00\x00\x05\x00\x01\r\n-\xb4\x00\x00\x00\x00IEND\xaeB`\x82'
        )
        with tempfile.NamedTemporaryFile(
            suffix=".png", prefix="hermes_test_", dir="/tmp", delete=False
        ) as f:
            f.write(png_bytes)
            tmp_path = f.name
        try:
            # urllib.parse.quote is the documented location of quote();
            # urllib.request.quote worked only via an undocumented re-export.
            body, status, headers = self._get(
                f"/api/media?path={urllib.parse.quote(tmp_path)}"
            )
            self.assertEqual(status, 200, f"Expected 200, got {status}")
            ct = headers.get("Content-Type", "")
            self.assertIn("image/png", ct, f"Expected image/png, got {ct}")
            self.assertEqual(body, png_bytes)
        finally:
            pathlib.Path(tmp_path).unlink(missing_ok=True)

    def test_path_traversal_rejected(self):
        _, status, _ = self._get(
            "/api/media?path=" + urllib.parse.quote("/tmp/../../etc/passwd")
        )
        self.assertIn(status, {403, 404})

    def test_health_check_still_works(self):
        """Sanity: server is up and /health works."""
        body, status, _ = self._get("/health")
        self.assertEqual(status, 200)
        d = json.loads(body)
        self.assertEqual(d["status"], "ok")

View File

@@ -0,0 +1,148 @@
"""
Tests for MiniMax provider support in the model/provider discovery layer.
Covers:
- MiniMax models appear in the fallback model list
- MINIMAX_API_KEY env var is scanned and detected from os.environ
- @minimax: provider hint routing works correctly
- minimax/MiniMax-M2.7 (slash format) is routed via openrouter when active provider differs
"""
import os
import api.config as config
# ── Helper ────────────────────────────────────────────────────────────────────
def _resolve_with_config(model_id, provider=None, base_url=None):
    """Call config.resolve_model_provider(model_id) under a temporary model cfg.

    The global config.cfg is snapshotted before the call and fully restored
    afterwards, so tests never leak configuration state.
    """
    snapshot = dict(config.cfg)
    overrides = {}
    if provider:
        overrides['provider'] = provider
    if base_url:
        overrides['base_url'] = base_url
    config.cfg['model'] = overrides
    try:
        return config.resolve_model_provider(model_id)
    finally:
        config.cfg.clear()
        config.cfg.update(snapshot)
# ── Fallback model list ───────────────────────────────────────────────────────
def test_minimax_m2_7_in_fallback_models():
    """MiniMax-M2.7 must appear in the hardcoded fallback model list."""
    fallback_ids = [entry['id'] for entry in config._FALLBACK_MODELS]
    assert 'minimax/MiniMax-M2.7' in fallback_ids, (
        f"minimax/MiniMax-M2.7 missing from _FALLBACK_MODELS. Found: {fallback_ids}"
    )
def test_minimax_m2_7_highspeed_in_fallback_models():
    """MiniMax-M2.7-highspeed must appear in the hardcoded fallback model list."""
    fallback_ids = [entry['id'] for entry in config._FALLBACK_MODELS]
    assert 'minimax/MiniMax-M2.7-highspeed' in fallback_ids, (
        f"minimax/MiniMax-M2.7-highspeed missing from _FALLBACK_MODELS. Found: {fallback_ids}"
    )
def test_minimax_fallback_provider_label():
    """MiniMax fallback entries must use 'MiniMax' as the provider label."""
    labelled = [m for m in config._FALLBACK_MODELS if 'minimax' in m['id'].lower()]
    assert labelled, "No MiniMax entries found in _FALLBACK_MODELS"
    for entry in labelled:
        assert entry['provider'] == 'MiniMax', (
            f"Expected provider='MiniMax', got '{entry['provider']}' for {entry['id']}"
        )
# ── _PROVIDER_MODELS ──────────────────────────────────────────────────────────
def test_minimax_provider_models_has_m2_7():
    """_PROVIDER_MODELS['minimax'] must include MiniMax-M2.7."""
    minimax_ids = [m['id'] for m in config._PROVIDER_MODELS.get('minimax', [])]
    assert 'MiniMax-M2.7' in minimax_ids, (
        f"MiniMax-M2.7 missing from _PROVIDER_MODELS['minimax']. Found: {minimax_ids}"
    )
def test_minimax_provider_models_has_highspeed():
    """_PROVIDER_MODELS['minimax'] must include MiniMax-M2.7-highspeed."""
    minimax_ids = [m['id'] for m in config._PROVIDER_MODELS.get('minimax', [])]
    assert 'MiniMax-M2.7-highspeed' in minimax_ids, (
        f"MiniMax-M2.7-highspeed missing from _PROVIDER_MODELS['minimax']. Found: {minimax_ids}"
    )
# ── MINIMAX_API_KEY env var detection ─────────────────────────────────────────
def test_minimax_api_key_in_env_scan_tuple():
    """MINIMAX_API_KEY must be included in the env var scan performed by
    get_available_models(), so users who export MINIMAX_API_KEY see the
    MiniMax provider in the dropdown without editing ~/.hermes/.env."""
    # Only inspect is needed here; the original line also imported ast and
    # textwrap, which were never used in this test.
    import inspect
    src = inspect.getsource(config.get_available_models)
    assert 'MINIMAX_API_KEY' in src, (
        "MINIMAX_API_KEY not found in get_available_models() source — "
        "it must be added to the env var scan tuple so os.environ is checked."
    )
def test_minimax_cn_api_key_in_env_scan_tuple():
    """MINIMAX_CN_API_KEY must also be scanned (mainland China API key variant)."""
    import inspect
    source_text = inspect.getsource(config.get_available_models)
    assert 'MINIMAX_CN_API_KEY' in source_text, (
        "MINIMAX_CN_API_KEY not found in get_available_models() source."
    )
def test_minimax_detected_from_os_environ(monkeypatch):
    """Setting MINIMAX_API_KEY in os.environ triggers minimax provider detection."""
    monkeypatch.setenv('MINIMAX_API_KEY', 'test-key-from-env')
    snapshot = dict(config.cfg)
    # Clear the model config so the env-var fallback path is exercised.
    config.cfg['model'] = {}
    try:
        groups = config.get_available_models()['groups']
        names = [g['provider'] for g in groups]
        assert 'MiniMax' in names, (
            f"MiniMax not detected when MINIMAX_API_KEY is set in os.environ. "
            f"Active provider groups: {names}"
        )
    finally:
        config.cfg.clear()
        config.cfg.update(snapshot)
# ── Model routing ─────────────────────────────────────────────────────────────
def test_provider_hint_minimax_m2_7():
    """@minimax:MiniMax-M2.7 routes to minimax provider with bare model name."""
    resolved_model, resolved_provider, resolved_base = _resolve_with_config(
        '@minimax:MiniMax-M2.7', provider='anthropic',
    )
    assert resolved_model == 'MiniMax-M2.7'
    assert resolved_provider == 'minimax'
    assert resolved_base is None
def test_provider_hint_minimax_highspeed():
    """@minimax:MiniMax-M2.7-highspeed routes to minimax provider."""
    resolved_model, resolved_provider, _unused_base = _resolve_with_config(
        '@minimax:MiniMax-M2.7-highspeed', provider='openai',
    )
    assert resolved_model == 'MiniMax-M2.7-highspeed'
    assert resolved_provider == 'minimax'
def test_minimax_slash_format_routes_openrouter_when_not_active():
    """minimax/MiniMax-M2.7 (slash format) routes via openrouter when active
    provider is anthropic (cross-provider routing)."""
    resolved_model, resolved_provider, _unused_base = _resolve_with_config(
        'minimax/MiniMax-M2.7', provider='anthropic',
    )
    assert resolved_model == 'minimax/MiniMax-M2.7'
    assert resolved_provider == 'openrouter'

278
tests/test_mobile_layout.py Normal file
View File

@@ -0,0 +1,278 @@
"""
Mobile layout regression tests — run on every QA pass.
These tests check that the CSS and HTML structure required for correct
mobile rendering (375px640px viewport widths) is intact after every change.
They are static checks (no server needed) that catch common regressions:
- Mobile breakpoints present for key layout elements
- Right panel slide-over markup and CSS intact
- Profile dropdown not clipped by overflow on mobile
- Composer footer chips scroll correctly on narrow viewports
- Mobile sidebar navigation stays available on phones
- No full-viewport overflow that would break scroll
Run as part of the standard test suite:
pytest tests/test_mobile_layout.py -v
"""
import pathlib
import re
REPO = pathlib.Path(__file__).parent.parent
HTML = (REPO / "static" / "index.html").read_text(encoding="utf-8")
CSS = (REPO / "static" / "style.css").read_text(encoding="utf-8")
# ── Mobile breakpoint rules ───────────────────────────────────────────────────
def test_mobile_breakpoint_900px_present():
    """@media(max-width:900px) must hide the right panel and show mobile-files-btn."""
    # Accept both the compact and the spaced form of the media query.
    assert "@media(max-width:900px)" in CSS or "@media (max-width: 900px)" in CSS, \
        "Missing @media(max-width:900px) breakpoint in style.css"
    # Right panel should be hidden at 900px, replaced by slide-over
    # NOTE(review): the regex fallback matches .rightpanel{display:none anywhere
    # AFTER the 900px query, not necessarily inside its block — weak but lenient.
    assert ".rightpanel{display:none" in CSS or ".rightpanel {display:none" in CSS or \
        re.search(r'max-width:900px\).*?\.rightpanel\{display:none', CSS, re.DOTALL), \
        ".rightpanel must be display:none at max-width:900px (slide-over replaces it)"
def test_mobile_breakpoint_640px_present():
    """@media(max-width:640px) must exist for narrow phone layouts."""
    # Either the compact or the spaced spelling satisfies the check.
    variants = ("@media(max-width:640px)", "@media (max-width: 640px)")
    assert any(v in CSS for v in variants), \
        "Missing @media(max-width:640px) breakpoint in style.css"
def test_rightpanel_mobile_slide_over_css():
    """Right panel must have position:fixed slide-over CSS for mobile."""
    # At max-width:900px the rightpanel should be position:fixed, off-screen right
    # NOTE(review): this only asserts position:fixed appears somewhere in the
    # stylesheet, not specifically on .rightpanel — confirm if this regresses.
    assert "position:fixed" in CSS, \
        "style.css must have position:fixed for rightpanel mobile slide-over"
    assert ".rightpanel.mobile-open{right:0" in CSS or ".rightpanel.mobile-open {right:0" in CSS, \
        ".rightpanel.mobile-open must set right:0 to slide panel in from right"
    assert "right:-320px" in CSS or "right: -320px" in CSS, \
        "rightpanel must start off-screen (right:-320px) on mobile"
def test_mobile_overlay_present():
    """Mobile overlay element must exist for tap-to-close sidebar behavior."""
    # Both the DOM node and its stylesheet rule are required.
    assert 'id="mobileOverlay"' in HTML, \
        "#mobileOverlay element missing from index.html"
    assert "mobile-overlay" in CSS, \
        ".mobile-overlay CSS rule missing from style.css"
def test_sidebar_nav_present():
    """Sidebar top navigation tabs must be present."""
    # Markup and base CSS rule (either spacing style) are both required.
    assert 'class="sidebar-nav"' in HTML, \
        ".sidebar-nav missing from index.html"
    assert ".sidebar-nav{" in CSS or ".sidebar-nav {" in CSS, \
        ".sidebar-nav CSS rule missing from style.css"
def test_mobile_does_not_hide_sidebar_nav():
    """Phone breakpoint must keep the sidebar top navigation visible."""
    start = CSS.find("@media(max-width:640px)")
    assert start != -1, "Missing @media(max-width:640px) block in style.css"
    # Walk matching braces to isolate exactly this media block.  The previous
    # greedy regex (.* with re.DOTALL) captured through the LAST closing brace
    # in the stylesheet, so rules far outside the breakpoint could fail the check.
    open_brace = CSS.find("{", start)
    assert open_brace != -1, "Missing @media(max-width:640px) block in style.css"
    depth = 0
    end = len(CSS)
    for i in range(open_brace, len(CSS)):
        if CSS[i] == "{":
            depth += 1
        elif CSS[i] == "}":
            depth -= 1
            if depth == 0:
                end = i
                break
    block = CSS[open_brace + 1:end]
    assert ".sidebar-nav{display:none" not in block.replace(" ", ""), \
        ".sidebar-nav must stay visible on mobile"
def test_mobile_files_button_present():
    """Mobile files toggle button (#btnWorkspacePanelToggle.workspace-toggle-btn) must be in HTML and CSS."""
    # The toggle needs both its DOM node and a stylesheet rule.
    assert 'id="btnWorkspacePanelToggle"' in HTML, \
        "#btnWorkspacePanelToggle missing from index.html"
    assert "workspace-toggle-btn" in CSS, \
        ".workspace-toggle-btn CSS missing from style.css"
# ── Profile dropdown overflow ─────────────────────────────────────────────────
def test_profile_dropdown_not_clipped_by_overflow():
    """Profile dropdown must not be inside an overflow:hidden or overflow-x:auto ancestor
    without a higher z-index escape hatch.

    The topbar-chips container uses overflow-x:auto on mobile, which creates a
    stacking context that clips absolutely-positioned children. The profile dropdown
    must use position:fixed on mobile OR the topbar-chips must not clip it.
    """
    # The profile-chip wrapper must have position:relative so the dropdown can escape
    assert 'id="profileChipWrap"' in HTML, \
        "#profileChipWrap missing from index.html"
    # Profile dropdown must have a z-index high enough to clear the topbar
    assert ".profile-dropdown{" in CSS or ".profile-dropdown {" in CSS, \
        ".profile-dropdown CSS rule missing"
    # z-index must be at least 200 (topbar is z-index:10)
    # NOTE(review): if the regex finds no z-index at all, this check is silently
    # skipped — only an explicitly-declared low z-index fails here.
    m = re.search(r'\.profile-dropdown\{[^}]*z-index:(\d+)', CSS)
    if m:
        assert int(m.group(1)) >= 100, \
            f".profile-dropdown z-index {m.group(1)} is too low — must be >= 100 to clear topbar"
def test_topbar_chips_mobile_overflow():
    """topbar-chips must use overflow-x:auto on mobile for chip scrolling.

    Chips (profile, workspace, model, files) must scroll horizontally on narrow
    viewports rather than wrapping onto a second line which would break the topbar layout.
    """
    # At narrow viewport, topbar-chips should scroll
    # NOTE(review): matches overflow-x:auto anywhere in the stylesheet, not
    # scoped to .topbar-chips — confirm the selector if this ever regresses.
    assert "overflow-x:auto" in CSS or "overflow-x: auto" in CSS, \
        "topbar-chips must have overflow-x:auto for mobile chip scrolling"
# ── Workspace panel close ─────────────────────────────────────────────────────
def test_workspace_close_button_present():
    """Workspace panel must have a close/hide button accessible on mobile."""
    # Accept handleWorkspaceClose() (two-step close: file→browse→closed), or the
    # lower-level functions directly. handleWorkspaceClose is preferred because
    # it dismisses a file preview first before closing the panel.
    # Inline onclick attributes are matched literally against index.html.
    has_close = (
        'onclick="handleWorkspaceClose()"' in HTML or
        'onclick="closeWorkspacePanel()"' in HTML or
        'onclick="toggleWorkspacePanel()"' in HTML
    )
    assert has_close, \
        "handleWorkspaceClose() or closeWorkspacePanel() must be wired to a button to close the workspace panel on mobile"
def test_toggle_mobile_files_js_defined():
    """toggleMobileFiles() must be defined in boot.js."""
    # boot.js is read per-test here rather than from a module-level constant.
    boot_js = (REPO / "static" / "boot.js").read_text(encoding="utf-8")
    assert "function toggleMobileFiles()" in boot_js, \
        "toggleMobileFiles() missing from static/boot.js"
    assert "mobile-open" in boot_js, \
        "toggleMobileFiles() must toggle mobile-open class on the right panel"
def test_new_conversation_closes_mobile_sidebar():
    """New conversation must close the mobile drawer so the chat pane is visible immediately."""
    boot_js = (REPO / "static" / "boot.js").read_text(encoding="utf-8")
    # Split once; the original re-split boot.js on every lookup (three times
    # on a single line for the shortcut block).
    lines = boot_js.splitlines()
    click_line = next((ln for ln in lines if "$('btnNewChat').onclick" in ln), "")
    assert click_line, "btnNewChat onclick handler missing from static/boot.js"
    assert "closeMobileSidebar" in click_line, \
        "btnNewChat handler must closeMobileSidebar() after creating the new session"
    shortcut_line = next((ln for ln in lines if "e.key==='k'" in ln or "e.key === 'k'" in ln), "")
    assert shortcut_line, "Cmd/Ctrl+K new chat shortcut missing from static/boot.js"
    # Inspect the shortcut line plus the next three lines for the drawer close.
    at = lines.index(shortcut_line)
    shortcut_block = "\n".join(lines[at:at + 4])
    assert "closeMobileSidebar" in shortcut_block, \
        "Cmd/Ctrl+K new chat shortcut must closeMobileSidebar() after creating the new session"
# ── Viewport and scroll safety ────────────────────────────────────────────────
def test_body_overflow_hidden():
    """body must have overflow:hidden to prevent double scrollbars on mobile."""
    assert "body{" in CSS or "body {" in CSS, \
        "body rule missing from style.css"
    # [^}]* keeps the match inside one rule body.
    # NOTE(review): the unanchored pattern also matches e.g. "tbody{" — a word
    # boundary would make it stricter; confirm if a false positive ever appears.
    assert re.search(r'body\{[^}]*overflow:hidden', CSS), \
        "body must have overflow:hidden to prevent double scrollbars"
def test_flex_parents_allow_message_scroller_to_shrink():
    """The top-level flex containers must opt into min-height:0 so .messages can scroll on mobile.

    Mobile Safari/Chrome can trap scroll when a flex child with overflow:auto sits inside
    parents whose min-height remains auto. Both .layout and .main need min-height:0.
    """
    # [^}]* keeps each match within the first matching rule body.
    assert re.search(r'\.layout\{[^}]*min-height:0', CSS), \
        ".layout must set min-height:0 so the chat column can shrink and scroll"
    assert re.search(r'\.main\{[^}]*min-height:0', CSS), \
        ".main must set min-height:0 so .messages remains scrollable while busy"
def test_messages_touch_scrolling_hints_present():
    """The messages scroller must advertise touch-friendly scrolling behavior.

    On mobile browsers, momentum scrolling and explicit pan-y/overscroll behavior help
    prevent the chat area from feeling locked while the app body itself stays overflow:hidden.
    """
    # All three declarations must sit inside the .messages rule body ([^}]*).
    assert re.search(r'\.messages\{[^}]*-webkit-overflow-scrolling:\s*touch', CSS), \
        ".messages must enable -webkit-overflow-scrolling:touch for mobile momentum scroll"
    assert re.search(r'\.messages\{[^}]*touch-action:\s*pan-y', CSS), \
        ".messages must set touch-action:pan-y so vertical swipe gestures scroll the transcript"
    assert re.search(r'\.messages\{[^}]*overscroll-behavior-y:\s*contain', CSS), \
        ".messages must contain vertical overscroll so the transcript keeps the gesture"
def test_100dvh_viewport_height():
    """Layout must use 100dvh (dynamic viewport height) for correct mobile sizing.

    On mobile Safari and Chrome, 100vh includes the browser chrome (address bar),
    causing content to be hidden. 100dvh accounts for the actual available height.
    """
    uses_dynamic_vh = "100dvh" in CSS
    assert uses_dynamic_vh, (
        "style.css must use 100dvh for correct mobile viewport height "
        "(100vh hides content under address bar)"
    )
def test_composer_touch_target_size():
    """Send button and composer inputs must have minimum 44px touch targets on mobile.

    Apple HIG and Google Material guidelines both require 44px minimum touch targets.
    """
    # Check that mobile CSS doesn't make the send button smaller than 44×44
    # We check that there's at least a min-height definition for touch targets
    # NOTE(review): the regex matches any "44px" after min-height/height on one
    # line anywhere in the stylesheet — a weak proxy, not a scoped check.
    assert re.search(r'(min-height|height).*44px', CSS), \
        "style.css must define 44px minimum touch targets for mobile (send button, nav buttons)"
# ── Input zoom prevention ─────────────────────────────────────────────────────
def test_composer_textarea_font_size_mobile():
    """Composer textarea must have font-size >= 16px on mobile.

    iOS Safari zooms the viewport when an input with font-size < 16px is focused,
    which breaks the layout. The composer textarea must be >= 16px at mobile widths.
    """
    # Check for 16px font-size on the textarea in a mobile breakpoint
    # NOTE(review): matches font-size:16px anywhere in style.css, not scoped to
    # the composer textarea or a mobile breakpoint — confirm on regression.
    assert re.search(r'font-size:16px', CSS), \
        "Composer textarea must have font-size:16px at mobile widths to prevent iOS zoom-on-focus"
# ── Sidebar tabs on mobile ───────────────────────────────────────────────────
def test_profiles_sidebar_tab_present():
    """Sidebar tab strip must include Profiles."""
    # Literal match — attribute order in index.html must stay class then data-panel.
    assert 'class="nav-tab" data-panel="profiles"' in HTML, \
        "Sidebar nav must have a Profiles tab"
def test_mobile_bottom_nav_removed():
    """Neither the markup nor the styles may still mention mobile-bottom-nav."""
    checks = (
        (HTML, "mobile-bottom-nav markup should be removed from index.html"),
        (CSS, "mobile-bottom-nav CSS should be removed from style.css"),
    )
    for blob, failure_msg in checks:
        assert "mobile-bottom-nav" not in blob, failure_msg
# ── Mobile Enter key inserts newline (PR #315, fixes #269) ───────────────────
def test_mobile_enter_newline_condition_present():
    """The keydown handler must detect touch-primary devices via pointer:coarse."""
    source = (REPO / "static" / "boot.js").read_text(encoding="utf-8")
    assert "pointer:coarse" in source, \
        "boot.js must use pointer:coarse media query for mobile Enter detection"
def test_mobile_enter_newline_uses_match_media():
    """Pointer detection must go through matchMedia, not a hardcoded flag."""
    source = (REPO / "static" / "boot.js").read_text(encoding="utf-8")
    accepted_spellings = (
        "matchMedia('(pointer:coarse)')",
        'matchMedia("(pointer:coarse)")',
    )
    assert any(snippet in source for snippet in accepted_spellings), \
        "boot.js must use matchMedia('(pointer:coarse)') for mobile detection"
def test_mobile_enter_newline_only_overrides_enter_default():
    """Mobile newline override must only apply when _sendKey is the default 'enter'."""
    boot_js = (REPO / "static" / "boot.js").read_text(encoding="utf-8")
    # The _mobileDefault check must gate on _sendKey==='enter' so ctrl+enter users aren't affected
    # NOTE(review): the two substrings are checked independently, so this would
    # also pass if "_sendKey===" and "'enter'" appear in unrelated places in
    # boot.js. Consider asserting the contiguous "_sendKey==='enter'" instead —
    # TODO: confirm the exact spelling/whitespace used in boot.js first.
    assert "_sendKey===" in boot_js and "'enter'" in boot_js, \
        "Mobile newline fallback must check window._sendKey==='enter' to avoid overriding user preference"
def test_mobile_enter_does_not_affect_desktop_logic():
    """Desktop users must keep the plain Enter-to-send behaviour (else branch)."""
    source = (REPO / "static" / "boot.js").read_text(encoding="utf-8")
    desktop_branch = "if(!e.shiftKey){e.preventDefault();send();"
    assert desktop_branch in source, \
        "Desktop Enter-to-send logic (else branch) must still be present in boot.js"

View File

@@ -0,0 +1,476 @@
"""
Tests for resolve_model_provider() model routing logic.
Verifies that model IDs are correctly resolved to (model, provider, base_url)
tuples for different provider configurations.
"""
import api.config as config
def _resolve_with_config(model_id, provider=None, base_url=None, default=None, custom_providers=None):
    """Helper: temporarily set config.cfg model/custom provider sections, call resolve, restore."""
    # Snapshot the live config so it can be restored afterwards.
    # NOTE(review): dict() is a shallow copy — safe here because only the
    # top-level 'model' / 'custom_providers' keys are replaced wholesale, and
    # restoration below re-installs the original key/value pairs in place.
    old_cfg = dict(config.cfg)
    # Build the fake 'model' section from whichever kwargs were supplied.
    model_cfg = {}
    if provider:
        model_cfg['provider'] = provider
    if base_url:
        model_cfg['base_url'] = base_url
    if default:
        model_cfg['default'] = default
    config.cfg['model'] = model_cfg if model_cfg else {}
    if custom_providers is not None:
        config.cfg['custom_providers'] = custom_providers
    try:
        # Returns the (model, provider, base_url) tuple under the fake config.
        return config.resolve_model_provider(model_id)
    finally:
        # Restore the original config even if resolution raises.
        config.cfg.clear()
        config.cfg.update(old_cfg)
# ── OpenRouter prefix handling ────────────────────────────────────────────
def test_openrouter_free_keeps_full_path():
    """With openrouter as the provider, 'openrouter/free' keeps its full path."""
    resolved = _resolve_with_config(
        'openrouter/free',
        provider='openrouter',
        base_url='https://openrouter.ai/api/v1',
    )
    model, provider, _base_url = resolved
    assert model == 'openrouter/free', f"Expected 'openrouter/free', got '{model}'"
    assert provider == 'openrouter'
def test_openrouter_model_with_provider_prefix():
    """An anthropic/* model routed via openrouter keeps its full path."""
    model, provider, _base_url = _resolve_with_config(
        'anthropic/claude-sonnet-4.6',
        provider='openrouter',
        base_url='https://openrouter.ai/api/v1',
    )
    assert model == 'anthropic/claude-sonnet-4.6'
    assert provider == 'openrouter'
# ── Direct provider prefix stripping ─────────────────────────────────────
def test_anthropic_prefix_stripped_for_direct_api():
    """Direct anthropic API use strips the 'anthropic/' prefix."""
    model, provider, _base_url = _resolve_with_config(
        'anthropic/claude-sonnet-4.6',
        provider='anthropic',
    )
    assert (model, provider) == ('claude-sonnet-4.6', 'anthropic')
def test_openai_prefix_stripped_for_direct_api():
    """Direct openai API use strips the 'openai/' prefix."""
    model, provider, _base_url = _resolve_with_config(
        'openai/gpt-5.4-mini',
        provider='openai',
    )
    assert (model, provider) == ('gpt-5.4-mini', 'openai')
# ── Cross-provider routing ───────────────────────────────────────────────
def test_cross_provider_routes_through_openrouter():
    """Picking an openai model while configured for anthropic goes via openrouter."""
    model, provider, base_url = _resolve_with_config(
        'openai/gpt-5.4-mini',
        provider='anthropic',
    )
    assert model == 'openai/gpt-5.4-mini'
    assert provider == 'openrouter'
    # openrouter supplies its own endpoint, so no base_url is returned here
    assert base_url is None
# ── Bare model names ─────────────────────────────────────────────────────
def test_bare_model_uses_config_provider():
    """A slash-free model name resolves with the configured provider and base_url."""
    resolved = _resolve_with_config(
        'gemma-4-26B',
        provider='custom',
        base_url='http://192.168.1.160:4000',
    )
    assert resolved == ('gemma-4-26B', 'custom', 'http://192.168.1.160:4000')
def test_empty_model_returns_config_defaults():
    """An empty model string falls through to the configured provider."""
    model, provider, _base_url = _resolve_with_config(
        '',
        provider='anthropic',
    )
    assert model == ''
    assert provider == 'anthropic'
# ── @provider:model hint routing (Issue #138 v2) ────────────────────────
def test_provider_hint_routes_to_specific_provider():
    """@minimax:MiniMax-M2.7 goes straight to the minimax provider."""
    model, provider, base_url = _resolve_with_config(
        '@minimax:MiniMax-M2.7',
        provider='anthropic',
    )
    assert model == 'MiniMax-M2.7'
    assert provider == 'minimax'
    # base_url is left unset for resolve_runtime_provider to fill in later
    assert base_url is None
def test_provider_hint_zai():
    """@zai:GLM-5 goes straight to the zai provider."""
    model, provider, _base_url = _resolve_with_config(
        '@zai:GLM-5',
        provider='openai',
    )
    assert (model, provider) == ('GLM-5', 'zai')
def test_provider_hint_deepseek():
    """@deepseek:deepseek-chat goes straight to the deepseek provider."""
    model, provider, _base_url = _resolve_with_config(
        '@deepseek:deepseek-chat',
        provider='anthropic',
    )
    assert (model, provider) == ('deepseek-chat', 'deepseek')
def test_slash_prefix_non_default_still_routes_openrouter():
    """The legacy minimax/MiniMax-M2.7 format still routes through openrouter."""
    model, provider, _base_url = _resolve_with_config(
        'minimax/MiniMax-M2.7',
        provider='anthropic',
    )
    assert (model, provider) == ('minimax/MiniMax-M2.7', 'openrouter')
def test_custom_provider_model_with_slash_routes_to_named_custom_provider():
    """Slash-containing custom endpoint model IDs must not be mistaken for OpenRouter models."""
    lm_studio = {
        'name': 'Local LM Studio',
        'base_url': 'http://lmstudio.local:1234/v1',
        'model': 'google/gemma-4-26b-a4b',
    }
    model, provider, base_url = _resolve_with_config(
        'google/gemma-4-26b-a4b',
        provider='openrouter',
        base_url='https://openrouter.ai/api/v1',
        custom_providers=[lm_studio],
    )
    assert model == 'google/gemma-4-26b-a4b'
    assert provider == 'custom:local-lm-studio'
    assert base_url == 'http://lmstudio.local:1234/v1'
# ── get_available_models() @provider: hint behaviour ──────────────────────
def _available_models_with_provider(provider):
    """Helper: temporarily set active_provider in config."""
    snapshot = dict(config.cfg)
    config.cfg['model'] = {'provider': provider}
    try:
        return config.get_available_models()
    finally:
        # Put the original config back even if the call raised.
        config.cfg.clear()
        config.cfg.update(snapshot)
def test_non_default_provider_models_use_hint_prefix():
    """With anthropic active, any MiniMax model IDs must carry the @minimax: hint."""
    result = _available_models_with_provider('anthropic')
    groups = {g['provider']: g['models'] for g in result['groups']}
    # Empty default keeps this a no-op when no MiniMax group is present.
    for m in groups.get('MiniMax', []):
        assert m['id'].startswith('@minimax:'), (
            f"Expected @minimax: prefix, got: {m['id']!r}"
        )
def test_no_duplicate_when_default_model_is_prefixed():
    """Issue #147 Bug 2: a provider-prefixed default_model
    ('anthropic/claude-opus-4.6') must not inject a duplicate next to the
    existing bare 'claude-opus-4.6' entry within the same provider group."""
    import api.config as _cfg
    saved_cfg = dict(_cfg.cfg)
    _cfg.cfg['model'] = {
        'provider': 'anthropic',
        'default': 'anthropic/claude-opus-4.6',
    }
    def _bare(mid):
        # Strip a leading 'provider/' segment, if any.
        return mid.split('/', 1)[-1] if '/' in mid else mid
    try:
        result = _cfg.get_available_models()
        # Inspect each group on its own: no group may contain two entries
        # that normalize to the same bare model name.
        for g in result['groups']:
            bare_ids = [_bare(m['id']) for m in g['models']]
            duplicates = [mid for mid in set(bare_ids) if bare_ids.count(mid) > 1]
            assert not duplicates, (
                f"Provider group '{g['provider']}' has duplicate models after normalization: "
                f"{duplicates}\nFull group: {[m['id'] for m in g['models']]}"
            )
    finally:
        _cfg.cfg.clear()
        _cfg.cfg.update(saved_cfg)
def test_default_provider_models_not_prefixed():
    """Models belonging to the active provider keep their bare IDs (no @hint prefix)."""
    import api.config as _cfg
    raw_anthropic_ids = {m['id'] for m in _cfg._PROVIDER_MODELS.get('anthropic', [])}
    result = _available_models_with_provider('anthropic')
    groups = {g['provider']: g['models'] for g in result['groups']}
    anthropic_models = groups.get('Anthropic')
    if anthropic_models is not None:
        returned_ids = {m['id'] for m in anthropic_models}
        for bare_id in raw_anthropic_ids:
            assert bare_id in returned_ids, (
                f"_PROVIDER_MODELS entry '{bare_id}' is missing from the Anthropic group"
            )
# ── get_available_models(): phantom "Custom" group regression ─────────────
#
# When the user has model.provider set to a real provider (e.g. openai-codex)
# AND a model.base_url set, hermes_cli reports the 'custom' pseudo-provider as
# authenticated. The WebUI picker must NOT build a separate "Custom" group in
# that case — the base_url belongs to the active provider.
def _available_models_with_full_cfg(provider, default, base_url):
    """Helper: set model.provider, model.default, model.base_url at once.
    Clears model-override env vars (HERMES_MODEL, OPENAI_MODEL, LLM_MODEL)
    during the call so the real hermes profile environment doesn't leak into
    the test and override the fixture's default model.
    """
    import os
    import api.config as _cfg
    # Shallow snapshot — only the top-level 'model' key is replaced below.
    old_cfg = dict(_cfg.cfg)
    _cfg.cfg['model'] = {
        'provider': provider,
        'default': default,
        'base_url': base_url,
    }
    # Refresh the cached config-file mtime so the module doesn't reload
    # config.yaml over our in-memory override. Best-effort: the file may
    # not exist in the test environment.
    try:
        _cfg._cfg_mtime = _cfg.Path(_cfg._get_config_path()).stat().st_mtime
    except Exception:
        pass
    # Clear model-override env vars to prevent the real profile from leaking in
    _model_env_keys = ('HERMES_MODEL', 'OPENAI_MODEL', 'LLM_MODEL')
    # pop(..., None) records None for vars that were absent, so only vars
    # that actually existed get restored in the finally block.
    _saved_env = {k: os.environ.pop(k, None) for k in _model_env_keys}
    try:
        return _cfg.get_available_models()
    finally:
        _cfg.cfg.clear()
        _cfg.cfg.update(old_cfg)
        for k, v in _saved_env.items():
            if v is not None:
                os.environ[k] = v
def test_no_phantom_custom_group_when_active_provider_is_set(monkeypatch):
    """Regression: provider=openai-codex plus a base_url used to surface a
    phantom "Custom" group in the picker instead of keeping gpt-5.4 under
    the "OpenAI Codex" group."""
    import sys, types
    # Simulate list_available_providers() reporting both the real provider
    # and the phantom 'custom' pseudo-provider as authenticated — which is
    # exactly what happens whenever base_url is configured.
    stub_models = types.ModuleType('hermes_cli.models')
    stub_models.list_available_providers = lambda: [
        {'id': 'openai-codex', 'authenticated': True},
        {'id': 'custom', 'authenticated': True},
    ]
    stub_auth = types.ModuleType('hermes_cli.auth')
    stub_auth.get_auth_status = lambda pid: {'key_source': 'env'}
    monkeypatch.setitem(sys.modules, 'hermes_cli.models', stub_models)
    monkeypatch.setitem(sys.modules, 'hermes_cli.auth', stub_auth)
    result = _available_models_with_full_cfg(
        provider='openai-codex',
        default='gpt-5.4',
        base_url='https://chatgpt.com/backend-api/codex',
    )
    group_names = [g['provider'] for g in result['groups']]
    assert 'Custom' not in group_names, (
        f"Phantom 'Custom' group present; full groups: {group_names}"
    )
def test_default_model_lands_under_active_provider_group(monkeypatch):
    """The configured default_model must be placed in the active provider's
    display group even when it is absent from _PROVIDER_MODELS[provider] AND
    the active provider is not the alphabetically-first detected one.

    Regression guard for a hyphen-vs-space bug in the "ensure default_model
    appears" post-pass: the substring check `active_provider.lower() in
    g.get('provider', '').lower()` failed for 'openai-codex' vs the display
    name 'OpenAI Codex' and silently fell back to groups[0] — so when another
    provider sorted earlier alphabetically (e.g. 'anthropic'), gpt-5.4 landed
    in the WRONG group.
    """
    import sys, types
    stub_models = types.ModuleType('hermes_cli.models')
    stub_models.list_available_providers = lambda: [
        {'id': 'anthropic', 'authenticated': True},  # sorts before openai-codex
        {'id': 'openai-codex', 'authenticated': True},
        {'id': 'custom', 'authenticated': True},
    ]
    stub_auth = types.ModuleType('hermes_cli.auth')
    stub_auth.get_auth_status = lambda pid: {'key_source': 'env'}
    monkeypatch.setitem(sys.modules, 'hermes_cli.models', stub_models)
    monkeypatch.setitem(sys.modules, 'hermes_cli.auth', stub_auth)
    result = _available_models_with_full_cfg(
        provider='openai-codex',
        default='gpt-5.4',
        base_url='https://chatgpt.com/backend-api/codex',
    )
    groups = {g['provider']: [m['id'] for m in g['models']] for g in result['groups']}
    def norm(mid):
        # Drop 'provider/' and '@hint:' decorations to compare bare names.
        return mid.split('/', 1)[-1].split(':', 1)[-1]
    assert 'OpenAI Codex' in groups, f"OpenAI Codex group missing: {list(groups)}"
    assert 'gpt-5.4' in {norm(mid) for mid in groups['OpenAI Codex']}, (
        f"gpt-5.4 not in OpenAI Codex group; contents: {groups['OpenAI Codex']}"
    )
    # Crucially, the fallback path must not have leaked the model into the
    # alphabetically-first group (Anthropic).
    assert 'gpt-5.4' not in {norm(mid) for mid in groups.get('Anthropic', [])}, (
        f"gpt-5.4 leaked into Anthropic group via fallback: {groups.get('Anthropic')}"
    )
def test_unknown_providers_do_not_inherit_default_model(monkeypatch):
    """Detected providers that have no model catalog of their own must not be
    padded with the global default_model placeholder.

    Regression guard: Alibaba / Minimax-Cn were showing gpt-5.4-mini even
    though those providers do not serve it.
    """
    import sys, types
    stub_models = types.ModuleType('hermes_cli.models')
    stub_models.list_available_providers = lambda: [
        {'id': 'openai-codex', 'authenticated': True},
        {'id': 'alibaba', 'authenticated': True},
        {'id': 'minimax-cn', 'authenticated': True},
    ]
    stub_auth = types.ModuleType('hermes_cli.auth')
    stub_auth.get_auth_status = lambda pid: {'key_source': 'env'}
    monkeypatch.setitem(sys.modules, 'hermes_cli.models', stub_models)
    monkeypatch.setitem(sys.modules, 'hermes_cli.auth', stub_auth)
    result = _available_models_with_full_cfg(
        provider='openai-codex',
        default='gpt-5.4-mini',
        base_url='',
    )
    groups = {g['provider']: [m['id'] for m in g['models']] for g in result['groups']}
    def norm(mid):
        return mid.split('/', 1)[-1].split(':', 1)[-1]
    assert 'Alibaba' not in groups, (
        f"Alibaba should not inherit the default model placeholder: {groups}"
    )
    assert 'Minimax-Cn' not in groups, (
        f"Minimax-Cn should not inherit the default model placeholder: {groups}"
    )
    inherited = [
        mid for mid in groups.get('Alibaba', []) + groups.get('Minimax-Cn', [])
        if norm(mid) == 'gpt-5.4-mini'
    ]
    assert not inherited, (
        f"Unknown provider groups still inherited the default model: {groups}"
    )
def test_custom_endpoint_uses_model_config_api_key_for_model_discovery(monkeypatch):
    """Custom endpoint model discovery must use model.api_key from config.yaml,
    not only environment variables, otherwise the dropdown collapses to the
    default model when /v1/models requires auth."""
    import json as _json
    import api.config as _cfg
    # Snapshot config; restored in the finally block below.
    old_cfg = dict(_cfg.cfg)
    _cfg.cfg['model'] = {
        'provider': 'custom',
        'default': 'gpt-5.4',
        'base_url': 'https://example.test/v1',
        'api_key': 'sk-test-model-key',
    }
    # Refresh cached config mtime so the in-memory override isn't reloaded
    # from disk; best-effort only.
    try:
        _cfg._cfg_mtime = _cfg.Path(_cfg._get_config_path()).stat().st_mtime
    except Exception:
        pass
    _cfg.cfg.pop('providers', None)
    captured = {}
    # Minimal stand-in for the urlopen response: supports read() and the
    # context-manager protocol, returning one fake /v1/models entry.
    class _Resp:
        def read(self):
            return _json.dumps({'data': [{'id': 'gpt-5.2', 'name': 'GPT-5.2'}]}).encode('utf-8')
        def __enter__(self):
            return self
        def __exit__(self, exc_type, exc, tb):
            return False
    def _fake_urlopen(req, timeout=10):
        # Record the auth + UA headers the code under test attached.
        # 'User-agent' capitalization matches urllib's header normalization
        # (Request.get_header capitalizes) — presumably why it's spelled so.
        captured['auth'] = req.get_header('Authorization')
        captured['ua'] = req.get_header('User-agent')
        return _Resp()
    monkeypatch.setattr('urllib.request.urlopen', _fake_urlopen)
    # Stub DNS resolution so no real network lookup can happen.
    monkeypatch.setattr('socket.getaddrinfo', lambda *a, **k: [])
    # Remove every env var that could satisfy auth, forcing the code to fall
    # back to model.api_key from the config — the behavior under test.
    monkeypatch.delenv('OPENAI_API_KEY', raising=False)
    monkeypatch.delenv('HERMES_API_KEY', raising=False)
    monkeypatch.delenv('HERMES_OPENAI_API_KEY', raising=False)
    monkeypatch.delenv('LOCAL_API_KEY', raising=False)
    monkeypatch.delenv('OPENROUTER_API_KEY', raising=False)
    monkeypatch.delenv('API_KEY', raising=False)
    try:
        result = _cfg.get_available_models()
    finally:
        _cfg.cfg.clear()
        _cfg.cfg.update(old_cfg)
    # The config-supplied key must have been sent as a Bearer token.
    assert captured['auth'] == 'Bearer sk-test-model-key'
    assert captured['ua'] == 'OpenAI/Python 1.0'
    groups = {g['provider']: [m['id'] for m in g['models']] for g in result['groups']}
    assert 'Custom' in groups
    assert 'gpt-5.2' in groups['Custom']
# -- Issue #230: custom provider with slash model name -----------------------
def test_custom_endpoint_slash_model_routes_to_custom_not_openrouter():
    """Regression test for #230.
    A slash-containing model name (e.g. google/gemma-4-26b-a4b) served from a
    configured custom endpoint must stay on that endpoint rather than being
    rerouted to OpenRouter; openrouter-configured setups must keep routing
    through OpenRouter as before.
    """
    # Case 1: custom provider + base_url — the slash model stays on the endpoint.
    model, provider, base_url = _resolve_with_config(
        'google/gemma-4-26b-a4b',
        provider='custom',
        base_url='http://127.0.0.1:1234/v1',
        default='google/gemma-4-26b-a4b',
    )
    assert provider.startswith('custom'), (
        "Expected provider starting with 'custom', got '{}'. "
        "Slash in model name should NOT trigger OpenRouter rerouting when base_url is set.".format(provider)
    )
    assert base_url == 'http://127.0.0.1:1234/v1', (
        "Expected base_url 'http://127.0.0.1:1234/v1', got '{}'.".format(base_url)
    )
    # Fix #433: the provider prefix is stripped for custom endpoints so stale
    # prefixed model IDs from previous sessions cannot break endpoint routing.
    assert model == 'gemma-4-26b-a4b', (
        "Model name prefix should be stripped for custom base_url endpoint, got '{}'.".format(model)
    )
    # Case 2: openrouter provider — the same slash model still goes to openrouter.
    model_via_or, provider_via_or, _ = _resolve_with_config(
        'google/gemma-4-26b-a4b',
        provider='openrouter',
        base_url='https://openrouter.ai/api/v1',
        default='google/gemma-4-26b-a4b',
    )
    assert provider_via_or == 'openrouter', (
        "Expected provider 'openrouter', got '{}'. "
        "Slash model via openrouter provider must still resolve to openrouter.".format(provider_via_or)
    )
    assert model_via_or == 'google/gemma-4-26b-a4b', (
        "Model name should be preserved for openrouter, got '{}'.".format(model_via_or)
    )

View File

@@ -0,0 +1,368 @@
"""Tests for fix: onboarding wizard must not fire when Hermes is already configured.
Issue #420 — existing Hermes users (config.yaml present + chat_ready) were
shown the first-run wizard because the only gate was settings.onboarding_completed.
Covers:
(a) config.yaml present + chat_ready=True → completed=True (no wizard)
(b) no config.yaml → completed=False (wizard fires)
(c) apply_onboarding_setup refuses to overwrite an existing config without
confirm_overwrite=True
"""
from __future__ import annotations
import json
import os
import pathlib
import urllib.error
import urllib.request
from unittest import mock
import pytest
# Skip tests that call apply_onboarding_setup → _save_yaml_config when PyYAML is missing
_HAS_YAML = True
try:
    import yaml as _yaml  # noqa: F401 — imported only to probe availability
except ImportError:
    _HAS_YAML = False
_needs_yaml = pytest.mark.skipif(not _HAS_YAML, reason="PyYAML not installed — onboarding setup tests require it")
# ---------------------------------------------------------------------------
# Unit tests — no live server needed, test logic directly via imports
# ---------------------------------------------------------------------------
def _make_status(*, config_exists: bool, chat_ready: bool, onboarding_done: bool = False):
    """Call get_onboarding_status() with a controlled filesystem + settings.

    All collaborators of api.onboarding.get_onboarding_status() are mocked so
    the gate logic can be driven directly by the three keyword flags:

    config_exists:   what Path(...).exists() reports for the config file
    chat_ready:      whether the runtime status claims a working provider
    onboarding_done: the stored settings["onboarding_completed"] flag

    Returns the dict produced by get_onboarding_status().
    """
    import importlib
    # NOTE(review): `importlib` is imported but never used here — likely a
    # leftover from a reload-based earlier version; safe to remove.
    # Import fresh copies each call so module-level state doesn't bleed across
    import api.onboarding as mod
    fake_config_path = pathlib.Path("/tmp/_test_config.yaml")
    settings = {"onboarding_completed": onboarding_done}
    # Build a minimal runtime dict that get_onboarding_status() would produce
    # from _status_from_runtime. We only need the keys the gate checks.
    runtime = {
        "chat_ready": chat_ready,
        "provider_configured": chat_ready,
        "provider_ready": chat_ready,
        "setup_state": "ready" if chat_ready else "needs_provider",
        "provider_note": "test note",
        "current_provider": "openrouter" if chat_ready else None,
        "current_model": "anthropic/claude-sonnet-4.6" if chat_ready else None,
        "current_base_url": None,
        "env_path": "/tmp/.hermes_test/.env",
    }
    with (
        mock.patch.object(mod, "load_settings", return_value=settings),
        mock.patch.object(mod, "get_config", return_value={}),
        mock.patch.object(
            mod,
            "verify_hermes_imports",
            return_value=(chat_ready, [], {}),
        ),
        mock.patch.object(mod, "_status_from_runtime", return_value=runtime),
        mock.patch.object(mod, "load_workspaces", return_value=[]),
        mock.patch.object(mod, "get_last_workspace", return_value=None),
        mock.patch.object(mod, "get_available_models", return_value=[]),
        mock.patch.object(mod, "_get_config_path", return_value=fake_config_path),
        # Patching the class attribute makes every Path instance's .exists()
        # answer with config_exists for the duration of the with block.
        mock.patch.object(pathlib.Path, "exists") as mock_exists,
    ):
        # Make Path(_get_config_path()).exists() return config_exists
        mock_exists.return_value = config_exists
        result = mod.get_onboarding_status()
    return result
class TestOnboardingGate:
    """Unit tests for the completed-gate in get_onboarding_status()."""
    def test_config_exists_and_chat_ready_returns_completed_true(self):
        """An existing, working config must suppress the first-run wizard."""
        status = _make_status(config_exists=True, chat_ready=True)
        assert status["completed"] is True, (
            "Wizard fired for existing Hermes user! "
            "config.yaml + chat_ready must auto-complete onboarding."
        )
    def test_no_config_returns_completed_false(self):
        """With no config.yaml at all, the wizard must fire."""
        status = _make_status(config_exists=False, chat_ready=False)
        assert status["completed"] is False, (
            "Fresh install must show the wizard (completed should be False)."
        )
    def test_config_exists_but_not_chat_ready_still_shows_wizard(self):
        """A present-but-broken config (chat_ready=False) still needs the wizard."""
        status = _make_status(config_exists=True, chat_ready=False)
        # Not auto-completed — the config is there but unusable.
        assert status["completed"] is False, (
            "Broken config (chat_ready=False) must still show the wizard."
        )
    def test_onboarding_done_flag_always_respected(self):
        """A completed-onboarding settings flag always wins: never show the wizard."""
        status = _make_status(config_exists=False, chat_ready=False, onboarding_done=True)
        assert status["completed"] is True
    def test_config_exists_always_exposed_in_system(self):
        """The system block keeps reporting config_exists regardless of gating."""
        status = _make_status(config_exists=True, chat_ready=True)
        assert "config_exists" in status["system"]
        assert status["system"]["config_exists"] is True
class TestApplyOnboardingSetupGuard:
    """Fix #2: apply_onboarding_setup must not silently overwrite config.yaml."""
    def _call_setup(self, body: dict, config_yaml_exists: bool):
        """Invoke apply_onboarding_setup(body) with Path.exists forced to the
        given value, so the overwrite guard can be exercised without touching
        the real filesystem."""
        import api.onboarding as mod
        fake_config_path = pathlib.Path("/tmp/_test_config.yaml")
        with (
            mock.patch.object(mod, "_get_config_path", return_value=fake_config_path),
            # Class-wide patch: every Path instance's .exists() reports
            # config_yaml_exists inside this block.
            mock.patch.object(pathlib.Path, "exists", return_value=config_yaml_exists),
        ):
            return mod.apply_onboarding_setup(body)
    def test_setup_blocked_when_config_exists_without_confirm(self):
        """Must return an error dict (not raise) if config.yaml exists and no confirm_overwrite."""
        result = self._call_setup(
            {
                "provider": "openrouter",
                "model": "anthropic/claude-sonnet-4.6",
                "api_key": "test-key",
            },
            config_yaml_exists=True,
        )
        assert isinstance(result, dict), "Expected a dict response, not an exception"
        assert result.get("error") == "config_exists", (
            f"Expected error='config_exists', got: {result}"
        )
        assert result.get("requires_confirm") is True
    @_needs_yaml
    def test_setup_allowed_with_confirm_overwrite(self):
        """With confirm_overwrite=True, setup may proceed (will hit real logic)."""
        import api.onboarding as mod
        import tempfile
        fake_config_path = pathlib.Path("/tmp/_test_config_confirm.yaml")
        fake_config_path.unlink(missing_ok=True)  # start clean
        try:
            with tempfile.TemporaryDirectory() as tmp_home:
                tmp_home_path = pathlib.Path(tmp_home)
                # Without patching Path.exists, use a non-existent path so it won't block.
                # Also redirect _get_active_hermes_home so .env writes go to the temp dir,
                # never to the real ~/.hermes/.env.
                with mock.patch.object(mod, "_get_active_hermes_home", return_value=tmp_home_path):
                    result = mod.apply_onboarding_setup(
                        {
                            "provider": "openrouter",
                            "model": "anthropic/claude-sonnet-4.6",
                            "api_key": "test-key-confirm",
                            "confirm_overwrite": True,
                        }
                    )
            # Should NOT return config_exists error
            if isinstance(result, dict):
                assert result.get("error") != "config_exists", (
                    "confirm_overwrite=True should bypass the config-exists guard."
                )
        finally:
            # Clean up whatever the real setup logic may have written.
            fake_config_path.unlink(missing_ok=True)
    @_needs_yaml
    def test_setup_allowed_when_no_config_exists(self):
        """Fresh install: no config.yaml → setup proceeds normally (no blocking error)."""
        import api.onboarding as mod
        import tempfile
        fake_config_path = pathlib.Path("/tmp/_test_config_fresh.yaml")
        fake_config_path.unlink(missing_ok=True)
        try:
            with tempfile.TemporaryDirectory() as tmp_home:
                tmp_home_path = pathlib.Path(tmp_home)
                # Redirect both config path and hermes home so writes stay in /tmp,
                # never touching the real ~/.hermes/.env.
                with (
                    mock.patch.object(mod, "_get_config_path", return_value=fake_config_path),
                    mock.patch.object(mod, "_get_active_hermes_home", return_value=tmp_home_path),
                ):
                    result = mod.apply_onboarding_setup(
                        {
                            "provider": "openrouter",
                            "model": "anthropic/claude-sonnet-4.6",
                            "api_key": "test-key-fresh",
                        }
                    )
            if isinstance(result, dict):
                assert result.get("error") != "config_exists"
        finally:
            fake_config_path.unlink(missing_ok=True)
# ---------------------------------------------------------------------------
# Integration tests — require the live test server on port 8788
# ---------------------------------------------------------------------------
from tests._pytest_port import BASE
def _http_get(path):
    """GET an API path on the test server; return (json_body, status_code)."""
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        payload = json.loads(resp.read())
        return payload, resp.status
def _http_post(path, body=None):
    """POST a JSON body to the test server; return (json_body, status_code).

    HTTP error responses are decoded too, so callers can assert on 4xx/5xx
    payloads without catching exceptions themselves.
    """
    encoded = json.dumps(body or {}).encode()
    request = urllib.request.Request(
        BASE + path,
        data=encoded,
        headers={"Content-Type": "application/json"},
    )
    try:
        with urllib.request.urlopen(request, timeout=10) as resp:
            return json.loads(resp.read()), resp.status
    except urllib.error.HTTPError as err:
        return json.loads(err.read()), err.code
def _server_hermes_home() -> pathlib.Path:
    """Derive the hermes home directory the live test server is actually using."""
    data, _ = _http_get("/api/onboarding/status")
    env_path = data.get("system", {}).get("env_path", "")
    if not env_path:
        # Fall back to the conftest-published state dir when no env_path is reported.
        return pathlib.Path(os.environ.get("HERMES_WEBUI_TEST_STATE_DIR", str(pathlib.Path.home() / ".hermes" / "webui-mvp-test")))
    return pathlib.Path(env_path).parent
def _server_reachable() -> bool:
    """True when the live test server answers /health."""
    try:
        _http_get("/health")
    except Exception:
        return False
    return True
# No collection-time skip guard — conftest.py starts the server via its
# autouse session fixture BEFORE tests run. A collection-time check always
# sees no server and turns every test into a skip. Server reachability is
# asserted inside the _require_server fixture instead so failures are loud.
class TestOnboardingGateIntegration:
    """Live-server integration tests for the onboarding gate fix."""
    @pytest.fixture(autouse=True)
    def _require_server(self):
        """Assert server is reachable at test runtime (not collection time)."""
        if not _server_reachable():
            pytest.fail(f"Test server at {BASE} is not reachable")
    @pytest.fixture(autouse=True)
    def _clean(self):
        """Delete config.yaml/.env in the server's state dir before AND after
        each test, then flush the server's in-memory config cache."""
        hermes_home = _server_hermes_home()
        for rel in ("config.yaml", ".env"):
            (hermes_home / rel).unlink(missing_ok=True)
        yield
        for rel in ("config.yaml", ".env"):
            (hermes_home / rel).unlink(missing_ok=True)
        # Force the server to reload its in-memory config after file deletion.
        # apply_onboarding_setup() calls reload_config() which caches provider
        # state in the server process. Deleting files on disk does not clear
        # that cache — the next test would see provider_configured=True.
        # GET /api/personalities always calls reload_config(), giving us a
        # cheap way to flush the cache without a server restart.
        try:
            _http_get("/api/personalities")
        except Exception:
            pass
    def test_no_config_wizard_fires(self):
        """No config.yaml → completed=False."""
        data, status = _http_get("/api/onboarding/status")
        assert status == 200
        assert data["completed"] is False
    @_needs_yaml
    def test_existing_config_and_chat_ready_skips_wizard(self):
        """Write a valid config.yaml + .env → completed must be True."""
        import yaml
        hermes_home = _server_hermes_home()
        # Write a real config.yaml
        cfg = {"model": {"provider": "openrouter", "default": "anthropic/claude-sonnet-4.6"}}
        (hermes_home / "config.yaml").write_text(
            yaml.safe_dump(cfg, sort_keys=False), encoding="utf-8"
        )
        # Write a fake API key so provider_ready (and thus chat_ready) fires
        # — but only when hermes_cli imports are available
        data, _ = _http_get("/api/onboarding/status")
        if data["system"]["hermes_found"] and data["system"]["imports_ok"]:
            (hermes_home / ".env").write_text(
                "OPENROUTER_API_KEY=test-existing-key\n", encoding="utf-8"
            )
            # Re-query now that both config.yaml and .env are in place.
            data, status = _http_get("/api/onboarding/status")
            assert status == 200
            assert data["completed"] is True, (
                "Existing config + chat_ready must auto-complete onboarding."
            )
        else:
            # Agent not installed: chat_ready is always False, so wizard still
            # fires — that is the correct behaviour (can't verify readiness).
            assert data["completed"] is False
    @_needs_yaml
    def test_setup_blocked_for_existing_config(self):
        """POST /api/onboarding/setup must return config_exists error if config.yaml exists."""
        import yaml
        hermes_home = _server_hermes_home()
        cfg = {"model": {"provider": "openrouter", "default": "anthropic/claude-sonnet-4.6"}}
        (hermes_home / "config.yaml").write_text(
            yaml.safe_dump(cfg, sort_keys=False), encoding="utf-8"
        )
        data, status = _http_post(
            "/api/onboarding/setup",
            {
                "provider": "openrouter",
                "model": "anthropic/claude-sonnet-4.6",
                "api_key": "test-key",
            },
        )
        # The guard responds 200 with an error payload rather than an HTTP error.
        assert status == 200
        assert data.get("error") == "config_exists", (
            f"Expected config_exists guard. Got: {data}"
        )
        assert data.get("requires_confirm") is True
    @_needs_yaml
    def test_setup_allowed_with_confirm_overwrite(self):
        """POST /api/onboarding/setup with confirm_overwrite=True succeeds."""
        import yaml
        hermes_home = _server_hermes_home()
        cfg = {"model": {"provider": "openrouter", "default": "anthropic/claude-sonnet-4.6"}}
        (hermes_home / "config.yaml").write_text(
            yaml.safe_dump(cfg, sort_keys=False), encoding="utf-8"
        )
        data, status = _http_post(
            "/api/onboarding/setup",
            {
                "provider": "openrouter",
                "model": "anthropic/claude-sonnet-4.6",
                "api_key": "test-key",
                "confirm_overwrite": True,
            },
        )
        assert status == 200
        assert data.get("error") != "config_exists", (
            "confirm_overwrite=True must bypass the guard."
        )

View File

@@ -0,0 +1,244 @@
"""Onboarding MVP tests — first-run wizard and provider config persistence.
Tests that call /api/onboarding/setup require PyYAML in the test server's
Python environment (the agent venv). They are skipped when hermes-agent is
not installed, since the server falls back to system Python which typically
lacks pyyaml.
"""
import json
import pathlib
import sys
import urllib.error
import urllib.request
import pytest
from tests._pytest_port import BASE
# Check if pyyaml is available — onboarding setup tests need it on the server
_HAS_YAML = True
try:
    import yaml as _yaml  # noqa: F401 — presence probe only
except ImportError:
    _HAS_YAML = False
_needs_yaml = pytest.mark.skipif(not _HAS_YAML, reason="PyYAML not installed — onboarding setup tests require it")
def get(path):
    """GET BASE+path; return (parsed JSON body, HTTP status)."""
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        return json.loads(resp.read()), resp.status


def post(path, body=None):
    """POST a JSON body to BASE+path; return (parsed JSON, status) even on HTTP errors."""
    payload = json.dumps(body or {}).encode()
    request = urllib.request.Request(
        BASE + path,
        data=payload,
        headers={"Content-Type": "application/json"},
    )
    try:
        with urllib.request.urlopen(request, timeout=10) as resp:
            return json.loads(resp.read()), resp.status
    except urllib.error.HTTPError as err:
        # Error responses also carry a JSON body — surface it to the caller.
        return json.loads(err.read()), err.code
def _server_hermes_home() -> pathlib.Path:
    """Get the hermes home path the test server is actually using.

    Asking the server itself (via /api/onboarding/status) is more reliable
    than importing TEST_STATE_DIR from conftest: conftest may be re-imported
    under a different HERMES_HOME, leaving the constant stale (api.config
    resets HERMES_HOME at module import time via init_profile_state).
    """
    data, _status = get("/api/onboarding/status")
    env_path = data.get("system", {}).get("env_path", "")
    if not env_path:
        # Fallback when the server does not report its env path.
        return pathlib.Path.home() / ".hermes" / "webui-mvp-test"
    return pathlib.Path(env_path).parent
@pytest.fixture(autouse=True)
def clean_hermes_config_files():
    """Remove config.yaml/.env in the server's hermes home before and after each test."""
    home = _server_hermes_home()
    targets = [home / name for name in ("config.yaml", ".env")]
    for target in targets:
        target.unlink(missing_ok=True)
    yield
    for target in targets:
        target.unlink(missing_ok=True)
def test_onboarding_status_defaults_incomplete():
    """A fresh state dir reports onboarding as not yet completed."""
    data, status = get("/api/onboarding/status")
    assert status == 200
    assert data["completed"] is False
    settings = data["settings"]
    system = data["system"]
    assert settings["password_enabled"] is False
    assert system["provider_configured"] is False
    assert system["chat_ready"] is False
    assert system["setup_state"] in {"needs_provider", "agent_unavailable"}
    assert "provider_note" in system
    assert isinstance(data["workspaces"]["items"], list)
    assert data["setup"]["providers"]
@_needs_yaml
def test_onboarding_setup_openrouter_writes_real_config_and_env():
    """Happy path: an openrouter setup call persists config.yaml and .env."""
    data, status = post(
        "/api/onboarding/setup",
        {
            "provider": "openrouter",
            "model": "anthropic/claude-sonnet-4.6",
            "api_key": "sk-or-test",
        },
    )
    assert status == 200
    assert data["system"]["provider_configured"] is True
    assert data["system"]["provider_ready"] is True
    # chat_ready additionally requires the agent to be importable; branch on
    # what the server reports so the test passes with or without hermes-agent.
    if data["system"]["imports_ok"] and data["system"]["hermes_found"]:
        assert data["system"]["chat_ready"] is True
        assert data["system"]["setup_state"] == "ready"
    else:
        assert data["system"]["chat_ready"] is False
        assert data["system"]["setup_state"] == "agent_unavailable"
    # The setup call must have written real files into the server's state dir.
    cfg_text = (_server_hermes_home() / "config.yaml").read_text(encoding="utf-8")
    env_text = (_server_hermes_home() / ".env").read_text(encoding="utf-8")
    assert "provider: openrouter" in cfg_text
    assert "default: anthropic/claude-sonnet-4.6" in cfg_text
    assert "OPENROUTER_API_KEY=sk-or-test" in env_text
@_needs_yaml
def test_onboarding_setup_custom_endpoint_writes_runtime_files():
    """A custom provider setup persists base_url and the OPENAI-compatible key."""
    data, status = post(
        "/api/onboarding/setup",
        {
            "provider": "custom",
            "model": "google/gemma-3-27b-it",
            "base_url": "http://localhost:4000/v1",
            "api_key": "sk-custom-test",
        },
    )
    assert status == 200
    assert data["system"]["provider_configured"] is True
    assert data["system"]["provider_ready"] is True
    # As with openrouter: chat readiness depends on the agent being present.
    if data["system"]["imports_ok"] and data["system"]["hermes_found"]:
        assert data["system"]["chat_ready"] is True
        assert data["system"]["setup_state"] == "ready"
    else:
        assert data["system"]["chat_ready"] is False
        assert data["system"]["setup_state"] == "agent_unavailable"
    assert data["system"]["current_provider"] == "custom"
    assert data["system"]["current_base_url"] == "http://localhost:4000/v1"
    # Verify the on-disk runtime files, not just the API response.
    cfg_text = (_server_hermes_home() / "config.yaml").read_text(encoding="utf-8")
    env_text = (_server_hermes_home() / ".env").read_text(encoding="utf-8")
    assert "provider: custom" in cfg_text
    assert "default: google/gemma-3-27b-it" in cfg_text
    assert "base_url: http://localhost:4000/v1" in cfg_text
    # Custom endpoints use the OpenAI-compatible key name.
    assert "OPENAI_API_KEY=sk-custom-test" in env_text
@_needs_yaml
def test_onboarding_setup_detects_incomplete_saved_provider():
    """config.yaml present but .env missing → provider configured, not ready.

    Fix: the original bound post()'s (data, status) return pair to the
    misleadingly swapped names ``status, code``; variables are now named
    for what they actually hold.
    """
    data, status = post(
        "/api/onboarding/setup",
        {
            "provider": "anthropic",
            "model": "claude-sonnet-4.6",
            "api_key": "sk-ant-test",
        },
    )
    assert status == 200
    # Simulate a lost/deleted .env: the key is gone but config.yaml remains.
    (_server_hermes_home() / ".env").unlink(missing_ok=True)
    data, status = get("/api/onboarding/status")
    assert status == 200
    assert data["system"]["provider_configured"] is True
    assert data["system"]["provider_ready"] is False
    assert data["system"]["chat_ready"] is False
    assert data["system"]["setup_state"] in {"provider_incomplete", "agent_unavailable"}
@_needs_yaml
def test_onboarding_setup_rejects_missing_custom_base_url():
    """A custom provider without base_url must be rejected with HTTP 400."""
    payload = {
        "provider": "custom",
        "model": "qwen2.5-coder",
        "api_key": "sk-test",
    }
    data, status = post("/api/onboarding/setup", payload)
    assert status == 400
    assert "base_url is required" in data["error"]
def test_onboarding_complete_persists_flag():
    """POST /api/onboarding/complete flips the flag in settings.json and status."""
    data, status = post("/api/onboarding/complete", {})
    assert status == 200
    assert data["completed"] is True
    # The flag must be persisted to disk, not just echoed in the response.
    settings = json.loads(
        (_server_hermes_home() / "settings.json").read_text(encoding="utf-8")
    )
    assert settings["onboarding_completed"] is True
    # A follow-up status call must also report completion.
    data2, status2 = get("/api/onboarding/status")
    assert status2 == 200
    assert data2["completed"] is True
def test_onboarding_complete_preserves_other_settings():
    """Completing onboarding must not overwrite other user settings."""
    # Use send_key (a safe enum setting) to verify settings preservation
    # without contaminating bot_name or theme checks in other test files.
    # Use GET /api/settings (not onboarding status) to check preservation
    # since the onboarding status only returns a subset of settings fields.
    try:
        saved, s1 = post("/api/settings", {"send_key": "ctrl+enter"})
        assert s1 == 200
        assert saved["send_key"] == "ctrl+enter"
        _, s2 = post("/api/onboarding/complete", {})
        assert s2 == 200
        # Verify the non-onboarding setting survived the completion call
        current_settings, s3 = get("/api/settings")
        assert s3 == 200
        assert current_settings["send_key"] == "ctrl+enter"
    finally:
        # Always restore default send_key to avoid contaminating other tests
        post("/api/settings", {"send_key": "enter"})
def test_onboarding_already_completed_status():
    """After marking onboarding complete, status must reflect completed=True
    so the wizard does not re-appear for returning users."""
    done, status = post("/api/onboarding/complete", {})
    assert status == 200
    assert done["completed"] is True
    # A second read must agree with the completion response.
    data, status2 = get("/api/onboarding/status")
    assert status2 == 200
    assert data["completed"] is True
    # Reset so test doesn't contaminate others
    post("/api/settings", {"onboarding_completed": False})
@_needs_yaml
def test_onboarding_setup_rejects_api_key_with_newline():
    """API keys containing embedded newlines must be rejected to prevent .env injection."""
    malicious_key = "sk-bad" + chr(10) + "OTHER_KEY=injected"
    payload = {
        "provider": "openrouter",
        "model": "anthropic/claude-sonnet-4.6",
        "api_key": malicious_key,
    }
    data, status = post("/api/onboarding/setup", payload)
    assert status == 400
    assert "newline" in data["error"].lower()

View File

@@ -0,0 +1,184 @@
"""
Tests: onboarding /api/onboarding/setup network restriction logic (issue #390).
Covers:
1. Request from 127.0.0.1 (loopback) is allowed without auth
2. Request from RFC-1918 private IP (172.x, 192.168.x, 10.x) is allowed without auth
3. Request from public IP is blocked without auth → 403
4. X-Forwarded-For loopback IP is trusted → allowed
5. X-Forwarded-For private IP is trusted → allowed
6. X-Forwarded-For public IP → still blocked
7. X-Real-IP loopback → allowed
8. HERMES_WEBUI_ONBOARDING_OPEN=1 bypasses the check entirely
9. Auth enabled → check skipped, any IP allowed
"""
import json
import os
import pathlib
import sys
import unittest.mock
import urllib.error
import urllib.request
import pytest
REPO = pathlib.Path(__file__).parent.parent
from tests._pytest_port import BASE
# ---------------------------------------------------------------------------
# Unit tests — directly test the IP-resolution + guard logic in routes.py
# without needing a live server. We replicate the logic to keep tests fast
# and independent of server startup.
# ---------------------------------------------------------------------------
def _is_local_from_handler(
raw_ip: str,
xff: str = "",
xri: str = "",
auth_enabled: bool = False,
open_env: bool = False,
) -> bool | str:
"""
Mirror of the onboarding IP check in api/routes.py.
Returns True if the request would be allowed, False if blocked,
or the error message string if blocked.
"""
import ipaddress
if auth_enabled or open_env:
return True
_xff = xff.split(",")[0].strip() if xff else ""
_xri = xri.strip()
_ip_str = _xff or _xri or raw_ip
try:
addr = ipaddress.ip_address(_ip_str)
is_local = addr.is_loopback or addr.is_private
except ValueError:
is_local = False
return is_local
class TestOnboardingIPLogic:
    """Unit tests for the IP-resolution logic (no live server needed)."""
    def test_loopback_allowed(self):
        """IPv4 loopback must pass the check."""
        assert _is_local_from_handler("127.0.0.1") is True
    def test_ipv6_loopback_allowed(self):
        """IPv6 loopback (::1) must pass the check."""
        assert _is_local_from_handler("::1") is True
    def test_private_172_allowed(self):
        """Docker bridge addresses (172.17.x.x) must be allowed."""
        assert _is_local_from_handler("172.17.0.1") is True
    def test_private_192168_allowed(self):
        """RFC-1918 192.168.0.0/16 addresses must be allowed."""
        assert _is_local_from_handler("192.168.1.100") is True
    def test_private_10_allowed(self):
        """RFC-1918 10.0.0.0/8 addresses must be allowed."""
        assert _is_local_from_handler("10.0.0.5") is True
    def test_public_ip_blocked(self):
        """A public address without auth must be blocked."""
        assert _is_local_from_handler("8.8.8.8") is False
    def test_xff_loopback_trusted(self):
        """Reverse proxy sets X-Forwarded-For to 127.0.0.1 — should be allowed."""
        assert _is_local_from_handler("172.20.0.1", xff="127.0.0.1") is True
    def test_xff_private_trusted(self):
        """Reverse proxy sets X-Forwarded-For to LAN IP — should be allowed."""
        assert _is_local_from_handler("172.20.0.1", xff="192.168.1.50") is True
    def test_xff_public_blocked(self):
        """Public IP in X-Forwarded-For should still be blocked."""
        assert _is_local_from_handler("172.20.0.1", xff="8.8.8.8") is False
    def test_xff_first_entry_used(self):
        """X-Forwarded-For may have multiple IPs; only the first (client) is used."""
        # First entry is private → allowed
        assert _is_local_from_handler("172.20.0.1", xff="10.0.0.1, 172.20.0.1") is True
        # First entry is public → blocked
        assert _is_local_from_handler("172.20.0.1", xff="8.8.8.8, 172.20.0.1") is False
    def test_xreal_ip_loopback_trusted(self):
        """X-Real-IP loopback → allowed."""
        assert _is_local_from_handler("172.20.0.1", xri="127.0.0.1") is True
    def test_xreal_ip_private_trusted(self):
        """X-Real-IP private address → allowed."""
        assert _is_local_from_handler("172.20.0.1", xri="10.1.2.3") is True
    def test_xff_takes_priority_over_xri(self):
        """X-Forwarded-For wins over X-Real-IP when both present."""
        # XFF says public, XRI says local → blocked (XFF takes priority)
        assert _is_local_from_handler("172.20.0.1", xff="8.8.8.8", xri="127.0.0.1") is False
    def test_open_env_bypasses_check(self):
        """HERMES_WEBUI_ONBOARDING_OPEN=1 allows any IP."""
        assert _is_local_from_handler("8.8.8.8", open_env=True) is True
    def test_auth_enabled_bypasses_check(self):
        """When auth is enabled, IP check is skipped entirely."""
        assert _is_local_from_handler("8.8.8.8", auth_enabled=True) is True
    def test_invalid_ip_blocked(self):
        """Malformed IP in header → treated as non-local → blocked."""
        assert _is_local_from_handler("not-an-ip") is False
# ---------------------------------------------------------------------------
# Integration tests — hit the live test server on its per-worktree test port
# ---------------------------------------------------------------------------
@pytest.mark.integration
class TestOnboardingSetupEndpoint:
    """
    Integration tests for /api/onboarding/setup.
    These require the test server running on the per-worktree test port.

    Fix: the original wrapped these requests in
    ``unittest.mock.patch("api.onboarding.apply_onboarding_setup", ...)``.
    Patching in the *test* process cannot affect the separate server
    process that handles the HTTP request, so the mock was a no-op (and
    forced an unnecessary import of api.onboarding into the test runner).
    It has been removed; the assertions only check that no 403 is
    returned, which never depended on the mock.
    """

    def _post(self, path: str, data: dict, headers: dict | None = None) -> tuple[int, dict]:
        """POST JSON to the live test server; return (status, parsed body)."""
        payload = json.dumps(data).encode()
        req = urllib.request.Request(
            BASE + path,
            data=payload,
            method="POST",
            headers={"Content-Type": "application/json", **(headers or {})},
        )
        try:
            with urllib.request.urlopen(req, timeout=10) as r:
                return r.status, json.loads(r.read())
        except urllib.error.HTTPError as e:
            return e.code, json.loads(e.read())

    def test_loopback_request_allowed(self):
        """
        Requests from 127.0.0.1 (which is what the test server sees) should
        pass the IP check. We confirm no 403 is returned.
        """
        # The test server runs on 127.0.0.1:{TEST_PORT} so client_address[0]
        # is 127.0.0.1; a valid payload must not be rejected for IP reasons.
        status, body = self._post(
            "/api/onboarding/setup",
            {"provider": "anthropic", "model": "claude-sonnet-4.6", "api_key": "test-key"},
        )
        # Should not be 403 (IP blocked). May be 200 or another error from apply logic.
        assert status != 403, f"Got 403 — IP check incorrectly blocked loopback. Body: {body}"

    def test_xff_loopback_header_respected(self):
        """
        Simulated reverse proxy: raw TCP is 127.0.0.1 but X-Forwarded-For is also
        127.0.0.1. Should be allowed.
        """
        status, body = self._post(
            "/api/onboarding/setup",
            {"provider": "anthropic", "model": "claude-sonnet-4.6", "api_key": "test-key"},
            headers={"X-Forwarded-For": "127.0.0.1"},
        )
        assert status != 403, f"Got 403 with XFF=127.0.0.1. Body: {body}"

View File

@@ -0,0 +1,58 @@
import pathlib
REPO = pathlib.Path(__file__).parent.parent


def read(path):
    """Return the text of *path* relative to the repo root (UTF-8)."""
    target = REPO / path
    return target.read_text(encoding="utf-8")
def test_index_contains_onboarding_overlay_markup():
    """index.html must ship the onboarding overlay elements and its script tag."""
    html = read("static/index.html")
    needles = (
        'id="onboardingOverlay"',
        'id="onboardingBody"',
        'id="onboardingNextBtn"',
        'src="static/onboarding.js"',
    )
    for needle in needles:
        assert needle in html
def test_onboarding_css_rules_exist():
    """style.css must define the onboarding overlay/card/step/status selectors."""
    css = read("static/style.css")
    expected = [
        ".onboarding-overlay",
        ".onboarding-card",
        ".onboarding-step",
        ".onboarding-status.warn",
    ]
    missing = [selector for selector in expected if selector not in css]
    assert not missing
def test_onboarding_js_exposes_bootstrap_hooks():
    """onboarding.js must define the wizard entry points and call the API trio."""
    js = read("static/onboarding.js")
    for fragment in (
        "async function loadOnboardingWizard()",
        "async function nextOnboardingStep()",
        "api('/api/onboarding/status')",
        "api('/api/onboarding/setup'",
        "api('/api/onboarding/complete'",
    ):
        assert fragment in js
def test_onboarding_uses_i18n_helpers():
    """Onboarding strings must go through the i18n layer in all three assets."""
    html = read("static/index.html")
    js = read("static/onboarding.js")
    i18n = read("static/i18n.js")
    for needle in ('data-i18n="onboarding_title"', 'data-i18n="onboarding_continue"'):
        assert needle in html
    for needle in (
        "t('onboarding_step_system_title')",
        "t('onboarding_step_setup_title')",
        "t('onboarding_complete')",
    ):
        assert needle in js
    # At least the English and Spanish locale entries must exist.
    for needle in (
        "onboarding_title: 'Welcome to Hermes Web UI'",
        "onboarding_title: 'Bienvenido a Hermes Web UI'",
    ):
        assert needle in i18n
def test_bootstrap_script_contains_official_installer_and_windows_guard():
    """bootstrap.py must point at the official installer and refuse native Windows."""
    src = read("bootstrap.py")
    installer_url = (
        "https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.sh"
    )
    assert installer_url in src
    assert "Native Windows is not supported" in src

View File

@@ -0,0 +1,121 @@
"""
Tests for OpenCode Zen and OpenCode Go provider support.
Verifies provider registration in display/model catalogs and
env-var fallback detection.
"""
import os
import sys
import types
import api.config as config
# ── Provider registration ─────────────────────────────────────────────
def test_opencode_zen_in_provider_display():
    """opencode-zen must be registered with its display name."""
    display = config._PROVIDER_DISPLAY
    assert "opencode-zen" in display
    assert display["opencode-zen"] == "OpenCode Zen"
def test_opencode_go_in_provider_display():
    """opencode-go must be registered with its display name."""
    display = config._PROVIDER_DISPLAY
    assert "opencode-go" in display
    assert display["opencode-go"] == "OpenCode Go"
def test_opencode_zen_in_provider_models():
    """The opencode-zen static catalog must carry its flagship models."""
    catalog = config._PROVIDER_MODELS
    assert "opencode-zen" in catalog
    model_ids = {entry["id"] for entry in catalog["opencode-zen"]}
    for expected in ("claude-opus-4-6", "gpt-5.4-pro", "glm-5.1"):
        assert expected in model_ids
def test_opencode_go_in_provider_models():
    """The opencode-go static catalog must carry its flagship models."""
    catalog = config._PROVIDER_MODELS
    assert "opencode-go" in catalog
    model_ids = {entry["id"] for entry in catalog["opencode-go"]}
    for expected in ("glm-5.1", "glm-5", "mimo-v2-pro"):
        assert expected in model_ids
# ── Env-var fallback detection ────────────────────────────────────────
def _models_with_env_key(monkeypatch, env_var, expected_provider_display):
    """Helper: fake hermes_cli unavailable, set an env var, check detection.

    Registers a stub ``hermes_cli.models`` module *without* a
    ``list_available_providers`` attribute so api.config falls back to its
    env-var detection path, then verifies the expected provider group
    appears in get_available_models().

    Fix: the original set ``list_available_providers = None`` on the stub
    and immediately delattr'd it again — the assignment was dead code.
    """
    # Stub module with no list_available_providers attribute at all.
    fake_mod = types.ModuleType("hermes_cli.models")
    monkeypatch.setitem(sys.modules, "hermes_cli.models", fake_mod)
    # Blank the provider config so only the env var can trigger detection;
    # restored in the finally block even if the assertion fails.
    old_cfg = dict(config.cfg)
    config.cfg["model"] = {}
    config.cfg.pop("custom_providers", None)
    monkeypatch.setenv(env_var, "test-key")
    try:
        result = config.get_available_models()
        providers = [g["provider"] for g in result["groups"]]
        assert expected_provider_display in providers, (
            f"Expected {expected_provider_display} in {providers}"
        )
    finally:
        config.cfg.clear()
        config.cfg.update(old_cfg)
def test_opencode_zen_detected_via_env_key(monkeypatch):
    # OPENCODE_ZEN_API_KEY alone must surface the OpenCode Zen provider group.
    _models_with_env_key(monkeypatch, "OPENCODE_ZEN_API_KEY", "OpenCode Zen")
def test_opencode_go_detected_via_env_key(monkeypatch):
    # OPENCODE_GO_API_KEY alone must surface the OpenCode Go provider group.
    _models_with_env_key(monkeypatch, "OPENCODE_GO_API_KEY", "OpenCode Go")
def test_openai_codex_model_catalog_includes_gpt54():
    """openai-codex catalog must include gpt-5.4 and the standard Codex lineup."""
    assert "openai-codex" in config._PROVIDER_MODELS
    ids = [m["id"] for m in config._PROVIDER_MODELS["openai-codex"]]
    for model_id in ("gpt-5.4", "gpt-5.4-mini", "gpt-5.3-codex", "gpt-5.2-codex"):
        assert model_id in ids, f"{model_id} missing from openai-codex catalog: {ids}"
def test_openai_codex_display_name():
    """openai-codex must have a human-readable display name."""
    display = config._PROVIDER_DISPLAY
    assert "openai-codex" in display
    assert display["openai-codex"] == "OpenAI Codex"
def test_live_models_handler_delegates_to_provider_model_ids():
    """_handle_live_models must delegate to the agent's provider_model_ids()
    rather than maintain its own per-provider fetch logic.
    """
    import pathlib
    routes_path = pathlib.Path(__file__).parent.parent / "api" / "routes.py"
    routes_src = routes_path.read_text()
    checks = [
        (
            "provider_model_ids",
            True,
            "_handle_live_models must call hermes_cli.models.provider_model_ids() "
            "to delegate all provider-specific live-fetch logic to the agent",
        ),
        (
            "https://api.openai.com/v1",
            False,
            "_handle_live_models must not hardcode api.openai.com — "
            "provider resolution is handled by the agent",
        ),
        (
            "not_supported",
            False,
            "_handle_live_models must not return not_supported for any provider — "
            "provider_model_ids() falls back to static list automatically",
        ),
    ]
    for needle, should_exist, message in checks:
        assert (needle in routes_src) is should_exist, message
def test_live_models_ui_no_longer_skips_any_provider():
    """_fetchLiveModels in ui.js must not exclude any provider from live fetching.

    Previously anthropic, google, and gemini were skipped — now
    provider_model_ids() handles them all (with graceful fallback to static
    lists).

    Fix: the original sliced ``ui_src[:idx + 100]`` — i.e. from the START of
    the file — so any unrelated "anthropic" occurrence earlier in ui.js made
    the check fire. We now inspect only the window immediately before the
    ``includes(provider)`` call, where a skip-list literal like
    ``['anthropic', ...].includes(provider)`` would appear.
    """
    import pathlib
    ui_src = (pathlib.Path(__file__).parent.parent / "static" / "ui.js").read_text()
    marker = "includes(provider)"
    idx = ui_src.find(marker)
    if idx != -1:
        window = ui_src[max(0, idx - 100):idx + len(marker)]
        assert "anthropic" not in window, (
            "_fetchLiveModels must not skip anthropic, google, or gemini — "
            "the backend now returns live models for all providers"
        )

View File

@@ -0,0 +1,175 @@
"""Tests for _sanitize_messages_for_api() orphaned-tool-message stripping.
Regression for issue #534: strictly-conformant providers (Mercury-2/Inception,
newer OpenAI models) reject histories containing tool-role messages whose
tool_call_id has no matching tool_calls entry in a prior assistant message.
"""
import sys
import pathlib
REPO_ROOT = pathlib.Path(__file__).parent.parent.resolve()
sys.path.insert(0, str(REPO_ROOT))
from api.streaming import _sanitize_messages_for_api
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _asst_with_tool_call(call_id="call-1", call_id_key="id"):
return {
"role": "assistant",
"content": None,
"tool_calls": [{"type": "function", call_id_key: call_id, "function": {"name": "terminal", "arguments": "{}"}}],
"_ts": 12345, # extra field that should be stripped
}
def _tool_result(call_id="call-1"):
return {"role": "tool", "tool_call_id": call_id, "content": "ok", "_ts": 12345}
def _user(text="hello"):
return {"role": "user", "content": text, "_ts": 12345}
def _asst(text="hi"):
return {"role": "assistant", "content": text, "_ts": 12345}
# ---------------------------------------------------------------------------
# Tests: normal valid histories are preserved
# ---------------------------------------------------------------------------
def test_valid_tool_roundtrip_preserved():
    """A linked assistant→tool pair must be kept intact."""
    history = [_user(), _asst_with_tool_call("call-1"), _tool_result("call-1"), _asst()]
    out = _sanitize_messages_for_api(history)
    assert [m["role"] for m in out] == ["user", "assistant", "tool", "assistant"]
def test_extra_fields_stripped():
    """Non-API fields (_ts etc.) are always stripped."""
    out = _sanitize_messages_for_api([_user(), _asst()])
    assert not any("_ts" in m for m in out)
def test_valid_history_without_tool_messages_unchanged():
    """Plain user/assistant history with no tool calls is passed through unchanged."""
    out = _sanitize_messages_for_api([_user("a"), _asst("b"), _user("c"), _asst("d")])
    assert len(out) == 4
    assert all(m["role"] in ("user", "assistant") for m in out)
def test_multiple_valid_tool_calls_preserved():
    """Multiple linked tool_call_ids in one assistant message are all preserved."""
    def _call(cid, fname):
        return {"type": "function", "id": cid, "function": {"name": fname, "arguments": "{}"}}
    asst = {
        "role": "assistant",
        "content": None,
        "tool_calls": [_call("call-1", "f1"), _call("call-2", "f2")],
    }
    history = [_user(), asst, _tool_result("call-1"), _tool_result("call-2"), _asst()]
    out = _sanitize_messages_for_api(history)
    assert [m["role"] for m in out] == ["user", "assistant", "tool", "tool", "assistant"]
# ---------------------------------------------------------------------------
# Tests: orphaned tool messages are dropped
# ---------------------------------------------------------------------------
def test_orphaned_tool_message_dropped():
    """A tool message with no matching assistant tool_call is dropped."""
    out = _sanitize_messages_for_api([_user(), _asst(), _tool_result("call-orphan")])
    roles = [m["role"] for m in out]
    assert "tool" not in roles
    assert roles == ["user", "assistant"]
def test_tool_message_missing_tool_call_id_dropped():
    """A tool message with no tool_call_id at all is dropped."""
    no_id = {"role": "tool", "content": "result"}
    out = _sanitize_messages_for_api([_user(), _asst_with_tool_call("call-1"), no_id])
    assert all(m["role"] != "tool" for m in out)
def test_partially_orphaned_tool_messages():
    """In a mixed batch, only the orphaned tool messages are dropped."""
    history = [
        _user(),
        _asst_with_tool_call("call-valid"),
        _tool_result("call-valid"),  # linked → kept
        _tool_result("call-ghost"),  # orphaned → dropped
        _asst(),
    ]
    out = _sanitize_messages_for_api(history)
    assert [m["role"] for m in out] == ["user", "assistant", "tool", "assistant"]
    # The kept tool message has the right call_id
    kept_tools = [m for m in out if m["role"] == "tool"]
    assert kept_tools[0]["tool_call_id"] == "call-valid"
def test_orphaned_tool_only_history():
    """A history consisting only of orphaned tool messages returns empty."""
    orphans = [_tool_result("dangling-1"), _tool_result("dangling-2")]
    assert _sanitize_messages_for_api(orphans) == []
# ---------------------------------------------------------------------------
# Tests: Anthropic 'call_id' field name (not OpenAI 'id')
# ---------------------------------------------------------------------------
def test_anthropic_call_id_field_recognized():
    """Anthropic tool calls use 'call_id' not 'id' — both must be recognized."""
    asst = _asst_with_tool_call("call-anthropic", call_id_key="call_id")
    history = [_user(), asst, _tool_result("call-anthropic"), _asst()]
    out = _sanitize_messages_for_api(history)
    assert [m["role"] for m in out] == ["user", "assistant", "tool", "assistant"]
# ---------------------------------------------------------------------------
# Tests: edge cases
# ---------------------------------------------------------------------------
def test_empty_messages_list():
    """Empty input yields empty output."""
    assert _sanitize_messages_for_api([]) == []
def test_non_dict_messages_skipped():
    """Non-dict items in the messages list are silently ignored."""
    out = _sanitize_messages_for_api(["not a dict", None, _user("hi"), 42])
    assert len(out) == 1
    assert out[0]["role"] == "user"
def test_tool_calls_none_does_not_crash():
    """An assistant message with tool_calls=None is handled without crashing."""
    asst = {"role": "assistant", "content": "hello", "tool_calls": None}
    out = _sanitize_messages_for_api([_user(), asst, _tool_result("call-1")])
    # call-1 has no valid parent (tool_calls=None → no IDs registered) → dropped
    assert all(m["role"] != "tool" for m in out)
def test_system_messages_preserved():
    """System messages are always preserved."""
    sys_msg = {"role": "system", "content": "You are helpful."}
    out = _sanitize_messages_for_api([sys_msg, _user(), _asst()])
    assert out[0]["role"] == "system"

View File

@@ -0,0 +1,67 @@
import importlib
import os
import sys
from pathlib import Path
def test_profile_switch_clears_previous_profile_env_vars(monkeypatch, tmp_path):
    """Switching to a profile with no .env must unset the old profile's vars."""
    base = tmp_path / ".hermes"
    (base / "profiles" / "p1").mkdir(parents=True)
    (base / "profiles" / "p2").mkdir(parents=True)
    # Only p1 carries a .env; p2 intentionally has none.
    (base / "profiles" / "p1" / ".env").write_text(
        "OPENAI_API_KEY=secret-from-p1\nCUSTOM_TOKEN=token-from-p1\n",
        encoding="utf-8",
    )
    # Pin the module to the temp home and start from a clean environment.
    monkeypatch.setenv("HERMES_BASE_HOME", str(base))
    monkeypatch.delenv("HERMES_HOME", raising=False)
    monkeypatch.delenv("OPENAI_API_KEY", raising=False)
    monkeypatch.delenv("CUSTOM_TOKEN", raising=False)
    # Force a re-import so module-level state picks up HERMES_BASE_HOME.
    sys.modules.pop("api.profiles", None)
    profiles = importlib.import_module("api.profiles")
    profiles = importlib.reload(profiles)
    profiles.init_profile_state()
    profiles.switch_profile("p1")
    assert os.environ.get("OPENAI_API_KEY") == "secret-from-p1"
    assert os.environ.get("CUSTOM_TOKEN") == "token-from-p1"
    # p2 has no .env → p1's vars must be cleared, not left to leak through.
    profiles.switch_profile("p2")
    assert os.environ.get("OPENAI_API_KEY") is None
    assert os.environ.get("CUSTOM_TOKEN") is None
    assert profiles.get_active_profile_name() == "p2"
def test_profile_switch_replaces_overlapping_keys(monkeypatch, tmp_path):
    """Switching profiles replaces shared keys and drops profile-only keys."""
    base = tmp_path / ".hermes"
    (base / "profiles" / "p1").mkdir(parents=True)
    (base / "profiles" / "p2").mkdir(parents=True)
    # Both profiles define OPENAI_API_KEY; each has one exclusive key.
    (base / "profiles" / "p1" / ".env").write_text(
        "OPENAI_API_KEY=secret-from-p1\nONLY_P1=one\n",
        encoding="utf-8",
    )
    (base / "profiles" / "p2" / ".env").write_text(
        "OPENAI_API_KEY=secret-from-p2\nONLY_P2=two\n",
        encoding="utf-8",
    )
    # Pin the module to the temp home and start from a clean environment.
    monkeypatch.setenv("HERMES_BASE_HOME", str(base))
    monkeypatch.delenv("HERMES_HOME", raising=False)
    monkeypatch.delenv("OPENAI_API_KEY", raising=False)
    monkeypatch.delenv("ONLY_P1", raising=False)
    monkeypatch.delenv("ONLY_P2", raising=False)
    # Force a re-import so module-level state picks up HERMES_BASE_HOME.
    sys.modules.pop("api.profiles", None)
    profiles = importlib.import_module("api.profiles")
    profiles = importlib.reload(profiles)
    profiles.init_profile_state()
    profiles.switch_profile("p1")
    assert os.environ.get("OPENAI_API_KEY") == "secret-from-p1"
    assert os.environ.get("ONLY_P1") == "one"
    # After the switch: shared key replaced, p1-only key gone, p2-only key set.
    profiles.switch_profile("p2")
    assert os.environ.get("OPENAI_API_KEY") == "secret-from-p2"
    assert os.environ.get("ONLY_P1") is None
    assert os.environ.get("ONLY_P2") == "two"

View File

@@ -0,0 +1,63 @@
import importlib
import os
import sys
import tempfile
from pathlib import Path
import pytest
REPO_ROOT = Path(__file__).parent.parent.resolve()
if str(REPO_ROOT) not in sys.path:
sys.path.insert(0, str(REPO_ROOT))
def _reload_profiles_module(base_home: Path):
    """Point HERMES at *base_home* and return a freshly imported api.profiles.

    NOTE(review): this mutates os.environ directly with no cleanup (unlike
    the monkeypatch-based tests elsewhere), so HERMES_BASE_HOME/HERMES_HOME
    leak into subsequent tests — confirm that is acceptable for this suite.
    """
    os.environ["HERMES_BASE_HOME"] = str(base_home)
    os.environ["HERMES_HOME"] = str(base_home)
    # Drop cached modules so the import re-runs module-level init under the new env.
    for name in ["api.config", "api.profiles"]:
        if name in sys.modules:
            del sys.modules[name]
    profiles = importlib.import_module("api.profiles")
    return profiles
def test_switch_profile_rejects_path_traversal():
    """switch_profile must refuse names that escape the profiles directory."""
    with tempfile.TemporaryDirectory() as td:
        root = Path(td)
        base = root / ".hermes"
        (base / "profiles").mkdir(parents=True)
        (root / "escape-target").mkdir()
        profiles = _reload_profiles_module(base)
        with pytest.raises(ValueError):
            profiles.switch_profile("../../escape-target")
def test_delete_profile_rejects_path_traversal():
    """delete_profile_api must refuse names that escape the profiles directory."""
    with tempfile.TemporaryDirectory() as td:
        root = Path(td)
        base = root / ".hermes"
        (base / "profiles").mkdir(parents=True)
        (root / "escape-target").mkdir()
        profiles = _reload_profiles_module(base)
        with pytest.raises(ValueError):
            profiles.delete_profile_api("../../escape-target")
def test_switch_profile_allows_valid_profile_name():
    """A plain profile name switches cleanly and repoints HERMES_HOME."""
    with tempfile.TemporaryDirectory() as td:
        base = Path(td) / ".hermes"
        profile_dir = base / "profiles" / "demo"
        profile_dir.mkdir(parents=True)
        profiles = _reload_profiles_module(base)
        result = profiles.switch_profile("demo")
        assert result["active"] == "demo"
        active_home = Path(os.environ["HERMES_HOME"]).resolve()
        assert active_home == profile_dir.resolve()

View File

@@ -0,0 +1,325 @@
"""
Tests for issue #266 — provider/model mismatch warning.
Covers:
1. streaming.py: auth errors detected and classified as 'auth_mismatch'
2. static/ui.js: _checkProviderMismatch() helper exists and logic is correct
3. static/messages.js: apperror handler has auth_mismatch branch
4. static/i18n.js: provider_mismatch_warning and provider_mismatch_label keys
present in all locales (en, es, de, ru, zh, zh-Hant)
5. static/boot.js: modelSelect.onchange calls _checkProviderMismatch
6. /api/models: response includes active_provider field
"""
import json
import pathlib
import re
import urllib.request
REPO_ROOT = pathlib.Path(__file__).parent.parent.resolve()
from tests._pytest_port import BASE
def _read(rel_path: str) -> str:
    """Read a repo-relative file as UTF-8 text."""
    target = REPO_ROOT / rel_path
    return target.read_text(encoding="utf-8")
# ── 1. streaming.py: auth error detection ───────────────────────────────────
class TestStreamingAuthErrorDetection:
    """streaming.py must classify auth/401 errors as auth_mismatch.

    Fix: methods that slice around ``src.find(...)`` now assert the marker
    was found first — previously a missing marker gave idx == -1 and the
    slice silently inspected the wrong text, and the ordering test passed
    vacuously when is_rate_limit was absent (find() returns -1 < any index).
    """

    def test_auth_mismatch_type_defined_in_streaming(self):
        """'auth_mismatch' type must be emitted for auth errors."""
        src = _read("api/streaming.py")
        assert "auth_mismatch" in src, (
            "auth_mismatch type not found in streaming.py — "
            "401/auth errors will not be surfaced with a helpful message"
        )

    def test_is_auth_error_flag_defined(self):
        """is_auth_error variable must exist in the error handler."""
        src = _read("api/streaming.py")
        assert "is_auth_error" in src, (
            "is_auth_error flag not found in streaming.py"
        )

    def test_auth_error_detects_401(self):
        """'401' must be part of the auth error detection logic."""
        src = _read("api/streaming.py")
        # Find the is_auth_error block
        idx = src.find("is_auth_error")
        assert idx != -1
        block = src[idx:idx + 400]
        assert "'401'" in block or '"401"' in block, (
            "'401' not in is_auth_error detection block"
        )

    def test_auth_error_detects_unauthorized(self):
        """'unauthorized' must be part of the auth error detection logic."""
        src = _read("api/streaming.py")
        idx = src.find("is_auth_error")
        assert idx != -1, "is_auth_error flag not found in streaming.py"
        block = src[idx:idx + 400]
        assert "unauthorized" in block.lower(), (
            "'unauthorized' not in is_auth_error detection block"
        )

    def test_auth_error_hint_mentions_hermes_model(self):
        """The auth_mismatch hint must mention 'hermes model' command."""
        src = _read("api/streaming.py")
        # Find the auth_mismatch apperror block
        idx = src.find("auth_mismatch")
        assert idx != -1, "auth_mismatch type not found in streaming.py"
        block = src[idx:idx + 500]
        assert "hermes model" in block, (
            "auth_mismatch hint must mention 'hermes model' command "
            "so users know how to fix provider mismatch"
        )

    def test_auth_error_does_not_catch_rate_limit(self):
        """Rate limit errors must not be reclassified as auth_mismatch."""
        src = _read("api/streaming.py")
        # is_rate_limit must come before is_auth_error in the elif chain
        rl_idx = src.find("is_rate_limit")
        ae_idx = src.find("is_auth_error")
        assert rl_idx != -1 and ae_idx != -1, (
            "is_rate_limit / is_auth_error not found in streaming.py"
        )
        assert rl_idx < ae_idx, (
            "is_rate_limit check should precede is_auth_error — "
            "rate limit errors must not be mistaken for auth errors"
        )
# ── 2. static/ui.js: _checkProviderMismatch() ───────────────────────────────
class TestCheckProviderMismatch:
    """ui.js must expose _checkProviderMismatch() helper.

    Static source checks against static/ui.js.
    """

    def _mismatch_block(self, src: str) -> str:
        """Return the source window around _checkProviderMismatch, asserting it exists.

        find() returns -1 on a miss; slicing from -1 would yield a bogus
        window and misleading assertion failures, so fail loudly here instead.
        """
        idx = src.find("function _checkProviderMismatch")
        assert idx != -1, "_checkProviderMismatch not defined in ui.js"
        return src[idx:idx + 800]

    def test_function_defined(self):
        """_checkProviderMismatch function must be defined in ui.js."""
        src = _read("static/ui.js")
        assert "function _checkProviderMismatch" in src, (
            "_checkProviderMismatch not defined in ui.js"
        )

    def test_uses_window_active_provider(self):
        """Function must read window._activeProvider."""
        block = self._mismatch_block(_read("static/ui.js"))
        assert "_activeProvider" in block, (
            "_checkProviderMismatch must read window._activeProvider"
        )

    def test_skips_check_for_openrouter(self):
        """OpenRouter can route to any provider — skip the warning."""
        block = self._mismatch_block(_read("static/ui.js"))
        assert "openrouter" in block.lower(), (
            "_checkProviderMismatch must skip the check for openrouter"
        )

    def test_skips_check_for_custom(self):
        """Custom endpoints can serve any model — skip the warning."""
        block = self._mismatch_block(_read("static/ui.js"))
        assert "custom" in block.lower(), (
            "_checkProviderMismatch must skip the check for custom provider"
        )

    def test_active_provider_stored_on_model_load(self):
        """populateModelDropdown must store active_provider from /api/models."""
        src = _read("static/ui.js")
        # Find the function definition (skip the comment that also mentions the name)
        idx = src.find("async function populateModelDropdown")
        assert idx != -1, "async function populateModelDropdown not found"
        block = src[idx:idx + 800]
        assert "_activeProvider" in block, (
            "populateModelDropdown must set window._activeProvider "
            "from the /api/models response"
        )
# ── 3. static/messages.js: apperror handler ─────────────────────────────────
class TestApperrorHandler:
    """messages.js apperror handler must handle auth_mismatch type."""

    def test_auth_mismatch_type_handled(self):
        """apperror handler must check for type='auth_mismatch'."""
        messages_src = _read("static/messages.js")
        assert "auth_mismatch" in messages_src, (
            "auth_mismatch type not handled in messages.js apperror handler"
        )

    def test_provider_mismatch_label(self):
        """'Provider mismatch' label must appear in the error handling."""
        messages_src = _read("static/messages.js")
        assert "Provider mismatch" in messages_src, (
            "'Provider mismatch' label not found in messages.js"
        )

    def test_is_auth_mismatch_variable(self):
        """isAuthMismatch variable must be defined."""
        messages_src = _read("static/messages.js")
        assert "isAuthMismatch" in messages_src, (
            "isAuthMismatch variable not found in messages.js apperror handler"
        )
# ── 4. static/i18n.js: all locales ───────────────────────────────────────────
class TestI18nProviderMismatch:
    """All locales must have provider_mismatch_warning and provider_mismatch_label."""

    REQUIRED_KEYS = ["provider_mismatch_warning", "provider_mismatch_label"]

    def _locale_names(self, src: str) -> list[str]:
        """Return the locale identifiers declared at two-space indent in i18n.js."""
        pattern = re.compile(
            r"^\s{2}(?:'(?P<quoted>[A-Za-z0-9-]+)'|(?P<plain>[A-Za-z0-9-]+))\s*:\s*\{",
            re.MULTILINE,
        )
        names = []
        for match in pattern.finditer(src):
            names.append(match.group("quoted") or match.group("plain"))
        return names

    def _count_key(self, src: str, key: str) -> int:
        """Count whole-word occurrences of *key* in *src*."""
        return len(re.findall(r'\b' + re.escape(key) + r'\b', src))

    def _locale_block(self, src: str, start_marker: str, end_marker: str) -> str:
        """Slice the locale object between two markers, failing loudly on a miss.

        find() returns -1 when a marker is absent; without these guards the
        slice silently truncates and the caller's assertion message misleads.
        """
        start = src.find(start_marker)
        end = src.find(end_marker)
        assert start != -1, f"locale marker {start_marker!r} not found in i18n.js"
        assert end != -1, f"locale marker {end_marker!r} not found in i18n.js"
        return src[start:end]

    def test_all_locales_have_warning_key(self):
        """provider_mismatch_warning must appear in all locales."""
        src = _read("static/i18n.js")
        locale_count = len(self._locale_names(src))
        # Guard: zero detected locales would make the >= check pass vacuously.
        assert locale_count > 0, "no locales detected in i18n.js"
        count = self._count_key(src, "provider_mismatch_warning")
        assert count >= locale_count, (
            f"provider_mismatch_warning found {count} times, expected >= {locale_count} "
            f"(one per locale)"
        )

    def test_all_locales_have_label_key(self):
        """provider_mismatch_label must appear in all locales."""
        src = _read("static/i18n.js")
        locale_count = len(self._locale_names(src))
        assert locale_count > 0, "no locales detected in i18n.js"
        count = self._count_key(src, "provider_mismatch_label")
        assert count >= locale_count, (
            f"provider_mismatch_label found {count} times, expected >= {locale_count}"
        )

    def test_warning_is_function_in_en(self):
        """English provider_mismatch_warning must be a function (m, p) => ..."""
        src = _read("static/i18n.js")
        en_block = self._locale_block(src, "\n  en: {", "\n  es: {")
        assert "provider_mismatch_warning" in en_block, "Key not in en block"
        idx = en_block.find("provider_mismatch_warning")
        line = en_block[idx:idx + 200]
        # Must be a function, not a plain string
        assert "=>" in line, (
            "provider_mismatch_warning in en locale must be an arrow function "
            "that takes (m, p) parameters for model and provider interpolation"
        )

    def test_spanish_locale_key_coverage(self):
        """Spanish locale must have the new keys (parity with English)."""
        src = _read("static/i18n.js")
        es_block = self._locale_block(src, "\n  es: {", "\n  de: {")
        for key in self.REQUIRED_KEYS:
            assert key in es_block, f"Key '{key}' missing from Spanish locale"
# ── 5. static/boot.js: dropdown change handler ──────────────────────────────
class TestBootModelSelectChange:
    """boot.js modelSelect.onchange must call _checkProviderMismatch."""

    def test_onchange_calls_check_function(self):
        """modelSelect.onchange must invoke _checkProviderMismatch."""
        src = _read("static/boot.js")
        check_idx = src.find("_checkProviderMismatch")
        assert check_idx != -1, (
            "boot.js modelSelect.onchange must call _checkProviderMismatch "
            "to warn users about provider/model mismatches"
        )
        # Verify it's called in the context of model selection.
        # NOTE: the previous version used `find(a) or find(b)`, which is wrong:
        # find() returns -1 (truthy) on a miss, so the fallback never ran, and
        # the result was dead code anyway.  Anchor directly on the call site.
        block_start = src.rfind("\n", 0, check_idx)
        if block_start == -1:
            # Call site is on the first line of the file.
            block_start = 0
        surrounding = src[max(0, block_start - 200):block_start + 400]
        assert "modelSelect" in surrounding or "selectedModel" in surrounding, (
            "_checkProviderMismatch must be called in the context of model selection"
        )

    def test_onchange_shows_toast_on_mismatch(self):
        """The warning must be shown via showToast, not alert()."""
        src = _read("static/boot.js")
        # Both _checkProviderMismatch call and showToast must be near each other
        idx = src.find("_checkProviderMismatch")
        assert idx != -1, "_checkProviderMismatch not found in boot.js"
        block = src[idx:idx + 300]
        assert "showToast" in block, (
            "Provider mismatch warning must be shown via showToast(), not alert()"
        )
# ── 6. /api/models: active_provider in response ──────────────────────────────
def test_api_models_includes_active_provider():
    """/api/models must include 'active_provider' key in response."""
    with urllib.request.urlopen(BASE + "/api/models", timeout=10) as resp:
        payload = json.loads(resp.read())
    # active_provider can be None/null but the key must exist
    assert "active_provider" in payload, (
        "/api/models response missing 'active_provider' field — "
        "frontend needs this to detect provider mismatches"
    )
# ── Model switch toast (#419) ─────────────────────────────────────────────────
class TestModelSwitchToast:
    """Toast appears when user switches model during an active session."""

    # Exact user-facing toast text the frontend must emit (checked verbatim).
    TOAST_TEXT = "Model change takes effect in your next conversation"

    def test_toast_in_model_select_onchange(self):
        """modelSelect.onchange must show a toast when S.messages is non-empty."""
        boot_src = _read("static/boot.js")
        # Locate the onchange handler block
        onchange_at = boot_src.find("modelSelect').onchange")
        assert onchange_at != -1, "modelSelect.onchange not found in boot.js"
        handler = boot_src[onchange_at:onchange_at + 1100]
        assert self.TOAST_TEXT in handler, (
            "modelSelect.onchange must show a toast when switching model mid-session"
        )

    def test_toast_guards_on_messages_length(self):
        """Toast must only fire when there are existing messages (active session)."""
        boot_src = _read("static/boot.js")
        toast_at = boot_src.find(self.TOAST_TEXT)
        assert toast_at != -1
        # Look back 200 chars for the S.messages guard
        context = boot_src[max(0, toast_at - 200):toast_at + 50]
        assert "S.messages" in context and ".length" in context, (
            "Model switch toast must be gated on S.messages.length > 0"
        )

    def test_toast_uses_show_toast_not_alert(self):
        """Toast must use showToast(), not alert()."""
        boot_src = _read("static/boot.js")
        toast_at = boot_src.find(self.TOAST_TEXT)
        assert toast_at != -1
        context = boot_src[max(0, toast_at - 50):toast_at + 100]
        assert "showToast" in context, "Must use showToast() not alert()"
        assert "alert(" not in context, "Must not use alert()"

    def test_toast_has_typeof_showtoast_guard(self):
        """Toast call must guard typeof showToast to be safe during boot."""
        boot_src = _read("static/boot.js")
        toast_at = boot_src.find(self.TOAST_TEXT)
        assert toast_at != -1
        context = boot_src[max(0, toast_at - 100):toast_at + 50]
        assert "typeof showToast" in context, (
            "showToast call must be guarded with typeof check"
        )

766
tests/test_regressions.py Normal file
View File

@@ -0,0 +1,766 @@
"""
Regression tests -- one test per bug that was introduced and fixed.
These tests exist specifically to prevent those bugs from silently returning.
Each test is tagged with the sprint/commit where the bug was found and fixed.
"""
import json
import os
import pathlib
import time
import urllib.error
import urllib.request
import urllib.parse
REPO_ROOT = pathlib.Path(__file__).parent.parent.resolve()
from tests._pytest_port import BASE
def get(path):
    """GET *path* from the test server; return (parsed JSON body, HTTP status)."""
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        return json.loads(resp.read()), resp.status
def get_raw(path):
    """GET *path*; return (raw body bytes, Content-Type header, HTTP status)."""
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        body = resp.read()
        content_type = resp.headers.get("Content-Type", "")
        return body, content_type, resp.status
def post(path, body=None):
    """POST *body* (default ``{}``) as JSON to *path*; return (parsed JSON, status).

    HTTP error responses are decoded the same way, so callers can assert on
    error payloads without catching exceptions themselves.
    """
    payload = json.dumps(body or {}).encode()
    request = urllib.request.Request(
        BASE + path, data=payload, headers={"Content-Type": "application/json"}
    )
    try:
        with urllib.request.urlopen(request, timeout=10) as resp:
            return json.loads(resp.read()), resp.status
    except urllib.error.HTTPError as err:
        return json.loads(err.read()), err.code
def make_session(created_list):
    """Create a fresh session, record its id in *created_list*, and return the id."""
    payload, _status = post("/api/session/new", {})
    session_id = payload["session"]["session_id"]
    created_list.append(session_id)
    return session_id
# ── R1: uuid not imported in server.py (Sprint 10 split regression) ──────────
def test_chat_start_returns_stream_id(cleanup_test_sessions):
    """R1: chat/start must return stream_id -- catches missing uuid import.
    When uuid was missing, this returned 500 (NameError).
    """
    sid = make_session(cleanup_test_sessions)
    payload, status = post("/api/chat/start", {
        "session_id": sid,
        "message": "ping",
        "model": "openai/gpt-5.4-mini",
    })
    # Must return 200 with a stream_id -- not 500
    assert status == 200, f"chat/start failed with {status}: {payload}"
    assert "stream_id" in payload, "stream_id missing from chat/start response"
    assert len(payload["stream_id"]) > 8, "stream_id looks invalid"
    post("/api/session/delete", {"session_id": sid})
    cleanup_test_sessions.clear()
# ── R2: AIAgent not imported in api/streaming.py (Sprint 10 split regression) ─
def test_chat_stream_opens_successfully(cleanup_test_sessions):
    """R2: After chat/start, GET /api/chat/stream must return 200 (SSE opens).

    When AIAgent was missing, the thread crashed immediately, popped STREAMS,
    and the SSE GET returned 404.
    """
    sid = make_session(cleanup_test_sessions)
    data, status = post("/api/chat/start", {
        "session_id": sid,
        "message": "say: hello",
        "model": "openai/gpt-5.4-mini",
    })
    assert status == 200, f"chat/start failed: {data}"
    stream_id = data["stream_id"]
    # Open the SSE stream -- must return 200, not 404.
    # We only check headers (don't read the full stream body).
    req = urllib.request.Request(BASE + f"/api/chat/stream?stream_id={stream_id}")
    try:
        r = urllib.request.urlopen(req, timeout=3)
        try:
            assert r.status == 200, f"SSE stream returned {r.status} (expected 200)"
            ct = r.headers.get("Content-Type", "")
            assert "text/event-stream" in ct, f"Wrong Content-Type: {ct}"
        finally:
            # Close even when an assertion fails so the socket isn't leaked.
            r.close()
    except urllib.error.HTTPError as e:
        assert False, f"SSE stream returned {e.code} -- AIAgent may not be imported"
    except (urllib.error.URLError, TimeoutError):
        # A timeout or connection close after the headers is acceptable.
        # NOTE: the previous bare `except Exception: pass` also swallowed the
        # AssertionErrors above, which made this test impossible to fail.
        pass
    post("/api/session/delete", {"session_id": sid})
    cleanup_test_sessions.clear()
# ── R3: Session.__init__ missing tool_calls param (Sprint 10 split regression) ─
def test_session_with_tool_calls_in_json_loads_ok(cleanup_test_sessions):
    """R3: Sessions that have tool_calls in their JSON must load without 500.
    When tool_calls=None was missing from Session.__init__, loading such sessions
    threw TypeError: unexpected keyword argument.
    """
    sid = make_session(cleanup_test_sessions)
    # Manually inject tool_calls into the session's JSON file
    default_state = pathlib.Path.home() / ".hermes" / "webui-mvp-test"
    state_dir = pathlib.Path(
        os.environ.get("HERMES_WEBUI_TEST_STATE_DIR", str(default_state))
    )
    session_file = state_dir / "sessions" / f"{sid}.json"
    if session_file.exists():
        payload = json.loads(session_file.read_text())
        payload["tool_calls"] = [
            {"name": "terminal", "snippet": "test output", "tid": "test_tid_001", "assistant_msg_idx": 1}
        ]
        session_file.write_text(json.dumps(payload))
    # Loading the session must return 200, not 500
    data, status = get(f"/api/session?session_id={urllib.parse.quote(sid)}")
    assert status == 200, f"Session with tool_calls returned {status}: {data}"
    assert data["session"]["session_id"] == sid
    post("/api/session/delete", {"session_id": sid})
    cleanup_test_sessions.clear()
# ── R4: has_pending not imported in streaming.py (Sprint 10 split regression) ─
def test_streaming_py_imports_has_pending(cleanup_test_sessions):
    """R4: api/streaming.py must import or define has_pending.

    When missing, the approval check mid-stream caused NameError.
    """
    import re
    src = (REPO_ROOT / "api/streaming.py").read_text()
    assert "has_pending" in src, "has_pending not found in api/streaming.py"
    # The previous second assertion ("import" in src and "has_pending" in src)
    # was vacuous: any module contains the word "import" somewhere.  Require
    # has_pending to actually appear on an import line or be defined locally.
    imported = re.search(r"^\s*from\s+\S+\s+import\s+[^\n]*\bhas_pending\b",
                         src, re.MULTILINE)
    defined = re.search(r"^\s*def\s+has_pending\b", src, re.MULTILINE)
    assert imported or defined, \
        "has_pending must be imported or defined in api/streaming.py"
def test_aiagent_imported_in_streaming(cleanup_test_sessions):
    """R2b: api/streaming.py must import AIAgent.
    When missing, the streaming thread crashed immediately after being spawned.
    """
    streaming_src = (REPO_ROOT / "api/streaming.py").read_text()
    assert "AIAgent" in streaming_src, "AIAgent not referenced in api/streaming.py"
    has_import = (
        "from run_agent import AIAgent" in streaming_src
        or "import AIAgent" in streaming_src
    )
    assert has_import, "AIAgent must be imported in api/streaming.py"
# ── R5: SSE loop did not break on cancel event (Sprint 10 bug) ───────────────
def test_cancel_nonexistent_stream_returns_not_cancelled(cleanup_test_sessions):
    """R5a: Cancel endpoint works and returns cancelled:false for unknown stream."""
    payload, status = get("/api/chat/cancel?stream_id=nonexistent_test_xyz")
    assert status == 200
    assert payload["ok"] is True
    assert payload["cancelled"] is False
def test_server_py_sse_loop_breaks_on_cancel(cleanup_test_sessions):
    """R5b: SSE loop must include 'cancel' in the break condition.
    When missing, the connection hung after the cancel event was processed.
    Sprint 11: logic moved from server.py to api/routes.py -- check both.
    """
    import re
    # Check server.py first, then api/routes.py (Sprint 11 extracted routes)
    server_src = (REPO_ROOT / "server.py").read_text()
    routes_path = REPO_ROOT / "api" / "routes.py"
    routes_src = routes_path.read_text() if routes_path.exists() else ""
    match = re.search(r"if event in \([^)]+\):\s*break", server_src + routes_src)
    assert match, "SSE break condition not found in server.py or api/routes.py"
    assert "cancel" in match.group(), \
        f"'cancel' missing from SSE break condition: {match.group()}"
# ── R6: Test cron isolation (Sprint 10) ──────────────────────────────────────
def test_real_jobs_json_not_polluted_by_tests(cleanup_test_sessions):
    """R6: Test runs must not write to the real ~/.hermes/cron/jobs.json.
    When HERMES_HOME isolation was missing, every test run added test-job-* entries.
    """
    real_jobs_path = pathlib.Path.home() / ".hermes" / "cron" / "jobs.json"
    if not real_jobs_path.exists():
        return  # no jobs file at all -- fine
    raw = json.loads(real_jobs_path.read_text())
    jobs = raw.get("jobs", []) if isinstance(raw, dict) else raw
    leaked = [j for j in jobs if j.get("name", "").startswith("test-job-")]
    assert len(leaked) == 0, \
        f"Real jobs.json contains {len(leaked)} test-job-* entries: " \
        f"{[j['name'] for j in leaked]}"
# ── General: api modules all importable ──────────────────────────────────────
def test_all_api_modules_importable(cleanup_test_sessions):
    """All api/ modules must parse cleanly after any module split.

    NOTE: this is a static ast.parse() check -- it catches SyntaxError only,
    not missing imports (NameError/ImportError are covered by the targeted
    R2b/R4 tests above).
    """
    import ast  # pathlib is already imported at module level
    api_dir = REPO_ROOT / "api"
    # sorted() makes failures deterministic across filesystems
    for module_file in sorted(api_dir.glob("*.py")):
        src = module_file.read_text()
        try:
            ast.parse(src)
        except SyntaxError as e:
            assert False, f"{module_file.name} has syntax error: {e}"
def test_server_py_importable(cleanup_test_sessions):
    """server.py must parse without syntax errors after any split."""
    import ast  # pathlib is already imported at module level; no local re-import
    src = (REPO_ROOT / "server.py").read_text()
    try:
        ast.parse(src)
    except SyntaxError as e:
        assert False, f"server.py has syntax error: {e}"
# ── R7: Cross-session busy state bleed ───────────────────────────────────────
def test_loadSession_resets_busy_state_for_idle_session(cleanup_test_sessions):
    """R7: sessions.js loadSession for a non-inflight session must reset S.busy to false.
    When missing, switching from a busy session to an idle one left the Send button
    disabled, showed the wrong activity bar, and pointed Cancel at the wrong stream.
    """
    sessions_src = (REPO_ROOT / "static/sessions.js").read_text()
    # The fix adds explicit S.busy=false in the non-inflight else branch
    assert "S.busy=false;" in sessions_src, "sessions.js loadSession must set S.busy=false when loading a non-inflight session"
    # btnSend state must be refreshed via updateSendBtn
    assert "updateSendBtn()" in sessions_src, "sessions.js loadSession must call updateSendBtn for non-inflight sessions"
def test_done_handler_guards_setbusy_with_inflight_check(cleanup_test_sessions):
    """R7b: messages.js done/error handlers must not call setBusy(false) if the
    currently viewed session is itself still in-flight.
    When missing, finishing session A while viewing in-flight session B would
    disable B's Send button.
    """
    messages_src = (REPO_ROOT / "static/messages.js").read_text()
    # The fix wraps setBusy(false) in an INFLIGHT-keyed guard
    assert "INFLIGHT[S.session.session_id]" in messages_src, "messages.js must guard setBusy(false) with INFLIGHT check for current session"
def test_refresh_handler_does_not_drop_tool_messages_needed_by_todos(cleanup_test_sessions):
    """Todo panel state must survive session reload/refresh.
    The UI can hide tool-role messages from the visible transcript, but it must not
    destroy the raw session messages because loadTodos reconstructs state from the
    latest todo tool output.
    """
    sessions_src = (REPO_ROOT / "static/sessions.js").read_text()
    ui_src = (REPO_ROOT / "static/ui.js").read_text()
    panels_src = (REPO_ROOT / "static/panels.js").read_text()
    # Negative checks: these exact statements were the regression (filtering
    # tool messages out of the raw session payload) and must stay absent.
    assert "data.session.messages=(data.session.messages||[]).filter(" not in sessions_src, \
        "sessions.js must not overwrite raw session.messages when filtering transcript display"
    assert "S.messages = (data.session.messages || []).filter(" not in ui_src, \
        "ui.js refreshSession must not rebuild S.messages by discarding tool messages from the raw session payload"
    # Positive check: loadTodos must read from the raw session messages.
    assert "const sourceMessages = (S.session && Array.isArray(S.session.messages) && S.session.messages.length) ? S.session.messages : S.messages;" in panels_src, \
        "loadTodos must prefer raw S.session.messages so todo state survives reloads"
def test_cancel_button_not_cleared_across_sessions(cleanup_test_sessions):
    """R7c: The Cancel button and activeStreamId must only be cleared when the
    done/error event belongs to the currently viewed session.
    """
    messages_src = (REPO_ROOT / "static/messages.js").read_text()
    # Both clear operations must be inside the activeSid === S.session guard;
    # this is the exact pattern the fix introduced.
    assert "S.session.session_id===activeSid" in messages_src, "messages.js must guard activeStreamId/Cancel clearing with session identity check"
# ── R8: Session delete does not invalidate index (ghost sessions) ─────────────
def test_deleted_session_does_not_appear_in_list(cleanup_test_sessions):
    """R8: After deleting a session, it must not appear in /api/sessions.
    When _index.json was not invalidated on delete, the session reappeared
    in the list even after the JSON file was removed.
    """
    # Create a session with a title so it shows in the list
    created, _ = post("/api/session/new", {})
    sid = created["session"]["session_id"]
    post("/api/session/rename", {"session_id": sid, "title": "regression-test-delete-R8"})
    # Verify it appears
    listing, _ = get("/api/sessions")
    ids_before = [s["session_id"] for s in listing["sessions"]]
    assert sid in ids_before, "Session must appear in list before delete"
    # Delete it
    result, status = post("/api/session/delete", {"session_id": sid})
    assert status == 200 and result.get("ok") is True
    # Verify it no longer appears -- even after a second fetch (index rebuild)
    listing_after, _ = get("/api/sessions")
    ids_after = [s["session_id"] for s in listing_after["sessions"]]
    assert sid not in ids_after, f"Deleted session {sid} still appears in list -- index not invalidated on delete"
def test_server_delete_invalidates_index(cleanup_test_sessions):
    """R8b: session/delete handler must unlink _index.json.
    Static check that the fix is in place.
    Sprint 11: handler moved from server.py to api/routes.py -- check both.
    """
    server_src = (REPO_ROOT / "server.py").read_text()
    routes_path = REPO_ROOT / "api" / "routes.py"
    routes_src = routes_path.read_text() if routes_path.exists() else ""
    # Find the delete handler in either file
    for label, text in (("server.py", server_src), ("api/routes.py", routes_src)):
        # Accept both single-quote and double-quote style (formatting varies by contributor)
        delete_idx = max(
            text.find("if parsed.path == '/api/session/delete':"),
            text.find('if parsed.path == "/api/session/delete":'),
        )
        if delete_idx < 0:
            continue
        # Use 1200 chars to accommodate any validation/guard code added
        # before the SESSION_INDEX_FILE.unlink() call (e.g. session_id
        # character checks, path traversal guards).
        handler_block = text[delete_idx:delete_idx + 1200]
        assert "SESSION_INDEX_FILE" in handler_block, \
            f"{label} session/delete must invalidate SESSION_INDEX_FILE"
        return
    assert False, "session/delete handler not found in server.py or api/routes.py"
# ── R9: Token/tool SSE events write to wrong session after switch ─────────────
def test_token_handler_guards_session_id(cleanup_test_sessions):
    """R9a: The SSE token event handler must check activeSid before writing to DOM.
    When missing, tokens from session A would render into session B's message area
    if the user switched sessions mid-stream.
    Sprint 12: handler moved into _wireSSE(source), so search source.addEventListener.
    """
    messages_src = (REPO_ROOT / "static/messages.js").read_text()
    # Sprint 12 refactored es.addEventListener -> source.addEventListener inside _wireSSE()
    handler_at = messages_src.find("source.addEventListener('token'")
    if handler_at < 0:
        handler_at = messages_src.find("es.addEventListener('token'")
    assert handler_at >= 0, "token event handler not found"
    handler = messages_src[handler_at:handler_at + 300]
    assert "activeSid" in handler, \
        "token handler must check activeSid before writing to DOM"
    guarded = (
        "S.session.session_id!==activeSid" in handler
        or "S.session.session_id===activeSid" in handler
    )
    assert guarded, "token handler must compare current session to activeSid"
def test_tool_handler_guards_session_id(cleanup_test_sessions):
    """R9b: The SSE tool event handler must check activeSid before writing to DOM.
    When missing, tool cards from session A would render into session B's message area.
    Sprint 12: handler moved into _wireSSE(source), so search source.addEventListener.
    """
    messages_src = (REPO_ROOT / "static/messages.js").read_text()
    handler_at = messages_src.find("source.addEventListener('tool'")
    if handler_at < 0:
        handler_at = messages_src.find("es.addEventListener('tool'")
    assert handler_at >= 0, "tool event handler not found"
    handler = messages_src[handler_at:handler_at + 400]
    assert "activeSid" in handler, \
        "tool handler must check activeSid before writing to DOM"
# ── R10: respondApproval uses wrong session_id after switch (multi-session) ─
def test_respond_approval_uses_approval_session_id(cleanup_test_sessions):
    """R10: respondApproval must use the session_id of the session that triggered
    the approval, not S.session.session_id (which may be a different session
    if the user switched while approval was pending).
    """
    messages_src = (REPO_ROOT / "static/messages.js").read_text()
    # The fix introduces _approvalSessionId to track the correct session
    assert "_approvalSessionId" in messages_src, "messages.js must use _approvalSessionId in respondApproval"
    # respondApproval must use _approvalSessionId, not S.session.session_id directly
    fn_at = messages_src.find("async function respondApproval(")
    assert fn_at >= 0, "respondApproval not found"
    fn_head = messages_src[fn_at:fn_at + 300]
    assert "_approvalSessionId" in fn_head, "respondApproval must read _approvalSessionId, not S.session.session_id"
# ── R11: Tool progress must not use shared status chrome ──────────────────
def test_tool_status_only_shown_for_current_session(cleanup_test_sessions):
    """R11: Tool progress should not drive the global status bar or composer
    status. Live tool cards in the current conversation are the authoritative
    progress UI, which avoids cross-session status leakage entirely.
    """
    messages_src = (REPO_ROOT / "static/messages.js").read_text()
    # Sprint 12: handler moved into _wireSSE(source)
    handler_at = messages_src.find("source.addEventListener('tool'")
    if handler_at < 0:
        handler_at = messages_src.find("es.addEventListener('tool'")
    assert handler_at >= 0
    handler = messages_src[handler_at:handler_at + 400]
    assert "setStatus(" not in handler, \
        "tool handler should not use the global activity/status bar"
    assert "setComposerStatus(" not in handler, \
        "tool handler should not use composer status for tool progress"
# ── R12: Live tool cards lost on switch-away and switch-back ──────────────
def test_loadSession_inflight_restores_live_tool_cards(cleanup_test_sessions):
    """R12: When switching back to an in-flight session, live tool cards in
    #liveToolCards must be restored from S.toolCalls.
    When missing, tool cards disappeared on switch-away even though the session
    was still processing.
    """
    sessions_src = (REPO_ROOT / "static/sessions.js").read_text()
    # INFLIGHT branch must call appendLiveToolCard
    branch_at = sessions_src.find("if(INFLIGHT[sid]){")
    assert branch_at >= 0, "INFLIGHT branch not found in loadSession"
    branch = sessions_src[branch_at:branch_at + 500]
    assert "appendLiveToolCard" in branch, "loadSession INFLIGHT branch must restore live tool cards via appendLiveToolCard"
    assert "clearLiveToolCards" in branch, "loadSession INFLIGHT branch must clear old live cards before restoring"
# ── R13: renderMessages() called before S.busy=false in done handler ────────
def test_done_handler_sets_busy_false_before_renderMessages(cleanup_test_sessions):
    """R13: In the done handler, S.busy must be set to false BEFORE renderMessages()
    is called for the active session. The !S.busy guard in renderMessages() controls
    whether settled tool cards are rendered. When S.busy=true during renderMessages(),
    tool cards are skipped entirely after a response completes.
    """
    src = (REPO_ROOT / "static/messages.js").read_text()
    # Sprint 12: handler moved into _wireSSE(source)
    done_idx = src.find("source.addEventListener('done'")
    if done_idx < 0:
        done_idx = src.find("es.addEventListener('done'")
    assert done_idx >= 0
    done_block = src[done_idx:done_idx + 2500]
    # S.busy=false must appear before renderMessages() within the done handler
    busy_pos = done_block.find("S.busy=false;")
    render_pos = done_block.find("renderMessages()")
    assert busy_pos >= 0, "done handler must set S.busy=false before renderMessages()"
    # Guard: without this, a missing renderMessages() call (render_pos == -1)
    # would trip the ordering assertion below with a misleading message.
    assert render_pos >= 0, "done handler must call renderMessages()"
    assert busy_pos < render_pos, f"S.busy=false (pos {busy_pos}) must come before renderMessages() (pos {render_pos})"
# ── R14: send() uses stale modelSelect.value instead of session model ────────
def test_send_uses_session_model_as_authoritative_source(cleanup_test_sessions):
    """R14: send() must use S.session.model as the authoritative model, not just
    $('modelSelect').value. When a session was created with a model not in the
    current dropdown list, the select value would be stale after switching sessions,
    causing the wrong model to be sent.
    """
    messages_src = (REPO_ROOT / "static/messages.js").read_text()
    # The model field in the chat/start payload must prefer S.session.model
    start_at = messages_src.find("/api/chat/start")
    assert start_at >= 0
    payload_block = messages_src[start_at:start_at + 300]
    assert "S.session.model" in payload_block, "send() must use S.session.model in the chat/start payload"
# ── R15: newSession does not clear live tool cards ────────────────────────────
def test_newSession_clears_live_tool_cards(cleanup_test_sessions):
    """R15: newSession() must call clearLiveToolCards() so live cards from a
    previous in-flight session don't persist when starting a fresh conversation.
    """
    sessions_src = (REPO_ROOT / "static/sessions.js").read_text()
    fn_at = sessions_src.find("async function newSession(")
    assert fn_at >= 0
    # Bound the body at the next async function definition
    fn_end = sessions_src.find("async function ", fn_at + 10)
    fn_body = sessions_src[fn_at:fn_end]
    assert "clearLiveToolCards" in fn_body, "newSession() must call clearLiveToolCards() to clear stale live cards"
def test_newSession_resets_busy_state_for_fresh_chat(cleanup_test_sessions):
    """R15b: newSession() must reset the viewed chat to idle state.
    Without this, starting a second chat while another session is streaming leaves
    S.busy=true, so the first send in the new chat gets incorrectly queued.
    """
    src = (REPO_ROOT / "static/sessions.js").read_text()
    new_sess_idx = src.find("async function newSession(")
    assert new_sess_idx >= 0
    # Slice newSession's body: from its definition up to the next async function.
    next_fn = src.find("async function ", new_sess_idx + 10)
    new_sess_body = src[new_sess_idx:next_fn]
    # Exact statements introduced by the fix -- checked verbatim.
    assert "S.busy=false;" in new_sess_body, \
        "newSession() must clear S.busy so a fresh chat is immediately sendable"
    assert "S.activeStreamId=null;" in new_sess_body, \
        "newSession() must clear the active stream id for the newly viewed chat"
    assert "updateQueueBadge(S.session.session_id);" in new_sess_body, \
        "newSession() must refresh the badge for the new session rather than leaving the old session's queue badge visible"
def test_session_scoped_message_queue_frontend_wiring(cleanup_test_sessions):
    """R15bb: queued follow-ups must stay attached to their originating session.
    The frontend should use a session-keyed queue store and drain only the active
    session's queued messages when that session becomes idle.
    """
    ui_src = (REPO_ROOT / "static/ui.js").read_text()
    messages_src = (REPO_ROOT / "static/messages.js").read_text()
    sessions_src = (REPO_ROOT / "static/sessions.js").read_text()
    # ui.js: session-keyed queue store and its accessors must exist.
    assert "const SESSION_QUEUES" in ui_src
    assert "function queueSessionMessage" in ui_src
    assert "function shiftQueuedSessionMessage" in ui_src
    # ui.js drain path: only the active session's queue is drained (verbatim).
    assert "const sid=S.session&&S.session.session_id;" in ui_src
    assert "const next=sid?shiftQueuedSessionMessage(sid):null;" in ui_src
    # messages.js: enqueue + badge refresh are keyed by the current session.
    assert "queueSessionMessage(S.session.session_id" in messages_src
    assert "updateQueueBadge(S.session.session_id);" in messages_src
    # sessions.js: badge refresh on session switch uses the switched-to sid.
    assert "updateQueueBadge(sid);" in sessions_src
def test_chat_start_persists_pending_turn_metadata_for_reload_recovery(cleanup_test_sessions):
    """R15c: chat/start must expose enough pending-turn metadata for a reload to
    rebuild the in-flight conversation instead of showing a blank session.
    """
    routes_src = (REPO_ROOT / "api/routes.py").read_text()
    # The session must both record the pending turn and surface it in payloads.
    for marker in (
        's.active_stream_id = stream_id',
        's.pending_user_message = msg',
        's.pending_attachments = attachments',
        '"active_stream_id": getattr(s, "active_stream_id", None)',
        '"pending_user_message": getattr(s, "pending_user_message", None)',
    ):
        assert marker in routes_src
def test_reload_path_restores_pending_message_and_reattaches_live_stream(cleanup_test_sessions):
    """R15d: the frontend reload path must show the pending user turn and
    reattach to the live SSE stream after loadSession().
    """
    expectations = [
        ("static/ui.js", 'getPendingSessionMessage'),
        ("static/ui.js", 'pending_user_message'),
        ("static/messages.js", 'function attachLiveStream'),
        ("static/sessions.js", 'const pendingMsg=typeof getPendingSessionMessage'),
        ("static/sessions.js", 'const activeStreamId=data.session.active_stream_id||null;'),
        ("static/sessions.js", 'attachLiveStream(sid, activeStreamId'),
        ("static/ui.js", 'if (S.activeStreamId && S.activeStreamId === streamId) return;'),
    ]
    cache = {}  # read each source file at most once
    for rel_path, needle in expectations:
        if rel_path not in cache:
            cache[rel_path] = (REPO_ROOT / rel_path).read_text()
        assert needle in cache[rel_path]
# ── R16: Switching away/back must preserve live partial assistant output ─────
def test_live_stream_tokens_persist_partial_assistant_for_session_switch(cleanup_test_sessions):
    """R16: in-flight assistant text must be mirrored into INFLIGHT session state,
    and the live stream must rebind to the rebuilt DOM after switching away and back.
    Without this, partial assistant output disappears until the final done payload lands.
    """
    messages_src = (REPO_ROOT / "static/messages.js").read_text()
    ui_src = (REPO_ROOT / "static/ui.js").read_text()
    message_checks = [
        ("content:assistantText",
         "messages.js must persist the partial assistant text into INFLIGHT state"),
        ("_live:true",
         "messages.js must mark the persisted in-flight assistant row so renderMessages can re-anchor it"),
        ("syncInflightAssistantMessage();",
         "token handler must update INFLIGHT state before checking the active session"),
        ("assistantRow&&!assistantRow.isConnected",
         "live stream must drop stale detached assistant DOM references after session switches"),
    ]
    for needle, reason in message_checks:
        assert needle in messages_src, reason
    assert "data-live-assistant" in ui_src, \
        "renderMessages must preserve a live-assistant DOM anchor when rebuilding the thread"
def test_inflight_session_state_tracks_live_tool_cards_per_session(cleanup_test_sessions):
    """R16b: live tool cards must be stored on the in-flight session, not only in the
    global S.toolCalls array, so switching chats does not lose or misattach them.
    """
    msg_js = (REPO_ROOT / "static" / "messages.js").read_text()
    sess_js = (REPO_ROOT / "static" / "sessions.js").read_text()
    assert "INFLIGHT[activeSid].toolCalls.push(tc);" in msg_js, \
        "tool SSE handler must persist live tool calls onto the in-flight session"
    assert "S.toolCalls=(INFLIGHT[sid].toolCalls||[]);" in sess_js, \
        "loadSession() must restore live tool calls from the in-flight session state"
def test_loadSession_inflight_sets_busy_before_renderMessages(cleanup_test_sessions):
    """R16c: loading an in-flight session must mark it busy before renderMessages().
    Otherwise renderMessages() treats S.toolCalls as settled history cards and the
    same tool call appears once inline and once in the live tool host after a
    session switch.
    """
    sessions_js = (REPO_ROOT / "static/sessions.js").read_text()
    anchor = sessions_js.find("if(INFLIGHT[sid]){")
    assert anchor >= 0, "INFLIGHT branch not found in loadSession"
    # Inspect the same 700-character window after the branch as before.
    window = sessions_js[anchor:anchor + 700]
    set_busy_at = window.find("S.busy=true;")
    render_at = window.find("renderMessages();appendThinking();")
    assert set_busy_at >= 0, "loadSession INFLIGHT branch must set S.busy=true"
    assert render_at >= 0, "loadSession INFLIGHT branch must call renderMessages()"
    assert set_busy_at < render_at, \
        "loadSession must set S.busy=true before renderMessages() to avoid duplicate tool cards"
def test_streaming_bridge_accepts_current_tool_progress_callback_signature(cleanup_test_sessions):
    """R17: api/streaming.py must accept the current Hermes agent callback contract.
    The agent now calls tool_progress_callback(event_type, name, preview, args, **kwargs).
    If the WebUI bridge only accepts (name, preview, args), live tool updates silently vanish.
    """
    bridge = (REPO_ROOT / "api/streaming.py").read_text()
    assert "def on_tool(*cb_args, **cb_kwargs):" in bridge, \
        "streaming.py must accept variable callback args for tool progress events"
    assert "reasoning_callback=on_reasoning" in bridge, \
        "streaming.py must wire the agent's reasoning callback into the SSE bridge"
    # The SSE emit may use either quote style for the event name.
    emits_completion = any(v in bridge for v in ("put('tool_complete'", 'put("tool_complete"'))
    assert emits_completion, \
        "streaming.py must emit live tool completion SSE events"
def test_messages_js_supports_live_reasoning_and_tool_completion(cleanup_test_sessions):
    """R18: messages.js must render live reasoning and react to tool completion events.
    Without these handlers, the operator only sees generic Thinking… or nothing
    until the final done snapshot redraws the whole turn.
    """
    stream_src = (REPO_ROOT / "static/messages.js").read_text()
    assert "let reasoningText=''" in stream_src, \
        "messages.js must track streamed reasoning text separately from assistant text"
    # Either spacing/quote style of the declaration is acceptable.
    assert any(v in stream_src for v in ("let liveReasoningText=''", 'let liveReasoningText = ""')), \
        "messages.js must track the currently active reasoning segment separately from cumulative reasoning"
    assert any(v in stream_src for v in ("source.addEventListener('reasoning'",
                                         'source.addEventListener("reasoning"')), \
        "messages.js must listen for live reasoning SSE events"
    assert any(v in stream_src for v in ("source.addEventListener('tool_complete'",
                                         'source.addEventListener("tool_complete"')), \
        "messages.js must listen for live tool completion SSE events"
    assert "function _parseStreamState()" in stream_src, \
        "messages.js must parse live stream state into reasoning + visible answer"
def test_ui_js_can_upgrade_thinking_spinner_into_live_reasoning_card(cleanup_test_sessions):
    """R19: ui.js must be able to replace the placeholder thinking spinner with
    streamed reasoning text while a turn is in progress.
    """
    ui_source = (REPO_ROOT / "static/ui.js").read_text()
    markup_variants = ("function _thinkingMarkup(text='')",
                       'function _thinkingMarkup(text="")')
    assert any(v in ui_source for v in markup_variants), \
        "ui.js must centralize thinking row markup so it can switch between spinner and live text"
    update_variants = ("function updateThinking(text=''){appendThinking(text);}",
                       'function updateThinking(text=""){appendThinking(text);}')
    assert any(v in ui_source for v in update_variants), \
        "ui.js must expose an updateThinking helper for live reasoning rendering"
    assert "function finalizeThinkingCard()" in ui_source, \
        "ui.js must expose a helper to finalize one live thinking card before starting another"
def test_ui_js_keeps_split_thinking_cards_and_assistant_header(cleanup_test_sessions):
    """R19b: settled render should keep distinct thinking cards for split assistant
    turns inside a single assistant turn container, preserving one assistant header
    for the whole response while keeping multiple thinking cards distinct.
    """
    ui_source = (REPO_ROOT / "static" / "ui.js").read_text()
    # Absence check: the old merge path must be gone entirely.
    assert "pendingTurnThinking" not in ui_source, \
        "renderMessages must not merge distinct thinking blocks into one settled card"
    assert "_createAssistantTurn(" in ui_source, \
        "renderMessages must build a shared assistant turn wrapper instead of separate top-level rows"
    assert "assistant-segment" in ui_source, \
        "settled assistant turns must preserve per-message segments for multiple thinking/tool/result blocks"
def test_ui_js_keeps_reasoning_only_assistant_messages_visible(cleanup_test_sessions):
    """R19c: assistant messages that only contain reasoning must still survive
    rerenders, otherwise prior thinking cards disappear on the next turn.
    """
    ui_source = (REPO_ROOT / "static" / "ui.js").read_text()
    assert "function _messageHasReasoningPayload(m)" in ui_source, \
        "ui.js must detect reasoning-only assistant messages"
    # Compare with all spaces stripped so formatting differences don't matter.
    no_spaces = ui_source.replace(' ', '')
    assert "hasTc||hasTu||_messageHasReasoningPayload(m)" in no_spaces, \
        "renderMessages visibility filter must preserve reasoning-only assistant messages"
def test_ui_js_does_not_hide_anchor_segments_that_contain_thinking(cleanup_test_sessions):
    """R19c2: assistant anchor segments that contain a thinking card must remain
    visible; only truly empty tool-call anchor segments should be hidden.
    """
    # Strip all spaces and newlines so the structural check ignores formatting.
    squeezed = (REPO_ROOT / "static" / "ui.js").read_text().replace(' ', '').replace('\n', '')
    assert "}elseif(!thinkingText){" in squeezed, \
        "renderMessages must only hide assistant anchor segments when they have no thinking content"
def test_messages_js_live_assistant_segment_reuses_live_turn_wrapper(cleanup_test_sessions):
    """R19d: live streaming must reuse the existing live assistant turn wrapper created
    by appendThinking(), otherwise the header gets recreated when answer tokens start.
    """
    stream_src = (REPO_ROOT / "static" / "messages.js").read_text()
    signature_variants = ("function ensureAssistantRow(force=false)",
                          'function ensureAssistantRow(force = false)')
    assert any(v in stream_src for v in signature_variants), \
        "ensureAssistantRow should manage the live assistant content segment"
    assert "let turn=$('liveAssistantTurn');" in stream_src, \
        "ensureAssistantRow must bind to the existing live assistant turn wrapper"
    assert "appendThinking();" in stream_src, \
        "ensureAssistantRow should create the live turn via appendThinking() when needed"
    class_variants = ("assistantRow.className='assistant-segment';",
                      "assistantRow.className = 'assistant-segment';")
    assert any(v in stream_src for v in class_variants), \
        "live answer content should be appended as a segment inside the live turn wrapper"
    assert "if(!force&&!assistantRow){" in stream_src.replace(' ', ''), \
        "ensureAssistantRow must still avoid creating the live answer segment when no display text exists yet"
    assert "if(String((parsed&&parsed.displayText)||'').trim()||assistantRow) ensureAssistantRow();" in stream_src, \
        "token handler must only create the live answer segment once visible answer text starts"
def test_messages_js_finalizes_thinking_card_before_tool_card(cleanup_test_sessions):
    """R19e: later reasoning after a tool call must render in a fresh card."""
    stream_src = (REPO_ROOT / "static/messages.js").read_text()
    assert "finalizeThinkingCard" in stream_src, \
        "tool handler must finalize the current live thinking card before appending a tool card"
    reset_variants = ("liveReasoningText='';", 'liveReasoningText = "";')
    assert any(v in stream_src for v in reset_variants), \
        "tool handler must reset the active reasoning segment before post-tool reasoning arrives"
# ── R17: Stack traces must not leak to clients in 500 responses ────────────
def test_500_response_has_no_trace_field():
    """R16: HTTP 500 responses must not include a 'trace' field.
    Leaking tracebacks exposes file paths, module names, and potentially
    secret values from local variables.
    """
    # Posting an empty body to /api/chat/start is missing required fields,
    # which drives the request through the error-response path (4xx/5xx).
    body, _status = post("/api/chat/start", {})
    assert "trace" not in body, \
        "Server must not leak stack traces to clients"
def test_upload_error_has_no_trace_field():
    """R16b: Upload 500 responses must not include a 'trace' field."""
    # A non-multipart body forces /api/upload down its error-handling path.
    payload = b"not-multipart-data"
    req = urllib.request.Request(
        BASE + "/api/upload",
        data=payload,
        headers={"Content-Type": "text/plain", "Content-Length": str(len(payload))},
    )
    try:
        with urllib.request.urlopen(req, timeout=10) as resp:
            code, body = resp.status, json.loads(resp.read())
    except urllib.error.HTTPError as err:
        code, body = err.code, json.loads(err.read())
    assert code >= 400, "Invalid upload should return an error status"
    assert "trace" not in body, \
        "Upload errors must not leak stack traces to clients"
    assert "error" in body, "Error responses must include an 'error' key"
# ── #248: /skills slash command ───────────────────────────────────────────────
def test_skills_slash_command_defined():
    """#248: /skills slash command must be wired up.
    Pre-Task 2 (slash-command-parity batch 1) this checked for the
    hardcoded ``name:'skills'`` entry in the COMMANDS array. The COMMANDS
    array is now sourced from hermes-agent's ``COMMAND_REGISTRY`` at boot
    via ``GET /api/commands``, so the literal string is gone. The handler
    must still exist and be registered, otherwise ``/skills`` would fall
    through to \"not yet supported\".
    """
    commands_src = (REPO_ROOT / "static/commands.js").read_text()
    # 1. The cmdSkills handler function must be defined (async or plain).
    handler_defined = ("async function cmdSkills" in commands_src
                       or "function cmdSkills" in commands_src)
    assert handler_defined, "cmdSkills function missing from commands.js"
    # 2. HANDLERS.skills must be registered to dispatch /skills to cmdSkills.
    assert "HANDLERS.skills" in commands_src, \
        "HANDLERS.skills registration missing from commands.js"
def test_reload_recovery_persists_durable_inflight_state(cleanup_test_sessions):
    """Reload recovery must persist a durable per-session inflight snapshot.
    Without these helpers, loadSession() references loadInflightState() but a full
    browser reload has no saved state to hydrate, so recovery silently no-ops.
    """
    ui_src = (REPO_ROOT / "static/ui.js").read_text()
    messages_src = (REPO_ROOT / "static/messages.js").read_text()
    sessions_src = (REPO_ROOT / "static/sessions.js").read_text()
    # ui.js must provide the storage key and the save/load/clear helpers.
    for helper in (
        "const INFLIGHT_STATE_KEY = 'hermes-webui-inflight-state'",
        "function saveInflightState(sid, state)",
        "function loadInflightState(sid, streamId)",
        "function clearInflightState(sid)",
    ):
        assert helper in ui_src
    assert "saveInflightState(activeSid" in messages_src, \
        "messages.js must persist live stream snapshots while a turn is in flight"
    assert "clearInflightState(activeSid)" in messages_src, \
        "messages.js must clear durable inflight snapshots when the run ends/errors/cancels"
    assert "const stored=loadInflightState(sid, activeStreamId);" in sessions_src, \
        "loadSession() must hydrate in-flight state from durable browser storage on reload"

View File

@@ -0,0 +1,116 @@
from collections import Counter
from pathlib import Path
import re
REPO = Path(__file__).resolve().parent.parent
def read(path: Path) -> str:
    """Return the UTF-8 decoded contents of *path*."""
    with path.open(encoding="utf-8") as handle:
        return handle.read()
def test_russian_locale_block_exists():
    """The i18n bundle must declare a top-level `ru` locale with its own
    display label and speech-synthesis language tag."""
    bundle = read(REPO / "static" / "i18n.js")
    for marker in ("\n ru: {", "_label: 'Русский'", "_speech: 'ru-RU'"):
        assert marker in bundle
def extract_locale_block(src: str, locale_key: str) -> str:
    """Return the text between the braces of the ``locale_key: { ... }``
    object literal in *src*, excluding the outer braces themselves.

    Implements a small brace-matching scanner that tracks JS single-,
    double-, and backtick-quoted strings (with backslash escapes) so that
    braces appearing inside string values do not affect the depth count.
    NOTE(review): the scanner does not handle // or /* */ comments or regex
    literals, and braces inside template-literal ``${...}`` interpolations
    are skipped wholesale -- confirm the locale blocks stay plain key/value
    pairs.

    Raises AssertionError if the block is missing or its braces never
    re-balance before the end of *src*.
    """
    # Anchor on `<locale_key> : {` at a word boundary; end() - 1 is the
    # index of the opening "{" itself.
    start_match = re.search(rf"\b{re.escape(locale_key)}\s*:\s*\{{", src)
    assert start_match, f"{locale_key} locale block not found"
    start = start_match.end() - 1
    depth = 0            # current brace nesting relative to the block start
    in_single = False    # inside a '...' string
    in_double = False    # inside a "..." string
    in_backtick = False  # inside a `...` template literal
    escape = False       # previous char was a backslash inside a string
    for i in range(start, len(src)):
        ch = src[i]
        if escape:
            # Consume the character following a backslash (e.g. \' or \\).
            escape = False
            continue
        if in_single:
            if ch == "\\":
                escape = True
            elif ch == "'":
                in_single = False
            continue
        if in_double:
            if ch == "\\":
                escape = True
            elif ch == '"':
                in_double = False
            continue
        if in_backtick:
            if ch == "\\":
                escape = True
            elif ch == "`":
                in_backtick = False
            continue
        # Outside any string: quotes open a string, braces adjust depth.
        if ch == "'":
            in_single = True
            continue
        if ch == '"':
            in_double = True
            continue
        if ch == "`":
            in_backtick = True
            continue
        if ch == "{":
            depth += 1
            continue
        if ch == "}":
            depth -= 1
            if depth == 0:
                # Matched the opening brace at `start`: return the interior.
                return src[start + 1 : i]
    raise AssertionError(f"{locale_key} locale block braces are not balanced")
def test_russian_locale_includes_representative_translations():
    """A representative sample of translated entries must be present verbatim
    (entries are spelled with \\u escapes so the file stays ASCII-safe)."""
    bundle = read(REPO / "static" / "i18n.js")
    required = [
        "settings_title: '\u041d\u0430\u0441\u0442\u0440\u043e\u0439\u043a\u0438'",
        "login_title: '\u0412\u0445\u043e\u0434'",
        "approval_heading: '\u0422\u0440\u0435\u0431\u0443\u0435\u0442\u0441\u044f \u043f\u043e\u0434\u0442\u0432\u0435\u0440\u0436\u0434\u0435\u043d\u0438\u0435'",
        "tab_tasks: '\u0417\u0430\u0434\u0430\u0447\u0438'",
        "tab_profiles: '\u041f\u0440\u043e\u0444\u0438\u043b\u0438'",
        "session_time_just_now: '\u0442\u043e\u043b\u044c\u043a\u043e \u0447\u0442\u043e'",
        "onboarding_title: '\u0414\u043e\u0431\u0440\u043e \u043f\u043e\u0436\u0430\u043b\u043e\u0432\u0430\u0442\u044c \u0432 Hermes Web UI'",
        "onboarding_complete: '\u041f\u0435\u0440\u0432\u0438\u0447\u043d\u0430\u044f \u043d\u0430\u0441\u0442\u0440\u043e\u0439\u043a\u0430 \u0437\u0430\u0432\u0435\u0440\u0448\u0435\u043d\u0430'",
        "profile_default_label: '\u0028\u043f\u043e \u0443\u043c\u043e\u043b\u0447\u0430\u043d\u0438\u044e\u0029'",
        "profile_name_placeholder: '\u041d\u0430\u0437\u0432\u0430\u043d\u0438\u0435 \u043f\u0440\u043e\u0444\u0438\u043b\u044f \u0028\u0441\u0442\u0440\u043e\u0447\u043d\u044b\u0435 \u0431\u0443\u043a\u0432\u044b, a-z, 0-9, \u0434\u0435\u0444\u0438\u0441\u044b\u0029'",
        "profile_clone_label: '\u0421\u043a\u043e\u043f\u0438\u0440\u043e\u0432\u0430\u0442\u044c \u043a\u043e\u043d\u0444\u0438\u0433\u0443\u0440\u0430\u0446\u0438\u044e \u0438\u0437 \u0430\u043a\u0442\u0438\u0432\u043d\u043e\u0433\u043e \u043f\u0440\u043e\u0444\u0438\u043b\u044f'",
        "profile_base_url_placeholder: '\u0411\u0430\u0437\u043e\u0432\u044b\u0439 URL \u0028\u043d\u0435\u043e\u0431\u044f\u0437\u0430\u0442\u0435\u043b\u044c\u043d\u043e, \u043d\u0430\u043f\u0440\u0438\u043c\u0435\u0440 http://localhost:11434\u0029'",
        "profile_api_key_placeholder: 'API-\u043a\u043b\u044e\u0447 \u0028\u043d\u0435\u043e\u0431\u044f\u0437\u0430\u0442\u0435\u043b\u044c\u043d\u043e\u0029'",
    ]
    for needle in required:
        assert needle in bundle
def test_russian_locale_covers_english_keys():
    """Every key defined in the `en` locale must also exist in `ru`."""
    bundle = read(REPO / "static" / "i18n.js")
    pattern = re.compile(r"^\s{4}([a-zA-Z0-9_]+):", re.MULTILINE)

    def keys_of(locale):
        # One-line purpose: collect the key names declared in a locale block.
        return set(pattern.findall(extract_locale_block(bundle, locale)))

    missing = sorted(keys_of("en") - keys_of("ru"))
    assert not missing, f"Russian locale missing keys: {missing}"
def test_russian_locale_has_no_duplicate_keys():
    """The `ru` locale block must not declare the same key more than once."""
    bundle = read(REPO / "static" / "i18n.js")
    pattern = re.compile(r"^\s{4}([a-zA-Z0-9_]+):", re.MULTILINE)
    counts = Counter(pattern.findall(extract_locale_block(bundle, "ru")))
    duplicates = sorted(key for key, seen in counts.items() if seen > 1)
    assert not duplicates, f"Russian locale has duplicate keys: {duplicates}"

View File

@@ -0,0 +1,310 @@
"""
Security tests: credential redaction in API responses.
Verifies that credentials (GitHub PATs, API keys, etc.) are masked in:
- GET /api/session (messages and tool_calls)
- GET /api/memory (MEMORY.md and USER.md content)
- GET /api/session/export (downloaded JSON)
- SSE done event (session payload in stream)
Tests run against the isolated test server on port 8788.
"""
import json
import pathlib
import sys
import urllib.request
import urllib.error
import pytest
sys.path.insert(0, str(pathlib.Path(__file__).parent.parent.parent))
def _server_is_up(port: int = 8788) -> bool:
"""Return True if the test server is accepting connections."""
try:
urllib.request.urlopen(f"http://127.0.0.1:{port}/health", timeout=2)
return True
except Exception:
return False
# _needs_server: these tests require the conftest test_server fixture (port 8788).
# The skipif is evaluated lazily via the fixture, not at collection time.
_needs_server = pytest.mark.usefixtures("test_server")
# BASE resolves to the per-worktree test server URL (see tests/_pytest_port.py).
from tests._pytest_port import BASE
# Sample credentials that should be masked in every API response
# (fake values shaped like real token formats so redaction patterns match them).
_FAKE_GITHUB_PAT = "ghp_TestFakeCredential1234567890ab"  # GitHub PAT style
_FAKE_SK_KEY = "sk-TestFakeOpenAIKey1234567890abcdef"  # OpenAI-style API key
_FAKE_HF_TOKEN = "hf_TestFakeHuggingFaceToken12345"  # Hugging Face token style
_FAKE_AWS_KEY = "AKIATESTFAKEKEY12345"  # AWS access key id style
# ── HTTP helpers ──────────────────────────────────────────────────────────────
def _get(path):
    """GET *path* from the test server and return the decoded JSON body."""
    url = BASE + path
    with urllib.request.urlopen(url, timeout=10) as resp:
        return json.loads(resp.read())
def _post(path, body=None):
    """POST *body* as JSON to *path*; return (parsed_json, status_code).

    HTTP error responses are not raised -- their body and code are returned
    the same way as successful ones.
    """
    payload = json.dumps(body or {}).encode()
    req = urllib.request.Request(
        BASE + path, data=payload,
        headers={"Content-Type": "application/json"},
    )
    try:
        with urllib.request.urlopen(req, timeout=10) as resp:
            return json.loads(resp.read()), resp.status
    except urllib.error.HTTPError as err:
        return json.loads(err.read()), err.code
def _get_raw(path):
    """Fetch *path* and return the undecoded response bytes (export endpoint)."""
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        return resp.read()
def _assert_no_plaintext_credentials(text: str, label: str = ""):
    """Fail if any known fake credential appears verbatim in *text*."""
    for secret in (_FAKE_GITHUB_PAT, _FAKE_SK_KEY, _FAKE_HF_TOKEN, _FAKE_AWS_KEY):
        message = (
            f"{label}: credential '{secret[:12]}...' found in plaintext. "
            "Redaction is not working."
        )
        assert secret not in text, message
# ── helpers.py unit tests (import-level, no test_server needed) ───────────────────
def test_redact_value_str():
    """_redact_value masks a plaintext GitHub PAT embedded in a string."""
    from api.helpers import _redact_value
    redacted = _redact_value(f"my token is {_FAKE_GITHUB_PAT} bye")
    assert _FAKE_GITHUB_PAT not in redacted
    assert "ghp_Te" in redacted  # the identifying prefix survives masking
def test_redact_value_dict():
    """_redact_value recurses into dicts."""
    from api.helpers import _redact_value
    payload = {"content": f"key={_FAKE_SK_KEY}", "role": "user"}
    redacted = _redact_value(payload)
    assert _FAKE_SK_KEY not in redacted["content"]
    assert redacted["role"] == "user"  # innocent values untouched
def test_redact_value_list():
    """_redact_value recurses into lists."""
    from api.helpers import _redact_value
    payload = [{"content": _FAKE_GITHUB_PAT}, {"content": "safe text"}]
    redacted = _redact_value(payload)
    assert _FAKE_GITHUB_PAT not in redacted[0]["content"]
    assert redacted[1]["content"] == "safe text"  # clean entries unchanged
def test_redact_session_data_messages():
    """redact_session_data masks credentials in messages[] and tool_calls[]
    while leaving safe fields intact."""
    from api.helpers import redact_session_data
    raw = {
        "session_id": "abc123",
        "title": f"my token {_FAKE_GITHUB_PAT}",
        "messages": [
            {"role": "user", "content": f"token: {_FAKE_GITHUB_PAT}"},
            {"role": "assistant", "content": "sure"},
        ],
        "tool_calls": [
            {"name": "terminal", "args": {"command": f"gh auth login --token {_FAKE_GITHUB_PAT}"},
             "snippet": "ok"},
        ],
    }
    clean = redact_session_data(raw)
    _assert_no_plaintext_credentials(json.dumps(clean), "redact_session_data")
    # Safe fields remain intact
    assert clean["session_id"] == "abc123"
    assert clean["messages"][1]["content"] == "sure"
def test_redact_session_data_multiple_cred_types():
    """redact_session_data handles sk-, ghp_, hf_, and AKIA keys in one pass."""
    from api.helpers import redact_session_data
    mixed_secrets = (
        f"openai={_FAKE_SK_KEY} "
        f"github={_FAKE_GITHUB_PAT} "
        f"hf={_FAKE_HF_TOKEN} "
        f"aws={_FAKE_AWS_KEY}"
    )
    raw = {
        "title": "test",
        "messages": [{"role": "user", "content": mixed_secrets}],
        "tool_calls": [],
    }
    clean = redact_session_data(raw)
    _assert_no_plaintext_credentials(json.dumps(clean), "multi-type redaction")
def test_redact_session_data_non_sensitive_unchanged():
    """redact_session_data does not corrupt innocent content."""
    from api.helpers import redact_session_data
    raw = {
        "title": "Hello world",
        "messages": [{"role": "user", "content": "What is 2+2?"}],
        "tool_calls": [{"name": "terminal", "snippet": "4"}],
    }
    clean = redact_session_data(raw)
    # Nothing here matches a credential pattern, so every field passes through.
    assert clean["title"] == "Hello world"
    assert clean["messages"][0]["content"] == "What is 2+2?"
    assert clean["tool_calls"][0]["snippet"] == "4"
# ── API-level tests (require running test server started by conftest.py) ─────
# Run via `start.sh && pytest tests/test_security_redaction.py -v`
def _create_session_with_credentials() -> str:
    """Write a session file with credential-containing messages directly to disk.
    Bypasses the server's in-memory cache so the GET endpoint is forced to read
    from disk, exercising the redaction code path on load.
    Uses TEST_STATE_DIR from conftest.py (the isolated test server state directory).

    Returns:
        str: the session id of the newly written session file.
    """
    import time, uuid
    try:
        from conftest import TEST_STATE_DIR
        sessions_dir = TEST_STATE_DIR / "sessions"
    except ImportError:
        # Fallback when conftest isn't importable (e.g. run outside pytest):
        # use the configured production session directory instead.
        from api.config import SESSION_DIR as sessions_dir
        sessions_dir = pathlib.Path(sessions_dir)
    sessions_dir.mkdir(parents=True, exist_ok=True)
    # Use a unique session ID that is NOT in the server's LRU cache
    sid = "sec_test_" + uuid.uuid4().hex[:8]
    now = time.time()
    session_file = sessions_dir / f"{sid}.json"
    # Full session schema written in one shot; the title, messages, and
    # tool_calls all deliberately embed fake credentials so any redaction
    # failure is visible in every API surface that returns session data.
    session_file.write_text(json.dumps({
        "session_id": sid,
        "title": f"session with {_FAKE_GITHUB_PAT}",
        "workspace": "/tmp",
        "model": "test",
        "created_at": now,
        "updated_at": now,
        "pinned": False, "archived": False, "project_id": None,
        "profile": "default", "input_tokens": 0, "output_tokens": 0,
        "estimated_cost": None, "personality": None,
        "messages": [
            {"role": "user", "content": f"my PAT is {_FAKE_GITHUB_PAT}"},
            {"role": "assistant", "content": f"sk key is {_FAKE_SK_KEY}"},
            {"role": "tool", "content": "result ok", "name": "terminal"},
        ],
        "tool_calls": [
            {"name": "terminal",
             "args": {"command": f"gh auth login --token {_FAKE_GITHUB_PAT}"},
             "snippet": "blocked"}
        ],
    }))
    return sid
def test_api_session_redacts_messages():
    """GET /api/session route must call redact_session_data() before returning."""
    import inspect
    import api.routes as routes
    handler_source = inspect.getsource(routes.handle_get)
    # Source-level check: the redaction helper must appear in the GET handler.
    assert "redact_session_data" in handler_source, (
        "api/routes.py handle_get must call redact_session_data() on /api/session response"
    )
def test_api_session_redacts_title():
    """redact_session_data must redact credentials from the session title field.

    Fix: the assertion message was an f-string with no placeholders
    (ruff F541) -- a plain string literal is equivalent and clearer.
    """
    from api.helpers import redact_session_data
    session = {
        "session_id": "abc123",
        "title": f"session with {_FAKE_GITHUB_PAT}",
        "messages": [],
        "tool_calls": [],
    }
    result = redact_session_data(session)
    assert _FAKE_GITHUB_PAT not in result["title"], (
        "redact_session_data must mask credentials in title field"
    )
    assert result["session_id"] == "abc123"  # safe fields preserved
@_needs_server
def test_api_sessions_list_redacts_titles(test_server):
    """GET /api/sessions must not return session titles containing credentials."""
    # Seed a credential-laden session file on disk, then list all sessions.
    _create_session_with_credentials()
    listing = _get("/api/sessions")
    _assert_no_plaintext_credentials(json.dumps(listing), "GET /api/sessions titles")
def test_api_session_export_redacts():
    """GET /api/session/export must call redact_session_data() in _handle_session_export."""
    import inspect
    import api.routes as routes
    # The export handler is a separate function (_handle_session_export).
    export_source = inspect.getsource(routes._handle_session_export)
    assert "redact_session_data" in export_source, (
        "_handle_session_export must call redact_session_data() before serving download"
    )
@_needs_server
def test_api_memory_redacts_via_write_read(test_server):
    """Credential written to MEMORY.md must be masked in GET /api/memory response."""
    saved = _get("/api/memory").get("memory", "")
    tainted = f"GitHub PAT: {_FAKE_GITHUB_PAT}\nNormal note: hello world"
    reply, code = _post("/api/memory/write", {"section": "memory", "content": tainted})
    assert code == 200, f"memory/write failed: {reply}"
    try:
        read_back = _get("/api/memory")
        _assert_no_plaintext_credentials(json.dumps(read_back), "GET /api/memory")
        assert "hello world" in read_back["memory"]  # non-sensitive content preserved
    finally:
        # Always restore the operator's original memory contents.
        _post("/api/memory/write", {"section": "memory", "content": saved})
# ── startup: fix_credential_permissions ──────────────────────────────────────
def test_fix_credential_permissions_corrects_loose_files(tmp_path, monkeypatch):
    """fix_credential_permissions() tightens group/other read bits.

    Seeds a fake HERMES_HOME containing a world-readable .env and a
    group-readable google_token.json, then verifies both are chmod'ed to 0o600.

    Fixes: removed the unused ``import os`` and hoisted ``import stat`` to the
    top of the function instead of mid-body, after the behavior under test.
    """
    import stat
    from api.startup import fix_credential_permissions
    env_file = tmp_path / ".env"
    env_file.write_text("SECRET=abc")
    env_file.chmod(0o644)  # world-readable -- should be fixed
    google_file = tmp_path / "google_token.json"
    google_file.write_text("{}")
    google_file.chmod(0o664)  # group-readable -- should be fixed
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    fix_credential_permissions()
    assert stat.S_IMODE(env_file.stat().st_mode) == 0o600, ".env not fixed to 600"
    assert stat.S_IMODE(google_file.stat().st_mode) == 0o600, "google_token.json not fixed to 600"
def test_fix_credential_permissions_skips_correct_files(tmp_path, monkeypatch):
    """fix_credential_permissions() does not alter already-strict files."""
    import stat
    from api.startup import fix_credential_permissions
    env_file = tmp_path / ".env"
    env_file.write_text("SECRET=abc")
    env_file.chmod(0o600)  # already strict -- must stay 600
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    fix_credential_permissions()
    assert stat.S_IMODE(env_file.stat().st_mode) == 0o600

251
tests/test_session_ops.py Normal file
View File

@@ -0,0 +1,251 @@
"""End-to-end tests for /api/session/retry, /api/session/undo,
/api/session/status, /api/session/usage.
Tests run against the live test subprocess server (see tests/conftest.py).
We seed transcripts via POST /api/session/import (ignores incoming
session_id; returns a fresh one we register for cleanup).
"""
import json
import urllib.request
import urllib.error
import pytest
from tests.conftest import TEST_BASE, _post, make_session_tracked
def _get(path):
    """Fetch *path* from the test server and parse the JSON body.

    Raises urllib.error.HTTPError on any non-2xx status.
    """
    with urllib.request.urlopen(TEST_BASE + path, timeout=10) as resp:
        return json.loads(resp.read())
def _import_session_with_messages(cleanup_list, messages, model='openai/gpt-5.4-mini'):
    """Create a session pre-populated with *messages* via /api/session/import.

    Returns the server-assigned session_id (registered on *cleanup_list*).

    The import endpoint (api/routes.py:2588) accepts {title, messages, model,
    workspace, tool_calls, pinned} and IGNORES any incoming session_id --
    it always generates a fresh one via Session(...). We therefore use the
    id the server hands back, never a self-generated one.
    """
    payload = {
        'title': 'test',
        'messages': messages,
        'model': model,
    }
    reply = _post(TEST_BASE, '/api/session/import', payload)
    assert reply.get('ok') is True and 'session' in reply, f"Import failed: {reply}"
    sid = reply['session']['session_id']
    cleanup_list.append(sid)
    return sid
# -- /api/session/retry ----------------------------------------------------
def test_retry_returns_last_user_text(cleanup_test_sessions):
    """Retry reports the resent user text and how many trailing messages it dropped."""
    transcript = [
        {'role': 'user', 'content': 'first user msg'},
        {'role': 'assistant', 'content': 'first reply'},
        {'role': 'user', 'content': 'second user msg'},
        {'role': 'assistant', 'content': 'second reply'},
        {'role': 'tool', 'content': 'tool output'},
    ]
    sid = _import_session_with_messages(cleanup_test_sessions, transcript)
    reply = _post(TEST_BASE, '/api/session/retry', {'session_id': sid})
    assert reply.get('ok') is True, reply
    assert reply.get('last_user_text') == 'second user msg'
    # 'second user msg' + 'second reply' + 'tool output' are removed.
    assert reply.get('removed_count') == 3
def test_retry_truncates_transcript(cleanup_test_sessions):
    """After retry, the transcript is cut back to the exchange before the resent message."""
    sid = _import_session_with_messages(cleanup_test_sessions, [
        {'role': 'user', 'content': 'first user msg'},
        {'role': 'assistant', 'content': 'first reply'},
        {'role': 'user', 'content': 'second user msg'},
        {'role': 'assistant', 'content': 'second reply'},
    ])
    _post(TEST_BASE, '/api/session/retry', {'session_id': sid})
    transcript = _get(f'/api/session?session_id={sid}')['session']['messages']
    # Only the first exchange (2 messages) survives the truncation.
    assert len(transcript) == 2
    assert transcript[-1]['content'] == 'first reply'
def test_retry_no_user_returns_error(cleanup_test_sessions):
    """A transcript with no user message cannot be retried."""
    sid = _import_session_with_messages(cleanup_test_sessions, [
        {'role': 'assistant', 'content': 'orphan reply'},
    ])
    reply = _post(TEST_BASE, '/api/session/retry', {'session_id': sid})
    assert 'error' in reply
    assert 'no previous message' in reply['error'].lower()
def test_retry_unknown_session_returns_404():
    """An unknown session id yields the 404 error payload."""
    # _post catches HTTPError and returns the body as JSON;
    # bad(handler, ..., 404) sends 404 + {error: "..."}.
    reply = _post(TEST_BASE, '/api/session/retry', {'session_id': 'nonexistent_zzz'})
    assert 'error' in reply
    assert 'not found' in reply['error'].lower()
def test_retry_missing_session_id_returns_error():
    """Omitting session_id entirely yields an error payload."""
    assert 'error' in _post(TEST_BASE, '/api/session/retry', {})
def test_retry_does_not_double_append(cleanup_test_sessions):
    """After /api/session/retry, the truncated transcript must end at the
    message BEFORE the last user message. Critical assertion: no duplicate
    of the resent user message gets left behind in the truncated transcript.
    """
    sid = _import_session_with_messages(cleanup_test_sessions, [
        {'role': 'user', 'content': 'msg A'},
        {'role': 'assistant', 'content': 'reply A'},
        {'role': 'user', 'content': 'msg B'},
        {'role': 'assistant', 'content': 'reply B'},
    ])
    r = _post(TEST_BASE, '/api/session/retry', {'session_id': sid})
    assert r['removed_count'] == 2  # msg B + reply B
    sess = _get(f'/api/session?session_id={sid}')['session']
    msgs = sess['messages']
    # Only msg A + reply A remain. Critically: there is NO 'msg B' anywhere.
    assert len(msgs) == 2
    assert msgs[0]['content'] == 'msg A'
    assert msgs[1]['content'] == 'reply A'


def test_retry_concurrent_requests_are_safe(cleanup_test_sessions):
    """Two concurrent /api/session/retry calls on the same session must not
    leave the transcript in a torn or doubly-truncated state.
    Pre-fix race: get_session() outside `with LOCK:` could return a stale
    (non-cached) Session instance to one thread; both threads then mutated
    different in-memory objects, and the second s.save() overwrote the
    first with stale data. The fix re-binds `s = SESSIONS.get(sid, s)`
    inside the lock so both threads converge on the canonical instance.
    """
    from concurrent.futures import ThreadPoolExecutor
    sid = _import_session_with_messages(cleanup_test_sessions, [
        {'role': 'user', 'content': 'msg A'},
        {'role': 'assistant', 'content': 'reply A'},
        {'role': 'user', 'content': 'msg B'},
        {'role': 'assistant', 'content': 'reply B'},
    ])

    def _do_retry():
        # Each worker issues an independent retry against the same session.
        return _post(TEST_BASE, '/api/session/retry', {'session_id': sid})

    with ThreadPoolExecutor(max_workers=4) as ex:
        futures = [ex.submit(_do_retry) for _ in range(4)]
        # Collect results so any worker exception propagates here.
        results = [f.result() for f in futures]
    # Each call either succeeds (truncating further) or raises 'no previous
    # message to retry' once nothing is left. After the dust settles, the
    # transcript must be a strict prefix of the original — never have a
    # phantom duplicate of the resent message.
    sess = _get(f'/api/session?session_id={sid}')['session']
    msgs = sess['messages']
    valid_prefixes = (
        [],
        [{'role': 'user', 'content': 'msg A'}, {'role': 'assistant', 'content': 'reply A'}],
        [{'role': 'user', 'content': 'msg A'}],
    )
    msg_pairs = [(m['role'], m.get('content', '')) for m in msgs]
    valid_pairs = [[(m['role'], m['content']) for m in p] for p in valid_prefixes]
    assert msg_pairs in valid_pairs, (
        f"Concurrent retries left transcript in unexpected state: {msg_pairs}. "
        "TOCTOU race in get_session/save likely re-introduced."
    )
# ── /api/session/undo ─────────────────────────────────────────────────────
def test_undo_returns_removed_preview(cleanup_test_sessions):
    """Undo drops the last exchange and echoes a preview of what was removed."""
    sid = _import_session_with_messages(cleanup_test_sessions, [
        {'role': 'user', 'content': 'first user msg'},
        {'role': 'assistant', 'content': 'first reply'},
        {'role': 'user', 'content': 'second user msg'},
        {'role': 'assistant', 'content': 'second reply'},
        {'role': 'tool', 'content': 'tool output'},
    ])
    r = _post(TEST_BASE, '/api/session/undo', {'session_id': sid})
    assert r.get('ok') is True
    # Last user message + its reply + trailing tool output = 3 removed.
    assert r.get('removed_count') == 3
    assert 'second user msg' in r.get('removed_preview', '')


def test_undo_truncates_transcript(cleanup_test_sessions):
    """After undo, only the first user/assistant exchange remains stored."""
    sid = _import_session_with_messages(cleanup_test_sessions, [
        {'role': 'user', 'content': 'first user msg'},
        {'role': 'assistant', 'content': 'first reply'},
        {'role': 'user', 'content': 'second user msg'},
        {'role': 'assistant', 'content': 'second reply'},
    ])
    _post(TEST_BASE, '/api/session/undo', {'session_id': sid})
    sess = _get(f'/api/session?session_id={sid}')['session']
    assert len(sess['messages']) == 2
    assert sess['messages'][-1]['content'] == 'first reply'


def test_undo_repeated_until_empty(cleanup_test_sessions):
    """A second undo on an emptied transcript reports 'nothing to undo'."""
    sid = _import_session_with_messages(cleanup_test_sessions, [
        {'role': 'user', 'content': 'msg A'},
        {'role': 'assistant', 'content': 'reply A'},
    ])
    _post(TEST_BASE, '/api/session/undo', {'session_id': sid})
    r = _post(TEST_BASE, '/api/session/undo', {'session_id': sid})
    assert 'error' in r
    assert 'nothing to undo' in r['error'].lower()


def test_undo_unknown_session_returns_404():
    """Unknown session id yields an error body (404 caught by _post)."""
    r = _post(TEST_BASE, '/api/session/undo', {'session_id': 'nonexistent_zzz'})
    assert 'error' in r
    assert 'not found' in r['error'].lower()
# ── /api/session/status ───────────────────────────────────────────────────
def test_status_returns_summary(cleanup_test_sessions):
    """Status endpoint returns id, title, counts, timestamps and run state."""
    sid = _import_session_with_messages(cleanup_test_sessions, [
        {'role': 'user', 'content': 'a'},
        {'role': 'assistant', 'content': 'b'},
        {'role': 'user', 'content': 'c'},
    ])
    r = _get(f'/api/session/status?session_id={sid}')
    assert r['session_id'] == sid
    assert r['title'] == 'test'
    assert r['message_count'] == 3
    assert 'model' in r
    assert 'workspace' in r
    assert 'created_at' in r
    assert 'updated_at' in r
    assert r['agent_running'] is False  # no active stream


def test_status_unknown_returns_404():
    """Unknown session id must surface as a raw HTTP 404 from _get."""
    try:
        _get('/api/session/status?session_id=nonexistent_zzz')
        pytest.fail('Expected HTTPError')
    except urllib.error.HTTPError as e:
        assert e.code == 404


def test_status_missing_param():
    """Missing session_id query parameter must return HTTP 400."""
    try:
        _get('/api/session/status')
        pytest.fail('Expected HTTPError')
    except urllib.error.HTTPError as e:
        assert e.code == 400


# ── /api/session/usage ────────────────────────────────────────────────────
def test_usage_returns_token_counts(cleanup_test_sessions):
    """A brand-new session reports zero input/output/total tokens."""
    sid, _ws = make_session_tracked(cleanup_test_sessions)
    # Usage on a new session: zero everything.
    r = _get(f'/api/session/usage?session_id={sid}')
    assert r['input_tokens'] == 0
    assert r['output_tokens'] == 0
    assert r['total_tokens'] == 0

View File

@@ -0,0 +1,155 @@
import json
import pathlib
import subprocess
import textwrap
REPO_ROOT = pathlib.Path(__file__).parent.parent.resolve()
# Static front-end bundles are read once at import time; the tests below
# assert on (and extract functions from) their raw text.
SESSIONS_JS = (REPO_ROOT / "static" / "sessions.js").read_text(encoding="utf-8")
STYLE_CSS = (REPO_ROOT / "static" / "style.css").read_text(encoding="utf-8")
I18N_JS = (REPO_ROOT / "static" / "i18n.js").read_text(encoding="utf-8")
def _extract_function(source: str, name: str) -> str:
marker = f"function {name}"
start = source.index(marker)
brace_start = source.index("{", start)
depth = 0
for idx in range(brace_start, len(source)):
ch = source[idx]
if ch == "{":
depth += 1
elif ch == "}":
depth -= 1
if depth == 0:
return source[start : idx + 1]
raise AssertionError(f"Could not extract {name}")
def _run_session_time_case(script_body: str) -> dict:
    """Run *script_body* under Node with the extracted sessions.js time helpers.

    Extracts the relative-time helper functions out of the real sessions.js
    bundle, prepends a stub `t()` translation function, and executes the
    whole thing with `node -e` in a forced-UTC timezone. The script is
    expected to write a single JSON object to stdout, which is returned.
    """
    functions = "\n\n".join(
        _extract_function(SESSIONS_JS, name)
        for name in (
            "_localDayOrdinal",
            "_sessionCalendarBoundaries",
            "_formatSessionDate",
            "_formatRelativeSessionTime",
            "_sessionTimeBucketLabel",
        )
    )
    # NOTE: doubled braces ({{ }}) below are f-string escapes producing
    # literal JS braces; ${{...}} produces a JS template-literal ${...}.
    script = textwrap.dedent(
        f"""
        process.env.TZ = 'UTC';
        const translations = {{
          session_time_unknown: 'Unknown',
          session_time_just_now: 'just now',
          session_time_minutes_ago: (n) => `${{n}} minute${{n === 1 ? '' : 's'}} ago`,
          session_time_hours_ago: (n) => `${{n}} hour${{n === 1 ? '' : 's'}} ago`,
          session_time_days_ago: (n) => `${{n}} day${{n === 1 ? '' : 's'}} ago`,
          session_time_last_week: 'last week',
          session_time_bucket_today: 'Today',
          session_time_bucket_yesterday: 'Yesterday',
          session_time_bucket_this_week: 'This week',
          session_time_bucket_last_week: 'Last week',
          session_time_bucket_older: 'Older',
        }};
        function t(key, ...args) {{
          const val = translations[key];
          return typeof val === 'function' ? val(...args) : val;
        }}
        {functions}
        {script_body}
        """
    )
    proc = subprocess.run(["node", "-e", script], check=True, capture_output=True, text=True)
    return json.loads(proc.stdout)
def test_session_sidebar_js_has_dynamic_relative_time_helpers():
    """sessions.js must define the relative-time helpers and bucket keys."""
    assert "function _sessionCalendarBoundaries" in SESSIONS_JS
    assert "function _formatRelativeSessionTime" in SESSIONS_JS
    assert "function _sessionTimeBucketLabel" in SESSIONS_JS
    assert "session_time_bucket_last_week" in SESSIONS_JS
    assert "session_time_bucket_this_week" in SESSIONS_JS
    assert "session_time_bucket_older" in SESSIONS_JS


def test_session_sidebar_renders_relative_time_and_meta_rows():
    """Sidebar markup/CSS hooks exist; removed legacy constructs stay removed."""
    # session-time element was removed from sessions.js in v0.50.40 to
    # give session titles full width — the CSS class is kept but set to display:none.
    # session-meta / metaBits were removed when we dropped message-count, model, and
    # source-tag badges from the sidebar (design round 2).
    assert "orderedSessions" in SESSIONS_JS
    assert ".session-time" in STYLE_CSS
    assert ".session-title-row" in STYLE_CSS
    assert ".session-item.active .session-title" in STYLE_CSS
    assert "|| _sessionTimeBucketLabel" not in SESSIONS_JS
    assert "const ONE_DAY=86400000;" not in SESSIONS_JS


def test_relative_time_uses_calendar_boundaries_and_year_for_old_sessions():
    """2-day-old session says '2 days ago'/'This week'; old ones show the year."""
    result = _run_session_time_case(
        """
        const now = Date.UTC(2026, 3, 15, 1, 0, 0);
        const mondayLate = Date.UTC(2026, 3, 13, 23, 0, 0);
        const oldSession = Date.UTC(2024, 2, 5, 12, 0, 0);
        process.stdout.write(JSON.stringify({
            relative: _formatRelativeSessionTime(mondayLate, now),
            bucket: _sessionTimeBucketLabel(mondayLate, now),
            oldDate: _formatRelativeSessionTime(oldSession, now),
        }));
        """
    )
    assert result["relative"] == "2 days ago"
    assert result["bucket"] == "This week"
    assert "2024" in result["oldDate"]


def test_relative_time_today_bucket():
    """Session from 2 hours ago should bucket as 'Today'."""
    result = _run_session_time_case(
        """
        const now = Date.UTC(2026, 3, 15, 14, 0, 0);
        const twoHoursAgo = now - 2 * 60 * 60 * 1000;
        process.stdout.write(JSON.stringify({
            relative: _formatRelativeSessionTime(twoHoursAgo, now),
            bucket: _sessionTimeBucketLabel(twoHoursAgo, now),
        }));
        """
    )
    assert result["relative"] == "2 hours ago"
    assert result["bucket"] == "Today"


def test_relative_time_handles_just_now_and_dst_safe_yesterday_boundary():
    """Sub-minute ages say 'just now'; late-evening yesterday stays 'Yesterday'."""
    result = _run_session_time_case(
        """
        const now = Date.UTC(2026, 2, 9, 12, 0, 0);
        const justNow = now - 30 * 1000;
        const yesterday = Date.UTC(2026, 2, 8, 23, 30, 0);
        process.stdout.write(JSON.stringify({
            justNow: _formatRelativeSessionTime(justNow, now),
            yesterday: _formatRelativeSessionTime(yesterday, now),
            yesterdayBucket: _sessionTimeBucketLabel(yesterday, now),
        }));
        """
    )
    assert result["justNow"] == "just now"
    assert result["yesterday"] == "Yesterday"
    assert result["yesterdayBucket"] == "Yesterday"


def test_relative_time_strings_are_localized_in_english_and_spanish_bundles():
    """Every relative-time i18n key must appear in the i18n.js bundle."""
    for key in (
        "session_time_unknown",
        "session_time_just_now",
        "session_time_minutes_ago",
        "session_time_hours_ago",
        "session_time_days_ago",
        "session_time_last_week",
        "session_time_bucket_today",
        "session_time_bucket_yesterday",
        "session_time_bucket_this_week",
        "session_time_bucket_last_week",
        "session_time_bucket_older",
    ):
        assert key in I18N_JS

View File

@@ -0,0 +1,66 @@
import json
import pathlib
import sys
import time
import urllib.parse
import urllib.request
import uuid
import pytest
sys.path.insert(0, str(pathlib.Path(__file__).parent.parent.parent))
# Every test in this file talks to the isolated test server fixture.
_needs_server = pytest.mark.usefixtures("test_server")
from tests._pytest_port import BASE
# Fake API key used to verify the server redacts secrets from responses.
_FULL_SECRET = "sk-" + ("B" * 24)
def _get(path):
    """GET ``BASE + path`` and return the decoded JSON response body."""
    response = urllib.request.urlopen(BASE + path, timeout=10)
    try:
        return json.loads(response.read())
    finally:
        response.close()
def _write_session_with_secret_title():
    """Write a minimal session JSON whose title embeds _FULL_SECRET.

    Bypasses the API and writes straight into the test server's state dir,
    so the server will pick it up from disk. Returns the new session id.
    """
    from tests.conftest import TEST_STATE_DIR
    sid = "sec_summary_" + uuid.uuid4().hex[:8]
    sessions_dir = TEST_STATE_DIR / "sessions"
    sessions_dir.mkdir(parents=True, exist_ok=True)
    now = time.time()
    # Minimal but complete session schema as expected by the server loader.
    (sessions_dir / f"{sid}.json").write_text(json.dumps({
        "session_id": sid,
        "title": f"session with {_FULL_SECRET}",
        "workspace": "/tmp",
        "model": "test",
        "created_at": now,
        "updated_at": now,
        "pinned": False,
        "archived": False,
        "project_id": None,
        "profile": "default",
        "input_tokens": 0,
        "output_tokens": 0,
        "estimated_cost": None,
        "personality": None,
        "messages": [],
        "tool_calls": [],
    }))
    return sid
@_needs_server
def test_api_sessions_search_redacts_titles(test_server):
    """Search results must include the session but never the raw secret."""
    sid = _write_session_with_secret_title()
    data = _get("/api/sessions/search?q=" + urllib.parse.quote("B" * 24))
    dump = json.dumps(data)
    assert sid in dump
    assert _FULL_SECRET not in dump


@_needs_server
def test_api_sessions_list_redacts_secret_titles(test_server):
    """The plain sessions listing must also redact secrets from titles."""
    sid = _write_session_with_secret_title()
    data = _get("/api/sessions")
    dump = json.dumps(data)
    assert sid in dump
    assert _FULL_SECRET not in dump

View File

@@ -0,0 +1,45 @@
from pathlib import Path
import re
REPO = Path(__file__).resolve().parent.parent
def read(path: Path) -> str:
    """Return the UTF-8 decoded contents of *path*."""
    with path.open(encoding="utf-8") as handle:
        return handle.read()
def test_spanish_locale_block_exists():
    """i18n.js must contain an `es` locale block with label and speech tag."""
    src = read(REPO / "static" / "i18n.js")
    assert "\n  es: {" in src
    assert "_label: 'Español'" in src
    assert "_speech: 'es-ES'" in src


def test_spanish_locale_includes_representative_translations():
    """Spot-check a handful of translated strings in the Spanish block."""
    src = read(REPO / "static" / "i18n.js")
    expected = [
        "settings_title: 'Configuración'",
        "login_title: 'Iniciar sesión'",
        "approval_heading: 'Se requiere aprobación'",
        "tab_tasks: 'Tareas'",
        "tab_skills: 'Habilidades'",
        "tab_memory: 'Memoria'",
    ]
    for entry in expected:
        assert entry in src


def test_spanish_locale_covers_english_keys():
    """Every key in the English block must also exist in the Spanish block."""
    src = read(REPO / "static" / "i18n.js")
    # Slice out each locale block by its literal `  xx: {` ... `  },` framing;
    # relies on en preceding es, and es preceding de, in the bundle.
    en_match = re.search(r"\n  en: \{([\s\S]*?)\n  \},\n\n  es: \{", src)
    es_match = re.search(r"\n  es: \{([\s\S]*?)\n  \},\n\n  de: \{", src)
    assert en_match, "English locale block not found"
    assert es_match, "Spanish locale block not found"
    # Keys are 4-space-indented `identifier:` lines inside each block.
    key_pattern = re.compile(r"^\s{4}([a-zA-Z0-9_]+):", re.MULTILINE)
    en_keys = set(key_pattern.findall(en_match.group(1)))
    es_keys = set(key_pattern.findall(es_match.group(1)))
    missing = sorted(en_keys - es_keys)
    assert not missing, f"Spanish locale missing keys: {missing}"

440
tests/test_sprint1.py Normal file
View File

@@ -0,0 +1,440 @@
"""
Sprint 1 test suite for the Hermes Web UI.
Tests use the ISOLATED test server. Port is auto-derived per worktree (see conftest.py).
Production server (port 8787) and your real conversations are never touched.
Start the server before running:
<repo>/start.sh
# wait 2 seconds
pytest webui-mvp/tests/test_sprint1.py -v
All tests are HTTP-level: they call real API endpoints and verify responses.
No mocking required for session CRUD, upload parser, or approval API.
"""
import io
import json
import os
import sys
import time
import uuid
import urllib.request
import urllib.parse
import urllib.error
import tempfile
import pathlib
# Allow importing server modules directly for unit tests
sys.path.insert(0, str(pathlib.Path(__file__).parent.parent.parent))
from tests._pytest_port import BASE
# ──────────────────────────────────────────────
# HTTP helpers
# ──────────────────────────────────────────────
def get(path):
    """GET ``BASE + path``; return the decoded JSON body (raises on HTTP errors)."""
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        return json.loads(resp.read())
def post(path, body=None):
    """POST *body* as JSON to ``BASE + path``.

    Returns ``(decoded_json, status)`` for both success and HTTP error
    responses (error bodies are parsed instead of raising).
    """
    payload = json.dumps(body if body else {}).encode()
    request = urllib.request.Request(
        BASE + path,
        data=payload,
        headers={"Content-Type": "application/json"},
    )
    try:
        response = urllib.request.urlopen(request, timeout=10)
    except urllib.error.HTTPError as err:
        return json.loads(err.read()), err.code
    with response:
        return json.loads(response.read()), response.status
def post_multipart(path, fields, files):
    """Post a multipart/form-data request. files: {name: (filename, bytes)}

    Returns (decoded_json, status) for both success and HTTP error responses.
    """
    boundary = uuid.uuid4().hex.encode()
    body = b""
    for name, value in fields.items():
        body += b"--" + boundary + b"\r\n"
        body += f"Content-Disposition: form-data; name=\"{name}\"\r\n\r\n".encode()
        body += value.encode() + b"\r\n"
    for name, (filename, data) in files.items():
        body += b"--" + boundary + b"\r\n"
        # Bug fix: the filename was hardcoded as "(unknown)" instead of
        # interpolating the caller-supplied name; tests assert the server
        # echoes the uploaded filename, so it must actually be sent.
        body += f"Content-Disposition: form-data; name=\"{name}\"; filename=\"{filename}\"\r\n".encode()
        body += b"Content-Type: application/octet-stream\r\n\r\n"
        body += data + b"\r\n"
    body += b"--" + boundary + b"--\r\n"
    req = urllib.request.Request(BASE + path, data=body,
                                 headers={"Content-Type": f"multipart/form-data; boundary={boundary.decode()}"})
    try:
        with urllib.request.urlopen(req, timeout=10) as r:
            return json.loads(r.read()), r.status
    except urllib.error.HTTPError as e:
        return json.loads(e.read()), e.code
def make_session_tracked(created_list, ws=None):
    """Create a session and register it with the cleanup fixture.

    Returns (session_id, workspace_path). Appending to *created_list*
    lets the cleanup_test_sessions fixture delete it on teardown.
    """
    body = {}
    if ws: body["workspace"] = str(ws)
    d, _ = post("/api/session/new", body)
    sid = d["session"]["session_id"]
    created_list.append(sid)
    return sid, pathlib.Path(d["session"]["workspace"])
# ──────────────────────────────────────────────
# Health check (prerequisite for all tests)
# ──────────────────────────────────────────────
def test_health():
    """Server must be running and healthy."""
    data = get("/health")
    assert data["status"] == "ok", f"health not ok: {data}"


# ──────────────────────────────────────────────
# B11: /api/session GET footgun fix
# ──────────────────────────────────────────────
def test_session_get_no_id_returns_400():
    """B11: GET /api/session with no session_id must return 400, not silently create."""
    try:
        data = get("/api/session")
        # If we get here, the server returned 200 (old broken behavior)
        assert False, f"Expected 400 but got 200: {data}"
    except urllib.error.HTTPError as e:
        assert e.code == 400, f"Expected 400, got {e.code}"
        body = json.loads(e.read())
        assert "error" in body
# ──────────────────────────────────────────────
# Session CRUD
# ──────────────────────────────────────────────
def test_session_create_and_load():
    """Create a session, verify it appears in /api/sessions, load it."""
    data, status = post("/api/session/new", {"model": "openai/gpt-5.4-mini"})
    assert status == 200, f"Expected 200, got {status}: {data}"
    assert "session" in data
    sid = data["session"]["session_id"]
    assert len(sid) == 12  # uuid4().hex[:12]
    # Give it a title so it's visible in the session list (empty Untitled sessions are filtered)
    post("/api/session/rename", {"session_id": sid, "title": "test-create-verify"})
    # Verify it appears in /api/sessions list
    sessions = get("/api/sessions")
    sids = [s["session_id"] for s in sessions["sessions"]]
    assert sid in sids, f"New session {sid} not in sessions list"
    # Load it directly
    loaded = get(f"/api/session?session_id={sid}")
    assert loaded["session"]["session_id"] == sid
    assert loaded["session"]["messages"] == []
    # Cleanup
    post("/api/session/delete", {"session_id": sid})
def test_session_update():
    """Create session, update workspace and model, verify persisted."""
    data, _ = post("/api/session/new", {})
    sid = data["session"]["session_id"]
    current_ws = pathlib.Path(data["session"]["workspace"])
    child_ws = current_ws / f"session-update-{uuid.uuid4().hex[:6]}"
    child_ws.mkdir(parents=True, exist_ok=True)
    try:
        updated, status = post("/api/session/update", {
            "session_id": sid,
            "workspace": str(child_ws),
            "model": "anthropic/claude-sonnet-4.6"
        })
        assert status == 200
        assert updated["session"]["model"] == "anthropic/claude-sonnet-4.6"
        # Reload and verify persistence
        reloaded = get(f"/api/session?session_id={sid}")
        assert reloaded["session"]["model"] == "anthropic/claude-sonnet-4.6"
    finally:
        # Fix: sibling tests delete their sessions; this one leaked its
        # session into the test state dir. Clean up even on failure.
        post("/api/session/delete", {"session_id": sid})
def test_session_delete():
    """Create session, delete it, verify it no longer loads."""
    data, _ = post("/api/session/new", {})
    sid = data["session"]["session_id"]
    result, status = post("/api/session/delete", {"session_id": sid})
    assert status == 200
    assert result.get("ok") is True
    # Trying to load it should now 404/500 (KeyError -> 500 in current handler)
    try:
        get(f"/api/session?session_id={sid}")
        assert False, "Expected error loading deleted session"
    except urllib.error.HTTPError as e:
        assert e.code in (404, 500), f"Expected 404 or 500, got {e.code}"


def test_session_delete_nonexistent():
    """Deleting a nonexistent session should return ok:True (idempotent)."""
    result, status = post("/api/session/delete", {"session_id": "doesnotexist"})
    assert status == 200
    assert result.get("ok") is True


def test_sessions_list_sorted():
    """Sessions list should be sorted most-recently-updated first."""
    # Create two sessions with a title so they're visible (empty Untitled sessions are filtered)
    a, _ = post("/api/session/new", {})
    time.sleep(0.05)
    b, _ = post("/api/session/new", {})
    sid_a = a["session"]["session_id"]
    sid_b = b["session"]["session_id"]
    post("/api/session/rename", {"session_id": sid_a, "title": "test-sort-a"})
    time.sleep(0.05)
    post("/api/session/rename", {"session_id": sid_b, "title": "test-sort-b"})
    sessions = get("/api/sessions")
    sids = [s["session_id"] for s in sessions["sessions"]]
    # b was updated more recently, should appear before a
    assert sids.index(sid_b) < sids.index(sid_a), \
        "Sessions not sorted by updated_at desc"
    # Cleanup
    post("/api/session/delete", {"session_id": sid_a})
    post("/api/session/delete", {"session_id": sid_b})
# ──────────────────────────────────────────────
# Upload parser unit tests (pure function, no HTTP)
# ──────────────────────────────────────────────
def test_parse_multipart_text_file():
    """parse_multipart correctly parses a text file field.

    The function lives in api/upload.py; rather than importing the module
    (which pulls in server wiring), extract just the function source and
    exec it in an isolated namespace.
    """
    # Cleanup: dropped the unused importlib.util.spec_from_file_location()
    # call (its result was never used) and the redundant sys.path.insert
    # already done at module import time; also corrected the assert message,
    # which claimed the source was server.py while api/upload.py is read.
    src = pathlib.Path(__file__).parent.parent.joinpath("api/upload.py").read_text()
    import re
    # Grab the full body of parse_multipart up to the next top-level def.
    m = re.search(r"(def parse_multipart\(.*?)(?=\ndef )", src, re.DOTALL)
    assert m, "Could not find parse_multipart in api/upload.py"
    ns = {}
    exec("import re as _re, email.parser as _ep\n" + m.group(1), ns)
    parse_multipart = ns["parse_multipart"]
    # Build a minimal multipart body
    boundary = b"testboundary"
    body = (
        b"--testboundary\r\n"
        b"Content-Disposition: form-data; name=\"session_id\"\r\n\r\n"
        b"abc123\r\n"
        b"--testboundary\r\n"
        b"Content-Disposition: form-data; name=\"file\"; filename=\"hello.txt\"\r\n"
        b"Content-Type: text/plain\r\n\r\n"
        b"hello world\r\n"
        b"--testboundary--\r\n"
    )
    fields, files = parse_multipart(
        io.BytesIO(body),
        "multipart/form-data; boundary=testboundary",
        len(body)
    )
    assert fields.get("session_id") == "abc123", f"fields: {fields}"
    assert "file" in files, f"files: {files}"
    filename, content = files["file"]
    assert filename == "hello.txt"
    assert content == b"hello world"
def test_parse_multipart_binary_file():
    """parse_multipart handles binary (PNG header bytes) without corruption."""
    src = pathlib.Path(__file__).parent.parent.joinpath("api/upload.py").read_text()
    import re
    # Extract just the parse_multipart function and exec it in isolation.
    m = re.search(r"(def parse_multipart\(.*?)(?=\ndef )", src, re.DOTALL)
    ns = {}
    exec("import re as _re, email.parser as _ep\n" + m.group(1), ns)
    parse_multipart = ns["parse_multipart"]
    # Fake PNG: first 8 bytes of PNG magic
    png_magic = b"\x89PNG\r\n\x1a\n"
    boundary = b"binboundary"
    body = (
        b"--binboundary\r\n"
        b"Content-Disposition: form-data; name=\"session_id\"\r\n\r\n"
        b"sess1\r\n"
        b"--binboundary\r\n"
        b"Content-Disposition: form-data; name=\"file\"; filename=\"test.png\"\r\n"
        b"Content-Type: image/png\r\n\r\n" + png_magic + b"\r\n"
        b"--binboundary--\r\n"
    )
    fields, files = parse_multipart(
        io.BytesIO(body),
        "multipart/form-data; boundary=binboundary",
        len(body)
    )
    assert "file" in files
    filename, content = files["file"]
    assert filename == "test.png"
    # The PNG magic contains \r\n and \x1a; any text-mode mangling would alter it.
    assert content == png_magic, f"Binary content corrupted: {content!r}"
# ──────────────────────────────────────────────
# File upload via HTTP
# ──────────────────────────────────────────────
def test_upload_text_file(cleanup_test_sessions):
    """Upload a text file to a session workspace, verify it appears in /api/list."""
    sid, ws = make_session_tracked(cleanup_test_sessions)
    result, status = post_multipart("/api/upload", {"session_id": sid}, {
        "file": ("test_upload.txt", b"sprint1 test content")
    })
    assert status == 200, f"Upload failed {status}: {result}"
    assert "filename" in result
    assert result["size"] == len(b"sprint1 test content")
    # Verify file appears in listing
    listing = get(f"/api/list?session_id={sid}&path=.")
    names = [e["name"] for e in listing["entries"]]
    assert result["filename"] in names, f"{result['filename']} not in {names}"
    # Cleanup the uploaded file
    post("/api/file/delete", {"session_id": sid, "path": result["filename"]})


def test_upload_too_large(cleanup_test_sessions):
    """Uploading a file over MAX_UPLOAD_BYTES is rejected (413 or connection closed)."""
    sid, _ = make_session_tracked(cleanup_test_sessions)
    # 21MB > 20MB limit
    big = b"x" * (21 * 1024 * 1024)
    try:
        result, status = post_multipart("/api/upload", {"session_id": sid}, {
            "file": ("big.bin", big)
        })
        # If we get a response it should be 413
        assert status == 413, f"Expected 413, got {status}: {result}"
    except (urllib.error.URLError, ConnectionResetError, BrokenPipeError):
        # Server closed connection after reading Content-Length > limit before body
        # This is also valid rejection behavior
        pass


def test_upload_no_file_field(cleanup_test_sessions):
    """Upload with no file field returns 400."""
    sid, _ = make_session_tracked(cleanup_test_sessions)
    result, status = post_multipart("/api/upload", {"session_id": sid}, {})
    assert status == 400, f"Expected 400, got {status}: {result}"


def test_upload_bad_session():
    """Upload to nonexistent session returns 404."""
    result, status = post_multipart("/api/upload", {"session_id": "nosuchsession"}, {
        "file": ("x.txt", b"data")
    })
    assert status == 404, f"Expected 404, got {status}: {result}"
# ──────────────────────────────────────────────
# Approval API
# ──────────────────────────────────────────────
def test_approval_pending_none():
    """GET /api/approval/pending for a session with no pending entry returns null."""
    data = get("/api/approval/pending?session_id=no_such_session")
    assert data["pending"] is None


def test_approval_submit_and_respond():
    """Inject a pending approval via server endpoint, retrieve it, respond with deny."""
    test_sid = f"test-approval-{uuid.uuid4().hex[:6]}"
    cmd = "rm -rf /tmp/testdir"
    key = "recursive_delete"
    # Inject into server process via test endpoint (shared module state)
    inject = get(f"/api/approval/inject_test?session_id={urllib.parse.quote(test_sid)}&pattern_key={key}&command={urllib.parse.quote(cmd)}")
    assert inject["ok"] is True
    # Poll should now show the pending entry
    data = get(f"/api/approval/pending?session_id={urllib.parse.quote(test_sid)}")
    assert data["pending"] is not None, "Pending entry not visible after inject"
    assert data["pending"]["command"] == cmd
    # Respond with deny
    result, status = post("/api/approval/respond", {
        "session_id": test_sid,
        "choice": "deny"
    })
    assert status == 200
    assert result["ok"] is True
    assert result["choice"] == "deny"
    # Pending should be gone
    data2 = get(f"/api/approval/pending?session_id={urllib.parse.quote(test_sid)}")
    assert data2["pending"] is None, "Pending entry should be cleared after respond"


def test_approval_respond_allow_session():
    """Inject pending entry, respond with session choice, verify cleared (approved)."""
    test_sid = f"test-approval-sess-{uuid.uuid4().hex[:6]}"
    inject = get(f"/api/approval/inject_test?session_id={urllib.parse.quote(test_sid)}&pattern_key=force_kill&command=pkill+-9+someproc")
    assert inject["ok"] is True
    result, status = post("/api/approval/respond", {
        "session_id": test_sid,
        "choice": "session"
    })
    assert status == 200
    assert result["ok"] is True
    assert result["choice"] == "session"
    # After session approval, pending should be cleared
    data = get(f"/api/approval/pending?session_id={urllib.parse.quote(test_sid)}")
    assert data["pending"] is None, "Pending entry should be cleared after session approval"
# ──────────────────────────────────────────────
# Stream status endpoint (B4/B5)
# ──────────────────────────────────────────────
def test_stream_status_unknown_id():
    """GET /api/chat/stream/status for unknown stream_id returns active:false."""
    data = get("/api/chat/stream/status?stream_id=doesnotexist")
    assert data["active"] is False


# ──────────────────────────────────────────────
# File browser
# ──────────────────────────────────────────────
def test_list_dir(cleanup_test_sessions):
    """List workspace directory for a session."""
    sid, _ = make_session_tracked(cleanup_test_sessions)
    listing = get(f"/api/list?session_id={sid}&path=.")
    assert "entries" in listing
    assert isinstance(listing["entries"], list)


def test_list_dir_path_traversal(cleanup_test_sessions):
    """Path traversal via ../.. should be blocked (500 or 400)."""
    sid, _ = make_session_tracked(cleanup_test_sessions)
    try:
        listing = get(f"/api/list?session_id={sid}&path=../../etc")
        # If server returns entries outside workspace root, that is a bug
        # (safe_resolve should raise ValueError)
        assert False, f"Expected error for path traversal, got: {listing}"
    except urllib.error.HTTPError as e:
        assert e.code in (400, 404, 500), f"Expected 400/404/500 for traversal, got {e.code}"

139
tests/test_sprint10.py Normal file
View File

@@ -0,0 +1,139 @@
"""
Sprint 10 Tests: server.py split, cancel endpoint, cron history, tool card polish.
"""
import json, pathlib, urllib.error, urllib.request, urllib.parse
REPO_ROOT = pathlib.Path(__file__).parent.parent.resolve()
from tests._pytest_port import BASE
def get(path):
    # GET BASE+path; returns (json-body, status). Raises HTTPError on 4xx/5xx.
    with urllib.request.urlopen(BASE + path, timeout=10) as r:
        return json.loads(r.read()), r.status


def get_text(path):
    # GET BASE+path; returns (decoded-text-body, status) for non-JSON assets.
    with urllib.request.urlopen(BASE + path, timeout=10) as r:
        return r.read().decode(), r.status


def post(path, body=None):
    # POST JSON to BASE+path; returns (json-body, status) even on HTTP errors.
    data = json.dumps(body or {}).encode()
    req = urllib.request.Request(BASE + path, data=data,
                                 headers={"Content-Type": "application/json"})
    try:
        with urllib.request.urlopen(req, timeout=10) as r:
            return json.loads(r.read()), r.status
    except urllib.error.HTTPError as e:
        return json.loads(e.read()), e.code


def make_session(created_list):
    # Create a session, register it for fixture cleanup, return its id.
    d, _ = post("/api/session/new", {})
    sid = d["session"]["session_id"]
    created_list.append(sid)
    return sid
# ── server.py split: api/ modules served / importable ─────────────────────
def test_health_still_works(cleanup_test_sessions):
    """/health survives the server.py split and still reports uptime/streams."""
    data, status = get("/health")
    assert status == 200
    assert data["status"] == "ok"
    assert "uptime_seconds" in data
    assert "active_streams" in data


def test_api_modules_exist(cleanup_test_sessions):
    """All api/ module files must exist on disk."""
    base = REPO_ROOT / "api"
    for mod in ["__init__.py", "config.py", "helpers.py", "models.py",
                "workspace.py", "upload.py", "streaming.py"]:
        assert (base / mod).exists(), f"Missing api/{mod}"


def test_server_py_under_750_lines(cleanup_test_sessions):
    """server.py should be under 750 lines after the split."""
    lines = len((REPO_ROOT / "server.py").read_text().splitlines())
    assert lines < 750, f"server.py is {lines} lines -- split may not have landed"


def test_api_config_has_cancel_flags(cleanup_test_sessions):
    """api/config.py must own the shared CANCEL_FLAGS / STREAMS state."""
    src = (REPO_ROOT / "api/config.py").read_text()
    assert "CANCEL_FLAGS" in src
    assert "STREAMS" in src


def test_session_crud_still_works(cleanup_test_sessions):
    """Full session lifecycle works after split."""
    created = []
    sid = make_session(created)
    data, status = get(f"/api/session?session_id={urllib.parse.quote(sid)}")
    assert status == 200
    assert data["session"]["session_id"] == sid
    post("/api/session/delete", {"session_id": sid})


def test_static_files_still_served(cleanup_test_sessions):
    """Every split-out JS bundle must still be served with real content."""
    for f in ["ui.js", "workspace.js", "sessions.js", "messages.js", "panels.js", "boot.js"]:
        src, status = get_text(f"/static/{f}")
        assert status == 200, f"/static/{f} returned {status}"
        assert len(src) > 100
# ── Cancel endpoint ────────────────────────────────────────────────────────
def test_cancel_requires_stream_id(cleanup_test_sessions):
    """/api/chat/cancel without stream_id must 400 (either return or raise)."""
    try:
        data, status = get("/api/chat/cancel")
        assert status == 400
    except urllib.error.HTTPError as e:
        assert e.code == 400


def test_cancel_nonexistent_stream(cleanup_test_sessions):
    """Cancelling an unknown stream succeeds but reports cancelled:false."""
    data, status = get("/api/chat/cancel?stream_id=nonexistent_xyz")
    assert status == 200
    assert data["ok"] is True
    assert data["cancelled"] is False


def test_cancel_button_in_html(cleanup_test_sessions):
    """The index page must wire up the cancel button and handler."""
    src, _ = get_text("/")
    assert "btnCancel" in src
    assert "cancelStream" in src


def test_cancel_function_in_boot_js(cleanup_test_sessions):
    """boot.js must define cancelStream() calling the cancel endpoint."""
    src, _ = get_text("/static/boot.js")
    assert "async function cancelStream(" in src
    assert "api/chat/cancel" in src


# ── Cron history ───────────────────────────────────────────────────────────
def test_crons_output_limit_param(cleanup_test_sessions):
    """Server accepts limit parameter > 1."""
    data, status = get("/api/crons/output?job_id=nonexistent&limit=20")
    # 404 or 200 with empty -- both acceptable for nonexistent job
    assert status in (200, 404)


def test_cron_history_button_in_panels_js(cleanup_test_sessions):
    """panels.js must expose the cron history loader and its i18n key."""
    src, _ = get_text("/static/panels.js")
    assert "loadCronHistory" in src
    assert "cron_all_runs" in src  # i18n key (was hardcoded 'All runs' before i18n hardening)


def test_cron_output_snippet_helper(cleanup_test_sessions):
    """panels.js must keep the _cronOutputSnippet truncation helper."""
    src, _ = get_text("/static/panels.js")
    assert "_cronOutputSnippet" in src


# ── Tool card polish ───────────────────────────────────────────────────────
def test_tool_card_running_dot_in_css(cleanup_test_sessions):
    """style.css must style the running-indicator dot on tool cards."""
    src, _ = get_text("/static/style.css")
    assert "tool-card-running-dot" in src


def test_tool_card_show_more_in_ui_js(cleanup_test_sessions):
    """ui.js must render the 'Show more' expander for tool cards."""
    src, _ = get_text("/static/ui.js")
    assert "Show more" in src
    assert "tool-card-more" in src


def test_tool_card_smart_truncation_in_ui_js(cleanup_test_sessions):
    """ui.js must keep the word-boundary-aware snippet truncation."""
    src, _ = get_text("/static/ui.js")
    assert "displaySnippet" in src
    assert "lastBreak" in src


def test_cancel_sse_event_handler_in_messages_js(cleanup_test_sessions):
    """messages.js must handle the SSE 'cancel' event and show the notice."""
    src, _ = get_text("/static/messages.js")
    assert "addEventListener('cancel'" in src
    assert "Task cancelled" in src


def test_active_stream_id_tracked(cleanup_test_sessions):
    """messages.js must track the active stream id in shared state S."""
    src, _ = get_text("/static/messages.js")
    assert "S.activeStreamId" in src

101
tests/test_sprint11.py Normal file
View File

@@ -0,0 +1,101 @@
"""
Sprint 11 Tests: multi-provider model support, streaming smoothness, routes extraction.
"""
import json, pathlib, urllib.error, urllib.request, urllib.parse
REPO_ROOT = pathlib.Path(__file__).parent.parent.resolve()
from tests._pytest_port import BASE
def get(path):
with urllib.request.urlopen(BASE + path, timeout=10) as r:
return json.loads(r.read()), r.status
def post(path, body=None):
data = json.dumps(body or {}).encode()
req = urllib.request.Request(BASE + path, data=data,
headers={"Content-Type": "application/json"})
try:
with urllib.request.urlopen(req, timeout=10) as r:
return json.loads(r.read()), r.status
except urllib.error.HTTPError as e:
return json.loads(e.read()), e.code
# ── /api/models endpoint ──────────────────────────────────────────────────
def test_models_endpoint_returns_200():
"""GET /api/models returns a valid response."""
d, status = get("/api/models")
assert status == 200
def test_models_has_required_fields():
"""Response includes groups, default_model, and active_provider."""
d, _ = get("/api/models")
assert 'groups' in d
assert 'default_model' in d
assert 'active_provider' in d
def test_models_groups_structure():
"""Each group has provider name and models list."""
d, _ = get("/api/models")
assert isinstance(d['groups'], list)
assert len(d['groups']) > 0
for group in d['groups']:
assert 'provider' in group
assert 'models' in group
assert isinstance(group['models'], list)
assert len(group['models']) > 0
def test_models_model_structure():
"""Each model has id and label."""
d, _ = get("/api/models")
for group in d['groups']:
for model in group['models']:
assert 'id' in model
assert 'label' in model
assert isinstance(model['id'], str)
assert isinstance(model['label'], str)
assert len(model['id']) > 0
assert len(model['label']) > 0
def test_models_default_model_not_empty():
"""When HERMES_WEBUI_DEFAULT_MODEL env var is set (as in conftest), the
/api/models response includes a non-empty default_model string."""
d, _ = get("/api/models")
assert isinstance(d['default_model'], str)
# conftest sets HERMES_WEBUI_DEFAULT_MODEL to "openai/gpt-5.4-mini", so
# this value should be non-empty in the test environment.
# When no env var is set (production with empty default), default_model
# can be "" — that is intentional (see PR #649).
assert len(d['default_model']) > 0 # only holds because conftest sets the env var
def test_models_at_least_one_provider():
"""At least one provider group should exist (fallback list at minimum)."""
d, _ = get("/api/models")
providers = [g['provider'] for g in d['groups']]
assert len(providers) >= 1
def test_models_no_duplicate_ids():
"""Model IDs should not be duplicated within a single group."""
d, _ = get("/api/models")
for group in d['groups']:
ids = [m['id'] for m in group['models']]
assert len(ids) == len(set(ids)), f"Duplicate model IDs in {group['provider']}: {ids}"
def test_session_preserves_unlisted_model():
"""A session with a model not in the dropdown should still load correctly."""
# Create a session with a custom model string
d, _ = post("/api/session/new", {})
sid = d['session']['session_id']
try:
custom_model = 'custom-provider/test-model-999'
post("/api/session/update", {
'session_id': sid,
'model': custom_model,
'workspace': d['session']['workspace']
})
# Reload and verify model persisted
d2, _ = get(f"/api/session?session_id={sid}")
assert d2['session']['model'] == custom_model
finally:
post("/api/session/delete", {'session_id': sid})

179
tests/test_sprint12.py Normal file
View File

@@ -0,0 +1,179 @@
"""
Sprint 12 Tests: settings panel, session pinning, session import, SSE reconnect.
"""
import json, pathlib, urllib.error, urllib.request, urllib.parse
from tests._pytest_port import BASE
def get(path):
with urllib.request.urlopen(BASE + path, timeout=10) as r:
return json.loads(r.read()), r.status
def post(path, body=None):
data = json.dumps(body or {}).encode()
req = urllib.request.Request(BASE + path, data=data,
headers={"Content-Type": "application/json"})
try:
with urllib.request.urlopen(req, timeout=10) as r:
return json.loads(r.read()), r.status
except urllib.error.HTTPError as e:
return json.loads(e.read()), e.code
def make_session(created_list):
d, _ = post("/api/session/new", {})
sid = d["session"]["session_id"]
created_list.append(sid)
return sid
# ── Settings API ──────────────────────────────────────────────────────────
def test_settings_get_returns_defaults():
"""GET /api/settings returns default settings."""
d, status = get("/api/settings")
assert status == 200
assert 'default_model' in d
assert 'default_workspace' in d
def test_settings_post_persists():
"""POST /api/settings saves and returns merged settings."""
d, status = post("/api/settings", {"default_model": "test/model-123"})
assert status == 200
assert d['default_model'] == 'test/model-123'
# Verify it persisted
d2, _ = get("/api/settings")
assert d2['default_model'] == 'test/model-123'
# Restore
post("/api/settings", {"default_model": "openai/gpt-5.4-mini"})
def test_settings_partial_update():
"""POST /api/settings with partial data doesn't clobber other fields."""
d1, _ = get("/api/settings")
original_ws = d1['default_workspace']
post("/api/settings", {"default_model": "anthropic/claude-sonnet-4.6"})
d2, _ = get("/api/settings")
assert d2['default_model'] == 'anthropic/claude-sonnet-4.6'
assert d2['default_workspace'] == original_ws
# Restore
post("/api/settings", {"default_model": "openai/gpt-5.4-mini"})
# ── Session Pinning ───────────────────────────────────────────────────────
def test_pin_session():
"""POST /api/session/pin sets pinned=true."""
created = []
try:
sid = make_session(created)
d, status = post("/api/session/pin", {"session_id": sid, "pinned": True})
assert status == 200
assert d['ok'] is True
assert d['session']['pinned'] is True
finally:
for sid in created:
post("/api/session/delete", {"session_id": sid})
def test_unpin_session():
"""POST /api/session/pin with pinned=false unpins."""
created = []
try:
sid = make_session(created)
post("/api/session/pin", {"session_id": sid, "pinned": True})
d, status = post("/api/session/pin", {"session_id": sid, "pinned": False})
assert status == 200
assert d['session']['pinned'] is False
finally:
for sid in created:
post("/api/session/delete", {"session_id": sid})
def test_pinned_in_session_list():
"""Pinned sessions include pinned field in session list."""
created = []
try:
sid = make_session(created)
# Pin it and give it a title so it shows in the list
post("/api/session/rename", {"session_id": sid, "title": "Pinned Test"})
post("/api/session/pin", {"session_id": sid, "pinned": True})
d, _ = get("/api/sessions")
match = [s for s in d['sessions'] if s['session_id'] == sid]
assert len(match) == 1
assert match[0]['pinned'] is True
finally:
for sid in created:
post("/api/session/delete", {"session_id": sid})
def test_pinned_persists_on_reload():
"""Pin status survives session reload from disk."""
created = []
try:
sid = make_session(created)
post("/api/session/pin", {"session_id": sid, "pinned": True})
d, _ = get(f"/api/session?session_id={sid}")
assert d['session']['pinned'] is True
finally:
for sid in created:
post("/api/session/delete", {"session_id": sid})
# ── Session Import ────────────────────────────────────────────────────────
def test_import_session_basic():
"""POST /api/session/import creates a new session from JSON."""
payload = {
"title": "Imported Test",
"messages": [
{"role": "user", "content": "Hello from import"},
{"role": "assistant", "content": "Hi there!"},
],
"model": "test/import-model",
}
d, status = post("/api/session/import", payload)
assert status == 200
assert d['ok'] is True
sid = d['session']['session_id']
try:
assert d['session']['title'] == 'Imported Test'
assert len(d['session']['messages']) == 2
# Verify it loads correctly
d2, _ = get(f"/api/session?session_id={sid}")
assert d2['session']['model'] == 'test/import-model'
finally:
post("/api/session/delete", {"session_id": sid})
def test_import_requires_messages():
"""Import fails without a messages array."""
d, status = post("/api/session/import", {"title": "No messages"})
assert status == 400
def test_import_creates_new_id():
"""Imported session gets a new session_id, not reusing any from the payload."""
payload = {
"session_id": "should_be_ignored",
"title": "ID Test",
"messages": [{"role": "user", "content": "test"}],
}
d, _ = post("/api/session/import", payload)
sid = d['session']['session_id']
try:
# The import should create a new ID, not use the one from the payload
assert sid != "should_be_ignored"
finally:
post("/api/session/delete", {"session_id": sid})
def test_import_with_pinned():
"""Imported session can be pinned."""
payload = {
"title": "Pinned Import",
"messages": [{"role": "user", "content": "test"}],
"pinned": True,
}
d, _ = post("/api/session/import", payload)
sid = d['session']['session_id']
try:
d2, _ = get(f"/api/session?session_id={sid}")
assert d2['session']['pinned'] is True
finally:
post("/api/session/delete", {"session_id": sid})

122
tests/test_sprint13.py Normal file
View File

@@ -0,0 +1,122 @@
"""
Sprint 13 Tests: cron recent endpoint, session duplicate, background alerts.
"""
import json, pathlib, urllib.error, urllib.request
from tests._pytest_port import BASE
def get(path):
with urllib.request.urlopen(BASE + path, timeout=10) as r:
return json.loads(r.read()), r.status
def post(path, body=None):
data = json.dumps(body or {}).encode()
req = urllib.request.Request(BASE + path, data=data,
headers={"Content-Type": "application/json"})
try:
with urllib.request.urlopen(req, timeout=10) as r:
return json.loads(r.read()), r.status
except urllib.error.HTTPError as e:
return json.loads(e.read()), e.code
def make_session(created_list):
d, _ = post("/api/session/new", {})
sid = d["session"]["session_id"]
created_list.append(sid)
return sid, d["session"]
# ── Cron recent endpoint ──────────────────────────────────────────────────
def test_crons_recent_returns_200():
"""GET /api/crons/recent returns completions list."""
d, status = get("/api/crons/recent?since=0")
assert status == 200
assert 'completions' in d
assert isinstance(d['completions'], list)
assert 'since' in d
def test_crons_recent_with_future_since():
"""Completions list is empty when since is in the future."""
import time
d, _ = get(f"/api/crons/recent?since={time.time() + 99999}")
assert d['completions'] == []
def test_crons_recent_default_since():
"""Default since=0 returns all completions."""
d, status = get("/api/crons/recent")
assert status == 200
assert 'completions' in d
# ── Session duplicate ─────────────────────────────────────────────────────
def test_duplicate_session():
"""Duplicating a session creates a new one with same workspace/model."""
created = []
try:
sid, sess = make_session(created)
# Set a specific model on the session
post("/api/session/update", {
"session_id": sid, "model": "test/dup-model",
"workspace": sess["workspace"]
})
# Duplicate: create new session with same workspace/model
d2, status = post("/api/session/new", {
"workspace": sess["workspace"], "model": "test/dup-model"
})
assert status == 200
new_sid = d2["session"]["session_id"]
created.append(new_sid)
assert new_sid != sid
assert d2["session"]["model"] == "test/dup-model"
assert d2["session"]["workspace"] == sess["workspace"]
finally:
for s in created:
post("/api/session/delete", {"session_id": s})
# ── Session pinned field preserved across operations ──────────────────────
def test_pinned_survives_update():
"""Pinned status survives session update."""
created = []
try:
sid, sess = make_session(created)
post("/api/session/pin", {"session_id": sid, "pinned": True})
# Update workspace/model
post("/api/session/update", {
"session_id": sid, "model": "test/other",
"workspace": sess["workspace"]
})
d, _ = get(f"/api/session?session_id={sid}")
assert d["session"]["pinned"] is True
finally:
for s in created:
post("/api/session/delete", {"session_id": s})
# ── Workspace symlink validation ──────────────────────────────────────────
def test_workspace_add_rejects_nonexistent():
"""Adding a non-existent path returns 400."""
d, status = post("/api/workspaces/add", {"path": "/nonexistent/path/12345"})
assert status == 400
def test_workspace_add_accepts_real_dir():
"""Adding a real directory under the trusted workspace root succeeds."""
d, _ = post("/api/session/new", {})
root = pathlib.Path(d["session"]["workspace"])
tmp = root / "trusted-add-test"
tmp.mkdir(parents=True, exist_ok=True)
try:
d, status = post("/api/workspaces/add", {"path": str(tmp), "name": "test-ws"})
assert status == 200
assert d["ok"] is True
finally:
post("/api/workspaces/remove", {"path": str(tmp)})
import shutil
shutil.rmtree(tmp, ignore_errors=True)

153
tests/test_sprint14.py Normal file
View File

@@ -0,0 +1,153 @@
"""
Sprint 14 Tests: file rename, folder create, session archive, session tags, mermaid, timestamps.
"""
import json, os, pathlib, shutil, tempfile, urllib.error, urllib.request
from tests._pytest_port import BASE
def get(path):
with urllib.request.urlopen(BASE + path, timeout=10) as r:
return json.loads(r.read()), r.status
def post(path, body=None):
data = json.dumps(body or {}).encode()
req = urllib.request.Request(BASE + path, data=data,
headers={"Content-Type": "application/json"})
try:
with urllib.request.urlopen(req, timeout=10) as r:
return json.loads(r.read()), r.status
except urllib.error.HTTPError as e:
return json.loads(e.read()), e.code
def make_session(created_list):
d, _ = post("/api/session/new", {})
sid = d["session"]["session_id"]
created_list.append(sid)
return sid, d["session"]
# ── File rename ───────────────────────────────────────────────────────────
def test_file_rename():
"""Renaming a file changes its name on disk."""
created = []
try:
sid, sess = make_session(created)
# Create a file first
post("/api/file/create", {"session_id": sid, "path": "rename_test.txt", "content": "hello"})
d, status = post("/api/file/rename", {
"session_id": sid, "path": "rename_test.txt", "new_name": "renamed.txt"
})
assert status == 200
assert d["ok"] is True
assert "renamed.txt" in d["new_path"]
finally:
for s in created:
post("/api/session/delete", {"session_id": s})
def test_file_rename_rejects_path_traversal():
"""Rename rejects names with path separators."""
created = []
try:
sid, sess = make_session(created)
post("/api/file/create", {"session_id": sid, "path": "safe.txt", "content": ""})
d, status = post("/api/file/rename", {
"session_id": sid, "path": "safe.txt", "new_name": "../evil.txt"
})
assert status == 400
finally:
for s in created:
post("/api/session/delete", {"session_id": s})
def test_file_rename_rejects_existing():
"""Rename fails if target name already exists."""
created = []
try:
sid, sess = make_session(created)
post("/api/file/create", {"session_id": sid, "path": "a.txt", "content": "a"})
post("/api/file/create", {"session_id": sid, "path": "b.txt", "content": "b"})
d, status = post("/api/file/rename", {
"session_id": sid, "path": "a.txt", "new_name": "b.txt"
})
assert status == 400
finally:
for s in created:
post("/api/session/delete", {"session_id": s})
# ── Folder create ─────────────────────────────────────────────────────────
def test_create_dir():
"""Creating a folder succeeds."""
created = []
try:
sid, sess = make_session(created)
d, status = post("/api/file/create-dir", {
"session_id": sid, "path": "test_folder"
})
assert status == 200
assert d["ok"] is True
finally:
for s in created:
post("/api/session/delete", {"session_id": s})
def test_create_dir_rejects_existing():
"""Creating a folder that already exists fails."""
created = []
try:
sid, sess = make_session(created)
post("/api/file/create-dir", {"session_id": sid, "path": "dup_folder"})
d, status = post("/api/file/create-dir", {"session_id": sid, "path": "dup_folder"})
assert status == 400
finally:
for s in created:
post("/api/session/delete", {"session_id": s})
# ── Session archive ───────────────────────────────────────────────────────
def test_archive_session():
"""Archiving a session sets archived=true."""
created = []
try:
sid, _ = make_session(created)
d, status = post("/api/session/archive", {"session_id": sid, "archived": True})
assert status == 200
assert d["session"]["archived"] is True
finally:
for s in created:
post("/api/session/delete", {"session_id": s})
def test_unarchive_session():
"""Unarchiving a session sets archived=false."""
created = []
try:
sid, _ = make_session(created)
post("/api/session/archive", {"session_id": sid, "archived": True})
d, status = post("/api/session/archive", {"session_id": sid, "archived": False})
assert status == 200
assert d["session"]["archived"] is False
finally:
for s in created:
post("/api/session/delete", {"session_id": s})
def test_archived_in_compact():
"""Archived field appears in session list."""
created = []
try:
sid, _ = make_session(created)
post("/api/session/rename", {"session_id": sid, "title": "Archive Test"})
post("/api/session/archive", {"session_id": sid, "archived": True})
d, _ = get(f"/api/session?session_id={sid}")
assert d["session"]["archived"] is True
finally:
for s in created:
post("/api/session/delete", {"session_id": s})

234
tests/test_sprint15.py Normal file
View File

@@ -0,0 +1,234 @@
"""
Sprint 15 Tests: session projects (CRUD, move, backward compat).
"""
import json, urllib.error, urllib.request
from tests._pytest_port import BASE
def get(path):
with urllib.request.urlopen(BASE + path, timeout=10) as r:
return json.loads(r.read()), r.status
def post(path, body=None):
data = json.dumps(body or {}).encode()
req = urllib.request.Request(BASE + path, data=data,
headers={"Content-Type": "application/json"})
try:
with urllib.request.urlopen(req, timeout=10) as r:
return json.loads(r.read()), r.status
except urllib.error.HTTPError as e:
return json.loads(e.read()), e.code
def make_session(created_list):
d, _ = post("/api/session/new", {})
sid = d["session"]["session_id"]
created_list.append(sid)
return sid, d["session"]
def make_project(created_list, name="Test Project", color=None):
body = {"name": name}
if color:
body["color"] = color
d, status = post("/api/projects/create", body)
assert status == 200
pid = d["project"]["project_id"]
created_list.append(pid)
return pid, d["project"]
def cleanup_projects(project_ids):
for pid in project_ids:
try:
post("/api/projects/delete", {"project_id": pid})
except Exception:
pass
# ── Project CRUD ─────────────────────────────────────────────────────────
def test_create_project():
"""Creating a project returns a valid project dict."""
pids = []
try:
pid, proj = make_project(pids, "My Project", "#7cb9ff")
assert pid and len(pid) == 12
assert proj["name"] == "My Project"
assert proj["color"] == "#7cb9ff"
assert "created_at" in proj
finally:
cleanup_projects(pids)
def test_list_projects_empty():
"""Listing projects when none exist returns empty list."""
d, status = get("/api/projects")
assert status == 200
assert isinstance(d["projects"], list)
def test_list_projects():
"""Listing projects returns created projects."""
pids = []
try:
make_project(pids, "Alpha")
make_project(pids, "Beta")
d, status = get("/api/projects")
assert status == 200
names = [p["name"] for p in d["projects"]]
assert "Alpha" in names
assert "Beta" in names
finally:
cleanup_projects(pids)
def test_rename_project():
"""Renaming a project updates its name."""
pids = []
try:
pid, _ = make_project(pids, "Old Name")
d, status = post("/api/projects/rename", {"project_id": pid, "name": "New Name"})
assert status == 200
assert d["project"]["name"] == "New Name"
# Verify via list
dl, _ = get("/api/projects")
names = [p["name"] for p in dl["projects"]]
assert "New Name" in names
assert "Old Name" not in names
finally:
cleanup_projects(pids)
def test_delete_project():
"""Deleting a project removes it from the list."""
pids = []
try:
pid, _ = make_project(pids, "Doomed")
d, status = post("/api/projects/delete", {"project_id": pid})
assert status == 200
assert d["ok"] is True
dl, _ = get("/api/projects")
assert all(p["project_id"] != pid for p in dl["projects"])
pids.clear() # already deleted
finally:
cleanup_projects(pids)
def test_delete_project_unassigns_sessions():
"""Deleting a project unassigns all sessions that belonged to it."""
pids = []
sids = []
try:
pid, _ = make_project(pids, "Temp Project")
sid, _ = make_session(sids)
# Assign session to project
post("/api/session/move", {"session_id": sid, "project_id": pid})
# Verify assigned
sd, _ = get(f"/api/session?session_id={sid}")
assert sd["session"].get("project_id") == pid
# Delete project
post("/api/projects/delete", {"project_id": pid})
pids.clear()
# Verify session is unassigned
sd2, _ = get(f"/api/session?session_id={sid}")
assert sd2["session"].get("project_id") is None
finally:
cleanup_projects(pids)
for s in sids:
post("/api/session/delete", {"session_id": s})
def test_create_project_requires_name():
"""Creating a project without a name returns 400."""
d, status = post("/api/projects/create", {})
assert status == 400
def test_delete_nonexistent_project():
"""Deleting a project that doesn't exist returns 404."""
d, status = post("/api/projects/delete", {"project_id": "nonexistent99"})
assert status == 404
# ── Session move ─────────────────────────────────────────────────────────
def test_session_move_to_project():
"""Moving a session to a project sets its project_id."""
pids = []
sids = []
try:
pid, _ = make_project(pids, "Work")
sid, _ = make_session(sids)
d, status = post("/api/session/move", {"session_id": sid, "project_id": pid})
assert status == 200
assert d["session"]["project_id"] == pid
finally:
cleanup_projects(pids)
for s in sids:
post("/api/session/delete", {"session_id": s})
def test_session_move_to_unassigned():
"""Moving a session to null project unassigns it."""
pids = []
sids = []
try:
pid, _ = make_project(pids, "Temp")
sid, _ = make_session(sids)
# Assign then unassign
post("/api/session/move", {"session_id": sid, "project_id": pid})
d, status = post("/api/session/move", {"session_id": sid, "project_id": None})
assert status == 200
assert d["session"]["project_id"] is None
finally:
cleanup_projects(pids)
for s in sids:
post("/api/session/delete", {"session_id": s})
def test_session_project_in_list():
"""Session list includes project_id for assigned sessions."""
pids = []
sids = []
try:
pid, _ = make_project(pids, "Listed")
sid, _ = make_session(sids)
# Give it a title so it shows in list (non-empty Untitled sessions are hidden)
post("/api/session/rename", {"session_id": sid, "title": "Project Test Session"})
post("/api/session/move", {"session_id": sid, "project_id": pid})
dl, _ = get("/api/sessions")
match = [s for s in dl["sessions"] if s["session_id"] == sid]
assert len(match) == 1
assert match[0]["project_id"] == pid
finally:
cleanup_projects(pids)
for s in sids:
post("/api/session/delete", {"session_id": s})
# ── Backward compat ──────────────────────────────────────────────────────
def test_compact_includes_project_id():
"""New session compact dict includes project_id as null."""
sids = []
try:
sid, sess = make_session(sids)
# Give it a title so it appears in the list
post("/api/session/rename", {"session_id": sid, "title": "Compat Test"})
dl, _ = get("/api/sessions")
match = [s for s in dl["sessions"] if s["session_id"] == sid]
assert len(match) == 1
assert "project_id" in match[0]
assert match[0]["project_id"] is None
finally:
for s in sids:
post("/api/session/delete", {"session_id": s})
def test_session_move_requires_session_id():
"""Moving without session_id returns 400."""
d, status = post("/api/session/move", {"project_id": "abc"})
assert status == 400

721
tests/test_sprint16.py Normal file
View File

@@ -0,0 +1,721 @@
"""
Sprint 16 Tests: safe HTML rendering in renderMd(), active session styling,
session sidebar polish (SVG icons, dropdown actions).
"""
import html as _html
import pathlib
import re
import urllib.request
from tests._pytest_port import BASE
REPO_ROOT = pathlib.Path(__file__).parent.parent
# ── Helpers ──────────────────────────────────────────────────────────────────
def get_text(path):
    """Fetch BASE+path and return (body decoded as UTF-8, HTTP status)."""
    url = BASE + path
    with urllib.request.urlopen(url, timeout=10) as resp:
        body = resp.read().decode("utf-8")
        return body, resp.status
def esc(s):
    """Mirror of esc() in ui.js — HTML-escapes a string (including quotes)."""
    return _html.escape(str(s), quote=True)
# Tags the renderer is allowed to emit at block level; anything else is escaped.
SAFE_TAGS = re.compile(
    r"^<\/?(strong|em|code|pre|h[1-6]|ul|ol|li|table|thead|tbody|tr|th|td"
    r"|hr|blockquote|p|br|a|div)([\s>]|$)",
    re.I,
)
# Smaller allowlist used inside list items / blockquotes.
SAFE_INLINE = re.compile(r"^<\/?(strong|em|code|a)([\s>]|$)", re.I)
def inline_md(t):
    """Mirror of inlineMd() in ui.js — for use inside list items / blockquotes."""
    def bold_italic(m):
        return "<strong><em>" + esc(m.group(1)) + "</em></strong>"
    def bold(m):
        return "<strong>" + esc(m.group(1)) + "</strong>"
    def italic(m):
        return "<em>" + esc(m.group(1)) + "</em>"
    def code_span(m):
        return "<code>" + esc(m.group(1)) + "</code>"
    def link(m):
        return f'<a href="{esc(m.group(2))}" target="_blank" rel="noopener">{esc(m.group(1))}</a>'
    def tag_guard(m):
        # Escape any HTML tag that is not on the inline allowlist.
        tag = m.group()
        return tag if SAFE_INLINE.match(tag) else esc(tag)
    out = t
    out = re.sub(r"\*\*\*(.+?)\*\*\*", bold_italic, out)
    out = re.sub(r"\*\*(.+?)\*\*", bold, out)
    out = re.sub(r"\*([^*\n]+)\*", italic, out)
    out = re.sub(r"`([^`\n]+)`", code_span, out)
    out = re.sub(
        r"\[([^\]]+)\]\((https?://[^\)]+)\)",
        link,
        out,
    )
    out = re.sub(r"</?[a-zA-Z][^>]*>", tag_guard, out)
    return out
def render_md(raw):
    """
    Python mirror of renderMd() in static/ui.js.
    Kept in sync with the JS implementation so tests catch regressions
    if the JS logic drifts from the documented behaviour.
    """
    s = raw or ""
    # Pre-pass: stash code blocks/spans, convert safe HTML → markdown equivalents
    fence_stash = []
    def stash(m):
        # Swap each code block/span for a "\x00F<index>\x00" placeholder so the
        # HTML→markdown conversions below cannot touch code contents.
        fence_stash.append(m.group())
        return "\x00F" + str(len(fence_stash) - 1) + "\x00"
    s = re.sub(r"(```[\s\S]*?```|`[^`\n]+`)", stash, s)
    s = re.sub(r"<strong>([\s\S]*?)</strong>", lambda m: "**" + m.group(1) + "**", s, flags=re.I)
    s = re.sub(r"<b>([\s\S]*?)</b>", lambda m: "**" + m.group(1) + "**", s, flags=re.I)
    s = re.sub(r"<em>([\s\S]*?)</em>", lambda m: "*" + m.group(1) + "*", s, flags=re.I)
    s = re.sub(r"<i>([\s\S]*?)</i>", lambda m: "*" + m.group(1) + "*", s, flags=re.I)
    s = re.sub(r"<code>([^<]*?)</code>", lambda m: "`" + m.group(1) + "`", s, flags=re.I)
    s = re.sub(r"<br\s*/?>", "\n", s, flags=re.I)
    # Restore the stashed code spans now that safe-HTML conversion is done.
    s = re.sub(r"\x00F(\d+)\x00", lambda m: fence_stash[int(m.group(1))], s)
    # Fenced code blocks
    def fenced(m):
        # Optional language label becomes a pre-header div above the <pre>.
        lang, code = m.group(1), m.group(2).rstrip("\n")
        h = f'<div class="pre-header">{esc(lang)}</div>' if lang else ""
        return h + "<pre><code>" + esc(code) + "</code></pre>"
    s = re.sub(r"```([\w+-]*)\n?([\s\S]*?)```", fenced, s)
    s = re.sub(r"`([^`\n]+)`", lambda m: "<code>" + esc(m.group(1)) + "</code>", s)
    # Inline formatting (top-level, outside list items)
    s = re.sub(r"\*\*\*(.+?)\*\*\*", lambda m: "<strong><em>" + esc(m.group(1)) + "</em></strong>", s)
    s = re.sub(r"\*\*(.+?)\*\*", lambda m: "<strong>" + esc(m.group(1)) + "</strong>", s)
    s = re.sub(r"\*([^*\n]+)\*", lambda m: "<em>" + esc(m.group(1)) + "</em>", s)
    # Block elements using inlineMd for their content
    s = re.sub(r"^### (.+)$", lambda m: "<h3>" + inline_md(m.group(1)) + "</h3>", s, flags=re.M)
    s = re.sub(r"^## (.+)$", lambda m: "<h2>" + inline_md(m.group(1)) + "</h2>", s, flags=re.M)
    s = re.sub(r"^# (.+)$", lambda m: "<h1>" + inline_md(m.group(1)) + "</h1>", s, flags=re.M)
    s = re.sub(r"^---+$", "<hr>", s, flags=re.M)
    s = re.sub(r"^> (.+)$", lambda m: "<blockquote>" + inline_md(m.group(1)) + "</blockquote>", s, flags=re.M)
    def handle_ul(block):
        # NOTE(review): the capture regex below only admits 0–1 leading spaces
        # per bullet line, yet this checks for 2+ — the indent branch looks
        # unreachable from here; confirm against the JS regex in ui.js.
        lines = block.strip().split("\n")
        out = "<ul>"
        for l in lines:
            indent = bool(re.match(r"^ {2,}", l))
            text = re.sub(r"^ {0,4}[-*+] ", "", l)
            style = ' style="margin-left:16px"' if indent else ""
            out += f"<li{style}>{inline_md(text)}</li>"
        return out + "</ul>"
    s = re.sub(r"((?:^(?: )?[-*+] .+\n?)+)", lambda m: handle_ul(m.group()), s, flags=re.M)
    def handle_ol(block):
        # Numbered lists: strip the "N. " marker and run inline markdown.
        lines = block.strip().split("\n")
        out = "<ol>"
        for l in lines:
            text = re.sub(r"^ {0,4}\d+\. ", "", l)
            out += f"<li>{inline_md(text)}</li>"
        return out + "</ol>"
    s = re.sub(r"((?:^(?: )?\d+\. .+\n?)+)", lambda m: handle_ol(m.group()), s, flags=re.M)
    # Safety net: escape unknown tags in remaining text
    s = re.sub(r"</?[a-zA-Z][^>]*>", lambda m: m.group() if SAFE_TAGS.match(m.group()) else esc(m.group()), s)
    # Paragraph wrap
    parts = s.split("\n\n")
    def wrap(p):
        # Known block-level output passes through untouched; anything else
        # becomes a <p> with single newlines rendered as <br>.
        p = p.strip()
        if not p: return ""
        if re.match(r"^<(h[1-6]|ul|ol|pre|hr|blockquote)", p): return p
        return "<p>" + p.replace("\n", "<br>") + "</p>"
    s = "\n".join(wrap(p) for p in parts)
    return s
# ── Static analysis: verify key structures exist in ui.js ────────────────────
def test_render_md_pre_pass_converts_strong(cleanup_test_sessions):
    """ui.js renderMd() must have pre-pass that converts <strong> to **."""
    src = REPO_ROOT / "static" / "ui.js"
    code = src.read_text()
    assert "<strong>" in code and "**" in code, "pre-pass for <strong> not found"
    # Verify the specific conversion pattern
    assert re.search(r"<strong>.*?\*\*", code, re.S), \
        "renderMd pre-pass should convert <strong>...</strong> to **...**"
def test_render_md_has_safety_net(cleanup_test_sessions):
    """ui.js must have a safety-net that escapes unknown HTML tags after the pipeline."""
    src = REPO_ROOT / "static" / "ui.js"
    code = src.read_text()
    assert "SAFE_TAGS" in code, "SAFE_TAGS allowlist regex not found in ui.js"
    assert "esc(tag)" in code, "safety-net esc(tag) call not found in ui.js"
def test_render_md_stashes_code_blocks(cleanup_test_sessions):
    """ui.js pre-pass must stash code blocks before replacing safe HTML tags."""
    src = REPO_ROOT / "static" / "ui.js"
    code = src.read_text()
    assert "fence_stash" in code, "fence_stash not found in renderMd pre-pass"
def test_render_md_handles_br_tag(cleanup_test_sessions):
    """ui.js must convert <br> to newline in pre-pass."""
    src = REPO_ROOT / "static" / "ui.js"
    code = src.read_text()
    # The second clause makes this a loose check: any "<br" occurrence passes.
    assert re.search(r"<br\\s\*", code) or "<br" in code, "<br> handling not found"
def test_render_md_no_placeholder_remnants(cleanup_test_sessions):
    """Old Unicode placeholder approach (\\uE001-\\uE005) must be gone."""
    src = REPO_ROOT / "static" / "ui.js"
    code = src.read_text()
    for old_ph in ["\\uE001", "\\uE002", "\\uE003", "\\uE004", "\\uE005"]:
        assert old_ph not in code, \
            f"Old placeholder {old_ph} still present — broken implementation not cleaned up"
def test_render_md_safe_tag_allowlist_complete(cleanup_test_sessions):
    """SAFE_TAGS allowlist must include all tags the pipeline emits."""
    src = REPO_ROOT / "static" / "ui.js"
    code = src.read_text()
    required = ["strong", "em", "code", "pre", "ul", "ol", "li",
                "table", "blockquote", "hr", "br", "a", "div"]
    # Extract the JS regex literal body between "/.../i".
    safe_tags_match = re.search(r"SAFE_TAGS\s*=\s*/(.+?)/i", code)
    assert safe_tags_match, "SAFE_TAGS regex not found"
    pattern = safe_tags_match.group(1)
    for tag in required:
        assert tag in pattern, f"Tag '{tag}' missing from SAFE_TAGS allowlist"
# ── Behavioural: renderMd logic via Python mirror ─────────────────────────────
# These tests exercise the render_md() mirror defined above, not the JS itself.
def test_render_md_markdown_bold(cleanup_test_sessions):
    """**word** markdown renders as <strong>word</strong>."""
    out = render_md("Hello **world**")
    assert "<strong>world</strong>" in out
def test_render_md_html_strong_passthrough(cleanup_test_sessions):
    """<strong>word</strong> in AI output renders as bold."""
    out = render_md("Hello <strong>world</strong>")
    assert "<strong>world</strong>" in out
def test_render_md_html_b_tag(cleanup_test_sessions):
    """<b>word</b> renders as <strong>word</strong>."""
    out = render_md("Hello <b>world</b>")
    assert "<strong>world</strong>" in out
def test_render_md_html_em_passthrough(cleanup_test_sessions):
    """<em>word</em> renders as italic."""
    out = render_md("Hello <em>world</em>")
    assert "<em>world</em>" in out
def test_render_md_html_i_tag(cleanup_test_sessions):
    """<i>word</i> renders as <em>word</em>."""
    out = render_md("Hello <i>word</i>")
    assert "<em>word</em>" in out
def test_render_md_html_code_passthrough(cleanup_test_sessions):
    """<code>text</code> renders as inline code."""
    out = render_md("use <code>print()</code>")
    assert "<code>print()</code>" in out
def test_render_md_html_br_becomes_newline(cleanup_test_sessions):
    """<br> in AI output becomes a newline (rendered as <br> inside <p> later)."""
    out = render_md("line one<br>line two")
    assert "line one\nline two" in out or "line one<br>line two" in out
def test_render_md_mixed_markdown_and_html(cleanup_test_sessions):
    """Markdown and HTML formatting can coexist in the same response."""
    out = render_md("**markdown** and <strong>html</strong>")
    assert "<strong>markdown</strong>" in out
    assert "<strong>html</strong>" in out
def test_render_md_html_strong_in_list_item(cleanup_test_sessions):
"""THE SCREENSHOT BUG: <strong> tags inside list items must render as bold,
not as escaped literal text like &lt;strong&gt;."""
out = render_md(
"- <strong>All items</strong> get `border-radius: 0 8px 8px 0`\n"
"- <strong>Active item</strong> uses <code>#e8a030</code>\n"
"- <strong>Project items</strong> show their color\n"
"- <strong>Regular items</strong> stay muted"
)
assert "&lt;strong&gt;" not in out, \
"Escaped <strong> literal found in list output — bold not rendering"
assert "<strong>All items</strong>" in out
assert "<strong>Active item</strong>" in out
assert "<code>border-radius: 0 8px 8px 0</code>" in out
assert "<code>#e8a030</code>" in out
def test_render_md_exact_screenshot_content(cleanup_test_sessions):
"""Exact text from the ui-changes-unrendered-html-tags.png screenshot.
This is the canonical regression test for the inlineMd fix.
All four bullet points must render <strong> and <code> as HTML, not literal text."""
out = render_md(
"- <strong>All items</strong> now have <code>border-radius: 0 8px 8px 0</code>"
" \u2014 straight left edge everywhere, rounded on the right\n"
"- <strong>Active item</strong> is now gold/amber (<code>#e8a030</code>)"
" \u2014 same warm gold used in the logo \u2014 instead of blue,"
" so it stands out distinctly from everything else\n"
"- <strong>Project items</strong> still show their project color on the left"
" border, but only when they're not the active item (active always wins with gold)\n"
"- <strong>Regular items</strong> (no project) still have no left border color"
)
# None of the safe tags should appear as literal escaped text
assert "&lt;strong&gt;" not in out, \
"Literal &lt;strong&gt; found — <strong> is not rendering as bold"
assert "&lt;/strong&gt;" not in out, \
"Literal &lt;/strong&gt; found — closing tag is not rendering"
assert "&lt;code&gt;" not in out, \
"Literal &lt;code&gt; found — <code> is not rendering as inline code"
# Each item's bold label must render correctly
assert "<strong>All items</strong>" in out
assert "<strong>Active item</strong>" in out
assert "<strong>Project items</strong>" in out
assert "<strong>Regular items</strong>" in out
# The code spans in items 1 and 2 must render correctly
assert "<code>border-radius: 0 8px 8px 0</code>" in out
assert "<code>#e8a030</code>" in out
# The surrounding prose text must be preserved
assert "straight left edge everywhere" in out
assert "same warm gold used in the logo" in out
assert "active always wins with gold" in out
def test_render_md_markdown_bold_in_list_item(cleanup_test_sessions):
"""**bold** markdown inside list items must render as <strong>."""
out = render_md("- **First** item\n- **Second** item with `code`")
assert "<strong>First</strong>" in out
assert "<strong>Second</strong>" in out
assert "<code>code</code>" in out
def test_render_md_html_strong_in_blockquote(cleanup_test_sessions):
"""<strong> inside blockquote must render as bold."""
out = render_md("> <strong>Note:</strong> pay attention")
assert "&lt;strong&gt;" not in out
assert "<strong>Note:</strong>" in out
def test_render_md_html_strong_in_heading(cleanup_test_sessions):
"""<strong> inside a heading must render as bold."""
out = render_md("## <strong>Important</strong> Section")
assert "&lt;strong&gt;" not in out
assert "<strong>Important</strong>" in out
def test_render_md_xss_in_list_still_blocked(cleanup_test_sessions):
    """XSS attempts in list items must still be escaped."""
    rendered = render_md("- <img src=x onerror=alert(1)> bad")
    # The escaped form must be present and the raw tag absent.
    assert "&lt;img" in rendered
    assert "<img" not in rendered
def test_render_md_xss_in_blockquote_still_blocked(cleanup_test_sessions):
"""XSS in blockquote must still be escaped."""
out = render_md("> <script>alert(1)</script>")
assert "<script>" not in out
assert "&lt;script" in out
def test_render_md_code_span_in_list_protected(cleanup_test_sessions):
"""Backtick code span in list item must escape its content."""
out = render_md("- Use `<br>` for breaks")
assert "<code>&lt;br&gt;</code>" in out
def test_render_md_code_block_protects_html(cleanup_test_sessions):
"""HTML inside a backtick code span must NOT be converted — shown as literal."""
out = render_md("keep `<strong>literal</strong>` safe")
assert "&lt;strong&gt;" in out, "HTML inside code span should be escaped"
assert "<strong>literal</strong>" not in out, "HTML inside code span should NOT render as bold"
def test_render_md_fenced_code_protects_html(cleanup_test_sessions):
"""HTML inside a fenced code block must not be converted by the pre-pass.
The fenced block is stashed before tag replacement runs, so the raw HTML
is preserved intact for the pipeline's esc() to escape when rendering
the <pre><code> block. We verify the stash/restore mechanism works by
checking the content is unchanged after the pre-pass (i.e. still contains
the original tag text, not converted to **not bold**)."""
src = "```\n<strong>not bold</strong>\n```"
out = render_md(src)
# Pre-pass stash preserves the raw content -- it should NOT have been
# converted to **not bold** (which would render as bold outside the fence)
assert "**not bold**" not in out, \
"Fenced code content was incorrectly converted to markdown by the pre-pass"
# The raw content should still be present (stash/restore worked)
assert "<strong>not bold</strong>" in out or "&lt;strong&gt;" in out, \
"Fenced code content was lost after stash/restore"
# ── Security: XSS must be blocked ─────────────────────────────────────────────
def test_render_md_xss_img_tag_escaped(cleanup_test_sessions):
"""<img src=x onerror=alert(1)> must be HTML-escaped, not rendered."""
out = render_md("<img src=x onerror=alert(1)>")
assert "<img" not in out, "Raw <img> tag must not appear in output"
assert "&lt;img" in out, "<img> must be HTML-escaped"
def test_render_md_xss_script_tag_escaped(cleanup_test_sessions):
"""<script>alert(1)</script> must be HTML-escaped."""
out = render_md("<script>alert(1)</script>")
assert "<script>" not in out, "Raw <script> tag must not appear in output"
assert "&lt;script" in out, "<script> must be HTML-escaped"
def test_render_md_xss_iframe_escaped(cleanup_test_sessions):
"""<iframe> must be HTML-escaped."""
out = render_md("<iframe src='evil.com'></iframe>")
assert "<iframe" not in out
assert "&lt;iframe" in out
def test_render_md_xss_svg_onerror_escaped(cleanup_test_sessions):
"""<svg onload=...> must be HTML-escaped."""
out = render_md("<svg onload=alert(1)>")
assert "<svg" not in out
assert "&lt;svg" in out
def test_render_md_xss_in_bold_text_escaped(cleanup_test_sessions):
"""**<img onerror=...>** — XSS inside markdown bold must be escaped."""
out = render_md("**<img src=x onerror=alert(1)>**")
assert "<img" not in out, "XSS inside **bold** must be escaped"
assert "&lt;img" in out
def test_render_md_xss_in_html_strong_escaped(cleanup_test_sessions):
"""<strong><img ...></strong> — nested XSS inside HTML strong must be escaped."""
out = render_md("<strong><img src=x onerror=alert(1)></strong>")
# <strong> converts to ** which then escapes the inner content via esc()
assert "<img" not in out, "XSS nested inside <strong> must be escaped"
def test_render_md_xss_object_tag_escaped(cleanup_test_sessions):
"""<object data=...> must be HTML-escaped."""
out = render_md("<object data='evil.swf'></object>")
assert "<object" not in out
assert "&lt;object" in out
# ── Sprint 16 sidebar: static structure checks ───────────────────────────────
# ── Exhaustive inlineMd / renderMd edge-case tests ───────────────────────────
# --- Unordered list variants ---
def test_list_bold_only(cleanup_test_sessions):
"""Single bold word in list item."""
out = render_md("- **bold**")
assert "<strong>bold</strong>" in out
assert "&lt;" not in out
def test_list_italic_only(cleanup_test_sessions):
"""Single italic word in list item."""
out = render_md("- *italic*")
assert "<em>italic</em>" in out
def test_list_code_only(cleanup_test_sessions):
"""Single code span in list item."""
out = render_md("- `code`")
assert "<code>code</code>" in out
def test_list_bold_and_code_mixed(cleanup_test_sessions):
"""Bold and code together in one list item."""
out = render_md("- **run** `pip install foo`")
assert "<strong>run</strong>" in out
assert "<code>pip install foo</code>" in out
def test_list_html_strong_and_code_mixed(cleanup_test_sessions):
"""HTML <strong> and <code> together — the exact screenshot scenario."""
out = render_md("- <strong>Key</strong>: use <code>value</code>")
assert "<strong>Key</strong>" in out
assert "<code>value</code>" in out
assert "&lt;strong&gt;" not in out
assert "&lt;code&gt;" not in out
def test_list_html_em(cleanup_test_sessions):
"""HTML <em> in list item renders as italic."""
out = render_md("- <em>emphasized</em> text")
assert "<em>emphasized</em>" in out
assert "&lt;em&gt;" not in out
def test_list_html_b_tag(cleanup_test_sessions):
"""HTML <b> in list item renders as bold."""
out = render_md("- <b>bold via b tag</b>")
assert "<strong>bold via b tag</strong>" in out
assert "&lt;b&gt;" not in out
def test_list_html_i_tag(cleanup_test_sessions):
"""HTML <i> in list item renders as italic."""
out = render_md("- <i>italic via i tag</i>")
assert "<em>italic via i tag</em>" in out
assert "&lt;i&gt;" not in out
def test_list_multiple_items_each_formatted(cleanup_test_sessions):
"""Multiple list items each with different formatting."""
out = render_md(
"- **bold item**\n"
"- *italic item*\n"
"- `code item`\n"
"- plain item"
)
assert "<strong>bold item</strong>" in out
assert "<em>italic item</em>" in out
assert "<code>code item</code>" in out
assert "<li>plain item</li>" in out
def test_list_item_bold_mid_sentence(cleanup_test_sessions):
"""Bold in middle of a list item sentence."""
out = render_md("- Set the **timeout** to 30 seconds")
assert "<strong>timeout</strong>" in out
assert "Set the" in out
assert "to 30 seconds" in out
def test_list_item_multiple_bold_spans(cleanup_test_sessions):
"""Multiple bold spans in one list item."""
out = render_md("- **A** and **B** are both important")
assert "<strong>A</strong>" in out
assert "<strong>B</strong>" in out
def test_ordered_list_bold(cleanup_test_sessions):
"""Bold text inside ordered list items."""
out = render_md("1. **First** step\n2. **Second** step\n3. Plain step")
assert "<ol>" in out
assert "<strong>First</strong>" in out
assert "<strong>Second</strong>" in out
assert "<li>Plain step</li>" in out
def test_ordered_list_html_strong(cleanup_test_sessions):
"""HTML <strong> inside ordered list items renders correctly."""
out = render_md("1. <strong>Install</strong> the package\n2. <strong>Configure</strong> the settings")
assert "<ol>" in out
assert "<strong>Install</strong>" in out
assert "<strong>Configure</strong>" in out
assert "&lt;strong&gt;" not in out
def test_ordered_list_code_spans(cleanup_test_sessions):
"""Code spans inside ordered list items."""
out = render_md("1. Run `npm install`\n2. Run `npm start`")
assert "<code>npm install</code>" in out
assert "<code>npm start</code>" in out
def test_indented_list_item_bold(cleanup_test_sessions):
"""Bold inside indented (nested) list item."""
out = render_md("- top level\n - **nested bold**")
assert "<strong>nested bold</strong>" in out
assert "margin-left:16px" in out
# --- Blockquote variants ---
def test_blockquote_plain(cleanup_test_sessions):
"""Plain blockquote wraps in <blockquote>."""
out = render_md("> simple quote")
assert "<blockquote>simple quote</blockquote>" in out
def test_blockquote_bold(cleanup_test_sessions):
"""**bold** inside blockquote renders correctly."""
out = render_md("> **important** note")
assert "<strong>important</strong>" in out
def test_blockquote_html_strong(cleanup_test_sessions):
"""<strong> inside blockquote renders as bold."""
out = render_md("> <strong>Warning:</strong> read this")
assert "<strong>Warning:</strong>" in out
assert "&lt;strong&gt;" not in out
def test_blockquote_code_span(cleanup_test_sessions):
"""Code span inside blockquote renders correctly."""
out = render_md("> Use `git commit` to save")
assert "<code>git commit</code>" in out
def test_blockquote_mixed_formatting(cleanup_test_sessions):
"""Mixed bold and code in blockquote."""
out = render_md("> **Note:** run `pip install foo` first")
assert "<strong>Note:</strong>" in out
assert "<code>pip install foo</code>" in out
def test_blockquote_xss_blocked(cleanup_test_sessions):
"""XSS in blockquote content must be escaped."""
out = render_md("> <img src=x onerror=alert(1)>")
assert "&lt;img" in out
assert "<img" not in out
# --- Heading variants ---
def test_heading_h1_bold(cleanup_test_sessions):
"""Bold inside h1 renders correctly."""
out = render_md("# **Main** Title")
assert "<h1><strong>Main</strong> Title</h1>" in out
def test_heading_h2_html_strong(cleanup_test_sessions):
"""HTML <strong> inside h2 renders correctly."""
out = render_md("## <strong>Section</strong> Name")
assert "<h2><strong>Section</strong> Name</h2>" in out
assert "&lt;strong&gt;" not in out
def test_heading_h3_code(cleanup_test_sessions):
"""Code span inside h3 renders correctly."""
out = render_md("### The `renderMd` function")
assert "<h3>The <code>renderMd</code> function</h3>" in out
def test_heading_xss_blocked(cleanup_test_sessions):
"""XSS attempt in heading must be escaped."""
out = render_md("## <script>alert(1)</script>")
assert "<script>" not in out
assert "&lt;script" in out
# --- Paragraph / top-level formatting ---
def test_paragraph_bold_renders(cleanup_test_sessions):
"""Bold in a plain paragraph renders correctly."""
out = render_md("The **quick brown fox** jumps.")
assert "<strong>quick brown fox</strong>" in out
def test_paragraph_html_strong_renders(cleanup_test_sessions):
"""HTML <strong> in a plain paragraph renders correctly."""
out = render_md("The <strong>quick brown fox</strong> jumps.")
assert "<strong>quick brown fox</strong>" in out
assert "&lt;strong&gt;" not in out
def test_paragraph_html_code_renders(cleanup_test_sessions):
"""HTML <code> in a plain paragraph renders correctly."""
out = render_md("Call <code>foo()</code> to start.")
assert "<code>foo()</code>" in out
assert "&lt;code&gt;" not in out
def test_paragraph_br_creates_line_break(cleanup_test_sessions):
"""<br> in paragraph becomes a line break inside <p>."""
out = render_md("Line one<br>Line two")
# br converts to \n which inside <p> becomes <br>
assert "Line one" in out and "Line two" in out
def test_multiple_paragraphs_separated(cleanup_test_sessions):
"""Double newline creates separate <p> elements."""
out = render_md("First paragraph.\n\nSecond paragraph.")
assert out.count("<p>") == 2
# --- Table variants ---
def test_table_structure_in_ui_js(cleanup_test_sessions):
"""ui.js must contain table rendering logic with thead/tbody structure."""
src = (REPO_ROOT / "static" / "ui.js").read_text()
assert "<table>" in src or "table>" in src, "table rendering not found in ui.js"
assert "thead" in src, "thead not found in table renderer"
assert "tbody" in src, "tbody not found in table renderer"
assert "parseRow" in src, "parseRow helper not found in table renderer"
# --- br tag specifically ---
def test_br_in_list_item(cleanup_test_sessions):
"""<br> inside a list item becomes a newline."""
out = render_md("- Line one<br>Line two")
assert "Line one" in out
assert "Line two" in out
def test_br_self_closing_in_paragraph(cleanup_test_sessions):
"""<br/> self-closing form is also handled."""
out = render_md("Before<br/>After")
assert "Before" in out and "After" in out
# --- No double-escaping ---
def test_no_double_escaping_ampersand(cleanup_test_sessions):
"""A literal & in text must become &amp; exactly once, not &amp;amp;."""
out = render_md("foo & bar")
assert "&amp;amp;" not in out
assert "&amp;" in out or "foo & bar" in out # either fine (paragraph wrap may not escape)
def test_no_double_escaping_lt_in_code(cleanup_test_sessions):
"""< inside a code span must become &lt; exactly once."""
out = render_md("`a < b`")
assert "&lt;lt;" not in out
assert "&lt;" in out
def test_strong_text_not_double_escaped(cleanup_test_sessions):
"""Content of <strong> must not be double-escaped."""
out = render_md("<strong>hello & world</strong>")
# The & inside strong content should be escaped once
assert "&amp;amp;" not in out
assert "<strong>" in out
# --- inlineMd helper present in source ---
def test_inline_md_helper_in_ui_js(cleanup_test_sessions):
"""ui.js must define inlineMd() helper function."""
src = (REPO_ROOT / "static" / "ui.js").read_text()
assert "function inlineMd(" in src, "inlineMd() helper not found in ui.js"
def test_inline_md_used_in_list_handler(cleanup_test_sessions):
    """List handler in ui.js must call inlineMd() not esc() for item text."""
    src = (REPO_ROOT / "static" / "ui.js").read_text()
    # Recognise the list-block handler by any of these source patterns.
    # (The old code chained str.find() with `or`, which is broken: find()
    # returns -1 — a truthy value — on a miss, so the chain short-circuited
    # on the first pattern and never tried the alternatives, and a match at
    # index 0 — falsy — would have been treated as a miss.)
    list_markers = ("html+='<ul>'", 'html+=`<ul>`', "let html='<ul>'")
    has_list_handler = any(marker in src for marker in list_markers)
    assert has_list_handler or "inlineMd(text)" in src, "inlineMd not called in list handler"
    # Verify inlineMd is called, not bare esc
    assert "inlineMd(text)" in src, "inlineMd(text) call not found — list items may not render formatting"
def test_inline_md_used_in_blockquote_handler(cleanup_test_sessions):
"""Blockquote handler in ui.js must call inlineMd() not esc() for content."""
src = (REPO_ROOT / "static" / "ui.js").read_text()
assert "inlineMd(t)" in src, "inlineMd not called in blockquote/heading handler"
def test_sessions_js_has_svg_icons(cleanup_test_sessions):
"""sessions.js must define ICONS object with SVG strings for sidebar buttons."""
src = REPO_ROOT / "static" / "sessions.js"
code = src.read_text()
assert "const ICONS=" in code or "const ICONS =" in code, "ICONS constant not found"
for icon in ["pin", "folder", "archive", "trash", "dup"]:
assert icon + ":" in code or f"'{icon}'" in code, f"ICONS.{icon} not found"
assert "<svg" in code, "SVG content not found in ICONS"
def test_sessions_js_has_dropdown_actions(cleanup_test_sessions):
"""sessions.js must use a single trigger button and dropdown for session actions."""
src = REPO_ROOT / "static" / "sessions.js"
code = src.read_text()
assert "session-actions-trigger" in code, "session action trigger button not found in sessions.js"
assert "session-action-menu" in code, "session action dropdown menu not found in sessions.js"
def test_style_css_has_session_actions_dropdown(cleanup_test_sessions):
"""style.css must define trigger and dropdown styles for session actions."""
src = REPO_ROOT / "static" / "style.css"
code = src.read_text()
assert ".session-actions" in code, ".session-actions not found in style.css"
assert ".session-action-menu" in code, ".session-action-menu not found in style.css"
assert "position:fixed" in code or "position: fixed" in code, \
".session-action-menu must use position:fixed to avoid sidebar clipping"
def test_style_css_active_session_uses_accent(cleanup_test_sessions):
"""Active session style should use accent color variable, not hardcoded hex."""
src = REPO_ROOT / "static" / "style.css"
code = src.read_text()
assert "var(--accent" in code and ".session-item.active" in code, \
"Active session must use var(--accent) variables in style.css"
def test_sessions_js_uses_action_menu_not_per_row_buttons(cleanup_test_sessions):
"""sessions.js must use the single ⋯ action menu instead of per-row buttons.
The per-row button overlay was replaced with a single ⋯ trigger that opens a
positioned dropdown (session-action-menu). This removes the borderLeftColor
project colour override that the old code applied, which was the original
concern this test guarded. The new design uses a dot indicator for project
membership instead.
"""
src = REPO_ROOT / "static" / "sessions.js"
code = src.read_text()
assert "session-actions-trigger" in code, "session-actions-trigger not found in sessions.js"
assert "_openSessionActionMenu" in code, "_openSessionActionMenu not found in sessions.js"
assert "closeSessionActionMenu" in code, "closeSessionActionMenu not found in sessions.js"
# The old per-row buttons must not be present (they were replaced by the menu)
assert "act-pin" not in code, "old act-pin per-row button still in sessions.js"
assert "act-archive" not in code, "old act-archive per-row button still in sessions.js"

96
tests/test_sprint17.py Normal file
View File

@@ -0,0 +1,96 @@
"""
Sprint 17 Tests: send_key setting, commands.js static file, workspace subdir listing.
"""
import json, urllib.error, urllib.request
from tests._pytest_port import BASE
def get(path):
    """GET *path* on the test server; return (decoded JSON body, HTTP status)."""
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        payload = json.loads(resp.read())
        status = resp.status
    return payload, status
def post(path, body=None):
    """POST *body* as JSON to *path*; return (decoded JSON, HTTP status).

    On an HTTP error the error document and its status code are returned
    instead of raising, so tests can assert on 4xx/5xx responses.
    """
    payload = json.dumps(body or {}).encode()
    request = urllib.request.Request(
        BASE + path, data=payload,
        headers={"Content-Type": "application/json"})
    try:
        with urllib.request.urlopen(request, timeout=10) as resp:
            return json.loads(resp.read()), resp.status
    except urllib.error.HTTPError as err:
        return json.loads(err.read()), err.code
def make_session(created_list):
    """Create a fresh session, record its id in *created_list* for cleanup,
    and return (session_id, session dict)."""
    resp, _ = post("/api/session/new", {})
    session = resp["session"]
    created_list.append(session["session_id"])
    return session["session_id"], session
# ── Settings: send_key ──────────────────────────────────────────────────────
def test_settings_send_key_default():
"""GET /api/settings returns send_key with default value 'enter'."""
data, status = get("/api/settings")
assert status == 200
assert data.get("send_key") == "enter"
def test_settings_save_send_key():
    """POST /api/settings with send_key persists and round-trips."""
    try:
        # Save a non-default value and confirm it round-trips.
        _, status = post("/api/settings", {"send_key": "ctrl+enter"})
        assert status == 200
        data, _ = get("/api/settings")
        assert data["send_key"] == "ctrl+enter"
    finally:
        # Restore the default unconditionally.  No assertions here: an
        # assert inside `finally` would mask the original test failure
        # with a secondary cleanup failure.
        post("/api/settings", {"send_key": "enter"})
    # Verify the restore took effect (reached only on the success path).
    data, _ = get("/api/settings")
    assert data["send_key"] == "enter"
def test_settings_invalid_send_key_rejected():
"""POST /api/settings with invalid send_key value is silently ignored."""
# Set a known good value first
post("/api/settings", {"send_key": "enter"})
# Try to set an invalid value
data, status = post("/api/settings", {"send_key": "invalid_value"})
assert status == 200
# Should still be 'enter' (invalid value ignored)
assert data["send_key"] == "enter"
def test_settings_unknown_key_ignored():
"""POST /api/settings ignores unknown keys."""
data, status = post("/api/settings", {"unknown_key": "value", "send_key": "enter"})
assert status == 200
assert "unknown_key" not in data
# ── Static file: commands.js ────────────────────────────────────────────────
def test_static_commands_js_served():
"""GET /static/commands.js returns 200 and contains COMMANDS registry."""
req = urllib.request.Request(BASE + "/static/commands.js")
with urllib.request.urlopen(req, timeout=10) as r:
body = r.read().decode()
assert r.status == 200
assert "COMMANDS" in body
assert "executeCommand" in body
# ── Workspace: subdir listing ───────────────────────────────────────────────
def test_list_workspace_root():
"""GET /api/list with path=. returns entries for workspace root."""
created = []
sid, _ = make_session(created)
data, status = get(f"/api/list?session_id={sid}&path=.")
assert status == 200
assert "entries" in data
assert isinstance(data["entries"], list)

128
tests/test_sprint19.py Normal file
View File

@@ -0,0 +1,128 @@
"""
Sprint 19 Tests: auth/login, security headers, request size limit.
"""
import json, urllib.error, urllib.request
from tests._pytest_port import BASE
def get(path, headers=None):
    """GET *path*; return (decoded JSON, HTTP status, response-header dict)."""
    req = urllib.request.Request(BASE + path)
    for name, value in (headers or {}).items():
        req.add_header(name, value)
    with urllib.request.urlopen(req, timeout=10) as resp:
        return json.loads(resp.read()), resp.status, dict(resp.headers)
def post(path, body=None, headers=None):
    """POST JSON *body* to *path*; return (decoded JSON, status, header dict).

    HTTPError responses are decoded and returned rather than raised, so
    tests can assert on error payloads and codes directly.
    """
    req = urllib.request.Request(
        BASE + path,
        data=json.dumps(body or {}).encode(),
        headers={"Content-Type": "application/json"})
    for name, value in (headers or {}).items():
        req.add_header(name, value)
    try:
        with urllib.request.urlopen(req, timeout=10) as resp:
            return json.loads(resp.read()), resp.status, dict(resp.headers)
    except urllib.error.HTTPError as err:
        return json.loads(err.read()), err.code, dict(err.headers)
# ── Auth status (no password configured in test env) ──────────────────────
def test_auth_status_disabled():
"""Auth should be disabled by default (no password set)."""
d, status, _ = get("/api/auth/status")
assert status == 200
assert d["auth_enabled"] is False
def test_login_when_auth_disabled():
"""Login should succeed trivially when auth is not enabled."""
d, status, _ = post("/api/auth/login", {"password": "anything"})
assert status == 200
assert d["ok"] is True
def test_all_routes_accessible_without_auth():
"""When auth is disabled, all routes should work without cookies."""
d, status, _ = get("/api/sessions")
assert status == 200
assert "sessions" in d
def test_login_page_served():
"""GET /login should return the login page HTML."""
req = urllib.request.Request(BASE + "/login")
with urllib.request.urlopen(req, timeout=10) as r:
html = r.read().decode()
assert r.status == 200
assert "Sign in" in html
assert "Hermes" in html
# ── Security headers ─────────────────────────────────────────────────────
def test_security_headers_on_json():
"""JSON responses should include security headers."""
d, status, headers = get("/api/auth/status")
assert status == 200
assert headers.get("X-Content-Type-Options") == "nosniff"
assert headers.get("X-Frame-Options") == "DENY"
assert headers.get("Referrer-Policy") == "same-origin"
def test_security_headers_on_health():
"""Health endpoint should include security headers."""
d, status, headers = get("/health")
assert status == 200
assert headers.get("X-Content-Type-Options") == "nosniff"
def test_permissions_policy_does_not_disable_microphone():
"""Permissions-Policy must not hard-disable microphone access for same-origin voice input."""
_, status, headers = get("/health")
assert status == 200
policy = headers.get("Permissions-Policy", "")
assert policy, "Permissions-Policy header missing"
assert "microphone=()" not in policy, \
"Permissions-Policy must not block microphone access or desktop/mobile voice input cannot work"
def test_cache_control_no_store():
"""API responses should have Cache-Control: no-store."""
d, status, headers = get("/api/sessions")
assert headers.get("Cache-Control") == "no-store"
# ── Settings password field ──────────────────────────────────────────────
def test_settings_password_hash_not_exposed():
"""GET /api/settings must never expose the stored password hash."""
d, status, _ = get("/api/settings")
assert status == 200
assert "password_hash" not in d # security: never send hash to client
def test_settings_save_preserves_other_fields():
"""Saving settings should not break existing fields."""
# Get current settings
current, _, _ = get("/api/settings")
# Save with just send_key
d, status, _ = post("/api/settings", {"send_key": "enter"})
assert status == 200
# Verify other fields still present
updated, _, _ = get("/api/settings")
assert "default_model" in updated
assert "default_workspace" in updated
def test_settings_password_hash_not_directly_settable():
"""POST /api/settings with password_hash must not overwrite the stored hash."""
# Attempt to set a raw hash directly (attack vector)
post("/api/settings", {"password_hash": "deadbeef" * 8})
# Settings response must not expose it regardless
updated, status, _ = get("/api/settings")
assert status == 200
assert "password_hash" not in updated

106
tests/test_sprint2.py Normal file
View File

@@ -0,0 +1,106 @@
"""Sprint 2 tests: image preview, file types, markdown. Uses cleanup_test_sessions fixture."""
import io, json, uuid, urllib.request, urllib.error, pathlib
from tests._pytest_port import BASE
def get(path):
    """GET *path* and return (decoded JSON body, HTTP status)."""
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        status = resp.status
        data = json.loads(resp.read())
    return data, status
def get_raw(path):
    """GET *path*; return (raw body bytes, Content-Type header, HTTP status)."""
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        content_type = resp.headers.get('Content-Type', '')
        return resp.read(), content_type, resp.status
def post(path, body=None):
    """POST *body* as JSON to *path*; return (decoded JSON, HTTP status).

    HTTP error responses are returned (payload + code) instead of raising.
    """
    request = urllib.request.Request(
        BASE + path,
        data=json.dumps(body or {}).encode(),
        headers={"Content-Type": "application/json"})
    try:
        with urllib.request.urlopen(request, timeout=10) as resp:
            return json.loads(resp.read()), resp.status
    except urllib.error.HTTPError as err:
        return json.loads(err.read()), err.code
def make_session_tracked(created_list, ws=None):
    """Create a session (optionally rooted at workspace *ws*), register its
    id in *created_list* so the cleanup fixture removes it, and return
    (session_id, workspace Path).

    Uses the module-level ``pathlib`` import; the former redundant
    function-local ``import pathlib as _pathlib`` was dead weight.
    """
    body = {}
    if ws:
        body["workspace"] = str(ws)
    d, _ = post("/api/session/new", body)
    sid = d["session"]["session_id"]
    created_list.append(sid)
    return sid, pathlib.Path(d["session"]["workspace"])
def test_raw_endpoint_serves_png(cleanup_test_sessions):
sid, ws = make_session_tracked(cleanup_test_sessions)
png = (b"\x89PNG\r\n\x1a\n" b"\x00\x00\x00\rIHDR\x00\x00\x00\x01"
b"\x00\x00\x00\x01\x08\x02\x00\x00\x00"
b"\x90wS\xde\x00\x00\x00\x0cIDATx\x9cc"
b"\xf8\x0f\x00\x00\x01\x01\x00\x05\x18"
b"\xd8N\x00\x00\x00\x00IEND\xaeB`\x82")
(ws / "test.png").write_bytes(png)
raw, ct, status = get_raw(f"/api/file/raw?session_id={sid}&path=test.png")
assert status == 200
assert "image/png" in ct
assert raw == png
def test_raw_endpoint_serves_jpeg(cleanup_test_sessions):
sid, ws = make_session_tracked(cleanup_test_sessions)
jpeg = b"\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00\x00\x01\x00\x01\x00\x00\xff\xd9"
(ws / "photo.jpg").write_bytes(jpeg)
raw, ct, status = get_raw(f"/api/file/raw?session_id={sid}&path=photo.jpg")
assert status == 200
assert "image/jpeg" in ct
def test_raw_endpoint_serves_svg(cleanup_test_sessions):
sid, ws = make_session_tracked(cleanup_test_sessions)
svg = b"<svg xmlns=\"http://www.w3.org/2000/svg\" width=\"100\" height=\"100\"><circle/></svg>"
(ws / "icon.svg").write_bytes(svg)
raw, ct, status = get_raw(f"/api/file/raw?session_id={sid}&path=icon.svg")
assert status == 200
assert "image/svg" in ct
def test_raw_endpoint_path_traversal_blocked(cleanup_test_sessions):
    """Paths escaping the workspace (../../etc/passwd) must be rejected."""
    sid, _ = make_session_tracked(cleanup_test_sessions)
    try:
        get_raw(f"/api/file/raw?session_id={sid}&path=../../etc/passwd")
        # A bare `assert False` gives an opaque failure report; say why.
        assert False, "path-traversal request unexpectedly succeeded"
    except urllib.error.HTTPError as e:
        assert e.code in (400, 500)
def test_raw_endpoint_missing_file_returns_404(cleanup_test_sessions):
    """Requesting a nonexistent file must fail with an HTTP error."""
    sid, _ = make_session_tracked(cleanup_test_sessions)
    try:
        get_raw(f"/api/file/raw?session_id={sid}&path=no_such_file.png")
        # A bare `assert False` gives an opaque failure report; say why.
        assert False, "request for missing file unexpectedly succeeded"
    except urllib.error.HTTPError as e:
        assert e.code in (404, 500)
def test_md_file_returns_text_via_api_file(cleanup_test_sessions):
sid, ws = make_session_tracked(cleanup_test_sessions)
md = "# Hello\n\nThis is **bold**.\n"
(ws / "README.md").write_text(md)
data, status = get(f"/api/file?session_id={sid}&path=README.md")
assert status == 200
assert data["content"] == md
def test_md_file_with_table(cleanup_test_sessions):
sid, ws = make_session_tracked(cleanup_test_sessions)
md = "| Name | Value |\n|------|-------|\n| foo | bar |\n"
(ws / "table.md").write_text(md)
data, status = get(f"/api/file?session_id={sid}&path=table.md")
assert status == 200
assert "| Name | Value |" in data["content"]
def test_file_listing_includes_images(cleanup_test_sessions):
sid, ws = make_session_tracked(cleanup_test_sessions)
(ws / "photo.png").write_bytes(b"fake png")
(ws / "notes.md").write_text("# Notes")
(ws / "script.py").write_text("print('hello')")
data, status = get(f"/api/list?session_id={sid}&path=.")
assert status == 200
names = {e["name"]: e for e in data["entries"]}
assert "photo.png" in names
assert "notes.md" in names
assert "script.py" in names

444
tests/test_sprint20.py Normal file
View File

@@ -0,0 +1,444 @@
"""
Sprint 20 Tests: Voice input (mic button) via Web Speech API.
These tests verify the static assets contain the correct HTML structure,
CSS rules, and JS logic for the mic feature — all of which runs purely in
the browser with no server-side component.
"""
import re
import urllib.request
import json
import pathlib
from tests._pytest_port import BASE
def get_text(path):
    """Fetch *path* from the test server; return (decoded body, HTTP status)."""
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        body = resp.read().decode()
        code = resp.status
    return body, code
# ── index.html ────────────────────────────────────────────────────────────
def test_mic_button_present_in_html():
    """index.html must contain the mic button with id='btnMic'."""
    html, status = get_text("/")
    assert status == 200
    assert 'id="btnMic"' in html

def test_mic_button_has_mic_btn_class():
    """btnMic must carry the mic-btn CSS class for styling hooks."""
    html, _ = get_text("/")
    # Exact attribute match — pins the markup's class list.
    assert 'class="icon-btn mic-btn"' in html

def test_mic_button_hidden_by_default():
    """btnMic starts hidden (display:none) — JS shows it only if supported."""
    html, _ = get_text("/")
    # The button element should have display:none in its style attribute
    assert 'id="btnMic"' in html
    btn_match = re.search(r'id="btnMic"[^>]*>', html)
    assert btn_match, "btnMic element not found"
    assert 'display:none' in btn_match.group(0)

def test_mic_button_has_title():
    """btnMic should have a descriptive title for accessibility."""
    html, _ = get_text("/")
    # The regex captures only the opening tag, so 'title=' must be an attribute.
    btn_match = re.search(r'id="btnMic"[^>]*>', html)
    assert btn_match
    assert 'title=' in btn_match.group(0)

def test_mic_status_div_present():
    """index.html must contain the #micStatus listening indicator."""
    html, _ = get_text("/")
    assert 'id="micStatus"' in html

def test_mic_status_hidden_by_default():
    """#micStatus starts hidden — only shown during active recording."""
    html, _ = get_text("/")
    status_match = re.search(r'id="micStatus"[^>]*>', html)
    assert status_match, "#micStatus element not found"
    assert 'display:none' in status_match.group(0)

def test_mic_status_has_mic_dot():
    """#micStatus must contain a .mic-dot element for the pulse animation."""
    html, _ = get_text("/")
    # mic-dot should appear after micStatus
    idx_status = html.find('id="micStatus"')
    idx_dot = html.find('mic-dot', idx_status)
    assert idx_status != -1 and idx_dot != -1
    assert idx_dot > idx_status

def test_mic_status_has_listening_text():
    """#micStatus should display a 'Listening' label."""
    html, _ = get_text("/")
    # NOTE(review): checked page-wide, not scoped to #micStatus — any
    # 'Listening' text anywhere on the page satisfies this assertion.
    assert 'Listening' in html

def test_mic_button_svg_microphone_shape():
    """btnMic SVG must include the rect (mic body) and path (mic arc)."""
    html, _ = get_text("/")
    # Find mic button section
    btn_start = html.find('id="btnMic"')
    btn_end = html.find('</button>', btn_start) + len('</button>')
    btn_html = html[btn_start:btn_end]
    assert '<rect' in btn_html, "mic SVG missing rect (mic body)"
    assert '<path' in btn_html, "mic SVG missing path (arc)"
    assert '<line' in btn_html, "mic SVG missing line (stand)"

def test_mic_button_inside_composer_left():
    """btnMic must be inside .composer-left, next to the attach button."""
    html, _ = get_text("/")
    # NOTE(review): find('</div>') stops at the first close tag after the
    # container opens, so only markup up to the first nested </div> is
    # inspected — confirm both buttons precede any nested div.
    composer_left_start = html.find('class="composer-left"')
    composer_left_end = html.find('</div>', composer_left_start)
    section = html[composer_left_start:composer_left_end]
    assert 'btnAttach' in section
    assert 'btnMic' in section
# ── style.css ────────────────────────────────────────────────────────────
def test_mic_btn_css_rule_exists():
"""style.css must define .mic-btn rule."""
css, status = get_text("/static/style.css")
assert status == 200
assert '.mic-btn' in css
def test_mic_btn_recording_state_css():
""".mic-btn.recording must be defined for active recording visual state."""
css, _ = get_text("/static/style.css")
assert '.mic-btn.recording' in css
def test_mic_recording_color_error():
""".mic-btn.recording must use the error color variable or red."""
css, _ = get_text("/static/style.css")
recording_idx = css.find('.mic-btn.recording')
# Find the rule block after the selector
brace_open = css.find('{', recording_idx)
brace_close = css.find('}', brace_open)
rule = css[brace_open:brace_close]
assert 'var(--error)' in rule or '#e94560' in rule
def test_mic_recording_has_animation():
""".mic-btn.recording must use an animation for the pulse effect."""
css, _ = get_text("/static/style.css")
recording_idx = css.find('.mic-btn.recording')
brace_open = css.find('{', recording_idx)
brace_close = css.find('}', brace_open)
rule = css[brace_open:brace_close]
assert 'animation' in rule
def test_mic_pulse_keyframes_defined():
"""@keyframes mic-pulse must be defined for the pulsing animation."""
css, _ = get_text("/static/style.css")
assert 'mic-pulse' in css
assert '@keyframes' in css
def test_mic_status_css_rule_exists():
"""style.css must define .mic-status rule."""
css, _ = get_text("/static/style.css")
assert '.mic-status' in css
def test_mic_dot_css_rule_exists():
"""style.css must define .mic-dot rule with animation."""
css, _ = get_text("/static/style.css")
assert '.mic-dot' in css
dot_idx = css.find('.mic-dot')
brace_open = css.find('{', dot_idx)
brace_close = css.find('}', brace_open)
rule = css[brace_open:brace_close]
assert 'animation' in rule
def test_mic_btn_has_transition():
""".mic-btn must define a transition for smooth state changes."""
css, _ = get_text("/static/style.css")
mic_btn_idx = css.find('.mic-btn{')
if mic_btn_idx == -1:
mic_btn_idx = css.find('.mic-btn ')
brace_open = css.find('{', mic_btn_idx)
brace_close = css.find('}', brace_open)
rule = css[brace_open:brace_close]
assert 'transition' in rule
# ── boot.js ──────────────────────────────────────────────────────────────
def test_boot_js_serves_ok():
    """boot.js must be served successfully."""
    _, status = get_text("/static/boot.js")
    assert status == 200

def test_boot_js_speech_recognition_check():
    """boot.js must check for SpeechRecognition (with webkit fallback)."""
    js, _ = get_text("/static/boot.js")
    assert 'SpeechRecognition' in js
    assert 'webkitSpeechRecognition' in js

def test_boot_js_recognition_config():
    """boot.js must configure recognition.continuous, interimResults, and lang."""
    js, _ = get_text("/static/boot.js")
    assert 'recognition.continuous' in js
    assert 'recognition.interimResults' in js
    assert 'recognition.lang' in js

def test_boot_js_recognition_not_continuous():
    """recognition.continuous must be false (auto-stop after silence)."""
    js, _ = get_text("/static/boot.js")
    # Accept both minified and spaced assignment styles.
    assert 'recognition.continuous=false' in js or 'recognition.continuous = false' in js

def test_boot_js_recognition_interim_results():
    """recognition.interimResults must be true (live transcription preview)."""
    js, _ = get_text("/static/boot.js")
    assert 'recognition.interimResults=true' in js or 'recognition.interimResults = true' in js

def test_boot_js_recognition_lang_en():
    """recognition.lang must be set (static en-US or dynamic via _locale._speech)."""
    js, _ = get_text("/static/boot.js")
    # Accept either the old static value or the new locale-driven assignment
    assert (
        "recognition.lang='en-US'" in js
        or 'recognition.lang = "en-US"' in js
        or "recognition.lang=" in js  # dynamic: recognition.lang=(_locale._speech)||'en-US'
    )

def test_boot_js_onresult_handler():
    """boot.js must define recognition.onresult to handle transcription."""
    js, _ = get_text("/static/boot.js")
    assert 'recognition.onresult' in js

def test_boot_js_onend_handler():
    """boot.js must define recognition.onend to reset state when recording stops."""
    js, _ = get_text("/static/boot.js")
    assert 'recognition.onend' in js

def test_boot_js_onerror_handler():
    """boot.js must define recognition.onerror for graceful error handling."""
    js, _ = get_text("/static/boot.js")
    assert 'recognition.onerror' in js

def test_boot_js_not_allowed_error_message():
    """onerror must handle 'not-allowed' with a user-friendly message."""
    js, _ = get_text("/static/boot.js")
    assert 'not-allowed' in js
    # Loose wording check — any of the common phrasings counts as friendly.
    assert 'permission' in js.lower() or 'denied' in js.lower() or 'access' in js.lower()

def test_boot_js_no_speech_error_message():
    """onerror must handle 'no-speech' with a user-friendly message."""
    js, _ = get_text("/static/boot.js")
    assert 'no-speech' in js

def test_boot_js_network_error_message():
    """onerror must handle 'network' error."""
    js, _ = get_text("/static/boot.js")
    # Quoted needle so unrelated words like "networking" don't match.
    assert "'network'" in js or '"network"' in js

def test_boot_js_mic_active_flag():
    """boot.js must track recording state via _micActive flag."""
    js, _ = get_text("/static/boot.js")
    assert '_micActive' in js

def test_boot_js_mic_recording_class_toggle():
    """boot.js must toggle 'recording' CSS class on the mic button."""
    js, _ = get_text("/static/boot.js")
    assert "'recording'" in js or '"recording"' in js

def test_boot_js_mic_status_toggle():
    """boot.js must show/hide #micStatus during recording."""
    js, _ = get_text("/static/boot.js")
    assert 'micStatus' in js

def test_boot_js_send_stops_mic():
    """btnSend onclick must stop mic before sending (send guard)."""
    js, _ = get_text("/static/boot.js")
    # The send button onclick should check _micActive and stop recording
    send_onclick_idx = js.find("$('btnSend').onclick")
    assert send_onclick_idx != -1
    # Find the handler code — check that _micActive check appears near send assignment
    # NOTE(review): scanning only up to the first ';' assumes a compact
    # single-statement handler — confirm against boot.js formatting.
    handler_end = js.find(';', send_onclick_idx)
    handler = js[send_onclick_idx:handler_end + 1]
    assert '_micActive' in handler or 'stopMic' in handler.lower()

def test_boot_js_btn_mic_onclick():
    """boot.js must attach an onclick handler to btnMic."""
    js, _ = get_text("/static/boot.js")
    assert 'btn.onclick' in js or "btnMic.onclick" in js or "$('btnMic').onclick" in js

def test_boot_js_recognition_start():
    """boot.js must call recognition.start() to begin recording."""
    js, _ = get_text("/static/boot.js")
    assert 'recognition.start()' in js

def test_boot_js_recognition_stop():
    """boot.js must call recognition.stop() to end recording."""
    js, _ = get_text("/static/boot.js")
    assert 'recognition.stop()' in js

def test_boot_js_iife_guard():
    """Mic logic must be wrapped in an IIFE so it doesn't pollute global scope."""
    js, _ = get_text("/static/boot.js")
    # IIFE pattern: (function(){...})() or (() => {...})()
    assert '(function(){' in js or '(function () {' in js

def test_boot_js_browser_unsupported_guard_uses_fallback_capabilities():
    """boot.js must keep the mic available when either speech recognition OR recorder capture exists."""
    js, _ = get_text("/static/boot.js")
    assert 'navigator.mediaDevices' in js
    assert 'getUserMedia' in js
    assert 'MediaRecorder' in js
    assert '_canRecordAudio' in js or 'canRecordAudio' in js, \
        "boot.js should compute a recorder fallback instead of bailing only on SpeechRecognition"

def test_boot_js_media_recorder_fallback_posts_to_transcribe_api():
    """Desktop fallback must send recorded audio to /api/transcribe for transcription."""
    js, _ = get_text("/static/boot.js")
    assert 'api/transcribe' in js
    assert 'fetch(' in js

def test_routes_define_transcribe_endpoint():
    """Server routes must expose /api/transcribe for MediaRecorder fallback uploads."""
    # Static source check against the repo file (tests/.. -> repo root),
    # not an HTTP request to the running server.
    routes = pathlib.Path(__file__).parent.parent.joinpath("api/routes.py").read_text(encoding="utf-8")
    assert '"/api/transcribe"' in routes

def test_boot_js_shows_mic_button_when_any_voice_path_is_supported():
    """boot.js must reveal btnMic when speech recognition or recorder fallback is available."""
    js, _ = get_text("/static/boot.js")
    assert "btn.style.display=''" in js or 'btn.style.display = ""' in js

def test_boot_js_show_toast_on_error():
    """boot.js must call showToast() for mic errors."""
    js, _ = get_text("/static/boot.js")
    assert 'showToast' in js

def test_boot_js_autoresize_called():
    """boot.js must call autoResize() after updating textarea from transcript."""
    js, _ = get_text("/static/boot.js")
    assert 'autoResize()' in js

# ── Append behaviour (fix: mic appends to existing text, not replace) ────
def test_boot_js_prefix_variable_declared():
    """boot.js must declare _prefix variable to snapshot pre-existing textarea content."""
    js, _ = get_text("/static/boot.js")
    assert "_prefix" in js

def test_boot_js_prefix_captured_on_start():
    """_prefix must be set from ta.value when the user starts recording."""
    js, _ = get_text("/static/boot.js")
    # _prefix assignment must happen in the btn.onclick else branch (before recognition.start)
    btn_onclick_idx = js.find("btn.onclick")
    btn_onclick_end = js.find("};", btn_onclick_idx)
    onclick_body = js[btn_onclick_idx:btn_onclick_end]
    assert "_prefix=ta.value" in onclick_body or "_prefix = ta.value" in onclick_body

def test_boot_js_onresult_prepends_prefix():
    """onresult must include _prefix when writing to textarea (append, not replace)."""
    js, _ = get_text("/static/boot.js")
    onresult_idx = js.find("recognition.onresult")
    onresult_end = js.find("};", onresult_idx)
    onresult_body = js[onresult_idx:onresult_end]
    # ta.value must be set to _prefix + something, not just the transcript alone
    assert "_prefix" in onresult_body

def test_boot_js_onend_commits_with_prefix():
    """onend must commit _prefix + _finalText so appended text survives after recognition ends."""
    js, _ = get_text("/static/boot.js")
    onend_idx = js.find("recognition.onend")
    onend_end = js.find("};", onend_idx)
    onend_body = js[onend_idx:onend_end]
    assert "_prefix" in onend_body

def test_boot_js_prefix_reset_on_stop():
    """_prefix must be reset when recording stops so next session starts clean."""
    js, _ = get_text("/static/boot.js")
    # _setRecording(false) clears both _finalText and _prefix
    # NOTE(review): find("}") stops at the FIRST closing brace after the
    # function head, so only code before any nested block is scanned.
    set_rec_idx = js.find("function _setRecording")
    set_rec_end = js.find("}", set_rec_idx) + 1
    fn_body = js[set_rec_idx:set_rec_end]
    assert "_prefix" in fn_body

def test_boot_js_auto_space_between_prefix_and_transcript():
    """onend must insert a space between existing text and new transcript when needed."""
    js, _ = get_text("/static/boot.js")
    onend_idx = js.find("recognition.onend")
    onend_end = js.find("};", onend_idx)
    onend_body = js[onend_idx:onend_end]
    # Should handle spacing — look for trimStart or endsWith(' ') check
    has_spacing = ("trimStart" in onend_body or "endsWith(' ')" in onend_body
                   or "endsWith(\" \")" in onend_body or "endsWith('\\n')" in onend_body)
    assert has_spacing, "onend should handle spacing between prefix and new transcript"
# ── Regression: existing behaviour unchanged ──────────────────────────────
def test_attach_button_still_wired():
    """Regression guard: the attach-button click handler is still registered."""
    source, _ = get_text("/static/boot.js")
    assert "$('btnAttach').onclick" in source

def test_file_input_onchange_still_wired():
    """Regression guard: the file input's change handler is still registered."""
    source, _ = get_text("/static/boot.js")
    assert "$('fileInput').onchange" in source

def test_index_html_still_has_send_button():
    """Regression guard: the send button markup is still present."""
    page, _ = get_text("/")
    assert 'id="btnSend"' in page

def test_index_html_still_has_attach_button():
    """Regression guard: the attach button markup is still present."""
    page, _ = get_text("/")
    assert 'id="btnAttach"' in page

341
tests/test_sprint20b.py Normal file
View File

@@ -0,0 +1,341 @@
"""
Sprint 21 Tests: Send button polish — hidden until content, pop-in animation,
icon-only circle design.
"""
import re
import urllib.request
from tests._pytest_port import BASE
def get_text(path):
    """GET *path* on the test server; return (decoded response body, status)."""
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        text = resp.read().decode()
        status = resp.status
    return text, status
# ── index.html ────────────────────────────────────────────────────────────
def test_send_button_present():
    """btnSend must still exist in the DOM."""
    html, status = get_text("/")
    assert status == 200
    assert 'id="btnSend"' in html

def test_send_button_disabled_by_default():
    """btnSend must start disabled — enabled only when there is content."""
    html, _ = get_text("/")
    # Only the opening tag is inspected, so any form of the attribute matches.
    btn_match = re.search(r'id="btnSend"[^>]*>', html)
    assert btn_match, "btnSend element not found"
    assert 'disabled' in btn_match.group(0)

def test_send_button_no_text_label():
    """Send button must be icon-only — no visible 'Send' text label."""
    html, _ = get_text("/")
    # Find the full button element (from opening tag to closing tag)
    btn_open_end = html.find('>', html.find('id="btnSend"')) + 1
    btn_end = html.find('</button>', btn_open_end) + len('</button>')
    btn_inner = html[btn_open_end:btn_end]
    # Strip SVG content and any remaining tags; check visible text
    no_svg = re.sub(r'<svg[^>]*>.*?</svg>', '', btn_inner, flags=re.DOTALL)
    visible_text = re.sub(r'<[^>]+>', '', no_svg).strip()
    assert visible_text == '', f"Send button has visible text: {visible_text!r}"

def test_send_button_has_svg_icon():
    """Send button must have an SVG icon."""
    html, _ = get_text("/")
    btn_start = html.find('id="btnSend"')
    btn_end = html.find('</button>', btn_start) + len('</button>')
    btn_html = html[btn_start:btn_end]
    assert '<svg' in btn_html

def test_send_button_has_title_attribute():
    """btnSend must have a title attribute for accessibility (replaces text label)."""
    html, _ = get_text("/")
    btn_match = re.search(r'id="btnSend"[^>]*>', html)
    assert btn_match
    assert 'title=' in btn_match.group(0)

def test_send_button_svg_arrow_up():
    """Send button SVG should use an upward arrow (line + polyline or path)."""
    html, _ = get_text("/")
    btn_start = html.find('id="btnSend"')
    btn_end = html.find('</button>', btn_start) + len('</button>')
    btn_html = html[btn_start:btn_end]
    # Must have some directional shape element
    has_shape = ('<line' in btn_html or '<polyline' in btn_html or
                 '<polygon' in btn_html or '<path' in btn_html)
    assert has_shape, "Send button SVG missing directional shape"
# ── style.css ────────────────────────────────────────────────────────────
def test_send_btn_is_circle():
    """send-btn must use border-radius:50% for the circle shape."""
    css, status = get_text("/static/style.css")
    assert status == 200
    # NOTE(review): the rule-extraction pattern below assumes '.send-btn{'
    # appears verbatim; if the selector is missing, find() returns -1 and
    # the later find('{', -1)/slice degrades to an empty-string assertion
    # with a confusing failure message. Same applies to all sibling tests.
    send_idx = css.find('.send-btn{')
    brace_open = css.find('{', send_idx)
    brace_close = css.find('}', brace_open)
    rule = css[brace_open:brace_close]
    assert 'border-radius:50%' in rule or 'border-radius: 50%' in rule

def test_send_btn_fixed_dimensions():
    """send-btn must have explicit width and height (icon-circle, not text-padded)."""
    css, _ = get_text("/static/style.css")
    send_idx = css.find('.send-btn{')
    brace_open = css.find('{', send_idx)
    brace_close = css.find('}', brace_open)
    rule = css[brace_open:brace_close]
    assert 'width:' in rule or 'width :' in rule
    assert 'height:' in rule or 'height :' in rule

def test_send_btn_no_old_padding():
    """send-btn must not use text padding layout (old pill style removed)."""
    css, _ = get_text("/static/style.css")
    send_idx = css.find('.send-btn{')
    brace_open = css.find('{', send_idx)
    brace_close = css.find('}', brace_open)
    rule = css[brace_open:brace_close]
    # Old style used padding:7px 18px — should be gone
    assert 'padding:7px' not in rule and 'padding: 7px' not in rule

def test_send_btn_accent_background():
    """send-btn background must use the accent color variable."""
    css, _ = get_text("/static/style.css")
    send_idx = css.find('.send-btn{')
    brace_open = css.find('{', send_idx)
    brace_close = css.find('}', brace_open)
    rule = css[brace_open:brace_close]
    assert 'var(--accent)' in rule or 'var(--blue)' in rule or '7cb9ff' in rule

def test_send_btn_has_transition():
    """send-btn must have transition for smooth hover/active states."""
    css, _ = get_text("/static/style.css")
    send_idx = css.find('.send-btn{')
    brace_open = css.find('{', send_idx)
    brace_close = css.find('}', brace_open)
    rule = css[brace_open:brace_close]
    assert 'transition' in rule

def test_send_btn_has_box_shadow():
    """send-btn must have a box-shadow glow effect."""
    css, _ = get_text("/static/style.css")
    send_idx = css.find('.send-btn{')
    brace_open = css.find('{', send_idx)
    brace_close = css.find('}', brace_open)
    rule = css[brace_open:brace_close]
    assert 'box-shadow' in rule

def test_send_btn_hover_has_scale():
    """send-btn:hover must use transform:scale for a satisfying hover effect."""
    css, _ = get_text("/static/style.css")
    hover_idx = css.find('.send-btn:hover{')
    brace_open = css.find('{', hover_idx)
    brace_close = css.find('}', brace_open)
    rule = css[brace_open:brace_close]
    assert 'scale' in rule

def test_send_btn_active_shrinks():
    """send-btn:active must scale down slightly for tactile press feedback."""
    css, _ = get_text("/static/style.css")
    active_idx = css.find('.send-btn:active{')
    brace_open = css.find('{', active_idx)
    brace_close = css.find('}', brace_open)
    rule = css[brace_open:brace_close]
    assert 'scale' in rule

def test_send_btn_disabled_rule_exists():
    """send-btn:disabled must still be styled."""
    css, _ = get_text("/static/style.css")
    assert '.send-btn:disabled' in css

def test_send_btn_visible_class_defined():
    """.send-btn.visible class must be defined for the pop-in animation."""
    css, _ = get_text("/static/style.css")
    assert '.send-btn.visible' in css

def test_send_pop_in_keyframes_defined():
    """@keyframes send-pop-in must be defined."""
    css, _ = get_text("/static/style.css")
    assert 'send-pop-in' in css
    assert '@keyframes' in css
def _extract_keyframe(css, name):
"""Extract the full @keyframes block for the given animation name."""
# Find '@keyframes <name>' directly (forward search) to avoid hitting
# an earlier keyframe when multiple are defined on the same line.
kf_start = css.find('@keyframes ' + name)
assert kf_start != -1, f"@keyframes {name} not found in CSS"
depth = 0
kf_end = kf_start
for i, ch in enumerate(css[kf_start:], kf_start):
if ch == '{':
depth += 1
elif ch == '}':
depth -= 1
if depth == 0:
kf_end = i
break
return css[kf_start:kf_end]
def test_send_pop_in_uses_scale():
    """send-pop-in keyframe must animate from a scaled-down state."""
    css, _ = get_text("/static/style.css")
    kf_rule = _extract_keyframe(css, 'send-pop-in')
    assert 'scale' in kf_rule

def test_send_pop_in_uses_opacity():
    """send-pop-in keyframe must fade in (opacity transition)."""
    css, _ = get_text("/static/style.css")
    kf_rule = _extract_keyframe(css, 'send-pop-in')
    assert 'opacity' in kf_rule

def test_send_btn_mobile_override_no_padding():
    """Mobile override for send-btn must not add text padding (keeps circle shape)."""
    css, _ = get_text("/static/style.css")
    # Find the @media block
    # NOTE(review): only the FIRST @media block is inspected — a send-btn
    # override in a later media query would be missed.
    media_idx = css.find('@media')
    send_mobile_idx = css.find('.send-btn', media_idx)
    if send_mobile_idx == -1:
        return  # No mobile override, fine
    brace_open = css.find('{', send_mobile_idx)
    brace_close = css.find('}', brace_open)
    rule = css[brace_open:brace_close]
    assert 'padding:' not in rule and 'font-size' not in rule
# ── ui.js ─────────────────────────────────────────────────────────────────
def test_ui_js_update_send_btn_function():
    """ui.js must define updateSendBtn() function."""
    js, status = get_text("/static/ui.js")
    assert status == 200
    assert 'function updateSendBtn' in js

def test_update_send_btn_checks_content():
    """updateSendBtn must check textarea value length."""
    js, _ = get_text("/static/ui.js")
    # Slice out the function body: from the 'function' keyword up to the
    # first closing brace at column 0 ('\n}').
    fn_idx = js.find('function updateSendBtn')
    fn_end = js.find('\n}', fn_idx) + 2
    fn_body = js[fn_idx:fn_end]
    assert 'msg' in fn_body
    assert '.value' in fn_body
    assert '.length' in fn_body or '.trim()' in fn_body

def test_update_send_btn_checks_pending_files():
    """updateSendBtn must also show send button when files are attached."""
    js, _ = get_text("/static/ui.js")
    fn_idx = js.find('function updateSendBtn')
    fn_end = js.find('\n}', fn_idx) + 2
    fn_body = js[fn_idx:fn_end]
    assert 'pendingFiles' in fn_body

def test_update_send_btn_uses_visible_class():
    """updateSendBtn must add .visible class to trigger the pop-in animation."""
    js, _ = get_text("/static/ui.js")
    fn_idx = js.find('function updateSendBtn')
    fn_end = js.find('\n}', fn_idx) + 2
    fn_body = js[fn_idx:fn_end]
    assert 'visible' in fn_body

def test_update_send_btn_uses_disabled():
    """updateSendBtn must disable the button when no content or busy."""
    js, _ = get_text("/static/ui.js")
    fn_idx = js.find('function updateSendBtn')
    fn_end = js.find('\n}', fn_idx) + 2
    fn_body = js[fn_idx:fn_end]
    assert 'disabled' in fn_body

def test_set_busy_calls_update_send_btn():
    """setBusy must call updateSendBtn() so button hides while agent is responding."""
    js, _ = get_text("/static/ui.js")
    busy_idx = js.find('function setBusy')
    busy_end = js.find('\n}', busy_idx) + 2
    busy_body = js[busy_idx:busy_end]
    assert 'updateSendBtn' in busy_body

def test_render_tray_calls_update_send_btn():
    """renderTray must call updateSendBtn() so button appears when files are attached."""
    js, _ = get_text("/static/ui.js")
    tray_idx = js.find('function renderTray')
    tray_end = js.find('\n}', tray_idx) + 2
    tray_body = js[tray_idx:tray_end]
    assert 'updateSendBtn' in tray_body

# ── boot.js ──────────────────────────────────────────────────────────────
def test_boot_js_input_calls_update_send_btn():
    """boot.js input event listener must call updateSendBtn()."""
    js, status = get_text("/static/boot.js")
    assert status == 200
    assert 'updateSendBtn' in js

# ── messages.js ───────────────────────────────────────────────────────────
def test_auto_resize_calls_update_send_btn():
    """autoResize() must call updateSendBtn() so button hides after send clears textarea."""
    js, status = get_text("/static/messages.js")
    assert status == 200
    assert 'updateSendBtn' in js

# ── Regression: existing behaviour unchanged ──────────────────────────────
def test_send_button_still_has_send_btn_class():
    """btnSend must still carry class='send-btn' for CSS targeting."""
    html, _ = get_text("/")
    assert 'class="send-btn"' in html

def test_ui_js_set_busy_calls_update_send_btn():
    """setBusy must call updateSendBtn to manage button disabled state."""
    # NOTE(review): duplicates test_set_busy_calls_update_send_btn above —
    # consider removing one of the two.
    js, _ = get_text("/static/ui.js")
    busy_idx = js.find('function setBusy')
    busy_end = js.find('\n}', busy_idx) + 2
    busy_body = js[busy_idx:busy_end]
    assert 'updateSendBtn' in busy_body

def test_index_html_attach_button_unchanged():
    """btnAttach must still be present (no regression)."""
    html, _ = get_text("/")
    assert 'id="btnAttach"' in html

def test_send_function_still_exists():
    """send() function must still be defined in messages.js."""
    js, _ = get_text("/static/messages.js")
    assert 'async function send()' in js

196
tests/test_sprint23.py Normal file
View File

@@ -0,0 +1,196 @@
"""
Sprint 23 Tests: agentic transparency — token/cost display, session usage fields,
subagent card names, skill picker in cron, skill linked files.
"""
import json, urllib.error, urllib.request
from tests._pytest_port import BASE
def get(path):
    """GET *path* on the test server; return (decoded JSON body, HTTP status)."""
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        payload = json.loads(resp.read())
        code = resp.status
    return payload, code

def post(path, body=None):
    """POST *body* (default {}) as JSON to *path*; return (decoded JSON, status).

    HTTP error responses are not raised — their JSON body and status code
    are returned just like a success, so tests can assert on error payloads.
    """
    encoded = json.dumps(body or {}).encode()
    request = urllib.request.Request(
        BASE + path, data=encoded,
        headers={"Content-Type": "application/json"})
    try:
        with urllib.request.urlopen(request, timeout=10) as resp:
            return json.loads(resp.read()), resp.status
    except urllib.error.HTTPError as err:
        return json.loads(err.read()), err.code
def make_session(created_list):
    """Create a fresh session, record its id in *created_list* for cleanup,
    and return (session_id, session dict from the creation response)."""
    resp, _ = post("/api/session/new", {})
    session = resp["session"]
    sid = session["session_id"]
    created_list.append(sid)
    return sid, session
# ── Session usage fields ─────────────────────────────────────────────────
def test_new_session_has_usage_fields():
"""New session should include input_tokens, output_tokens, estimated_cost."""
created = []
try:
sid, sess = make_session(created)
post("/api/session/rename", {"session_id": sid, "title": "Usage Test"})
d, status = get(f"/api/session?session_id={sid}")
assert status == 200
sess = d["session"]
assert "input_tokens" in sess, "input_tokens field missing from session"
assert "output_tokens" in sess, "output_tokens field missing from session"
assert "estimated_cost" in sess, "estimated_cost field missing from session"
assert sess["input_tokens"] == 0
assert sess["output_tokens"] == 0
finally:
for s in created:
post("/api/session/delete", {"session_id": s})
def test_session_compact_has_usage_fields():
"""Session list should include usage fields in compact form."""
created = []
try:
sid, _ = make_session(created)
post("/api/session/rename", {"session_id": sid, "title": "Compact Usage"})
d, status = get("/api/sessions")
assert status == 200
match = [s for s in d["sessions"] if s["session_id"] == sid]
assert len(match) == 1
assert "input_tokens" in match[0], "input_tokens missing from session list"
assert "output_tokens" in match[0], "output_tokens missing from session list"
assert match[0]["input_tokens"] == 0
assert match[0]["output_tokens"] == 0
finally:
for s in created:
post("/api/session/delete", {"session_id": s})
def test_session_usage_defaults_zero():
"""New session usage fields should default to 0/None in creation response."""
created = []
try:
sid, sess = make_session(created)
assert "input_tokens" in sess, "input_tokens missing from new session response"
assert "output_tokens" in sess, "output_tokens missing from new session response"
assert sess["input_tokens"] == 0
assert sess["output_tokens"] == 0
finally:
for s in created:
post("/api/session/delete", {"session_id": s})
# ── Skills content linked_files ──────────────────────────────────────────
def test_skills_content_requires_name():
"""GET /api/skills/content without name should return 400 (or 500 if skills module unavailable)."""
try:
d, status = get("/api/skills/content")
assert status in (400, 500), f"Expected 400/500 for missing name, got {status}"
except urllib.error.HTTPError as e:
assert e.code in (400, 500), f"Expected 400/500 for missing name, got {e.code}"
def test_skills_content_has_linked_files_key():
"""GET /api/skills/content should always return a linked_files key."""
try:
d, status = get("/api/skills")
if not d.get("skills"):
return # no skills in test env, skip
name = d["skills"][0]["name"]
d2, status2 = get(f"/api/skills/content?name={name}")
assert status2 == 200
assert "linked_files" in d2, "linked_files key missing from skills/content response"
# linked_files must be a dict (possibly empty), not None
assert isinstance(d2["linked_files"], dict), "linked_files must be a dict"
except urllib.error.HTTPError:
pass # skills module unavailable in this env
def test_skills_content_file_path_traversal_rejected():
"""GET /api/skills/content with traversal path should be rejected."""
from urllib.parse import quote as _quote
try:
d, status = get("/api/skills")
if not d.get("skills"):
return # no skills in test env, skip
name = d["skills"][0]["name"]
traversal = _quote("../../etc/passwd", safe="")
try:
d2, status2 = get(f"/api/skills/content?name={name}&file={traversal}")
assert status2 in (400, 404, 500), f"Path traversal should be rejected, got {status2}"
except urllib.error.HTTPError as e:
assert e.code in (400, 404, 500), f"Path traversal should be rejected, got {e.code}"
except urllib.error.HTTPError:
pass # skills module unavailable in test env
def test_skills_content_wildcard_name_rejected():
"""GET /api/skills/content with glob wildcard in name should be rejected when file param present."""
try:
try:
d2, status2 = get("/api/skills/content?name=*&file=SKILL.md")
assert status2 == 400, f"Wildcard name should return 400, got {status2}"
except urllib.error.HTTPError as e:
assert e.code in (400, 404), f"Wildcard name should be rejected, got {e.code}"
except Exception:
pass
# ── Cron create with skills ───────────────────────────────────────────────
def test_cron_create_accepts_skills():
    """POST /api/crons/create should accept and store a skills array (or 500 if cron module unavailable)."""
    # Track job IDs we created so the finally-block can clean them up even
    # when an assertion fails midway.
    created_jobs = []
    try:
        body = {
            "name": "test-sprint23-skills",
            "schedule": "0 9 * * *",
            "prompt": "test prompt",
            "deliver": "local",
            "skills": ["some-skill"]
        }
        d, status = post("/api/crons/create", body)
        # Heuristic skip: if the server reports a module/cron availability
        # error, the cron feature isn't present in this environment.
        if status in (400, 500) and ('module' in str(d.get('error','')) or 'cron' in str(d.get('error',''))):
            return # cron module not available in test env
        assert status == 200, f"Expected 200 from cron create, got {status}: {d}"
        assert d.get("ok"), f"Cron create did not return ok: {d}"
        # The job ID may live under "job.id" or top-level "id" depending on
        # the server version — accept either.
        job_id = d.get("job", {}).get("id") or d.get("id")
        if job_id:
            created_jobs.append(job_id)
        # Verify job appears in list
        jobs_d, _ = get("/api/crons")
        job = next((j for j in jobs_d.get("jobs", []) if j.get("name") == "test-sprint23-skills"), None)
        assert job is not None, "Created cron job not found in job list"
        # Either plural "skills" list or legacy singular "skill" is acceptable.
        assert job.get("skills") == ["some-skill"] or job.get("skill") == "some-skill", \
            f"skills not stored on job: {job}"
    finally:
        try:
            # Delete by captured ID first, then sweep by name in case the
            # create succeeded but no ID was returned.
            for jid in created_jobs:
                post("/api/crons/delete", {"id": jid})
            jobs_d, _ = get("/api/crons")
            for j in jobs_d.get("jobs", []):
                if j.get("name") == "test-sprint23-skills":
                    post("/api/crons/delete", {"id": j["id"]})
        except Exception:
            pass # cron module may not be available
# ── Tool call integrity ──────────────────────────────────────────────────
def test_tool_calls_have_real_names():
    """Tool calls in session JSON should not have unresolved 'tool' name."""
    created = []
    try:
        sid, _ = make_session(created)
        payload, status = get(f"/api/session?session_id={sid}")
        assert status == 200
        tool_calls = payload["session"].get("tool_calls", [])
        for tc in tool_calls:
            assert tc.get("name") not in ("tool", "", None), f"Unresolved tool name: {tc}"
    finally:
        for s in created:
            post("/api/session/delete", {"session_id": s})

175
tests/test_sprint26.py Normal file
View File

@@ -0,0 +1,175 @@
"""
Sprint 26 Tests: canonical appearance settings persist and legacy theme names
map onto the new theme + skin system.
"""
import json, urllib.error, urllib.request
import pathlib
import sys
from tests._pytest_port import BASE
REPO_ROOT = pathlib.Path(__file__).resolve().parent.parent
if str(REPO_ROOT) not in sys.path:
sys.path.insert(0, str(REPO_ROOT))
from api import config
def get(path):
    """GET ``BASE + path``; return (parsed JSON body, HTTP status)."""
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        payload = resp.read()
        return json.loads(payload), resp.status
def post(path, body=None):
    """POST a JSON body; return (parsed JSON, status) even for HTTP error codes."""
    payload = json.dumps(body or {}).encode()
    request = urllib.request.Request(
        BASE + path, data=payload,
        headers={"Content-Type": "application/json"})
    try:
        with urllib.request.urlopen(request, timeout=10) as resp:
            return json.loads(resp.read()), resp.status
    except urllib.error.HTTPError as err:
        # Error responses still carry a JSON body — surface it to the caller.
        return json.loads(err.read()), err.code
# ── Theme settings ───────────────────────────────────────────────────────
def test_settings_default_theme():
    """Default theme should be 'dark'."""
    payload, status = get("/api/settings")
    assert status == 200
    assert payload.get("theme") == "dark"
def test_settings_set_theme_light_persists():
    """Setting theme to 'light' should persist and round-trip."""
    try:
        _, status = post("/api/settings", {"theme": "light"})
        assert status == 200
        fetched, _ = get("/api/settings")
        assert fetched.get("theme") == "light"
    finally:
        # Restore the default so later tests see a clean state.
        post("/api/settings", {"theme": "dark"})
def test_settings_set_theme_light():
    """Setting theme to 'light' should persist.

    NOTE(review): near-duplicate of test_settings_set_theme_light_persists;
    kept for coverage continuity.
    """
    try:
        post("/api/settings", {"theme": "light"})
        current, _ = get("/api/settings")
        assert current.get("theme") == "light"
    finally:
        post("/api/settings", {"theme": "dark"})
def test_settings_set_theme_system():
    """Setting theme to 'system' should persist."""
    try:
        post("/api/settings", {"theme": "system"})
        current, _ = get("/api/settings")
        assert current.get("theme") == "system"
    finally:
        post("/api/settings", {"theme": "dark"})
def test_settings_set_skin():
    """Setting skin should persist."""
    try:
        post("/api/settings", {"skin": "ares"})
        current, _ = get("/api/settings")
        assert current.get("skin") == "ares"
    finally:
        post("/api/settings", {"skin": "default"})
def test_settings_set_skin_poseidon():
    """Setting skin to 'poseidon' should persist."""
    try:
        post("/api/settings", {"skin": "poseidon"})
        current, _ = get("/api/settings")
        assert current.get("skin") == "poseidon"
    finally:
        post("/api/settings", {"skin": "default"})
def test_settings_legacy_theme_maps_to_dark_skin_pair():
    """Legacy theme names should map to the closest supported theme + skin."""
    try:
        _, status = post("/api/settings", {"theme": "slate"})
        assert status == 200
        current, _ = get("/api/settings")
        # Legacy 'slate' splits into dark theme + slate accent skin.
        assert current.get("theme") == "dark"
        assert current.get("skin") == "slate"
    finally:
        post("/api/settings", {"theme": "dark", "skin": "default"})
def test_settings_legacy_monokai_maps_to_sisyphus_skin():
    """Monokai should migrate onto the closest supported accent skin."""
    try:
        _, status = post("/api/settings", {"theme": "monokai"})
        assert status == 200
        current, _ = get("/api/settings")
        assert current.get("theme") == "dark"
        assert current.get("skin") == "sisyphus"
    finally:
        post("/api/settings", {"theme": "dark", "skin": "default"})
def test_settings_unknown_theme_falls_back_to_dark_default():
    """Unknown themes should normalize to a safe canonical appearance."""
    try:
        _, status = post("/api/settings", {"theme": "my-custom-theme"})
        assert status == 200
        current, _ = get("/api/settings")
        assert current.get("theme") == "dark"
        assert current.get("skin") == "default"
    finally:
        post("/api/settings", {"theme": "dark", "skin": "default"})
def test_settings_invalid_skin_falls_back_to_default():
    """Unknown skin names should normalize back to the default accent."""
    try:
        _, status = post("/api/settings", {"skin": "not-a-skin"})
        assert status == 200
        current, _ = get("/api/settings")
        assert current.get("skin") == "default"
    finally:
        post("/api/settings", {"skin": "default"})
def test_load_settings_normalizes_legacy_theme_from_file(monkeypatch, tmp_path):
    """Existing settings.json files with legacy theme names should normalize on load."""
    legacy = {"theme": "solarized"}
    settings_path = tmp_path / "settings.json"
    settings_path.write_text(json.dumps(legacy), encoding="utf-8")
    # Point the config module at the temporary file for this test only.
    monkeypatch.setattr(config, "SETTINGS_FILE", settings_path)
    loaded = config.load_settings()
    assert loaded["theme"] == "dark"
    assert loaded["skin"] == "poseidon"
def test_theme_does_not_break_other_settings():
    """Setting theme should not disturb other settings."""
    before, _ = get("/api/settings")
    send_key_before = before.get("send_key")
    try:
        post("/api/settings", {"theme": "light"})
        after, _ = get("/api/settings")
        # Unrelated keys survive a theme change.
        assert after.get("send_key") == send_key_before
        assert after.get("theme") == "light"
    finally:
        post("/api/settings", {"theme": "dark"})
def test_theme_survives_round_trip():
    """Theme set via POST should appear in subsequent GET."""
    try:
        post("/api/settings", {"theme": "light"})
        fetched, status = get("/api/settings")
        assert status == 200
        assert fetched["theme"] == "light"
    finally:
        post("/api/settings", {"theme": "dark"})

136
tests/test_sprint27.py Normal file
View File

@@ -0,0 +1,136 @@
"""
Sprint 27 Tests: configurable assistant display name (bot_name).
Tests cover settings API round-trip, empty/missing input defaults,
login page rendering, and server-side sanitization.
"""
import json
import urllib.error
import urllib.request
from tests._pytest_port import BASE
def get(path):
    """GET ``BASE + path``; return (parsed JSON body, HTTP status)."""
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        return json.loads(resp.read()), resp.status
def get_raw(path):
    """GET ``BASE + path``; return (decoded text body, HTTP status)."""
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        text = resp.read().decode()
        return text, resp.status
def post(path, body=None):
    """POST a JSON body; return (parsed JSON, status) even for HTTP error codes."""
    payload = json.dumps(body or {}).encode()
    request = urllib.request.Request(
        BASE + path, data=payload,
        headers={"Content-Type": "application/json"})
    try:
        with urllib.request.urlopen(request, timeout=10) as resp:
            return json.loads(resp.read()), resp.status
    except urllib.error.HTTPError as err:
        # Error responses still carry a JSON body — surface it to the caller.
        return json.loads(err.read()), err.code
# ── Default value ─────────────────────────────────────────────────────────
def test_settings_default_bot_name():
    """GET /api/settings should return bot_name defaulting to 'Hermes'."""
    payload, status = get("/api/settings")
    assert status == 200
    assert "bot_name" in payload
    assert payload["bot_name"] == "Hermes"
# ── Round-trip ────────────────────────────────────────────────────────────
def test_settings_set_bot_name():
    """POST /api/settings with bot_name should persist and round-trip."""
    try:
        posted, status = post("/api/settings", {"bot_name": "TestBot"})
        assert status == 200
        assert posted.get("bot_name") == "TestBot"
        fetched, _ = get("/api/settings")
        assert fetched.get("bot_name") == "TestBot"
    finally:
        post("/api/settings", {"bot_name": "Hermes"})
def test_settings_bot_name_special_chars():
    """bot_name with safe special characters should persist correctly."""
    try:
        _, status = post("/api/settings", {"bot_name": "My Assistant 2.0"})
        assert status == 200
        fetched, _ = get("/api/settings")
        assert fetched.get("bot_name") == "My Assistant 2.0"
    finally:
        post("/api/settings", {"bot_name": "Hermes"})
# ── Server-side sanitization ──────────────────────────────────────────────
def test_settings_empty_bot_name_defaults_to_hermes():
    """Posting an empty bot_name should default to 'Hermes' server-side."""
    try:
        posted, status = post("/api/settings", {"bot_name": ""})
        assert status == 200
        assert posted.get("bot_name") == "Hermes"
        fetched, _ = get("/api/settings")
        assert fetched.get("bot_name") == "Hermes"
    finally:
        post("/api/settings", {"bot_name": "Hermes"})
def test_settings_whitespace_bot_name_defaults_to_hermes():
    """Posting a whitespace-only bot_name should default to 'Hermes'."""
    try:
        posted, status = post("/api/settings", {"bot_name": "   "})
        assert status == 200
        assert posted.get("bot_name") == "Hermes"
    finally:
        post("/api/settings", {"bot_name": "Hermes"})
# ── Login page rendering ──────────────────────────────────────────────────
def test_login_page_shows_default_bot_name():
    """GET /login should contain 'Hermes' in title and h1 when default."""
    page, status = get_raw("/login")
    assert status == 200
    assert "<title>Hermes" in page
    assert "<h1>Hermes</h1>" in page
def test_login_page_shows_custom_bot_name():
    """GET /login should reflect the configured bot_name."""
    try:
        post("/api/settings", {"bot_name": "Aria"})
        page, status = get_raw("/login")
        assert status == 200
        assert "<title>Aria" in page
        assert "<h1>Aria</h1>" in page
    finally:
        post("/api/settings", {"bot_name": "Hermes"})
def test_login_page_empty_name_does_not_crash():
    """Login page must not 500 even if somehow bot_name is empty in settings."""
    # Forcing an empty stored value would require patching the settings file
    # directly, and the guard in POST /api/settings prevents storing empty —
    # so we settle for verifying that /login renders reliably.
    page, status = get_raw("/login")
    assert status == 200
    assert "Sign in" in page
def test_login_page_xss_escaped():
    """bot_name with HTML special chars should be escaped in the login page."""
    try:
        post("/api/settings", {"bot_name": "<script>alert(1)</script>"})
        page, status = get_raw("/login")
        assert status == 200
        # The raw tag must never render unescaped...
        assert "<script>alert(1)</script>" not in page
        # ...while the HTML-entity form should be present.
        assert "&lt;script&gt;" in page
    finally:
        post("/api/settings", {"bot_name": "Hermes"})

224
tests/test_sprint28.py Normal file
View File

@@ -0,0 +1,224 @@
"""
Sprint 28 Tests: /personality slash command — backend API coverage.
Tests: GET /api/personalities, POST /api/personality/set, Session.compact(),
path traversal defence, size cap, clear personality.
"""
import json
import pathlib
import shutil
import sys
import urllib.error
import urllib.request
# Import test constants from conftest (same process — these are module-level values)
sys.path.insert(0, str(pathlib.Path(__file__).parent))
from conftest import TEST_STATE_DIR
from tests._pytest_port import BASE
def get(path):
    """GET ``BASE + path``; return (parsed JSON body, HTTP status)."""
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        return json.loads(resp.read()), resp.status
def post(path, body=None):
    """POST a JSON body; return (parsed JSON, status) even for HTTP error codes."""
    payload = json.dumps(body or {}).encode()
    request = urllib.request.Request(
        BASE + path, data=payload,
        headers={"Content-Type": "application/json"})
    try:
        with urllib.request.urlopen(request, timeout=10) as resp:
            return json.loads(resp.read()), resp.status
    except urllib.error.HTTPError as err:
        return json.loads(err.read()), err.code
def _personalities_dir():
    """Return (creating if needed) the personalities dir the test server uses.

    conftest sets HERMES_HOME=TEST_STATE_DIR in the server's environment, so
    api/profiles resolves its home to TEST_STATE_DIR and personalities live
    at TEST_STATE_DIR/personalities.
    """
    path = TEST_STATE_DIR / 'personalities'
    path.mkdir(parents=True, exist_ok=True)
    return path
def _make_personality(name, content="# Test Bot\nA test personality."):
    """Create a personality directory containing a SOUL.md and return it."""
    target = _personalities_dir() / name
    target.mkdir(parents=True, exist_ok=True)
    soul = target / "SOUL.md"
    soul.write_text(content)
    return target
def _make_session():
    """Create a fresh session via the API and return its session_id."""
    payload, status = post("/api/session/new", {})
    assert status == 200, f"Failed to create session: {payload}"
    return payload["session"]["session_id"]
def _cleanup_session(sid):
    """Best-effort delete of a test session; failures are ignored."""
    try:
        post("/api/session/delete", {"session_id": sid})
    except Exception:
        pass
# ── GET /api/personalities ────────────────────────────────────────────────────
def test_personalities_empty_when_none_exist():
    """GET /api/personalities returns empty list when no personalities exist."""
    base = _personalities_dir()
    # Remove every real personality directory; leave symlinks untouched.
    for entry in list(base.iterdir()):
        if entry.is_dir() and not entry.is_symlink():
            shutil.rmtree(entry)
    payload, status = get("/api/personalities")
    assert status == 200
    assert payload.get("personalities") == []
def test_personalities_lists_from_config():
    """GET /api/personalities returns personalities from config.yaml agent.personalities.

    Effectively a no-op if no personalities are configured in the test env.
    """
    payload, status = get("/api/personalities")
    assert status == 200
    assert isinstance(payload.get("personalities"), list)
    # When entries exist, each must carry the expected keys.
    for entry in payload.get("personalities", []):
        assert "name" in entry
        assert "description" in entry
def test_personalities_returns_empty_when_none_configured():
    """GET /api/personalities returns empty list when no personalities in config."""
    # The test server starts from a clean state dir (no config.yaml), so
    # agent.personalities defaults to empty — but the real ~/.hermes config
    # may still be picked up, so we only assert the response structure.
    payload, status = get("/api/personalities")
    assert status == 200
    assert isinstance(payload.get("personalities"), list)
def test_personalities_skips_non_dict_config():
    """GET /api/personalities handles non-dict agent config gracefully."""
    payload, status = get("/api/personalities")
    assert status == 200
    assert isinstance(payload.get("personalities"), list)
# ── POST /api/personality/set ─────────────────────────────────────────────────
_test_personalities = {}
def _inject_personality(name, value):
    """Register *value* under *name* in the test config.yaml so the server sees it."""
    _test_personalities.update({name: value})
    _write_test_config()
def _remove_personality(name):
    """Drop *name* from the test config.yaml (no-op when absent)."""
    if name in _test_personalities:
        del _test_personalities[name]
    _write_test_config()
def _write_test_config():
    """Write config.yaml with test personalities using simple YAML format.

    Values are emitted as double-quoted YAML scalars. Backslashes and double
    quotes inside a value are escaped so the generated file stays parseable
    even when a test injects a value containing those characters (the
    previous f'"{v}"' form produced invalid YAML in that case). Output is
    unchanged for plain values.
    """
    def _quoted(value):
        # YAML double-quoted scalar: escape backslash first, then the quote.
        text = str(value).replace('\\', '\\\\').replace('"', '\\"')
        return f'"{text}"'
    TEST_STATE_DIR.mkdir(parents=True, exist_ok=True)
    config_path = TEST_STATE_DIR / 'config.yaml'
    lines = ['agent:', '  personalities:']
    for pname, pval in _test_personalities.items():
        if isinstance(pval, dict):
            # Nested mapping form: name -> {key: value, ...}
            lines.append(f'    {pname}:')
            for k, v in pval.items():
                lines.append(f'      {k}: {_quoted(v)}')
        else:
            # Shorthand form: name -> scalar prompt string
            lines.append(f'    {pname}: {_quoted(pval)}')
    config_path.write_text('\n'.join(lines) + '\n')
def test_set_personality_valid():
    """Setting a personality that exists in config stores name and returns prompt.

    Effectively skipped if config.yaml has no personalities (common in test
    environments).
    """
    listing, _ = get("/api/personalities")
    if not listing.get("personalities"):
        return # skip — no personalities in test server config
    name = listing["personalities"][0]["name"]
    sid = _make_session()
    try:
        result, code = post("/api/personality/set", {"session_id": sid, "name": name})
        assert code == 200
        assert result.get("ok") is True
        assert result.get("personality") == name
    finally:
        _cleanup_session(sid)
def test_set_personality_persists_in_compact():
    """After setting personality, GET /api/session returns personality in compact.

    Effectively skipped if config.yaml has no personalities.
    """
    listing, _ = get("/api/personalities")
    if not listing.get("personalities"):
        return # skip
    name = listing["personalities"][0]["name"]
    sid = _make_session()
    try:
        post("/api/personality/set", {"session_id": sid, "name": name})
        fetched, fetch_status = get(f"/api/session?session_id={sid}")
        assert fetch_status == 200
        session = fetched.get("session", {})
        assert session.get("personality") == name
    finally:
        _cleanup_session(sid)
def test_clear_personality_sets_null():
    """Clearing personality with name='' sets it to None (null in JSON)."""
    sid = _make_session()
    try:
        # Empty name means "unset" — no config validation needed for a clear.
        cleared, code = post("/api/personality/set", {"session_id": sid, "name": ""})
        assert code == 200
        assert cleared.get("personality") is None
        # Confirm the cleared value was persisted.
        fetched, fetch_code = get(f"/api/session?session_id={sid}")
        assert fetch_code == 200
        assert fetched.get("session", {}).get("personality") is None
    finally:
        _cleanup_session(sid)
def test_set_personality_not_found_returns_404():
    """Setting a non-existent personality returns 404."""
    sid = _make_session()
    try:
        _, status = post("/api/personality/set",
                         {"session_id": sid, "name": "doesnotexist"})
        assert status == 404
    finally:
        _cleanup_session(sid)
def test_set_personality_nonexistent_returns_404():
    """Names not in config.yaml agent.personalities return 404.

    NOTE(review): near-duplicate of test_set_personality_not_found_returns_404;
    kept for coverage continuity.
    """
    sid = _make_session()
    try:
        payload, status = post("/api/personality/set",
                               {"session_id": sid, "name": "doesnotexist"})
        assert status == 404, f"Expected 404, got {status}: {payload}"
    finally:
        _cleanup_session(sid)
def test_set_personality_missing_session_returns_404():
    """Setting personality on non-existent session returns 404."""
    _, status = post("/api/personality/set",
                     {"session_id": "nonexistent000", "name": "x"})
    assert status == 404

731
tests/test_sprint29.py Normal file
View File

@@ -0,0 +1,731 @@
"""
Sprint 29 Tests: Security hardening — 12 fixes from PR #171.
Covers:
1. CSRF protection — cross-origin POST rejected, same-origin allowed
2. Login rate limiting — 5th attempt 429, 6th rejected, still works after burst
3. Session ID validation — non-hex chars rejected in Session.load()
4. Error path sanitization — _sanitize_error() strips filesystem paths
5. Secure cookie detection — getattr used safely on plain socket
6. HMAC signature length — 32-char hex (128-bit), not 16
7. Skills path traversal — path outside SKILLS_DIR rejected
8. Content-Disposition for dangerous MIME types — HTML/SVG force download
9. PBKDF2 password hashing — save_settings uses auth._hash_password
10. Non-loopback startup warning (manual / integration test)
11. SSRF DNS check logic (unit test on helper function)
12. ENV_LOCK export — _ENV_LOCK importable from streaming module
"""
import importlib
import json
import pathlib
import sys
import time
import urllib.error
import urllib.parse
import urllib.request
sys.path.insert(0, str(pathlib.Path(__file__).parent))
from conftest import TEST_STATE_DIR
from tests._pytest_port import BASE
def get(path, headers=None):
    """GET ``BASE + path`` with optional headers; return (json, status) even on HTTP errors."""
    request = urllib.request.Request(BASE + path, headers=headers or {})
    try:
        with urllib.request.urlopen(request, timeout=10) as resp:
            return json.loads(resp.read()), resp.status
    except urllib.error.HTTPError as err:
        return json.loads(err.read()), err.code
def post(path, body=None, headers=None):
    """POST JSON with optional extra headers; return (json, status) even on HTTP errors."""
    payload = json.dumps(body or {}).encode()
    merged = {"Content-Type": "application/json"}
    merged.update(headers or {})
    request = urllib.request.Request(BASE + path, data=payload, headers=merged)
    try:
        with urllib.request.urlopen(request, timeout=10) as resp:
            return json.loads(resp.read()), resp.status
    except urllib.error.HTTPError as err:
        return json.loads(err.read()), err.code
def get_raw_with_headers(path):
    """GET ``BASE + path``; return (raw bytes, response-header dict, status)."""
    request = urllib.request.Request(BASE + path)
    with urllib.request.urlopen(request, timeout=10) as resp:
        return resp.read(), dict(resp.headers.items()), resp.status
# ── 1. CSRF Protection ─────────────────────────────────────────────────────
class TestCSRF:
    """CSRF protection coverage: live-endpoint probes against the test server
    plus direct unit calls into api.routes._check_csrf via _csrf_allowed."""
    @staticmethod
    def _csrf_allowed(headers):
        # Build a minimal request-like object — _check_csrf only reads
        # .headers — and return its boolean verdict.
        from types import SimpleNamespace
        from api.routes import _check_csrf
        return _check_csrf(SimpleNamespace(headers=headers))
    def test_no_origin_no_referer_allowed(self):
        """Curl-style request with no Origin/Referer must pass CSRF check."""
        body, status = post("/api/sessions/new", {})
        # Should succeed (200 or 404) but NOT 403
        assert status != 403, f"Expected non-403 for no-origin request, got {status}"
    def test_cross_origin_post_rejected(self):
        """Cross-origin POST (Origin != Host) must be rejected with 403."""
        # NOTE(review): Host is hardcoded to :8788 while conftest may choose a
        # different live port — presumably the check compares Origin against
        # the Host *header*, so the mismatch path is still exercised; confirm.
        body, status = post(
            "/api/sessions/new",
            {},
            headers={"Origin": "http://evil.com", "Host": "127.0.0.1:8788"},
        )
        assert status == 403, f"Expected 403 for cross-origin request, got {status}: {body}"
        assert "cross-origin" in body.get("error", "").lower() or "rejected" in body.get("error", "").lower()
    def test_same_origin_post_allowed(self):
        """Same-origin POST (Origin matches Host) must be allowed."""
        body, status = post(
            "/api/sessions/new",
            {},
            headers={"Origin": "http://127.0.0.1:8788", "Host": "127.0.0.1:8788"},
        )
        assert status != 403, f"Expected non-403 for same-origin request, got {status}: {body}"
    def test_same_origin_referer_allowed(self):
        """Same-origin Referer (matching Host) must be allowed."""
        body, status = post(
            "/api/sessions/new",
            {},
            headers={"Referer": "http://127.0.0.1:8788/", "Host": "127.0.0.1:8788"},
        )
        assert status != 403, f"Expected non-403 for same-referer request, got {status}"
    def test_proxy_host_default_https_port_matches_http_origin(self):
        """http:// origin without port must NOT match X-Forwarded-Host with :443.
        After the scheme-aware _ports_match fix: http:// absent port = :80,
        which is not equal to :443. These are different protocols/ports and
        should be rejected. In real reverse proxy scenarios where the external
        URL is HTTPS, the browser sends Origin: https://... not http://...
        See test_proxy_host_default_https_port_matches_https_origin for the
        real-world proxy case that should pass.
        """
        assert not self._csrf_allowed({
            "Origin": "http://example.com",
            "X-Forwarded-Host": "example.com:443",
        }), 'http origin (port :80) must not match https host (:443)'
    def test_proxy_host_default_https_port_matches_https_origin(self):
        """HTTPS Origin without port should match X-Forwarded-Host with explicit :443."""
        assert self._csrf_allowed({
            "Origin": "https://example.com",
            "X-Forwarded-Host": "example.com:443",
        })
    def test_proxy_host_port_normalization_still_rejects_other_host(self):
        """Port normalization must not allow different hosts through."""
        assert not self._csrf_allowed({
            "Origin": "https://evil.com",
            "X-Forwarded-Host": "example.com:443",
        })
    def test_allowed_public_origin_bypasses_missing_proxy_port(self, monkeypatch):
        """Explicitly configured public origins should pass even if proxy strips :port from Host."""
        monkeypatch.setenv('HERMES_WEBUI_ALLOWED_ORIGINS', 'https://myapp.example.com:8000')
        assert self._csrf_allowed({
            'Origin': 'https://myapp.example.com:8000',
            'Host': 'myapp.example.com',
            'X-Forwarded-Proto': 'https',
        })
    def test_other_origin_not_allowed_by_public_origin_allowlist(self, monkeypatch):
        """Allowlist must stay exact; unrelated origins must still be rejected."""
        monkeypatch.setenv('HERMES_WEBUI_ALLOWED_ORIGINS', 'https://myapp.example.com:8000')
        assert not self._csrf_allowed({
            'Origin': 'https://evil.com:8000',
            'Host': 'myapp.example.com',
            'X-Forwarded-Proto': 'https',
        })
    # ── Port normalization: scheme-aware (M-1 fix) ────────────────────────────
    def test_cross_protocol_port_not_confused_http_origin_https_host(self):
        """http:// origin must NOT match a host with :443 (HTTPS default).
        Before M-1 fix, _ports_match treated both 80 and 443 as equivalent to
        absent port, allowing http://host to match https://host:443 servers.
        """
        assert not self._csrf_allowed({
            'Origin': 'http://example.com',  # http, no port = :80
            'X-Forwarded-Host': 'example.com:443',  # HTTPS port
        }), 'http origin should NOT match host advertising port 443'
    def test_cross_protocol_port_not_confused_https_origin_http_host(self):
        """https:// origin must NOT match a host with :80 (HTTP default)."""
        assert not self._csrf_allowed({
            'Origin': 'https://example.com',  # https, no port = :443
            'X-Forwarded-Host': 'example.com:80',  # HTTP port
        }), 'https origin should NOT match host advertising port 80'
    def test_http_explicit_port_80_matches_host_without_port(self):
        """http://example.com:80 is the same origin as http://example.com."""
        assert self._csrf_allowed({
            'Origin': 'http://example.com:80',
            'Host': 'example.com',
        })
    def test_https_explicit_port_443_matches_host_without_port(self):
        """https://example.com:443 is the same origin as https://example.com."""
        assert self._csrf_allowed({
            'Origin': 'https://example.com:443',
            'Host': 'example.com',
        })
    def test_non_default_port_not_waived(self):
        """Non-default ports (e.g. :8000) must not be treated as equivalent to absent."""
        assert not self._csrf_allowed({
            'Origin': 'https://example.com:8000',
            'Host': 'example.com',
        })
    # ── Bug scenario: proxy strips non-standard port ──────────────────────────
    def test_bug_origin_8000_host_without_port_rejected_without_allowlist(self, monkeypatch):
        """Without the allowlist, origin with :8000 must be rejected when proxy strips port.
        This documents the original bug: Origin: https://app.com:8000 with
        Host: app.com (proxy stripped the port). Before this PR that returned 403.
        The fix (HERMES_WEBUI_ALLOWED_ORIGINS) handles it; without the env var
        the request is still rejected, which is the safe default.
        """
        monkeypatch.delenv('HERMES_WEBUI_ALLOWED_ORIGINS', raising=False)
        assert not self._csrf_allowed({
            'Origin': 'https://myapp.example.com:8000',
            'Host': 'myapp.example.com',
        }), 'without allowlist, port mismatch must be rejected (safe default)'
    def test_allowed_origins_comma_separated(self, monkeypatch):
        """HERMES_WEBUI_ALLOWED_ORIGINS accepts multiple comma-separated origins."""
        monkeypatch.setenv(
            'HERMES_WEBUI_ALLOWED_ORIGINS',
            'https://app1.example.com:8000, https://app2.example.com:9000',
        )
        assert self._csrf_allowed({'Origin': 'https://app1.example.com:8000', 'Host': 'proxy.internal'})
        assert self._csrf_allowed({'Origin': 'https://app2.example.com:9000', 'Host': 'proxy.internal'})
        assert not self._csrf_allowed({'Origin': 'https://evil.com:8000', 'Host': 'proxy.internal'})
    def test_allowed_origins_without_scheme_ignored(self, monkeypatch, capsys):
        """Allowlist entries missing the scheme are skipped and a warning is printed."""
        monkeypatch.setenv('HERMES_WEBUI_ALLOWED_ORIGINS', 'myapp.example.com:8000')
        from api.routes import _allowed_public_origins
        result = _allowed_public_origins()
        assert len(result) == 0, 'entry without scheme must be ignored'
        # The warning goes to stderr per the assertion below.
        captured = capsys.readouterr()
        assert 'WARNING' in captured.err and 'scheme' in captured.err.lower()
    def test_allowed_origins_trailing_slash_normalized(self, monkeypatch):
        """Trailing slash in allowlist entry is stripped before comparison."""
        monkeypatch.setenv('HERMES_WEBUI_ALLOWED_ORIGINS', 'https://myapp.example.com:8000/')
        assert self._csrf_allowed({
            'Origin': 'https://myapp.example.com:8000',
            'Host': 'proxy.internal',
        })
# ── CSRF helpers: unit tests ─────────────────────────────────────────────────
class TestCSRFHelpers:
    """Direct unit tests for _normalize_host_port and _ports_match."""
    # _normalize_host_port: splits "host[:port]" (incl. bracketed IPv6) into
    # a (lowercased-host, port-or-None) tuple.
    def test_normalize_host_only(self):
        from api.routes import _normalize_host_port
        assert _normalize_host_port('example.com') == ('example.com', None)
    def test_normalize_host_with_port(self):
        from api.routes import _normalize_host_port
        assert _normalize_host_port('example.com:8000') == ('example.com', '8000')
    def test_normalize_ipv6_no_port(self):
        from api.routes import _normalize_host_port
        assert _normalize_host_port('[::1]') == ('::1', None)
    def test_normalize_ipv6_with_port(self):
        from api.routes import _normalize_host_port
        assert _normalize_host_port('[::1]:8080') == ('::1', '8080')
    def test_normalize_empty(self):
        from api.routes import _normalize_host_port
        assert _normalize_host_port('') == ('', None)
    def test_normalize_whitespace_stripped(self):
        from api.routes import _normalize_host_port
        assert _normalize_host_port('  example.com  ') == ('example.com', None)
    def test_normalize_lowercases(self):
        from api.routes import _normalize_host_port
        assert _normalize_host_port('EXAMPLE.COM:80') == ('example.com', '80')
    # _ports_match(scheme, a, b): scheme-aware equality where an absent port
    # means the scheme's default (http→80, https→443); non-default ports are
    # never waived.
    def test_ports_match_identical(self):
        from api.routes import _ports_match
        assert _ports_match('https', '8000', '8000') is True
    def test_ports_match_both_absent(self):
        from api.routes import _ports_match
        assert _ports_match('https', None, None) is True
    def test_ports_match_https_absent_vs_443(self):
        from api.routes import _ports_match
        assert _ports_match('https', None, '443') is True
        assert _ports_match('https', '443', None) is True
    def test_ports_match_http_absent_vs_80(self):
        from api.routes import _ports_match
        assert _ports_match('http', None, '80') is True
        assert _ports_match('http', '80', None) is True
    def test_ports_match_http_absent_vs_443_rejected(self):
        """http:// scheme: absent port is :80, not :443."""
        from api.routes import _ports_match
        assert _ports_match('http', None, '443') is False
        assert _ports_match('http', '443', None) is False
    def test_ports_match_https_absent_vs_80_rejected(self):
        """https:// scheme: absent port is :443, not :80."""
        from api.routes import _ports_match
        assert _ports_match('https', None, '80') is False
        assert _ports_match('https', '80', None) is False
    def test_ports_match_non_default_never_waived(self):
        from api.routes import _ports_match
        assert _ports_match('https', None, '8000') is False
        assert _ports_match('https', '8000', None) is False
        assert _ports_match('http', None, '8080') is False
    def test_ports_match_different_non_default(self):
        from api.routes import _ports_match
        assert _ports_match('https', '8000', '9000') is False
# ── 2. Login Rate Limiting ─────────────────────────────────────────────────
class TestLoginRateLimit:
    """Login rate limiting — exercises the private helpers in api.auth
    (_login_attempts bucket, _record_login_attempt, _check_login_rate)
    with synthetic IPs so no real login endpoint traffic is needed."""
    def test_rate_limit_triggers_429(self):
        """More than 5 failed login attempts from same IP must yield 429."""
        # NOTE(review): _login_attempts/_LOGIN_WINDOW and the local
        # `import time` below are unused leftovers from an earlier draft.
        from api.auth import _login_attempts, _LOGIN_WINDOW
        # Force the rate limiter state: inject 5 stale-now timestamps so next call is fresh
        # Actually easier: just hit the endpoint 6 times with wrong password
        # But we can't set a password in a test without config file.
        # Instead test the helper directly.
        import time
        from api import auth as _auth
        # Reset state for a fake IP
        fake_ip = "10.255.254.253"
        _auth._login_attempts[fake_ip] = []
        # Record 5 attempts — should still be allowed
        for _ in range(5):
            _auth._record_login_attempt(fake_ip)
        assert not _auth._check_login_rate(fake_ip), \
            "After 5 attempts, _check_login_rate should return False (blocked)"
    def test_rate_limit_resets_after_window(self):
        """After window expires, rate limit resets."""
        import time
        from api import auth as _auth
        fake_ip = "10.255.254.252"
        # Inject 5 old timestamps (outside window)
        old_ts = time.time() - 70  # 70s ago, outside 60s window
        _auth._login_attempts[fake_ip] = [old_ts] * 5
        assert _auth._check_login_rate(fake_ip), \
            "After window expires, IP should be allowed again"
    def test_rate_limit_endpoint_returns_429(self, webui_server):
        """Live endpoint: 6th bad attempt returns 429 (auth enabled required)."""
        # This test only runs meaningfully when auth is enabled.
        # We can still verify the helper returns 429 from the unit test above.
        # If auth not enabled, endpoint returns 200 OK with 'Auth not enabled'.
        from api import auth as _auth
        fake_ip = "10.255.254.251"
        # Fill the bucket
        _auth._login_attempts[fake_ip] = [time.time()] * 5
        assert not _auth._check_login_rate(fake_ip)
# ── 3. Session ID Validation ───────────────────────────────────────────────
class TestSessionIDValidation:
    """Session.load() input validation: well-formed IDs pass the validator
    (returning None only because no file exists), dangerous IDs are rejected
    outright (also None, but without ever touching the filesystem)."""
    def test_hex_session_id_loads(self, tmp_path):
        """A valid hex session ID gets past the validation check."""
        # NOTE(review): tmp_path and SESSION_DIR are unused leftovers; the
        # sys.path insert mirrors the module-level one and is redundant.
        import sys
        sys.path.insert(0, str(pathlib.Path(__file__).parent.parent))
        from api.models import Session, SESSION_DIR
        valid_hex = "deadbeef" * 8  # 64 hex chars
        # Should not raise — returns None only if file doesn't exist (it won't)
        result = Session.load(valid_hex)
        assert result is None  # No file, but no error
    def test_new_format_session_id_passes_validation(self):
        """New hermes-agent session IDs (YYYYMMDD_HHMMSS_xxxxxx) must pass validation."""
        from api.models import Session
        # Should pass the validator (returns None only because the file doesn't exist)
        result = Session.load("20260406_164014_74b2d1")
        assert result is None  # file doesn't exist, but validator passed
    def test_non_hex_session_id_rejected(self):
        """A session ID with dangerous chars must be rejected."""
        from api.models import Session
        # Traversal sequences, shell metacharacters, whitespace, NUL bytes,
        # Windows-style separators, and stray extensions — all must fail.
        evil_ids = [
            "../../../etc/passwd",
            "../../../../root/.ssh/id_rsa",
            "session; rm -rf /",
            "hello world",
            "ZZZZZZZZZZZZZZZZ",
            "session\x00evil",
            "..\\..\\windows\\system32",
            "session/../../etc/passwd",
            "valid_looking.json",
        ]
        for sid in evil_ids:
            result = Session.load(sid)
            assert result is None, \
                f"Session.load should reject dangerous ID '{sid}', got {result}"
    def test_empty_session_id_rejected(self):
        """An empty session ID must be rejected."""
        from api.models import Session
        assert Session.load("") is None
        assert Session.load(None) is None
# ── 4. Error Path Sanitization ────────────────────────────────────────────
class TestSanitizeError:
    """_sanitize_error must strip filesystem paths from error messages."""

    def test_unix_path_stripped(self):
        from api.helpers import _sanitize_error
        msg = _sanitize_error(
            FileNotFoundError("/home/hermes/.hermes/sessions/abc123.json"))
        assert "/home/hermes" not in msg
        assert "<path>" in msg

    def test_nested_unix_path_stripped(self):
        from api.helpers import _sanitize_error
        msg = _sanitize_error(
            ValueError("cannot read /var/lib/hermes/data.db: permission denied"))
        assert "/var/lib/hermes" not in msg
        assert "<path>" in msg

    def test_no_path_unchanged(self):
        from api.helpers import _sanitize_error
        # Messages without paths pass through untouched.
        assert _sanitize_error(ValueError("session not found")) == "session not found"

    def test_windows_path_stripped(self):
        from api.helpers import _sanitize_error
        msg = _sanitize_error(
            FileNotFoundError("C:\\Users\\hermes\\AppData\\sessions\\x.json not found"))
        assert "C:\\Users\\hermes" not in msg

    def test_live_404_does_not_leak_path(self, webui_server):
        """Live server: file-not-found errors must not expose filesystem paths."""
        body, status = post("/api/file/read", {"path": "../../etc/passwd"})
        err = body.get("error", "")
        assert "/home" not in err and "/var" not in err and "/etc" not in err, \
            f"Error message leaks filesystem path: {err}"
# ── 5. Secure Cookie Flag ─────────────────────────────────────────────────
class TestSecureCookieFlag:
def test_getattr_safe_on_plain_socket(self):
"""getattr(handler.request, 'getpeercert', None) must not raise on plain socket."""
import socket
# Plain socket has no getpeercert attribute
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
try:
result = getattr(s, 'getpeercert', None)
assert result is None, \
f"Expected None on plain socket, got {result}"
finally:
s.close()
def test_secure_flag_not_set_for_plain_http(self, webui_server):
"""Login endpoint over plain HTTP must NOT set Secure cookie flag."""
# Auth is disabled in tests, so this just checks no crash
body, status = post("/api/auth/login", {"password": "test"})
# Either 200 (auth not enabled) or 401 (auth enabled, wrong pw)
assert status in (200, 401, 429), f"Unexpected status {status}"
# ── 6. HMAC Signature Length ──────────────────────────────────────────────
class TestHMACLength:
    """Session-cookie HMAC signatures must be 32 hex chars (128-bit)."""

    def test_session_token_sig_is_32_chars(self):
        """Session cookie signature must be 32 hex chars (128-bit), not 16."""
        from api.auth import create_session
        _, sig = create_session().rsplit('.', 1)
        assert len(sig) == 32, \
            f"Expected 32-char signature (128-bit), got {len(sig)}: {sig}"

    def test_verify_session_rejects_old_16char_sig(self):
        """A cookie with a 16-char sig must fail verification."""
        import hashlib
        import hmac as _hmac
        import secrets
        import time
        from api.auth import _signing_key, verify_session, _sessions
        token = secrets.token_hex(32)
        _sessions[token] = time.time() + 3600  # valid session
        truncated = _hmac.new(_signing_key(), token.encode(),
                              hashlib.sha256).hexdigest()[:16]
        # Should fail: sig length wrong
        assert not verify_session(f"{token}.{truncated}"), \
            "Old 16-char sig cookie must not verify (sig mismatch)"
# ── 7. Skills Path Traversal ──────────────────────────────────────────────
class TestSkillsPathTraversal:
    """Skill names must not escape the skills directory."""

    def test_traversal_rejected(self, webui_server):
        """Saving a skill with a traversal path must return 400."""
        payload = {"name": "../../evil", "content": "# evil"}
        body, status = post("/api/skills/save", payload)
        assert status in (400, 403), \
            f"Expected 400/403 for traversal skill path, got {status}: {body}"

    def test_valid_skill_accepted(self, webui_server):
        """Saving a skill with a valid name must succeed."""
        payload = {
            "name": "test-security-skill",
            "content": "---\nname: test-security-skill\ndescription: test\n---\n# test",
        }
        body, status = post("/api/skills/save", payload)
        if status == 500:
            # 500 = skills module not available (hermes-agent not installed) — skip
            import pytest
            pytest.skip("skills module requires hermes-agent")
        # Should succeed (200) or need auth (401/403) — not path error (400)
        assert status in (200, 401, 403, 404), \
            f"Valid skill save got unexpected status {status}: {body}"
# ── 8. Content-Disposition for Dangerous MIME Types ───────────────────────
class TestContentDisposition:
    """Dangerous MIME types must be forced to download, and filename headers
    must stay latin-1 encodable even for unicode filenames."""

    def test_html_file_forced_download(self, webui_server, tmp_path):
        """HTML files served via /api/file/raw must have Content-Disposition: attachment."""
        # The raw-file handler must exist…
        from api.routes import _handle_file_raw  # noqa: F401
        # …and its source must force 'attachment' for the dangerous MIME set.
        # (The previous version compared the local set against itself — a
        # tautology that could never fail — and created a session it never used.)
        import pathlib
        routes_src = pathlib.Path(__file__).parent.parent / "api" / "routes.py"
        src = routes_src.read_text()
        assert "dangerous_types" in src, "routes.py must define dangerous_types"
        assert "attachment" in src, \
            "routes.py must force Content-Disposition: attachment for dangerous types"

    def test_dangerous_mime_types_set_complete(self):
        """The set of dangerous MIME types must include html, xhtml, and svg."""
        import pathlib
        routes_src = pathlib.Path(__file__).parent.parent / "api" / "routes.py"
        src = routes_src.read_text()
        assert "text/html" in src
        assert "application/xhtml+xml" in src
        assert "image/svg+xml" in src
        assert "dangerous_types" in src

    def test_unicode_filename_download_header_is_latin1_safe(self, cleanup_test_sessions):
        """Unicode filenames must not crash download responses."""
        # Explicit import: `import urllib.request` does not guarantee
        # urllib.parse is bound — TODO confirm against this file's header.
        import urllib.parse
        import pathlib
        body, status = post("/api/session/new", {})
        assert status == 200, body
        sid = body["session"]["session_id"]
        cleanup_test_sessions.append(sid)
        ws = pathlib.Path(body["session"]["workspace"])
        filename = "中文对照表.pdf"
        pdf_bytes = b"%PDF-1.3\n1 0 obj\n<<>>\nendobj\ntrailer\n<<>>\n%%EOF\n"
        (ws / filename).write_bytes(pdf_bytes)
        encoded = urllib.parse.quote(filename)
        raw, headers, raw_status = get_raw_with_headers(
            f"/api/file/raw?session_id={sid}&path={encoded}&download=1"
        )
        assert raw_status == 200
        assert raw == pdf_bytes
        disp = headers["Content-Disposition"]
        assert disp.startswith("attachment; ")
        assert "filename*=UTF-8''" in disp
        # Header values must be latin-1 encodable (http.server requirement).
        disp.encode("latin-1")

    def test_unicode_filename_inline_header_is_latin1_safe(self, cleanup_test_sessions):
        """Inline responses must also work for unicode filenames."""
        import urllib.parse
        import pathlib
        body, status = post("/api/session/new", {})
        assert status == 200, body
        sid = body["session"]["session_id"]
        cleanup_test_sessions.append(sid)
        ws = pathlib.Path(body["session"]["workspace"])
        filename = "预览.pdf"
        pdf_bytes = b"%PDF-1.3\n1 0 obj\n<<>>\nendobj\ntrailer\n<<>>\n%%EOF\n"
        (ws / filename).write_bytes(pdf_bytes)
        encoded = urllib.parse.quote(filename)
        raw, headers, raw_status = get_raw_with_headers(
            f"/api/file/raw?session_id={sid}&path={encoded}"
        )
        assert raw_status == 200
        assert raw == pdf_bytes
        disp = headers["Content-Disposition"]
        assert disp.startswith("inline; ")
        assert "filename*=UTF-8''" in disp
        disp.encode("latin-1")
# ── 9. PBKDF2 Password Hashing ───────────────────────────────────────────
class TestPasswordHashing:
    """Password hashing must use PBKDF2-SHA256 with a high iteration count."""

    def test_hash_password_is_hex(self):
        """_hash_password must produce a non-empty hex string (PBKDF2-SHA256)."""
        from api.auth import _hash_password
        result = _hash_password("mysecretpassword")
        assert isinstance(result, str) and len(result) == 64, \
            f"Expected 64-char hex hash (SHA-256 output), got len={len(result)}: {result}"
        hex_digits = set("0123456789abcdef")
        assert all(c in hex_digits for c in result), \
            f"Hash must be hex string, got: {result}"

    def test_hash_password_is_deterministic_with_same_salt(self):
        """_hash_password must return the same hash for same input (signing key is stable)."""
        from api.auth import _hash_password
        assert _hash_password("consistent_password") == _hash_password("consistent_password"), \
            "Same password must produce same hash (stable signing key)"

    def test_hash_password_different_inputs_differ(self):
        """Different passwords must produce different hashes."""
        from api.auth import _hash_password
        assert _hash_password("password_a") != _hash_password("password_b"), \
            "Different passwords must produce different hashes"

    def test_hash_password_longer_than_sha256(self):
        """PBKDF2 with 600k iterations is much stronger than single SHA-256.
        We verify indirectly: the code must call pbkdf2_hmac, not sha256 directly."""
        import inspect
        from api import auth as _auth
        src = inspect.getsource(_auth._hash_password)
        assert "pbkdf2_hmac" in src, \
            "_hash_password must use pbkdf2_hmac, not raw sha256"
        assert "600_000" in src or "600000" in src, \
            "_hash_password must use 600,000 iterations"

    def test_save_settings_stores_64char_hex_hash(self):
        """save_settings with _set_password must store a 64-char hex hash (PBKDF2)."""
        from api.config import save_settings, load_settings, SETTINGS_FILE
        # Snapshot current settings so the test leaves no trace behind.
        original = SETTINGS_FILE.read_text() if SETTINGS_FILE.exists() else None
        try:
            save_settings({"_set_password": "test_pbkdf2_pw"})
            ph = load_settings().get("password_hash", "")
            assert len(ph) == 64 and all(c in "0123456789abcdef" for c in ph), \
                f"save_settings must store 64-char hex PBKDF2 hash, got: {ph!r}"
        finally:
            # Restore whatever was there before (or clear if nothing was).
            if original is not None:
                SETTINGS_FILE.write_text(original)
            else:
                save_settings({"_clear_password": True})
# ── 10. Non-loopback Startup Warning ─────────────────────────────────────
class TestStartupWarning:
    """server.py must warn when binding non-loopback without auth enabled."""

    def test_warning_code_present_in_server(self):
        """server.py must contain non-loopback warning code."""
        text = (pathlib.Path(__file__).parent.parent / "server.py").read_text()
        has_warning = ("0.0.0.0" in text
                       or "non-loopback" in text.lower()
                       or "WARNING" in text)
        assert has_warning, \
            "server.py must contain non-loopback warning logic"
        assert "is_auth_enabled" in text, \
            "server.py must check is_auth_enabled() before warning"
# ── 11. SSRF DNS Check ─────────────────────────────────────────────────────
class TestSSRFCheck:
    """config.py must guard outbound endpoints against SSRF via DNS checks."""

    def test_ssrf_guard_code_present_in_config(self):
        """config.py must contain SSRF DNS resolution guard."""
        cfg = pathlib.Path(__file__).parent.parent / "api" / "config.py"
        text = cfg.read_text()
        assert "getaddrinfo" in text, "SSRF guard must resolve DNS with getaddrinfo"
        assert "is_private" in text, "SSRF guard must check is_private IP"
        assert "is_loopback" in text, "SSRF guard must check is_loopback IP"

    def test_known_local_providers_whitelisted(self):
        """Ollama and localhost endpoints should NOT be blocked by SSRF guard."""
        cfg = pathlib.Path(__file__).parent.parent / "api" / "config.py"
        lowered = cfg.read_text().lower()
        assert "ollama" in lowered
        assert "localhost" in lowered
        assert "lmstudio" in lowered or "lm-studio" in lowered
# ── 12. ENV_LOCK Export ────────────────────────────────────────────────────
class TestENVLock:
    """_ENV_LOCK must be shared between api.streaming and api.routes."""

    def test_env_lock_importable_from_streaming(self):
        """_ENV_LOCK must be importable from api.streaming."""
        import threading
        from api.streaming import _ENV_LOCK
        lock_type = type(threading.Lock())
        assert isinstance(_ENV_LOCK, lock_type), \
            "_ENV_LOCK must be a threading.Lock"

    def test_env_lock_importable_in_routes(self):
        """api.routes must be able to import _ENV_LOCK from api.streaming."""
        # A circular-import regression would make this import raise;
        # a clean import is the assertion.
        import importlib  # noqa: F401
        import api.routes  # noqa: F401
# ── Fixture ────────────────────────────────────────────────────────────────
import pytest
# Fixture alias: conftest.py owns the test-server lifecycle for this module;
# tests only need the base URL to talk to it.
@pytest.fixture(scope="module")
def webui_server():
    """Reuse the module-scoped server started by conftest.py."""
    return BASE

199
tests/test_sprint3.py Normal file
View File

@@ -0,0 +1,199 @@
"""Sprint 3 tests: cron API, skills API, memory API, input validation."""
import json, uuid, urllib.request, urllib.error
from tests._pytest_port import BASE
def get(path):
    """GET ``path`` from the test server; return (json_body, http_status).

    Unlike post(), HTTP error responses propagate as urllib.error.HTTPError.
    """
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        payload = resp.read()
        return json.loads(payload), resp.status
def post(path, body=None):
    """POST ``body`` as JSON to ``path``; return (json_body, http_status).

    4xx/5xx responses are decoded and returned too, so callers can assert
    on error statuses without try/except.
    """
    payload = json.dumps(body or {}).encode()
    req = urllib.request.Request(
        BASE + path, data=payload,
        headers={"Content-Type": "application/json"})
    try:
        with urllib.request.urlopen(req, timeout=10) as resp:
            return json.loads(resp.read()), resp.status
    except urllib.error.HTTPError as err:
        return json.loads(err.read()), err.code
def make_session_tracked(created_list, ws=None):
    """Create a session and register it with the cleanup fixture."""
    import pathlib as _pathlib
    payload = {"workspace": str(ws)} if ws else {}
    data, _ = post("/api/session/new", payload)
    session = data["session"]
    created_list.append(session["session_id"])
    return session["session_id"], _pathlib.Path(session["workspace"])
# ── Cron API ──
def test_crons_list():
    body, code = get("/api/crons")
    assert code == 200
    assert "jobs" in body


def test_crons_list_has_required_fields():
    body, _ = get("/api/crons")
    if not body["jobs"]:
        return  # nothing scheduled on this install — nothing to check
    first = body["jobs"][0]
    for field in ("id", "name", "prompt", "enabled", "schedule_display"):
        assert field in first


def test_crons_output_requires_job_id():
    # get() propagates HTTP errors, so a missing job_id surfaces as HTTPError.
    try:
        get("/api/crons/output")
        assert False
    except urllib.error.HTTPError as err:
        assert err.code == 400


def test_crons_output_real_job():
    body, _ = get("/api/crons")
    if not body["jobs"]:
        return
    jid = body["jobs"][0]["id"]
    out, code = get(f"/api/crons/output?job_id={jid}&limit=3")
    assert code == 200
    assert "outputs" in out


def test_crons_pause_requires_job_id():
    _, code = post("/api/crons/pause", {})
    assert code in (400, 404)


def test_crons_resume_requires_job_id():
    _, code = post("/api/crons/resume", {})
    assert code in (400, 404)


def test_crons_run_nonexistent():
    _, code = post("/api/crons/run", {"job_id": "doesnotexist999"})
    assert code == 404
# ── Skills API ──
def test_skills_list():
    body, code = get("/api/skills")
    assert code == 200
    assert len(body["skills"]) > 0


def test_skills_list_has_required_fields():
    body, _ = get("/api/skills")
    first = body["skills"][0]
    assert "name" in first and "description" in first


def test_skills_content_known():
    body, code = get("/api/skills/content?name=dogfood")
    assert code == 200
    assert len(body["content"]) > 0


def test_skills_content_requires_name():
    # Missing ?name= must 400; get() raises on HTTP errors.
    try:
        get("/api/skills/content")
        assert False
    except urllib.error.HTTPError as err:
        assert err.code == 400


def test_skills_search_returns_subset():
    body, _ = get("/api/skills")
    assert len(body["skills"]) > 5
# ── Memory API ──
def test_memory_returns_both_files():
    body, code = get("/api/memory")
    assert code == 200
    assert "memory" in body and "user" in body


def test_memory_content_is_string():
    body, _ = get("/api/memory")
    assert isinstance(body["memory"], str)
    assert isinstance(body["user"], str)


def test_memory_has_mtime():
    body, _ = get("/api/memory")
    assert "memory_mtime" in body and "user_mtime" in body
# ── Input validation: session update/delete ──
def test_session_update_requires_session_id():
    _, code = post("/api/session/update", {"model": "openai/gpt-5.4-mini"})
    assert code == 400


def test_session_delete_requires_session_id():
    _, code = post("/api/session/delete", {})
    assert code == 400


def test_session_delete_rejects_absolute_path_payload(tmp_path):
    victim = tmp_path / "victim.json"
    victim.write_text("TOPSECRET", encoding="utf-8")
    _, code = post("/api/session/delete",
                   {"session_id": str(victim.with_suffix(""))})
    assert code == 400
    assert victim.exists(), "absolute-path payload must not delete arbitrary files"


def test_session_delete_rejects_traversal_payload(tmp_path):
    victim = tmp_path / "outside.json"
    victim.write_text("TOPSECRET", encoding="utf-8")
    relative = victim.with_suffix('').as_posix().lstrip('/')
    _, code = post("/api/session/delete",
                   {"session_id": f"../../../../{relative}"})
    assert code == 400
    assert victim.exists(), "traversal payload must not delete arbitrary files"
# ── Input validation: chat start ──
def test_chat_start_requires_session_id():
    _, code = post("/api/chat/start", {"message": "hello"})
    assert code == 400


def test_chat_start_requires_message(cleanup_test_sessions):
    sid, _ = make_session_tracked(cleanup_test_sessions)
    _, code = post("/api/chat/start", {"session_id": sid, "message": ""})
    assert code == 400


def test_session_update_unknown_id_returns_404():
    _, code = post("/api/session/update",
                   {"session_id": "nosuchsession", "model": "openai/gpt-5.4-mini"})
    assert code == 404
# ── Trusted-root enforcement ──
def test_session_update_rejects_workspace_outside_trusted_root(
        tmp_path, cleanup_test_sessions):
    """Updating a session's workspace to a dir outside the trusted root → 400."""
    # Track the created session so conftest cleans it up
    # (previously this test created a session and leaked it).
    sid, _ = make_session_tracked(cleanup_test_sessions)
    outside = tmp_path / "outside"
    outside.mkdir(parents=True, exist_ok=True)
    result, status = post("/api/session/update",
                          {"session_id": sid, "workspace": str(outside)})
    assert status == 400
    assert "outside" in result.get("error", "").lower()


def test_chat_start_rejects_workspace_outside_trusted_root(
        tmp_path, cleanup_test_sessions):
    """Starting a chat with a workspace outside the trusted root → 400."""
    sid, _ = make_session_tracked(cleanup_test_sessions)
    outside = tmp_path / "outside-chat"
    outside.mkdir(parents=True, exist_ok=True)
    result, status = post("/api/chat/start",
                          {"session_id": sid, "message": "hello",
                           "workspace": str(outside)})
    assert status == 400
    assert "outside" in result.get("error", "").lower()


def test_workspace_add_rejects_path_outside_trusted_root(tmp_path):
    """Registering a workspace outside the trusted root → 400."""
    outside = tmp_path / "outside-add"
    outside.mkdir(parents=True, exist_ok=True)
    result, status = post("/api/workspaces/add",
                          {"path": str(outside), "name": "Outside"})
    assert status == 400
    assert "outside" in result.get("error", "").lower()


def test_session_new_rejects_workspace_outside_trusted_root(tmp_path):
    """Creating a session with a workspace outside the trusted root → 400."""
    outside = tmp_path / "outside-new"
    outside.mkdir(parents=True, exist_ok=True)
    result, status = post("/api/session/new", {"workspace": str(outside)})
    assert status == 400
    assert "outside" in result.get("error", "").lower()
# ── Session search ──
def test_session_search_returns_matches(cleanup_test_sessions):
    sid, _ = make_session_tracked(cleanup_test_sessions)
    post("/api/session/rename", {"session_id": sid, "title": f"unique-s3-{sid}"})
    body, code = get(f"/api/sessions/search?q=unique-s3-{sid}")
    assert code == 200
    found = [s["session_id"] for s in body["sessions"]]
    assert sid in found


def test_session_search_empty_query_returns_all():
    body, code = get("/api/sessions/search?q=")
    assert code == 200 and "sessions" in body


def test_session_search_no_results():
    body, code = get("/api/sessions/search?q=zzznomatchzzz9999")
    assert code == 200 and body["sessions"] == []

576
tests/test_sprint30.py Normal file
View File

@@ -0,0 +1,576 @@
"""
Sprint 30: Approval card UI, i18n coverage, and approval flow polish.
Tests for:
- Approval card HTML structure (all 4 buttons, IDs, data-i18n attrs)
- Keyboard shortcut handler presence in boot.js
- i18n keys for approval card in both locales
- CSS for approval-btn states (loading, disabled, kbd badge)
- respondApproval loading/disable pattern in messages.js
- streaming.py scoping fix (_unreg_notify=None initialisation)
- Approval respond HTTP endpoint (existing + new behaviour)
"""
import json
import re
import urllib.request
import urllib.error
import urllib.parse
import pytest
from tests._pytest_port import BASE
def get(path):
    """GET ``path`` from the test server and return the decoded JSON body."""
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        return json.loads(resp.read())
def post(path, body=None):
    """POST ``body`` as JSON to ``path``; return (json_body, http_status).

    HTTP error responses are decoded and returned rather than raised.
    """
    payload = json.dumps(body or {}).encode()
    req = urllib.request.Request(
        BASE + path, data=payload,
        headers={"Content-Type": "application/json"})
    try:
        with urllib.request.urlopen(req, timeout=10) as resp:
            return json.loads(resp.read()), resp.status
    except urllib.error.HTTPError as err:
        return json.loads(err.read()), err.code
def read(path):
    """Return the full text of ``path`` decoded as UTF-8."""
    with open(path, encoding="utf-8") as handle:
        return handle.read()
import pathlib
# Repository root (tests/ lives one level below it); used to read static assets.
REPO = pathlib.Path(__file__).parent.parent
# ── HTML structure ───────────────────────────────────────────────────────────
class TestApprovalCardHTML:
    """Static markup checks for the approval card in index.html."""

    def _html(self):
        # Load the page under test.
        return read(REPO / "static/index.html")

    def test_approval_card_has_four_buttons(self):
        html = self._html()
        for choice in ("once", "session", "always", "deny"):
            assert f"respondApproval('{choice}')" in html, \
                f"approval button for '{choice}' missing from index.html"

    def test_approval_buttons_have_ids(self):
        html = self._html()
        expected_ids = ("approvalBtnOnce", "approvalBtnSession",
                        "approvalBtnAlways", "approvalBtnDeny")
        for btn_id in expected_ids:
            assert f'id="{btn_id}"' in html, \
                f"button id '{btn_id}' missing from approval card"

    def test_approval_heading_has_data_i18n(self):
        assert 'data-i18n="approval_heading"' in self._html(), \
            "approval heading missing data-i18n attribute"

    def test_approval_buttons_have_data_i18n_labels(self):
        html = self._html()
        label_keys = ("approval_btn_once", "approval_btn_session",
                      "approval_btn_always", "approval_btn_deny")
        for key in label_keys:
            assert f'data-i18n="{key}"' in html, \
                f"button label data-i18n='{key}' missing"

    def test_approval_once_button_has_kbd_badge(self):
        assert '<kbd class="approval-kbd">' in self._html(), \
            "kbd badge missing from Allow once button"

    def test_approval_card_has_aria_roles(self):
        html = self._html()
        assert 'role="alertdialog"' in html, \
            "approval card missing role=alertdialog for accessibility"
        assert 'aria-labelledby="approvalHeading"' in html, \
            "approval card missing aria-labelledby"
class TestClarifyCardHTML:
    """Static markup checks for the clarify card in index.html."""

    def _html(self):
        return read(REPO / "static/index.html")

    def test_clarify_card_markup_present(self):
        html = self._html()
        assert 'id="clarifyCard"' in html, "clarify card missing from index.html"
        assert 'id="clarifyHeading"' in html, "clarify heading missing"
        assert 'id="clarifyQuestion"' in html, "clarify question text missing"
        assert 'id="clarifyChoices"' in html, "clarify choices container missing"
        assert 'id="clarifyInput"' in html, "clarify input missing"
        assert 'id="clarifySubmit"' in html, "clarify submit button missing"

    def test_clarify_card_has_data_i18n(self):
        html = self._html()
        for attr in ('data-i18n="clarify_heading"',
                     'data-i18n="clarify_send"',
                     'data-i18n-placeholder="clarify_input_placeholder"'):
            assert attr in html

    def test_clarify_card_has_aria_roles(self):
        html = self._html()
        assert 'role="dialog"' in html, \
            "clarify card missing role=dialog for accessibility"
        assert 'aria-labelledby="clarifyHeading"' in html, \
            "clarify card missing aria-labelledby"
# ── CSS ──────────────────────────────────────────────────────────────────────
class TestApprovalCardCSS:
    """CSS checks for approval button states and the kbd badge."""

    def _css(self):
        return read(REPO / "static/style.css")

    def test_btn_disabled_style_present(self):
        assert ".approval-btn:disabled" in self._css(), \
            "disabled state style missing for approval buttons"

    def test_btn_loading_class_present(self):
        assert ".approval-btn.loading" in self._css(), \
            "loading class style missing for approval buttons"

    def test_approval_kbd_style_present(self):
        assert ".approval-kbd" in self._css(), \
            ".approval-kbd style missing from style.css"

    def test_approval_kbd_hidden_on_mobile(self):
        css = self._css()
        # Should be display:none inside the mobile media query
        hidden = (".approval-kbd{display:none;}" in css
                  or ".approval-kbd { display: none; }" in css
                  or re.search(r'\.approval-kbd\s*\{[^}]*display\s*:\s*none', css))
        assert hidden, ".approval-kbd should be hidden on mobile"

    def test_btn_transform_on_hover(self):
        assert "translateY(-1px)" in self._css(), \
            "hover lift effect missing from approval buttons"

    def test_four_choice_styles_present(self):
        css = self._css()
        for cls in (".approval-btn.once", ".approval-btn.session",
                    ".approval-btn.always", ".approval-btn.deny"):
            assert cls in css, f"CSS class '{cls}' missing"
class TestClarifyCardCSS:
    """CSS checks for the clarify card."""

    def _css(self):
        return read(REPO / "static/style.css")

    def test_clarify_styles_present(self):
        css = self._css()
        selectors = (
            ".clarify-card",
            ".clarify-card.visible",
            ".clarify-inner",
            ".clarify-header",
            ".clarify-question",
            ".clarify-choices",
            ".clarify-choice",
            ".clarify-response",
            ".clarify-input",
            ".clarify-submit",
            ".clarify-hint",
        )
        for cls in selectors:
            assert cls in css, f"CSS class '{cls}' missing"

    def test_clarify_mobile_styles_present(self):
        css = self._css()
        # NOTE(review): the final fallback makes this check very lenient —
        # "clarify-card" is already guaranteed by the test above; confirm intent.
        present = (".clarify-card{padding:0 10px 8px;}" in css
                   or ".clarify-card { padding:0 10px 8px; }" in css
                   or "clarify-card" in css)
        assert present, "clarify mobile styles missing"

    def test_clarify_focus_styles_present(self):
        css = self._css()
        assert ".clarify-choice:focus" in css and ".clarify-submit:focus" in css, \
            "clarify focus styles missing"
# ── i18n keys ────────────────────────────────────────────────────────────────
class TestApprovalI18nKeys:
    """Both locales must define every approval-card i18n key."""

    REQUIRED_KEYS = [
        "approval_heading",
        "approval_btn_once",
        "approval_btn_session",
        "approval_btn_always",
        "approval_btn_deny",
        "approval_responding",
    ]

    def _i18n(self):
        return read(REPO / "static/i18n.js")

    def test_english_locale_has_all_approval_keys(self):
        src = self._i18n()
        # The en locale block is everything before the first closing `};`.
        en_block = src[:src.find("\n};")]
        for key in self.REQUIRED_KEYS:
            assert f"{key}:" in en_block, \
                f"English locale missing i18n key: {key}"

    def test_chinese_locale_has_all_approval_keys(self):
        src = self._i18n()
        # The zh locale block runs from ` zh: {` to end of file.
        zh_start = src.find("\n zh: {")
        assert zh_start != -1, "zh locale block not found in i18n.js"
        zh_block = src[zh_start:]
        for key in self.REQUIRED_KEYS:
            assert f"{key}:" in zh_block, \
                f"Chinese locale missing i18n key: {key}"

    def test_approval_heading_english_value(self):
        assert "approval_heading: 'Approval required'" in self._i18n(), \
            "English approval_heading value incorrect"

    def test_approval_btn_once_english_value(self):
        assert "approval_btn_once: 'Allow once'" in self._i18n(), \
            "English approval_btn_once value incorrect"

    def test_approval_btn_deny_english_value(self):
        assert "approval_btn_deny: 'Deny'" in self._i18n(), \
            "English approval_btn_deny value incorrect"
class TestClarifyI18nKeys:
    """Both locales must define every clarify-card i18n key."""

    REQUIRED_KEYS = [
        "clarify_heading",
        "clarify_hint",
        "clarify_other",
        "clarify_send",
        "clarify_input_placeholder",
        "clarify_responding",
    ]

    def _i18n(self):
        return read(REPO / "static/i18n.js")

    def test_english_locale_has_all_clarify_keys(self):
        src = self._i18n()
        en_block = src[:src.find("\n};")]
        for key in self.REQUIRED_KEYS:
            assert f"{key}:" in en_block, f"English locale missing i18n key: {key}"

    def test_chinese_locale_has_all_clarify_keys(self):
        src = self._i18n()
        zh_start = src.find("\n zh: {")
        assert zh_start != -1, "zh locale block not found in i18n.js"
        zh_block = src[zh_start:]
        for key in self.REQUIRED_KEYS:
            assert f"{key}:" in zh_block, f"Chinese locale missing i18n key: {key}"

    def test_clarify_heading_english_value(self):
        assert "clarify_heading: 'Clarification needed'" in self._i18n(), \
            "English clarify_heading value incorrect"
# ── messages.js behaviour ────────────────────────────────────────────────────
class TestApprovalMessagesJS:
    """Behavioural markers in messages.js for the approval card."""

    def _js(self):
        return read(REPO / "static/messages.js")

    def test_show_approval_card_re_enables_buttons(self):
        src = self._js()
        assert "b.disabled = false" in src and "loading" in src, \
            "showApprovalCard should re-enable buttons on each show"

    def test_respond_disables_buttons_immediately(self):
        assert "b.disabled = true" in self._js(), \
            "respondApproval should disable buttons immediately to prevent double-submit"

    def test_respond_uses_i18n_for_error(self):
        src = self._js()
        # Should use t('approval_responding') not a hardcoded string
        assert "t(\"approval_responding\")" in src or "t('approval_responding')" in src, \
            "respondApproval error message should use t('approval_responding')"

    def test_show_card_applies_locale_to_dom(self):
        assert "applyLocaleToDOM" in self._js(), \
            "showApprovalCard should call applyLocaleToDOM to translate data-i18n labels"

    def test_show_card_focuses_once_button(self):
        src = self._js()
        assert "approvalBtnOnce" in src and "focus()" in src, \
            "showApprovalCard should focus the Allow once button"
class TestClarifyMessagesJS:
    """Behavioural markers in messages.js for the clarify card."""

    def _js(self):
        return read(REPO / "static/messages.js")

    def test_clarify_event_listener_present(self):
        assert "addEventListener('clarify'" in self._js(), \
            "clarify SSE listener missing from messages.js"

    def test_show_clarify_card_present(self):
        src = self._js()
        assert "function showClarifyCard" in src, "showClarifyCard missing"
        assert "clarifyChoices" in src and "clarifyInput" in src, \
            "showClarifyCard should manage clarify DOM elements"

    def test_respond_clarify_uses_api_endpoint(self):
        assert '/api/clarify/respond' in self._js(), \
            "respondClarify should POST to /api/clarify/respond"

    def test_clarify_polling_helpers_present(self):
        src = self._js()
        helpers = ("startClarifyPolling", "stopClarifyPolling",
                   "hideClarifyCard", "_clarifySessionId")
        for token in helpers:
            assert token in src, f"{token} missing from messages.js"
# ── boot.js keyboard shortcut ────────────────────────────────────────────────
class TestApprovalKeyboardShortcut:
    """boot.js must wire Enter to 'Allow once' while the card is visible."""

    def _boot(self):
        return read(REPO / "static/boot.js")

    def test_enter_shortcut_present_in_boot_js(self):
        src = self._boot()
        assert "respondApproval('once')" in src or 'respondApproval("once")' in src, \
            "Enter shortcut calling respondApproval('once') missing from boot.js"

    def test_enter_shortcut_checks_card_visible(self):
        src = self._boot()
        assert "approvalCard" in src and "visible" in src, \
            "Enter shortcut should check if approval card is visible"

    def test_enter_shortcut_guards_input_elements(self):
        src = self._boot()
        assert "TEXTAREA" in src and "INPUT" in src, \
            "Enter shortcut should not fire when focus is on TEXTAREA or INPUT"
# ── streaming.py scoping fix ─────────────────────────────────────────────────
class TestStreamingApprovalScoping:
    """streaming.py must pre-initialise its unregister callbacks and flags."""

    def _streaming(self):
        return read(REPO / "api/streaming.py")

    def test_unreg_notify_initialised_to_none(self):
        assert "_unreg_notify = None" in self._streaming(), \
            "_unreg_notify must be initialised to None before the try block"

    def test_finally_checks_unreg_notify_not_none(self):
        assert "_unreg_notify is not None" in self._streaming(), \
            "finally block must check '_unreg_notify is not None' before calling it"

    def test_approval_registered_flag_present(self):
        assert "_approval_registered = False" in self._streaming(), \
            "_approval_registered flag must be initialised to False"

    def test_clarify_registered_flag_present(self):
        assert "_clarify_registered = False" in self._streaming(), \
            "_clarify_registered flag must be initialised to False"

    def test_clarify_unreg_notify_initialised_to_none(self):
        assert "_unreg_clarify_notify = None" in self._streaming(), \
            "_unreg_clarify_notify must be initialised to None before the try block"

    def test_finally_checks_clarify_unreg_notify_not_none(self):
        assert "_unreg_clarify_notify is not None" in self._streaming(), \
            "finally block must check '_unreg_clarify_notify is not None' before calling it"
# ── HTTP regression: approval respond ────────────────────────────────────────
class TestApprovalRespondHTTP:
    """HTTP regression tests for POST /api/approval/respond."""

    def test_respond_ok_with_all_choices(self):
        """Every valid choice is accepted and echoed back with ok=True."""
        import uuid  # hoisted: importing once per loop iteration was wasteful
        for choice in ("once", "session", "always", "deny"):
            sid = f"sprint30-{uuid.uuid4().hex[:8]}"
            result, status = post("/api/approval/respond",
                                  {"session_id": sid, "choice": choice})
            assert status == 200, f"choice={choice} should return 200"
            assert result["ok"] is True
            assert result["choice"] == choice

    def test_respond_rejects_bad_choice(self):
        """An unknown choice value is rejected with HTTP 400."""
        _, status = post("/api/approval/respond",
                         {"session_id": "x", "choice": "HACKED"})
        assert status == 400

    def test_respond_requires_session_id(self):
        """A missing session_id is rejected with HTTP 400."""
        _, status = post("/api/approval/respond", {"choice": "deny"})
        assert status == 400

    def test_respond_returns_choice_field(self):
        """The response body must echo the submitted choice."""
        import uuid
        sid = f"sprint30-choice-{uuid.uuid4().hex[:8]}"
        result, status = post("/api/approval/respond",
                              {"session_id": sid, "choice": "always"})
        assert status == 200
        assert "choice" in result
        assert result["choice"] == "always"
class TestApprovalCardTimerLogic:
    """Tests for the 30s minimum visibility guard introduced in PR #225."""

    def _get_js(self):
        return pathlib.Path(__file__).parent.parent / 'static' / 'messages.js'

    def _src(self):
        # All assertions below inspect the same messages.js source text.
        return self._get_js().read_text()

    def test_approval_min_visible_ms_constant_present(self):
        """APPROVAL_MIN_VISIBLE_MS constant exists and is 30000."""
        import re
        code = self._src()
        assert 'APPROVAL_MIN_VISIBLE_MS' in code
        match = re.search(r'APPROVAL_MIN_VISIBLE_MS\s*=\s*(\d+)', code)
        assert match is not None, 'APPROVAL_MIN_VISIBLE_MS not assigned'
        assert int(match.group(1)) == 30000, f'Expected 30000, got {match.group(1)}'

    def test_hide_approval_card_has_force_parameter(self):
        """hideApprovalCard() accepts a force parameter."""
        code = self._src()
        has_default = 'hideApprovalCard(force=false)' in code or \
                      'hideApprovalCard(force = false)' in code
        assert has_default, \
            'hideApprovalCard must have force=false default parameter'

    def test_hide_approval_card_checks_force_flag(self):
        """hideApprovalCard body has a conditional on force."""
        # The guard: if (!force && _approvalVisibleSince)
        assert '!force' in self._src(), \
            'hideApprovalCard must check !force before deferred hide'

    def test_approval_hide_timer_variable_present(self):
        """Module-level _approvalHideTimer variable is declared."""
        assert '_approvalHideTimer' in self._src()

    def test_approval_visible_since_variable_present(self):
        """Module-level _approvalVisibleSince variable is declared."""
        assert '_approvalVisibleSince' in self._src()

    def test_approval_signature_variable_present(self):
        """Module-level _approvalSignature variable is declared."""
        assert '_approvalSignature' in self._src()

    def test_respond_approval_calls_hide_with_force(self):
        """respondApproval must call hideApprovalCard(true) — not no-arg."""
        import re
        code = self._src()
        # Extract respondApproval function body
        match = re.search(r'async function respondApproval.*?(?=\nasync function|\nfunction |\Z)',
                          code, re.DOTALL)
        assert match, 'respondApproval function not found'
        fn_body = match.group(0)
        # Must call hideApprovalCard(true), not the bare hideApprovalCard()
        assert 'hideApprovalCard(true)' in fn_body, \
            'respondApproval must call hideApprovalCard(true) so card hides immediately after user clicks'
        # Must NOT have bare hideApprovalCard() without force
        bare_calls = re.findall(r'hideApprovalCard\((?!true)', fn_body)
        assert not bare_calls, \
            f'respondApproval has bare hideApprovalCard() calls (no force=true): {bare_calls}'

    def test_stream_done_calls_hide_with_force(self):
        """Done SSE event handler must call hideApprovalCard(true)."""
        import re
        code = self._src()
        # Find the done event handler section (stopApprovalPolling followed by hideApprovalCard)
        # Look for pattern: stopApprovalPolling();\n + hideApprovalCard
        force_args = re.findall(
            r'stopApprovalPolling\(\);\s*\n\s*if\(!_approvalSessionId[^)]*\)\s*hideApprovalCard\((\w*)\)',
            code
        )
        # All stopApprovalPolling paths that call hideApprovalCard should use force=true
        for arg in force_args:
            assert arg == 'true', \
                f'After stopApprovalPolling(), hideApprovalCard called without force=true (got: {arg!r})'

    def test_poll_loop_still_uses_no_force(self):
        """Poll loop hideApprovalCard() (when pending gone) keeps no-force — correct behavior."""
        code = self._src()
        # Line 446: else { hideApprovalCard(); } — this is the poll-loop path
        # The 30s guard should protect this call (don't force from poll ticks)
        variants = ('else { hideApprovalCard(); }',
                    'else {hideApprovalCard();}',
                    'else { hideApprovalCard() }')
        assert any(v in code for v in variants), \
            'Poll loop should still call hideApprovalCard() without force=true'

    def test_show_approval_card_signature_dedup(self):
        """showApprovalCard uses a signature to avoid resetting timer on repeat polls."""
        import re
        code = self._src()
        # The sig computation must use JSON.stringify on card content
        match = re.search(r'function showApprovalCard.*?(?=\nfunction |\nasync function |\Z)',
                          code, re.DOTALL)
        assert match, 'showApprovalCard function not found'
        fn_body = match.group(0)
        assert 'JSON.stringify' in fn_body, 'showApprovalCard must compute a signature via JSON.stringify'
        assert '_approvalSignature' in fn_body, 'showApprovalCard must check/set _approvalSignature'

    def test_clear_approval_hide_timer_helper_present(self):
        """_clearApprovalHideTimer helper exists to cancel deferred hides."""
        assert '_clearApprovalHideTimer' in self._src(), \
            '_clearApprovalHideTimer helper must exist to cancel deferred setTimeout'
class TestClarifyCardTimerLogic:
    """Mirror of the approval-card timer tests for the clarify card."""

    def _get_js(self):
        return pathlib.Path(__file__).parent.parent / 'static' / 'messages.js'

    def _src(self):
        # All assertions below inspect the same messages.js source text.
        return self._get_js().read_text()

    def test_clarify_min_visible_ms_constant_present(self):
        import re
        code = self._src()
        assert 'CLARIFY_MIN_VISIBLE_MS' in code
        match = re.search(r'CLARIFY_MIN_VISIBLE_MS\s*=\s*(\d+)', code)
        assert match is not None, 'CLARIFY_MIN_VISIBLE_MS not assigned'
        assert int(match.group(1)) == 30000, f'Expected 30000, got {match.group(1)}'

    def test_hide_clarify_card_has_force_parameter(self):
        code = self._src()
        has_default = 'hideClarifyCard(force=false)' in code or \
                      'hideClarifyCard(force = false)' in code
        assert has_default, \
            'hideClarifyCard must have force=false default parameter'

    def test_hide_clarify_card_checks_force_flag(self):
        assert '!force' in self._src(), 'hideClarifyCard must check !force before deferred hide'

    def test_clarify_hide_timer_variable_present(self):
        assert '_clarifyHideTimer' in self._src()

    def test_clarify_visible_since_variable_present(self):
        assert '_clarifyVisibleSince' in self._src()

    def test_clarify_signature_variable_present(self):
        assert '_clarifySignature' in self._src()

    def test_respond_clarify_calls_hide_with_force(self):
        import re
        code = self._src()
        match = re.search(r'async function respondClarify.*?(?=\nasync function|\nfunction |\Z)',
                          code, re.DOTALL)
        assert match, 'respondClarify function not found'
        fn_body = match.group(0)
        assert 'hideClarifyCard(true)' in fn_body, \
            'respondClarify must call hideClarifyCard(true) so card hides immediately after user clicks'

    def test_clarify_poll_loop_uses_no_force(self):
        code = self._src()
        variants = ('else { hideClarifyCard(); }',
                    'else {hideClarifyCard();}',
                    'else { hideClarifyCard() }')
        assert any(v in code for v in variants), \
            'Clarify poll loop should hide without force=true'

    def test_show_clarify_card_signature_dedup(self):
        import re
        code = self._src()
        match = re.search(r'function showClarifyCard.*?(?=\nfunction |\nasync function |\Z)',
                          code, re.DOTALL)
        assert match, 'showClarifyCard function not found'
        fn_body = match.group(0)
        assert 'JSON.stringify' in fn_body, 'showClarifyCard must compute a signature via JSON.stringify'
        assert '_clarifySignature' in fn_body, 'showClarifyCard must check/set _clarifySignature'

143
tests/test_sprint31.py Normal file
View File

@@ -0,0 +1,143 @@
"""
Tests for issue #170: new profile form with optional custom endpoint fields.
Tests cover:
1. _write_endpoint_to_config writes base_url into config.yaml
2. _write_endpoint_to_config writes api_key into config.yaml
3. _write_endpoint_to_config writes both together
4. _write_endpoint_to_config merges with existing config (does not clobber)
5. _write_endpoint_to_config is a no-op when both args are None/empty
6. API route accepts base_url and api_key in POST body
7. Profile created via API has base_url in config.yaml
"""
import json
import pathlib
import shutil
import os
import pytest
yaml = pytest.importorskip("yaml", reason="PyYAML required for config write tests")
# ── 1-5: _write_endpoint_to_config unit tests ─────────────────────────────────
class TestWriteEndpointToConfig:
    """Unit tests for api.profiles._write_endpoint_to_config."""

    @staticmethod
    def _write(profile_dir, **endpoint_kwargs):
        # Imported at call time so collection does not require the api package.
        from api.profiles import _write_endpoint_to_config
        _write_endpoint_to_config(profile_dir, **endpoint_kwargs)

    @staticmethod
    def _read_config(profile_dir):
        return yaml.safe_load((profile_dir / "config.yaml").read_text())

    def test_writes_base_url(self, tmp_path):
        self._write(tmp_path, base_url="http://localhost:11434")
        assert self._read_config(tmp_path)["model"]["base_url"] == "http://localhost:11434"

    def test_writes_api_key(self, tmp_path):
        self._write(tmp_path, api_key="sk-local-test")
        assert self._read_config(tmp_path)["model"]["api_key"] == "sk-local-test"

    def test_writes_both(self, tmp_path):
        self._write(tmp_path, base_url="http://localhost:8080", api_key="mykey")
        config = self._read_config(tmp_path)
        assert config["model"]["base_url"] == "http://localhost:8080"
        assert config["model"]["api_key"] == "mykey"

    def test_merges_with_existing_config(self, tmp_path):
        """Does not clobber other top-level config keys."""
        existing = {"model": {"default": "gpt-4o", "provider": "openai"}, "agent": {"max_turns": 90}}
        (tmp_path / "config.yaml").write_text(yaml.dump(existing))
        self._write(tmp_path, base_url="http://localhost:1234")
        config = self._read_config(tmp_path)
        # Existing keys preserved
        assert config["model"]["default"] == "gpt-4o"
        assert config["model"]["provider"] == "openai"
        assert config["agent"]["max_turns"] == 90
        # New key added
        assert config["model"]["base_url"] == "http://localhost:1234"

    def test_noop_when_both_none(self, tmp_path):
        self._write(tmp_path, base_url=None, api_key=None)
        assert not (tmp_path / "config.yaml").exists()

    def test_noop_when_both_empty_strings(self, tmp_path):
        self._write(tmp_path, base_url="", api_key="")
        assert not (tmp_path / "config.yaml").exists()
# ── 6-7: API integration tests ────────────────────────────────────────────────
from tests._pytest_port import BASE as _TEST_BASE
def _post(path, body=None):
    """POST *body* (a dict, default ``{}``) as JSON to the test server.

    Returns ``(parsed_json, None)`` on success, or
    ``(parsed_error_json_or_empty_dict, http_status)`` when the server
    answers with an HTTP error.
    """
    import urllib.request
    # Explicit import: previously urllib.error was only resolvable because
    # importing urllib.request happens to import it as a side effect — an
    # implementation detail, not a guarantee.
    import urllib.error
    data = json.dumps(body or {}).encode()
    req = urllib.request.Request(
        _TEST_BASE + path, data=data, headers={"Content-Type": "application/json"}
    )
    try:
        with urllib.request.urlopen(req, timeout=10) as r:
            return json.loads(r.read()), None
    except urllib.error.HTTPError as e:
        try:
            # Error responses may carry a JSON body; fall back to {} if not.
            return json.loads(e.read()), e.code
        except Exception:
            return {}, e.code
class TestProfileCreateAPIWithEndpoint:
    _PROFILE_NAME = "test-ep-sprint31"

    def _cleanup(self):
        """Remove the test profile from wherever hermes_cli placed it."""
        home_hermes = pathlib.Path.home() / ".hermes"
        # Walk all profile roots: real ~/.hermes, and any subdirs that might be HERMES_HOME
        candidate_roots = {home_hermes}
        for root, dirs, _ in os.walk(str(home_hermes)):
            if "profiles" in dirs:
                candidate_roots.add(pathlib.Path(root))
            if root.count(os.sep) - str(home_hermes).count(os.sep) > 4:
                break  # don't recurse too deep
        for search_root in candidate_roots:
            leftover = search_root / "profiles" / self._PROFILE_NAME
            if leftover.exists():
                shutil.rmtree(leftover)

    def setup_method(self, _):
        self._cleanup()

    def teardown_method(self, _):
        self._cleanup()

    def test_api_route_accepts_base_url(self, test_server):
        """POST /api/profile/create with base_url returns ok:True."""
        data, err = _post("/api/profile/create", {
            "name": self._PROFILE_NAME,
            "base_url": "http://localhost:11434",
        })
        assert err is None, f"Expected 200, got {err}: {data}"
        assert data.get("ok") is True

    def test_api_route_writes_base_url_to_config(self, test_server):
        """Route accepts base_url and returns profile metadata.
        The actual config.yaml write is covered by the unit tests above.
        """
        data, err = _post("/api/profile/create", {
            "name": self._PROFILE_NAME,
            "base_url": "http://localhost:9999",
        })
        assert err is None, f"Expected 200, got {err}: {data}"
        assert data.get("ok") is True
        assert data.get("profile", {}).get("path"), f"API response missing profile.path: {data}"

    def test_api_route_rejects_invalid_base_url(self, test_server):
        """POST /api/profile/create with a non-http base_url returns 400."""
        data, err = _post("/api/profile/create", {
            "name": self._PROFILE_NAME,
            "base_url": "ftp://localhost:11434",
        })
        assert err == 400, f"Expected 400, got {err}: {data}"

72
tests/test_sprint32.py Normal file
View File

@@ -0,0 +1,72 @@
from pathlib import Path
from unittest.mock import MagicMock, patch
import subprocess
import os
from api.startup import auto_install_agent_deps
class TestAutoInstallAgentDeps:
    """Unit tests for api.startup.auto_install_agent_deps.

    subprocess.run is always mocked, so no pip process is ever spawned.
    """

    def test_installs_from_requirements_txt(self, tmp_path):
        """A requirements.txt in the agent dir triggers `pip install -r <file>`."""
        agent_dir = tmp_path / 'hermes-agent'
        agent_dir.mkdir()
        req = agent_dir / 'requirements.txt'
        req.write_text('pyyaml\n')
        with patch.dict('os.environ', {'HERMES_WEBUI_AGENT_DIR': str(agent_dir)}, clear=False):
            with patch('subprocess.run') as mock_run:
                mock_run.return_value = MagicMock(returncode=0, stderr='')
                assert auto_install_agent_deps() is True
                args = mock_run.call_args[0][0]
                assert '-r' in args and str(req) in args

    def test_falls_back_to_pyproject(self, tmp_path):
        """Without requirements.txt, a pyproject.toml installs the dir itself."""
        agent_dir = tmp_path / 'hermes-agent'
        agent_dir.mkdir()
        (agent_dir / 'pyproject.toml').write_text('[project]\nname="hermes-agent"\n')
        with patch.dict('os.environ', {'HERMES_WEBUI_AGENT_DIR': str(agent_dir)}, clear=False):
            with patch('subprocess.run') as mock_run:
                mock_run.return_value = MagicMock(returncode=0, stderr='')
                assert auto_install_agent_deps() is True
                args = mock_run.call_args[0][0]
                assert str(agent_dir) in args and '-r' not in args

    def test_skips_when_agent_dir_missing(self, tmp_path, capsys):
        """A nonexistent agent dir skips the install without running pip."""
        missing = tmp_path / 'nonexistent-agent'
        # Patch both HERMES_WEBUI_AGENT_DIR and HERMES_HOME so the fallback
        # path (HERMES_HOME/hermes-agent) also resolves to a nonexistent dir,
        # preventing the real agent dir from being found in the test environment.
        env_overrides = {
            'HERMES_WEBUI_AGENT_DIR': str(missing),
            'HERMES_HOME': str(tmp_path / 'no-hermes-home'),
        }
        with patch.dict('os.environ', env_overrides, clear=False):
            with patch('subprocess.run') as mock_run:
                assert auto_install_agent_deps() is False
                assert not mock_run.called
        assert 'skipped' in capsys.readouterr().out.lower()

    def test_skips_when_no_install_file(self, tmp_path, capsys):
        """An agent dir with neither requirements.txt nor pyproject.toml skips."""
        agent_dir = tmp_path / 'hermes-agent'
        agent_dir.mkdir()
        with patch.dict('os.environ', {'HERMES_WEBUI_AGENT_DIR': str(agent_dir)}, clear=False):
            with patch('subprocess.run') as mock_run:
                assert auto_install_agent_deps() is False
                assert not mock_run.called
        assert 'skipped' in capsys.readouterr().out.lower()

    def test_tolerates_pip_failure(self, tmp_path, capsys):
        """A non-zero pip exit code is reported, not raised."""
        agent_dir = tmp_path / 'hermes-agent'
        agent_dir.mkdir()
        (agent_dir / 'requirements.txt').write_text('somepkg\n')
        with patch.dict('os.environ', {'HERMES_WEBUI_AGENT_DIR': str(agent_dir)}, clear=False):
            with patch('subprocess.run') as mock_run:
                mock_run.return_value = MagicMock(returncode=1, stderr='ERROR: could not find package')
                assert auto_install_agent_deps() is False
        # BUG FIX: capsys.readouterr() drains the capture buffer, so the old
        # `'failed' in capsys.readouterr()... or 'pip' in capsys.readouterr()...`
        # made the second check always run against an empty string.
        # Capture once and test both substrings against the same snapshot.
        out = capsys.readouterr().out.lower()
        assert 'failed' in out or 'pip' in out

    def test_tolerates_timeout(self, tmp_path, capsys):
        """A pip timeout is caught and reported, not propagated."""
        agent_dir = tmp_path / 'hermes-agent'
        agent_dir.mkdir()
        (agent_dir / 'requirements.txt').write_text('somepkg\n')
        with patch.dict('os.environ', {'HERMES_WEBUI_AGENT_DIR': str(agent_dir)}, clear=False):
            with patch('subprocess.run', side_effect=subprocess.TimeoutExpired('pip', 120)):
                assert auto_install_agent_deps() is False
        assert 'timed out' in capsys.readouterr().out.lower()

59
tests/test_sprint33.py Normal file
View File

@@ -0,0 +1,59 @@
"""
Sprint 33 Tests: Shared app dialogs replace native confirm/prompt usage.
These tests verify the static assets expose the reusable confirm/input modal
and that browser-native confirm/prompt calls are no longer used in the Web UI.
"""
import pathlib
import re
REPO = pathlib.Path(__file__).parent.parent


def read(path):
    """Return the UTF-8 text of *path*, resolved relative to the repo root."""
    target = REPO / path
    return target.read_text(encoding="utf-8")
def test_index_has_shared_app_dialog_markup():
    """index.html ships every element id of the shared dialog component."""
    html = read("static/index.html")
    for element_id in (
        "appDialogOverlay",
        "appDialog",
        "appDialogTitle",
        "appDialogDesc",
        "appDialogInput",
        "appDialogCancel",
        "appDialogConfirm",
    ):
        assert f'id="{element_id}"' in html
def test_app_dialog_css_rules_exist():
    """style.css defines every selector the shared dialog relies on."""
    css = read("static/style.css")
    required_selectors = (
        ".app-dialog-overlay",
        ".app-dialog",
        ".app-dialog-input",
        ".app-dialog-actions",
        ".app-dialog-btn.confirm",
        ".app-dialog-btn.confirm.danger",
    )
    for selector in required_selectors:
        assert selector in css, f"missing CSS selector: {selector}"
def test_ui_js_exposes_shared_dialog_helpers():
    """ui.js defines the confirm/prompt helpers and a keydown listener."""
    ui_src = read("static/ui.js")
    for snippet in (
        "function showConfirmDialog(opts={})",
        "function showPromptDialog(opts={})",
        "document.addEventListener('keydown'",
    ):
        assert snippet in ui_src
def test_no_native_confirm_calls_remain_in_static_js():
    """No Web UI script may still call the browser-native confirm()."""
    native_confirm = re.compile(r"\bconfirm\s*\(")
    for path in (REPO / "static").glob("*.js"):
        contents = path.read_text(encoding="utf-8")
        assert not native_confirm.search(contents), f"native confirm() remains in {path.name}"
def test_no_native_prompt_calls_remain_in_static_js():
    """No Web UI script may still call the browser-native prompt()."""
    native_prompt = re.compile(r"\bprompt\s*\(")
    for path in (REPO / "static").glob("*.js"):
        contents = path.read_text(encoding="utf-8")
        assert not native_prompt.search(contents), f"native prompt() remains in {path.name}"

300
tests/test_sprint34.py Normal file
View File

@@ -0,0 +1,300 @@
"""
Sprint 34 Tests: OAuth provider support in onboarding (issues #303, #304).
Covers:
1. _provider_oauth_authenticated() returns True for known OAuth providers
with valid tokens in auth.json
2. _provider_oauth_authenticated() returns False when auth.json is absent,
empty, or has no token data
3. _provider_oauth_authenticated() returns False for unknown/API-key providers
4. _status_from_runtime() marks copilot/openai-codex as provider_ready when
credentials exist
5. _status_from_runtime() gives a helpful "hermes auth" note (not "API key")
for OAuth providers that have no credentials yet
6. API route /api/onboarding/status reflects OAuth-ready state
"""
import json
import pathlib
import tempfile
import unittest.mock
import pytest
REPO = pathlib.Path(__file__).parent.parent
from tests._pytest_port import BASE
# ── Helpers ──────────────────────────────────────────────────────────────────
def _make_auth_json(provider_id: str, tokens: dict, tmp_dir: pathlib.Path) -> pathlib.Path:
    """Write an auth.json with the given tokens for provider_id into tmp_dir."""
    auth_path = tmp_dir / "auth.json"
    payload = {"providers": {provider_id: tokens}}
    auth_path.write_text(json.dumps(payload), encoding="utf-8")
    return auth_path
# ── 1-3. _provider_oauth_authenticated unit tests ────────────────────────
class TestProviderOAuthAuthenticated:
    """Unit tests for the new _provider_oauth_authenticated() helper."""

    def _call(self, provider: str, hermes_home: pathlib.Path) -> bool:
        # Import fresh so we don't get a stale module reference
        from api.onboarding import _provider_oauth_authenticated
        return _provider_oauth_authenticated(provider, hermes_home)

    def test_returns_false_when_auth_json_absent(self, tmp_path):
        """No auth.json -> not authenticated."""
        assert self._call("openai-codex", tmp_path) is False

    def test_openai_codex_with_access_token(self, tmp_path):
        """openai-codex with a valid access_token -> authenticated."""
        tokens = {"access_token": "ey.test.token", "refresh_token": "ref123"}
        _make_auth_json("openai-codex", tokens, tmp_path)
        assert self._call("openai-codex", tmp_path) is True

    def test_openai_codex_with_refresh_token_only(self, tmp_path):
        """openai-codex with only a refresh_token -> still authenticated."""
        tokens = {"access_token": "", "refresh_token": "ref_only_token"}
        _make_auth_json("openai-codex", tokens, tmp_path)
        assert self._call("openai-codex", tmp_path) is True

    def test_copilot_with_api_key(self, tmp_path):
        """copilot with an api_key (GitHub token) -> authenticated."""
        _make_auth_json("copilot", {"api_key": "ghu_test_token_123"}, tmp_path)
        assert self._call("copilot", tmp_path) is True

    def test_empty_tokens_returns_false(self, tmp_path):
        """All token fields empty -> not authenticated."""
        tokens = {"access_token": "", "refresh_token": "", "api_key": ""}
        _make_auth_json("openai-codex", tokens, tmp_path)
        assert self._call("openai-codex", tmp_path) is False

    def test_missing_provider_key_in_auth_json(self, tmp_path):
        """auth.json present but provider key absent -> not authenticated."""
        store = {"providers": {"some-other-provider": {"access_token": "tok"}}}
        (tmp_path / "auth.json").write_text(json.dumps(store), encoding="utf-8")
        assert self._call("openai-codex", tmp_path) is False

    def test_unknown_provider_not_in_oauth_list(self, tmp_path):
        """A provider that is not a known OAuth provider -> always False."""
        _make_auth_json("some-random-provider", {"access_token": "tok"}, tmp_path)
        assert self._call("some-random-provider", tmp_path) is False

    def test_nous_provider_recognized(self, tmp_path):
        """nous is in the known OAuth set."""
        _make_auth_json("nous", {"access_token": "nous_tok"}, tmp_path)
        assert self._call("nous", tmp_path) is True

    def test_qwen_oauth_provider_recognized(self, tmp_path):
        """qwen-oauth is in the known OAuth set."""
        _make_auth_json("qwen-oauth", {"access_token": "qwen_tok"}, tmp_path)
        assert self._call("qwen-oauth", tmp_path) is True

    def test_empty_provider_string_returns_false(self, tmp_path):
        """Empty provider string -> False, no crash."""
        assert self._call("", tmp_path) is False
        assert self._call(" ", tmp_path) is False
# ── 4-5. _status_from_runtime integration ────────────────────────────────
class TestStatusFromRuntimeOAuth:
    """_status_from_runtime should treat OAuth providers with tokens as ready."""

    def _call(self, provider: str, model: str, hermes_home: pathlib.Path) -> dict:
        from api.onboarding import _status_from_runtime
        import api.onboarding as onboarding_mod
        saved_home_fn = onboarding_mod._get_active_hermes_home
        saved_found = onboarding_mod._HERMES_FOUND
        onboarding_mod._get_active_hermes_home = lambda: hermes_home
        # Simulate hermes-agent being available so we reach the provider logic
        # (without this, _status_from_runtime short-circuits to agent_unavailable)
        onboarding_mod._HERMES_FOUND = True
        try:
            config = {"model": {"provider": provider, "default": model}}
            return _status_from_runtime(config, True)
        finally:
            onboarding_mod._get_active_hermes_home = saved_home_fn
            onboarding_mod._HERMES_FOUND = saved_found

    def test_copilot_ready_when_api_key_in_auth_json(self, tmp_path):
        """copilot configured + api_key in auth.json -> provider_ready True."""
        _make_auth_json("copilot", {"api_key": "ghu_abc123"}, tmp_path)
        status = self._call("copilot", "gpt-5.4", tmp_path)
        assert status["provider_configured"] is True
        assert status["provider_ready"] is True
        assert status["setup_state"] == "ready"

    def test_openai_codex_ready_when_token_in_auth_json(self, tmp_path):
        """openai-codex configured + access_token -> provider_ready True."""
        tokens = {"access_token": "ey.test", "refresh_token": "ref"}
        _make_auth_json("openai-codex", tokens, tmp_path)
        status = self._call("openai-codex", "codex-mini-latest", tmp_path)
        assert status["provider_configured"] is True
        assert status["provider_ready"] is True
        assert status["setup_state"] == "ready"

    def test_copilot_not_ready_without_credentials(self, tmp_path):
        """copilot configured but no credentials -> provider_ready False.
        We mock hermes_cli.auth to be unavailable so the function falls through
        to the auth.json path. With no auth.json the result must be False.
        """
        # Prevent the hermes_cli fast path from finding real credentials
        with unittest.mock.patch(
            "api.onboarding._provider_oauth_authenticated",
            return_value=False,
        ):
            status = self._call("copilot", "gpt-5.4", tmp_path)
        assert status["provider_configured"] is True
        assert status["provider_ready"] is False
        assert status["setup_state"] == "provider_incomplete"

    def test_oauth_incomplete_note_mentions_hermes_auth(self, tmp_path):
        """When OAuth provider is incomplete, note should mention hermes auth/model."""
        status = self._call("openai-codex", "codex-mini-latest", tmp_path)
        note = status["provider_note"]
        assert "hermes auth" in note or "hermes model" in note, (
            f"Expected 'hermes auth' or 'hermes model' in note, got: {note!r}"
        )

    def test_oauth_incomplete_note_does_not_say_api_key(self, tmp_path):
        """OAuth provider incomplete note must not say 'API key' — that's misleading."""
        status = self._call("copilot", "gpt-5.4", tmp_path)
        note = status["provider_note"]
        assert "API key" not in note, (
            f"Note misleadingly mentions 'API key' for OAuth provider: {note!r}"
        )

    def test_standard_provider_incomplete_note_still_says_api_key(self, tmp_path):
        """For a standard API-key provider (openrouter), note should still say API key."""
        # openrouter with no .env
        status = self._call("openrouter", "anthropic/claude-sonnet-4.6", tmp_path)
        assert status["provider_ready"] is False
        note = status["provider_note"]
        assert "API key" in note, (
            f"Expected 'API key' in note for openrouter, got: {note!r}"
        )
# ── 6. API endpoint reflects OAuth-ready state ───────────────────────────────
class TestOnboardingStatusApiOAuth:
    """
    The /api/onboarding/status endpoint should report provider_ready=True
    when an OAuth provider is configured and has valid credentials.
    """

    _STATUS_URL = "/api/onboarding/status"

    def test_status_endpoint_returns_200(self):
        import urllib.request
        with urllib.request.urlopen(BASE + self._STATUS_URL, timeout=10) as resp:
            assert resp.status == 200
            payload = json.loads(resp.read())
        assert "system" in payload
        assert "provider_ready" in payload["system"]

    def test_onboarding_status_has_chat_ready_field(self):
        import urllib.request
        with urllib.request.urlopen(BASE + self._STATUS_URL, timeout=10) as resp:
            payload = json.loads(resp.read())
        assert "chat_ready" in payload["system"]

    def test_status_setup_state_valid_values(self):
        """setup_state must be one of the known string values."""
        import urllib.request
        with urllib.request.urlopen(BASE + self._STATUS_URL, timeout=10) as resp:
            payload = json.loads(resp.read())
        valid = {"ready", "provider_incomplete", "needs_provider", "agent_unavailable"}
        assert payload["system"]["setup_state"] in valid, (
            f"Unexpected setup_state: {payload['system']['setup_state']!r}"
        )
# ── Control Center: section reset on close ─────────────────────────────────
def test_control_center_resets_active_section_on_close():
    """Closing the control center must reset _settingsSection to 'conversation'."""
    # read_text() instead of bare open(...).read(): the old form never closed
    # the file handle, leaking it until GC (ResourceWarning under pytest).
    src = (pathlib.Path(__file__).parent.parent / 'static' / 'panels.js').read_text()
    assert '_settingsSection' in src, '_settingsSection state variable missing from panels.js'
    assert "_settingsSection = 'conversation'" in src or "_settingsSection='conversation'" in src, \
        'Control center does not reset section to conversation on close'
def test_control_center_tab_highlight_on_open():
    """Opening the control center must use settings-tabs for section navigation."""
    # read_text() instead of bare open(...).read() so the handle is closed promptly.
    css = (pathlib.Path(__file__).parent.parent / 'static' / 'style.css').read_text()
    assert 'settings-tabs' in css, 'settings-tabs CSS class for control center tabs missing from style.css'
# ── apply_onboarding_setup: unsupported/OAuth providers complete gracefully ──
class TestApplyOnboardingSetupUnsupportedProvider:
    """PR #323 / Issue #322: apply_onboarding_setup must not raise ValueError for
    providers already configured via CLI (openai-codex, copilot, nous, etc.).
    Instead it marks onboarding complete and returns current status.
    """

    def _call(self, provider: str) -> tuple:
        """Run apply_onboarding_setup for *provider* against a throwaway HERMES home.

        Returns ``(result, mock_save)``: the function's return value plus the
        save_settings mock so tests can inspect recorded calls.
        (The previous ``-> dict`` annotation was wrong — this always returns a
        tuple; the unused ``os`` import is also dropped.)
        """
        import sys, pathlib, unittest.mock, tempfile
        repo = pathlib.Path(__file__).parent.parent
        if str(repo) not in sys.path:
            sys.path.insert(0, str(repo))
        from api.onboarding import apply_onboarding_setup
        with tempfile.TemporaryDirectory() as tmp:
            with unittest.mock.patch("api.onboarding._get_active_hermes_home",
                                     return_value=pathlib.Path(tmp)), \
                 unittest.mock.patch("api.onboarding._get_config_path",
                                     return_value=pathlib.Path(tmp) / "config.yaml"), \
                 unittest.mock.patch("api.onboarding.save_settings") as mock_save, \
                 unittest.mock.patch("api.onboarding.get_onboarding_status",
                                     return_value={"completed": True, "system": {}}):
                result = apply_onboarding_setup({"provider": provider, "model": "", "api_key": ""})
            return result, mock_save

    def test_openai_codex_does_not_raise(self):
        """apply_onboarding_setup with openai-codex must not raise ValueError."""
        result, _ = self._call("openai-codex")
        assert result is not None

    def test_copilot_does_not_raise(self):
        """apply_onboarding_setup with copilot must not raise ValueError."""
        result, _ = self._call("copilot")
        assert result is not None

    def test_nous_does_not_raise(self):
        """apply_onboarding_setup with nous must not raise ValueError."""
        result, _ = self._call("nous")
        assert result is not None

    def test_unsupported_provider_marks_onboarding_complete(self):
        """apply_onboarding_setup with an unsupported provider must save onboarding_completed=True."""
        _, mock_save = self._call("openai-codex")
        calls = [str(c) for c in mock_save.call_args_list]
        assert any("onboarding_completed" in c for c in calls), \
            "save_settings must be called with onboarding_completed=True for unsupported providers"

    def test_unsupported_provider_returns_status_dict(self):
        """apply_onboarding_setup with an unsupported provider must return a status dict (not raise)."""
        result, _ = self._call("openai-codex")
        assert isinstance(result, dict), \
            "apply_onboarding_setup must return a dict for unsupported providers, not raise"

146
tests/test_sprint35.py Normal file
View File

@@ -0,0 +1,146 @@
"""
Sprint 35 Tests: Breadcrumb nav + wider panel + responsive message width (PR #302).
Covers:
1. PANEL_MAX raised from 500 to 1200 in boot.js
2. Responsive .messages-inner breakpoints in style.css (no hardcoded 800px)
3. renderFileBreadcrumb() function exists in workspace.js
4. renderFileBreadcrumb() is called from openFile()
5. clearPreview() calls renderBreadcrumb() to restore dir breadcrumb
6. Breadcrumb segments use correct CSS classes
7. breadcrumbBar element exists in index.html
8. Breadcrumb CSS rules exist in style.css
"""
import pathlib
import re
REPO = pathlib.Path(__file__).parent.parent


def read(path):
    """Return the UTF-8 text of *path*, resolved relative to the repo root."""
    target = REPO / path
    return target.read_text(encoding="utf-8")
# ── 1. PANEL_MAX raised ──────────────────────────────────────────────────────
def test_panel_max_raised_to_1200():
    """PANEL_MAX must be 1200 (raised from 500) for wider right panel."""
    boot = read("static/boot.js")
    widened = "PANEL_MAX=1200" in boot or "PANEL_MAX = 1200" in boot
    assert widened, (
        "PANEL_MAX was not raised to 1200 — right panel cannot be widened on ultrawide screens"
    )
def test_panel_max_is_not_500():
    """Old PANEL_MAX=500 must no longer be present."""
    boot = read("static/boot.js")
    stale = "PANEL_MAX=500" in boot or "PANEL_MAX = 500" in boot
    assert not stale, (
        "Old PANEL_MAX=500 still present — right panel width not updated"
    )
# ── 2. Responsive messages-inner ─────────────────────────────────────────────
def test_messages_inner_has_responsive_breakpoints():
    """style.css must have @media breakpoints for .messages-inner."""
    css = read("static/style.css")
    for compact, spaced, width in (
        ("min-width:1400px", "min-width: 1400px", "1400px"),
        ("min-width:1800px", "min-width: 1800px", "1800px"),
    ):
        assert compact in css or spaced in css, (
            f"Missing @media(min-width:{width}) breakpoint for .messages-inner"
        )
def test_messages_inner_no_hardcoded_800px():
    """The base .messages-inner rule must not hardcode max-width:800px."""
    css = read("static/style.css")
    # Scan line-by-line: a base .messages-inner rule (not inside a @media
    # block) must not pin max-width:800px on the same line.
    offenders = [
        ln for ln in css.splitlines()
        if ".messages-inner{" in ln and "max-width:800px" in ln
    ]
    if offenders:
        raise AssertionError(
            "Base .messages-inner still has hardcoded max-width:800px — "
            "responsive breakpoints not applied"
        )
def test_messages_inner_breakpoint_values():
    """The breakpoints should expand max-width at 1400px and 1800px."""
    css = read("static/style.css")
    checks = (
        ("max-width:1100px", "max-width: 1100px",
         "Expected max-width:1100px at 1400px breakpoint"),
        ("max-width:1200px", "max-width: 1200px",
         "Expected max-width:1200px at 1800px breakpoint"),
    )
    for compact, spaced, msg in checks:
        assert compact in css or spaced in css, msg
# ── 3–6. Breadcrumb navigation ──────────────────────────────────────────────
def test_render_file_breadcrumb_function_exists():
    """workspace.js must expose renderFileBreadcrumb()."""
    src = read("static/workspace.js")
    needle = "function renderFileBreadcrumb"
    assert needle in src, (
        "renderFileBreadcrumb() not defined in workspace.js"
    )
def test_render_file_breadcrumb_called_from_open_file():
    """openFile() must call renderFileBreadcrumb(path) to show path segments."""
    src = read("static/workspace.js")
    call = "renderFileBreadcrumb(path)"
    assert call in src, (
        "openFile() does not call renderFileBreadcrumb(path)"
    )
def test_breadcrumb_has_root_segment():
    """renderFileBreadcrumb must add a root '~' segment."""
    src = read("static/workspace.js")
    idx = src.find("function renderFileBreadcrumb")
    # BUG FIX: without this guard, idx == -1 silently slices from the END of
    # the file (src[-1:799]) and fails with a misleading "missing '~'" message.
    assert idx != -1, "renderFileBreadcrumb not found in workspace.js"
    block = src[idx:idx + 800]
    assert "'~'" in block or '"~"' in block, (
        "renderFileBreadcrumb missing root '~' segment"
    )
def test_breadcrumb_segments_use_correct_classes():
    """Breadcrumb segments must use breadcrumb-seg breadcrumb-link/current classes."""
    src = read("static/workspace.js")
    for css_class in ("breadcrumb-seg", "breadcrumb-link", "breadcrumb-current"):
        assert css_class in src, f"{css_class} class not used"
def test_clear_preview_calls_render_breadcrumb():
    """clearPreview() in boot.js must call renderBreadcrumb() to restore dir view."""
    src = read("static/boot.js")
    # Locate clearPreview and inspect its immediate body for the restore call.
    idx = src.find("function clearPreview")
    assert idx != -1, "clearPreview not found in boot.js"
    body = src[idx:idx + 600]
    assert "renderBreadcrumb" in body, (
        "clearPreview() does not call renderBreadcrumb() — "
        "directory breadcrumb won't restore after closing file preview"
    )
# ── 7. HTML markup ───────────────────────────────────────────────────────────
def test_breadcrumb_bar_in_index_html():
    """index.html must have the breadcrumbBar element."""
    markup = read("static/index.html")
    assert 'id="breadcrumbBar"' in markup, (
        "breadcrumbBar element missing from index.html — "
        "renderFileBreadcrumb() has nowhere to render"
    )
# ── 8. Breadcrumb CSS ────────────────────────────────────────────────────────
def test_breadcrumb_css_rules_exist():
    """style.css must have breadcrumb CSS rules."""
    css = read("static/style.css")
    selectors = (".breadcrumb-seg", ".breadcrumb-link", ".breadcrumb-current")
    for sel in selectors:
        if sel not in css:
            raise AssertionError(f"Missing CSS rule: {sel}")

182
tests/test_sprint36.py Normal file
View File

@@ -0,0 +1,182 @@
"""
Sprint 36 Tests: cancelStream cleanup no longer depends on SSE event (PR #309 / issue #299).
The old cancelStream() set "Cancelling..." status and then relied on the SSE cancel
event to clear it. If the SSE connection was already closed, the event never arrived
and "Cancelling..." lingered indefinitely.
The fix: cancelStream() now clears status, busy state, activeStreamId, and the cancel
button directly after the cancel API request completes — regardless of whether the SSE
cancel event fires. The SSE handler still runs if it arrives (all operations idempotent).
Covers:
1. cancelStream() clears activeStreamId unconditionally after the fetch
2. cancelStream() calls setBusy(false) unconditionally
3. cancelStream() calls setStatus('') unconditionally
4. cancelStream() hides the cancel button unconditionally
5. The catch block no longer calls setStatus(cancel_failed) — cleanup runs even on error
6. The SSE cancel handler is still present (idempotent path)
7. cancel_failed i18n key is still defined in all locales (key exists, just not used in
the catch-path anymore — kept for potential future use)
"""
import pathlib
import re
REPO = pathlib.Path(__file__).parent.parent
def read(path):
    """Read *path* (relative to the repo root) as UTF-8 text."""
    target = REPO / path
    return target.read_text(encoding="utf-8")
def _locale_count(src: str) -> int:
pattern = re.compile(
r"^\s{2}(?:'(?P<quoted>[A-Za-z0-9-]+)'|(?P<plain>[A-Za-z0-9-]+))\s*:\s*\{",
re.MULTILINE,
)
return sum(1 for _ in pattern.finditer(src))
# ── 1–4. cancelStream() cleanup is unconditional ────────────────────────────
class TestCancelStreamCleanup:
    """cancelStream() must clear all busy state regardless of SSE connection state."""
    def _get_cancel_block(self):
        """Extract the cancelStream function body from boot.js."""
        src = read("static/boot.js")
        idx = src.find("async function cancelStream()")
        assert idx != -1, "cancelStream not found in boot.js"
        # Find the closing brace — scan for the matching }.
        # NOTE(review): braces inside JS string literals would skew the count;
        # acceptable for this source as written today.
        depth = 0
        end = idx
        for i, ch in enumerate(src[idx:]):
            if ch == '{':
                depth += 1
            elif ch == '}':
                depth -= 1
                if depth == 0:
                    end = idx + i + 1
                    break
        return src[idx:end]
    def test_clears_active_stream_id(self):
        """cancelStream() must null out S.activeStreamId after the request."""
        block = self._get_cancel_block()
        assert "S.activeStreamId=null" in block or "S.activeStreamId = null" in block, (
            "cancelStream() does not clear S.activeStreamId — "
            "subsequent calls could re-cancel an already-finished stream"
        )
    def test_calls_set_busy_false(self):
        """cancelStream() must call setBusy(false) directly."""
        block = self._get_cancel_block()
        assert "setBusy(false)" in block, (
            "cancelStream() does not call setBusy(false) — "
            "spinner may linger if SSE connection is already closed"
        )
    def test_calls_set_status_empty(self):
        """cancelStream() must call setStatus('') to clear 'Cancelling...' text."""
        block = self._get_cancel_block()
        assert "setStatus('')" in block or 'setStatus("")' in block, (
            "cancelStream() does not clear status text — "
            "'Cancelling...' can linger if SSE cancel event never arrives"
        )
    def test_hides_cancel_button(self):
        """cancelStream() must hide the cancel button unconditionally."""
        block = self._get_cancel_block()
        assert "btnCancel" in block, (
            "cancelStream() does not reference btnCancel — cancel button may stay visible"
        )
    def test_cleanup_not_inside_try_block(self):
        """Cleanup must happen outside the try block so it runs even if fetch fails."""
        block = self._get_cancel_block()
        # The cleanup must appear AFTER the catch clause; otherwise it sits
        # inside the try and won't run when the fetch throws.
        catch_idx = block.find("}catch(")
        if catch_idx == -1:
            catch_idx = block.find("} catch (")
        # BUG FIX: previously catch_idx could remain -1 (catch spelled with
        # spaces was not searched), which made the position comparison below
        # pass vacuously; also removed the unused try_idx local.
        assert catch_idx != -1, "No catch block found in cancelStream"
        cleanup_idx = block.find("S.activeStreamId=null")
        if cleanup_idx == -1:
            cleanup_idx = block.find("S.activeStreamId = null")
        assert cleanup_idx > catch_idx, (
            "S.activeStreamId cleanup appears to be inside the try block — "
            "it won't run if the fetch throws"
        )
# ── 5. Error path behavior ────────────────────────────────────────────────────
class TestCancelStreamErrorPath:
    """The catch block should not prevent cleanup from running."""
    def test_catch_block_does_not_call_set_status_cancel_failed(self):
        """The catch block must not call setStatus(cancel_failed) on its own.
        Previously: catch(e){setStatus(t('cancel_failed')+e.message)}
        After fix: catch swallows the error; cleanup runs in the outer scope.
        The status is cleared by setStatus('') unconditionally.
        """
        src = read("static/boot.js")
        start = src.find("async function cancelStream()")
        snippet = src[start:start + 400]
        # Locate the catch clause in either compact or spaced form.
        catch_pos = snippet.find("}catch(")
        if catch_pos == -1:
            catch_pos = snippet.find("} catch (")
        assert catch_pos != -1, "No catch block found in cancelStream"
        # Isolate just the catch body between its braces.
        open_brace = snippet.find("{", catch_pos)
        close_brace = snippet.find("}", open_brace)
        catch_body = snippet[open_brace:close_brace + 1]
        assert "cancel_failed" not in catch_body, (
            "catch block still calls setStatus(cancel_failed) — "
            "this means a failed cancel shows an error instead of cleaning up silently"
        )
# ── 6. SSE cancel handler still present ──────────────────────────────────────
def test_sse_cancel_handler_still_present():
    """The SSE 'cancel' event handler must still exist in messages.js.
    The new cancelStream() cleanup is not a replacement — the SSE handler
    provides additional cleanup (removes 'Task cancelled.' message, clears
    tool cards, etc.) when the connection is still alive.
    """
    src = read("static/messages.js")
    spellings = ("addEventListener('cancel'", 'addEventListener("cancel"')
    assert any(s in src for s in spellings), (
        "SSE cancel event handler missing from messages.js — "
        "live cancellation cleanup path is broken"
    )
def test_sse_cancel_handler_calls_set_busy():
    """The SSE cancel handler must still call setBusy(false)."""
    src = read("static/messages.js")
    pos = src.find("addEventListener('cancel'")
    if pos == -1:
        pos = src.find('addEventListener("cancel"')
    assert pos != -1
    handler = src[pos:pos + 1000]
    assert "setBusy(false)" in handler, (
        "SSE cancel handler no longer calls setBusy(false)"
    )
# ── 7. i18n key preserved ─────────────────────────────────────────────────────
def test_cancel_failed_i18n_key_exists_in_all_locales():
    """cancel_failed key must still exist in i18n.js for all locales."""
    src = read("static/i18n.js")
    # One occurrence expected per locale (en, es, de, ru, zh, zh-Hant).
    expected = _locale_count(src)
    actual = src.count("cancel_failed:")
    assert actual >= expected, (
        f"cancel_failed key only found {actual} times in i18n.js — "
        f"expected at least {expected} (one per locale)"
    )

103
tests/test_sprint37.py Normal file
View File

@@ -0,0 +1,103 @@
"""
Sprint 37 Tests: Workspace panel open/closed state persists across refreshes via localStorage.
"""
import pathlib
import re
REPO_ROOT = pathlib.Path(__file__).parent.parent
# Read the assets once at import time.  CONSISTENCY FIX: explicit UTF-8
# matches the read() helpers in the sibling sprint test modules and avoids
# locale-dependent decoding failures (e.g. cp1252 on Windows).
BOOT_JS = (REPO_ROOT / "static" / "boot.js").read_text(encoding="utf-8")
HTML = (REPO_ROOT / "static" / "index.html").read_text(encoding="utf-8")
# ── Persistence: save on change ───────────────────────────────────────────────
def test_workspace_panel_saves_to_localstorage():
    """_setWorkspacePanelMode must call localStorage.setItem with hermes-webui-workspace-panel."""
    storage_key = "hermes-webui-workspace-panel"
    assert storage_key in BOOT_JS, \
        "boot.js must use localStorage key 'hermes-webui-workspace-panel' to persist panel state"
def test_workspace_panel_save_inside_set_mode():
    """localStorage.setItem for panel state must live inside _setWorkspacePanelMode."""
    fn_idx = BOOT_JS.find("function _setWorkspacePanelMode(")
    # BUG FIX: guard against a missing function — a -1 index would slice
    # nonsense from the end of the file and fail with a misleading message.
    assert fn_idx != -1, "_setWorkspacePanelMode not found in boot.js"
    fn_end = BOOT_JS.find("\n}", fn_idx) + 2
    fn_body = BOOT_JS[fn_idx:fn_end]
    assert "hermes-webui-workspace-panel" in fn_body, \
        "localStorage save must be inside _setWorkspacePanelMode so every state change is captured"
def test_workspace_panel_saves_open_value():
    """When the panel is open, localStorage must be set to 'open'."""
    fn_idx = BOOT_JS.find("function _setWorkspacePanelMode(")
    # BUG FIX: guard against a missing function (see sibling tests).
    assert fn_idx != -1, "_setWorkspacePanelMode not found in boot.js"
    fn_end = BOOT_JS.find("\n}", fn_idx) + 2
    fn_body = BOOT_JS[fn_idx:fn_end]
    assert "'open'" in fn_body or '"open"' in fn_body, \
        "_setWorkspacePanelMode must store 'open' for an open panel state"
def test_workspace_panel_saves_closed_value():
    """When the panel is closed, localStorage must be set to 'closed'."""
    fn_idx = BOOT_JS.find("function _setWorkspacePanelMode(")
    # BUG FIX: guard against a missing function (see sibling tests).
    assert fn_idx != -1, "_setWorkspacePanelMode not found in boot.js"
    fn_end = BOOT_JS.find("\n}", fn_idx) + 2
    fn_body = BOOT_JS[fn_idx:fn_end]
    assert "'closed'" in fn_body or '"closed"' in fn_body, \
        "_setWorkspacePanelMode must store 'closed' for a closed panel state"
# ── Persistence: restore on boot ─────────────────────────────────────────────
def test_workspace_panel_restored_on_boot():
    """Boot IIFE must read hermes-webui-workspace-panel from localStorage and restore the mode."""
    # Find the boot IIFE (the async IIFE at the bottom of boot.js).
    iife_idx = BOOT_JS.rfind("(async function")
    if iife_idx < 0:
        iife_idx = BOOT_JS.rfind("(async()=>{")
    # BUG FIX: without this guard, a missing IIFE means iife_idx == -1 and
    # BOOT_JS[-1:] is just the last character — a misleading failure.
    assert iife_idx >= 0, "boot IIFE not found in boot.js"
    iife_body = BOOT_JS[iife_idx:]
    assert "hermes-webui-workspace-panel" in iife_body, \
        "Boot IIFE must read 'hermes-webui-workspace-panel' from localStorage to restore panel state on load"
def test_workspace_panel_restore_sets_browse_mode():
    """When localStorage says 'open', boot must set _workspacePanelMode to 'browse' before syncing."""
    iife_idx = BOOT_JS.rfind("(async function")
    if iife_idx < 0:
        iife_idx = BOOT_JS.rfind("(async()=>{")
    # BUG FIX: guard against a missing IIFE (see sibling tests).
    assert iife_idx >= 0, "boot IIFE not found in boot.js"
    iife_body = BOOT_JS[iife_idx:]
    # The restore block must assign _workspacePanelMode = 'browse'
    assert "_workspacePanelMode='browse'" in iife_body or "_workspacePanelMode = 'browse'" in iife_body, \
        "Boot must set _workspacePanelMode='browse' when restoring an open panel"
def test_workspace_panel_restore_before_sync():
    """Restore must happen before syncWorkspacePanelState() so the state drives the initial render."""
    iife_idx = BOOT_JS.rfind("(async function")
    if iife_idx < 0:
        iife_idx = BOOT_JS.rfind("(async()=>{")
    # BUG FIX: guard against a missing IIFE (see sibling tests).
    assert iife_idx >= 0, "boot IIFE not found in boot.js"
    iife_body = BOOT_JS[iife_idx:]
    restore_pos = iife_body.find("hermes-webui-workspace-panel")
    sync_pos = iife_body.find("syncWorkspacePanelState()")
    assert restore_pos >= 0, "restore read must be present in boot IIFE"
    assert sync_pos >= 0, "syncWorkspacePanelState call must be present in boot IIFE"
    assert restore_pos < sync_pos, \
        "Workspace panel restore must happen BEFORE syncWorkspacePanelState() so the correct mode is applied"
def test_workspace_panel_preload_marker_restored_in_head():
    """index.html must preload the workspace panel state before the main stylesheet paints."""
    marker = "document.documentElement.dataset.workspacePanel"
    css_link = '<link rel="stylesheet" href="static/style.css">'
    marker_at = HTML.find(marker)
    css_at = HTML.find(css_link)
    assert marker_at >= 0, "index.html must preload documentElement.dataset.workspacePanel from localStorage"
    assert css_at >= 0, "main stylesheet link missing from index.html"
    assert marker_at < css_at, \
        "workspace panel preload marker must be set before style.css loads to avoid first-paint flash"
def test_workspace_panel_mode_syncs_document_dataset():
    """_setWorkspacePanelMode must update documentElement.dataset.workspacePanel for runtime parity."""
    fn_idx = BOOT_JS.find("function _setWorkspacePanelMode(")
    # BUG FIX: guard against a missing function (see sibling tests).
    assert fn_idx != -1, "_setWorkspacePanelMode not found in boot.js"
    fn_end = BOOT_JS.find("\n}", fn_idx) + 2
    fn_body = BOOT_JS[fn_idx:fn_end]
    assert "document.documentElement.dataset.workspacePanel" in fn_body, \
        "_setWorkspacePanelMode must keep documentElement.dataset.workspacePanel in sync with the panel state"

140
tests/test_sprint38.py Normal file
View File

@@ -0,0 +1,140 @@
"""
Sprint 38 Tests: Think-tag stripping with leading whitespace (PR #327).
Covers the static render path (ui.js regex logic, verified against the JS source)
and the streaming render path (messages.js _streamDisplay logic).
"""
import pathlib
import re
REPO_ROOT = pathlib.Path(__file__).parent.parent
# CONSISTENCY FIX: explicit UTF-8 matches the read() helpers in the sibling
# sprint test modules and keeps decoding deterministic across platforms.
UI_JS = (REPO_ROOT / "static" / "ui.js").read_text(encoding="utf-8")
MSG_JS = (REPO_ROOT / "static" / "messages.js").read_text(encoding="utf-8")
# ── ui.js: static render path ────────────────────────────────────────────────
def test_think_regex_has_no_anchor():
    """The <think> regex in ui.js must not use a ^ anchor so leading whitespace is allowed."""
    # Locate the thinkMatch line via its .match( call.
    idx = UI_JS.find("const thinkMatch=content.match(")
    assert idx >= 0, "thinkMatch line not found in ui.js"
    line = UI_JS[idx:idx + 100]
    # The regex must NOT start with ^ right after the opening /.
    anchored = "/^<think>" in line or "(/^" in line
    assert not anchored, \
        f"thinkMatch regex must not use ^ anchor — found: {line.strip()}"
def test_gemma_regex_has_no_anchor():
    """The Gemma channel-token regex in ui.js must not use a ^ anchor."""
    found = re.search(r'const gemmaMatch=content\.match\((/[^/]+/)\)', UI_JS)
    assert found, "gemmaMatch line not found in ui.js"
    literal = found.group(1)
    assert not literal.startswith('/^'), \
        f"gemmaMatch regex must not use ^ anchor — got {literal}"
def test_think_content_removal_uses_replace_not_slice():
    """After extracting thinkingText, content must use .replace() not .slice() to remove the tag."""
    # Find the block that handles thinkMatch.
    handler_at = UI_JS.find("if(thinkMatch){")
    assert handler_at >= 0, "thinkMatch handler block not found"
    handler = UI_JS[handler_at:handler_at + 200]
    assert "content.replace(" in handler, \
        "ui.js must use content.replace() to remove <think> block (not .slice())"
    assert ".trimStart()" in handler, \
        "ui.js must call .trimStart() on content after removing the <think> block"
def test_gemma_content_removal_uses_replace_not_slice():
    """Gemma channel token removal must also use .replace() not .slice()."""
    handler_at = UI_JS.find("if(gemmaMatch){")
    assert handler_at >= 0, "gemmaMatch handler block not found"
    handler = UI_JS[handler_at:handler_at + 200]
    assert "content.replace(" in handler, \
        "ui.js must use content.replace() to remove Gemma channel block (not .slice())"
    assert ".trimStart()" in handler, \
        "ui.js must call .trimStart() on content after removing the Gemma channel block"
def test_gemma_turn_regex_in_ui_js():
    """The Gemma 4 <|turn|>thinking\\n...<turn|> pattern must be extracted from persisted content."""
    # Detection in _messageHasReasoningPayload (correct double-pipe format).
    assert "<\\|turn\\|>thinking" in UI_JS, (
        "ui.js _messageHasReasoningPayload must detect Gemma 4 <|turn|>thinking\\n...<turn|> pattern"
        " (note: double-pipe: <|turn|> not <|turn>)"
    )
    # Extraction block.
    found = re.search(r'const gemmaTurnMatch=content\.match\((/[^/]+/)\)', UI_JS)
    assert found, "gemmaTurnMatch line not found in ui.js"
    literal = found.group(1)
    assert not literal.startswith('/^'), (
        f"gemmaTurnMatch regex must not use ^ anchor — got {literal}"
    )
def test_gemma_turn_content_removal_uses_replace_not_slice():
    """Gemma 4 turn token removal must use .replace() not .slice()."""
    handler_at = UI_JS.find("if(gemmaTurnMatch){")
    assert handler_at >= 0, "gemmaTurnMatch handler block not found in ui.js"
    handler = UI_JS[handler_at:handler_at + 240]
    assert "content.replace(" in handler, (
        "ui.js must use content.replace() to remove Gemma 4 turn block (not .slice())"
    )
    assert ".trimStart()" in handler, (
        "ui.js must call .trimStart() on content after removing the Gemma 4 turn block"
    )
# ── messages.js: streaming render path ───────────────────────────────────────
def test_stream_display_trims_before_startswith():
    """_streamDisplay in messages.js must call .trimStart() before .startsWith() check."""
    fn_at = MSG_JS.find("function _streamDisplay()")
    assert fn_at >= 0, "_streamDisplay function not found in messages.js"
    fn_body = MSG_JS[fn_at:MSG_JS.find("\n }", fn_at) + 4]
    assert "trimStart()" in fn_body, \
        "_streamDisplay must call trimStart() to handle models that emit leading whitespace before <think>"
def test_stream_display_uses_trimmed_for_startswith():
    """_streamDisplay must check trimmed.startsWith(open), not raw.startsWith(open)."""
    fn_idx = MSG_JS.find("function _streamDisplay()")
    # BUG FIX: each test must guard independently — a -1 here would slice
    # from the end of the file and fail with a misleading message.
    assert fn_idx >= 0, "_streamDisplay function not found in messages.js"
    fn_end = MSG_JS.find("\n }", fn_idx) + 4
    fn_body = MSG_JS[fn_idx:fn_end]
    assert "trimmed.startsWith(open)" in fn_body, \
        "_streamDisplay must use trimmed.startsWith(open) not raw.startsWith(open)"
def test_stream_display_partial_tag_uses_trimmed():
    """The partial-tag guard in _streamDisplay must also use trimmed, not raw."""
    fn_idx = MSG_JS.find("function _streamDisplay()")
    # BUG FIX: guard against a missing function (see sibling tests).
    assert fn_idx >= 0, "_streamDisplay function not found in messages.js"
    fn_end = MSG_JS.find("\n }", fn_idx) + 4
    fn_body = MSG_JS[fn_idx:fn_end]
    assert "open.startsWith(trimmed)" in fn_body, \
        "Partial-tag guard must use open.startsWith(trimmed) not open.startsWith(raw)"
def test_stream_display_trims_return_after_close():
    """After stripping a completed think block, _streamDisplay must trim leading whitespace from the result."""
    fn_idx = MSG_JS.find("function _streamDisplay()")
    # BUG FIX: guard against a missing function (see sibling tests).
    assert fn_idx >= 0, "_streamDisplay function not found in messages.js"
    fn_end = MSG_JS.find("\n }", fn_idx) + 4
    fn_body = MSG_JS[fn_idx:fn_end]
    # The return after finding close must strip whitespace from the result
    assert ".replace(/^" in fn_body and "s+/,'')" in fn_body, \
        "_streamDisplay must strip leading whitespace from content after the closing think tag"
# ── Regression: existing anchored patterns must be gone ──────────────────────
def test_no_anchored_think_regex_in_ui_js():
    """The old anchored regex /^<think>/ must not exist in ui.js."""
    stale = "/^<think>"
    assert stale not in UI_JS, \
        "Old anchored /^<think>/ regex still present in ui.js — fix not applied"
def test_no_anchored_gemma_regex_in_ui_js():
    """The old anchored Gemma regex must not exist in ui.js."""
    stale = "/^<|channel>"
    assert stale not in UI_JS, \
        "Old anchored /^<|channel>/ regex still present in ui.js — fix not applied"

235
tests/test_sprint39.py Normal file
View File

@@ -0,0 +1,235 @@
"""
Sprint 39 Tests: Skip-onboarding env var + onboarding key reload fix (PR A of issue #329).
Covers:
- HERMES_WEBUI_SKIP_ONBOARDING=1 bypasses the wizard unconditionally (chat_ready not required)
- HERMES_WEBUI_SKIP_ONBOARDING unset leaves default behaviour unchanged
- apply_onboarding_setup sets os.environ synchronously when an API key is saved
- apply_onboarding_setup refuses to write config/env files when SKIP_ONBOARDING is set
"""
import os
import unittest
import unittest.mock
from unittest.mock import patch
import api.onboarding as mod
# Canned runtime snapshot handed to the mocked _status_from_runtime:
# a fully configured provider whose chat path is ready.
_READY_RUNTIME = {
    "chat_ready": True,
    "provider_configured": True,
    "provider_ready": True,
    "setup_state": "ready",
    "provider_note": "Ready",
    "current_provider": "openai",
    "current_model": "gpt-4o",
    "current_base_url": None,
    "env_path": "/tmp/test.env",
}
# Counterpart snapshot: a blank install that still needs provider setup.
_NOT_READY_RUNTIME = {
    "chat_ready": False,
    "provider_configured": False,
    "provider_ready": False,
    "setup_state": "needs_provider",
    "provider_note": "Needs setup",
    "current_provider": None,
    "current_model": None,
    "current_base_url": None,
    "env_path": "/tmp/test.env",
}
# (target, side_effect) pairs patched for every get_onboarding_status() call
# so the module under test never touches real settings, config, or disk.
_COMMON_PATCHES = [
    ("api.onboarding.load_settings", lambda: {}),
    ("api.onboarding.get_config", lambda: {}),
    ("api.onboarding.verify_hermes_imports", lambda: (True, [], [])),
    ("api.onboarding.load_workspaces", lambda: []),
    ("api.onboarding.get_last_workspace", lambda: "/tmp"),
    ("api.onboarding.get_available_models", lambda: []),
    ("api.onboarding.is_auth_enabled", lambda: False),
    ("api.onboarding._build_setup_catalog", lambda cfg: {}),
    ("api.onboarding._get_config_path", lambda: __import__("pathlib").Path("/tmp/fake.yaml")),
]
def _apply_patches(extra_patches=()):
    """Build (but do not start) mock patchers for the common targets plus *extra_patches*."""
    specs = list(_COMMON_PATCHES)
    specs.extend(extra_patches)
    return [patch(target, side_effect=fn) for target, fn in specs]
class TestSkipOnboardingEnvVar(unittest.TestCase):
    """get_onboarding_status() must honour HERMES_WEBUI_SKIP_ONBOARDING."""
    def _run_status(self, runtime, env_override):
        """Call get_onboarding_status() with *runtime* mocked in and *env_override* applied.

        BUG FIX: patchers are now started inside the try and only the ones
        that actually started are stopped, so an exception while starting
        (or inside the call) cannot leak active patches into later tests.
        """
        runtime_patches = [("api.onboarding._status_from_runtime", lambda cfg, ok: runtime)]
        all_patches = _apply_patches(runtime_patches)
        started = []
        with patch.dict(os.environ, env_override, clear=False):
            try:
                for p in all_patches:
                    p.start()
                    started.append(p)
                return mod.get_onboarding_status()
            finally:
                for p in started:
                    p.stop()
    def test_skip_env_1_and_chat_ready_marks_completed(self):
        """HERMES_WEBUI_SKIP_ONBOARDING=1 + chat_ready=True → completed=True."""
        status = self._run_status(_READY_RUNTIME, {"HERMES_WEBUI_SKIP_ONBOARDING": "1"})
        self.assertTrue(status["completed"],
                        "completed must be True when skip env var is 1 and chat_ready")
    def test_skip_env_true_and_chat_ready_marks_completed(self):
        """HERMES_WEBUI_SKIP_ONBOARDING=true also accepted."""
        status = self._run_status(_READY_RUNTIME, {"HERMES_WEBUI_SKIP_ONBOARDING": "true"})
        self.assertTrue(status["completed"])
    def test_skip_env_yes_and_chat_ready_marks_completed(self):
        """HERMES_WEBUI_SKIP_ONBOARDING=yes also accepted."""
        status = self._run_status(_READY_RUNTIME, {"HERMES_WEBUI_SKIP_ONBOARDING": "yes"})
        self.assertTrue(status["completed"])
    def test_skip_env_1_works_even_when_not_chat_ready(self):
        """HERMES_WEBUI_SKIP_ONBOARDING=1 skips unconditionally — chat_ready is NOT required."""
        status = self._run_status(_NOT_READY_RUNTIME, {"HERMES_WEBUI_SKIP_ONBOARDING": "1"})
        self.assertTrue(status["completed"],
                        "completed must be True when skip env var is set, regardless of chat_ready")
    def test_skip_env_unset_leaves_default_false(self):
        """Without the env var, completed is False when settings are empty."""
        env = {k: v for k, v in os.environ.items() if k != "HERMES_WEBUI_SKIP_ONBOARDING"}
        with patch.dict(os.environ, env, clear=True):
            status = self._run_status(_READY_RUNTIME, {})
        self.assertFalse(status["completed"],
                         "completed must be False when env var absent and settings empty")
    def test_settings_completed_still_works_without_env_var(self):
        """onboarding_completed in settings → completed=True regardless of env var."""
        runtime_patches = [("api.onboarding._status_from_runtime", lambda cfg, ok: _READY_RUNTIME)]
        settings_patch = [("api.onboarding.load_settings", lambda: {"onboarding_completed": True})]
        all_patches = _apply_patches(runtime_patches + settings_patch)
        env = {k: v for k, v in os.environ.items() if k != "HERMES_WEBUI_SKIP_ONBOARDING"}
        started = []
        with patch.dict(os.environ, env, clear=True):
            try:
                for p in all_patches:
                    p.start()
                    started.append(p)
                status = mod.get_onboarding_status()
            finally:
                for p in started:
                    p.stop()
        self.assertTrue(status["completed"])
class TestApplyOnboardingKeySync(unittest.TestCase):
    """Verify that apply_onboarding_setup sets os.environ synchronously."""
    def test_api_key_set_in_os_environ_after_apply(self):
        """After apply_onboarding_setup with a key, os.environ must have the key."""
        import pathlib
        os.environ.pop("OPENAI_API_KEY", None)
        mock_cfg = {"model": {"provider": "openai", "default": "gpt-4o"}}
        try:
            with patch("api.onboarding._load_yaml_config", return_value=mock_cfg), \
                 patch("api.onboarding._save_yaml_config"), \
                 patch("api.onboarding._write_env_file"), \
                 patch("api.onboarding.reload_config"), \
                 patch("api.onboarding.get_onboarding_status", return_value={"completed": True}), \
                 patch("api.onboarding._get_config_path", return_value=pathlib.Path("/tmp/fake.yaml")), \
                 patch("api.onboarding._load_env_file", return_value={}), \
                 patch("api.onboarding._provider_api_key_present", return_value=False), \
                 patch("api.onboarding._get_active_hermes_home", return_value=pathlib.Path("/tmp")):
                mod.apply_onboarding_setup({
                    "provider": "openai",
                    "model": "gpt-4o",
                    "api_key": "sk-test-key-123",
                })
            self.assertEqual(os.environ.get("OPENAI_API_KEY"), "sk-test-key-123",
                             "OPENAI_API_KEY must be set directly on os.environ after apply")
        finally:
            # BUG FIX: cleanup previously ran only on success, leaking the
            # test key into os.environ for later tests when the assert failed.
            os.environ.pop("OPENAI_API_KEY", None)
    def test_no_key_provided_does_not_set_environ(self):
        """If no api_key is given (key already present), os.environ is not clobbered."""
        import pathlib
        os.environ["OPENAI_API_KEY"] = "sk-existing-key"
        mock_cfg = {"model": {"provider": "openai", "default": "gpt-4o"}}
        try:
            with patch("api.onboarding._load_yaml_config", return_value=mock_cfg), \
                 patch("api.onboarding._save_yaml_config"), \
                 patch("api.onboarding._write_env_file"), \
                 patch("api.onboarding.reload_config"), \
                 patch("api.onboarding.get_onboarding_status", return_value={"completed": True}), \
                 patch("api.onboarding._get_config_path", return_value=pathlib.Path("/tmp/fake.yaml")), \
                 patch("api.onboarding._load_env_file", return_value={"OPENAI_API_KEY": "sk-existing-key"}), \
                 patch("api.onboarding._provider_api_key_present", return_value=True), \
                 patch("api.onboarding._get_active_hermes_home", return_value=pathlib.Path("/tmp")):
                mod.apply_onboarding_setup({
                    "provider": "openai",
                    "model": "gpt-4o",
                })
            # Key must be unchanged
            self.assertEqual(os.environ.get("OPENAI_API_KEY"), "sk-existing-key")
        finally:
            # BUG FIX: same leak as above — always drop the test key.
            os.environ.pop("OPENAI_API_KEY", None)
class TestApplyOnboardingSkipGuard(unittest.TestCase):
    """apply_onboarding_setup must not write config/env when SKIP_ONBOARDING is set."""
    def test_apply_setup_blocked_when_skip_env_set(self):
        """SKIP_ONBOARDING=1 → apply_onboarding_setup never touches disk."""
        save_yaml_mock = unittest.mock.MagicMock()
        write_env_mock = unittest.mock.MagicMock()
        # With the skip flag active, apply must return without persisting
        # anything — the writer mocks record any disk-touching attempt.
        with patch.dict(os.environ, {"HERMES_WEBUI_SKIP_ONBOARDING": "1"}, clear=False), \
            patch("api.onboarding._save_yaml_config", save_yaml_mock), \
            patch("api.onboarding._write_env_file", write_env_mock), \
            patch("api.onboarding.save_settings"), \
            patch("api.onboarding.get_onboarding_status", return_value={"completed": True}):
            mod.apply_onboarding_setup({
                "provider": "openai",
                "model": "gpt-4o",
                "api_key": "should-not-be-saved",
            })
        # Neither the YAML config nor the .env file may have been written.
        save_yaml_mock.assert_not_called()
        write_env_mock.assert_not_called()
    def test_apply_setup_proceeds_normally_without_skip_env(self):
        """Without SKIP_ONBOARDING, apply_onboarding_setup writes config as usual."""
        import pathlib
        save_yaml_mock = unittest.mock.MagicMock()
        mock_cfg = {"model": {"provider": "openai", "default": "gpt-4o"}}
        # Build a copy of the environment with the skip flag removed so the
        # normal write path is exercised.
        env = {k: v for k, v in os.environ.items() if k != "HERMES_WEBUI_SKIP_ONBOARDING"}
        with patch.dict(os.environ, env, clear=True), \
            patch("api.onboarding._load_yaml_config", return_value=mock_cfg), \
            patch("api.onboarding._save_yaml_config", save_yaml_mock), \
            patch("api.onboarding._write_env_file"), \
            patch("api.onboarding.reload_config"), \
            patch("api.onboarding.get_onboarding_status", return_value={"completed": True}), \
            patch("api.onboarding._get_config_path", return_value=pathlib.Path("/tmp/fake.yaml")), \
            patch("api.onboarding._load_env_file", return_value={"OPENAI_API_KEY": "existing"}), \
            patch("api.onboarding._provider_api_key_present", return_value=True), \
            patch("api.onboarding._get_active_hermes_home", return_value=pathlib.Path("/tmp")):
            mod.apply_onboarding_setup({
                "provider": "openai",
                "model": "gpt-4o",
            })
        # The normal path must persist the YAML config exactly once.
        save_yaml_mock.assert_called_once()
if __name__ == "__main__":
unittest.main()

158
tests/test_sprint4.py Normal file
View File

@@ -0,0 +1,158 @@
"""Sprint 4 tests: relocation, session rename, search, file ops, validation."""
import json, pathlib, uuid, urllib.request, urllib.error
from tests._pytest_port import BASE
def get(path):
    """GET *path* from the test server; return (parsed_json, status)."""
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        return json.loads(resp.read()), resp.status
def get_raw(path):
    """GET *path* without JSON parsing; return (body_bytes, content_type, status)."""
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        return resp.read(), resp.headers.get("Content-Type",""), resp.status
def post(path, body=None):
    """POST *body* as JSON to *path*; return (parsed_json, status) even on HTTP errors."""
    payload = json.dumps(body or {}).encode()
    req = urllib.request.Request(
        BASE + path,
        data=payload,
        headers={"Content-Type": "application/json"},
    )
    try:
        with urllib.request.urlopen(req, timeout=10) as resp:
            return json.loads(resp.read()), resp.status
    except urllib.error.HTTPError as err:
        # Error responses also carry a JSON body — surface it with the code.
        return json.loads(err.read()), err.code
def make_session_tracked(created_list, ws=None):
    """Create a session and register it with the cleanup fixture.

    Returns (session_id, workspace_path); the new id is appended to
    *created_list* so the cleanup fixture deletes it after the test.
    """
    body = {}
    if ws:
        body["workspace"] = str(ws)
    d, _ = post("/api/session/new", body)
    sid = d["session"]["session_id"]
    created_list.append(sid)
    # IDIOM FIX: dropped the redundant local "import pathlib as _pathlib" —
    # pathlib is already imported at module scope.
    return sid, pathlib.Path(d["session"]["workspace"])
def test_server_running_from_new_location():
    """Health endpoint must answer from the relocated server."""
    payload, status = get("/health")
    assert status == 200
    assert payload["status"] == "ok"
def test_static_css_served():
    """Stylesheet must be served with a CSS content type and real content."""
    body, ctype, status = get_raw("/static/style.css")
    assert status == 200
    assert "text/css" in ctype
    assert b"--bg" in body
def test_static_unknown_file_404():
    """Unknown static assets must 404 (not fall through to index or 500)."""
    try:
        get_raw("/static/doesnotexist.xyz")
    except urllib.error.HTTPError as e:
        assert e.code == 404
    else:
        # BUG FIX: the old body used "assert False", which is a no-op under
        # "python -O"; raise explicitly so the failure survives optimization.
        raise AssertionError("expected HTTP 404 for unknown static file")
def test_session_rename(cleanup_test_sessions):
    """Renaming a session must succeed and return the new title."""
    sid, _ = make_session_tracked(cleanup_test_sessions)
    payload, status = post("/api/session/rename", {"session_id": sid, "title": "Renamed Session"})
    assert status == 200
    assert payload["session"]["title"] == "Renamed Session"
def test_session_rename_persists(cleanup_test_sessions):
    """A renamed title must survive a fresh session load."""
    sid, _ = make_session_tracked(cleanup_test_sessions)
    post("/api/session/rename", {"session_id": sid, "title": "Persisted"})
    fetched, _ = get(f"/api/session?session_id={sid}")
    assert fetched["session"]["title"] == "Persisted"
def test_session_rename_truncates(cleanup_test_sessions):
    """Overlong titles must be truncated to at most 80 characters."""
    sid, _ = make_session_tracked(cleanup_test_sessions)
    long_title = "A" * 200
    payload, status = post("/api/session/rename", {"session_id": sid, "title": long_title})
    assert status == 200
    assert len(payload["session"]["title"]) <= 80
def test_session_rename_requires_fields():
    """Rename requires both session_id and title; each alone is a 400."""
    for partial in ({"session_id": "x"}, {"title": "hi"}):
        _body, code = post("/api/session/rename", partial)
        assert code == 400
def test_session_rename_unknown_id():
    """Renaming a nonexistent session returns 404."""
    _body, code = post("/api/session/rename", {"session_id": "nosuchid", "title": "hi"})
    assert code == 404
def test_session_search_returns_matches(cleanup_test_sessions):
    """Searching for a unique title substring finds the renamed session."""
    sid, _ws = make_session_tracked(cleanup_test_sessions)
    uid = uuid.uuid4().hex[:8]
    post("/api/session/rename", {"session_id": sid, "title": f"s4-search-{uid}"})
    body, code = get(f"/api/sessions/search?q=s4-search-{uid}")
    assert code == 200
    assert sid in [s["session_id"] for s in body["sessions"]]
def test_session_search_empty_query_returns_all():
    """An empty query is valid and returns the full session list."""
    body, code = get("/api/sessions/search?q=")
    assert code == 200
    assert "sessions" in body
def test_session_search_no_results():
    """A query matching nothing returns an empty list, not an error."""
    body, code = get("/api/sessions/search?q=zzznomatchzzz9999")
    assert code == 200
    assert body["sessions"] == []
def test_file_create(cleanup_test_sessions):
    """/api/file/create writes the file into the session workspace."""
    sid, ws = make_session_tracked(cleanup_test_sessions)
    fname = f"test_{uuid.uuid4().hex[:6]}.txt"
    body, code = post("/api/file/create", {"session_id": sid, "path": fname, "content": "hello sprint4"})
    assert code == 200
    assert body["ok"] is True
    assert (ws / fname).read_text() == "hello sprint4"
def test_file_create_requires_fields(cleanup_test_sessions):
    """Both session_id and path are mandatory for file creation."""
    sid, _ws = make_session_tracked(cleanup_test_sessions)
    _body, code = post("/api/file/create", {"session_id": sid})
    assert code == 400
    _body2, code2 = post("/api/file/create", {"path": "x.txt"})
    assert code2 == 400
def test_file_create_duplicate_rejected(cleanup_test_sessions):
    """Creating the same path twice is rejected with 400."""
    sid, _ws = make_session_tracked(cleanup_test_sessions)
    fname = f"dup_{uuid.uuid4().hex[:6]}.txt"
    post("/api/file/create", {"session_id": sid, "path": fname, "content": ""})
    _body, code = post("/api/file/create", {"session_id": sid, "path": fname, "content": ""})
    assert code == 400
def test_file_delete(cleanup_test_sessions):
    """/api/file/delete removes an existing workspace file."""
    sid, ws = make_session_tracked(cleanup_test_sessions)
    target = ws / "to_delete.txt"
    target.write_text("bye")
    _body, code = post("/api/file/delete", {"session_id": sid, "path": "to_delete.txt"})
    assert code == 200
    assert not target.exists()
def test_file_delete_missing_returns_404(cleanup_test_sessions):
    """Deleting a file that does not exist yields 404."""
    sid, _ws = make_session_tracked(cleanup_test_sessions)
    _body, code = post("/api/file/delete", {"session_id": sid, "path": "nosuchfile.txt"})
    assert code == 404
def test_file_delete_path_traversal_blocked(cleanup_test_sessions):
    """Path traversal outside the workspace must be rejected, not executed."""
    sid, _ws = make_session_tracked(cleanup_test_sessions)
    _body, code = post("/api/file/delete", {"session_id": sid, "path": "../../etc/passwd"})
    assert code in (400, 500)
def test_list_requires_session_id():
    """/api/list without a session_id is a 400 error."""
    try:
        get("/api/list?path=.")
    except urllib.error.HTTPError as e:
        assert e.code == 400
    else:
        assert False
def test_file_requires_session_id():
    """/api/file without a session_id is a 400 error."""
    try:
        get("/api/file?path=readme.txt")
    except urllib.error.HTTPError as e:
        assert e.code == 400
    else:
        assert False
def test_file_requires_path(cleanup_test_sessions):
    """/api/file with a session but no path is a 400 error."""
    sid, _ws = make_session_tracked(cleanup_test_sessions)
    try:
        get(f"/api/file?session_id={sid}")
    except urllib.error.HTTPError as e:
        assert e.code == 400
    else:
        assert False
def test_new_session_inherits_workspace(cleanup_test_sessions):
    """A new session defaults to the workspace of the previous session."""
    sid, ws = make_session_tracked(cleanup_test_sessions)
    child = ws / f"workspace-inherit-{uuid.uuid4().hex[:6]}"
    child.mkdir(parents=True, exist_ok=True)
    post("/api/session/update", {"session_id": sid, "workspace": str(child), "model": "openai/gpt-5.4-mini"})
    sid2, _ws2 = make_session_tracked(cleanup_test_sessions)
    body, _status = get(f"/api/session?session_id={sid2}")
    assert body["session"]["workspace"] == str(child)

162
tests/test_sprint40.py Normal file
View File

@@ -0,0 +1,162 @@
"""
Sprint 40 Tests: OAuth provider onboarding path (PR B of issue #329).
Covers:
- _build_setup_catalog sets current_is_oauth=True for OAuth providers
- _build_setup_catalog sets current_is_oauth=False for API-key providers
- _build_setup_catalog sets current_is_oauth=False when no provider configured
- apply_onboarding_setup with unsupported provider marks onboarding complete directly
- i18n.js contains all required OAuth onboarding keys in both English and Spanish
"""
import pathlib
import re
import unittest
from unittest.mock import patch
import api.onboarding as mod
# Repository root (tests/ lives one level below it).
REPO_ROOT = pathlib.Path(__file__).parent.parent
# Static frontend sources inspected by the string-presence tests below.
I18N_JS = (REPO_ROOT / "static" / "i18n.js").read_text()
ONBOARDING_JS = (REPO_ROOT / "static" / "onboarding.js").read_text()
# ── Backend: _build_setup_catalog ──────────────────────────────────────────
class TestBuildSetupCatalog(unittest.TestCase):
    """_build_setup_catalog must flag OAuth-only providers via current_is_oauth."""

    def _catalog(self, provider, model="gpt-4o", base_url=""):
        """Build a minimal config for *provider* and run _build_setup_catalog."""
        if provider:
            cfg = {"model": {"provider": provider, "default": model, "base_url": base_url}}
        else:
            cfg = {}
        with patch.object(mod, "get_config", return_value=cfg):
            return mod._build_setup_catalog(cfg)

    def test_oauth_provider_sets_current_is_oauth_true(self):
        """openai-codex is not in _SUPPORTED_PROVIDER_SETUPS → current_is_oauth=True."""
        result = self._catalog("openai-codex", "gpt-5.4")
        self.assertTrue(result["current_is_oauth"],
                        "current_is_oauth must be True for openai-codex")

    def test_copilot_provider_sets_current_is_oauth_true(self):
        """copilot is also OAuth."""
        result = self._catalog("copilot")
        self.assertTrue(result["current_is_oauth"])

    def test_openai_provider_sets_current_is_oauth_false(self):
        """openai is in _SUPPORTED_PROVIDER_SETUPS → current_is_oauth=False."""
        result = self._catalog("openai", "gpt-4o")
        self.assertFalse(result["current_is_oauth"],
                         "current_is_oauth must be False for API-key provider openai")

    def test_anthropic_provider_sets_current_is_oauth_false(self):
        """anthropic is an API-key provider as well."""
        result = self._catalog("anthropic", "claude-sonnet-4.6")
        self.assertFalse(result["current_is_oauth"])

    def test_no_provider_sets_current_is_oauth_false(self):
        """Empty config → current_is_oauth=False."""
        result = self._catalog("")
        self.assertFalse(result["current_is_oauth"])

    def test_catalog_includes_current_is_oauth_key(self):
        """current_is_oauth must always be present in the catalog dict."""
        result = self._catalog("openrouter")
        self.assertIn("current_is_oauth", result)
# ── Backend: apply_onboarding_setup for OAuth providers ────────────────────
class TestApplyOnboardingOAuthPath(unittest.TestCase):
    """OAuth providers bypass key entry: onboarding completes immediately."""

    def test_unsupported_provider_skips_to_complete(self):
        """apply_onboarding_setup with an OAuth provider just marks onboarding done."""
        captured = {}
        mock_status = {"completed": True, "system": {"chat_ready": True}}
        with patch.object(mod, "save_settings", side_effect=captured.update), \
             patch.object(mod, "get_onboarding_status", return_value=mock_status):
            result = mod.apply_onboarding_setup({"provider": "openai-codex", "model": "gpt-5.4"})
        self.assertTrue(captured.get("onboarding_completed"),
                        "save_settings must set onboarding_completed=True for OAuth provider")
        self.assertEqual(result, mock_status)

    def test_unsupported_provider_does_not_write_config_yaml(self):
        """OAuth path must not call _save_yaml_config — no config mutation."""
        with patch.object(mod, "save_settings"), \
             patch.object(mod, "get_onboarding_status", return_value={}), \
             patch.object(mod, "_save_yaml_config") as save_yaml:
            mod.apply_onboarding_setup({"provider": "copilot", "model": "gpt-4o"})
        save_yaml.assert_not_called()
# ── Frontend: i18n keys ────────────────────────────────────────────────────
# i18n keys introduced for the OAuth onboarding flow; each must exist in
# every locale block of static/i18n.js (checked by TestOAuthI18nKeys).
_REQUIRED_OAUTH_KEYS = [
    "onboarding_oauth_provider_ready_title",
    "onboarding_oauth_provider_ready_body",
    "onboarding_oauth_provider_not_ready_title",
    "onboarding_oauth_provider_not_ready_body",
    "onboarding_oauth_switch_hint",
]
class TestOAuthI18nKeys(unittest.TestCase):
    """All OAuth onboarding strings must exist in i18n.js for every locale."""

    def test_english_locale_has_all_oauth_keys(self):
        """All OAuth onboarding i18n keys must be present in the English locale."""
        missing = [k for k in _REQUIRED_OAUTH_KEYS if k not in I18N_JS]
        self.assertFalse(missing,
                         f"English locale missing OAuth keys: {missing}")

    def test_spanish_locale_has_all_oauth_keys(self):
        """All OAuth onboarding i18n keys must be present in the Spanish locale."""
        # Spanish locale is the second occurrence of each key
        counts = {k: I18N_JS.count(k) for k in _REQUIRED_OAUTH_KEYS}
        under = [k for k, c in counts.items() if c < 2]
        self.assertFalse(under,
                         f"Spanish locale missing OAuth keys (need 2 occurrences each): {under}")

    def test_oauth_body_strings_contain_provider_placeholder(self):
        """Body strings must contain {provider} so JS can substitute the provider name.

        Fixed: the previous version only asserted that "{provider}" appeared
        somewhere in the whole of i18n.js, so a placeholder missing from one
        body string was masked by any other string containing it. Now every
        occurrence of each body key (one per locale) is checked for the
        placeholder in its own definition.
        """
        for key in ["onboarding_oauth_provider_ready_body",
                    "onboarding_oauth_provider_not_ready_body"]:
            starts = [m.start() for m in re.finditer(re.escape(key), I18N_JS)]
            self.assertTrue(starts, f"{key} not found in i18n.js")
            for start in starts:
                # The string value follows the key; a 400-char window is ample
                # for these short messages.
                snippet = I18N_JS[start:start + 400]
                self.assertIn("{provider}", snippet,
                              f"{key} must contain {{provider}} placeholder")
# ── Frontend: onboarding.js uses current_is_oauth ─────────────────────────
class TestOAuthOnboardingJs(unittest.TestCase):
    """Frontend wiring: onboarding.js and style.css must support OAuth cards."""

    def test_onboarding_js_reads_current_is_oauth(self):
        """onboarding.js must check current_is_oauth from the status payload."""
        self.assertIn("current_is_oauth", ONBOARDING_JS,
                      "onboarding.js must read current_is_oauth from ONBOARDING.status.setup")

    def test_onboarding_js_renders_oauth_ready_card(self):
        """onboarding.js must render the oauth-ready card class."""
        self.assertIn("onboarding-oauth-ready", ONBOARDING_JS)

    def test_onboarding_js_renders_oauth_pending_card(self):
        """onboarding.js must render the oauth-pending card class."""
        self.assertIn("onboarding-oauth-pending", ONBOARDING_JS)

    def test_style_css_has_oauth_card_rules(self):
        """style.css must contain the .onboarding-oauth-card rules."""
        stylesheet = (REPO_ROOT / "static" / "style.css").read_text()
        for selector in ("onboarding-oauth-card",
                         "onboarding-oauth-ready",
                         "onboarding-oauth-pending"):
            self.assertIn(selector, stylesheet)
# Allow running this test module directly (pytest is the normal entry point).
if __name__ == "__main__":
    unittest.main()

View File

@@ -0,0 +1,267 @@
"""
Sprint 40 UI Polish Tests: Active session title uses CSS theme variable (issue #440).
Covers:
- .session-item.active .session-title uses var(--gold) instead of hardcoded #e8a030
- The hardcoded amber color #e8a030 is NOT present in the active session title rule
"""
import os
import pathlib
import re
import sys
import unittest
from unittest import mock
# Ensure repo is on sys.path so api.config can be imported
_REPO_ROOT = pathlib.Path(__file__).parent.parent
if str(_REPO_ROOT) not in sys.path:
    sys.path.insert(0, str(_REPO_ROOT))
REPO_ROOT = _REPO_ROOT
# Static frontend sources checked by the string/regex assertions below.
STYLE_CSS = (REPO_ROOT / "static" / "style.css").read_text()
SESSIONS_JS = (REPO_ROOT / "static" / "sessions.js").read_text()
PANELS_JS = (REPO_ROOT / "static" / "panels.js").read_text()
# api.config may be unimportable in stripped-down environments; the tests
# that need it are skipped via _config_available instead of failing at import.
try:
    from api import config as _api_config
    _config_available = True
except Exception:
    _api_config = None
    _config_available = False
# Combined tests for Sprint 40 — Session + UI Polish
# Covers: active title color, unknown model, Telegram badge,
# custom endpoint model routing, workspace chip
# ── #451 active title ─────────────────────────────────────────────
class TestActiveSessionTitleThemeColor(unittest.TestCase):
    """Issue #440: the active session title color must come from the theme."""

    def test_active_session_title_uses_theme_variable(self):
        """
        .session-item.active .session-title must use var(--gold) not a hardcoded hex.
        The light-mode override line (:not(.dark)) is allowed to keep its own
        hardcoded color; we only check the base/dark rule.
        """
        selector = ".session-item.active .session-title"
        base_rule_lines = []
        for css_line in STYLE_CSS.splitlines():
            if selector in css_line and ':not(.dark)' not in css_line:
                base_rule_lines.append(css_line)
        self.assertTrue(
            len(base_rule_lines) >= 1,
            "Could not find .session-item.active .session-title base rule in style.css"
        )
        for css_line in base_rule_lines:
            self.assertTrue(
                "var(--gold)" in css_line or "var(--accent-text)" in css_line,
                f"Expected var(--gold) or var(--accent-text) in active session title rule, got: {css_line.strip()}"
            )
            self.assertNotIn(
                "#e8a030",
                css_line,
                f"Hardcoded #e8a030 must be removed from active session title rule: {css_line.strip()}"
            )
class TestDarkTopbarSelector(unittest.TestCase):
    """Dark-mode topbar border must use the :root.dark class selector."""

    def test_topbar_dark_border_uses_root_dark_selector(self):
        expected = ":root.dark .topbar{border-bottom:1px solid rgba(255,255,255,.07);}"
        removed = '[data-theme="dark"] .topbar'
        self.assertIn(
            expected,
            STYLE_CSS,
            "Topbar dark border override must target :root.dark after the theme-class migration",
        )
        self.assertNotIn(
            removed,
            STYLE_CSS,
            "Topbar dark border override must not keep the removed data-theme selector",
        )
# NOTE(review): a stray mid-file `if __name__ == "__main__": unittest.main()`
# guard was removed here. When this module is executed directly,
# unittest.main() raises SystemExit at this point, so every test class defined
# further down the file would never be registered or run. The guard at the end
# of the file is sufficient; pytest collection is unaffected either way.
# ── #452 unknown model ─────────────────────────────────────────────
class TestGatewaySessionNullModel(unittest.TestCase):
    """Verify that api/models.py and api/gateway_watcher.py do not
    fall back to the string 'unknown' for missing model values."""

    @staticmethod
    def _sources():
        """Return the source text of (api/models.py, api/gateway_watcher.py)."""
        return ((REPO_ROOT / "api" / "models.py").read_text(),
                (REPO_ROOT / "api" / "gateway_watcher.py").read_text())

    def test_gateway_session_null_model_returns_none_not_unknown(self):
        """api/models.py must not use `or 'unknown'` for the model field
        so that a NULL model in state.db is returned as None (falsy) to
        the frontend rather than the truthy string 'unknown'."""
        models_source, _ = self._sources()
        self.assertNotIn(
            "'model': row['model'] or 'unknown'",
            models_source,
            "api/models.py must not use `or 'unknown'` for the model field "
            "(fixes #443: gateway sessions showed 'telegram · unknown')",
        )

    def test_gateway_watcher_null_model_returns_none_not_unknown(self):
        """api/gateway_watcher.py must not use `or 'unknown'` for the model
        field so that a NULL model in state.db is returned as None (falsy)."""
        _, watcher_source = self._sources()
        self.assertNotIn(
            "'model': row['model'] or 'unknown'",
            watcher_source,
            "api/gateway_watcher.py must not use `or 'unknown'` for the model "
            "field (fixes #443: gateway sessions showed 'telegram · unknown')",
        )

    def test_gateway_session_model_uses_none_fallback(self):
        """Both source files must use `row['model'] or None` (explicit None
        fallback) for the model field assignment."""
        models_source, watcher_source = self._sources()
        self.assertIn(
            "'model': row['model'] or None,",
            models_source,
            "api/models.py should assign `row['model'] or None` for the model field",
        )
        self.assertIn(
            "'model': row['model'] or None,",
            watcher_source,
            "api/gateway_watcher.py should assign `row['model'] or None` for the model field",
        )
# NOTE(review): a second stray mid-file `if __name__ == "__main__":
# unittest.main()` guard was removed here for the same reason as above —
# direct script execution would exit before the remaining test classes in
# this file are defined. The end-of-file guard is the only one needed.
# ── #454 model routing ─────────────────────────────────────────────
@unittest.skipUnless(_config_available, "api.config not importable")
class TestCustomEndpointModelStripping(unittest.TestCase):
    """Tests for fix #433: strip provider prefix when custom base_url is set.

    Fixed: this class previously did not inherit from unittest.TestCase, so
    the @unittest.skipUnless decorator had no effect under pytest (unittest
    skip markers are only honored on TestCase subclasses) and the methods
    would crash on `_api_config` being None instead of being skipped when
    api.config is unavailable. Plain `assert` statements still work inside a
    TestCase method, so the test bodies are unchanged.
    """
    def _resolve(self, model_id, provider=None, base_url=None):
        """Helper: set cfg directly (same pattern as test_model_resolver.py)."""
        old_cfg = dict(_api_config.cfg)
        model_cfg = {}
        if provider:
            model_cfg['provider'] = provider
        if base_url:
            model_cfg['base_url'] = base_url
        _api_config.cfg['model'] = model_cfg
        try:
            return _api_config.resolve_model_provider(model_id)
        finally:
            # Restore the shared module-level config no matter what happened.
            _api_config.cfg.clear()
            _api_config.cfg.update(old_cfg)

    def test_prefixed_model_stripped_for_custom_endpoint(self):
        """Issue #433: 'openai/gpt-5.4' with custom base_url returns bare 'gpt-5.4'."""
        model, provider, base_url = self._resolve(
            'openai/gpt-5.4',
            provider='custom',
            base_url='http://my-proxy.local:8080/v1',
        )
        assert model == 'gpt-5.4', (
            "Expected bare 'gpt-5.4' for custom endpoint, got '{}'."
            " Stale provider-prefix must be stripped.".format(model)
        )
        assert base_url == 'http://my-proxy.local:8080/v1'
        assert provider == 'custom'

    def test_bare_model_unchanged_for_custom_endpoint(self):
        """Bare model ID (no slash) must pass through untouched with custom base_url."""
        model, provider, base_url = self._resolve(
            'gpt-4o',
            provider='custom',
            base_url='http://my-proxy.local:8080/v1',
        )
        assert model == 'gpt-4o', (
            "Bare model 'gpt-4o' should not be modified, got '{}'.".format(model)
        )
        assert base_url == 'http://my-proxy.local:8080/v1'
        assert provider == 'custom'

    def test_prefixed_model_kept_for_openrouter(self):
        """When NO custom base_url (openrouter route), prefixed model must stay prefixed."""
        model, provider, base_url = self._resolve(
            'openai/gpt-5.4',
            provider='anthropic',  # cross-provider pick triggers openrouter routing
        )
        # Cross-provider model with openrouter routing must keep full provider/model path
        assert 'openai/gpt-5.4' in model or provider == 'openrouter', (
            "Expected prefixed model or openrouter routing for non-custom endpoint, "
            "got model='{}', provider='{}'.".format(model, provider)
        )
        assert base_url is None, (
            "OpenRouter routing must not set a base_url, got '{}'.".format(base_url)
        )
# ── #455 workspace chip ─────────────────────────────────────────────
class TestWorkspaceChipAfterProfileSwitch(unittest.TestCase):
    """Verify that switchToProfile() applies the profile default workspace
    to the new session when a conversation is in progress (fixes #424)."""
    def test_workspace_chip_updated_after_profile_switch(self):
        """After await newSession(false) in the sessionInProgress branch,
        the code must call updateWorkspaceChip() so the chip reflects the
        new profile's default workspace instead of showing 'No active workspace'."""
        # Find the sessionInProgress block
        idx = PANELS_JS.find('if (sessionInProgress)')
        self.assertGreater(idx, -1, "sessionInProgress branch must exist in panels.js")
        # Slice from that point to cover the relevant block
        # NOTE(review): the 1000-char window assumes the branch body stays
        # short — widen it if the panels.js branch grows.
        block = PANELS_JS[idx:idx + 1000]
        # newSession(false) must be called first
        self.assertIn('await newSession(false)', block,
            "sessionInProgress branch must call await newSession(false)")
        # The fix: updateWorkspaceChip() must be called after newSession(false)
        pos_new_session = block.find('await newSession(false)')
        pos_update_chip = block.find('updateWorkspaceChip()')
        self.assertGreater(pos_update_chip, -1,
            "updateWorkspaceChip() must be called in the sessionInProgress branch")
        self.assertGreater(pos_update_chip, pos_new_session,
            "updateWorkspaceChip() must be called AFTER newSession(false)")
    def test_profile_default_workspace_applied_to_new_session(self):
        """After newSession(false) the code must assign S._profileDefaultWorkspace
        to S.session.workspace so the session is correctly tagged."""
        idx = PANELS_JS.find('if (sessionInProgress)')
        self.assertGreater(idx, -1)
        block = PANELS_JS[idx:idx + 1000]
        # The fix block must set S.session.workspace from S._profileDefaultWorkspace
        self.assertIn('S.session.workspace = S._profileDefaultWorkspace', block,
            "S.session.workspace must be set from S._profileDefaultWorkspace "
            "in the sessionInProgress branch after newSession(false)")
    def test_api_session_update_called_for_new_session_workspace(self):
        """The fix must call /api/session/update to persist the workspace on the server."""
        idx = PANELS_JS.find('if (sessionInProgress)')
        self.assertGreater(idx, -1)
        block = PANELS_JS[idx:idx + 1000]
        # Must patch the session on the backend too
        self.assertIn('/api/session/update', block,
            "The sessionInProgress branch must call /api/session/update "
            "to persist the new workspace after newSession(false)")
    def test_update_workspace_chip_before_render_session_list(self):
        """updateWorkspaceChip() should be called before renderSessionList()
        so the chip is correct when the UI re-renders."""
        idx = PANELS_JS.find('if (sessionInProgress)')
        self.assertGreater(idx, -1)
        block = PANELS_JS[idx:idx + 1000]
        # Ordering check: find() positions within the same sliced block.
        pos_chip = block.find('updateWorkspaceChip()')
        pos_render = block.find('await renderSessionList()')
        self.assertGreater(pos_chip, -1, "updateWorkspaceChip() must exist in block")
        self.assertGreater(pos_render, -1, "renderSessionList() must exist in block")
        self.assertLess(pos_chip, pos_render,
            "updateWorkspaceChip() must be called before renderSessionList()")
# Allow running this test module directly (pytest is the normal entry point).
if __name__ == '__main__':
    unittest.main()

381
tests/test_sprint41.py Normal file
View File

@@ -0,0 +1,381 @@
"""
Sprint 41 Tests: Title auto-generation fix + mobile close button CSS (PR #333).
Covers:
- streaming.py: sessions titled 'New Chat' trigger auto-title generation
- streaming.py: sessions with empty/falsy title trigger auto-title generation
- streaming.py: sessions titled 'Untitled' (original guard) still trigger
- streaming.py: sessions with a user-set title do NOT trigger auto-title
- style.css: .mobile-close-btn is hidden by default (desktop rule present)
- style.css: .mobile-close-btn shown in <=900px media query
- style.css: #btnCollapseWorkspacePanel hidden in <=900px media query
- index.html: both .mobile-close-btn and #btnCollapseWorkspacePanel buttons exist
"""
import pathlib
import re
import unittest
# Repository root; sources under test are read once at import time.
REPO_ROOT = pathlib.Path(__file__).parent.parent
CSS = (REPO_ROOT / "static" / "style.css").read_text()
HTML = (REPO_ROOT / "static" / "index.html").read_text()
MESSAGES_JS = (REPO_ROOT / "static" / "messages.js").read_text()
STREAMING_PY = (REPO_ROOT / "api" / "streaming.py").read_text()
# ── streaming.py: title auto-generation condition ─────────────────────────
class TestTitleAutoGenerationCondition(unittest.TestCase):
    """Verify the guarded condition in streaming.py covers all default title cases."""
    def _titles_that_trigger(self):
        """Extract the condition from the source so tests stay in sync with code."""
        # Find the if-condition that calls title_from
        # re.DOTALL lets `.*?` span a condition that wraps across lines.
        m = re.search(
            r'if\s+(s\.title\s*==.*?):\s*\n\s*s\.title\s*=\s*title_from',
            STREAMING_PY,
            re.DOTALL,
        )
        self.assertIsNotNone(m, "Could not find title auto-generation condition in streaming.py")
        return m.group(1)
    def test_untitled_in_condition(self):
        """The original default title 'Untitled' must still trigger generation."""
        cond = self._titles_that_trigger()
        self.assertIn("'Untitled'", cond, "Original 'Untitled' guard must be present")
    def test_new_chat_in_condition(self):
        """'New Chat' (the newer default) must also trigger generation."""
        cond = self._titles_that_trigger()
        self.assertIn("'New Chat'", cond, "'New Chat' guard must be present (PR #333)")
    def test_empty_title_guard_in_condition(self):
        """An empty/falsy title must also trigger generation."""
        cond = self._titles_that_trigger()
        self.assertIn("not s.title", cond, "Empty/falsy title guard must be present (PR #333)")
    def test_condition_logic_covers_all_defaults(self):
        """The condition uses OR so any one default title triggers generation."""
        cond = self._titles_that_trigger()
        # All three guards must be joined by 'or'
        parts = re.split(r'\bor\b', cond)
        self.assertGreaterEqual(len(parts), 3,
            "Expected at least 3 OR-joined sub-conditions (Untitled, New Chat, not s.title)")
# ── style.css: mobile close button visibility ─────────────────────────────
class TestMobileCloseButtonCSS(unittest.TestCase):
    """Verify CSS rules that control the duplicate close button on mobile.

    Fixed: the regex that extracts the @media(max-width:900px) block was
    copy-pasted into three test methods; it is now deduplicated into the
    _media_900_block helper so all four tests stay in sync.
    """

    # Matches the whole @media(max-width:900px){...} body, tolerating one
    # level of nested rule braces inside it.
    _MEDIA_900_RE = r'@media\s*\(max-width\s*:\s*900px\)\s*\{([^{}]*(?:\{[^{}]*\}[^{}]*)*)\}'

    def _media_900_block(self):
        """Return the raw body of the 900px media query, failing if absent."""
        m = re.search(self._MEDIA_900_RE, CSS)
        self.assertIsNotNone(m, "@media(max-width:900px) block not found in style.css")
        return m.group(1)

    def test_mobile_close_btn_hidden_by_default(self):
        """Desktop default: .mobile-close-btn must be display:none outside any media query."""
        # Space-stripped search so formatting differences don't matter.
        self.assertIn(
            ".mobile-close-btn{display:none;}",
            CSS.replace(" ", ""),
            ".mobile-close-btn should be hidden by default (desktop) — rule missing or wrong"
        )

    def test_mobile_close_btn_shown_in_900px_query(self):
        """Inside max-width:900px media query, .mobile-close-btn must be display:flex."""
        block = self._media_900_block().replace(" ", "")
        self.assertIn(".mobile-close-btn{display:flex;}",
                      block,
                      ".mobile-close-btn must be display:flex inside the 900px media query")

    def test_desktop_collapse_btn_hidden_in_900px_query(self):
        """Inside max-width:900px media query, #btnCollapseWorkspacePanel must be display:none."""
        block = self._media_900_block().replace(" ", "")
        self.assertIn("#btnCollapseWorkspacePanel{display:none;}",
                      block,
                      "#btnCollapseWorkspacePanel must be display:none in 900px media query")

    def test_900px_query_retains_existing_rules(self):
        """Ensure the PR didn't accidentally drop existing rules from the 900px block."""
        block = self._media_900_block()
        self.assertIn("rightpanel", block, ".rightpanel rule missing from 900px block")
        self.assertIn("mobile-files-btn", block, ".mobile-files-btn rule missing from 900px block")
# ── index.html: button presence ───────────────────────────────────────────
class TestWorkspacePanelButtons(unittest.TestCase):
    """Verify both panel buttons are present in the HTML so CSS rules have targets."""

    def test_desktop_collapse_button_exists(self):
        self.assertIn("btnCollapseWorkspacePanel", HTML,
                      "#btnCollapseWorkspacePanel button must exist in index.html")

    def test_mobile_close_button_exists(self):
        self.assertIn("mobile-close-btn", HTML,
                      ".mobile-close-btn button must exist in index.html")

    def test_mobile_close_button_has_aria_label(self):
        """Accessibility: mobile close button must have an aria-label."""
        match = re.search(r'class="[^"]*mobile-close-btn[^"]*"[^>]*>', HTML)
        self.assertIsNotNone(match, "Could not find mobile-close-btn element")
        self.assertIn("aria-label", match.group(0),
                      "mobile-close-btn must have aria-label for accessibility")
class TestIssue495TitleStreaming(unittest.TestCase):
    """Regression checks for issue #495 title SSE behavior.

    Mix of source-presence checks (exact probe strings against streaming.py /
    messages.js — keep them byte-identical to the real sources) and
    behavioral tests that import helpers from api.streaming directly.
    """
    # ── Backend: streaming.py source-presence checks ──────────────────────
    def test_streaming_has_llm_title_helper(self):
        self.assertIn(
            "def _generate_llm_session_title_for_agent(",
            STREAMING_PY,
            "streaming.py should define an agent-backed LLM title helper for session titles",
        )
    def test_streaming_rejects_generic_completion_titles(self):
        # Both a Chinese and an English generic completion phrase must be filtered.
        self.assertIn(
            "测试完成",
            STREAMING_PY,
            "streaming.py should reject generic completion phrases as session titles",
        )
        self.assertIn(
            "all set",
            STREAMING_PY,
            "streaming.py should reject generic English completion phrases as session titles",
        )
    def test_streaming_uses_reasoning_split_for_minimax_titles(self):
        self.assertIn(
            "reasoning_split",
            STREAMING_PY,
            "streaming.py should request MiniMax title calls with reasoning_split so final text is separated from thinking",
        )
    def test_streaming_emits_title_sse_event(self):
        self.assertIn(
            "put_event('title', {'session_id': s.session_id, 'title': s.title})",
            STREAMING_PY,
            "streaming.py should emit a title SSE event when title is updated",
        )
    def test_streaming_emits_title_status_sse_event(self):
        self.assertIn(
            "put_event('title_status', payload)",
            STREAMING_PY,
            "streaming.py should emit a title_status SSE event for title generation diagnostics",
        )
    def test_streaming_emits_stream_end_event(self):
        self.assertIn(
            "put_event('stream_end', {'session_id': session_id})",
            STREAMING_PY,
            "background title path should end the SSE stream with stream_end",
        )
    # ── Frontend: messages.js listener checks ─────────────────────────────
    def test_frontend_listens_for_title_event(self):
        self.assertIn(
            "addEventListener('title'",
            MESSAGES_JS,
            "messages.js should listen for title SSE events",
        )
    def test_frontend_listens_for_title_status_event(self):
        self.assertIn(
            "addEventListener('title_status'",
            MESSAGES_JS,
            "messages.js should listen for title_status SSE events",
        )
        self.assertIn(
            "console.info('[title]'",
            MESSAGES_JS,
            "messages.js should log title generation diagnostics to the browser console",
        )
    def test_frontend_refreshes_title_ui_after_title_event(self):
        self.assertIn(
            "syncTopbar()",
            MESSAGES_JS,
            "messages.js title listener should sync top bar title",
        )
        self.assertTrue(
            ("renderSessionListFromCache()" in MESSAGES_JS) or ("renderSessionList()" in MESSAGES_JS),
            "messages.js title listener should refresh session list UI",
        )
    def test_frontend_waits_for_stream_end_before_closing(self):
        self.assertIn(
            "addEventListener('stream_end'",
            MESSAGES_JS,
            "messages.js should close SSE connection on stream_end (not immediately on done)",
        )
    # ── Behavioral: api.streaming helper functions ────────────────────────
    def test_title_snippet_uses_visible_assistant_reply_after_tools(self):
        """Tool-heavy opening turns should use the final visible assistant reply."""
        from api.streaming import _first_exchange_snippets
        user_msg = {
            "role": "user",
            "content": "Please look up the earlier context and then summarize it.",
        }
        preamble_asst = {
            "role": "assistant",
            "content": "Let me check my memory first.",
            "tool_calls": [
                {
                    "id": "call-1",
                    "function": {
                        "name": "memory",
                        "arguments": '{"action":"search"}',
                    },
                }
            ],
        }
        tool_result = {
            "role": "tool",
            "tool_call_id": "call-1",
            "content": '{"result":"background info"}',
        }
        final_asst = {
            "role": "assistant",
            "content": "Here is the substantive answer after the tool work.",
        }
        user_text, assistant_text = _first_exchange_snippets(
            [user_msg, preamble_asst, tool_result, final_asst]
        )
        # Snippets are capped at 500 chars by the helper.
        self.assertEqual(user_text, user_msg["content"][:500])
        self.assertEqual(assistant_text, final_asst["content"][:500])
    def test_title_snippet_keeps_short_substantive_assistant_reply(self):
        """Short but real assistant answers should still be eligible for titles."""
        from api.streaming import _first_exchange_snippets
        messages = [
            {"role": "user", "content": "Can you help me rename this session?"},
            {"role": "assistant", "content": "Sure."},
        ]
        user_text, assistant_text = _first_exchange_snippets(messages)
        self.assertEqual(user_text, "Can you help me rename this session?")
        self.assertEqual(assistant_text, "Sure.")
    def test_provisional_title_detection_ignores_whitespace_noise(self):
        """Temporary first-message titles should still match with whitespace normalization."""
        from api.streaming import _is_provisional_title, title_from
        messages = [
            {
                "role": "user",
                "content": "过去两个礼拜发生了一些事情。最重要的一点就是我加入了一个 Hermes Web UI 的项目。\n\n因为我开始使用 Hermes 这个 agent 以后,就逐渐不再使用 OpenClaw了。",
            },
            {"role": "assistant", "content": "Sure, let me help."},
        ]
        derived = title_from(messages, "")
        current = derived[:63]  # Simulate the provisional title the UI writes immediately.
        # The 63-char cut lands mid-title, proving the comparison is not exact.
        self.assertNotEqual(current, derived[:64])
        self.assertTrue(
            _is_provisional_title(current, messages),
            "Whitespace-normalized provisional titles should still be recognized",
        )
    def test_title_snippet_keeps_tool_call_with_substantive_text(self):
        """An assistant row with tool_calls AND a substantive answer text
        must still be used as the first-exchange snippet — it's not a
        preamble, it's an agentic first-turn plan."""
        from api.streaming import _first_exchange_snippets
        user_msg = {
            "role": "user",
            "content": "Can you schedule a reminder for the Q3 kickoff meeting?",
        }
        # Assistant row with both a real answer AND a tool_call
        agentic_asst = {
            "role": "assistant",
            "content": "I'll schedule the Q3 kickoff reminder for next Monday at 9am.",
            "tool_calls": [
                {
                    "id": "call-1",
                    "function": {
                        "name": "cronjob",
                        "arguments": '{"action":"create","when":"mon 9am"}',
                    },
                }
            ],
        }
        user_text, assistant_text = _first_exchange_snippets([user_msg, agentic_asst])
        self.assertEqual(user_text, user_msg["content"][:500])
        self.assertEqual(
            assistant_text,
            agentic_asst["content"][:500],
            "Substantive answer text on a tool_call row must be preserved",
        )
    def test_title_snippet_skips_tool_call_preamble_only_rows(self):
        """Tool-call rows whose content is empty or meta-reasoning preamble
        ('Let me check my memory first.') must still be skipped — those are
        orchestration scaffolding, not title material."""
        from api.streaming import _first_exchange_snippets
        user_msg = {
            "role": "user",
            "content": "Summarize my notes from last week.",
        }
        empty_preamble = {
            "role": "assistant",
            "content": "",
            "tool_calls": [
                {
                    "id": "call-1",
                    "function": {
                        "name": "memory",
                        "arguments": '{"action":"search"}',
                    },
                }
            ],
        }
        meta_preamble = {
            "role": "assistant",
            "content": "Let me check my memory first.",
            "tool_calls": [
                {
                    "id": "call-2",
                    "function": {
                        "name": "memory",
                        "arguments": '{"action":"search","q":"last week"}',
                    },
                }
            ],
        }
        tool_result = {
            "role": "tool",
            "tool_call_id": "call-2",
            "content": '{"result":"background info"}',
        }
        final_asst = {
            "role": "assistant",
            "content": "Here's a summary of your notes from last week.",
        }
        _, assistant_text = _first_exchange_snippets(
            [user_msg, empty_preamble, meta_preamble, tool_result, final_asst]
        )
        self.assertEqual(
            assistant_text,
            final_asst["content"][:500],
            "Empty and meta-reasoning preamble rows must be skipped",
        )
# Allow running this test module directly (pytest is the normal entry point).
if __name__ == "__main__":
    unittest.main()

456
tests/test_sprint42.py Normal file
View File

@@ -0,0 +1,456 @@
"""
Sprint 42 Tests: SessionDB injection into AIAgent for WebUI sessions (PR #356).
Covers:
- streaming.py: SessionDB is initialized inside _run_agent_streaming (import present)
- streaming.py: try/except guards SessionDB init so failures are non-fatal
- streaming.py: session_db= kwarg is passed to AIAgent constructor
- streaming.py: SessionDB init failure prints a WARNING (not silently swallowed)
- streaming.py: SessionDB init is placed before AIAgent construction
"""
import ast
import pathlib
import re
import queue
import sys
import types
import unittest
from unittest import mock
REPO_ROOT = pathlib.Path(__file__).parent.parent
STREAMING_PY = (REPO_ROOT / "api" / "streaming.py").read_text()
# ── Shared helpers for sprint-42 additional tests ────────────────────────────
REPO = REPO_ROOT # alias used by #427 tests
_SESSIONS_JS = REPO_ROOT / 'static' / 'sessions.js'
_STREAMING_PY = REPO_ROOT / 'api' / 'streaming.py'
_MESSAGES_JS = REPO_ROOT / 'static' / 'messages.js'
_UI_JS = REPO_ROOT / 'static' / 'ui.js'
def _read_sessions_js():
    """Load and return the full text of static/sessions.js."""
    with _SESSIONS_JS.open(encoding='utf-8') as handle:
        return handle.read()
# ─────────────────────────────────────────────────────────────────────────────
class TestSessionDBInjection(unittest.TestCase):
    """Static source checks: streaming.py must wire a SessionDB into AIAgent."""

    def test_hermes_state_import_present(self):
        """The SessionDB import from hermes_state must exist in streaming.py."""
        expected_import = "from hermes_state import SessionDB"
        self.assertIn(
            expected_import,
            STREAMING_PY,
            "SessionDB import missing from streaming.py (PR #356)",
        )

    def test_session_db_kwarg_passed_to_agent(self):
        """The AIAgent constructor call must receive the session_db= kwarg."""
        expected_kwarg = "session_db=_session_db"
        self.assertIn(
            expected_kwarg,
            STREAMING_PY,
            "session_db kwarg not passed to AIAgent (PR #356)",
        )

    def test_sessiondb_init_in_try_except(self):
        """The import + SessionDB() construction must sit directly under a try:."""
        expected_pattern = (
            r"try:\s*\n\s*from hermes_state import SessionDB\s*\n\s*"
            r"_session_db\s*=\s*SessionDB\(\)"
        )
        self.assertRegex(
            STREAMING_PY,
            expected_pattern,
            "SessionDB() init must be inside a try block for non-fatal error handling (PR #356)",
        )

    def test_sessiondb_failure_logs_warning(self):
        """An init failure must surface as a printed WARNING, not be swallowed."""
        self.assertIn(
            "WARNING: SessionDB init failed",
            STREAMING_PY,
            "SessionDB init failure must log a WARNING message (PR #356)",
        )

    def test_session_db_initialized_before_agent_construction(self):
        """The SessionDB import must precede the session_db= kwarg in the source."""
        import_offset = STREAMING_PY.find("from hermes_state import SessionDB")
        kwarg_offset = STREAMING_PY.find("session_db=_session_db")
        self.assertGreater(
            kwarg_offset,
            import_offset,
            "SessionDB init must appear before AIAgent construction (PR #356)",
        )

    def test_session_db_default_is_none(self):
        """_session_db must default to None immediately before the try block."""
        self.assertRegex(
            STREAMING_PY,
            r"_session_db\s*=\s*None\s*\n\s*try:",
            "_session_db must default to None before try/except block (PR #356)",
        )
class TestRuntimeRouteInjection(unittest.TestCase):
    """Verify WebUI forwards the resolved runtime route into AIAgent."""

    def test_runtime_provider_keys_are_forwarded_to_agent(self):
        """WebUI must pass the runtime route fields that CLI already uses."""
        # Static source scan: each kwarg below must appear verbatim in the
        # AIAgent constructor call inside api/streaming.py.
        for snippet in (
            "api_mode=_rt.get('api_mode')",
            "acp_command=_rt.get('command')",
            "acp_args=_rt.get('args')",
            "credential_pool=_rt.get('credential_pool')",
        ):
            self.assertIn(
                snippet,
                STREAMING_PY,
                f"Missing runtime route forwarding in AIAgent constructor: {snippet}",
            )

    def test_runtime_route_is_forwarded_from_resolver_into_agent_init(self):
        """The resolved ACP route should be passed through to AIAgent kwargs."""
        import api.streaming as streaming

        captured = {}  # filled in by CapturingAgent below
        fake_session_db = object()  # sentinel, identity-checked at the end
        # Stub resolver returning a fully populated runtime route dict.
        resolve_runtime_provider = mock.Mock(
            return_value={
                "provider": "openai-codex",
                "base_url": "https://api.openai.com/v1",
                "api_key": "rt-key",
                "api_mode": "codex_responses",
                "command": "codex",
                "args": ["exec", "--json"],
                "credential_pool": "openai-codex",
            }
        )

        class FakeSession:
            """Minimal stand-in for the object streaming.get_session returns."""

            def __init__(self):
                self.session_id = "sess-runtime-route"
                self.title = "Existing title"
                self.workspace = "/tmp"
                self.model = "gpt-5.4"
                self.messages = []
                self.personality = None
                self.input_tokens = 0
                self.output_tokens = 0
                self.estimated_cost = None
                self.tool_calls = []
                self.active_stream_id = None
                self.pending_user_message = None
                self.pending_attachments = []
                self.pending_started_at = None

            def save(self, touch_updated_at=True):
                # Record that the streaming path persisted the session.
                self._saved = True

            def compact(self):
                # Shape mirrors the real Session.compact() payload.
                return {
                    "session_id": self.session_id,
                    "title": self.title,
                    "workspace": self.workspace,
                    "model": self.model,
                    "created_at": 0,
                    "updated_at": 0,
                    "pinned": False,
                    "archived": False,
                    "project_id": None,
                    "profile": None,
                    "input_tokens": self.input_tokens,
                    "output_tokens": self.output_tokens,
                    "estimated_cost": self.estimated_cost,
                    "personality": self.personality,
                }

        class CapturingAgent:
            """AIAgent replacement that records its constructor kwargs."""

            def __init__(self, **kwargs):
                captured["init_kwargs"] = kwargs
                self.session_id = kwargs["session_id"]
                self.context_compressor = None
                self.session_prompt_tokens = 0
                self.session_completion_tokens = 0
                self.session_estimated_cost_usd = None
                self.reasoning_config = None
                self.ephemeral_system_prompt = None
                self._last_error = None

            def run_conversation(self, **kwargs):
                # Record the run kwargs and return a minimal valid result.
                captured["run_kwargs"] = kwargs
                return {
                    "messages": [
                        {"role": "user", "content": kwargs["persist_user_message"]},
                        {"role": "assistant", "content": "ok"},
                    ]
                }

            def interrupt(self, _message):
                captured["interrupted"] = True

        fake_session = FakeSession()
        fake_stream_id = "stream-runtime-route"
        fake_queue = queue.Queue()
        # Fake module tree so streaming's deferred imports of
        # hermes_cli.runtime_provider / hermes_state resolve to our stubs.
        fake_runtime_module = types.ModuleType("hermes_cli.runtime_provider")
        fake_runtime_module.resolve_runtime_provider = resolve_runtime_provider
        fake_hermes_cli = types.ModuleType("hermes_cli")
        fake_hermes_cli.runtime_provider = fake_runtime_module
        fake_hermes_state = types.ModuleType("hermes_state")
        fake_hermes_state.SessionDB = mock.Mock(return_value=fake_session_db)
        with mock.patch.object(streaming, "get_session", return_value=fake_session), \
             mock.patch.object(streaming, "_get_ai_agent", return_value=CapturingAgent), \
             mock.patch.object(streaming, "resolve_model_provider", return_value=("gpt-5.4", "openai-codex", None)), \
             mock.patch("api.config.get_config", return_value={}), \
             mock.patch("api.config._resolve_cli_toolsets", return_value=[]), \
             mock.patch.dict(
                 sys.modules,
                 {
                     "hermes_cli": fake_hermes_cli,
                     "hermes_cli.runtime_provider": fake_runtime_module,
                     "hermes_state": fake_hermes_state,
                 },
             ):
            # The stream queue must be registered before the run starts.
            streaming.STREAMS[fake_stream_id] = fake_queue
            streaming._run_agent_streaming(
                session_id=fake_session.session_id,
                msg_text="hello from webui",
                model="gpt-5.4",
                workspace="/tmp",
                stream_id=fake_stream_id,
            )
        # Mocks retain their call records after the patch context exits.
        resolve_runtime_provider.assert_called_once_with(requested="openai-codex")
        init_kwargs = captured["init_kwargs"]
        self.assertEqual(init_kwargs["api_mode"], "codex_responses")
        self.assertEqual(init_kwargs["acp_command"], "codex")
        self.assertEqual(init_kwargs["acp_args"], ["exec", "--json"])
        self.assertEqual(init_kwargs["credential_pool"], "openai-codex")
        self.assertEqual(init_kwargs["api_key"], "rt-key")
        self.assertIs(init_kwargs["session_db"], fake_session_db)
class TestSessionDBAST(unittest.TestCase):
    """AST-level checks: the SessionDB try/except must live outside _ENV_LOCK."""

    def setUp(self):
        self.tree = ast.parse(STREAMING_PY)

    def test_sessiondb_try_not_inside_env_lock(self):
        """No 'with _ENV_LOCK:' body may contain a Try that imports hermes_state.

        A try/except nested inside _ENV_LOCK is the deadlock pattern caught
        by test_sprint34; the SessionDB init belongs outside the lock scope.
        """
        for with_node in ast.walk(self.tree):
            if not isinstance(with_node, ast.With):
                continue
            ctx_names = {getattr(item.context_expr, "id", "") for item in with_node.items}
            if "_ENV_LOCK" not in ctx_names:
                continue
            # Inspect only direct Try statements in the with-body.
            for try_stmt in (s for s in with_node.body if isinstance(s, ast.Try)):
                self.assertNotIn(
                    "hermes_state",
                    ast.unparse(try_stmt),
                    "SessionDB try/except must NOT be inside _ENV_LOCK body (deadlock risk)",
                )
class TestModelCustomInput(unittest.TestCase):
    """Tests for issue #444 — custom model ID input in model dropdown."""

    # Directory holding the static frontend assets under test.
    STATIC = pathlib.Path(__file__).parent.parent / 'static'

    def _read(self, filename):
        """Return the UTF-8 text of a file under static/."""
        path = self.STATIC / filename
        with open(path, 'r', encoding='utf-8') as f:
            return f.read()

    def _renderModelDropdown_body(self):
        """Extract the source of renderModelDropdown() from ui.js.

        Fix: previously a missing marker made str.find() return -1, which
        was silently used as a slice bound and produced a nonsense slice
        (and confusing downstream assertion failures). Fail loudly instead.
        """
        src = self._read('ui.js')
        start = src.find('function renderModelDropdown()')
        self.assertNotEqual(
            start, -1, 'renderModelDropdown() not found in ui.js')
        end = src.find('\nasync function selectModelFromDropdown', start)
        self.assertNotEqual(
            end, -1,
            'selectModelFromDropdown not found after renderModelDropdown in ui.js')
        return src[start:end]

    def test_model_custom_input_in_dropdown(self):
        """The custom-input element must be rendered by renderModelDropdown."""
        body = self._renderModelDropdown_body()
        self.assertIn('model-custom-input', body,
                      'model-custom-input class must be in renderModelDropdown')

    def test_model_custom_enter_handler(self):
        """The Enter-key apply handler must be defined in renderModelDropdown."""
        body = self._renderModelDropdown_body()
        self.assertIn('_applyCustom', body,
                      '_applyCustom function must be defined in renderModelDropdown')

    def test_model_custom_css_defined(self):
        """Both custom-input CSS classes must exist in style.css."""
        css = self._read('style.css')
        self.assertIn('.model-custom-row', css,
                      '.model-custom-row must be defined in style.css')
        self.assertIn('.model-custom-input', css,
                      '.model-custom-input must be defined in style.css')

    def test_model_custom_i18n_keys(self):
        """The en locale block must define both custom-model i18n keys."""
        i18n = self._read('i18n.js')
        # Find en locale block (appears first before es)
        en_block_start = i18n.find("'en'")
        es_block_start = i18n.find("'es'")
        # Fix: guard against missing locale markers — a -1 from find() would
        # otherwise silently produce a meaningless slice below.
        self.assertNotEqual(en_block_start, -1,
                            "'en' locale block not found in i18n.js")
        self.assertNotEqual(es_block_start, -1,
                            "'es' locale block not found in i18n.js")
        en_block = i18n[en_block_start:es_block_start]
        self.assertIn('model_custom_label', en_block,
                      'model_custom_label must be in en locale')
        self.assertIn('model_custom_placeholder', en_block,
                      'model_custom_placeholder must be in en locale')
# ── Sprint 42 additional tests: context indicator (#437) ─────────────────
def test_context_indicator_uses_pick_helper():
    """sessions.js must use the _pick helper to prefer fresh over stale values."""
    sessions_js = _read_sessions_js()
    assert '_pick' in sessions_js, "_pick helper not found in static/sessions.js"
def test_context_indicator_old_pattern_removed():
    """The old || fallback that preferred stale session data must be gone."""
    sessions_js = _read_sessions_js()
    assert '_s.input_tokens||u.input_tokens' not in sessions_js, \
        "Old stale-data-first pattern '_s.input_tokens||u.input_tokens' still present in static/sessions.js"
def test_context_indicator_all_six_fields():
    """All six token/cost fields must appear in the _syncCtxIndicator call."""
    sessions_js = _read_sessions_js()
    expected_fields = (
        'input_tokens',
        'output_tokens',
        'estimated_cost',
        'context_length',
        'last_prompt_tokens',
        'threshold_tokens',
    )
    for field in expected_fields:
        assert field in sessions_js, \
            f"Field '{field}' not found in static/sessions.js _syncCtxIndicator call"
# ── Sprint 42 additional tests: system prompt title (#441) ──────────────
def test_system_prompt_title_guard_exists():
    """sessions.js must guard against titles that begin with a [SYSTEM: prefix."""
    sessions_js = _read_sessions_js()
    assert '[SYSTEM:' in sessions_js, \
        "sessions.js must contain the [SYSTEM: guard to intercept system-prompt titles"
    # The marker alone could be a comment — the actual guard expression used
    # in the if-condition must also be present.
    assert "cleanTitle.startsWith('[SYSTEM:')" in sessions_js, \
        "sessions.js must have: cleanTitle.startsWith('[SYSTEM:') guard expression"
def test_cleanTitle_is_let_not_const():
    """cleanTitle must be a `let` binding so the [SYSTEM: guard can reassign it."""
    sessions_js = _read_sessions_js()
    assert 'let cleanTitle' in sessions_js, \
        "cleanTitle must be declared with 'let' (not 'const') to allow reassignment"
    # The previous const declaration pattern must no longer exist.
    assert "const cleanTitle=tags.length" not in sessions_js, \
        "Old 'const cleanTitle=tags.length...' must be replaced by 'let cleanTitle=...'"
# ── Sprint 42 additional tests: thinking panel persistence (#427) ────────
def test_streaming_persists_reasoning_in_session():
    """streaming.py must accumulate reasoning text and patch the last assistant row."""
    streaming_src = (REPO / 'api' / 'streaming.py').read_text()
    # Each required snippet maps to its failure message.
    required_snippets = {
        "_reasoning_text = ''":
            "_reasoning_text variable not initialised in streaming.py",
        '_reasoning_text += str(text)':
            "on_reasoning callback does not accumulate into _reasoning_text",
        "Persist reasoning trace in the session so it survives reload":
            "Reasoning persistence comment not found in streaming.py",
        "_rm['reasoning'] = _reasoning_text":
            "Code to set _rm['reasoning'] not found in streaming.py",
    }
    for snippet, failure_message in required_snippets.items():
        assert snippet in streaming_src, failure_message
    # Ordering: persistence must happen before the compacted session is built.
    persist_idx = streaming_src.index("Persist reasoning trace in the session")
    raw_session_idx = streaming_src.index("raw_session = s.compact()")
    assert persist_idx < raw_session_idx, \
        "Reasoning persistence block must appear before raw_session assignment"
def test_done_handler_patches_reasoning_field():
    """messages.js done SSE handler must patch reasoningText onto the last assistant message."""
    messages_src = (REPO / 'static' / 'messages.js').read_text()
    assert "Persist reasoning trace so thinking card survives page reload" in messages_src, \
        "Reasoning persistence comment not found in messages.js done handler"
    assert "if(reasoningText){" in messages_src, \
        "reasoningText guard not found in messages.js"
    assert "lastAsst.reasoning=reasoningText" in messages_src, \
        "lastAsst.reasoning assignment not found in messages.js"
    # Positional check: the persistence block must live inside the done handler.
    done_handler_idx = messages_src.index("source.addEventListener('done'")
    persist_idx = messages_src.index("Persist reasoning trace so thinking card survives page reload")
    assert done_handler_idx < persist_idx, \
        "Reasoning persistence patch must be inside the done SSE handler"
    # The guard must not clobber a value the server already persisted.
    assert "!lastAsst.reasoning" in messages_src, \
        "Guard '!lastAsst.reasoning' missing — would overwrite server-persisted reasoning"
def test_rendermessages_reads_reasoning_from_messages():
    """ui.js renderMessages must read m.reasoning so the thinking card renders on reload."""
    ui_src = (REPO / 'static' / 'ui.js').read_text()
    assert 'm.reasoning' in ui_src, \
        "m.reasoning not referenced in ui.js — thinking card won't render on reload"
    assert 'thinking-card' in ui_src, \
        "thinking-card CSS class not found in ui.js"
    # Whitespace-insensitive check for the fallback assignment.
    compact_src = ui_src.replace(' ', '')
    assert 'thinkingText=m.reasoning' in compact_src, \
        "thinkingText=m.reasoning assignment not found in ui.js renderMessages"
def test_streaming_restores_prior_reasoning_metadata_after_followup():
    """Previous-turn thinking must survive later turns.

    The provider-facing history strips WebUI-only `reasoning` fields, so the
    streaming path has to merge that metadata back onto the returned message
    history before saving, including reinserting any reasoning-only assistant
    segments the provider dropped.
    """
    streaming_src = (REPO / 'api' / 'streaming.py').read_text()
    assert "def _restore_reasoning_metadata(" in streaming_src, \
        "streaming.py must define a helper to restore prior reasoning metadata"
    assert "s.messages = _restore_reasoning_metadata(" in streaming_src, \
        "streaming.py must merge prior reasoning metadata back after run_conversation()"
    assert "updated_messages.insert(safe_pos, copy.deepcopy(prev_msg))" in streaming_src, \
        "streaming.py must reinsert dropped reasoning-only assistant messages"
def test_routes_restores_prior_reasoning_metadata_after_followup():
    """The non-streaming route path must preserve prior reasoning metadata too."""
    routes_src = (REPO / 'api' / 'routes.py').read_text()
    assert "_restore_reasoning_metadata" in routes_src, \
        "routes.py must import reasoning metadata restoration helper"
    assert 's.messages = _restore_reasoning_metadata(' in routes_src, \
        "routes.py must merge prior reasoning metadata back after run_conversation()"

253
tests/test_sprint43.py Normal file
View File

@@ -0,0 +1,253 @@
"""
Sprint 43 Tests: Bandit security fixes — B310, B324, B110 + QuietHTTPServer (PR #354).
Covers:
- gateway_watcher.py: MD5 uses usedforsecurity=False (B324)
- config.py: URL scheme validation before urlopen (B310)
- bootstrap.py: URL scheme validation in wait_for_health (B310)
- server.py: QuietHTTPServer class exists and extends ThreadingHTTPServer
- server.py: QuietHTTPServer.handle_error suppresses client disconnect errors
- server.py: QuietHTTPServer uses sys.exc_info() not traceback.sys.exc_info()
- Logging: at least 5 modules add a module-level logger (B110 remediation)
- routes.py: session titles redacted in /api/sessions list response
"""
import ast
import pathlib
import re
import sys
import unittest
REPO_ROOT = pathlib.Path(__file__).parent.parent
GATEWAY_WATCHER_PY = (REPO_ROOT / "api" / "gateway_watcher.py").read_text()
CONFIG_PY = (REPO_ROOT / "api" / "config.py").read_text()
BOOTSTRAP_PY = (REPO_ROOT / "bootstrap.py").read_text()
SERVER_PY = (REPO_ROOT / "server.py").read_text()
ROUTES_PY = (REPO_ROOT / "api" / "routes.py").read_text()
AUTH_PY = (REPO_ROOT / "api" / "auth.py").read_text()
PROFILES_PY = (REPO_ROOT / "api" / "profiles.py").read_text()
STREAMING_PY = (REPO_ROOT / "api" / "streaming.py").read_text()
WORKSPACE_PY = (REPO_ROOT / "api" / "workspace.py").read_text()
STATE_SYNC_PY = (REPO_ROOT / "api" / "state_sync.py").read_text()
# ── B324: MD5 usedforsecurity=False ─────────────────────────────────────────
class TestMD5SecurityFix(unittest.TestCase):
    """B324: hashlib.md5 must use usedforsecurity=False for non-crypto hashes."""

    def test_gateway_watcher_md5_usedforsecurity_false(self):
        """_snapshot_hash must pass usedforsecurity=False to hashlib.md5 (PR #354)."""
        self.assertIn(
            "usedforsecurity=False",
            GATEWAY_WATCHER_PY,
            "gateway_watcher.py: MD5 must use usedforsecurity=False (B324)",
        )

    def test_gateway_watcher_md5_pattern(self):
        """Exact pattern: hashlib.md5(..., usedforsecurity=False)."""
        # Fix: dropped the redundant function-local `import re`; this module
        # already imports re at the top.
        # DOTALL because the kwarg may be separated from the call by newlines.
        self.assertIsNotNone(
            re.search(r"hashlib\.md5\(.*?usedforsecurity=False\)", GATEWAY_WATCHER_PY, re.DOTALL),
            "MD5 call must include usedforsecurity=False kwarg",
        )
# ── B310: URL scheme validation ──────────────────────────────────────────────
class TestUrlSchemeValidation(unittest.TestCase):
    """B310: urllib.request.urlopen must not be called with arbitrary schemes."""

    def test_config_scheme_validation_present(self):
        """config.py must check parsed_url.scheme against an allow-list before urlopen."""
        self.assertIn(
            "parsed_url.scheme",
            CONFIG_PY,
            "config.py: URL scheme validation missing (B310)",
        )
        self.assertRegex(
            CONFIG_PY,
            r'parsed_url\.scheme\s+not\s+in\s+\(',
            "config.py: scheme check must use 'not in (...)' pattern",
        )

    def test_config_urlopen_has_nosec(self):
        """config.py's urlopen must carry a # nosec B310 suppression comment."""
        self.assertIn(
            "nosec B310",
            CONFIG_PY,
            "config.py: urlopen must have # nosec B310 after scheme validation",
        )

    def test_bootstrap_scheme_validation_present(self):
        """bootstrap.py wait_for_health must reject non-http(s) URLs before urlopen."""
        self.assertIn(
            "Invalid health check URL",
            BOOTSTRAP_PY,
            "bootstrap.py: URL scheme validation missing in wait_for_health (B310)",
        )
        self.assertRegex(
            BOOTSTRAP_PY,
            r'url\.startswith\([^)]+http',
            "bootstrap.py: must check url starts with http:// or https://",
        )

    def test_bootstrap_urlopen_has_nosec(self):
        """bootstrap.py's urlopen must carry a # nosec B310 suppression comment."""
        self.assertIn(
            "nosec B310",
            BOOTSTRAP_PY,
            "bootstrap.py: urlopen must have # nosec B310 after scheme validation",
        )

    def test_config_allows_http_and_https(self):
        """The allow-list in config.py must permit both http and https."""
        self.assertIn('"http"', CONFIG_PY, "config.py: http must be in allowed schemes")
        self.assertIn('"https"', CONFIG_PY, "config.py: https must be in allowed schemes")
# ── B110: Bare except/pass → logger.debug() ─────────────────────────────────
class TestBareExceptLogging(unittest.TestCase):
    """B110: silent except/pass blocks must be replaced with logged handlers."""

    # (module name, module source text) pairs touched by the B110 sweep.
    MODULES_REQUIRING_LOGGER = [
        ("api/auth.py", AUTH_PY),
        ("api/config.py", CONFIG_PY),
        ("api/gateway_watcher.py", GATEWAY_WATCHER_PY),
        ("api/profiles.py", PROFILES_PY),
        ("api/streaming.py", STREAMING_PY),
        ("api/workspace.py", WORKSPACE_PY),
        ("api/state_sync.py", STATE_SYNC_PY),
        ("api/routes.py", ROUTES_PY),
    ]

    def test_module_level_loggers_present(self):
        """Each fixed module must declare logger = logging.getLogger(__name__)."""
        for module_name, module_src in self.MODULES_REQUIRING_LOGGER:
            with self.subTest(module=module_name):
                self.assertIn(
                    "logger = logging.getLogger(__name__)",
                    module_src,
                    f"{module_name}: module-level logger missing (B110 fix requires logger)",
                )

    def test_gateway_watcher_no_bare_pass_in_except(self):
        """The poll-loop except block must log via logger.debug, not bare pass."""
        self.assertIn(
            "logger.debug",
            GATEWAY_WATCHER_PY,
            "gateway_watcher.py: must use logger.debug not bare pass (B110)",
        )

    def test_profiles_reload_dotenv_logs_on_error(self):
        """_reload_dotenv's except must both reset state and log a warning."""
        self.assertIn(
            "_loaded_profile_env_keys = set()",
            PROFILES_PY,
            "profiles.py: _reload_dotenv except must reset _loaded_profile_env_keys",
        )
        self.assertIn(
            "Failed to reload dotenv",
            PROFILES_PY,
            "profiles.py: _reload_dotenv except must log a warning",
        )
# ── QuietHTTPServer ──────────────────────────────────────────────────────────
class TestQuietHTTPServer(unittest.TestCase):
    """server.py: QuietHTTPServer suppresses client disconnect noise."""

    def test_quiet_http_server_class_exists(self):
        """QuietHTTPServer must be defined in server.py."""
        self.assertIn(
            "class QuietHTTPServer",
            SERVER_PY,
            "server.py: QuietHTTPServer class missing (PR #354)",
        )

    def test_quiet_http_server_extends_threading_http_server(self):
        """QuietHTTPServer must extend ThreadingHTTPServer."""
        self.assertRegex(
            SERVER_PY,
            r"class QuietHTTPServer\(ThreadingHTTPServer\)",
            "QuietHTTPServer must extend ThreadingHTTPServer",
        )

    def test_quiet_http_server_used_as_server(self):
        """main() must instantiate QuietHTTPServer not raw ThreadingHTTPServer."""
        # Only scan source after the class definition so the class's own
        # header cannot satisfy the check.
        after_class = SERVER_PY[SERVER_PY.find("class QuietHTTPServer"):]
        self.assertIn(
            "QuietHTTPServer(",
            after_class,
            "main() must use QuietHTTPServer, not ThreadingHTTPServer directly",
        )

    def test_handle_error_suppresses_connection_reset(self):
        """handle_error must suppress ConnectionResetError and BrokenPipeError."""
        self.assertIn(
            "ConnectionResetError",
            SERVER_PY,
            "QuietHTTPServer.handle_error must handle ConnectionResetError",
        )
        self.assertIn(
            "BrokenPipeError",
            SERVER_PY,
            "QuietHTTPServer.handle_error must handle BrokenPipeError",
        )

    def test_uses_sys_exc_info_not_traceback_sys(self):
        """handle_error must use sys.exc_info() not traceback.sys.exc_info() (implementation detail)."""
        self.assertNotIn(
            "traceback.sys.exc_info()",
            SERVER_PY,
            "server.py: must use sys.exc_info() not traceback.sys.exc_info()",
        )
        self.assertIn(
            "sys.exc_info()",
            SERVER_PY,
            "server.py: handle_error must call sys.exc_info()",
        )

    def test_sys_imported_in_server(self):
        """server.py must import sys (needed for sys.exc_info)."""
        # Fix: dropped the redundant function-local `import re`; this module
        # already imports re at the top.
        self.assertIsNotNone(
            re.search(r"^import sys", SERVER_PY, re.MULTILINE),
            "server.py: sys must be imported",
        )

    def test_handle_error_calls_super(self):
        """handle_error must call super().handle_error for non-client-disconnect errors."""
        self.assertIn(
            "super().handle_error(request, client_address)",
            SERVER_PY,
            "QuietHTTPServer.handle_error must delegate to super for real errors",
        )
# ── Session title redaction in /api/sessions ────────────────────────────────
class TestSessionTitleRedaction(unittest.TestCase):
    """routes.py: the /api/sessions list endpoint must redact session titles."""

    def test_redact_text_called_on_session_titles(self):
        """A _redact_text(...title...) call must appear in routes.py."""
        self.assertRegex(
            ROUTES_PY,
            r'_redact_text\([^)]*\btitle\b[^)]*\)',
            "routes.py: session titles must be redacted via _redact_text in /api/sessions",
        )

    def test_redact_text_imported_in_routes(self):
        """routes.py must reference the _redact_text helper."""
        self.assertIn(
            "_redact_text",
            ROUTES_PY,
            "routes.py: _redact_text must be imported from api.helpers",
        )

134
tests/test_sprint44.py Normal file
View File

@@ -0,0 +1,134 @@
"""
Sprint 44 Tests: Workspace panel close button fixes (PR #413).
Covers:
- index.html: mobile-close-btn now calls handleWorkspaceClose() instead of
closeWorkspacePanel(), so hitting X while a file is open returns you to the
file browser rather than collapsing the whole panel.
- boot.js: syncWorkspacePanelUI() hides #btnClearPreview (the X icon) on
desktop when no file preview is open, eliminating the duplicate X that
appeared alongside the chevron collapse button.
- boot.js: handleWorkspaceClose() logic — clears preview when one is visible,
closes panel otherwise (existing function, confirmed wired to both buttons).
"""
import pathlib
import re
import unittest
REPO = pathlib.Path(__file__).parent.parent
HTML = (REPO / "static" / "index.html").read_text(encoding="utf-8")
BOOT_JS = (REPO / "static" / "boot.js").read_text(encoding="utf-8")
class TestMobileCloseButtonBehavior(unittest.TestCase):
    """mobile-close-btn must call handleWorkspaceClose(), not closeWorkspacePanel()."""

    def _mobile_close_btn_tag(self):
        """Locate and return the opening tag of the mobile-close-btn element."""
        match = re.search(r'class="[^"]*mobile-close-btn[^"]*"[^>]*>', HTML)
        self.assertIsNotNone(match, "mobile-close-btn element not found in index.html")
        return match.group(0)

    def test_mobile_close_btn_calls_handle_workspace_close(self):
        """The X button's onclick must be handleWorkspaceClose()."""
        self.assertIn(
            'onclick="handleWorkspaceClose()"',
            self._mobile_close_btn_tag(),
            "mobile-close-btn must call handleWorkspaceClose() so that hitting X "
            "while a file is open closes the file first, not the whole panel",
        )

    def test_mobile_close_btn_does_not_call_close_workspace_panel_directly(self):
        """The X button must not wire closeWorkspacePanel() directly."""
        self.assertNotIn(
            'onclick="closeWorkspacePanel()"',
            self._mobile_close_btn_tag(),
            "mobile-close-btn must not call closeWorkspacePanel() directly — "
            "it would bypass the two-step close logic and collapse the panel even "
            "when a file is being viewed",
        )

    def test_handle_workspace_close_defined_in_boot_js(self):
        """handleWorkspaceClose() must be defined in boot.js."""
        self.assertIn(
            "function handleWorkspaceClose()",
            BOOT_JS,
            "handleWorkspaceClose() is missing from boot.js",
        )

    def test_handle_workspace_close_clears_preview_first(self):
        """handleWorkspaceClose() must call clearPreview() when a preview is visible."""
        self.assertIn(
            "clearPreview()",
            BOOT_JS,
            "handleWorkspaceClose() must call clearPreview() when preview is visible",
        )

    def test_handle_workspace_close_falls_back_to_close_panel(self):
        """handleWorkspaceClose() must fall back to closeWorkspacePanel()."""
        fn_start = BOOT_JS.find("function handleWorkspaceClose()")
        self.assertNotEqual(fn_start, -1, "handleWorkspaceClose() not found in boot.js")
        # A generous fixed-size window after the signature covers the body.
        fn_window = BOOT_JS[fn_start : fn_start + 400]
        self.assertIn(
            "closeWorkspacePanel()",
            fn_window,
            "handleWorkspaceClose() must call closeWorkspacePanel() as its fallback path",
        )
class TestDesktopNoDuplicateXButton(unittest.TestCase):
    """On desktop, only one X/close control should appear at a time."""

    def test_sync_workspace_panel_ui_hides_clear_preview_on_desktop(self):
        """syncWorkspacePanelUI() must toggle clearBtn.style.display."""
        self.assertIn(
            "clearBtn.style.display",
            BOOT_JS,
            "syncWorkspacePanelUI() must control clearBtn.style.display to hide it "
            "on desktop when no file preview is open",
        )

    def test_clear_preview_hidden_when_no_preview(self):
        """The display toggle for btnClearPreview must key off hasPreview."""
        expected_pattern = r"clearBtn\.style\.display\s*=\s*hasPreview"
        self.assertRegex(
            BOOT_JS,
            expected_pattern,
            "btnClearPreview display must be conditioned on hasPreview in "
            "syncWorkspacePanelUI() to avoid a duplicate X on desktop",
        )

    def test_clear_preview_toggle_only_applied_on_desktop(self):
        """The display toggle must be guarded by !isCompact so mobile is unaffected."""
        # Either operand order is acceptable, as long as both appear together.
        expected_pattern = r"isCompact.*clearBtn\.style\.display|clearBtn\.style\.display.*isCompact"
        self.assertRegex(
            BOOT_JS,
            expected_pattern,
            "btnClearPreview display toggle must be guarded by isCompact so the "
            "mobile X button visibility is not accidentally affected",
        )

    def test_btnclearpreview_exists_in_html(self):
        """#btnClearPreview must still exist in the HTML (not removed)."""
        self.assertIn(
            'id="btnClearPreview"',
            HTML,
            "#btnClearPreview must remain in index.html",
        )

    def test_btncollapseWorkspacepanel_exists_in_html(self):
        """#btnCollapseWorkspacePanel (chevron) must still exist in the HTML."""
        self.assertIn(
            'id="btnCollapseWorkspacePanel"',
            HTML,
            "#btnCollapseWorkspacePanel must remain in index.html",
        )
# Allow running this test file directly with `python`, outside the pytest runner.
if __name__ == "__main__":
    unittest.main()

157
tests/test_sprint45.py Normal file
View File

@@ -0,0 +1,157 @@
"""
Sprint 45 Tests: v0.50.36 upstream sync with minimal local patch retention.
Covers:
- First password enablement via POST /api/settings keeps the current browser logged in
- The returned auth metadata is present and onboarding can continue with the issued cookie
- Legacy assistant_language is no longer exposed and is removed on the next save
- The local reply-language UI/runtime enhancement is gone from the synced codebase
"""
import json
import pathlib
import urllib.error
import urllib.request
import os
from tests._pytest_port import BASE
REPO = pathlib.Path(__file__).parent.parent
# Use HERMES_WEBUI_TEST_STATE_DIR if available (set by conftest for the test process),
# falling back to the conventional webui-mvp-test path.
def _get_settings_file() -> pathlib.Path:
"""Resolve SETTINGS_FILE at call time (env var set by conftest after module import)."""
state_dir = pathlib.Path(
os.environ.get("HERMES_WEBUI_TEST_STATE_DIR",
str(pathlib.Path.home() / ".hermes" / "webui-mvp-test"))
)
return state_dir / "settings.json"
def get(path, headers=None):
    """GET BASE+path; return (parsed_json, status, headers), even for HTTP errors."""
    request = urllib.request.Request(BASE + path, headers=headers or {})
    try:
        response = urllib.request.urlopen(request, timeout=10)
    except urllib.error.HTTPError as err:
        return json.loads(err.read()), err.code, dict(err.headers)
    with response:
        return json.loads(response.read()), response.status, dict(response.headers)
def post(path, body=None, headers=None):
    """POST a JSON body to BASE+path; return (parsed_json, status, headers)."""
    payload = json.dumps(body or {}).encode()
    request = urllib.request.Request(
        BASE + path,
        data=payload,
        headers={"Content-Type": "application/json", **(headers or {})},
    )
    try:
        response = urllib.request.urlopen(request, timeout=10)
    except urllib.error.HTTPError as err:
        return json.loads(err.read()), err.code, dict(err.headers)
    with response:
        return json.loads(response.read()), response.status, dict(response.headers)
def read(path):
    """Return the UTF-8 text of a file given its repo-relative path."""
    target = REPO / path
    return target.read_text(encoding="utf-8")
def _snapshot_settings_file():
    """Return the current settings.json text, or None if the file is absent.

    Fix: the original resolved _get_settings_file() twice and used an
    exists()/read() pair, which both re-resolves the env-dependent path and
    races with concurrent deletion; a single EAFP read avoids both.
    """
    try:
        return _get_settings_file().read_text(encoding="utf-8")
    except FileNotFoundError:
        return None
def _restore_settings_file(original_text):
if original_text is None:
_get_settings_file().unlink(missing_ok=True)
return
_get_settings_file().write_text(original_text, encoding="utf-8")
def test_first_password_enablement_returns_cookie_and_keeps_browser_logged_in():
    """Enabling the first password must keep the enabling browser logged in
    and allow onboarding to continue with the freshly issued cookie."""
    original_settings = _snapshot_settings_file()
    cookie_header = None  # captured so teardown can authenticate against the server

    try:
        saved, status, response_headers = post(
            "/api/settings", {"_set_password": "sprint45-secret"}
        )
        assert status == 200
        assert saved["auth_enabled"] is True
        assert saved["logged_in"] is True
        assert saved["auth_just_enabled"] is True

        set_cookie = response_headers.get("Set-Cookie", "")
        assert "hermes_session=" in set_cookie
        cookie_header = set_cookie.split(";", 1)[0]

        auth, auth_status, _ = get(
            "/api/auth/status", headers={"Cookie": cookie_header}
        )
        assert auth_status == 200
        assert auth["auth_enabled"] is True
        assert auth["logged_in"] is True

        done, done_status, _ = post(
            "/api/onboarding/complete", {}, headers={"Cookie": cookie_header}
        )
        assert done_status == 200
        assert done["completed"] is True
    finally:
        # Step 1: write a clean settings file (no password_hash) straight to disk.
        try:
            import json as _json
            clean = _json.loads(original_settings) if original_settings else {}
            clean.pop("password_hash", None)
            settings_file = _get_settings_file()
            settings_file.parent.mkdir(parents=True, exist_ok=True)
            settings_file.write_text(_json.dumps(clean, indent=2), encoding="utf-8")
        except Exception:
            pass
        # Step 2: ask the server to drop auth via the API — this requires the
        # session cookie issued above, when we managed to capture one.
        try:
            auth_headers = {"Cookie": cookie_header} if cookie_header else {}
            post("/api/settings", {"_clear_password": True}, headers=auth_headers)
        except Exception:
            pass
        _restore_settings_file(original_settings)
def test_legacy_assistant_language_is_hidden_and_removed_on_next_save():
    """A legacy assistant_language key on disk is never surfaced by the API
    and is purged from the file by the next settings save."""
    original_settings = _snapshot_settings_file()
    try:
        legacy_payload = {
            "assistant_language": "zh",
            "send_key": "enter",
            "onboarding_completed": False,
        }
        settings_file = _get_settings_file()
        settings_file.parent.mkdir(parents=True, exist_ok=True)
        settings_file.write_text(
            json.dumps(legacy_payload, ensure_ascii=False, indent=2),
            encoding="utf-8",
        )

        loaded, status, _ = get("/api/settings")
        assert status == 200
        assert "assistant_language" not in loaded

        saved, save_status, _ = post("/api/settings", {"send_key": "ctrl+enter"})
        assert save_status == 200
        assert "assistant_language" not in saved
        assert saved["send_key"] == "ctrl+enter"

        persisted = json.loads(settings_file.read_text(encoding="utf-8"))
        assert "assistant_language" not in persisted
    finally:
        _restore_settings_file(original_settings)
def test_reply_language_customization_ui_and_runtime_are_removed():
    """The local reply-language UI/runtime patch must be fully gone
    from the synced codebase."""
    forbidden = {
        "static/index.html": ("settingsAssistantLanguage",),
        "static/panels.js": ("assistant_language", "settingsAssistantLanguage"),
        "api/streaming.py": ("assistant_language", "Default reply language:"),
    }
    for rel_path, needles in forbidden.items():
        content = read(rel_path)
        for needle in needles:
            assert needle not in content

167
tests/test_sprint46.py Normal file
View File

@@ -0,0 +1,167 @@
"""
Sprint 46 Tests: manual session compression with optional focus topic.
"""
import contextlib
import io
import json
import sys
import types
from api.models import Session
from api.config import SESSION_DIR
from api.routes import _handle_session_compress
from tests._pytest_port import BASE
class _FakeHandler:
def __init__(self):
self.wfile = io.BytesIO()
self.status = None
self.sent_headers = {}
def send_response(self, status):
self.status = status
def send_header(self, key, value):
self.sent_headers[key] = value
def end_headers(self):
pass
def payload(self):
return json.loads(self.wfile.getvalue().decode("utf-8"))
class _FakeCompressor:
def __init__(self):
self.calls = []
def compress(self, messages, current_tokens=None, focus_topic=None):
self.calls.append(
{
"messages": list(messages),
"current_tokens": current_tokens,
"focus_topic": focus_topic,
}
)
if len(messages) >= 2:
return [messages[0], messages[-1]]
return list(messages)
class _FakeAgent:
last_instance = None
def __init__(self, **kwargs):
self.kwargs = kwargs
self.context_compressor = _FakeCompressor()
_FakeAgent.last_instance = self
def _make_session(messages=None):
    """Persist a throwaway session to SESSION_DIR and return its id.

    When *messages* is falsy, a four-message user/assistant transcript is
    used so compression has something meaningful to shrink.
    """
    SESSION_DIR.mkdir(parents=True, exist_ok=True)
    messages = messages or [
        {"role": "user", "content": "one"},
        {"role": "assistant", "content": "two"},
        {"role": "user", "content": "three"},
        {"role": "assistant", "content": "four"},
    ]
    session = Session(
        session_id="compress_test_001",
        title="Untitled",
        workspace="/tmp/hermes-webui-test",
        model="openai/gpt-5.4-mini",
        messages=messages,
    )
    session.save(touch_updated_at=False)
    return session.session_id
def test_session_compress_requires_session_id(cleanup_test_sessions):
    """A compress request without session_id is rejected with 400."""
    handler = _FakeHandler()
    _handle_session_compress(handler, {})
    assert handler.status == 400
    assert handler.payload()["error"] == "Missing required field(s): session_id"


def test_session_compress_roundtrip(monkeypatch, cleanup_test_sessions):
    """Full compress round trip against a fake agent: the focus topic is
    forwarded to the compressor and the trimmed transcript is persisted."""
    created = cleanup_test_sessions
    sid = _make_session()
    created.append(sid)

    # Stand in for the real run_agent module.
    fake_run_agent = types.ModuleType("run_agent")
    fake_run_agent.AIAgent = _FakeAgent
    monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent)

    import api.config as _cfg

    # Stub the hermes_cli package tree so provider resolution never touches
    # real credentials.
    fake_runtime_provider = types.ModuleType("hermes_cli.runtime_provider")
    fake_runtime_provider.resolve_runtime_provider = lambda requested=None: {
        "api_key": "fake-key",
        "provider": requested or "openai",
        "base_url": "https://api.openai.com/v1",
    }
    fake_hermes_cli = types.ModuleType("hermes_cli")
    fake_hermes_cli.__path__ = []
    fake_hermes_cli.runtime_provider = fake_runtime_provider
    monkeypatch.setitem(sys.modules, "hermes_cli", fake_hermes_cli)
    monkeypatch.setitem(
        sys.modules, "hermes_cli.runtime_provider", fake_runtime_provider
    )
    import hermes_cli.runtime_provider as _rtp

    monkeypatch.setattr(
        _cfg,
        "resolve_model_provider",
        lambda model: ("openai/gpt-5.4-mini", "openai", "https://api.openai.com/v1"),
    )
    monkeypatch.setattr(
        _cfg,
        "_get_session_agent_lock",
        lambda sid: contextlib.nullcontext(),
    )
    monkeypatch.setattr(
        _rtp,
        "resolve_runtime_provider",
        lambda requested=None: {
            "api_key": "fake-key",
            "provider": requested or "openai",
            "base_url": "https://api.openai.com/v1",
        },
    )

    handler = _FakeHandler()
    _handle_session_compress(
        handler, {"session_id": sid, "focus_topic": "database schema"}
    )
    assert handler.status == 200

    payload = handler.payload()
    assert payload["ok"] is True
    assert payload["focus_topic"] == "database schema"
    assert payload["summary"]["headline"] == "Compressed: 4 → 2 messages"
    assert payload["session"]["session_id"] == sid
    assert payload["session"]["messages"] == [
        {"role": "user", "content": "one"},
        {"role": "assistant", "content": "four"},
    ]

    assert _FakeAgent.last_instance is not None
    first_call = _FakeAgent.last_instance.context_compressor.calls[0]
    assert first_call["focus_topic"] == "database schema"
def test_static_commands_js_registers_compress_alias(cleanup_test_sessions):
    """commands.js must expose both /compress and its /compact alias."""
    from pathlib import Path
    commands_js = Path(__file__).resolve().parents[1] / "static" / "commands.js"
    src = commands_js.read_text(encoding="utf-8")
    for needle in (
        "name:'compress'",
        "name:'compact'",
        "/api/session/compress",
        "cmdCompress",
        "cmdCompact",
    ):
        assert needle in src


def test_static_commands_js_prefers_persisted_reference_message(cleanup_test_sessions):
    """The persisted reference message must win over the summary reference."""
    from pathlib import Path
    commands_js = Path(__file__).resolve().parents[1] / "static" / "commands.js"
    src = commands_js.read_text(encoding="utf-8")
    assert "const messageRef=referenceMsg?msgContent(referenceMsg)||String(referenceMsg.content||''):'';" in src
    assert "const referenceText=messageRef || summaryRef;" in src

39
tests/test_sprint47.py Normal file
View File

@@ -0,0 +1,39 @@
"""
Sprint 47 tests: skill-backed slash commands appear in the Web UI autocomplete.
Covers:
- commands.js lazily loads /api/skills for slash autocomplete
- built-in commands still win over skill name collisions
- boot.js primes the async skill load when typing '/'
- the dropdown marks skill-backed entries visually
"""
import pathlib
REPO_ROOT = pathlib.Path(__file__).parent.parent
COMMANDS_JS = (REPO_ROOT / "static" / "commands.js").read_text(encoding="utf-8")
BOOT_JS = (REPO_ROOT / "static" / "boot.js").read_text(encoding="utf-8")
STYLE_CSS = (REPO_ROOT / "static" / "style.css").read_text(encoding="utf-8")
def test_skill_commands_are_loaded_from_api_skills_for_autocomplete():
    """commands.js lazily fetches /api/skills and tags entries as skills."""
    for needle in ("loadSkillCommands", "api('/api/skills')", "source:'skill'"):
        assert needle in COMMANDS_JS


def test_builtin_commands_take_precedence_over_skill_slug_collisions():
    # In the combined implementation, REGISTRY (agent registry + WEBUI_ONLY)
    # wins over skills, so either guard form is acceptable.
    guards = (
        "if(COMMANDS.some(c=>c.name===slug)) return null;",
        "if(REGISTRY.some(c=>c.name===slug)) return null;",
    )
    assert any(guard in COMMANDS_JS for guard in guards), \
        "Built-in commands must block skill slug collisions"


def test_typing_slash_primes_async_skill_command_loading():
    """boot.js must kick off the async skill load when '/' is typed."""
    assert "ensureSkillCommandsLoadedForAutocomplete" in BOOT_JS
    assert "ensureSkillCommandsLoadedForAutocomplete();" in BOOT_JS


def test_dropdown_has_visual_badge_for_skill_backed_entries():
    """Skill-backed dropdown entries carry a visual badge."""
    assert "cmd-item-badge-skill" in STYLE_CSS
    assert "slash_skill_badge" in COMMANDS_JS

209
tests/test_sprint48.py Normal file
View File

@@ -0,0 +1,209 @@
"""Tests for sprint 48 UX bug fixes — v0.50.92.
Covers:
- #702: XML tool-call syntax (<function_calls>) stripped from assistant
message content before rendering (server-side + client-side).
- #703: Workspace file panel shows an empty-state message when no workspace
is configured or the directory is empty.
- #704: Notification settings description uses "app" instead of "tab".
"""
import pathlib
import re
REPO = pathlib.Path(__file__).parent.parent
def read(rel):
    """Return the text of *rel* (relative to the repo root) decoded as UTF-8.

    The encoding is explicit: the default read_text() uses the locale
    encoding, which breaks on non-ASCII i18n strings under e.g. a C locale.
    The sibling sprint test files already pass encoding="utf-8".
    """
    return (REPO / rel).read_text(encoding="utf-8")
# ── Bug #702 — XML tool-call leak on DeepSeek ────────────────────────────────
class TestXmlToolCallStrip:
    """_strip_xml_tool_calls() is defined in api/streaming.py and must remove
    <function_calls>...</function_calls> blocks from assistant content."""

    def _load_fn(self):
        """Import the helper from streaming.py without triggering full server
        initialisation (which would fail in unit-test contexts).

        Returns the extracted ``_strip_xml_tool_calls`` callable.
        """
        import importlib, sys, types
        # Stub heavy transitive imports so we can import the module cleanly.
        for mod in ('api.config', 'api.helpers', 'api.models', 'api.workspace'):
            if mod not in sys.modules:
                sys.modules[mod] = types.ModuleType(mod)
        # Provide minimal symbols that streaming.py needs at import time.
        # setdefault() returns the stub created above (or a pre-existing real
        # module, which already has these attributes).
        cfg = sys.modules.setdefault('api.config', types.ModuleType('api.config'))
        for attr in ('STREAMS', 'STREAMS_LOCK', 'CANCEL_FLAGS', 'AGENT_INSTANCES',
                     'LOCK', 'SESSIONS', 'SESSION_DIR',
                     '_get_session_agent_lock', '_set_thread_env',
                     '_clear_thread_env', 'resolve_model_provider'):
            if not hasattr(cfg, attr):
                setattr(cfg, attr, None)
        # Fall back to reading the source and exec-ing just the function.
        src = read('api/streaming.py')
        ns: dict = {}
        # Extract the function definition with regex so we don't need to import
        # the whole module (avoids all the heavy deps).  The lookahead stops at
        # the next top-level def/class, so the helper must not be the last
        # top-level definition in the file.
        match = re.search(
            r'(def _strip_xml_tool_calls\(.*?)\n(?=\ndef |\nclass )',
            src, re.DOTALL
        )
        assert match, "_strip_xml_tool_calls not found in api/streaming.py"
        # 'import re' is prepended because the extracted body uses the re module
        # but is exec'd in a fresh namespace.
        exec(compile('import re\n' + match.group(1), '<streaming_extract>', 'exec'), ns)
        return ns['_strip_xml_tool_calls']

    def test_complete_block_removed(self):
        """A well-formed <function_calls> block is removed, text kept."""
        fn = self._load_fn()
        text = "Hello <function_calls><invoke>foo</invoke></function_calls> world"
        result = fn(text)
        assert '<function_calls>' not in result
        assert 'Hello' in result
        assert 'world' in result

    def test_orphaned_opening_tag_removed(self):
        """An unterminated opening tag (stream cut off) is also stripped."""
        fn = self._load_fn()
        text = "Some answer text\n<function_calls>\n<invoke>tool</invoke>"
        result = fn(text)
        assert '<function_calls>' not in result
        assert 'Some answer text' in result

    def test_no_tag_unchanged(self):
        """Content without tool-call markup passes through untouched."""
        fn = self._load_fn()
        text = "This is a normal response with no tool calls."
        assert fn(text) == text

    def test_multiple_blocks_removed(self):
        """Every block is removed, not just the first occurrence."""
        fn = self._load_fn()
        text = (
            "Part one <function_calls><invoke>a</invoke></function_calls> "
            "middle <function_calls><invoke>b</invoke></function_calls> end"
        )
        result = fn(text)
        assert '<function_calls>' not in result
        assert 'Part one' in result
        assert 'middle' in result
        assert 'end' in result

    def test_function_defined_in_streaming_py(self):
        src = read('api/streaming.py')
        assert 'def _strip_xml_tool_calls(' in src, (
            "_strip_xml_tool_calls must be defined in api/streaming.py"
        )

    def test_strip_applied_to_assistant_messages(self):
        """Verify the strip call is applied to assistant message content after
        the agent run completes (server-side persistence fix)."""
        src = read('api/streaming.py')
        assert '_strip_xml_tool_calls' in src, (
            "_strip_xml_tool_calls must be referenced in api/streaming.py"
        )
        # Confirm it is called on message content, not just defined
        assert src.count('_strip_xml_tool_calls') >= 2, (
            "_strip_xml_tool_calls must be both defined and called"
        )

    def test_client_side_strip_in_messages_js(self):
        """The client mirrors the strip for already-persisted messages."""
        src = read('static/messages.js')
        assert '_stripXmlToolCalls' in src, (
            "Client-side _stripXmlToolCalls must exist in static/messages.js"
        )
        assert 'function_calls' in src.lower(), (
            "Client-side strip must reference 'function_calls'"
        )

    def test_client_side_strip_in_ui_js(self):
        """ui.js strips tool-call XML from the live display path too."""
        src = read('static/ui.js')
        assert '_stripXmlToolCallsDisplay' in src, (
            "_stripXmlToolCallsDisplay must exist in static/ui.js"
        )
# ── Bug #703 — Workspace file panel empty state ───────────────────────────────
class TestWorkspaceEmptyState:
    """Bug #703: the workspace file panel needs explicit empty-state UI."""

    def test_i18n_no_path_string_present(self):
        assert 'workspace_empty_no_path' in read('static/i18n.js'), (
            "i18n key workspace_empty_no_path must be defined in i18n.js"
        )

    def test_i18n_no_path_mentions_settings(self):
        # The empty-state copy should direct the user to Settings.
        m = re.search(r"workspace_empty_no_path:\s*'([^']+)'", read('static/i18n.js'))
        assert m, "workspace_empty_no_path value not found in i18n.js"
        assert 'Settings' in m.group(1), (
            "workspace_empty_no_path should mention Settings"
        )

    def test_i18n_empty_dir_string_present(self):
        assert 'workspace_empty_dir' in read('static/i18n.js'), (
            "i18n key workspace_empty_dir must be defined in i18n.js"
        )

    def test_empty_state_element_in_html(self):
        assert 'wsEmptyState' in read('static/index.html'), (
            "id=\"wsEmptyState\" empty-state element must exist in index.html"
        )

    def test_render_file_tree_shows_empty_state(self):
        ui_src = read('static/ui.js')
        assert 'wsEmptyState' in ui_src, (
            "renderFileTree in ui.js must reference wsEmptyState"
        )
        assert 'workspace_empty_no_path' in ui_src, (
            "renderFileTree must use workspace_empty_no_path i18n key"
        )
        assert 'workspace_empty_dir' in ui_src, (
            "renderFileTree must use workspace_empty_dir i18n key"
        )
# ── Bug #704 — Notification description says "tab" ───────────────────────────
class TestNotificationDescriptionText:
    """Bug #704: the notification setting should say "app", not "tab"."""

    @staticmethod
    def _english_section(src):
        # The English locale block is first in i18n.js; it ends where the
        # Spanish (es) description begins.  When the marker is missing we
        # fall back to the whole file.
        es_marker = "settings_desc_notifications: 'Muestra"
        end = src.index(es_marker) if es_marker in src else len(src)
        return src[:end]

    def test_english_uses_app_not_tab(self):
        en_section = self._english_section(read('static/i18n.js'))
        m = re.search(r"settings_desc_notifications:\s*'([^']+)'", en_section)
        assert m, "English settings_desc_notifications not found"
        desc = m.group(1)
        assert 'tab' not in desc.lower(), (
            f"English notification description must not say 'tab', got: {desc!r}"
        )
        assert 'app' in desc.lower(), (
            f"English notification description must say 'app', got: {desc!r}"
        )

    def test_new_wording_exact(self):
        expected = 'while the app is in the background'
        assert expected in read('static/i18n.js'), (
            f"Exact phrase {expected!r} must appear in i18n.js"
        )

    def test_old_wording_removed_from_english(self):
        old_phrase = 'while the tab is in the background'
        en_section = self._english_section(read('static/i18n.js'))
        assert old_phrase not in en_section, (
            "Old English notification description with 'tab' must be removed"
        )

157
tests/test_sprint5.py Normal file
View File

@@ -0,0 +1,157 @@
"""Sprint 5 tests: workspace CRUD, file save, session index, JS serving."""
import json, pathlib, uuid, urllib.request, urllib.error
import os
from tests._pytest_port import BASE
def get(path):
    """GET ``BASE + path``; return (json_body, status)."""
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        return json.loads(resp.read()), resp.status


def get_raw(path):
    """GET ``BASE + path``; return (raw_bytes, content_type, status)."""
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        content_type = resp.headers.get("Content-Type", "")
        return resp.read(), content_type, resp.status


def post(path, body=None):
    """POST a JSON body; return (json_body, status), even on HTTP errors."""
    request = urllib.request.Request(
        BASE + path,
        data=json.dumps(body or {}).encode(),
        headers={"Content-Type": "application/json"},
    )
    try:
        with urllib.request.urlopen(request, timeout=10) as resp:
            return json.loads(resp.read()), resp.status
    except urllib.error.HTTPError as err:
        return json.loads(err.read()), err.code
def make_session_tracked(created_list, ws=None):
    """Create a session via the API and register it with the cleanup fixture.

    Returns (session_id, workspace_path).  Uses the module-level ``pathlib``
    import directly; the previous function-local ``import pathlib as
    _pathlib`` was a redundant shadow of an import already at the top of
    this file.
    """
    body = {}
    if ws:
        body["workspace"] = str(ws)
    d, _ = post("/api/session/new", body)
    sid = d["session"]["session_id"]
    created_list.append(sid)
    return sid, pathlib.Path(d["session"]["workspace"])
def make_workspace_child(base: pathlib.Path, name: str) -> pathlib.Path:
    """Create (if needed) and return the child directory *name* under *base*."""
    child = base / name
    child.mkdir(parents=True, exist_ok=True)
    return child
def test_server_running_from_new_location():
    """The test server answers /health."""
    payload, status = get("/health")
    assert status == 200 and payload["status"] == "ok"


def test_app_js_served():
    """Sprint 9: app.js replaced by modules. Verify ui.js (contains renderMd) is served."""
    raw, content_type, status = get_raw("/static/ui.js")
    assert status == 200 and "javascript" in content_type and b"renderMd" in raw


def test_workspaces_list():
    payload, status = get("/api/workspaces")
    assert status == 200 and "workspaces" in payload and "last" in payload


def test_workspace_add_valid(cleanup_test_sessions):
    _, ws = make_session_tracked(cleanup_test_sessions)
    child = make_workspace_child(ws, f"workspace-add-{uuid.uuid4().hex[:6]}")
    post("/api/workspaces/remove", {"path": str(child)})  # ensure a clean slate
    result, status = post("/api/workspaces/add", {"path": str(child), "name": "Temp"})
    assert status == 200 and any(w["path"] == str(child) for w in result["workspaces"])
    post("/api/workspaces/remove", {"path": str(child)})


def test_workspace_add_validates_existence():
    """Adding a nonexistent directory is rejected."""
    _, status = post("/api/workspaces/add", {"path": "/tmp/does_not_exist_xyz_999"})
    assert status == 400


def test_workspace_add_validates_is_dir():
    """Adding a regular file is rejected."""
    _, status = post("/api/workspaces/add", {"path": "/etc/hostname"})
    assert status == 400


def test_workspace_add_no_duplicate(cleanup_test_sessions):
    _, ws = make_session_tracked(cleanup_test_sessions)
    child = make_workspace_child(ws, f"workspace-dup-{uuid.uuid4().hex[:6]}")
    post("/api/workspaces/remove", {"path": str(child)})
    post("/api/workspaces/add", {"path": str(child)})
    result, status = post("/api/workspaces/add", {"path": str(child)})
    assert status == 400 and "already" in result.get("error", "").lower()
    post("/api/workspaces/remove", {"path": str(child)})


def test_workspace_add_requires_path():
    _, status = post("/api/workspaces/add", {})
    assert status == 400
def test_workspace_remove(cleanup_test_sessions):
    _, ws = make_session_tracked(cleanup_test_sessions)
    child = make_workspace_child(ws, f"workspace-remove-{uuid.uuid4().hex[:6]}")
    post("/api/workspaces/remove", {"path": str(child)})  # idempotent pre-clean
    post("/api/workspaces/add", {"path": str(child), "name": "Temp"})
    result, status = post("/api/workspaces/remove", {"path": str(child)})
    remaining = [w["path"] for w in result["workspaces"]]
    assert status == 200 and str(child) not in remaining


def test_workspace_rename(cleanup_test_sessions):
    _, ws = make_session_tracked(cleanup_test_sessions)
    child = make_workspace_child(ws, f"workspace-rename-{uuid.uuid4().hex[:6]}")
    post("/api/workspaces/remove", {"path": str(child)})
    post("/api/workspaces/add", {"path": str(child), "name": "Temp"})
    result, status = post("/api/workspaces/rename", {"path": str(child), "name": "My Temp"})
    assert status == 200
    names_by_path = {w["path"]: w["name"] for w in result["workspaces"]}
    assert names_by_path.get(str(child)) == "My Temp"
    post("/api/workspaces/remove", {"path": str(child)})


def test_workspace_rename_unknown():
    """Renaming an unregistered path is a 404."""
    _, status = post("/api/workspaces/rename", {"path": "/no/such/path", "name": "X"})
    assert status == 404


def test_last_workspace_updates_on_session_update(cleanup_test_sessions):
    sid, ws = make_session_tracked(cleanup_test_sessions)
    child = make_workspace_child(ws, f"workspace-last-{uuid.uuid4().hex[:6]}")
    post("/api/session/update", {
        "session_id": sid,
        "workspace": str(child),
        "model": "openai/gpt-5.4-mini",
    })
    payload, _ = get("/api/workspaces")
    assert payload["last"] == str(child)
def test_file_save(cleanup_test_sessions):
    """Saving overwrites an existing workspace file."""
    sid, ws = make_session_tracked(cleanup_test_sessions)
    fname = f"save_{uuid.uuid4().hex[:6]}.txt"
    (ws / fname).write_text("original content")
    _, status = post("/api/file/save", {"session_id": sid, "path": fname, "content": "updated"})
    assert status == 200 and (ws / fname).read_text() == "updated"


def test_file_save_requires_fields(cleanup_test_sessions):
    sid, _ = make_session_tracked(cleanup_test_sessions)
    _, status = post("/api/file/save", {"session_id": sid})
    assert status == 400


def test_file_save_nonexistent_returns_404(cleanup_test_sessions):
    sid, _ = make_session_tracked(cleanup_test_sessions)
    _, status = post("/api/file/save", {"session_id": sid, "path": "no_such.txt", "content": ""})
    assert status == 404


def test_file_save_path_traversal_blocked(cleanup_test_sessions):
    """Path traversal outside the workspace must be rejected."""
    sid, _ = make_session_tracked(cleanup_test_sessions)
    _, status = post(
        "/api/file/save",
        {"session_id": sid, "path": "../../etc/passwd", "content": ""},
    )
    assert status in (400, 500)
def test_session_index_created_after_save(cleanup_test_sessions):
    """Creating a session keeps /api/sessions serving a list.

    The index lives in the TEST state dir, not the production dir, and
    cleanup may have already wiped the _index.json file — so we only
    assert the endpoint behaves, not that the file exists.  (The previous
    version computed the index path into two locals and never used them;
    that dead code is removed.)
    """
    make_session_tracked(cleanup_test_sessions)
    data, status = get("/api/sessions")
    assert status == 200
    assert isinstance(data["sessions"], list)


def test_sessions_endpoint_returns_sorted():
    """Sessions are returned newest-first when there are at least two."""
    data, status = get("/api/sessions")
    assert status == 200
    sessions = data["sessions"]
    if len(sessions) >= 2:
        assert sessions[0]["updated_at"] >= sessions[1]["updated_at"]


def test_new_session_inherits_last_workspace(cleanup_test_sessions):
    """A fresh session defaults to the most recently used workspace."""
    sid, ws = make_session_tracked(cleanup_test_sessions)
    child = make_workspace_child(ws, f"workspace-inherit-{uuid.uuid4().hex[:6]}")
    post("/api/session/update", {
        "session_id": sid,
        "workspace": str(child),
        "model": "openai/gpt-5.4-mini",
    })
    sid2, _ = make_session_tracked(cleanup_test_sessions)
    d, _ = get(f"/api/session?session_id={sid2}")
    assert d["session"]["workspace"] == str(child)

152
tests/test_sprint6.py Normal file
View File

@@ -0,0 +1,152 @@
"""Sprint 6 tests: Escape from editor, Phase D validation, HTML extraction, cron create, session export."""
import json, uuid, pathlib, urllib.request, urllib.error
REPO_ROOT = pathlib.Path(__file__).parent.parent.resolve()
from tests._pytest_port import BASE
def get(path):
    """GET ``BASE + path``; return (json_body, status)."""
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        return json.loads(resp.read()), resp.status


def get_raw(path):
    """GET ``BASE + path``; return (raw_bytes, headers, status)."""
    with urllib.request.urlopen(BASE + path, timeout=10) as resp:
        return resp.read(), resp.headers, resp.status


def post(path, body=None):
    """POST a JSON body; return (json_body, status), even on HTTP errors."""
    request = urllib.request.Request(
        BASE + path,
        data=json.dumps(body or {}).encode(),
        headers={"Content-Type": "application/json"},
    )
    try:
        with urllib.request.urlopen(request, timeout=10) as resp:
            return json.loads(resp.read()), resp.status
    except urllib.error.HTTPError as err:
        return json.loads(err.read()), err.code
def make_session_tracked(created_list, ws=None):
    """Create a session via POST /api/session/new and track its id for cleanup.

    Returns (session_id, workspace_path).
    """
    payload = {}
    if ws:
        payload["workspace"] = str(ws)
    data, _ = post("/api/session/new", payload)
    session = data["session"]
    created_list.append(session["session_id"])
    return session["session_id"], pathlib.Path(session["workspace"])
# ── Phase E: HTML served from static/index.html ──
def test_index_html_served():
    """The root page is served with all Phase-E UI anchors present."""
    raw, headers, status = get_raw("/")
    assert status == 200
    assert b"sidebarResize" in raw, "Resize handle not found in HTML"
    assert b"cronCreateForm" in raw, "Cron create form not found in HTML"
    assert b"btnHermesPanel" in raw, "Hermes control center trigger not found in HTML"
    assert b"btnExportJSON" in raw, "Export JSON button not found in HTML"


def test_index_html_file_exists():
    """static/index.html exists on disk and is non-trivial in size."""
    p = REPO_ROOT / "static/index.html"
    assert p.exists(), "static/index.html does not exist"
    assert p.stat().st_size > 5000, "index.html seems too small"


def test_server_py_has_no_html_string():
    """server.py no longer embeds the UI as an inline HTML string.

    read_text() is given an explicit encoding: the default is the locale
    encoding, which can fail on non-ASCII source under e.g. a C locale.
    """
    txt = (REPO_ROOT / "server.py").read_text(encoding="utf-8")
    assert 'HTML = r"""' not in txt, "server.py still contains inline HTML string"
    assert "doctype html" not in txt.lower(), "server.py still contains raw HTML"
# ── Phase D: remaining endpoint validation ──
def test_approval_respond_requires_session_id():
    _, status = post("/api/approval/respond", {"choice": "deny"})
    assert status == 400


def test_approval_respond_rejects_invalid_choice(cleanup_test_sessions):
    sid, _ = make_session_tracked(cleanup_test_sessions)
    _, status = post("/api/approval/respond", {"session_id": sid, "choice": "INVALID"})
    assert status == 400


def test_file_raw_requires_session_id():
    """/api/file/raw without a session_id is a 400."""
    try:
        get_raw("/api/file/raw?path=test.png")
    except urllib.error.HTTPError as e:
        assert e.code == 400
    else:
        assert False, "Expected 400"


def test_file_raw_unknown_session():
    """/api/file/raw with an unknown session is a 404."""
    try:
        get_raw("/api/file/raw?session_id=nosuchsession&path=test.png")
    except urllib.error.HTTPError as e:
        assert e.code == 404
    else:
        assert False, "Expected 404"
# ── Cron create ──
def test_cron_create_requires_prompt():
    result, status = post("/api/crons/create", {"schedule": "0 9 * * *"})
    assert status == 400
    assert "prompt" in result.get("error", "").lower()


def test_cron_create_requires_schedule():
    result, status = post("/api/crons/create", {"prompt": "Say hello"})
    assert status == 400
    assert "schedule" in result.get("error", "").lower()


def test_cron_create_invalid_schedule():
    """A garbage schedule string is rejected."""
    _, status = post("/api/crons/create", {
        "prompt": "Say hello", "schedule": "not_a_valid_schedule_xyz"
    })
    assert status == 400


def test_cron_create_success():
    uid = uuid.uuid4().hex[:6]
    result, status = post("/api/crons/create", {
        "name": f"test-job-{uid}",
        "prompt": "Just say 'hello' and nothing else.",
        "schedule": "every 999h",  # far future -- won't actually run during test
        "deliver": "local",
    })
    assert status == 200, f"Expected 200 got {status}: {result}"
    assert result["ok"] is True
    assert "job" in result
    job_id = result["job"]["id"]
    # Verify it appears in the cron list
    jobs, _ = get("/api/crons")
    listed_ids = [j["id"] for j in jobs["jobs"]]
    assert job_id in listed_ids, f"Created job {job_id} not in list"
# ── Session export ──
def test_session_export_requires_session_id():
    """Export without session_id is a 400."""
    try:
        get_raw("/api/session/export")
    except urllib.error.HTTPError as e:
        assert e.code == 400
    else:
        assert False


def test_session_export_unknown_session():
    """Export of an unknown session is a 404."""
    try:
        get_raw("/api/session/export?session_id=nosuchsession")
    except urllib.error.HTTPError as e:
        assert e.code == 404
    else:
        assert False


def test_session_export_returns_json(cleanup_test_sessions):
    sid, _ = make_session_tracked(cleanup_test_sessions)
    raw, headers, status = get_raw(f"/api/session/export?session_id={sid}")
    assert status == 200
    assert "application/json" in headers.get("Content-Type", "")
    exported = json.loads(raw)
    assert exported["session_id"] == sid
    assert "messages" in exported
    assert "title" in exported
# ── Resizable panels: static files present ──
def test_static_index_has_resize_handles():
    """Both panel resize handles are present in the served HTML."""
    raw, _, status = get_raw("/")
    assert status == 200
    assert b"sidebarResize" in raw
    assert b"rightpanelResize" in raw


def test_app_js_has_resize_logic():
    """Sprint 9: app.js replaced by modules. Resize logic lives in boot.js."""
    raw, _, status = get_raw("/static/boot.js")
    assert status == 200
    for marker in (b"_initResizePanels", b"hermes-sidebar-w", b"hermes-panel-w"):
        assert marker in raw

Some files were not shown because too many files have changed in this diff Show More