Hermes WebUI v0.1.0 — initial public release

This commit is contained in:
Nathan Esquenazi
2026-03-30 20:40:19 -07:00
commit a4e2174c29
41 changed files with 11380 additions and 0 deletions

416
tests/test_regressions.py Normal file
View File

@@ -0,0 +1,416 @@
"""
Regression tests -- one test per bug that was introduced and fixed.
These tests exist specifically to prevent those bugs from silently returning.
Each test is tagged with the sprint/commit where the bug was found and fixed.
"""
import json
import pathlib
import time
import urllib.error
import urllib.request
import urllib.parse
# Repository root: the directory two levels above this test file, resolved.
REPO_ROOT = pathlib.Path(__file__).parent.parent.resolve()
# Base URL that the HTTP helpers in this module prepend to every request path.
BASE = "http://127.0.0.1:8788"
def get(path):
    """GET ``BASE + path`` and decode the JSON response body.

    Returns a ``(payload, status)`` tuple. HTTP error responses (4xx/5xx) are
    returned the same way instead of raising, mirroring ``post()``, so callers
    can assert on the status code and show the payload in the failure message.
    """
    try:
        with urllib.request.urlopen(BASE + path, timeout=10) as r:
            return json.loads(r.read()), r.status
    except urllib.error.HTTPError as e:
        # urlopen raises for 4xx/5xx; hand the status back to the caller
        # so `assert status == 200, ...` produces a readable failure.
        return json.loads(e.read()), e.code
def get_raw(path):
    """Fetch ``BASE + path`` and return the response without decoding it.

    Returns ``(body_bytes, content_type, status)``; ``content_type`` is an
    empty string when the response carries no Content-Type header.
    """
    url = BASE + path
    with urllib.request.urlopen(url, timeout=10) as resp:
        body = resp.read()
        content_type = resp.headers.get("Content-Type","")
        return body, content_type, resp.status
def post(path, body=None):
    """Send ``body`` (default: empty object) as JSON to ``BASE + path``.

    Returns ``(payload, status)`` where ``payload`` is the decoded JSON body.
    HTTP error responses are returned the same way rather than raised.
    """
    payload = json.dumps(body or {}).encode()
    request = urllib.request.Request(
        BASE + path,
        data=payload,
        headers={"Content-Type": "application/json"},
    )
    try:
        response = urllib.request.urlopen(request, timeout=10)
    except urllib.error.HTTPError as err:
        # Error statuses still carry a JSON body; report it with the code.
        return json.loads(err.read()), err.code
    with response:
        return json.loads(response.read()), response.status
def make_session(created_list):
    """Create a session through the API and record its id.

    The new session id is appended to ``created_list`` and also returned.
    """
    response, _status = post("/api/session/new", {})
    session_id = response["session"]["session_id"]
    created_list.append(session_id)
    return session_id
# ── R1: uuid not imported in server.py (Sprint 10 split regression) ──────────
def test_chat_start_returns_stream_id(cleanup_test_sessions):
    """R1: chat/start has to answer with a stream_id.

    Guards against the missing ``uuid`` import, which made the endpoint
    return 500 (NameError).
    """
    sid = make_session(cleanup_test_sessions)
    request_body = {
        "session_id": sid,
        "message": "ping",
        "model": "openai/gpt-5.4-mini",
    }
    data, status = post("/api/chat/start", request_body)

    # A 200 carrying a plausible stream_id is required -- never a 500.
    assert status == 200, f"chat/start failed with {status}: {data}"
    assert "stream_id" in data, "stream_id missing from chat/start response"
    stream_id = data["stream_id"]
    assert len(stream_id) > 8, "stream_id looks invalid"

    # Session is removed here, so empty the list of sessions recorded so far.
    post("/api/session/delete", {"session_id": sid})
    cleanup_test_sessions.clear()
# ── R2: AIAgent not imported in api/streaming.py (Sprint 10 split regression) ─
def test_chat_stream_opens_successfully(cleanup_test_sessions):
    """R2: After chat/start, GET /api/chat/stream must return 200 (SSE opens).

    When AIAgent was missing, the thread crashed immediately, popped STREAMS,
    and the SSE GET returned 404.

    Only the response status and headers are checked; the stream body is not
    read. A timeout or dropped connection while opening the stream is
    tolerated (best effort), but an HTTP error status or a wrong
    status/Content-Type on an opened response fails the test.
    """
    import http.client  # local import, like the other in-test imports here

    sid = make_session(cleanup_test_sessions)
    data, status = post("/api/chat/start", {
        "session_id": sid,
        "message": "say: hello",
        "model": "openai/gpt-5.4-mini",
    })
    assert status == 200, f"chat/start failed: {data}"
    stream_id = data["stream_id"]

    # Keep the try body to the single call that may legitimately time out.
    # The assertions live outside it so that no handler can swallow them
    # (AssertionError is a subclass of Exception).
    req = urllib.request.Request(BASE + f"/api/chat/stream?stream_id={stream_id}")
    response = None
    try:
        response = urllib.request.urlopen(req, timeout=3)
    except urllib.error.HTTPError as e:
        raise AssertionError(
            f"SSE stream returned {e.code} -- AIAgent may not be imported"
        ) from e
    except (OSError, http.client.HTTPException):
        # Timeout or connection close while opening the stream is fine.
        pass

    if response is not None:
        # `with` closes the response even when an assertion below fails.
        with response:
            assert response.status == 200, \
                f"SSE stream returned {response.status} (expected 200)"
            ct = response.headers.get("Content-Type", "")
            assert "text/event-stream" in ct, f"Wrong Content-Type: {ct}"

    post("/api/session/delete", {"session_id": sid})
    cleanup_test_sessions.clear()
# ── R3: Session.__init__ missing tool_calls param (Sprint 10 split regression) ─
def test_session_with_tool_calls_in_json_loads_ok(cleanup_test_sessions):
    """R3: a session whose stored JSON contains tool_calls must still load.

    Before Session.__init__ accepted tool_calls=None, loading such a session
    raised TypeError (unexpected keyword argument) and surfaced as a 500.
    """
    sid = make_session(cleanup_test_sessions)

    # Write tool_calls straight into the session's JSON file, if it exists.
    # NOTE: when the file is absent the injection is skipped silently.
    session_file = (
        pathlib.Path.home() / ".hermes" / "webui-mvp-test" / "sessions" / f"{sid}.json"
    )
    if session_file.exists():
        stored = json.loads(session_file.read_text())
        stored["tool_calls"] = [
            {
                "name": "terminal",
                "snippet": "test output",
                "tid": "test_tid_001",
                "assistant_msg_idx": 1,
            }
        ]
        session_file.write_text(json.dumps(stored))

    # Fetching the session has to succeed with 200 instead of 500.
    quoted_sid = urllib.parse.quote(sid)
    data, status = get(f"/api/session?session_id={quoted_sid}")
    assert status == 200, f"Session with tool_calls returned {status}: {data}"
    assert data["session"]["session_id"] == sid

    post("/api/session/delete", {"session_id": sid})
    cleanup_test_sessions.clear()
# ── R4: has_pending not imported in streaming.py (Sprint 10 split regression) ─
def test_streaming_py_imports_has_pending(cleanup_test_sessions):
    """R4: api/streaming.py must import or define has_pending.

    When missing, the approval check mid-stream caused NameError.

    The source is parsed with ``ast`` so the check looks at real bindings:
    if ``has_pending`` is used as a bare name anywhere in the module, some
    import, ``def`` or assignment must bind it. A star import is accepted
    because its contents cannot be verified statically. Attribute access such
    as ``module.has_pending`` needs no binding of the bare name.
    """
    import ast

    src = (REPO_ROOT / "api/streaming.py").read_text()
    assert "has_pending" in src, "has_pending not found in api/streaming.py"

    bound = False      # something binds the name has_pending
    used_bare = False  # has_pending is read as a plain name
    for node in ast.walk(ast.parse(src)):
        if isinstance(node, (ast.Import, ast.ImportFrom)):
            for alias in node.names:
                if alias.name == "*" or (alias.asname or alias.name) == "has_pending":
                    bound = True
        elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            if node.name == "has_pending":
                bound = True
        elif isinstance(node, ast.Name) and node.id == "has_pending":
            if isinstance(node.ctx, ast.Store):
                bound = True
            else:
                used_bare = True

    # Verify it's actually imported/defined (not just used)
    assert bound or not used_bare, \
        "has_pending must be imported in api/streaming.py"
def test_aiagent_imported_in_streaming(cleanup_test_sessions):
    """R2b: api/streaming.py has to import AIAgent.

    Without the import the streaming thread crashed right after it was spawned.
    This is a textual check of the module source.
    """
    streaming_src = (REPO_ROOT / "api/streaming.py").read_text()
    assert "AIAgent" in streaming_src, "AIAgent not referenced in api/streaming.py"

    # Either spelling of the import statement is accepted.
    accepted_forms = ("from run_agent import AIAgent", "import AIAgent")
    assert any(form in streaming_src for form in accepted_forms), \
        "AIAgent must be imported in api/streaming.py"
# ── R5: SSE loop did not break on cancel event (Sprint 10 bug) ───────────────
def test_cancel_nonexistent_stream_returns_not_cancelled(cleanup_test_sessions):
    """R5a: cancelling an unknown stream answers ok with cancelled set to false."""
    payload, status = get("/api/chat/cancel?stream_id=nonexistent_test_xyz")
    assert status == 200
    # Both flags must be real booleans, hence the identity checks.
    assert payload["ok"] is True
    assert payload["cancelled"] is False
def test_server_py_sse_loop_breaks_on_cancel(cleanup_test_sessions):
    """R5b: the SSE loop in server.py must break on the 'cancel' event.

    Without it the connection hung after the cancel event was processed.
    The check is textual: it locates the first ``if event in (...): break``
    statement in server.py and requires 'cancel' to appear in it.
    """
    import re

    server_src = (REPO_ROOT / "server.py").read_text()
    break_pattern = re.compile(r"if event in \([^)]+\):\s*break")
    match = break_pattern.search(server_src)
    assert match, "SSE break condition not found in server.py"

    condition = match.group()
    assert "cancel" in condition, \
        f"'cancel' missing from SSE break condition: {condition}"
# ── R6: Test cron isolation (Sprint 10) ──────────────────────────────────────
def test_real_jobs_json_not_polluted_by_tests(cleanup_test_sessions):
    """R6: Test runs must not write to the real ~/.hermes/cron/jobs.json.

    When HERMES_HOME isolation was missing, every test run added test-job-*
    entries.

    The file is the user's own data, so it is read defensively: a missing
    file passes, the top level may be a list or a dict with a "jobs" key, and
    entries that are not objects or whose "name" is not a string are ignored
    instead of crashing the test.
    """
    real_jobs_path = pathlib.Path.home() / ".hermes" / "cron" / "jobs.json"
    if not real_jobs_path.exists():
        return  # no jobs file at all -- fine

    jobs = json.loads(real_jobs_path.read_text(encoding="utf-8"))
    if isinstance(jobs, dict):
        jobs = jobs.get("jobs", [])

    # `jobs or []` covers an explicit null; the isinstance guards cover
    # malformed entries and `"name": null`.
    candidate_names = (j.get("name") for j in (jobs or []) if isinstance(j, dict))
    test_job_names = [
        name
        for name in candidate_names
        if isinstance(name, str) and name.startswith("test-job-")
    ]
    assert not test_job_names, \
        f"Real jobs.json contains {len(test_job_names)} test-job-* entries: " \
        f"{test_job_names}"
# ── General: api modules all importable ──────────────────────────────────────
def test_all_api_modules_importable(cleanup_test_sessions):
    """Every api/*.py module must parse without syntax errors.

    This is a static check: each file is parsed with ``ast.parse`` and never
    executed, so it catches syntax errors introduced during module splits but
    cannot by itself detect a missing import (NameError/ImportError only
    appear at run time).

    All broken modules are reported together, and the test fails if no
    modules are found so that it cannot pass vacuously.
    """
    import ast

    api_dir = REPO_ROOT / "api"
    module_files = sorted(api_dir.glob("*.py"))
    assert module_files, f"no api/*.py modules found under {api_dir}"

    broken = []
    for module_file in module_files:
        try:
            ast.parse(module_file.read_text(), filename=str(module_file))
        except SyntaxError as e:
            broken.append(f"{module_file.name} has syntax error: {e}")
    assert not broken, "; ".join(broken)
def test_server_py_importable(cleanup_test_sessions):
    """server.py has to stay free of syntax errors after any split.

    The file is parsed with ``ast.parse``; it is not executed.
    """
    import ast
    import pathlib

    server_src = (REPO_ROOT / "server.py").read_text()
    syntax_error = None
    try:
        ast.parse(server_src)
    except SyntaxError as exc:
        syntax_error = exc
    assert syntax_error is None, f"server.py has syntax error: {syntax_error}"
# ── R7: Cross-session busy state bleed ───────────────────────────────────────
def test_loadSession_resets_busy_state_for_idle_session(cleanup_test_sessions):
    """R7: loadSession in sessions.js must reset S.busy for a non-inflight session.

    Without the reset, switching from a busy session to an idle one left the
    Send button disabled, showed the wrong activity bar, and pointed Cancel at
    the wrong stream. This is a textual check of static/sessions.js.
    """
    sessions_js = (REPO_ROOT / "static/sessions.js").read_text()

    # (required snippet, failure message): explicit busy reset first, then
    # the explicit re-enable of the Send button.
    required_snippets = (
        (
            "S.busy=false;",
            "sessions.js loadSession must set S.busy=false when loading a non-inflight session",
        ),
        (
            "$('btnSend').disabled=false;",
            "sessions.js loadSession must enable btnSend for non-inflight sessions",
        ),
    )
    for snippet, failure_message in required_snippets:
        assert snippet in sessions_js, failure_message
def test_done_handler_guards_setbusy_with_inflight_check(cleanup_test_sessions):
    """R7b: done/error handlers in messages.js must not call setBusy(false)
    while the currently viewed session is itself still in-flight.

    Without the guard, finishing session A while viewing in-flight session B
    would disable B's Send button. Textual check of static/messages.js.
    """
    messages_js = (REPO_ROOT / "static/messages.js").read_text()
    # The guard looks up the viewed session in INFLIGHT.
    guard_snippet = "INFLIGHT[S.session.session_id]"
    assert guard_snippet in messages_js, "messages.js must guard setBusy(false) with INFLIGHT check for current session"
def test_cancel_button_not_cleared_across_sessions(cleanup_test_sessions):
    """R7c: the Cancel button and activeStreamId may only be cleared when the
    done/error event belongs to the session currently being viewed.

    Textual check of static/messages.js for the session-identity comparison
    introduced by the fix.
    """
    messages_js = (REPO_ROOT / "static/messages.js").read_text()
    identity_check = "S.session.session_id===activeSid"
    assert identity_check in messages_js, "messages.js must guard activeStreamId/Cancel clearing with session identity check"
# ── R8: Session delete does not invalidate index (ghost sessions) ─────────────
def test_deleted_session_does_not_appear_in_list(cleanup_test_sessions):
    """R8: After deleting a session, it must not appear in /api/sessions.

    When _index.json was not invalidated on delete, the session reappeared
    in the list even after the JSON file was removed.

    The session is created through ``make_session`` so its id is recorded in
    ``cleanup_test_sessions``; if an assertion fails before the delete, the
    id is still in that list. Once the delete succeeds the id is removed
    from the list again.
    """
    # Create a session with a title so it shows in the list
    sid = make_session(cleanup_test_sessions)
    post("/api/session/rename", {"session_id": sid, "title": "regression-test-delete-R8"})

    # Verify it appears
    sessions, _ = get("/api/sessions")
    ids_before = {s["session_id"] for s in sessions["sessions"]}
    assert sid in ids_before, "Session must appear in list before delete"

    # Delete it
    result, status = post("/api/session/delete", {"session_id": sid})
    assert status == 200 and result.get("ok") is True, \
        f"session/delete failed with {status}: {result}"
    # Already deleted: drop it from the recorded ids.
    if sid in cleanup_test_sessions:
        cleanup_test_sessions.remove(sid)

    # Verify it no longer appears in a fresh listing
    sessions2, _ = get("/api/sessions")
    ids_after = {s["session_id"] for s in sessions2["sessions"]}
    assert sid not in ids_after, f"Deleted session {sid} still appears in list -- index not invalidated on delete"
def test_server_delete_invalidates_index(cleanup_test_sessions):
    """R8b: the session/delete handler in server.py must touch SESSION_INDEX_FILE.

    Static check that the fix is in place: the 600 characters starting at the
    handler's ``if`` line must mention SESSION_INDEX_FILE.
    """
    server_src = (REPO_ROOT / "server.py").read_text()
    handler_start = server_src.find("if parsed.path == '/api/session/delete':")
    assert handler_start >= 0, "session/delete handler not found"

    handler_window = server_src[handler_start:handler_start + 600]
    assert "SESSION_INDEX_FILE" in handler_window, "server.py session/delete must invalidate SESSION_INDEX_FILE"
# ── R9: Token/tool SSE events write to wrong session after switch ─────────────
def test_token_handler_guards_session_id(cleanup_test_sessions):
    """R9a: the SSE token handler must check activeSid before writing to the DOM.

    Without the check, tokens from session A rendered into session B's message
    area when the user switched sessions mid-stream. Textual check of the
    first 300 characters of the handler in static/messages.js.
    """
    messages_js = (REPO_ROOT / "static/messages.js").read_text()
    handler_start = messages_js.find("es.addEventListener('token'")
    assert handler_start >= 0, "token event handler not found"

    handler_window = messages_js[handler_start:handler_start + 300]
    assert "activeSid" in handler_window, "token handler must check activeSid before writing to DOM"

    # Either polarity of the comparison satisfies the guard.
    comparisons = (
        "S.session.session_id!==activeSid",
        "S.session.session_id===activeSid",
    )
    assert any(cmp in handler_window for cmp in comparisons), "token handler must compare current session to activeSid"
def test_tool_handler_guards_session_id(cleanup_test_sessions):
    """R9b: the SSE tool handler must check activeSid before writing to the DOM.

    Without the check, tool cards from session A rendered into session B's
    message area. Textual check of the first 400 characters of the handler.
    """
    messages_js = (REPO_ROOT / "static/messages.js").read_text()
    handler_start = messages_js.find("es.addEventListener('tool'")
    assert handler_start >= 0, "tool event handler not found"

    handler_window = messages_js[handler_start:handler_start + 400]
    assert "activeSid" in handler_window, "tool handler must check activeSid before writing to DOM"
# ── R10: respondApproval uses wrong session_id after switch (multi-session) ─
def test_respond_approval_uses_approval_session_id(cleanup_test_sessions):
    """R10: respondApproval must use the id of the session that triggered the
    approval rather than S.session.session_id, which may point at a different
    session if the user switched while the approval was pending.

    Textual check: _approvalSessionId must exist in static/messages.js and
    appear within the first 300 characters of respondApproval.
    """
    messages_js = (REPO_ROOT / "static/messages.js").read_text()
    tracker = "_approvalSessionId"

    # The fix introduces the tracker variable somewhere in the file...
    assert tracker in messages_js, "messages.js must use _approvalSessionId in respondApproval"

    # ...and respondApproval itself must read it.
    fn_start = messages_js.find("async function respondApproval(")
    assert fn_start >= 0, "respondApproval not found"
    fn_window = messages_js[fn_start:fn_start + 300]
    assert tracker in fn_window, "respondApproval must read _approvalSessionId, not S.session.session_id"
# ── R11: Activity bar shows cross-session tool status ─────────────────────
def test_tool_status_only_shown_for_current_session(cleanup_test_sessions):
    """R11: setStatus() in the tool SSE handler may only fire while the user
    is viewing the session that triggered the tool.

    Without the guard, session A's tool names appeared in session B's activity
    bar. Textual check on the first 400 characters of the handler: the
    ``===activeSid`` guard must be present, and ``setStatus(`` must come after
    the first mention of ``activeSid``.
    """
    messages_js = (REPO_ROOT / "static/messages.js").read_text()
    handler_start = messages_js.find("es.addEventListener('tool'")
    assert handler_start >= 0
    handler_window = messages_js[handler_start:handler_start + 400]

    set_status_at = handler_window.find("setStatus(")
    guard_at = handler_window.find("S.session.session_id===activeSid")
    first_active_sid_at = handler_window.find("activeSid")

    assert guard_at >= 0, "tool handler must guard with activeSid check"
    # The status call has to sit after the activeSid reference, i.e. inside
    # or behind the guard, so it only fires for the current session.
    assert set_status_at > first_active_sid_at, "setStatus in tool handler must be inside the activeSid guard"
# ── R12: Live tool cards lost on switch-away and switch-back ──────────────
def test_loadSession_inflight_restores_live_tool_cards(cleanup_test_sessions):
    """R12: switching back to an in-flight session must restore the live tool
    cards in #liveToolCards from S.toolCalls.

    Without it, tool cards vanished on switch-away even though the session was
    still processing. Textual check of the 500 characters following the
    INFLIGHT branch in static/sessions.js.
    """
    sessions_js = (REPO_ROOT / "static/sessions.js").read_text()
    branch_start = sessions_js.find("if(INFLIGHT[sid]){")
    assert branch_start >= 0, "INFLIGHT branch not found in loadSession"

    branch_window = sessions_js[branch_start:branch_start + 500]
    required_calls = (
        (
            "appendLiveToolCard",
            "loadSession INFLIGHT branch must restore live tool cards via appendLiveToolCard",
        ),
        (
            "clearLiveToolCards",
            "loadSession INFLIGHT branch must clear old live cards before restoring",
        ),
    )
    for call_name, failure_message in required_calls:
        assert call_name in branch_window, failure_message
# ── R13: renderMessages() called before S.busy=false in done handler ────────
def test_done_handler_sets_busy_false_before_renderMessages(cleanup_test_sessions):
    """R13: the done handler must set S.busy to false BEFORE renderMessages()
    runs for the active session.

    renderMessages() uses a !S.busy guard to decide whether settled tool cards
    are rendered; with S.busy still true they are skipped entirely after a
    response completes. Textual ordering check over the first 1500 characters
    of the done handler in static/messages.js.
    """
    messages_js = (REPO_ROOT / "static/messages.js").read_text()
    handler_start = messages_js.find("es.addEventListener('done'")
    assert handler_start >= 0
    handler_window = messages_js[handler_start:handler_start + 1500]

    busy_at = handler_window.find("S.busy=false;")
    render_at = handler_window.find("renderMessages()")
    assert busy_at >= 0, "done handler must set S.busy=false before renderMessages()"
    assert busy_at < render_at, f"S.busy=false (pos {busy_at}) must come before renderMessages() (pos {render_at})"
# ── R14: send() uses stale modelSelect.value instead of session model ────────
def test_send_uses_session_model_as_authoritative_source(cleanup_test_sessions):
    """R14: send() must treat S.session.model as the authoritative model and
    not rely solely on $('modelSelect').value.

    A session created with a model missing from the current dropdown left the
    select value stale after switching sessions, so the wrong model was sent.
    Textual check of the 300 characters following the first "/api/chat/start"
    in static/messages.js.
    """
    messages_js = (REPO_ROOT / "static/messages.js").read_text()
    endpoint_at = messages_js.find("/api/chat/start")
    assert endpoint_at >= 0

    payload_window = messages_js[endpoint_at:endpoint_at + 300]
    assert "S.session.model" in payload_window, "send() must use S.session.model in the chat/start payload"
# ── R15: newSession does not clear live tool cards ────────────────────────────
def test_newSession_clears_live_tool_cards(cleanup_test_sessions):
    """R15: newSession() must call clearLiveToolCards() so live cards from a
    previous in-flight session don't persist when starting a fresh conversation.

    Textual check of static/sessions.js. The body of newSession is taken to
    run from its ``async function newSession(`` header up to the next
    ``async function`` header, or to the end of the file when newSession is
    the last async function.
    """
    src = (REPO_ROOT / "static/sessions.js").read_text()
    header = "async function newSession("
    new_sess_idx = src.find(header)
    assert new_sess_idx >= 0, "newSession() not found in static/sessions.js"

    # Find end of newSession (next async function). str.find returns -1 when
    # there is none; using -1 as a slice end would drop the final character,
    # so fall back to the end of the file explicitly.
    next_fn = src.find("async function ", new_sess_idx + len(header))
    if next_fn < 0:
        next_fn = len(src)

    new_sess_body = src[new_sess_idx:next_fn]
    assert "clearLiveToolCards" in new_sess_body, "newSession() must call clearLiveToolCards() to clear stale live cards"