Hermes WebUI v0.1.0 — initial public release
This commit is contained in:
416
tests/test_regressions.py
Normal file
416
tests/test_regressions.py
Normal file
@@ -0,0 +1,416 @@
|
||||
"""
|
||||
Regression tests -- one test per bug that was introduced and fixed.
|
||||
These tests exist specifically to prevent those bugs from silently returning.
|
||||
|
||||
Each test is tagged with the sprint/commit where the bug was found and fixed.
|
||||
"""
|
||||
import json
|
||||
import pathlib
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
import urllib.parse
|
||||
REPO_ROOT = pathlib.Path(__file__).parent.parent.resolve()
|
||||
|
||||
BASE = "http://127.0.0.1:8788"
|
||||
|
||||
def get(path):
|
||||
with urllib.request.urlopen(BASE + path, timeout=10) as r:
|
||||
return json.loads(r.read()), r.status
|
||||
|
||||
def get_raw(path):
|
||||
with urllib.request.urlopen(BASE + path, timeout=10) as r:
|
||||
return r.read(), r.headers.get("Content-Type",""), r.status
|
||||
|
||||
def post(path, body=None):
|
||||
data = json.dumps(body or {}).encode()
|
||||
req = urllib.request.Request(
|
||||
BASE + path, data=data, headers={"Content-Type": "application/json"}
|
||||
)
|
||||
try:
|
||||
with urllib.request.urlopen(req, timeout=10) as r:
|
||||
return json.loads(r.read()), r.status
|
||||
except urllib.error.HTTPError as e:
|
||||
return json.loads(e.read()), e.code
|
||||
|
||||
def make_session(created_list):
|
||||
d, _ = post("/api/session/new", {})
|
||||
sid = d["session"]["session_id"]
|
||||
created_list.append(sid)
|
||||
return sid
|
||||
|
||||
|
||||
# ── R1: uuid not imported in server.py (Sprint 10 split regression) ──────────
|
||||
|
||||
def test_chat_start_returns_stream_id(cleanup_test_sessions):
|
||||
"""R1: chat/start must return stream_id -- catches missing uuid import.
|
||||
When uuid was missing, this returned 500 (NameError).
|
||||
"""
|
||||
sid = make_session(cleanup_test_sessions)
|
||||
data, status = post("/api/chat/start", {
|
||||
"session_id": sid,
|
||||
"message": "ping",
|
||||
"model": "openai/gpt-5.4-mini",
|
||||
})
|
||||
# Must return 200 with a stream_id -- not 500
|
||||
assert status == 200, f"chat/start failed with {status}: {data}"
|
||||
assert "stream_id" in data, "stream_id missing from chat/start response"
|
||||
assert len(data["stream_id"]) > 8, "stream_id looks invalid"
|
||||
post("/api/session/delete", {"session_id": sid})
|
||||
cleanup_test_sessions.clear()
|
||||
|
||||
|
||||
# ── R2: AIAgent not imported in api/streaming.py (Sprint 10 split regression) ─
|
||||
|
||||
def test_chat_stream_opens_successfully(cleanup_test_sessions):
|
||||
"""R2: After chat/start, GET /api/chat/stream must return 200 (SSE opens).
|
||||
When AIAgent was missing, the thread crashed immediately, popped STREAMS,
|
||||
and the SSE GET returned 404.
|
||||
"""
|
||||
sid = make_session(cleanup_test_sessions)
|
||||
data, status = post("/api/chat/start", {
|
||||
"session_id": sid,
|
||||
"message": "say: hello",
|
||||
"model": "openai/gpt-5.4-mini",
|
||||
})
|
||||
assert status == 200, f"chat/start failed: {data}"
|
||||
stream_id = data["stream_id"]
|
||||
|
||||
# Open the SSE stream -- must return 200, not 404
|
||||
# We only check headers (don't read the full stream body)
|
||||
req = urllib.request.Request(BASE + f"/api/chat/stream?stream_id={stream_id}")
|
||||
try:
|
||||
r = urllib.request.urlopen(req, timeout=3)
|
||||
assert r.status == 200, f"SSE stream returned {r.status} (expected 200)"
|
||||
ct = r.headers.get("Content-Type", "")
|
||||
assert "text/event-stream" in ct, f"Wrong Content-Type: {ct}"
|
||||
r.close()
|
||||
except urllib.error.HTTPError as e:
|
||||
assert False, f"SSE stream returned {e.code} -- AIAgent may not be imported"
|
||||
except Exception:
|
||||
pass # timeout or connection close after brief read is fine
|
||||
|
||||
post("/api/session/delete", {"session_id": sid})
|
||||
cleanup_test_sessions.clear()
|
||||
|
||||
|
||||
# ── R3: Session.__init__ missing tool_calls param (Sprint 10 split regression) ─
|
||||
|
||||
def test_session_with_tool_calls_in_json_loads_ok(cleanup_test_sessions):
|
||||
"""R3: Sessions that have tool_calls in their JSON must load without 500.
|
||||
When tool_calls=None was missing from Session.__init__, loading such sessions
|
||||
threw TypeError: unexpected keyword argument.
|
||||
"""
|
||||
sid = make_session(cleanup_test_sessions)
|
||||
|
||||
# Manually inject tool_calls into the session's JSON file
|
||||
sessions_dir = pathlib.Path.home() / ".hermes" / "webui-mvp-test" / "sessions"
|
||||
session_file = sessions_dir / f"{sid}.json"
|
||||
if session_file.exists():
|
||||
d = json.loads(session_file.read_text())
|
||||
d["tool_calls"] = [
|
||||
{"name": "terminal", "snippet": "test output", "tid": "test_tid_001", "assistant_msg_idx": 1}
|
||||
]
|
||||
session_file.write_text(json.dumps(d))
|
||||
|
||||
# Loading the session must return 200, not 500
|
||||
data, status = get(f"/api/session?session_id={urllib.parse.quote(sid)}")
|
||||
assert status == 200, f"Session with tool_calls returned {status}: {data}"
|
||||
assert data["session"]["session_id"] == sid
|
||||
|
||||
post("/api/session/delete", {"session_id": sid})
|
||||
cleanup_test_sessions.clear()
|
||||
|
||||
|
||||
# ── R4: has_pending not imported in streaming.py (Sprint 10 split regression) ─
|
||||
|
||||
def test_streaming_py_imports_has_pending(cleanup_test_sessions):
|
||||
"""R4: api/streaming.py must import or define has_pending.
|
||||
When missing, the approval check mid-stream caused NameError.
|
||||
"""
|
||||
src = (REPO_ROOT / "api/streaming.py").read_text()
|
||||
assert "has_pending" in src, "has_pending not found in api/streaming.py"
|
||||
# Verify it's imported (not just used)
|
||||
assert "import" in src and "has_pending" in src, \
|
||||
"has_pending must be imported in api/streaming.py"
|
||||
|
||||
|
||||
def test_aiagent_imported_in_streaming(cleanup_test_sessions):
|
||||
"""R2b: api/streaming.py must import AIAgent.
|
||||
When missing, the streaming thread crashed immediately after being spawned.
|
||||
"""
|
||||
src = (REPO_ROOT / "api/streaming.py").read_text()
|
||||
assert "AIAgent" in src, "AIAgent not referenced in api/streaming.py"
|
||||
assert "from run_agent import AIAgent" in src or "import AIAgent" in src, \
|
||||
"AIAgent must be imported in api/streaming.py"
|
||||
|
||||
|
||||
# ── R5: SSE loop did not break on cancel event (Sprint 10 bug) ───────────────
|
||||
|
||||
def test_cancel_nonexistent_stream_returns_not_cancelled(cleanup_test_sessions):
|
||||
"""R5a: Cancel endpoint works and returns cancelled:false for unknown stream."""
|
||||
data, status = get("/api/chat/cancel?stream_id=nonexistent_test_xyz")
|
||||
assert status == 200
|
||||
assert data["ok"] is True
|
||||
assert data["cancelled"] is False
|
||||
|
||||
|
||||
def test_server_py_sse_loop_breaks_on_cancel(cleanup_test_sessions):
|
||||
"""R5b: server.py SSE loop must include 'cancel' in the break condition.
|
||||
When missing, the connection hung after the cancel event was processed.
|
||||
"""
|
||||
src = (REPO_ROOT / "server.py").read_text()
|
||||
# Find the SSE break condition
|
||||
import re
|
||||
m = re.search(r"if event in \([^)]+\):\s*break", src)
|
||||
assert m, "SSE break condition not found in server.py"
|
||||
assert "cancel" in m.group(), \
|
||||
f"'cancel' missing from SSE break condition: {m.group()}"
|
||||
|
||||
|
||||
# ── R6: Test cron isolation (Sprint 10) ──────────────────────────────────────
|
||||
|
||||
def test_real_jobs_json_not_polluted_by_tests(cleanup_test_sessions):
|
||||
"""R6: Test runs must not write to the real ~/.hermes/cron/jobs.json.
|
||||
When HERMES_HOME isolation was missing, every test run added test-job-* entries.
|
||||
"""
|
||||
real_jobs_path = pathlib.Path.home() / ".hermes" / "cron" / "jobs.json"
|
||||
if not real_jobs_path.exists():
|
||||
return # no jobs file at all -- fine
|
||||
|
||||
jobs = json.loads(real_jobs_path.read_text())
|
||||
if isinstance(jobs, dict):
|
||||
jobs = jobs.get("jobs", [])
|
||||
|
||||
test_jobs = [j for j in jobs if j.get("name", "").startswith("test-job-")]
|
||||
assert len(test_jobs) == 0, \
|
||||
f"Real jobs.json contains {len(test_jobs)} test-job-* entries: " \
|
||||
f"{[j['name'] for j in test_jobs]}"
|
||||
|
||||
|
||||
# ── General: api modules all importable ──────────────────────────────────────
|
||||
|
||||
def test_all_api_modules_importable(cleanup_test_sessions):
|
||||
"""All api/ modules must be importable without NameError or ImportError.
|
||||
Catches missing imports introduced during future module splits.
|
||||
"""
|
||||
import ast, pathlib
|
||||
api_dir = REPO_ROOT / "api"
|
||||
for module_file in api_dir.glob("*.py"):
|
||||
src = module_file.read_text()
|
||||
try:
|
||||
ast.parse(src)
|
||||
except SyntaxError as e:
|
||||
assert False, f"{module_file.name} has syntax error: {e}"
|
||||
|
||||
|
||||
def test_server_py_importable(cleanup_test_sessions):
|
||||
"""server.py must parse without syntax errors after any split."""
|
||||
import ast, pathlib
|
||||
src = (REPO_ROOT / "server.py").read_text()
|
||||
try:
|
||||
ast.parse(src)
|
||||
except SyntaxError as e:
|
||||
assert False, f"server.py has syntax error: {e}"
|
||||
|
||||
# ── R7: Cross-session busy state bleed ───────────────────────────────────────
|
||||
|
||||
def test_loadSession_resets_busy_state_for_idle_session(cleanup_test_sessions):
|
||||
"""R7: sessions.js loadSession for a non-inflight session must reset S.busy to false.
|
||||
When missing, switching from a busy session to an idle one left the Send button
|
||||
disabled, showed the wrong activity bar, and pointed Cancel at the wrong stream.
|
||||
"""
|
||||
src = (REPO_ROOT / "static/sessions.js").read_text()
|
||||
# The fix adds explicit S.busy=false in the non-inflight else branch
|
||||
assert "S.busy=false;" in src, "sessions.js loadSession must set S.busy=false when loading a non-inflight session"
|
||||
# btnSend must be explicitly re-enabled
|
||||
assert "$('btnSend').disabled=false;" in src, "sessions.js loadSession must enable btnSend for non-inflight sessions"
|
||||
|
||||
|
||||
def test_done_handler_guards_setbusy_with_inflight_check(cleanup_test_sessions):
|
||||
"""R7b: messages.js done/error handlers must not call setBusy(false) if the
|
||||
currently viewed session is itself still in-flight.
|
||||
When missing, finishing session A while viewing in-flight session B would
|
||||
disable B's Send button.
|
||||
"""
|
||||
src = (REPO_ROOT / "static/messages.js").read_text()
|
||||
# The fix wraps setBusy(false) in a guard
|
||||
assert "INFLIGHT[S.session.session_id]" in src, "messages.js must guard setBusy(false) with INFLIGHT check for current session"
|
||||
|
||||
|
||||
def test_cancel_button_not_cleared_across_sessions(cleanup_test_sessions):
|
||||
"""R7c: The Cancel button and activeStreamId must only be cleared when the
|
||||
done/error event belongs to the currently viewed session.
|
||||
"""
|
||||
src = (REPO_ROOT / "static/messages.js").read_text()
|
||||
# Both clear operations must be inside the activeSid === S.session guard
|
||||
# We check for the pattern added by the fix
|
||||
assert "S.session.session_id===activeSid" in src, "messages.js must guard activeStreamId/Cancel clearing with session identity check"
|
||||
|
||||
# ── R8: Session delete does not invalidate index (ghost sessions) ─────────────
|
||||
|
||||
def test_deleted_session_does_not_appear_in_list(cleanup_test_sessions):
|
||||
"""R8: After deleting a session, it must not appear in /api/sessions.
|
||||
When _index.json was not invalidated on delete, the session reappeared
|
||||
in the list even after the JSON file was removed.
|
||||
"""
|
||||
# Create a session with a title so it shows in the list
|
||||
d, _ = post("/api/session/new", {})
|
||||
sid = d["session"]["session_id"]
|
||||
post("/api/session/rename", {"session_id": sid, "title": "regression-test-delete-R8"})
|
||||
|
||||
# Verify it appears
|
||||
sessions, _ = get("/api/sessions")
|
||||
ids_before = [s["session_id"] for s in sessions["sessions"]]
|
||||
assert sid in ids_before, "Session must appear in list before delete"
|
||||
|
||||
# Delete it
|
||||
result, status = post("/api/session/delete", {"session_id": sid})
|
||||
assert status == 200 and result.get("ok") is True
|
||||
|
||||
# Verify it no longer appears -- even after a second fetch (index rebuild)
|
||||
sessions2, _ = get("/api/sessions")
|
||||
ids_after = [s["session_id"] for s in sessions2["sessions"]]
|
||||
assert sid not in ids_after, f"Deleted session {sid} still appears in list -- index not invalidated on delete"
|
||||
|
||||
|
||||
def test_server_delete_invalidates_index(cleanup_test_sessions):
|
||||
"""R8b: server.py session/delete handler must unlink _index.json.
|
||||
Static check that the fix is in place.
|
||||
"""
|
||||
src = (REPO_ROOT / "server.py").read_text()
|
||||
# Find the delete handler and verify it unlinks the index
|
||||
delete_idx = src.find("if parsed.path == '/api/session/delete':")
|
||||
assert delete_idx >= 0, "session/delete handler not found"
|
||||
delete_block = src[delete_idx:delete_idx+600]
|
||||
assert "SESSION_INDEX_FILE" in delete_block, "server.py session/delete must invalidate SESSION_INDEX_FILE"
|
||||
|
||||
|
||||
# ── R9: Token/tool SSE events write to wrong session after switch ─────────────
|
||||
|
||||
def test_token_handler_guards_session_id(cleanup_test_sessions):
|
||||
"""R9a: The SSE token event handler must check activeSid before writing to DOM.
|
||||
When missing, tokens from session A would render into session B's message area
|
||||
if the user switched sessions mid-stream.
|
||||
"""
|
||||
src = (REPO_ROOT / "static/messages.js").read_text()
|
||||
# Find the token event handler
|
||||
token_idx = src.find("es.addEventListener('token'")
|
||||
assert token_idx >= 0, "token event handler not found"
|
||||
token_block = src[token_idx:token_idx+300]
|
||||
assert "activeSid" in token_block, "token handler must check activeSid before writing to DOM"
|
||||
assert "S.session.session_id!==activeSid" in token_block or "S.session.session_id===activeSid" in token_block, "token handler must compare current session to activeSid"
|
||||
|
||||
|
||||
def test_tool_handler_guards_session_id(cleanup_test_sessions):
|
||||
"""R9b: The SSE tool event handler must check activeSid before writing to DOM.
|
||||
When missing, tool cards from session A would render into session B's message area.
|
||||
"""
|
||||
src = (REPO_ROOT / "static/messages.js").read_text()
|
||||
tool_idx = src.find("es.addEventListener('tool'")
|
||||
assert tool_idx >= 0, "tool event handler not found"
|
||||
tool_block = src[tool_idx:tool_idx+400]
|
||||
assert "activeSid" in tool_block, "tool handler must check activeSid before writing to DOM"
|
||||
|
||||
# ── R10: respondApproval uses wrong session_id after switch (multi-session) ─
|
||||
|
||||
def test_respond_approval_uses_approval_session_id(cleanup_test_sessions):
|
||||
"""R10: respondApproval must use the session_id of the session that triggered
|
||||
the approval, not S.session.session_id (which may be a different session
|
||||
if the user switched while approval was pending).
|
||||
"""
|
||||
src = (REPO_ROOT / "static/messages.js").read_text()
|
||||
# The fix introduces _approvalSessionId to track the correct session
|
||||
assert "_approvalSessionId" in src, "messages.js must use _approvalSessionId in respondApproval"
|
||||
# respondApproval must use _approvalSessionId, not S.session.session_id directly
|
||||
idx = src.find("async function respondApproval(")
|
||||
assert idx >= 0, "respondApproval not found"
|
||||
fn_body = src[idx:idx+300]
|
||||
assert "_approvalSessionId" in fn_body, "respondApproval must read _approvalSessionId, not S.session.session_id"
|
||||
|
||||
|
||||
# ── R11: Activity bar shows cross-session tool status ─────────────────────
|
||||
|
||||
def test_tool_status_only_shown_for_current_session(cleanup_test_sessions):
|
||||
"""R11: The activity bar setStatus() call in the tool SSE handler must only
|
||||
fire when the user is viewing the session that triggered the tool.
|
||||
When missing, session A's tool names would appear in session B's activity bar.
|
||||
"""
|
||||
src = (REPO_ROOT / "static/messages.js").read_text()
|
||||
# Find the tool event handler
|
||||
tool_idx = src.find("es.addEventListener('tool'")
|
||||
assert tool_idx >= 0
|
||||
tool_block = src[tool_idx:tool_idx+400]
|
||||
# setStatus must be inside the activeSid guard, not before it
|
||||
status_pos = tool_block.find("setStatus(")
|
||||
guard_pos = tool_block.find("S.session.session_id===activeSid")
|
||||
assert guard_pos >= 0, "tool handler must guard with activeSid check"
|
||||
# The guard must appear BEFORE or AROUND the setStatus call
|
||||
# (status only fires for the current session)
|
||||
assert status_pos > tool_block.find("activeSid"), "setStatus in tool handler must be inside the activeSid guard"
|
||||
|
||||
|
||||
# ── R12: Live tool cards lost on switch-away and switch-back ──────────────
|
||||
|
||||
def test_loadSession_inflight_restores_live_tool_cards(cleanup_test_sessions):
|
||||
"""R12: When switching back to an in-flight session, live tool cards in
|
||||
#liveToolCards must be restored from S.toolCalls.
|
||||
When missing, tool cards disappeared on switch-away even though the session
|
||||
was still processing.
|
||||
"""
|
||||
src = (REPO_ROOT / "static/sessions.js").read_text()
|
||||
# INFLIGHT branch must call appendLiveToolCard
|
||||
inflight_idx = src.find("if(INFLIGHT[sid]){")
|
||||
assert inflight_idx >= 0, "INFLIGHT branch not found in loadSession"
|
||||
inflight_block = src[inflight_idx:inflight_idx+500]
|
||||
assert "appendLiveToolCard" in inflight_block, "loadSession INFLIGHT branch must restore live tool cards via appendLiveToolCard"
|
||||
assert "clearLiveToolCards" in inflight_block, "loadSession INFLIGHT branch must clear old live cards before restoring"
|
||||
|
||||
# ── R13: renderMessages() called before S.busy=false in done handler ────────
|
||||
|
||||
def test_done_handler_sets_busy_false_before_renderMessages(cleanup_test_sessions):
|
||||
"""R13: In the done handler, S.busy must be set to false BEFORE renderMessages()
|
||||
is called for the active session. The !S.busy guard in renderMessages() controls
|
||||
whether settled tool cards are rendered. When S.busy=true during renderMessages(),
|
||||
tool cards are skipped entirely after a response completes.
|
||||
"""
|
||||
src = (REPO_ROOT / "static/messages.js").read_text()
|
||||
done_idx = src.find("es.addEventListener('done'")
|
||||
assert done_idx >= 0
|
||||
done_block = src[done_idx:done_idx+1500]
|
||||
# S.busy=false must appear before renderMessages() within the done handler
|
||||
busy_pos = done_block.find("S.busy=false;")
|
||||
render_pos = done_block.find("renderMessages()")
|
||||
assert busy_pos >= 0, "done handler must set S.busy=false before renderMessages()"
|
||||
assert busy_pos < render_pos, f"S.busy=false (pos {busy_pos}) must come before renderMessages() (pos {render_pos})"
|
||||
|
||||
|
||||
# ── R14: send() uses stale modelSelect.value instead of session model ────────
|
||||
|
||||
def test_send_uses_session_model_as_authoritative_source(cleanup_test_sessions):
|
||||
"""R14: send() must use S.session.model as the authoritative model, not just
|
||||
$('modelSelect').value. When a session was created with a model not in the
|
||||
current dropdown list, the select value would be stale after switching sessions,
|
||||
causing the wrong model to be sent.
|
||||
"""
|
||||
src = (REPO_ROOT / "static/messages.js").read_text()
|
||||
# The model field in the chat/start payload must prefer S.session.model
|
||||
chat_start_idx = src.find("/api/chat/start")
|
||||
assert chat_start_idx >= 0
|
||||
payload_block = src[chat_start_idx:chat_start_idx+300]
|
||||
assert "S.session.model" in payload_block, "send() must use S.session.model in the chat/start payload"
|
||||
|
||||
|
||||
# ── R15: newSession does not clear live tool cards ────────────────────────────
|
||||
|
||||
def test_newSession_clears_live_tool_cards(cleanup_test_sessions):
|
||||
"""R15: newSession() must call clearLiveToolCards() so live cards from a
|
||||
previous in-flight session don't persist when starting a fresh conversation.
|
||||
"""
|
||||
src = (REPO_ROOT / "static/sessions.js").read_text()
|
||||
new_sess_idx = src.find("async function newSession(")
|
||||
assert new_sess_idx >= 0
|
||||
# Find end of newSession (next async function)
|
||||
next_fn = src.find("async function ", new_sess_idx + 10)
|
||||
new_sess_body = src[new_sess_idx:next_fn]
|
||||
assert "clearLiveToolCards" in new_sess_body, "newSession() must call clearLiveToolCards() to clear stale live cards"
|
||||
Reference in New Issue
Block a user