* fix: persist durable inflight reload snapshots * fix: remove duplicate loadInflightState stub, update CHANGELOG test count The stub added in the previous review branch is superseded by the author's real localStorage-backed implementation in the cherry-picked commit 36051c0. Remove the duplicate. Update CHANGELOG to 961 tests and document the durable inflight state feature. --------- Co-authored-by: Jordan SkyLF <jordan@skylinkfiber.net> Co-authored-by: Nathan Esquenazi <nesquena@gmail.com>
694 lines
34 KiB
Python
694 lines
34 KiB
Python
"""
|
|
Regression tests -- one test per bug that was introduced and fixed.
|
|
These tests exist specifically to prevent those bugs from silently returning.
|
|
|
|
Each test is tagged with the sprint/commit where the bug was found and fixed.
|
|
"""
|
|
import json
|
|
import pathlib
|
|
import time
|
|
import urllib.error
|
|
import urllib.request
|
|
import urllib.parse
|
|
REPO_ROOT = pathlib.Path(__file__).parent.parent.resolve()
|
|
|
|
BASE = "http://127.0.0.1:8788"
|
|
|
|
def get(path):
    """GET ``BASE + path`` and decode the response body as JSON.

    Returns a ``(decoded_body, http_status)`` tuple. Nothing is caught here,
    so any error raised by ``urlopen`` propagates to the caller.
    """
    url = BASE + path
    with urllib.request.urlopen(url, timeout=10) as response:
        decoded = json.loads(response.read())
        status = response.status
    return decoded, status
|
|
|
|
def get_raw(path):
    """GET ``BASE + path`` without decoding the body.

    Returns ``(raw_bytes, content_type, http_status)``; the content type is an
    empty string when the response has no ``Content-Type`` header.
    """
    with urllib.request.urlopen(BASE + path, timeout=10) as response:
        raw_body = response.read()
        content_type = response.headers.get("Content-Type", "")
        status = response.status
    return raw_body, content_type, status
|
|
|
|
def post(path, body=None):
    """POST ``body`` as JSON to ``BASE + path``.

    A falsy ``body`` is sent as an empty JSON object. Returns
    ``(decoded_json, http_status)``. ``HTTPError`` is not propagated: its JSON
    body and status code are returned in the same tuple shape.
    """
    encoded = json.dumps(body or {}).encode()
    request = urllib.request.Request(
        BASE + path,
        data=encoded,
        headers={"Content-Type": "application/json"},
    )
    try:
        with urllib.request.urlopen(request, timeout=10) as response:
            decoded, status = json.loads(response.read()), response.status
    except urllib.error.HTTPError as err:
        # Error responses are expected to carry a JSON body as well.
        decoded, status = json.loads(err.read()), err.code
    return decoded, status
|
|
|
|
def make_session(created_list):
    """Create a session through the API and record its id for cleanup.

    The new session id is appended to ``created_list`` and also returned.
    """
    response_body, _status = post("/api/session/new", {})
    session_id = response_body["session"]["session_id"]
    created_list.append(session_id)
    return session_id
|
|
|
|
|
|
# ── R1: uuid not imported in server.py (Sprint 10 split regression) ──────────
|
|
|
|
def test_chat_start_returns_stream_id(cleanup_test_sessions):
    """R1: chat/start must answer 200 with a stream_id.

    Guards against the missing ``uuid`` import, which made this endpoint
    return 500 (NameError).
    """
    sid = make_session(cleanup_test_sessions)
    start_payload = {
        "session_id": sid,
        "message": "ping",
        "model": "openai/gpt-5.4-mini",
    }
    data, status = post("/api/chat/start", start_payload)

    # A 500 here is the regression; anything other than 200 fails.
    assert status == 200, f"chat/start failed with {status}: {data}"
    assert "stream_id" in data, "stream_id missing from chat/start response"
    assert len(data["stream_id"]) > 8, "stream_id looks invalid"

    # Delete the session explicitly, then empty the fixture's list.
    post("/api/session/delete", {"session_id": sid})
    cleanup_test_sessions.clear()
|
|
|
|
|
|
# ── R2: AIAgent not imported in api/streaming.py (Sprint 10 split regression) ─
|
|
|
|
def test_chat_stream_opens_successfully(cleanup_test_sessions):
    """R2: After chat/start, GET /api/chat/stream must return 200 (SSE opens).

    When AIAgent was missing, the thread crashed immediately, popped STREAMS,
    and the SSE GET returned 404.

    Only the response status and headers are inspected; the stream body is
    never read.
    """
    sid = make_session(cleanup_test_sessions)
    data, status = post("/api/chat/start", {
        "session_id": sid,
        "message": "say: hello",
        "model": "openai/gpt-5.4-mini",
    })
    assert status == 200, f"chat/start failed: {data}"
    stream_id = data["stream_id"]

    # Open the SSE stream -- must return 200, not 404.
    req = urllib.request.Request(BASE + f"/api/chat/stream?stream_id={stream_id}")
    try:
        response = urllib.request.urlopen(req, timeout=3)
    except urllib.error.HTTPError as e:
        raise AssertionError(
            f"SSE stream returned {e.code} -- AIAgent may not be imported"
        ) from e
    except Exception:
        # Deliberate best effort: a timeout or an early connection close while
        # opening the stream is tolerated, as before.
        response = None

    if response is not None:
        # These checks live outside the ``try`` above on purpose:
        # AssertionError is an Exception subclass, so asserting inside it let
        # the broad handler swallow real failures silently.
        try:
            assert response.status == 200, \
                f"SSE stream returned {response.status} (expected 200)"
            ct = response.headers.get("Content-Type", "")
            assert "text/event-stream" in ct, f"Wrong Content-Type: {ct}"
        finally:
            response.close()

    post("/api/session/delete", {"session_id": sid})
    cleanup_test_sessions.clear()
|
|
|
|
|
|
# ── R3: Session.__init__ missing tool_calls param (Sprint 10 split regression) ─
|
|
|
|
def test_session_with_tool_calls_in_json_loads_ok(cleanup_test_sessions):
    """R3: a session whose JSON contains tool_calls must load without a 500.

    When ``tool_calls=None`` was missing from ``Session.__init__``, loading
    such a session raised ``TypeError: unexpected keyword argument``.
    """
    sid = make_session(cleanup_test_sessions)

    # Inject tool_calls straight into the session's JSON file on disk.
    # NOTE(review): if the file is not at this path the injection is skipped
    # and the test only exercises a plain session load -- confirm the path.
    session_file = (
        pathlib.Path.home() / ".hermes" / "webui-mvp-test" / "sessions" / f"{sid}.json"
    )
    if session_file.exists():
        stored = json.loads(session_file.read_text())
        stored["tool_calls"] = [
            {
                "name": "terminal",
                "snippet": "test output",
                "tid": "test_tid_001",
                "assistant_msg_idx": 1,
            }
        ]
        session_file.write_text(json.dumps(stored))

    # Loading the session must return 200, not 500.
    data, status = get(f"/api/session?session_id={urllib.parse.quote(sid)}")
    assert status == 200, f"Session with tool_calls returned {status}: {data}"
    assert data["session"]["session_id"] == sid

    post("/api/session/delete", {"session_id": sid})
    cleanup_test_sessions.clear()
|
|
|
|
|
|
# ── R4: has_pending not imported in streaming.py (Sprint 10 split regression) ─
|
|
|
|
def test_streaming_py_imports_has_pending(cleanup_test_sessions):
    """R4: api/streaming.py must import or define has_pending.

    When missing, the approval check mid-stream caused NameError.

    The previous check (``"import" in src and "has_pending" in src``) was true
    for any file that mentioned the name and contained any import at all. The
    module is now parsed and must actually bind ``has_pending`` through an
    import, a function definition, or a plain assignment.
    """
    import ast

    src = (REPO_ROOT / "api/streaming.py").read_text()
    assert "has_pending" in src, "has_pending not found in api/streaming.py"

    def _binds_has_pending(node):
        # ``import x as has_pending`` / ``from m import has_pending [as y]``
        if isinstance(node, (ast.Import, ast.ImportFrom)):
            return any("has_pending" in (alias.name, alias.asname) for alias in node.names)
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            return node.name == "has_pending"
        # e.g. a fallback such as ``has_pending = lambda ...``
        if isinstance(node, ast.Assign):
            return any(
                isinstance(target, ast.Name) and target.id == "has_pending"
                for target in node.targets
            )
        return False

    # ast.walk visits nested scopes too, so imports inside try/except or
    # inside functions count.
    assert any(_binds_has_pending(node) for node in ast.walk(ast.parse(src))), \
        "has_pending must be imported in api/streaming.py"
|
|
|
|
|
|
def test_aiagent_imported_in_streaming(cleanup_test_sessions):
    """R2b: api/streaming.py must import AIAgent.

    Without the import, the streaming thread crashed right after being spawned.
    This is a textual check on the module source.
    """
    streaming_source = (REPO_ROOT / "api/streaming.py").read_text()
    assert "AIAgent" in streaming_source, "AIAgent not referenced in api/streaming.py"

    accepted_imports = ("from run_agent import AIAgent", "import AIAgent")
    assert any(stmt in streaming_source for stmt in accepted_imports), (
        "AIAgent must be imported in api/streaming.py"
    )
|
|
|
|
|
|
# ── R5: SSE loop did not break on cancel event (Sprint 10 bug) ───────────────
|
|
|
|
def test_cancel_nonexistent_stream_returns_not_cancelled(cleanup_test_sessions):
    """R5a: cancelling an unknown stream answers 200 with cancelled=false."""
    reply, http_status = get("/api/chat/cancel?stream_id=nonexistent_test_xyz")
    assert http_status == 200
    assert reply["ok"] is True
    assert reply["cancelled"] is False
|
|
|
|
|
|
def test_server_py_sse_loop_breaks_on_cancel(cleanup_test_sessions):
    """R5b: the SSE loop's break condition must list 'cancel'.

    Without it the connection hung after the cancel event was processed.
    Sprint 11 moved the logic from server.py to api/routes.py, so both files
    are searched.
    """
    import re

    routes_path = REPO_ROOT / "api" / "routes.py"
    haystack = (REPO_ROOT / "server.py").read_text()
    if routes_path.exists():
        # api/routes.py only exists after the Sprint 11 extraction.
        haystack += routes_path.read_text()

    m = re.search(r"if event in \([^)]+\):\s*break", haystack)
    assert m, "SSE break condition not found in server.py or api/routes.py"
    assert "cancel" in m.group(), (
        f"'cancel' missing from SSE break condition: {m.group()}"
    )
|
|
|
|
|
|
# ── R6: Test cron isolation (Sprint 10) ──────────────────────────────────────
|
|
|
|
def test_real_jobs_json_not_polluted_by_tests(cleanup_test_sessions):
    """R6: Test runs must not write to the real ~/.hermes/cron/jobs.json.

    When HERMES_HOME isolation was missing, every test run added test-job-*
    entries.

    The file may hold either a bare list of jobs or a dict with a ``"jobs"``
    list. Entries that are not dicts, or whose ``name`` is missing or not a
    string (e.g. ``null``), are ignored instead of crashing the check.
    """
    real_jobs_path = pathlib.Path.home() / ".hermes" / "cron" / "jobs.json"
    if not real_jobs_path.exists():
        return  # no jobs file at all -- fine

    jobs = json.loads(real_jobs_path.read_text())
    if isinstance(jobs, dict):
        jobs = jobs.get("jobs", [])

    test_job_names = [
        job["name"]
        for job in jobs or []  # tolerate ``"jobs": null``
        if isinstance(job, dict)
        and isinstance(job.get("name"), str)
        and job["name"].startswith("test-job-")
    ]
    assert not test_job_names, (
        f"Real jobs.json contains {len(test_job_names)} test-job-* entries: "
        f"{test_job_names}"
    )
|
|
|
|
|
|
# ── General: api modules all importable ──────────────────────────────────────
|
|
|
|
def test_all_api_modules_importable(cleanup_test_sessions):
    """Every api/*.py module must parse as valid Python.

    This is a static syntax check via ``ast.parse``; the modules are not
    executed, so an import that only fails at runtime is not detected here.
    Catches syntax breakage introduced during module splits.
    """
    import ast

    module_files = sorted((REPO_ROOT / "api").glob("*.py"))
    # An empty glob would otherwise let the test pass without checking anything.
    assert module_files, "no api/*.py modules found -- nothing was checked"

    for module_file in module_files:
        try:
            ast.parse(module_file.read_text(), filename=str(module_file))
        except SyntaxError as e:
            raise AssertionError(f"{module_file.name} has syntax error: {e}") from e
|
|
|
|
|
|
def test_server_py_importable(cleanup_test_sessions):
    """server.py must parse without syntax errors after any split.

    Static check via ``ast.parse``; the module is not executed.
    """
    import ast

    server_path = REPO_ROOT / "server.py"
    try:
        ast.parse(server_path.read_text(), filename=str(server_path))
    except SyntaxError as e:
        # Raised explicitly (not ``assert False``) so it still fires under -O.
        raise AssertionError(f"server.py has syntax error: {e}") from e
|
|
|
|
# ── R7: Cross-session busy state bleed ───────────────────────────────────────
|
|
|
|
def test_loadSession_resets_busy_state_for_idle_session(cleanup_test_sessions):
    """R7: loadSession for a non-inflight session must reset S.busy to false.

    Without the reset, switching from a busy session to an idle one left the
    Send button disabled, showed the wrong activity bar, and pointed Cancel at
    the wrong stream. Checked textually against static/sessions.js.
    """
    sessions_js = (REPO_ROOT / "static/sessions.js").read_text()
    required = (
        # Explicit S.busy=false in the non-inflight branch.
        (
            "S.busy=false;",
            "sessions.js loadSession must set S.busy=false when loading a non-inflight session",
        ),
        # The Send button state is refreshed through updateSendBtn.
        (
            "updateSendBtn()",
            "sessions.js loadSession must call updateSendBtn for non-inflight sessions",
        ),
    )
    for snippet, failure in required:
        assert snippet in sessions_js, failure
|
|
|
|
|
|
def test_done_handler_guards_setbusy_with_inflight_check(cleanup_test_sessions):
    """R7b: done/error handlers must not call setBusy(false) while the viewed
    session is itself still in-flight.

    Otherwise finishing session A while viewing in-flight session B would
    disable B's Send button. Checked textually against static/messages.js.
    """
    messages_js = (REPO_ROOT / "static/messages.js").read_text()
    guard_expr = "INFLIGHT[S.session.session_id]"
    assert guard_expr in messages_js, (
        "messages.js must guard setBusy(false) with INFLIGHT check for current session"
    )
|
|
|
|
|
|
def test_refresh_handler_does_not_drop_tool_messages_needed_by_todos(cleanup_test_sessions):
    """Todo panel state must survive a session reload/refresh.

    Tool-role messages may be hidden from the visible transcript, but the raw
    session messages must be kept because loadTodos rebuilds its state from
    the latest todo tool output.
    """
    sessions_js = (REPO_ROOT / "static/sessions.js").read_text()
    ui_js = (REPO_ROOT / "static/ui.js").read_text()
    panels_js = (REPO_ROOT / "static/panels.js").read_text()

    overwrite_in_sessions = "data.session.messages=(data.session.messages||[]).filter("
    assert overwrite_in_sessions not in sessions_js, (
        "sessions.js must not overwrite raw session.messages when filtering transcript display"
    )

    rebuild_in_ui = "S.messages = (data.session.messages || []).filter("
    assert rebuild_in_ui not in ui_js, (
        "ui.js refreshSession must not rebuild S.messages by discarding tool messages from the raw session payload"
    )

    raw_source_preference = "const sourceMessages = (S.session && Array.isArray(S.session.messages) && S.session.messages.length) ? S.session.messages : S.messages;"
    assert raw_source_preference in panels_js, (
        "loadTodos must prefer raw S.session.messages so todo state survives reloads"
    )
|
|
|
|
|
|
def test_cancel_button_not_cleared_across_sessions(cleanup_test_sessions):
    """R7c: the Cancel button and activeStreamId may only be cleared when the
    done/error event belongs to the currently viewed session.

    Checked textually: messages.js must contain the session-identity guard.
    """
    messages_js = (REPO_ROOT / "static/messages.js").read_text()
    identity_guard = "S.session.session_id===activeSid"
    assert identity_guard in messages_js, (
        "messages.js must guard activeStreamId/Cancel clearing with session identity check"
    )
|
|
|
|
# ── R8: Session delete does not invalidate index (ghost sessions) ─────────────
|
|
|
|
def test_deleted_session_does_not_appear_in_list(cleanup_test_sessions):
    """R8: After deleting a session, it must not appear in /api/sessions.

    When _index.json was not invalidated on delete, the session reappeared
    in the list even after the JSON file was removed.
    """
    # Register the session with the cleanup fixture, as the other tests in
    # this file do, so it does not leak when an assertion fails before the
    # explicit delete below.
    sid = make_session(cleanup_test_sessions)
    # Give it a title so it shows in the list.
    post("/api/session/rename", {"session_id": sid, "title": "regression-test-delete-R8"})

    # Verify it appears.
    sessions, _ = get("/api/sessions")
    ids_before = [s["session_id"] for s in sessions["sessions"]]
    assert sid in ids_before, "Session must appear in list before delete"

    # Delete it.
    result, status = post("/api/session/delete", {"session_id": sid})
    assert status == 200 and result.get("ok") is True
    # Already deleted: nothing left for the fixture to clean up.
    cleanup_test_sessions.clear()

    # Verify it no longer appears in a fresh listing fetched after the delete.
    sessions2, _ = get("/api/sessions")
    ids_after = [s["session_id"] for s in sessions2["sessions"]]
    assert sid not in ids_after, \
        f"Deleted session {sid} still appears in list -- index not invalidated on delete"
|
|
|
|
|
|
def test_server_delete_invalidates_index(cleanup_test_sessions):
    """R8b: the session/delete handler must reference SESSION_INDEX_FILE.

    Static check that the fix is in place. Sprint 11 moved the handler from
    server.py to api/routes.py, so both are inspected; the first file that
    contains the handler decides the outcome.
    """
    routes_path = REPO_ROOT / "api" / "routes.py"
    server_text = (REPO_ROOT / "server.py").read_text()
    routes_text = routes_path.read_text() if routes_path.exists() else ""

    # Quote style differs between contributors, so accept both spellings.
    handler_markers = (
        "if parsed.path == '/api/session/delete':",
        'if parsed.path == "/api/session/delete":',
    )

    handler_found = False
    for label, text in (("server.py", server_text), ("api/routes.py", routes_text)):
        delete_idx = max(text.find(marker) for marker in handler_markers)
        if delete_idx < 0:
            continue
        # Only the first 600 characters of the handler are inspected.
        handler_window = text[delete_idx:delete_idx + 600]
        assert "SESSION_INDEX_FILE" in handler_window, (
            f"{label} session/delete must invalidate SESSION_INDEX_FILE"
        )
        handler_found = True
        break

    assert handler_found, "session/delete handler not found in server.py or api/routes.py"
|
|
|
|
# ── R9: Token/tool SSE events write to wrong session after switch ─────────────
|
|
|
|
def test_token_handler_guards_session_id(cleanup_test_sessions):
    """R9a: the SSE token handler must check activeSid before touching the DOM.

    Without the guard, tokens from session A rendered into session B's message
    area when the user switched sessions mid-stream. Sprint 12 moved the
    handler into _wireSSE(source); the older ``es.`` spelling is the fallback.
    """
    messages_js = (REPO_ROOT / "static/messages.js").read_text()

    handler_starts = (
        "source.addEventListener('token'",
        "es.addEventListener('token'",
    )
    # First spelling that occurs wins; -1 when neither is present.
    start = next(
        (pos for pos in map(messages_js.find, handler_starts) if pos >= 0),
        -1,
    )
    assert start >= 0, "token event handler not found"

    handler_window = messages_js[start:start + 300]
    assert "activeSid" in handler_window, (
        "token handler must check activeSid before writing to DOM"
    )

    comparisons = (
        "S.session.session_id!==activeSid",
        "S.session.session_id===activeSid",
    )
    assert any(expr in handler_window for expr in comparisons), (
        "token handler must compare current session to activeSid"
    )
|
|
|
|
|
|
def test_tool_handler_guards_session_id(cleanup_test_sessions):
    """R9b: the SSE tool handler must check activeSid before touching the DOM.

    Without the guard, tool cards from session A rendered into session B's
    message area. Sprint 12 moved the handler into _wireSSE(source); the older
    ``es.`` spelling is the fallback.
    """
    messages_js = (REPO_ROOT / "static/messages.js").read_text()

    handler_starts = (
        "source.addEventListener('tool'",
        "es.addEventListener('tool'",
    )
    start = next(
        (pos for pos in map(messages_js.find, handler_starts) if pos >= 0),
        -1,
    )
    assert start >= 0, "tool event handler not found"

    handler_window = messages_js[start:start + 400]
    assert "activeSid" in handler_window, (
        "tool handler must check activeSid before writing to DOM"
    )
|
|
|
|
|
|
# ── R10: respondApproval uses wrong session_id after switch (multi-session) ─
|
|
|
|
def test_respond_approval_uses_approval_session_id(cleanup_test_sessions):
    """R10: respondApproval must use the session that triggered the approval.

    S.session.session_id may already point at another session if the user
    switched while the approval was pending, so the fix tracks the right id in
    ``_approvalSessionId``.
    """
    messages_js = (REPO_ROOT / "static/messages.js").read_text()
    tracker = "_approvalSessionId"

    assert tracker in messages_js, "messages.js must use _approvalSessionId in respondApproval"

    # The tracker must be read near the top of respondApproval itself.
    fn_start = messages_js.find("async function respondApproval(")
    assert fn_start >= 0, "respondApproval not found"

    fn_head = messages_js[fn_start:fn_start + 300]
    assert tracker in fn_head, (
        "respondApproval must read _approvalSessionId, not S.session.session_id"
    )
|
|
|
|
|
|
# ── R11: Tool progress must not use shared status chrome ──────────────────
|
|
|
|
def test_tool_status_only_shown_for_current_session(cleanup_test_sessions):
    """R11: tool progress must not drive the global status bar or the composer
    status.

    Live tool cards in the current conversation are the authoritative progress
    UI, which avoids cross-session status leakage entirely.
    """
    messages_js = (REPO_ROOT / "static/messages.js").read_text()

    # Sprint 12 moved the handler into _wireSSE(source).
    handler_starts = (
        "source.addEventListener('tool'",
        "es.addEventListener('tool'",
    )
    start = next(
        (pos for pos in map(messages_js.find, handler_starts) if pos >= 0),
        -1,
    )
    assert start >= 0

    handler_window = messages_js[start:start + 400]
    forbidden_calls = (
        ("setStatus(", "tool handler should not use the global activity/status bar"),
        ("setComposerStatus(", "tool handler should not use composer status for tool progress"),
    )
    for call, failure in forbidden_calls:
        assert call not in handler_window, failure
|
|
|
|
# ── R12: Live tool cards lost on switch-away and switch-back ──────────────
|
|
|
|
def test_loadSession_inflight_restores_live_tool_cards(cleanup_test_sessions):
    """R12: switching back to an in-flight session must restore the live tool
    cards in #liveToolCards from S.toolCalls.

    Without this, tool cards disappeared on switch-away even though the
    session was still processing.
    """
    sessions_js = (REPO_ROOT / "static/sessions.js").read_text()

    branch_start = sessions_js.find("if(INFLIGHT[sid]){")
    assert branch_start >= 0, "INFLIGHT branch not found in loadSession"

    # Only the first 500 characters of the branch are inspected.
    branch_window = sessions_js[branch_start:branch_start + 500]
    expectations = (
        (
            "appendLiveToolCard",
            "loadSession INFLIGHT branch must restore live tool cards via appendLiveToolCard",
        ),
        (
            "clearLiveToolCards",
            "loadSession INFLIGHT branch must clear old live cards before restoring",
        ),
    )
    for call_name, failure in expectations:
        assert call_name in branch_window, failure
|
|
|
|
# ── R13: renderMessages() called before S.busy=false in done handler ────────
|
|
|
|
def test_done_handler_sets_busy_false_before_renderMessages(cleanup_test_sessions):
    """R13: the done handler must set S.busy=false BEFORE renderMessages().

    renderMessages() only draws settled tool cards when ``!S.busy``; if S.busy
    is still true at that point, tool cards are skipped entirely once a
    response completes.
    """
    messages_js = (REPO_ROOT / "static/messages.js").read_text()

    # Sprint 12 moved the handler into _wireSSE(source).
    handler_starts = (
        "source.addEventListener('done'",
        "es.addEventListener('done'",
    )
    start = next(
        (pos for pos in map(messages_js.find, handler_starts) if pos >= 0),
        -1,
    )
    assert start >= 0

    # Ordering is compared via offsets inside a 2500-character window.
    handler_window = messages_js[start:start + 2500]
    busy_pos = handler_window.find("S.busy=false;")
    render_pos = handler_window.find("renderMessages()")

    assert busy_pos >= 0, "done handler must set S.busy=false before renderMessages()"
    assert busy_pos < render_pos, (
        f"S.busy=false (pos {busy_pos}) must come before renderMessages() (pos {render_pos})"
    )
|
|
|
|
|
|
# ── R14: send() uses stale modelSelect.value instead of session model ────────
|
|
|
|
def test_send_uses_session_model_as_authoritative_source(cleanup_test_sessions):
    """R14: send() must treat S.session.model as the authoritative model.

    When a session was created with a model missing from the current dropdown,
    $('modelSelect').value went stale after switching sessions and the wrong
    model was sent.
    """
    messages_js = (REPO_ROOT / "static/messages.js").read_text()

    # Look at the 300 characters following the first chat/start reference.
    endpoint_pos = messages_js.find("/api/chat/start")
    assert endpoint_pos >= 0

    payload_window = messages_js[endpoint_pos:endpoint_pos + 300]
    assert "S.session.model" in payload_window, (
        "send() must use S.session.model in the chat/start payload"
    )
|
|
|
|
|
|
# ── R15: newSession does not clear live tool cards ────────────────────────────
|
|
|
|
def test_newSession_clears_live_tool_cards(cleanup_test_sessions):
    """R15: newSession() must call clearLiveToolCards() so live cards from a
    previous in-flight session don't persist when starting a fresh conversation.

    The body of newSession() is approximated as the text from its declaration
    up to the next ``async function`` declaration, or to the end of the file
    when none follows.
    """
    src = (REPO_ROOT / "static/sessions.js").read_text()
    new_sess_idx = src.find("async function newSession(")
    assert new_sess_idx >= 0

    body_end = src.find("async function ", new_sess_idx + 10)
    if body_end < 0:
        # find() returns -1 when nothing follows; used as a slice end that
        # would silently mean "all but the last character".
        body_end = len(src)
    new_sess_body = src[new_sess_idx:body_end]

    assert "clearLiveToolCards" in new_sess_body, \
        "newSession() must call clearLiveToolCards() to clear stale live cards"
|
|
|
|
|
|
def test_newSession_resets_busy_state_for_fresh_chat(cleanup_test_sessions):
    """R15b: newSession() must reset the viewed chat to idle state.

    Without this, starting a second chat while another session is streaming
    leaves S.busy=true, so the first send in the new chat gets incorrectly
    queued.

    The body of newSession() is approximated as the text from its declaration
    up to the next ``async function`` declaration, or to the end of the file
    when none follows.
    """
    src = (REPO_ROOT / "static/sessions.js").read_text()
    new_sess_idx = src.find("async function newSession(")
    assert new_sess_idx >= 0

    body_end = src.find("async function ", new_sess_idx + 10)
    if body_end < 0:
        # find() returns -1 when nothing follows; used as a slice end that
        # would silently mean "all but the last character".
        body_end = len(src)
    new_sess_body = src[new_sess_idx:body_end]

    assert "S.busy=false;" in new_sess_body, \
        "newSession() must clear S.busy so a fresh chat is immediately sendable"
    assert "S.activeStreamId=null;" in new_sess_body, \
        "newSession() must clear the active stream id for the newly viewed chat"
    assert "updateQueueBadge(S.session.session_id);" in new_sess_body, \
        "newSession() must refresh the badge for the new session rather than leaving the old session's queue badge visible"
|
|
|
|
|
|
def test_session_scoped_message_queue_frontend_wiring(cleanup_test_sessions):
    """R15bb: queued follow-ups must stay attached to their originating session.

    The frontend is expected to keep a session-keyed queue store and to drain
    only the active session's queue once that session becomes idle. Verified
    by looking for the relevant snippets in the static JS sources.
    """
    ui_js = (REPO_ROOT / "static/ui.js").read_text()
    messages_js = (REPO_ROOT / "static/messages.js").read_text()
    sessions_js = (REPO_ROOT / "static/sessions.js").read_text()

    expected_in_ui = (
        "const SESSION_QUEUES",
        "function queueSessionMessage",
        "function shiftQueuedSessionMessage",
        "const sid=S.session&&S.session.session_id;",
        "const next=sid?shiftQueuedSessionMessage(sid):null;",
    )
    for snippet in expected_in_ui:
        assert snippet in ui_js

    expected_in_messages = (
        "queueSessionMessage(S.session.session_id",
        "updateQueueBadge(S.session.session_id);",
    )
    for snippet in expected_in_messages:
        assert snippet in messages_js

    assert "updateQueueBadge(sid);" in sessions_js
|
|
|
|
|
|
def test_chat_start_persists_pending_turn_metadata_for_reload_recovery(cleanup_test_sessions):
    """R15c: chat/start must expose enough pending-turn metadata for a reload
    to rebuild the in-flight conversation instead of showing a blank session.

    Checked textually against api/routes.py.
    """
    routes_py = (REPO_ROOT / "api/routes.py").read_text()
    required_snippets = (
        's.active_stream_id = stream_id',
        's.pending_user_message = msg',
        's.pending_attachments = attachments',
        '"active_stream_id": getattr(s, "active_stream_id", None)',
        '"pending_user_message": getattr(s, "pending_user_message", None)',
    )
    for snippet in required_snippets:
        assert snippet in routes_py
|
|
|
|
|
|
def test_reload_path_restores_pending_message_and_reattaches_live_stream(cleanup_test_sessions):
    """R15d: after loadSession(), the frontend reload path must show the
    pending user turn and reattach to the live SSE stream.

    Checked textually against the static JS sources.
    """
    sessions_js = (REPO_ROOT / "static/sessions.js").read_text()
    ui_js = (REPO_ROOT / "static/ui.js").read_text()
    messages_js = (REPO_ROOT / "static/messages.js").read_text()

    # (source text, snippet it must contain), in the original check order.
    expectations = (
        (ui_js, 'getPendingSessionMessage'),
        (ui_js, 'pending_user_message'),
        (messages_js, 'function attachLiveStream'),
        (sessions_js, 'const pendingMsg=typeof getPendingSessionMessage'),
        (sessions_js, 'const activeStreamId=data.session.active_stream_id||null;'),
        (sessions_js, 'attachLiveStream(sid, activeStreamId'),
        (ui_js, 'if (S.activeStreamId && S.activeStreamId === streamId) return;'),
    )
    for source_text, snippet in expectations:
        assert snippet in source_text
|
|
|
|
|
|
# ── R16: Switching away/back must preserve live partial assistant output ─────
|
|
|
|
|
|
def test_live_stream_tokens_persist_partial_assistant_for_session_switch(cleanup_test_sessions):
    """R16: in-flight assistant text must be mirrored into INFLIGHT session
    state, and the live stream must rebind to the rebuilt DOM after switching
    away and back.

    Without this, partial assistant output disappears until the final done
    payload lands.
    """
    messages_js = (REPO_ROOT / "static/messages.js").read_text()
    ui_js = (REPO_ROOT / "static/ui.js").read_text()

    # (source text, required snippet, failure message)
    expectations = (
        (
            messages_js,
            "content:assistantText",
            "messages.js must persist the partial assistant text into INFLIGHT state",
        ),
        (
            messages_js,
            "_live:true",
            "messages.js must mark the persisted in-flight assistant row so renderMessages can re-anchor it",
        ),
        (
            messages_js,
            "syncInflightAssistantMessage();",
            "token handler must update INFLIGHT state before checking the active session",
        ),
        (
            messages_js,
            "assistantRow&&!assistantRow.isConnected",
            "live stream must drop stale detached assistant DOM references after session switches",
        ),
        (
            ui_js,
            "data-live-assistant",
            "renderMessages must preserve a live-assistant DOM anchor when rebuilding the thread",
        ),
    )
    for source_text, snippet, failure in expectations:
        assert snippet in source_text, failure
|
|
|
|
|
|
def test_inflight_session_state_tracks_live_tool_cards_per_session(cleanup_test_sessions):
    """R16b: live tool cards must be stored on the in-flight session, not only
    in the global S.toolCalls array, so switching chats neither loses nor
    misattaches them.
    """
    messages_js = (REPO_ROOT / "static/messages.js").read_text()
    sessions_js = (REPO_ROOT / "static/sessions.js").read_text()

    persist_call = "INFLIGHT[activeSid].toolCalls.push(tc);"
    assert persist_call in messages_js, (
        "tool SSE handler must persist live tool calls onto the in-flight session"
    )

    restore_stmt = "S.toolCalls=(INFLIGHT[sid].toolCalls||[]);"
    assert restore_stmt in sessions_js, (
        "loadSession() must restore live tool calls from the in-flight session state"
    )
|
|
|
|
|
|
def test_loadSession_inflight_sets_busy_before_renderMessages(cleanup_test_sessions):
    """R16c: loading an in-flight session must mark it busy before
    renderMessages().

    Otherwise renderMessages() treats S.toolCalls as settled history cards and
    the same tool call shows up once inline and once in the live tool host
    after a session switch.
    """
    sessions_js = (REPO_ROOT / "static/sessions.js").read_text()

    branch_start = sessions_js.find("if(INFLIGHT[sid]){")
    assert branch_start >= 0, "INFLIGHT branch not found in loadSession"

    # Ordering is compared via offsets inside a 700-character window.
    branch_window = sessions_js[branch_start:branch_start + 700]
    busy_at = branch_window.find("S.busy=true;")
    render_at = branch_window.find("renderMessages();appendThinking();")

    assert busy_at >= 0, "loadSession INFLIGHT branch must set S.busy=true"
    assert render_at >= 0, "loadSession INFLIGHT branch must call renderMessages()"
    assert busy_at < render_at, (
        "loadSession must set S.busy=true before renderMessages() to avoid duplicate tool cards"
    )
|
|
|
|
|
|
def test_streaming_bridge_accepts_current_tool_progress_callback_signature(cleanup_test_sessions):
    """R17: api/streaming.py must accept the current Hermes agent callback
    contract.

    The agent now calls
    tool_progress_callback(event_type, name, preview, args, **kwargs). If the
    WebUI bridge only accepts (name, preview, args), live tool updates
    silently vanish.
    """
    streaming_py = (REPO_ROOT / "api/streaming.py").read_text()

    # Each entry: (acceptable spellings, failure message).
    expectations = (
        (
            ("def on_tool(*cb_args, **cb_kwargs):",),
            "streaming.py must accept variable callback args for tool progress events",
        ),
        (
            ("reasoning_callback=on_reasoning",),
            "streaming.py must wire the agent's reasoning callback into the SSE bridge",
        ),
        (
            ("put('tool_complete'", 'put("tool_complete"'),
            "streaming.py must emit live tool completion SSE events",
        ),
    )
    for spellings, failure in expectations:
        assert any(snippet in streaming_py for snippet in spellings), failure
|
|
|
|
|
|
def test_messages_js_supports_live_reasoning_and_tool_completion(cleanup_test_sessions):
    """R18: messages.js must render live reasoning and react to tool
    completion events.

    Without these handlers, the operator only sees generic Thinking… or
    nothing until the final done snapshot redraws the whole turn.
    """
    messages_js = (REPO_ROOT / "static/messages.js").read_text()

    # Each entry: (acceptable spellings, failure message).
    expectations = (
        (
            ("let reasoningText=''",),
            "messages.js must track streamed reasoning text separately from assistant text",
        ),
        (
            ("source.addEventListener('reasoning'", 'source.addEventListener("reasoning"'),
            "messages.js must listen for live reasoning SSE events",
        ),
        (
            ("source.addEventListener('tool_complete'", 'source.addEventListener("tool_complete"'),
            "messages.js must listen for live tool completion SSE events",
        ),
        (
            ("function _parseStreamState()",),
            "messages.js must parse live stream state into reasoning + visible answer",
        ),
    )
    for spellings, failure in expectations:
        assert any(snippet in messages_js for snippet in spellings), failure
|
|
|
|
|
|
def test_ui_js_can_upgrade_thinking_spinner_into_live_reasoning_card(cleanup_test_sessions):
    """R19: ui.js must be able to swap the placeholder thinking spinner for
    streamed reasoning text while a turn is in progress.

    Either quote style is accepted for the default-argument literals.
    """
    ui_js = (REPO_ROOT / "static/ui.js").read_text()

    markup_spellings = (
        "function _thinkingMarkup(text='')",
        'function _thinkingMarkup(text="")',
    )
    assert any(decl in ui_js for decl in markup_spellings), (
        "ui.js must centralize thinking row markup so it can switch between spinner and live text"
    )

    update_spellings = (
        "function updateThinking(text=''){appendThinking(text);}",
        'function updateThinking(text=""){appendThinking(text);}',
    )
    assert any(decl in ui_js for decl in update_spellings), (
        "ui.js must expose an updateThinking helper for live reasoning rendering"
    )
|
|
|
|
|
|
# ── R17: Stack traces must not leak to clients in 500 responses ────────────
|
|
|
|
def test_500_response_has_no_trace_field():
    """R17: error responses must not include a 'trace' field.

    Leaked tracebacks expose file paths, module names, and potentially secret
    values from local variables.
    """
    # An empty body omits the fields chat/start needs, which should provoke an
    # error response. The status code itself is not asserted here.
    response_body, _status = post("/api/chat/start", {})
    assert "trace" not in response_body, (
        "Server must not leak stack traces to clients"
    )
|
|
|
|
def test_upload_error_has_no_trace_field():
    """R17b: upload error responses must not include a 'trace' field."""
    # A non-multipart body sent to /api/upload should hit the error handler.
    invalid_payload = b"not-multipart-data"
    request = urllib.request.Request(
        BASE + "/api/upload",
        data=invalid_payload,
        headers={"Content-Type": "text/plain", "Content-Length": "18"},
    )

    try:
        with urllib.request.urlopen(request, timeout=10) as response:
            body, code = json.loads(response.read()), response.status
    except urllib.error.HTTPError as err:
        # 4xx/5xx arrive as HTTPError; the body is still expected to be JSON.
        body, code = json.loads(err.read()), err.code

    assert code >= 400, "Invalid upload should return an error status"
    assert "trace" not in body, (
        "Upload errors must not leak stack traces to clients"
    )
    assert "error" in body, "Error responses must include an 'error' key"
|
|
|
|
|
|
# ── #248: /skills slash command ───────────────────────────────────────────────
|
|
|
|
def test_skills_slash_command_defined():
    """#248: the /skills command must be registered in COMMANDS and implemented.

    Looks for the command entry, the function definition, and the i18n key in
    static/commands.js.
    """
    commands_js = (REPO_ROOT / "static/commands.js").read_text()

    # Each entry: (acceptable spellings, failure message).
    expectations = (
        # 1. COMMANDS array entry (either quote style).
        (
            ("name:'skills'", 'name:"skills"'),
            "COMMANDS array must include an entry with name:'skills'",
        ),
        # 2. Implementation function.
        (
            ("function cmdSkills",),
            "cmdSkills function must be defined in commands.js",
        ),
        # 3. i18n key wired to the COMMANDS entry.
        (
            ("cmd_skills",),
            "cmd_skills i18n key must be referenced in commands.js",
        ),
    )
    for spellings, failure in expectations:
        assert any(snippet in commands_js for snippet in spellings), failure
|
|
|
|
|
|
def test_reload_recovery_persists_durable_inflight_state(cleanup_test_sessions):
    """Reload recovery must persist a durable per-session inflight snapshot.

    Without these helpers, loadSession() references loadInflightState() but a
    full browser reload has no saved state to hydrate, so recovery silently
    no-ops. Checked textually against the static JS sources.
    """
    ui_js = (REPO_ROOT / "static/ui.js").read_text()
    messages_js = (REPO_ROOT / "static/messages.js").read_text()
    sessions_js = (REPO_ROOT / "static/sessions.js").read_text()

    # Storage key and the three helpers must be declared in ui.js.
    ui_declarations = (
        "const INFLIGHT_STATE_KEY = 'hermes-webui-inflight-state'",
        "function saveInflightState(sid, state)",
        "function loadInflightState(sid, streamId)",
        "function clearInflightState(sid)",
    )
    for declaration in ui_declarations:
        assert declaration in ui_js

    # (source text, required snippet, failure message)
    call_sites = (
        (
            messages_js,
            "saveInflightState(activeSid",
            "messages.js must persist live stream snapshots while a turn is in flight",
        ),
        (
            messages_js,
            "clearInflightState(activeSid)",
            "messages.js must clear durable inflight snapshots when the run ends/errors/cancels",
        ),
        (
            sessions_js,
            "const stored=loadInflightState(sid, activeStreamId);",
            "loadSession() must hydrate in-flight state from durable browser storage on reload",
        ),
    )
    for source_text, snippet, failure in call_sites:
        assert snippet in source_text, failure
|