fix: live reasoning, tool progress, in-flight session recovery (#367)

* fix: preserve live session output across chat switches

(cherry picked from commit 401e3b643d25e8dad8c06883b478b3c3073f07a5)

* fix: preserve todo state after session reload

(cherry picked from commit 7ee093ba19978af23b79148df2f2347e2f1e5bde)

* fix: preserve live assistant anchor across rerenders

* fix: stream live reasoning and tool progress

* fix: recover inflight session state after reload

* fix: add loadInflightState stub + CHANGELOG v0.50.21

- static/ui.js: add loadInflightState() function (currently returns null —
  the typeof guard in sessions.js means reload recovery works via the
  else-path attachLiveStream call; this stub satisfies the guard cleanly
  and documents the extension point for future localStorage-backed state)
- CHANGELOG.md: v0.50.21 entry; 960 tests (up from 949)

---------

Co-authored-by: Jordan SkyLF <jordan@skylinkfiber.net>
Co-authored-by: Nathan Esquenazi <nesquena@gmail.com>
This commit is contained in:
nesquena-hermes
2026-04-13 16:18:15 -07:00
committed by GitHub
parent bcdd7ed3f3
commit 9542639a90
9 changed files with 609 additions and 73 deletions

View File

@@ -241,6 +241,24 @@ def test_done_handler_guards_setbusy_with_inflight_check(cleanup_test_sessions):
assert "INFLIGHT[S.session.session_id]" in src, "messages.js must guard setBusy(false) with INFLIGHT check for current session"
def test_refresh_handler_does_not_drop_tool_messages_needed_by_todos(cleanup_test_sessions):
"""Todo panel state must survive session reload/refresh.
The UI can hide tool-role messages from the visible transcript, but it must not
destroy the raw session messages because loadTodos reconstructs state from the
latest todo tool output.
"""
sessions_src = (REPO_ROOT / "static/sessions.js").read_text()
ui_src = (REPO_ROOT / "static/ui.js").read_text()
panels_src = (REPO_ROOT / "static/panels.js").read_text()
assert "data.session.messages=(data.session.messages||[]).filter(" not in sessions_src, \
"sessions.js must not overwrite raw session.messages when filtering transcript display"
assert "S.messages = (data.session.messages || []).filter(" not in ui_src, \
"ui.js refreshSession must not rebuild S.messages by discarding tool messages from the raw session payload"
assert "const sourceMessages = (S.session && Array.isArray(S.session.messages) && S.session.messages.length) ? S.session.messages : S.messages;" in panels_src, \
"loadTodos must prefer raw S.session.messages so todo state survives reloads"
def test_cancel_button_not_cleared_across_sessions(cleanup_test_sessions):
"""R7c: The Cancel button and activeStreamId must only be cleared when the
done/error event belongs to the currently viewed session.
@@ -440,7 +458,166 @@ def test_newSession_clears_live_tool_cards(cleanup_test_sessions):
assert "clearLiveToolCards" in new_sess_body, "newSession() must call clearLiveToolCards() to clear stale live cards"
# ── R16: Stack traces must not leak to clients in 500 responses ────────────
def test_newSession_resets_busy_state_for_fresh_chat(cleanup_test_sessions):
"""R15b: newSession() must reset the viewed chat to idle state.
Without this, starting a second chat while another session is streaming leaves
S.busy=true, so the first send in the new chat gets incorrectly queued.
"""
src = (REPO_ROOT / "static/sessions.js").read_text()
new_sess_idx = src.find("async function newSession(")
assert new_sess_idx >= 0
next_fn = src.find("async function ", new_sess_idx + 10)
new_sess_body = src[new_sess_idx:next_fn]
assert "S.busy=false;" in new_sess_body, \
"newSession() must clear S.busy so a fresh chat is immediately sendable"
assert "S.activeStreamId=null;" in new_sess_body, \
"newSession() must clear the active stream id for the newly viewed chat"
assert "updateQueueBadge(S.session.session_id);" in new_sess_body, \
"newSession() must refresh the badge for the new session rather than leaving the old session's queue badge visible"
def test_session_scoped_message_queue_frontend_wiring(cleanup_test_sessions):
"""R15bb: queued follow-ups must stay attached to their originating session.
The frontend should use a session-keyed queue store and drain only the active
session's queued messages when that session becomes idle.
"""
ui_src = (REPO_ROOT / "static/ui.js").read_text()
messages_src = (REPO_ROOT / "static/messages.js").read_text()
sessions_src = (REPO_ROOT / "static/sessions.js").read_text()
assert "const SESSION_QUEUES" in ui_src
assert "function queueSessionMessage" in ui_src
assert "function shiftQueuedSessionMessage" in ui_src
assert "const sid=S.session&&S.session.session_id;" in ui_src
assert "const next=sid?shiftQueuedSessionMessage(sid):null;" in ui_src
assert "queueSessionMessage(S.session.session_id" in messages_src
assert "updateQueueBadge(S.session.session_id);" in messages_src
assert "updateQueueBadge(sid);" in sessions_src
def test_chat_start_persists_pending_turn_metadata_for_reload_recovery(cleanup_test_sessions):
"""R15c: chat/start must expose enough pending-turn metadata for a reload to
rebuild the in-flight conversation instead of showing a blank session.
"""
routes_src = (REPO_ROOT / "api/routes.py").read_text()
assert 's.active_stream_id = stream_id' in routes_src
assert 's.pending_user_message = msg' in routes_src
assert 's.pending_attachments = attachments' in routes_src
assert '"active_stream_id": getattr(s, "active_stream_id", None)' in routes_src
assert '"pending_user_message": getattr(s, "pending_user_message", None)' in routes_src
def test_reload_path_restores_pending_message_and_reattaches_live_stream(cleanup_test_sessions):
"""R15d: the frontend reload path must show the pending user turn and
reattach to the live SSE stream after loadSession().
"""
sessions_src = (REPO_ROOT / "static/sessions.js").read_text()
ui_src = (REPO_ROOT / "static/ui.js").read_text()
messages_src = (REPO_ROOT / "static/messages.js").read_text()
assert 'getPendingSessionMessage' in ui_src
assert 'pending_user_message' in ui_src
assert 'function attachLiveStream' in messages_src
assert 'const pendingMsg=typeof getPendingSessionMessage' in sessions_src
assert 'const activeStreamId=data.session.active_stream_id||null;' in sessions_src
assert 'attachLiveStream(sid, activeStreamId' in sessions_src
assert 'if (S.activeStreamId && S.activeStreamId === streamId) return;' in ui_src
# ── R16: Switching away/back must preserve live partial assistant output ─────
def test_live_stream_tokens_persist_partial_assistant_for_session_switch(cleanup_test_sessions):
"""R16: in-flight assistant text must be mirrored into INFLIGHT session state,
and the live stream must rebind to the rebuilt DOM after switching away and back.
Without this, partial assistant output disappears until the final done payload lands.
"""
messages_src = (REPO_ROOT / "static/messages.js").read_text()
ui_src = (REPO_ROOT / "static/ui.js").read_text()
assert "content:assistantText" in messages_src, \
"messages.js must persist the partial assistant text into INFLIGHT state"
assert "_live:true" in messages_src, \
"messages.js must mark the persisted in-flight assistant row so renderMessages can re-anchor it"
assert "syncInflightAssistantMessage();" in messages_src, \
"token handler must update INFLIGHT state before checking the active session"
assert "assistantRow&&!assistantRow.isConnected" in messages_src, \
"live stream must drop stale detached assistant DOM references after session switches"
assert "data-live-assistant" in ui_src, \
"renderMessages must preserve a live-assistant DOM anchor when rebuilding the thread"
def test_inflight_session_state_tracks_live_tool_cards_per_session(cleanup_test_sessions):
"""R16b: live tool cards must be stored on the in-flight session, not only in the
global S.toolCalls array, so switching chats does not lose or misattach them.
"""
messages_src = (REPO_ROOT / "static/messages.js").read_text()
sessions_src = (REPO_ROOT / "static/sessions.js").read_text()
assert "INFLIGHT[activeSid].toolCalls.push(tc);" in messages_src, \
"tool SSE handler must persist live tool calls onto the in-flight session"
assert "S.toolCalls=(INFLIGHT[sid].toolCalls||[]);" in sessions_src, \
"loadSession() must restore live tool calls from the in-flight session state"
def test_loadSession_inflight_sets_busy_before_renderMessages(cleanup_test_sessions):
"""R16c: loading an in-flight session must mark it busy before renderMessages().
Otherwise renderMessages() treats S.toolCalls as settled history cards and the
same tool call appears once inline and once in the live tool host after a
session switch.
"""
src = (REPO_ROOT / "static/sessions.js").read_text()
inflight_idx = src.find("if(INFLIGHT[sid]){")
assert inflight_idx >= 0, "INFLIGHT branch not found in loadSession"
inflight_block = src[inflight_idx:inflight_idx+700]
busy_pos = inflight_block.find("S.busy=true;")
render_pos = inflight_block.find("renderMessages();appendThinking();")
assert busy_pos >= 0, "loadSession INFLIGHT branch must set S.busy=true"
assert render_pos >= 0, "loadSession INFLIGHT branch must call renderMessages()"
assert busy_pos < render_pos, \
"loadSession must set S.busy=true before renderMessages() to avoid duplicate tool cards"
def test_streaming_bridge_accepts_current_tool_progress_callback_signature(cleanup_test_sessions):
"""R17: api/streaming.py must accept the current Hermes agent callback contract.
The agent now calls tool_progress_callback(event_type, name, preview, args, **kwargs).
If the WebUI bridge only accepts (name, preview, args), live tool updates silently vanish.
"""
src = (REPO_ROOT / "api/streaming.py").read_text()
assert "def on_tool(*cb_args, **cb_kwargs):" in src, \
"streaming.py must accept variable callback args for tool progress events"
assert "reasoning_callback=on_reasoning" in src, \
"streaming.py must wire the agent's reasoning callback into the SSE bridge"
assert "put('tool_complete'" in src or 'put("tool_complete"' in src, \
"streaming.py must emit live tool completion SSE events"
def test_messages_js_supports_live_reasoning_and_tool_completion(cleanup_test_sessions):
"""R18: messages.js must render live reasoning and react to tool completion events.
Without these handlers, the operator only sees generic Thinking… or nothing
until the final done snapshot redraws the whole turn.
"""
src = (REPO_ROOT / "static/messages.js").read_text()
assert "let reasoningText=''" in src, \
"messages.js must track streamed reasoning text separately from assistant text"
assert "source.addEventListener('reasoning'" in src or 'source.addEventListener("reasoning"' in src, \
"messages.js must listen for live reasoning SSE events"
assert "source.addEventListener('tool_complete'" in src or 'source.addEventListener("tool_complete"' in src, \
"messages.js must listen for live tool completion SSE events"
assert "function _parseStreamState()" in src, \
"messages.js must parse live stream state into reasoning + visible answer"
def test_ui_js_can_upgrade_thinking_spinner_into_live_reasoning_card(cleanup_test_sessions):
"""R19: ui.js must be able to replace the placeholder thinking spinner with
streamed reasoning text while a turn is in progress.
"""
src = (REPO_ROOT / "static/ui.js").read_text()
assert "function _thinkingMarkup(text='')" in src or 'function _thinkingMarkup(text="")' in src, \
"ui.js must centralize thinking row markup so it can switch between spinner and live text"
assert "function updateThinking(text=''){appendThinking(text);}" in src or 'function updateThinking(text=""){appendThinking(text);}' in src, \
"ui.js must expose an updateThinking helper for live reasoning rendering"
# ── R17: Stack traces must not leak to clients in 500 responses ────────────
def test_500_response_has_no_trace_field():
"""R16: HTTP 500 responses must not include a 'trace' field.