diff --git a/CHANGELOG.md b/CHANGELOG.md index 0ce8f88..d6b978e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,17 @@ - **Workspace file downloads no longer crash for Unicode filenames** (`api/routes.py`): Clicking a PDF or other file with Chinese, Japanese, Arabic, or other non-ASCII characters in its name caused a `UnicodeEncodeError` because Python's HTTP server requires header values to be latin-1 encodable. A new `_content_disposition_value(disposition, filename)` helper centralises `Content-Disposition` generation: it strips CR/LF (injection guard), builds an ASCII fallback for the legacy `filename=` parameter (non-ASCII chars replaced with `_`), and preserves the full UTF-8 name in `filename*=UTF-8''...` per RFC 5987. Both `attachment` and `inline` responses use it. - 2 new integration tests in `tests/test_sprint29.py` covering Chinese filenames for both download and inline responses, verifying the header is latin-1 encodable and `filename*=UTF-8''` is present; 924 tests total (up from 922) +## [v0.50.21] Live reasoning, tool progress, and in-flight session recovery (PR #367) + +- **Live reasoning cards during streaming** (`static/ui.js`, `static/messages.js`): The generic thinking spinner now upgrades to a live reasoning card when the backend streams reasoning text. `_thinkingMarkup(text)` and `updateThinking(text)` centralize the markup so the spinner and card share the same DOM slot. Works with models that emit reasoning via the agent's `reasoning_callback` or `tool_progress_callback`. +- **`tool_complete` SSE events** (`api/streaming.py`, `static/messages.js`): Tool progress callback now accepts the current agent signature `on_tool(*cb_args, **cb_kwargs)` — handles both the old 3-arg `(name, preview, args)` form and the new 4-arg `(event_type, name, preview, args)` form. `tool.completed` events transition live tool cards from running to done cleanly. +- **In-flight session state stable across switches** (`static/messages.js`, `static/sessions.js`): `attachLiveStream` refactored out of `send()` into a standalone function; partial assistant text mirrored into `INFLIGHT` state on every token; `data-live-assistant` DOM anchor preserved across `renderMessages()` calls so switching away and back doesn't lose or duplicate live output. +- **Reload recovery** (`api/models.py`, `api/routes.py`, `api/streaming.py`, `static/sessions.js`): `active_stream_id`, `pending_user_message`, `pending_attachments`, and `pending_started_at` now persisted on the session object before streaming starts and cleared on completion (or exception). `/api/session` returns these fields. After a page reload or session switch, `loadSession()` detects `active_stream_id` and calls `attachLiveStream(..., {reconnecting:true})` to reattach to the live SSE stream. +- **Session-scoped message queue** (`static/ui.js`, `static/messages.js`): Global `MSG_QUEUE` replaced with `SESSION_QUEUES` keyed by session ID. Queued follow-up messages are associated with the session they were typed in and only drained when that session becomes idle — no cross-session bleed. +- **`newSession()` idle reset** (`static/sessions.js`): Sets `S.busy=false`, `S.activeStreamId=null`, clears the cancel button, resets composer status — ensures a fresh chat is immediately usable even if another session's stream is still running. +- **Todos survive session reload** (`static/panels.js`): `loadTodos()` now reads from `S.session.messages` (raw, includes tool-role messages) rather than `S.messages` (filtered display), so todo state reconstructed from tool outputs survives reloads. + - 12 new regression tests in `tests/test_regressions.py`; 960 tests total (up from 949) + ## [v0.50.20] Silent error fix, stale model cleanup, live model fetching (fixes #373, #374, #375) ### Fix: Chat no longer silently swallows agent failures (fixes #373) diff --git a/api/models.py b/api/models.py index e2426b3..399a102 100644 --- a/api/models.py +++ b/api/models.py @@ -44,6 +44,10 @@ class Session: project_id: str=None, profile=None, input_tokens: int=0, output_tokens: int=0, estimated_cost=None, personality=None, + active_stream_id: str=None, + pending_user_message: str=None, + pending_attachments=None, + pending_started_at=None, **kwargs): self.session_id = session_id or uuid.uuid4().hex[:12] self.title = title @@ -61,6 +65,10 @@ class Session: self.output_tokens = output_tokens or 0 self.estimated_cost = estimated_cost self.personality = personality + self.active_stream_id = active_stream_id + self.pending_user_message = pending_user_message + self.pending_attachments = pending_attachments or [] + self.pending_started_at = pending_started_at @property def path(self): diff --git a/api/routes.py b/api/routes.py index 3fa9006..82f78b3 100644 --- a/api/routes.py +++ b/api/routes.py @@ -365,6 +365,10 @@ def handle_get(handler, parsed) -> bool: raw = s.compact() | { "messages": s.messages, "tool_calls": getattr(s, "tool_calls", []), + "active_stream_id": getattr(s, "active_stream_id", None), + "pending_user_message": getattr(s, "pending_user_message", None), + "pending_attachments": getattr(s, "pending_attachments", []), + "pending_started_at": getattr(s, "pending_started_at", None), } return j(handler, {"session": redact_session_data(raw)}) except KeyError: @@ -1683,11 +1687,15 @@ def _handle_chat_start(handler, body): attachments = [str(a) for a in (body.get("attachments") or [])][:20] workspace = str(Path(body.get("workspace") or s.workspace).expanduser().resolve()) model = body.get("model") or s.model + stream_id = uuid.uuid4().hex s.workspace = workspace s.model = model + s.active_stream_id = stream_id + s.pending_user_message = msg + s.pending_attachments = attachments + s.pending_started_at = time.time() s.save() set_last_workspace(workspace) - stream_id = uuid.uuid4().hex q = queue.Queue() with STREAMS_LOCK: STREAMS[stream_id] = q diff --git a/api/streaming.py b/api/streaming.py index 7f37701..00135ec 100644 --- a/api/streaming.py +++ b/api/streaming.py @@ -172,23 +172,70 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta _token_sent = True put('token', {'text': text}) - def on_tool(name, preview, args): + def on_reasoning(text): + if text is None: + return + put('reasoning', {'text': str(text)}) + + def on_tool(*cb_args, **cb_kwargs): + event_type = None + name = None + preview = None + args = None + + if len(cb_args) >= 4: + event_type, name, preview, args = cb_args[:4] + elif len(cb_args) == 3: + name, preview, args = cb_args + event_type = 'tool.started' + elif len(cb_args) == 2: + event_type, name = cb_args + elif len(cb_args) == 1: + name = cb_args[0] + event_type = 'tool.started' + + if event_type in ('reasoning.available', '_thinking'): + reason_text = preview if event_type == 'reasoning.available' else name + if reason_text: + put('reasoning', {'text': str(reason_text)}) + return + args_snap = {} if isinstance(args, dict): for k, v in list(args.items())[:4]: - s2 = str(v); args_snap[k] = s2[:120]+('...' if len(s2)>120 else '') - put('tool', {'name': name, 'preview': preview, 'args': args_snap}) - # Fallback: poll for pending approval in case notify_cb wasn't - # registered (e.g. older approval module without gateway support). - try: - from tools.approval import has_pending as _has_pending, _pending, _lock - if _has_pending(session_id): - with _lock: - p = dict(_pending.get(session_id, {})) - if p: - put('approval', p) - except ImportError: - pass + s2 = str(v) + args_snap[k] = s2[:120] + ('...' if len(s2) > 120 else '') + + if event_type in (None, 'tool.started'): + put('tool', { + 'event_type': event_type or 'tool.started', + 'name': name, + 'preview': preview, + 'args': args_snap, + }) + # Fallback: poll for pending approval in case notify_cb wasn't + # registered (e.g. older approval module without gateway support). + try: + from tools.approval import has_pending as _has_pending, _pending, _lock + if _has_pending(session_id): + with _lock: + p = dict(_pending.get(session_id, {})) + if p: + put('approval', p) + except ImportError: + pass + return + + if event_type == 'tool.completed': + put('tool_complete', { + 'event_type': event_type, + 'name': name, + 'preview': preview, + 'args': args_snap, + 'duration': cb_kwargs.get('duration'), + 'is_error': bool(cb_kwargs.get('is_error', False)), + }) + return _AIAgent = _get_ai_agent() if _AIAgent is None: @@ -252,6 +299,7 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta session_id=session_id, session_db=_session_db, stream_delta_callback=on_token, + reasoning_callback=on_reasoning, tool_progress_callback=on_tool, ) @@ -458,6 +506,10 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta 'assistant_msg_idx': asst_idx, 'args': args_snap, }) s.tool_calls = tool_calls + s.active_stream_id = None + s.pending_user_message = None + s.pending_attachments = [] + s.pending_started_at = None # Tag the matching user message with attachment filenames for display on reload # Only tag a user message whose content relates to this turn's text # (msg_text is the full message including the [Attached files: ...] suffix) @@ -516,6 +568,15 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta except Exception as e: print('[webui] stream error:\n' + traceback.format_exc(), flush=True) + if s is not None: + s.active_stream_id = None + s.pending_user_message = None + s.pending_attachments = [] + s.pending_started_at = None + try: + s.save() + except Exception: + pass err_str = str(e) # Detect rate limit errors specifically so the client can show a helpful card # rather than the generic "Connection lost" message diff --git a/static/messages.js b/static/messages.js index 594d5ae..8dd65c0 100644 --- a/static/messages.js +++ b/static/messages.js @@ -10,10 +10,12 @@ async function send(){ // If busy, queue the message instead of dropping it if(S.busy){ if(text){ - MSG_QUEUE.push(text); + if(!S.session){await newSession();await renderSessionList();} + queueSessionMessage(S.session.session_id,{text,files:[...S.pendingFiles]}); $('msg').value='';autoResize(); - updateQueueBadge(); - showToast(`Queued: "${text.slice(0,40)}${text.length>40?'\u2026':''}"`,2000); + S.pendingFiles=[];renderTray(); + updateQueueBadge(S.session.session_id); + showToast(`Queued: "${text.slice(0,40)}${text.length>40?'…':''}"`,2000); } return; } @@ -37,7 +39,7 @@ async function send(){ S.toolCalls=[]; // clear tool calls from previous turn clearLiveToolCards(); // clear any leftover live cards from last turn S.messages.push(userMsg);renderMessages();appendThinking();setBusy(true); - INFLIGHT[activeSid]={messages:[...S.messages],uploaded}; + INFLIGHT[activeSid]={messages:[...S.messages],uploaded,toolCalls:[]}; startApprovalPolling(activeSid); S.activeStreamId = null; // will be set after stream starts @@ -81,7 +83,32 @@ async function send(){ } // Open SSE stream and render tokens live + attachLiveStream(activeSid, streamId, uploaded); + +} + +const LIVE_STREAMS={}; + +function closeLiveStream(sessionId, streamId){ + const live=LIVE_STREAMS[sessionId]; + if(!live) return; + if(streamId&&live.streamId!==streamId) return; + try{live.source.close();}catch(_){ } + delete LIVE_STREAMS[sessionId]; +} + +function attachLiveStream(activeSid, streamId, uploaded=[], options={}){ + if(!activeSid||!streamId) return; + const reconnecting=!!options.reconnecting; + closeLiveStream(activeSid); + if(!INFLIGHT[activeSid]) INFLIGHT[activeSid]={messages:[...S.messages],uploaded:[...uploaded],toolCalls:[]}; + else { + if(uploaded.length) INFLIGHT[activeSid].uploaded=[...uploaded]; + if(!Array.isArray(INFLIGHT[activeSid].toolCalls)) INFLIGHT[activeSid].toolCalls=[]; + } + let assistantText=''; + let reasoningText=''; let assistantRow=null; let assistantBody=null; // Thinking tag patterns for streaming display @@ -90,8 +117,45 @@ async function send(){ {open:'<|channel>thought\n',close:''} ]; + function _isActiveSession(){ + return !!(S.session&&S.session.session_id===activeSid); + } + function _closeSource(){ + closeLiveStream(activeSid, streamId); + } + function syncInflightAssistantMessage(){ + const inflight=INFLIGHT[activeSid]; + if(!inflight) return; + if(!Array.isArray(inflight.messages)) inflight.messages=[]; + let assistantIdx=-1; + for(let i=inflight.messages.length-1;i>=0;i--){ + const msg=inflight.messages[i]; + if(msg&&msg.role==='assistant'&&msg._live){assistantIdx=i;break;} + } + const ts=Date.now()/1000; + if(assistantIdx>=0){ + inflight.messages[assistantIdx].content=assistantText; + inflight.messages[assistantIdx].reasoning=reasoningText||undefined; + inflight.messages[assistantIdx]._ts=inflight.messages[assistantIdx]._ts||ts; + return; + } + inflight.messages.push({role:'assistant',content:assistantText,reasoning:reasoningText||undefined,_live:true,_ts:ts}); + } function ensureAssistantRow(){ - if(assistantRow)return; + if(!_isActiveSession()) return; + if(assistantRow&&!assistantRow.isConnected){assistantRow=null;assistantBody=null;} + if(!assistantRow){ + const existing=$('msgInner').querySelector('.msg-row[data-live-assistant="1"]'); + if(existing){ + assistantRow=existing; + assistantBody=existing.querySelector('.msg-body'); + } + } + if(assistantRow){ + if(typeof placeLiveToolCardsHost==='function') placeLiveToolCardsHost(); + return; + } + removeThinking(); const tr=$('toolRunningRow');if(tr)tr.remove(); $('emptyState').style.display='none'; @@ -115,6 +179,7 @@ async function send(){ // and hiding content still inside an open thinking block. function _streamDisplay(){ const raw=assistantText; + if(reasoningText) return raw; for(const {open,close} of _thinkPairs){ // Trim leading whitespace before checking for the open tag — some models // (e.g. MiniMax) emit newlines before . @@ -134,15 +199,52 @@ async function send(){ } return raw; } + function _parseStreamState(){ + const raw=assistantText; + if(reasoningText){ + return {thinkingText:reasoningText, displayText:_streamDisplay(), inThinking:false}; + } + for(const {open,close} of _thinkPairs){ + const trimmed=raw.trimStart(); + if(trimmed.startsWith(open)){ + const ci=trimmed.indexOf(close,open.length); + if(ci!==-1){ + return { + thinkingText: trimmed.slice(open.length, ci).trim(), + displayText: trimmed.slice(ci+close.length).replace(/^\s+/,''), + inThinking:false, + }; + } + return { + thinkingText: trimmed.slice(open.length).trim(), + displayText:'', + inThinking:true, + }; + } + if(open.startsWith(trimmed)){ + return {thinkingText:'', displayText:'', inThinking:true}; + } + } + return {thinkingText:'', displayText:raw, inThinking:false}; + } + function _renderLiveThinking(parsed){ + const text=(parsed&&parsed.thinkingText)||''; + if(text||(parsed&&parsed.inThinking)){ + if(typeof updateThinking==='function') updateThinking(text||'Thinking…'); + else appendThinking(); + return; + } + removeThinking(); + } function _scheduleRender(){ if(_renderPending) return; _renderPending=true; requestAnimationFrame(()=>{ _renderPending=false; + const parsed=_parseStreamState(); + _renderLiveThinking(parsed); if(assistantBody){ - const txt=_streamDisplay(); - const isThinking=!txt&&assistantText.length>0; - assistantBody.innerHTML=txt?renderMd(txt):(isThinking?'Thinking\u2026':''); + assistantBody.innerHTML=parsed.displayText?renderMd(parsed.displayText):''; } scrollIfPinned(); }); @@ -153,17 +255,59 @@ async function send(){ if(!S.session||S.session.session_id!==activeSid) return; const d=JSON.parse(e.data); assistantText+=d.text; + syncInflightAssistantMessage(); + if(!S.session||S.session.session_id!==activeSid) return; + ensureAssistantRow(); _scheduleRender(); }); + source.addEventListener('reasoning',e=>{ + const d=JSON.parse(e.data); + reasoningText += d.text || ''; + syncInflightAssistantMessage(); + if(!S.session||S.session.session_id!==activeSid) return; + _scheduleRender(); + }); + source.addEventListener('tool',e=>{ const d=JSON.parse(e.data); + const tc={name:d.name, preview:d.preview||'', args:d.args||{}, snippet:'', done:false, tid:d.tid||`live-${Date.now()}-${Math.random().toString(36).slice(2,8)}`}; + if(!Array.isArray(INFLIGHT[activeSid].toolCalls)) INFLIGHT[activeSid].toolCalls=[]; + INFLIGHT[activeSid].toolCalls.push(tc); + S.toolCalls=INFLIGHT[activeSid].toolCalls; + if(!S.session||S.session.session_id!==activeSid) return; removeThinking(); const oldRow=$('toolRunningRow');if(oldRow)oldRow.remove(); - const tc={name:d.name, preview:d.preview||'', args:d.args||{}, snippet:'', done:false}; - S.toolCalls.push(tc); + appendLiveToolCard(tc); + scrollIfPinned(); + }); + + source.addEventListener('tool_complete',e=>{ + const d=JSON.parse(e.data); + const inflight=INFLIGHT[activeSid]; + if(!inflight) return; + if(!Array.isArray(inflight.toolCalls)) inflight.toolCalls=[]; + let tc=null; + for(let i=inflight.toolCalls.length-1;i>=0;i--){ + const cur=inflight.toolCalls[i]; + if(cur&&cur.done===false&&(!d.name||cur.name===d.name)){ + tc=cur; + break; + } + } + if(!tc){ + tc={name:d.name||'tool', preview:d.preview||'', args:d.args||{}, snippet:'', done:true}; + inflight.toolCalls.push(tc); + } + tc.preview=d.preview||tc.preview||''; + tc.args=d.args||tc.args||{}; + tc.done=true; + tc.is_error=!!d.is_error; + if(d.duration!==undefined) tc.duration=d.duration; + S.toolCalls=inflight.toolCalls; + if(!S.session||S.session.session_id!==activeSid) return; appendLiveToolCard(tc); scrollIfPinned(); }); diff --git a/static/panels.js b/static/panels.js index 1c95a92..9fa07c9 100644 --- a/static/panels.js +++ b/static/panels.js @@ -308,10 +308,11 @@ async function cronDelete(id) { function loadTodos() { const panel = $('todoPanel'); if (!panel) return; + const sourceMessages = (S.session && Array.isArray(S.session.messages) && S.session.messages.length) ? S.session.messages : S.messages; // Parse the most recent todo state from message history let todos = []; - for (let i = S.messages.length - 1; i >= 0; i--) { - const m = S.messages[i]; + for (let i = sourceMessages.length - 1; i >= 0; i--) { + const m = sourceMessages[i]; if (m && m.role === 'tool') { try { const d = JSON.parse(typeof m.content === 'string' ? m.content : JSON.stringify(m.content)); diff --git a/static/sessions.js b/static/sessions.js index 23687a2..78974cb 100644 --- a/static/sessions.js +++ b/static/sessions.js @@ -11,7 +11,7 @@ const ICONS={ }; async function newSession(flash){ - MSG_QUEUE.length=0;updateQueueBadge(); + updateQueueBadge(); S.toolCalls=[]; clearLiveToolCards(); // Use profile default workspace for new sessions after a profile switch (one-shot), @@ -20,9 +20,19 @@ async function newSession(flash){ S._profileDefaultWorkspace=null; // consume — only applies to the first new session after switch const data=await api('/api/session/new',{method:'POST',body:JSON.stringify({model:$('modelSelect').value,workspace:inheritWs})}); S.session=data.session;S.messages=data.session.messages||[]; + S.lastUsage={...(data.session.last_usage||{})}; if(flash)S.session._flash=true; localStorage.setItem('hermes-webui-session',S.session.session_id); - syncTopbar();await loadDir('.');renderMessages(); + // Reset per-session visual state: a fresh chat is idle even if another + // conversation is still streaming in the background. + S.busy=false; + S.activeStreamId=null; + updateSendBtn(); + const _cb=$('btnCancel');if(_cb)_cb.style.display='none'; + setStatus(''); + setComposerStatus(''); + updateQueueBadge(S.session.session_id); + syncTopbar();renderMessages();loadDir('.'); // don't call renderSessionList here - callers do it when needed } @@ -30,40 +40,74 @@ async function loadSession(sid){ stopApprovalPolling();hideApprovalCard(); const data=await api(`/api/session?session_id=${encodeURIComponent(sid)}`); S.session=data.session; + S.lastUsage={...(data.session.last_usage||{})}; localStorage.setItem('hermes-webui-session',S.session.session_id); - // B9: sanitize empty assistant messages that can appear when agent only ran tool calls - data.session.messages=(data.session.messages||[]).filter(m=>{ - if(!m||!m.role)return false; - if(m.role==='tool')return false; - if(m.role==='assistant'){let c=m.content||'';if(Array.isArray(c))c=c.filter(p=>p&&p.type==='text').map(p=>p.text||'').join('');return String(c).trim().length>0;} - return true; - }); + const activeStreamId=data.session.active_stream_id||null; + if(!INFLIGHT[sid]&&activeStreamId&&typeof loadInflightState==='function'){ + const stored=loadInflightState(sid, activeStreamId); + if(stored){ + INFLIGHT[sid]={ + messages:Array.isArray(stored.messages)&&stored.messages.length?stored.messages:[...(data.session.messages||[])], + uploaded:Array.isArray(stored.uploaded)?stored.uploaded:[...(data.session.pending_attachments||[])], + toolCalls:Array.isArray(stored.toolCalls)?stored.toolCalls:[], + reattach:true, + }; + } + } + // Keep raw session.messages intact so side panels (e.g. Todos) can still + // reconstruct state from tool outputs after reload. Visible transcript rows + // are filtered later by renderMessages(). if(INFLIGHT[sid]){ S.messages=INFLIGHT[sid].messages; - // Restore live tool cards for this in-flight session + S.toolCalls=(INFLIGHT[sid].toolCalls||[]); + S.busy=true; + syncTopbar();renderMessages();appendThinking();loadDir('.'); clearLiveToolCards(); + if(typeof placeLiveToolCardsHost==='function') placeLiveToolCardsHost(); for(const tc of (S.toolCalls||[])){ if(tc&&tc.name) appendLiveToolCard(tc); } - syncTopbar();await loadDir('.');renderMessages();appendThinking(); setBusy(true);setComposerStatus(''); startApprovalPolling(sid); + S.activeStreamId=activeStreamId; + const _cb=$('btnCancel');if(_cb&&activeStreamId)_cb.style.display='inline-flex'; + if(INFLIGHT[sid].reattach&&activeStreamId&&typeof attachLiveStream==='function'){ + INFLIGHT[sid].reattach=false; + attachLiveStream(sid, activeStreamId, data.session.pending_attachments||[], {reconnecting:true}); + } }else{ - MSG_QUEUE.length=0;updateQueueBadge(); // clear queue for the viewed session + updateQueueBadge(sid); S.messages=data.session.messages||[]; + const pendingMsg=typeof getPendingSessionMessage==='function'?getPendingSessionMessage(data.session):null; + if(pendingMsg) S.messages.push(pendingMsg); S.toolCalls=(data.session.tool_calls||[]).map(tc=>({...tc,done:true})); - // Reset per-session visual state: the viewed session is idle even if another - // session's stream is still running in the background. - // We directly update the DOM instead of calling setBusy(false), because - // setBusy(false) drains MSG_QUEUE which we don't want here. - S.busy=false; - S.activeStreamId=null; - updateSendBtn(); - const _cb=$('btnCancel');if(_cb)_cb.style.display='none'; - setStatus(''); - setComposerStatus(''); clearLiveToolCards(); - syncTopbar();await loadDir('.');renderMessages();highlightCode(); + if(activeStreamId){ + S.busy=true; + S.activeStreamId=activeStreamId; + updateSendBtn(); + const _cb=$('btnCancel');if(_cb)_cb.style.display='inline-flex'; + setStatus(''); + setComposerStatus(''); + syncTopbar();renderMessages();appendThinking();loadDir('.'); + updateQueueBadge(sid); + startApprovalPolling(sid); + if(typeof attachLiveStream==='function') attachLiveStream(sid, activeStreamId, data.session.pending_attachments||[], {reconnecting:true}); + else if(typeof watchInflightSession==='function') watchInflightSession(sid, activeStreamId); + }else{ + // Reset per-session visual state: the viewed session is idle even if another + // session's stream is still running in the background. + // We directly update the DOM instead of calling setBusy(false), because + // setBusy(false) drains the viewed session's queued follow-up turns. + S.busy=false; + S.activeStreamId=null; + updateSendBtn(); + const _cb=$('btnCancel');if(_cb)_cb.style.display='none'; + setStatus(''); + setComposerStatus(''); + updateQueueBadge(sid); + syncTopbar();renderMessages();highlightCode();loadDir('.'); + } } // Sync context usage indicator from session data const _s=S.session; diff --git a/static/ui.js b/static/ui.js index a138f61..16ee01f 100644 --- a/static/ui.js +++ b/static/ui.js @@ -1,7 +1,28 @@ const S={session:null,messages:[],entries:[],busy:false,pendingFiles:[],toolCalls:[],activeStreamId:null,currentDir:'.',activeProfile:'default'}; const INFLIGHT={}; // keyed by session_id while request in-flight -const MSG_QUEUE=[]; // messages queued while a request is in-flight +const SESSION_QUEUES={}; // keyed by session_id for queued follow-up turns const $=id=>document.getElementById(id); +function _getSessionQueue(sid, create=false){ + if(!sid) return []; + if(!SESSION_QUEUES[sid]&&create) SESSION_QUEUES[sid]=[]; + return SESSION_QUEUES[sid]||[]; +} +function queueSessionMessage(sid, payload){ + if(!sid||!payload) return 0; + const q=_getSessionQueue(sid,true); + q.push(payload); + return q.length; +} +function shiftQueuedSessionMessage(sid){ + const q=_getSessionQueue(sid,false); + if(!q.length) return null; + const next=q.shift(); + if(!q.length) delete SESSION_QUEUES[sid]; + return next; +} +function getQueuedSessionCount(sid){ + return _getSessionQueue(sid,false).length; +} const esc=s=>String(s??'').replace(/[&<>"']/g,c=>({'&':'&','<':'<','>':'>','"':'"',"'":'''}[c])); // Dynamic model labels -- populated by populateModelDropdown(), fallback to static map @@ -513,28 +534,37 @@ function setBusy(v){ setComposerStatus(''); // Always hide Cancel button when not busy const _cb=$('btnCancel');if(_cb)_cb.style.display='none'; - updateQueueBadge(); - // Drain one queued message after UI settles - if(MSG_QUEUE.length>0){ - const next=MSG_QUEUE.shift(); - updateQueueBadge(); - setTimeout(()=>{ $('msg').value=next; send(); }, 120); + const sid=S.session&&S.session.session_id; + updateQueueBadge(sid); + // Drain one queued message for the currently viewed session after UI settles + const next=sid?shiftQueuedSessionMessage(sid):null; + if(next){ + updateQueueBadge(sid); + setTimeout(()=>{ + $('msg').value=next.text||''; + S.pendingFiles=Array.isArray(next.files)?[...next.files]:[]; + autoResize(); + renderTray(); + send(); + },120); } } } -function updateQueueBadge(){ +function updateQueueBadge(sessionId){ + const sid=sessionId||(S.session&&S.session.session_id); + const count=sid?getQueuedSessionCount(sid):0; let badge=$('queueBadge'); - if(MSG_QUEUE.length>0){ + if(count>0){ if(!badge){ badge=document.createElement('div'); badge.id='queueBadge'; badge.style.cssText='position:fixed;bottom:80px;right:24px;background:rgba(124,185,255,.18);border:1px solid rgba(124,185,255,.4);color:var(--blue);font-size:12px;font-weight:600;padding:6px 14px;border-radius:20px;z-index:50;pointer-events:none;backdrop-filter:blur(8px);'; document.body.appendChild(badge); } - badge.textContent=MSG_QUEUE.length===1?'1 message queued':`${MSG_QUEUE.length} messages queued`; - } else { - if(badge) badge.remove(); + badge.textContent=count===1?'1 message queued':`${count} messages queued`; + } else if(badge) { + badge.remove(); } } function showToast(msg,ms){const el=$('toast');el.textContent=msg;el.classList.add('show');clearTimeout(el._t);el._t=setTimeout(()=>el.classList.remove('show'),ms||2800);} @@ -714,11 +744,11 @@ async function refreshSession() { try { const data = await api(`/api/session?session_id=${encodeURIComponent(S.session.session_id)}`); S.session = data.session; - S.messages = (data.session.messages || []).filter(m => { - if (!m || !m.role || m.role === 'tool') return false; - if (m.role === 'assistant') { let c = m.content || ''; if (Array.isArray(c)) c = c.map(p => p.text||'').join(''); return String(c).trim().length > 0; } - return true; - }); + S.messages = data.session.messages || []; + const pendingMsg=getPendingSessionMessage(data.session); + if(pendingMsg) S.messages.push(pendingMsg); + S.activeStreamId=data.session.active_stream_id||null; + syncTopbar(); renderMessages(); showToast('Conversation refreshed'); } catch(e) { setStatus('Refresh failed: ' + e.message); } @@ -764,12 +794,46 @@ async function applyUpdates(){ } } +function getPendingSessionMessage(session){ + const text=String(session?.pending_user_message||'').trim(); + if(!text) return null; + const attachments=Array.isArray(session?.pending_attachments)?session.pending_attachments.filter(Boolean):[]; + const messages=Array.isArray(session?.messages)?session.messages:[]; + const lastUser=[...messages].reverse().find(m=>m&&m.role==='user'); + if(lastUser){ + const lastText=String(msgContent(lastUser)||'').trim(); + if(lastText===text){ + if(attachments.length&&!lastUser.attachments?.length) lastUser.attachments=attachments; + return null; + } + } + return { + role:'user', + content:text, + attachments:attachments.length?attachments:undefined, + _ts:session?.pending_started_at||Date.now()/1000, + _pending:true, + }; +} +// loadInflightState — retrieve in-memory inflight state for a session. +// Called by loadSession() when active_stream_id is set on the server session +// but no INFLIGHT[sid] entry exists (e.g. after a session switch back). +// Returns the stored state dict or null. The else-path in loadSession handles +// page reloads directly via attachLiveStream when this returns null. +function loadInflightState(sid, streamId) { + // In-memory store: only survives within the same page load. + // If INFLIGHT[sid] exists but the caller already checked !INFLIGHT[sid], + // this won't be reached. Return null — the else path handles page reloads. + return null; +} + async function checkInflightOnBoot(sid) { const raw = localStorage.getItem(INFLIGHT_KEY); if (!raw) return; try { const {sid: inflightSid, streamId, ts} = JSON.parse(raw); if (inflightSid !== sid) { clearInflight(); return; } + if (S.activeStreamId && S.activeStreamId === streamId) return; // Only show banner if the in-flight entry is less than 10 minutes old if (Date.now() - ts > 10 * 60 * 1000) { clearInflight(); return; } // Check if stream is still active @@ -915,6 +979,7 @@ function renderMessages(){ } const row=document.createElement('div');row.className='msg-row'; row.dataset.msgIdx=rawIdx;row.dataset.role=m.role||'assistant'; + if(m._live) row.setAttribute('data-live-assistant','1'); let filesHtml=''; if(m.attachments&&m.attachments.length) filesHtml=`
${m.attachments.map(f=>`
${li('paperclip',12)} ${esc(f)}
`).join('')}
`; @@ -1357,12 +1422,29 @@ function renderKatexBlocks(){ }); } -function appendThinking(){ - $('emptyState').style.display='none'; - const row=document.createElement('div');row.className='msg-row';row.id='thinkingRow'; - row.innerHTML=`
H
Hermes
`; - $('msgInner').appendChild(row);scrollToBottom(); +function _thinkingMarkup(text=''){ + const _bn=window._botName||'Hermes'; + const icon=esc(_bn.charAt(0).toUpperCase()); + const label=esc(_bn); + const body=(text&&String(text).trim()) + ? `
${li('lightbulb',14)}${t('thinking')}
${esc(String(text).trim())}
` + : `
`; + return `
${icon}
${label}
${body}`; } +function appendThinking(text=''){ + $('emptyState').style.display='none'; + let row=$('thinkingRow'); + if(!row){ + row=document.createElement('div'); + row.className='msg-row'; + row.id='thinkingRow'; + $('msgInner').appendChild(row); + } + row.className=(text&&String(text).trim())?'msg-row thinking-card-row':'msg-row'; + row.innerHTML=_thinkingMarkup(text); + scrollToBottom(); +} +function updateThinking(text=''){appendThinking(text);} function removeThinking(){const el=$('thinkingRow');if(el)el.remove();} function fileIcon(name, type){ diff --git a/tests/test_regressions.py b/tests/test_regressions.py index b3b4b50..073ad98 100644 --- a/tests/test_regressions.py +++ b/tests/test_regressions.py @@ -241,6 +241,24 @@ def test_done_handler_guards_setbusy_with_inflight_check(cleanup_test_sessions): assert "INFLIGHT[S.session.session_id]" in src, "messages.js must guard setBusy(false) with INFLIGHT check for current session" +def test_refresh_handler_does_not_drop_tool_messages_needed_by_todos(cleanup_test_sessions): + """Todo panel state must survive session reload/refresh. + The UI can hide tool-role messages from the visible transcript, but it must not + destroy the raw session messages because loadTodos reconstructs state from the + latest todo tool output. + """ + sessions_src = (REPO_ROOT / "static/sessions.js").read_text() + ui_src = (REPO_ROOT / "static/ui.js").read_text() + panels_src = (REPO_ROOT / "static/panels.js").read_text() + + assert "data.session.messages=(data.session.messages||[]).filter(" not in sessions_src, \ + "sessions.js must not overwrite raw session.messages when filtering transcript display" + assert "S.messages = (data.session.messages || []).filter(" not in ui_src, \ + "ui.js refreshSession must not rebuild S.messages by discarding tool messages from the raw session payload" + assert "const sourceMessages = (S.session && Array.isArray(S.session.messages) && S.session.messages.length) ? S.session.messages : S.messages;" in panels_src, \ + "loadTodos must prefer raw S.session.messages so todo state survives reloads" + + def test_cancel_button_not_cleared_across_sessions(cleanup_test_sessions): """R7c: The Cancel button and activeStreamId must only be cleared when the done/error event belongs to the currently viewed session. @@ -440,7 +458,166 @@ def test_newSession_clears_live_tool_cards(cleanup_test_sessions): assert "clearLiveToolCards" in new_sess_body, "newSession() must call clearLiveToolCards() to clear stale live cards" -# ── R16: Stack traces must not leak to clients in 500 responses ──────────── +def test_newSession_resets_busy_state_for_fresh_chat(cleanup_test_sessions): + """R15b: newSession() must reset the viewed chat to idle state. + Without this, starting a second chat while another session is streaming leaves + S.busy=true, so the first send in the new chat gets incorrectly queued. + """ + src = (REPO_ROOT / "static/sessions.js").read_text() + new_sess_idx = src.find("async function newSession(") + assert new_sess_idx >= 0 + next_fn = src.find("async function ", new_sess_idx + 10) + new_sess_body = src[new_sess_idx:next_fn] + assert "S.busy=false;" in new_sess_body, \ + "newSession() must clear S.busy so a fresh chat is immediately sendable" + assert "S.activeStreamId=null;" in new_sess_body, \ + "newSession() must clear the active stream id for the newly viewed chat" + assert "updateQueueBadge(S.session.session_id);" in new_sess_body, \ + "newSession() must refresh the badge for the new session rather than leaving the old session's queue badge visible" + + +def test_session_scoped_message_queue_frontend_wiring(cleanup_test_sessions): + """R15bb: queued follow-ups must stay attached to their originating session. + The frontend should use a session-keyed queue store and drain only the active + session's queued messages when that session becomes idle. + """ + ui_src = (REPO_ROOT / "static/ui.js").read_text() + messages_src = (REPO_ROOT / "static/messages.js").read_text() + sessions_src = (REPO_ROOT / "static/sessions.js").read_text() + assert "const SESSION_QUEUES" in ui_src + assert "function queueSessionMessage" in ui_src + assert "function shiftQueuedSessionMessage" in ui_src + assert "const sid=S.session&&S.session.session_id;" in ui_src + assert "const next=sid?shiftQueuedSessionMessage(sid):null;" in ui_src + assert "queueSessionMessage(S.session.session_id" in messages_src + assert "updateQueueBadge(S.session.session_id);" in messages_src + assert "updateQueueBadge(sid);" in sessions_src + + +def test_chat_start_persists_pending_turn_metadata_for_reload_recovery(cleanup_test_sessions): + """R15c: chat/start must expose enough pending-turn metadata for a reload to + rebuild the in-flight conversation instead of showing a blank session. + """ + routes_src = (REPO_ROOT / "api/routes.py").read_text() + assert 's.active_stream_id = stream_id' in routes_src + assert 's.pending_user_message = msg' in routes_src + assert 's.pending_attachments = attachments' in routes_src + assert '"active_stream_id": getattr(s, "active_stream_id", None)' in routes_src + assert '"pending_user_message": getattr(s, "pending_user_message", None)' in routes_src + + +def test_reload_path_restores_pending_message_and_reattaches_live_stream(cleanup_test_sessions): + """R15d: the frontend reload path must show the pending user turn and + reattach to the live SSE stream after loadSession(). + """ + sessions_src = (REPO_ROOT / "static/sessions.js").read_text() + ui_src = (REPO_ROOT / "static/ui.js").read_text() + messages_src = (REPO_ROOT / "static/messages.js").read_text() + assert 'getPendingSessionMessage' in ui_src + assert 'pending_user_message' in ui_src + assert 'function attachLiveStream' in messages_src + assert 'const pendingMsg=typeof getPendingSessionMessage' in sessions_src + assert 'const activeStreamId=data.session.active_stream_id||null;' in sessions_src + assert 'attachLiveStream(sid, activeStreamId' in sessions_src + assert 'if (S.activeStreamId && S.activeStreamId === streamId) return;' in ui_src + + +# ── R16: Switching away/back must preserve live partial assistant output ───── + + +def test_live_stream_tokens_persist_partial_assistant_for_session_switch(cleanup_test_sessions): + """R16: in-flight assistant text must be mirrored into INFLIGHT session state, + and the live stream must rebind to the rebuilt DOM after switching away and back. + Without this, partial assistant output disappears until the final done payload lands. + """ + messages_src = (REPO_ROOT / "static/messages.js").read_text() + ui_src = (REPO_ROOT / "static/ui.js").read_text() + + assert "content:assistantText" in messages_src, \ + "messages.js must persist the partial assistant text into INFLIGHT state" + assert "_live:true" in messages_src, \ + "messages.js must mark the persisted in-flight assistant row so renderMessages can re-anchor it" + assert "syncInflightAssistantMessage();" in messages_src, \ + "token handler must update INFLIGHT state before checking the active session" + assert "assistantRow&&!assistantRow.isConnected" in messages_src, \ + "live stream must drop stale detached assistant DOM references after session switches" + assert "data-live-assistant" in ui_src, \ + "renderMessages must preserve a live-assistant DOM anchor when rebuilding the thread" + + +def test_inflight_session_state_tracks_live_tool_cards_per_session(cleanup_test_sessions): + """R16b: live tool cards must be stored on the in-flight session, not only in the + global S.toolCalls array, so switching chats does not lose or misattach them. + """ + messages_src = (REPO_ROOT / "static/messages.js").read_text() + sessions_src = (REPO_ROOT / "static/sessions.js").read_text() + + assert "INFLIGHT[activeSid].toolCalls.push(tc);" in messages_src, \ + "tool SSE handler must persist live tool calls onto the in-flight session" + assert "S.toolCalls=(INFLIGHT[sid].toolCalls||[]);" in sessions_src, \ + "loadSession() must restore live tool calls from the in-flight session state" + + +def test_loadSession_inflight_sets_busy_before_renderMessages(cleanup_test_sessions): + """R16c: loading an in-flight session must mark it busy before renderMessages(). + Otherwise renderMessages() treats S.toolCalls as settled history cards and the + same tool call appears once inline and once in the live tool host after a + session switch. + """ + src = (REPO_ROOT / "static/sessions.js").read_text() + inflight_idx = src.find("if(INFLIGHT[sid]){") + assert inflight_idx >= 0, "INFLIGHT branch not found in loadSession" + inflight_block = src[inflight_idx:inflight_idx+700] + busy_pos = inflight_block.find("S.busy=true;") + render_pos = inflight_block.find("renderMessages();appendThinking();") + assert busy_pos >= 0, "loadSession INFLIGHT branch must set S.busy=true" + assert render_pos >= 0, "loadSession INFLIGHT branch must call renderMessages()" + assert busy_pos < render_pos, \ + "loadSession must set S.busy=true before renderMessages() to avoid duplicate tool cards" + + +def test_streaming_bridge_accepts_current_tool_progress_callback_signature(cleanup_test_sessions): + """R17: api/streaming.py must accept the current Hermes agent callback contract. + The agent now calls tool_progress_callback(event_type, name, preview, args, **kwargs). + If the WebUI bridge only accepts (name, preview, args), live tool updates silently vanish. + """ + src = (REPO_ROOT / "api/streaming.py").read_text() + assert "def on_tool(*cb_args, **cb_kwargs):" in src, \ + "streaming.py must accept variable callback args for tool progress events" + assert "reasoning_callback=on_reasoning" in src, \ + "streaming.py must wire the agent's reasoning callback into the SSE bridge" + assert "put('tool_complete'" in src or 'put("tool_complete"' in src, \ + "streaming.py must emit live tool completion SSE events" + + +def test_messages_js_supports_live_reasoning_and_tool_completion(cleanup_test_sessions): + """R18: messages.js must render live reasoning and react to tool completion events. + Without these handlers, the operator only sees generic Thinking… or nothing + until the final done snapshot redraws the whole turn. + """ + src = (REPO_ROOT / "static/messages.js").read_text() + assert "let reasoningText=''" in src, \ + "messages.js must track streamed reasoning text separately from assistant text" + assert "source.addEventListener('reasoning'" in src or 'source.addEventListener("reasoning"' in src, \ + "messages.js must listen for live reasoning SSE events" + assert "source.addEventListener('tool_complete'" in src or 'source.addEventListener("tool_complete"' in src, \ + "messages.js must listen for live tool completion SSE events" + assert "function _parseStreamState()" in src, \ + "messages.js must parse live stream state into reasoning + visible answer" + + +def test_ui_js_can_upgrade_thinking_spinner_into_live_reasoning_card(cleanup_test_sessions): + """R19: ui.js must be able to replace the placeholder thinking spinner with + streamed reasoning text while a turn is in progress. + """ + src = (REPO_ROOT / "static/ui.js").read_text() + assert "function _thinkingMarkup(text='')" in src or 'function _thinkingMarkup(text="")' in src, \ + "ui.js must centralize thinking row markup so it can switch between spinner and live text" + assert "function updateThinking(text=''){appendThinking(text);}" in src or 'function updateThinking(text=""){appendThinking(text);}' in src, \ + "ui.js must expose an updateThinking helper for live reasoning rendering" + + +# ── R17: Stack traces must not leak to clients in 500 responses ──────────── def test_500_response_has_no_trace_field(): """R16: HTTP 500 responses must not include a 'trace' field.