From 429a0ea228447f176223f2b5f259a204b6ba2fd9 Mon Sep 17 00:00:00 2001 From: Nathan Esquenazi Date: Sat, 4 Apr 2026 18:46:34 -0700 Subject: [PATCH 1/3] feat: handle auto-compaction side effects + /compact command The agent's run_conversation() already triggers context compression internally, but the WebUI was unaware of the side effects: 1. Session ID rotation: compression creates a new session_id inside the agent. The WebUI kept writing to the old session file, causing silent data loss. Fix: detect agent.session_id mismatch after run_conversation(), rename the session file, and update in-memory caches. 2. No user notification: compression was invisible. Fix: emit a 'compressed' SSE event when compression is detected. Frontend shows a system message and toast. 3. No manual control: Fix: add /compact slash command that sends a message to the agent requesting context compression. Shows in the autocomplete dropdown. Detection works two ways: - agent.session_id != original session_id (ID rotation) - agent.context_compressor.compression_count > 0 (compressor state) Closes #90 Co-Authored-By: Claude Opus 4.6 (1M context) --- api/streaming.py | 35 +++++++++++++++++++++++++++++++++++ static/commands.js | 10 ++++++++++ static/messages.js | 11 +++++++++++ 3 files changed, 56 insertions(+) diff --git a/api/streaming.py b/api/streaming.py index 52cdf2f..fa08894 100644 --- a/api/streaming.py +++ b/api/streaming.py @@ -12,6 +12,7 @@ from pathlib import Path from api.config import ( STREAMS, STREAMS_LOCK, CANCEL_FLAGS, CLI_TOOLSETS, + LOCK, SESSIONS, SESSION_DIR, _get_session_agent_lock, _set_thread_env, _clear_thread_env, resolve_model_provider, ) @@ -206,6 +207,40 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta persist_user_message=msg_text, ) s.messages = result.get('messages') or s.messages + + # ── Handle context compression side effects ── + # If compression fired inside run_conversation, the agent may have + # rotated its session_id. Detect and fix the mismatch so the WebUI + # continues writing to the correct session file. + _agent_sid = getattr(agent, 'session_id', None) + _compressed = False + if _agent_sid and _agent_sid != session_id: + old_sid = session_id + new_sid = _agent_sid + # Rename the session file + old_path = SESSION_DIR / f'{old_sid}.json' + new_path = SESSION_DIR / f'{new_sid}.json' + s.session_id = new_sid + with LOCK: + if old_sid in SESSIONS: + SESSIONS[new_sid] = SESSIONS.pop(old_sid) + if old_path.exists() and not new_path.exists(): + try: + old_path.rename(new_path) + except OSError: + pass + _compressed = True + # Also detect compression via the result dict or compressor state + if not _compressed: + _compressor = getattr(agent, 'context_compressor', None) + if _compressor and getattr(_compressor, 'compression_count', 0) > 0: + _compressed = True + # Notify the frontend that compression happened + if _compressed: + put('compressed', { + 'message': 'Context auto-compressed to continue the conversation', + }) + # Stamp 'timestamp' on any messages that don't have one yet _now = time.time() for _m in s.messages: diff --git a/static/commands.js b/static/commands.js index 33c24b7..1bfed92 100644 --- a/static/commands.js +++ b/static/commands.js @@ -5,6 +5,7 @@ const COMMANDS=[ {name:'help', desc:'List available commands', fn:cmdHelp}, {name:'clear', desc:'Clear conversation messages', fn:cmdClear}, + {name:'compact', desc:'Compress conversation context', fn:cmdCompact}, {name:'model', desc:'Switch model (e.g. /model gpt-4o)', fn:cmdModel, arg:'model_name'}, {name:'workspace', desc:'Switch workspace by name', fn:cmdWorkspace, arg:'name'}, {name:'new', desc:'Start a new chat session', fn:cmdNew}, @@ -99,6 +100,15 @@ async function cmdNew(){ showToast('New session created'); } +function cmdCompact(){ + // Send as a regular message to the agent -- the agent's run_conversation + // preflight will detect the high token count and trigger _compress_context. + // We send a user message so it appears in the conversation. + $('msg').value='Please compress and summarize the conversation context to free up space.'; + send(); + showToast('Requesting context compression...'); +} + async function cmdUsage(){ const next=!window._showTokenUsage; window._showTokenUsage=next; diff --git a/static/messages.js b/static/messages.js index 35cf27f..257994e 100644 --- a/static/messages.js +++ b/static/messages.js @@ -177,6 +177,17 @@ async function send(){ renderSessionList();setBusy(false);setStatus(''); }); + source.addEventListener('compressed',e=>{ + // Context was auto-compressed during this turn -- show a system message + if(!S.session||S.session.session_id!==activeSid) return; + try{ + const d=JSON.parse(e.data); + const sysMsg={role:'assistant',content:'*[Context was auto-compressed to continue the conversation]*'}; + S.messages.push(sysMsg); + showToast(d.message||'Context compressed'); + }catch(err){} + }); + source.addEventListener('apperror',e=>{ // Application-level error sent explicitly by the server (rate limit, crash, etc.) // This is distinct from the SSE network 'error' event below. From 2797e5189bba8e1fbee27bbc0edd89c26a025856 Mon Sep 17 00:00:00 2001 From: Nathan Esquenazi Date: Sat, 4 Apr 2026 18:50:17 -0700 Subject: [PATCH 2/3] feat: context window usage indicator with real agent data The context indicator in the composer footer now shows real data from the agent's context compressor instead of hardcoded estimates: - last_prompt_tokens / context_length (e.g. '12.4k / 200k (6%)') - Bar color: blue <50%, yellow 50-75%, red >75% - Hover tooltip shows exact numbers + compression threshold - Cost appended when available Backend: streaming.py now reads context_length, threshold_tokens, and last_prompt_tokens from agent.context_compressor after run_conversation() and includes them in the usage dict sent with the 'done' SSE event. This matches the CLI's context window display (the bar that shows current context vs total window). Co-Authored-By: Claude Opus 4.6 (1M context) --- api/streaming.py | 6 ++++++ static/ui.js | 22 ++++++++++------------ 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/api/streaming.py b/api/streaming.py index fa08894..6d7c480 100644 --- a/api/streaming.py +++ b/api/streaming.py @@ -310,6 +310,12 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta break s.save() usage = {'input_tokens': input_tokens, 'output_tokens': output_tokens, 'estimated_cost': estimated_cost} + # Include context window data from the agent's compressor for the UI indicator + _cc = getattr(agent, 'context_compressor', None) + if _cc: + usage['context_length'] = getattr(_cc, 'context_length', 0) or 0 + usage['threshold_tokens'] = getattr(_cc, 'threshold_tokens', 0) or 0 + usage['last_prompt_tokens'] = getattr(_cc, 'last_prompt_tokens', 0) or 0 put('done', {'session': s.compact() | {'messages': s.messages, 'tool_calls': tool_calls}, 'usage': usage}) finally: if old_cwd is None: os.environ.pop('TERMINAL_CWD', None) diff --git a/static/ui.js b/static/ui.js index fa4121a..0cb7b54 100644 --- a/static/ui.js +++ b/static/ui.js @@ -88,18 +88,11 @@ function _fmtTokens(n){if(!n||n<0)return'0';if(n>=1e6)return(n/1e6).toFixed(1)+' function _syncCtxIndicator(usage){ const el=$('ctxIndicator'); if(!el)return; - const inTok=usage.input_tokens||0; - const outTok=usage.output_tokens||0; - const total=inTok+outTok; - if(!total){el.style.display='none';return;} + const promptTok=usage.last_prompt_tokens||usage.input_tokens||0; + const ctxWindow=usage.context_length||0; + if(!promptTok||!ctxWindow){el.style.display='none';return;} el.style.display=''; - // Estimate context window from model name (rough, covers major families) - // TODO: fetch exact values from server or model metadata API - const _CTX={claude:200000,gemini:1000000,'gpt-4o':128000,'gpt-5':128000,o3:200000,o4:200000,deepseek:128000,llama:128000}; - const _m=(S.session&&S.session.model||'').toLowerCase(); - let ctxWindow=128000; - for(const[k,v]of Object.entries(_CTX)){if(_m.includes(k)){ctxWindow=v;break;}} - const pct=Math.min(100,Math.round((inTok/ctxWindow)*100)); + const pct=Math.min(100,Math.round((promptTok/ctxWindow)*100)); const bar=$('ctxBar'); const label=$('ctxLabel'); if(bar){ @@ -108,10 +101,15 @@ function _syncCtxIndicator(usage){ } if(label){ const cost=usage.estimated_cost; - let text=`${_fmtTokens(inTok)} in \u00b7 ${_fmtTokens(outTok)} out`; + let text=`${_fmtTokens(promptTok)} / ${_fmtTokens(ctxWindow)}`; + if(pct>0) text+=` (${pct}%)`; if(cost) text+=` \u00b7 $${cost<0.01?cost.toFixed(4):cost.toFixed(2)}`; label.textContent=text; } + // Update title with detailed info + const threshold=usage.threshold_tokens||0; + el.title=`Context: ${_fmtTokens(promptTok)} of ${_fmtTokens(ctxWindow)} tokens used` + +(threshold?`\nAuto-compress at ${_fmtTokens(threshold)} (${Math.round(threshold/ctxWindow*100)}%)`:''); } function scrollIfPinned(){ From 4a6769ec08dc3167fd39cc57ece71e5ef0b2a0de Mon Sep 17 00:00:00 2001 From: Nathan Esquenazi Date: Sat, 4 Apr 2026 19:00:02 -0700 Subject: [PATCH 3/3] docs: v0.32 release notes, version bump for auto-compaction handling Co-Authored-By: Claude Opus 4.6 (1M context) --- CHANGELOG.md | 30 +++++++++++++++++++++++++++++- SPRINTS.md | 4 ++-- static/index.html | 2 +- 3 files changed, 32 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ce10f0b..90189e5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,34 @@ --- +## [v0.32] Auto-Compaction Handling + /compact Command (Issue #90) +*April 5, 2026 | 424 tests* + +### Features +- **Auto-compaction detection.** When the agent's `run_conversation()` triggers + context compression and rotates the session ID, the WebUI detects the mismatch + and renames the session file + cache entry so messages don't split across files. +- **`compressed` SSE event.** Frontend receives a notification when compression + fires, shows a system message ("Context was auto-compressed") and a toast. +- **`/compact` slash command.** Type `/compact` to request the agent compress + the conversation context. Sends a natural-language message that triggers the + agent's compression preflight. +- **Real context window data.** The context usage indicator now uses actual + `context_length`, `threshold_tokens`, and `last_prompt_tokens` from the agent's + compressor instead of the client-side model name lookup. Tooltip shows the + auto-compress threshold. Hides gracefully when the agent has no compressor. + +### Architecture +- `api/streaming.py`: Session ID mismatch detection after `run_conversation()`, + file rename, SESSIONS cache update under lock, `compressed` SSE event, + `context_length`/`threshold_tokens`/`last_prompt_tokens` in usage dict. +- `static/commands.js`: `/compact` command. +- `static/messages.js`: `compressed` SSE event handler. +- `static/ui.js`: `_syncCtxIndicator()` rewritten to use server-side compressor + data instead of client-side model estimates. + +--- + ## [v0.31.2] CLI session delete fix *April 5, 2026 | 424 tests* @@ -1113,4 +1141,4 @@ Three-panel layout: sessions sidebar, chat area, workspace panel. --- -*Last updated: v0.31, April 4, 2026 | Tests: 424* +*Last updated: v0.32, April 5, 2026 | Tests: 424* diff --git a/SPRINTS.md b/SPRINTS.md index 1b860cc..daa085a 100644 --- a/SPRINTS.md +++ b/SPRINTS.md @@ -1,6 +1,6 @@ # Hermes Web UI -- Forward Sprint Plan -> Current state: v0.31 | 424 tests | Daily driver ready +> Current state: v0.32 | 424 tests | Daily driver ready > This document plans the path from here to two targets: > > Target A: 1:1 feature parity with the Hermes CLI (everything you can do from the @@ -898,5 +898,5 @@ genuinely differentiating for an open-source project --- *Last updated: April 5, 2026* -*Current version: v0.31.2 | 424 tests* +*Current version: v0.32 | 424 tests* *Next sprint: Sprint 24 (Web Polish + Bug Fix Pass)* diff --git a/static/index.html b/static/index.html index a0526b1..a4baf4b 100644 --- a/static/index.html +++ b/static/index.html @@ -13,7 +13,7 @@