Merge pull request #91 from nesquena/feat/auto-compaction-handling

feat: handle auto-compaction side effects + /compact command (#90)
This commit is contained in:
Nathan Esquenazi
2026-04-04 19:00:15 -07:00
committed by GitHub
7 changed files with 104 additions and 16 deletions

View File

@@ -5,6 +5,34 @@
---
## [v0.32] Auto-Compaction Handling + /compact Command (Issue #90)
*April 5, 2026 | 424 tests*
### Features
- **Auto-compaction detection.** When the agent's `run_conversation()` triggers
context compression and rotates the session ID, the WebUI detects the mismatch
and renames the session file + cache entry so messages don't split across files.
- **`compressed` SSE event.** Frontend receives a notification when compression
fires, shows a system message ("Context was auto-compressed") and a toast.
- **`/compact` slash command.** Type `/compact` to request the agent compress
the conversation context. Sends a natural-language message that triggers the
agent's compression preflight.
- **Real context window data.** The context usage indicator now uses actual
`context_length`, `threshold_tokens`, and `last_prompt_tokens` from the agent's
compressor instead of the client-side model name lookup. Tooltip shows the
auto-compress threshold. Hides gracefully when the agent has no compressor.
### Architecture
- `api/streaming.py`: Session ID mismatch detection after `run_conversation()`,
file rename, SESSIONS cache update under lock, `compressed` SSE event,
`context_length`/`threshold_tokens`/`last_prompt_tokens` in usage dict.
- `static/commands.js`: `/compact` command.
- `static/messages.js`: `compressed` SSE event handler.
- `static/ui.js`: `_syncCtxIndicator()` rewritten to use server-side compressor
data instead of client-side model estimates.
---
## [v0.31.2] CLI session delete fix
*April 5, 2026 | 424 tests*
@@ -1113,4 +1141,4 @@ Three-panel layout: sessions sidebar, chat area, workspace panel.
---
*Last updated: v0.31, April 4, 2026 | Tests: 424*
*Last updated: v0.32, April 5, 2026 | Tests: 424*

View File

@@ -1,6 +1,6 @@
# Hermes Web UI -- Forward Sprint Plan
> Current state: v0.31 | 424 tests | Daily driver ready
> Current state: v0.32 | 424 tests | Daily driver ready
> This document plans the path from here to two targets:
>
> Target A: 1:1 feature parity with the Hermes CLI (everything you can do from the
@@ -898,5 +898,5 @@ genuinely differentiating for an open-source project
---
*Last updated: April 5, 2026*
*Current version: v0.31.2 | 424 tests*
*Current version: v0.32 | 424 tests*
*Next sprint: Sprint 24 (Web Polish + Bug Fix Pass)*

View File

@@ -12,6 +12,7 @@ from pathlib import Path
from api.config import (
STREAMS, STREAMS_LOCK, CANCEL_FLAGS, CLI_TOOLSETS,
LOCK, SESSIONS, SESSION_DIR,
_get_session_agent_lock, _set_thread_env, _clear_thread_env,
resolve_model_provider,
)
@@ -206,6 +207,40 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta
persist_user_message=msg_text,
)
s.messages = result.get('messages') or s.messages
# ── Handle context compression side effects ──
# If compression fired inside run_conversation, the agent may have
# rotated its session_id. Detect and fix the mismatch so the WebUI
# continues writing to the correct session file.
_agent_sid = getattr(agent, 'session_id', None)
_compressed = False
if _agent_sid and _agent_sid != session_id:
old_sid = session_id
new_sid = _agent_sid
# Rename the session file
old_path = SESSION_DIR / f'{old_sid}.json'
new_path = SESSION_DIR / f'{new_sid}.json'
s.session_id = new_sid
with LOCK:
if old_sid in SESSIONS:
SESSIONS[new_sid] = SESSIONS.pop(old_sid)
if old_path.exists() and not new_path.exists():
try:
old_path.rename(new_path)
except OSError:
pass
_compressed = True
# Also detect compression via the result dict or compressor state
if not _compressed:
_compressor = getattr(agent, 'context_compressor', None)
if _compressor and getattr(_compressor, 'compression_count', 0) > 0:
_compressed = True
# Notify the frontend that compression happened
if _compressed:
put('compressed', {
'message': 'Context auto-compressed to continue the conversation',
})
# Stamp 'timestamp' on any messages that don't have one yet
_now = time.time()
for _m in s.messages:
@@ -275,6 +310,12 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta
break
s.save()
usage = {'input_tokens': input_tokens, 'output_tokens': output_tokens, 'estimated_cost': estimated_cost}
# Include context window data from the agent's compressor for the UI indicator
_cc = getattr(agent, 'context_compressor', None)
if _cc:
usage['context_length'] = getattr(_cc, 'context_length', 0) or 0
usage['threshold_tokens'] = getattr(_cc, 'threshold_tokens', 0) or 0
usage['last_prompt_tokens'] = getattr(_cc, 'last_prompt_tokens', 0) or 0
put('done', {'session': s.compact() | {'messages': s.messages, 'tool_calls': tool_calls}, 'usage': usage})
finally:
if old_cwd is None: os.environ.pop('TERMINAL_CWD', None)

View File

@@ -5,6 +5,7 @@
const COMMANDS=[
{name:'help', desc:'List available commands', fn:cmdHelp},
{name:'clear', desc:'Clear conversation messages', fn:cmdClear},
{name:'compact', desc:'Compress conversation context', fn:cmdCompact},
{name:'model', desc:'Switch model (e.g. /model gpt-4o)', fn:cmdModel, arg:'model_name'},
{name:'workspace', desc:'Switch workspace by name', fn:cmdWorkspace, arg:'name'},
{name:'new', desc:'Start a new chat session', fn:cmdNew},
@@ -99,6 +100,15 @@ async function cmdNew(){
showToast('New session created');
}
function cmdCompact(){
  // There is no dedicated compaction endpoint; instead we submit an ordinary
  // user message. The agent's run_conversation() preflight notices the high
  // token count and runs _compress_context itself, and because it is a normal
  // message it also shows up in the conversation history.
  const input=$('msg');
  input.value='Please compress and summarize the conversation context to free up space.';
  send();
  showToast('Requesting context compression...');
}
async function cmdUsage(){
const next=!window._showTokenUsage;
window._showTokenUsage=next;

View File

@@ -13,7 +13,7 @@
<body>
<div class="layout">
<aside class="sidebar">
<div class="sidebar-header"><div class="logo">H</div><div><h1 style="margin:0;font-size:15px;font-weight:700;letter-spacing:-.01em">Hermes</h1><div style="font-size:10px;color:var(--muted);opacity:.8;margin-top:1px">v0.31.2</div></div></div>
<div class="sidebar-header"><div class="logo">H</div><div><h1 style="margin:0;font-size:15px;font-weight:700;letter-spacing:-.01em">Hermes</h1><div style="font-size:10px;color:var(--muted);opacity:.8;margin-top:1px">v0.32</div></div></div>
<div class="sidebar-nav">
<button class="nav-tab active" data-panel="chat" data-label="Chat" onclick="switchPanel('chat')" title="Chat">&#128172;</button>
<button class="nav-tab" data-panel="tasks" data-label="Tasks" onclick="switchPanel('tasks')" title="Tasks">&#128197;</button>

View File

@@ -177,6 +177,17 @@ async function send(){
renderSessionList();setBusy(false);setStatus('');
});
source.addEventListener('compressed',e=>{
// Context was auto-compressed during this turn -- show a system message
if(!S.session||S.session.session_id!==activeSid) return;
try{
const d=JSON.parse(e.data);
const sysMsg={role:'assistant',content:'*[Context was auto-compressed to continue the conversation]*'};
S.messages.push(sysMsg);
showToast(d.message||'Context compressed');
}catch(err){}
});
source.addEventListener('apperror',e=>{
// Application-level error sent explicitly by the server (rate limit, crash, etc.)
// This is distinct from the SSE network 'error' event below.

View File

@@ -88,18 +88,11 @@ function _fmtTokens(n){if(!n||n<0)return'0';if(n>=1e6)return(n/1e6).toFixed(1)+'
function _syncCtxIndicator(usage){
const el=$('ctxIndicator');
if(!el)return;
const inTok=usage.input_tokens||0;
const outTok=usage.output_tokens||0;
const total=inTok+outTok;
if(!total){el.style.display='none';return;}
const promptTok=usage.last_prompt_tokens||usage.input_tokens||0;
const ctxWindow=usage.context_length||0;
if(!promptTok||!ctxWindow){el.style.display='none';return;}
el.style.display='';
// Estimate context window from model name (rough, covers major families)
// TODO: fetch exact values from server or model metadata API
const _CTX={claude:200000,gemini:1000000,'gpt-4o':128000,'gpt-5':128000,o3:200000,o4:200000,deepseek:128000,llama:128000};
const _m=(S.session&&S.session.model||'').toLowerCase();
let ctxWindow=128000;
for(const[k,v]of Object.entries(_CTX)){if(_m.includes(k)){ctxWindow=v;break;}}
const pct=Math.min(100,Math.round((inTok/ctxWindow)*100));
const pct=Math.min(100,Math.round((promptTok/ctxWindow)*100));
const bar=$('ctxBar');
const label=$('ctxLabel');
if(bar){
@@ -108,10 +101,15 @@ function _syncCtxIndicator(usage){
}
if(label){
const cost=usage.estimated_cost;
let text=`${_fmtTokens(inTok)} in \u00b7 ${_fmtTokens(outTok)} out`;
let text=`${_fmtTokens(promptTok)} / ${_fmtTokens(ctxWindow)}`;
if(pct>0) text+=` (${pct}%)`;
if(cost) text+=` \u00b7 $${cost<0.01?cost.toFixed(4):cost.toFixed(2)}`;
label.textContent=text;
}
// Update title with detailed info
const threshold=usage.threshold_tokens||0;
el.title=`Context: ${_fmtTokens(promptTok)} of ${_fmtTokens(ctxWindow)} tokens used`
+(threshold?`\nAuto-compress at ${_fmtTokens(threshold)} (${Math.round(threshold/ctxWindow*100)}%)`:'');
}
function scrollIfPinned(){