From 429a0ea228447f176223f2b5f259a204b6ba2fd9 Mon Sep 17 00:00:00 2001
From: Nathan Esquenazi <nesquena@gmail.com>
Date: Sat, 4 Apr 2026 18:46:34 -0700
Subject: [PATCH 1/3] feat: handle auto-compaction side effects + /compact
 command

The agent's run_conversation() already triggers context compression
internally, but the WebUI was unaware of the side effects:

1. Session ID rotation: compression creates a new session_id inside
   the agent. The WebUI kept writing to the old session file, causing
   silent data loss. Fix: detect agent.session_id mismatch after
   run_conversation(), rename the session file, and update in-memory
   caches.

2. No user notification: compression was invisible. Fix: emit a
   'compressed' SSE event when compression is detected. Frontend shows
   a system message and toast.

3. No manual control: users had no way to request compression. Fix: add
   a /compact slash command that sends a message to the agent requesting
   context compression. It appears in the autocomplete dropdown.

Detection works two ways:
- agent.session_id != original session_id (ID rotation)
- agent.context_compressor.compression_count > 0 (compressor state)

Closes #90

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 api/streaming.py   | 35 +++++++++++++++++++++++++++++++++++
 static/commands.js | 10 ++++++++++
 static/messages.js | 11 +++++++++++
 3 files changed, 56 insertions(+)

diff --git a/api/streaming.py b/api/streaming.py
index 52cdf2f..fa08894 100644
--- a/api/streaming.py
+++ b/api/streaming.py
@@ -12,6 +12,7 @@ from pathlib import Path
 
 from api.config import (
     STREAMS, STREAMS_LOCK, CANCEL_FLAGS, CLI_TOOLSETS,
+    LOCK, SESSIONS, SESSION_DIR,
     _get_session_agent_lock, _set_thread_env, _clear_thread_env,
     resolve_model_provider,
 )
@@ -206,6 +207,40 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta
                 persist_user_message=msg_text,
             )
             s.messages = result.get('messages') or s.messages
+
+            # ── Handle context compression side effects ──
+            # If compression fired inside run_conversation, the agent may have
+            # rotated its session_id. Detect and fix the mismatch so the WebUI
+            # continues writing to the correct session file.
+            _agent_sid = getattr(agent, 'session_id', None)
+            _compressed = False
+            if _agent_sid and _agent_sid != session_id:
+                old_sid = session_id
+                new_sid = _agent_sid
+                # Move the session to the new ID: session object, cache entry, then file
+                old_path = SESSION_DIR / f'{old_sid}.json'
+                new_path = SESSION_DIR / f'{new_sid}.json'
+                s.session_id = new_sid
+                with LOCK:
+                    if old_sid in SESSIONS:
+                        SESSIONS[new_sid] = SESSIONS.pop(old_sid)
+                if old_path.exists() and not new_path.exists():
+                    try:
+                        old_path.rename(new_path)
+                    except OSError:
+                        pass
+                _compressed = True
+            # Also detect compression that did not rotate the ID, via compressor state
+            if not _compressed:
+                _compressor = getattr(agent, 'context_compressor', None)
+                if _compressor and getattr(_compressor, 'compression_count', 0) > 0:
+                    _compressed = True
+            # Notify the frontend that compression happened
+            if _compressed:
+                put('compressed', {
+                    'message': 'Context auto-compressed to continue the conversation',
+                })
+
             # Stamp 'timestamp' on any messages that don't have one yet
             _now = time.time()
             for _m in s.messages:
diff --git a/static/commands.js b/static/commands.js
index 33c24b7..1bfed92 100644
--- a/static/commands.js
+++ b/static/commands.js
@@ -5,6 +5,7 @@
 const COMMANDS=[
   {name:'help',      desc:'List available commands',             fn:cmdHelp},
   {name:'clear',     desc:'Clear conversation messages',         fn:cmdClear},
+  {name:'compact',   desc:'Compress conversation context',       fn:cmdCompact},
   {name:'model',     desc:'Switch model (e.g. /model gpt-4o)',  fn:cmdModel,     arg:'model_name'},
   {name:'workspace', desc:'Switch workspace by name',            fn:cmdWorkspace, arg:'name'},
   {name:'new',       desc:'Start a new chat session',            fn:cmdNew},
@@ -99,6 +100,15 @@ async function cmdNew(){
   showToast('New session created');
 }
 
+function cmdCompact(){
+  // Send as a regular message to the agent -- the agent's run_conversation
+  // preflight will detect the high token count and trigger _compress_context.
+  // We send a user message so it appears in the conversation.
+  $('msg').value='Please compress and summarize the conversation context to free up space.';
+  send();
+  showToast('Requesting context compression...');
+}
+
 async function cmdUsage(){
   const next=!window._showTokenUsage;
   window._showTokenUsage=next;
diff --git a/static/messages.js b/static/messages.js
index 35cf27f..257994e 100644
--- a/static/messages.js
+++ b/static/messages.js
@@ -177,6 +177,17 @@ async function send(){
       renderSessionList();setBusy(false);setStatus('');
     });
 
+    source.addEventListener('compressed',e=>{
+      // Context was auto-compressed during this turn -- show a system message
+      if(!S.session||S.session.session_id!==activeSid) return;
+      try{
+        const d=JSON.parse(e.data);
+        const sysMsg={role:'assistant',content:'*[Context was auto-compressed to continue the conversation]*'};
+        S.messages.push(sysMsg);
+        showToast(d.message||'Context compressed');
+      }catch(err){}
+    });
+
     source.addEventListener('apperror',e=>{
       // Application-level error sent explicitly by the server (rate limit, crash, etc.)
       // This is distinct from the SSE network 'error' event below.

From 2797e5189bba8e1fbee27bbc0edd89c26a025856 Mon Sep 17 00:00:00 2001
From: Nathan Esquenazi <nesquena@gmail.com>
Date: Sat, 4 Apr 2026 18:50:17 -0700
Subject: [PATCH 2/3] feat: context window usage indicator with real agent data

The context indicator in the composer footer now shows real data from
the agent's context compressor instead of hardcoded estimates:

- last_prompt_tokens / context_length (e.g. '12.4k / 200k (6%)')
- Bar color: blue <50%, yellow 50-75%, red >75%
- Hover tooltip shows exact numbers + compression threshold
- Cost appended when available

Backend: streaming.py now reads context_length, threshold_tokens, and
last_prompt_tokens from agent.context_compressor after run_conversation()
and includes them in the usage dict sent with the 'done' SSE event.

This matches the CLI's context window display (the bar that shows
current context vs total window).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 api/streaming.py |  6 ++++++
 static/ui.js     | 22 ++++++++++------------
 2 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/api/streaming.py b/api/streaming.py
index fa08894..6d7c480 100644
--- a/api/streaming.py
+++ b/api/streaming.py
@@ -310,6 +310,12 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta
                             break
             s.save()
             usage = {'input_tokens': input_tokens, 'output_tokens': output_tokens, 'estimated_cost': estimated_cost}
+            # Include context window data from the agent's compressor for the UI indicator
+            _cc = getattr(agent, 'context_compressor', None)
+            if _cc:
+                usage['context_length'] = getattr(_cc, 'context_length', 0) or 0
+                usage['threshold_tokens'] = getattr(_cc, 'threshold_tokens', 0) or 0
+                usage['last_prompt_tokens'] = getattr(_cc, 'last_prompt_tokens', 0) or 0
             put('done', {'session': s.compact() | {'messages': s.messages, 'tool_calls': tool_calls}, 'usage': usage})
           finally:
             if old_cwd is None: os.environ.pop('TERMINAL_CWD', None)
diff --git a/static/ui.js b/static/ui.js
index fa4121a..0cb7b54 100644
--- a/static/ui.js
+++ b/static/ui.js
@@ -88,18 +88,11 @@ function _fmtTokens(n){if(!n||n<0)return'0';if(n>=1e6)return(n/1e6).toFixed(1)+'
 function _syncCtxIndicator(usage){
   const el=$('ctxIndicator');
   if(!el)return;
-  const inTok=usage.input_tokens||0;
-  const outTok=usage.output_tokens||0;
-  const total=inTok+outTok;
-  if(!total){el.style.display='none';return;}
+  const promptTok=usage.last_prompt_tokens||usage.input_tokens||0;
+  const ctxWindow=usage.context_length||0;
+  if(!promptTok||!ctxWindow){el.style.display='none';return;}
   el.style.display='';
-  // Estimate context window from model name (rough, covers major families)
-  // TODO: fetch exact values from server or model metadata API
-  const _CTX={claude:200000,gemini:1000000,'gpt-4o':128000,'gpt-5':128000,o3:200000,o4:200000,deepseek:128000,llama:128000};
-  const _m=(S.session&&S.session.model||'').toLowerCase();
-  let ctxWindow=128000;
-  for(const[k,v]of Object.entries(_CTX)){if(_m.includes(k)){ctxWindow=v;break;}}
-  const pct=Math.min(100,Math.round((inTok/ctxWindow)*100));
+  const pct=Math.min(100,Math.round((promptTok/ctxWindow)*100));
   const bar=$('ctxBar');
   const label=$('ctxLabel');
   if(bar){
@@ -108,10 +101,15 @@ function _syncCtxIndicator(usage){
   }
   if(label){
     const cost=usage.estimated_cost;
-    let text=`${_fmtTokens(inTok)} in \u00b7 ${_fmtTokens(outTok)} out`;
+    let text=`${_fmtTokens(promptTok)} / ${_fmtTokens(ctxWindow)}`;
+    if(pct>0) text+=` (${pct}%)`;
     if(cost) text+=` \u00b7 $${cost<0.01?cost.toFixed(4):cost.toFixed(2)}`;
     label.textContent=text;
   }
+  // Update title with detailed info
+  const threshold=usage.threshold_tokens||0;
+  el.title=`Context: ${_fmtTokens(promptTok)} of ${_fmtTokens(ctxWindow)} tokens used`
+    +(threshold?`\nAuto-compress at ${_fmtTokens(threshold)} (${Math.round(threshold/ctxWindow*100)}%)`:'');
 }
 
 function scrollIfPinned(){

From 4a6769ec08dc3167fd39cc57ece71e5ef0b2a0de Mon Sep 17 00:00:00 2001
From: Nathan Esquenazi <nesquena@gmail.com>
Date: Sat, 4 Apr 2026 19:00:02 -0700
Subject: [PATCH 3/3] docs: v0.32 release notes, version bump for
 auto-compaction handling

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md      | 30 +++++++++++++++++++++++++++++-
 SPRINTS.md        |  4 ++--
 static/index.html |  2 +-
 3 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ce10f0b..90189e5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,34 @@
 
 ---
 
+## [v0.32] Auto-Compaction Handling + /compact Command (Issue #90)
+*April 5, 2026 | 424 tests*
+
+### Features
+- **Auto-compaction detection.** When the agent's `run_conversation()` triggers
+  context compression and rotates the session ID, the WebUI detects the mismatch
+  and renames the session file + cache entry so messages don't split across files.
+- **`compressed` SSE event.** Frontend receives a notification when compression
+  fires, shows a system message ("Context was auto-compressed") and a toast.
+- **`/compact` slash command.** Type `/compact` to request the agent compress
+  the conversation context. Sends a natural-language message that triggers the
+  agent's compression preflight.
+- **Real context window data.** The context usage indicator now uses actual
+  `context_length`, `threshold_tokens`, and `last_prompt_tokens` from the agent's
+  compressor instead of the client-side model name lookup. Tooltip shows the
+  auto-compress threshold. Hides gracefully when the agent has no compressor.
+
+### Architecture
+- `api/streaming.py`: Session ID mismatch detection after `run_conversation()`,
+  file rename, SESSIONS cache update under lock, `compressed` SSE event,
+  `context_length`/`threshold_tokens`/`last_prompt_tokens` in usage dict.
+- `static/commands.js`: `/compact` command.
+- `static/messages.js`: `compressed` SSE event handler.
+- `static/ui.js`: `_syncCtxIndicator()` rewritten to use server-side compressor
+  data instead of client-side model estimates.
+
+---
+
 ## [v0.31.2] CLI session delete fix
 *April 5, 2026 | 424 tests*
 
@@ -1113,4 +1141,4 @@ Three-panel layout: sessions sidebar, chat area, workspace panel.
 
 ---
 
-*Last updated: v0.31, April 4, 2026 | Tests: 424*
+*Last updated: v0.32, April 5, 2026 | Tests: 424*
diff --git a/SPRINTS.md b/SPRINTS.md
index 1b860cc..daa085a 100644
--- a/SPRINTS.md
+++ b/SPRINTS.md
@@ -1,6 +1,6 @@
 # Hermes Web UI -- Forward Sprint Plan
 
-> Current state: v0.31 | 424 tests | Daily driver ready
+> Current state: v0.32 | 424 tests | Daily driver ready
 > This document plans the path from here to two targets:
 >
 > Target A: 1:1 feature parity with the Hermes CLI (everything you can do from the
@@ -898,5 +898,5 @@ genuinely differentiating for an open-source project
 ---
 
 *Last updated: April 5, 2026*
-*Current version: v0.31.2 | 424 tests*
+*Current version: v0.32 | 424 tests*
 *Next sprint: Sprint 24 (Web Polish + Bug Fix Pass)*
diff --git a/static/index.html b/static/index.html
index a0526b1..a4baf4b 100644
--- a/static/index.html
+++ b/static/index.html
@@ -13,7 +13,7 @@
 <body>
 <div class="layout">
   <aside class="sidebar">
-    <div class="sidebar-header"><div class="logo">H</div><div><h1 style="margin:0;font-size:15px;font-weight:700;letter-spacing:-.01em">Hermes</h1><div style="font-size:10px;color:var(--muted);opacity:.8;margin-top:1px">v0.31.2</div></div></div>
+    <div class="sidebar-header"><div class="logo">H</div><div><h1 style="margin:0;font-size:15px;font-weight:700;letter-spacing:-.01em">Hermes</h1><div style="font-size:10px;color:var(--muted);opacity:.8;margin-top:1px">v0.32</div></div></div>
     <div class="sidebar-nav">
       <button class="nav-tab active" data-panel="chat" data-label="Chat" onclick="switchPanel('chat')" title="Chat">&#128172;</button>
       <button class="nav-tab" data-panel="tasks" data-label="Tasks" onclick="switchPanel('tasks')" title="Tasks">&#128197;</button>