Merge pull request #91 from nesquena/feat/auto-compaction-handling

feat: handle auto-compaction side effects + /compact command (#90)
This commit is contained in:
Nathan Esquenazi
2026-04-04 19:00:15 -07:00
committed by GitHub
7 changed files with 104 additions and 16 deletions

View File

@@ -5,6 +5,34 @@
---
## [v0.32] Auto-Compaction Handling + /compact Command (Issue #90)
*April 5, 2026 | 424 tests*
### Features
- **Auto-compaction detection.** When the agent's `run_conversation()` triggers
context compression and rotates the session ID, the WebUI detects the mismatch
and renames the session file + cache entry so messages don't split across files.
- **`compressed` SSE event.** Frontend receives a notification when compression
fires, shows a system message ("Context was auto-compressed") and a toast.
- **`/compact` slash command.** Type `/compact` to request the agent compress
the conversation context. Sends a natural-language message that triggers the
agent's compression preflight.
- **Real context window data.** The context usage indicator now uses actual
`context_length`, `threshold_tokens`, and `last_prompt_tokens` from the agent's
compressor instead of the client-side model name lookup. Tooltip shows the
auto-compress threshold. Hides gracefully when the agent has no compressor.
### Architecture
- `api/streaming.py`: Session ID mismatch detection after `run_conversation()`,
file rename, SESSIONS cache update under lock, `compressed` SSE event,
`context_length`/`threshold_tokens`/`last_prompt_tokens` in usage dict.
- `static/commands.js`: `/compact` command.
- `static/messages.js`: `compressed` SSE event handler.
- `static/ui.js`: `_syncCtxIndicator()` rewritten to use server-side compressor
data instead of client-side model estimates.
---
## [v0.31.2] CLI session delete fix
*April 5, 2026 | 424 tests*
@@ -1113,4 +1141,4 @@ Three-panel layout: sessions sidebar, chat area, workspace panel.
---
*Last updated: v0.31, April 4, 2026 | Tests: 424*
*Last updated: v0.32, April 5, 2026 | Tests: 424*

View File

@@ -1,6 +1,6 @@
# Hermes Web UI -- Forward Sprint Plan
> Current state: v0.31 | 424 tests | Daily driver ready
> Current state: v0.32 | 424 tests | Daily driver ready
> This document plans the path from here to two targets:
>
> Target A: 1:1 feature parity with the Hermes CLI (everything you can do from the
@@ -898,5 +898,5 @@ genuinely differentiating for an open-source project
---
*Last updated: April 5, 2026*
*Current version: v0.31.2 | 424 tests*
*Current version: v0.32 | 424 tests*
*Next sprint: Sprint 24 (Web Polish + Bug Fix Pass)*

View File

@@ -12,6 +12,7 @@ from pathlib import Path
from api.config import (
STREAMS, STREAMS_LOCK, CANCEL_FLAGS, CLI_TOOLSETS,
LOCK, SESSIONS, SESSION_DIR,
_get_session_agent_lock, _set_thread_env, _clear_thread_env,
resolve_model_provider,
)
@@ -206,6 +207,40 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta
persist_user_message=msg_text,
)
s.messages = result.get('messages') or s.messages
# ── Handle context compression side effects ──
# If compression fired inside run_conversation, the agent may have
# rotated its session_id. Detect and fix the mismatch so the WebUI
# continues writing to the correct session file.
_agent_sid = getattr(agent, 'session_id', None)
_compressed = False
if _agent_sid and _agent_sid != session_id:
old_sid = session_id
new_sid = _agent_sid
# Rename the session file
old_path = SESSION_DIR / f'{old_sid}.json'
new_path = SESSION_DIR / f'{new_sid}.json'
s.session_id = new_sid
with LOCK:
if old_sid in SESSIONS:
SESSIONS[new_sid] = SESSIONS.pop(old_sid)
if old_path.exists() and not new_path.exists():
try:
old_path.rename(new_path)
except OSError:
pass
_compressed = True
# Also detect compression via the result dict or compressor state
if not _compressed:
_compressor = getattr(agent, 'context_compressor', None)
if _compressor and getattr(_compressor, 'compression_count', 0) > 0:
_compressed = True
# Notify the frontend that compression happened
if _compressed:
put('compressed', {
'message': 'Context auto-compressed to continue the conversation',
})
# Stamp 'timestamp' on any messages that don't have one yet
_now = time.time()
for _m in s.messages:
@@ -275,6 +310,12 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta
break
s.save()
usage = {'input_tokens': input_tokens, 'output_tokens': output_tokens, 'estimated_cost': estimated_cost}
# Include context window data from the agent's compressor for the UI indicator
_cc = getattr(agent, 'context_compressor', None)
if _cc:
usage['context_length'] = getattr(_cc, 'context_length', 0) or 0
usage['threshold_tokens'] = getattr(_cc, 'threshold_tokens', 0) or 0
usage['last_prompt_tokens'] = getattr(_cc, 'last_prompt_tokens', 0) or 0
put('done', {'session': s.compact() | {'messages': s.messages, 'tool_calls': tool_calls}, 'usage': usage})
finally:
if old_cwd is None: os.environ.pop('TERMINAL_CWD', None)

View File

@@ -5,6 +5,7 @@
const COMMANDS=[
{name:'help', desc:'List available commands', fn:cmdHelp},
{name:'clear', desc:'Clear conversation messages', fn:cmdClear},
{name:'compact', desc:'Compress conversation context', fn:cmdCompact},
{name:'model', desc:'Switch model (e.g. /model gpt-4o)', fn:cmdModel, arg:'model_name'},
{name:'workspace', desc:'Switch workspace by name', fn:cmdWorkspace, arg:'name'},
{name:'new', desc:'Start a new chat session', fn:cmdNew},
@@ -99,6 +100,15 @@ async function cmdNew(){
showToast('New session created');
}
function cmdCompact(){
  // There is no dedicated compaction endpoint; instead we submit an ordinary
  // user message. The agent's run_conversation() preflight notices the high
  // token count and runs _compress_context itself, and because it is a normal
  // message it also shows up in the conversation history.
  const input=$('msg');
  input.value='Please compress and summarize the conversation context to free up space.';
  send();
  showToast('Requesting context compression...');
}
async function cmdUsage(){
const next=!window._showTokenUsage;
window._showTokenUsage=next;

View File

@@ -13,7 +13,7 @@
<body>
<div class="layout">
<aside class="sidebar">
<div class="sidebar-header"><div class="logo">H</div><div><h1 style="margin:0;font-size:15px;font-weight:700;letter-spacing:-.01em">Hermes</h1><div style="font-size:10px;color:var(--muted);opacity:.8;margin-top:1px">v0.31.2</div></div></div>
<div class="sidebar-header"><div class="logo">H</div><div><h1 style="margin:0;font-size:15px;font-weight:700;letter-spacing:-.01em">Hermes</h1><div style="font-size:10px;color:var(--muted);opacity:.8;margin-top:1px">v0.32</div></div></div>
<div class="sidebar-nav">
<button class="nav-tab active" data-panel="chat" data-label="Chat" onclick="switchPanel('chat')" title="Chat">&#128172;</button>
<button class="nav-tab" data-panel="tasks" data-label="Tasks" onclick="switchPanel('tasks')" title="Tasks">&#128197;</button>

View File

@@ -177,6 +177,17 @@ async function send(){
renderSessionList();setBusy(false);setStatus('');
});
source.addEventListener('compressed',e=>{
// Context was auto-compressed during this turn -- show a system message
if(!S.session||S.session.session_id!==activeSid) return;
try{
const d=JSON.parse(e.data);
const sysMsg={role:'assistant',content:'*[Context was auto-compressed to continue the conversation]*'};
S.messages.push(sysMsg);
showToast(d.message||'Context compressed');
}catch(err){}
});
source.addEventListener('apperror',e=>{
// Application-level error sent explicitly by the server (rate limit, crash, etc.)
// This is distinct from the SSE network 'error' event below.

View File

@@ -88,18 +88,11 @@ function _fmtTokens(n){if(!n||n<0)return'0';if(n>=1e6)return(n/1e6).toFixed(1)+'
function _syncCtxIndicator(usage){
const el=$('ctxIndicator');
if(!el)return;
const inTok=usage.input_tokens||0;
const outTok=usage.output_tokens||0;
const total=inTok+outTok;
if(!total){el.style.display='none';return;}
const promptTok=usage.last_prompt_tokens||usage.input_tokens||0;
const ctxWindow=usage.context_length||0;
if(!promptTok||!ctxWindow){el.style.display='none';return;}
el.style.display='';
// Estimate context window from model name (rough, covers major families)
// TODO: fetch exact values from server or model metadata API
const _CTX={claude:200000,gemini:1000000,'gpt-4o':128000,'gpt-5':128000,o3:200000,o4:200000,deepseek:128000,llama:128000};
const _m=(S.session&&S.session.model||'').toLowerCase();
let ctxWindow=128000;
for(const[k,v]of Object.entries(_CTX)){if(_m.includes(k)){ctxWindow=v;break;}}
const pct=Math.min(100,Math.round((inTok/ctxWindow)*100));
const pct=Math.min(100,Math.round((promptTok/ctxWindow)*100));
const bar=$('ctxBar');
const label=$('ctxLabel');
if(bar){
@@ -108,10 +101,15 @@ function _syncCtxIndicator(usage){
}
if(label){
const cost=usage.estimated_cost;
let text=`${_fmtTokens(inTok)} in \u00b7 ${_fmtTokens(outTok)} out`;
let text=`${_fmtTokens(promptTok)} / ${_fmtTokens(ctxWindow)}`;
if(pct>0) text+=` (${pct}%)`;
if(cost) text+=` \u00b7 $${cost<0.01?cost.toFixed(4):cost.toFixed(2)}`;
label.textContent=text;
}
// Update title with detailed info
const threshold=usage.threshold_tokens||0;
el.title=`Context: ${_fmtTokens(promptTok)} of ${_fmtTokens(ctxWindow)} tokens used`
+(threshold?`\nAuto-compress at ${_fmtTokens(threshold)} (${Math.round(threshold/ctxWindow*100)}%)`:'');
}
function scrollIfPinned(){