Merge pull request #91 from nesquena/feat/auto-compaction-handling
feat: handle auto-compaction side effects + /compact command (#90)
This commit is contained in:
30
CHANGELOG.md
30
CHANGELOG.md
@@ -5,6 +5,34 @@
|
||||
|
||||
---
|
||||
|
||||
## [v0.32] Auto-Compaction Handling + /compact Command (Issue #90)
|
||||
*April 5, 2026 | 424 tests*
|
||||
|
||||
### Features
|
||||
- **Auto-compaction detection.** When the agent's `run_conversation()` triggers
|
||||
context compression and rotates the session ID, the WebUI detects the mismatch
|
||||
and renames the session file + cache entry so messages don't split across files.
|
||||
- **`compressed` SSE event.** Frontend receives a notification when compression
|
||||
fires, shows a system message ("Context was auto-compressed") and a toast.
|
||||
- **`/compact` slash command.** Type `/compact` to ask the agent to compress
|
||||
the conversation context. Sends a natural-language message that triggers the
|
||||
agent's compression preflight.
|
||||
- **Real context window data.** The context usage indicator now uses actual
|
||||
`context_length`, `threshold_tokens`, and `last_prompt_tokens` from the agent's
|
||||
compressor instead of the client-side model name lookup. Tooltip shows the
|
||||
auto-compress threshold. The indicator is hidden when the agent has no compressor.
|
||||
|
||||
### Architecture
|
||||
- `api/streaming.py`: Session ID mismatch detection after `run_conversation()`,
|
||||
file rename, SESSIONS cache update under lock, `compressed` SSE event,
|
||||
`context_length`/`threshold_tokens`/`last_prompt_tokens` in usage dict.
|
||||
- `static/commands.js`: `/compact` command.
|
||||
- `static/messages.js`: `compressed` SSE event handler.
|
||||
- `static/ui.js`: `_syncCtxIndicator()` rewritten to use server-side compressor
|
||||
data instead of client-side model estimates.
|
||||
|
||||
---
|
||||
|
||||
## [v0.31.2] CLI session delete fix
|
||||
*April 5, 2026 | 424 tests*
|
||||
|
||||
@@ -1113,4 +1141,4 @@ Three-panel layout: sessions sidebar, chat area, workspace panel.
|
||||
|
||||
---
|
||||
|
||||
*Last updated: v0.31, April 4, 2026 | Tests: 424*
|
||||
*Last updated: v0.32, April 5, 2026 | Tests: 424*
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# Hermes Web UI -- Forward Sprint Plan
|
||||
|
||||
> Current state: v0.31 | 424 tests | Daily driver ready
|
||||
> Current state: v0.32 | 424 tests | Daily driver ready
|
||||
> This document plans the path from here to two targets:
|
||||
>
|
||||
> Target A: 1:1 feature parity with the Hermes CLI (everything you can do from the
|
||||
@@ -898,5 +898,5 @@ genuinely differentiating for an open-source project
|
||||
---
|
||||
|
||||
*Last updated: April 5, 2026*
|
||||
*Current version: v0.31.2 | 424 tests*
|
||||
*Current version: v0.32 | 424 tests*
|
||||
*Next sprint: Sprint 24 (Web Polish + Bug Fix Pass)*
|
||||
|
||||
@@ -12,6 +12,7 @@ from pathlib import Path
|
||||
|
||||
from api.config import (
|
||||
STREAMS, STREAMS_LOCK, CANCEL_FLAGS, CLI_TOOLSETS,
|
||||
LOCK, SESSIONS, SESSION_DIR,
|
||||
_get_session_agent_lock, _set_thread_env, _clear_thread_env,
|
||||
resolve_model_provider,
|
||||
)
|
||||
@@ -206,6 +207,40 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta
|
||||
persist_user_message=msg_text,
|
||||
)
|
||||
s.messages = result.get('messages') or s.messages
|
||||
|
||||
# ── Handle context compression side effects ──
|
||||
# If compression fired inside run_conversation, the agent may have
|
||||
# rotated its session_id. Detect and fix the mismatch so the WebUI
|
||||
# continues writing to the correct session file.
|
||||
_agent_sid = getattr(agent, 'session_id', None)
|
||||
_compressed = False
|
||||
if _agent_sid and _agent_sid != session_id:
|
||||
old_sid = session_id
|
||||
new_sid = _agent_sid
|
||||
# Rename the session file
|
||||
old_path = SESSION_DIR / f'{old_sid}.json'
|
||||
new_path = SESSION_DIR / f'{new_sid}.json'
|
||||
s.session_id = new_sid
|
||||
with LOCK:
|
||||
if old_sid in SESSIONS:
|
||||
SESSIONS[new_sid] = SESSIONS.pop(old_sid)
|
||||
if old_path.exists() and not new_path.exists():
|
||||
try:
|
||||
old_path.rename(new_path)
|
||||
except OSError:
|
||||
pass
|
||||
_compressed = True
|
||||
# Also detect compression via the compressor's own state (covers the case where the session ID was not rotated)
|
||||
if not _compressed:
|
||||
_compressor = getattr(agent, 'context_compressor', None)
|
||||
if _compressor and getattr(_compressor, 'compression_count', 0) > 0:
|
||||
_compressed = True
|
||||
# Notify the frontend that compression happened
|
||||
if _compressed:
|
||||
put('compressed', {
|
||||
'message': 'Context auto-compressed to continue the conversation',
|
||||
})
|
||||
|
||||
# Stamp 'timestamp' on any messages that don't have one yet
|
||||
_now = time.time()
|
||||
for _m in s.messages:
|
||||
@@ -275,6 +310,12 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta
|
||||
break
|
||||
s.save()
|
||||
usage = {'input_tokens': input_tokens, 'output_tokens': output_tokens, 'estimated_cost': estimated_cost}
|
||||
# Include context window data from the agent's compressor for the UI indicator
|
||||
_cc = getattr(agent, 'context_compressor', None)
|
||||
if _cc:
|
||||
usage['context_length'] = getattr(_cc, 'context_length', 0) or 0
|
||||
usage['threshold_tokens'] = getattr(_cc, 'threshold_tokens', 0) or 0
|
||||
usage['last_prompt_tokens'] = getattr(_cc, 'last_prompt_tokens', 0) or 0
|
||||
put('done', {'session': s.compact() | {'messages': s.messages, 'tool_calls': tool_calls}, 'usage': usage})
|
||||
finally:
|
||||
if old_cwd is None: os.environ.pop('TERMINAL_CWD', None)
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
const COMMANDS=[
|
||||
{name:'help', desc:'List available commands', fn:cmdHelp},
|
||||
{name:'clear', desc:'Clear conversation messages', fn:cmdClear},
|
||||
{name:'compact', desc:'Compress conversation context', fn:cmdCompact},
|
||||
{name:'model', desc:'Switch model (e.g. /model gpt-4o)', fn:cmdModel, arg:'model_name'},
|
||||
{name:'workspace', desc:'Switch workspace by name', fn:cmdWorkspace, arg:'name'},
|
||||
{name:'new', desc:'Start a new chat session', fn:cmdNew},
|
||||
@@ -99,6 +100,15 @@ async function cmdNew(){
|
||||
showToast('New session created');
|
||||
}
|
||||
|
||||
/**
 * `/compact` command: ask the agent to compress the conversation context.
 *
 * Fills the `msg` input with a fixed natural-language request and submits it
 * through the normal send() path, so the request appears as a user message in
 * the conversation. Per the original design note, the agent's
 * run_conversation preflight is expected to detect the high token count and
 * trigger _compress_context.
 *
 * @returns {*} Whatever send() returns (a promise, since send() is async), so
 *   the caller can await it or handle a rejection instead of it being dropped.
 */
function cmdCompact(){
  $('msg').value='Please compress and summarize the conversation context to free up space.';
  // Kick off the send first, then show the toast -- same order as before.
  const pending=send();
  showToast('Requesting context compression...');
  return pending;
}
|
||||
|
||||
async function cmdUsage(){
|
||||
const next=!window._showTokenUsage;
|
||||
window._showTokenUsage=next;
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
<body>
|
||||
<div class="layout">
|
||||
<aside class="sidebar">
|
||||
<div class="sidebar-header"><div class="logo">H</div><div><h1 style="margin:0;font-size:15px;font-weight:700;letter-spacing:-.01em">Hermes</h1><div style="font-size:10px;color:var(--muted);opacity:.8;margin-top:1px">v0.31.2</div></div></div>
|
||||
<div class="sidebar-header"><div class="logo">H</div><div><h1 style="margin:0;font-size:15px;font-weight:700;letter-spacing:-.01em">Hermes</h1><div style="font-size:10px;color:var(--muted);opacity:.8;margin-top:1px">v0.32</div></div></div>
|
||||
<div class="sidebar-nav">
|
||||
<button class="nav-tab active" data-panel="chat" data-label="Chat" onclick="switchPanel('chat')" title="Chat">💬</button>
|
||||
<button class="nav-tab" data-panel="tasks" data-label="Tasks" onclick="switchPanel('tasks')" title="Tasks">📅</button>
|
||||
|
||||
@@ -177,6 +177,17 @@ async function send(){
|
||||
renderSessionList();setBusy(false);setStatus('');
|
||||
});
|
||||
|
||||
source.addEventListener('compressed',e=>{
|
||||
// Context was auto-compressed during this turn -- show a system message
|
||||
if(!S.session||S.session.session_id!==activeSid) return;
|
||||
try{
|
||||
const d=JSON.parse(e.data);
|
||||
const sysMsg={role:'assistant',content:'*[Context was auto-compressed to continue the conversation]*'};
|
||||
S.messages.push(sysMsg);
|
||||
showToast(d.message||'Context compressed');
|
||||
}catch(err){}
|
||||
});
|
||||
|
||||
source.addEventListener('apperror',e=>{
|
||||
// Application-level error sent explicitly by the server (rate limit, crash, etc.)
|
||||
// This is distinct from the SSE network 'error' event below.
|
||||
|
||||
22
static/ui.js
22
static/ui.js
@@ -88,18 +88,11 @@ function _fmtTokens(n){if(!n||n<0)return'0';if(n>=1e6)return(n/1e6).toFixed(1)+'
|
||||
function _syncCtxIndicator(usage){
|
||||
const el=$('ctxIndicator');
|
||||
if(!el)return;
|
||||
const inTok=usage.input_tokens||0;
|
||||
const outTok=usage.output_tokens||0;
|
||||
const total=inTok+outTok;
|
||||
if(!total){el.style.display='none';return;}
|
||||
const promptTok=usage.last_prompt_tokens||usage.input_tokens||0;
|
||||
const ctxWindow=usage.context_length||0;
|
||||
if(!promptTok||!ctxWindow){el.style.display='none';return;}
|
||||
el.style.display='';
|
||||
// Estimate context window from model name (rough, covers major families)
|
||||
// TODO: fetch exact values from server or model metadata API
|
||||
const _CTX={claude:200000,gemini:1000000,'gpt-4o':128000,'gpt-5':128000,o3:200000,o4:200000,deepseek:128000,llama:128000};
|
||||
const _m=(S.session&&S.session.model||'').toLowerCase();
|
||||
let ctxWindow=128000;
|
||||
for(const[k,v]of Object.entries(_CTX)){if(_m.includes(k)){ctxWindow=v;break;}}
|
||||
const pct=Math.min(100,Math.round((inTok/ctxWindow)*100));
|
||||
const pct=Math.min(100,Math.round((promptTok/ctxWindow)*100));
|
||||
const bar=$('ctxBar');
|
||||
const label=$('ctxLabel');
|
||||
if(bar){
|
||||
@@ -108,10 +101,15 @@ function _syncCtxIndicator(usage){
|
||||
}
|
||||
if(label){
|
||||
const cost=usage.estimated_cost;
|
||||
let text=`${_fmtTokens(inTok)} in \u00b7 ${_fmtTokens(outTok)} out`;
|
||||
let text=`${_fmtTokens(promptTok)} / ${_fmtTokens(ctxWindow)}`;
|
||||
if(pct>0) text+=` (${pct}%)`;
|
||||
if(cost) text+=` \u00b7 $${cost<0.01?cost.toFixed(4):cost.toFixed(2)}`;
|
||||
label.textContent=text;
|
||||
}
|
||||
// Update title with detailed info
|
||||
const threshold=usage.threshold_tokens||0;
|
||||
el.title=`Context: ${_fmtTokens(promptTok)} of ${_fmtTokens(ctxWindow)} tokens used`
|
||||
+(threshold?`\nAuto-compress at ${_fmtTokens(threshold)} (${Math.round(threshold/ctxWindow*100)}%)`:'');
|
||||
}
|
||||
|
||||
function scrollIfPinned(){
|
||||
|
||||
Reference in New Issue
Block a user