feat: handle auto-compaction side effects + /compact command

The agent's run_conversation() already triggers context compression
internally, but the WebUI was unaware of the side effects:

1. Session ID rotation: compression creates a new session_id inside
   the agent. The WebUI kept writing to the old session file, causing
   silent data loss. Fix: detect agent.session_id mismatch after
   run_conversation(), rename the session file, and update in-memory
   caches.

2. No user notification: compression was invisible. Fix: emit a
   'compressed' SSE event when compression is detected. Frontend shows
   a system message and toast.

3. No manual control: Fix: add /compact slash command that sends a
   message to the agent requesting context compression. Shows in the
   autocomplete dropdown.

Detection works two ways:
- agent.session_id != original session_id (ID rotation)
- agent.context_compressor.compression_count > 0 (compressor state)

Closes #90

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Nathan Esquenazi
2026-04-04 18:46:34 -07:00
parent 2e7ce0a341
commit 429a0ea228
3 changed files with 56 additions and 0 deletions

View File

@@ -12,6 +12,7 @@ from pathlib import Path
from api.config import ( from api.config import (
STREAMS, STREAMS_LOCK, CANCEL_FLAGS, CLI_TOOLSETS, STREAMS, STREAMS_LOCK, CANCEL_FLAGS, CLI_TOOLSETS,
LOCK, SESSIONS, SESSION_DIR,
_get_session_agent_lock, _set_thread_env, _clear_thread_env, _get_session_agent_lock, _set_thread_env, _clear_thread_env,
resolve_model_provider, resolve_model_provider,
) )
@@ -206,6 +207,40 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta
persist_user_message=msg_text, persist_user_message=msg_text,
) )
s.messages = result.get('messages') or s.messages s.messages = result.get('messages') or s.messages
# ── Handle context compression side effects ──
# If compression fired inside run_conversation, the agent may have
# rotated its session_id. Detect and fix the mismatch so the WebUI
# continues writing to the correct session file.
_agent_sid = getattr(agent, 'session_id', None)
_compressed = False
if _agent_sid and _agent_sid != session_id:
old_sid = session_id
new_sid = _agent_sid
# Rename the session file
old_path = SESSION_DIR / f'{old_sid}.json'
new_path = SESSION_DIR / f'{new_sid}.json'
s.session_id = new_sid
with LOCK:
if old_sid in SESSIONS:
SESSIONS[new_sid] = SESSIONS.pop(old_sid)
if old_path.exists() and not new_path.exists():
try:
old_path.rename(new_path)
except OSError:
pass
_compressed = True
# Also detect compression via the result dict or compressor state
if not _compressed:
_compressor = getattr(agent, 'context_compressor', None)
if _compressor and getattr(_compressor, 'compression_count', 0) > 0:
_compressed = True
# Notify the frontend that compression happened
if _compressed:
put('compressed', {
'message': 'Context auto-compressed to continue the conversation',
})
# Stamp 'timestamp' on any messages that don't have one yet # Stamp 'timestamp' on any messages that don't have one yet
_now = time.time() _now = time.time()
for _m in s.messages: for _m in s.messages:

View File

@@ -5,6 +5,7 @@
const COMMANDS=[ const COMMANDS=[
{name:'help', desc:'List available commands', fn:cmdHelp}, {name:'help', desc:'List available commands', fn:cmdHelp},
{name:'clear', desc:'Clear conversation messages', fn:cmdClear}, {name:'clear', desc:'Clear conversation messages', fn:cmdClear},
{name:'compact', desc:'Compress conversation context', fn:cmdCompact},
{name:'model', desc:'Switch model (e.g. /model gpt-4o)', fn:cmdModel, arg:'model_name'}, {name:'model', desc:'Switch model (e.g. /model gpt-4o)', fn:cmdModel, arg:'model_name'},
{name:'workspace', desc:'Switch workspace by name', fn:cmdWorkspace, arg:'name'}, {name:'workspace', desc:'Switch workspace by name', fn:cmdWorkspace, arg:'name'},
{name:'new', desc:'Start a new chat session', fn:cmdNew}, {name:'new', desc:'Start a new chat session', fn:cmdNew},
@@ -99,6 +100,15 @@ async function cmdNew(){
showToast('New session created'); showToast('New session created');
} }
function cmdCompact(){
// Send as a regular message to the agent -- the agent's run_conversation
// preflight will detect the high token count and trigger _compress_context.
// We send a user message so it appears in the conversation.
$('msg').value='Please compress and summarize the conversation context to free up space.';
send();
showToast('Requesting context compression...');
}
async function cmdUsage(){ async function cmdUsage(){
const next=!window._showTokenUsage; const next=!window._showTokenUsage;
window._showTokenUsage=next; window._showTokenUsage=next;

View File

@@ -177,6 +177,17 @@ async function send(){
renderSessionList();setBusy(false);setStatus(''); renderSessionList();setBusy(false);setStatus('');
}); });
source.addEventListener('compressed',e=>{
// Context was auto-compressed during this turn -- show a system message
if(!S.session||S.session.session_id!==activeSid) return;
try{
const d=JSON.parse(e.data);
const sysMsg={role:'assistant',content:'*[Context was auto-compressed to continue the conversation]*'};
S.messages.push(sysMsg);
showToast(d.message||'Context compressed');
}catch(err){}
});
source.addEventListener('apperror',e=>{ source.addEventListener('apperror',e=>{
// Application-level error sent explicitly by the server (rate limit, crash, etc.) // Application-level error sent explicitly by the server (rate limit, crash, etc.)
// This is distinct from the SSE network 'error' event below. // This is distinct from the SSE network 'error' event below.