feat: handle auto-compaction side effects + /compact command
The agent's run_conversation() already triggers context compression internally, but the WebUI was unaware of the side effects:

1. Session ID rotation: compression creates a new session_id inside the agent. The WebUI kept writing to the old session file, causing silent data loss. Fix: detect an agent.session_id mismatch after run_conversation(), rename the session file, and update the in-memory caches.
2. No user notification: compression was invisible to the user. Fix: emit a 'compressed' SSE event when compression is detected. The frontend shows a system message and a toast.
3. No manual control: there was no way to request compression on demand. Fix: add a /compact slash command that sends a message to the agent requesting context compression. The command appears in the autocomplete dropdown.

Compression is detected in two ways:
- agent.session_id != original session_id (ID rotation)
- agent.context_compressor.compression_count > 0 (compressor state)

Closes #90

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -12,6 +12,7 @@ from pathlib import Path
|
|||||||
|
|
||||||
from api.config import (
|
from api.config import (
|
||||||
STREAMS, STREAMS_LOCK, CANCEL_FLAGS, CLI_TOOLSETS,
|
STREAMS, STREAMS_LOCK, CANCEL_FLAGS, CLI_TOOLSETS,
|
||||||
|
LOCK, SESSIONS, SESSION_DIR,
|
||||||
_get_session_agent_lock, _set_thread_env, _clear_thread_env,
|
_get_session_agent_lock, _set_thread_env, _clear_thread_env,
|
||||||
resolve_model_provider,
|
resolve_model_provider,
|
||||||
)
|
)
|
||||||
@@ -206,6 +207,40 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta
|
|||||||
persist_user_message=msg_text,
|
persist_user_message=msg_text,
|
||||||
)
|
)
|
||||||
s.messages = result.get('messages') or s.messages
|
s.messages = result.get('messages') or s.messages
|
||||||
|
|
||||||
|
# ── Handle context compression side effects ──
|
||||||
|
# If compression fired inside run_conversation, the agent may have
|
||||||
|
# rotated its session_id. Detect and fix the mismatch so the WebUI
|
||||||
|
# continues writing to the correct session file.
|
||||||
|
_agent_sid = getattr(agent, 'session_id', None)
|
||||||
|
_compressed = False
|
||||||
|
if _agent_sid and _agent_sid != session_id:
|
||||||
|
old_sid = session_id
|
||||||
|
new_sid = _agent_sid
|
||||||
|
# Rename the session file
|
||||||
|
old_path = SESSION_DIR / f'{old_sid}.json'
|
||||||
|
new_path = SESSION_DIR / f'{new_sid}.json'
|
||||||
|
s.session_id = new_sid
|
||||||
|
with LOCK:
|
||||||
|
if old_sid in SESSIONS:
|
||||||
|
SESSIONS[new_sid] = SESSIONS.pop(old_sid)
|
||||||
|
if old_path.exists() and not new_path.exists():
|
||||||
|
try:
|
||||||
|
old_path.rename(new_path)
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
_compressed = True
|
||||||
|
# Also detect compression via the result dict or compressor state
|
||||||
|
if not _compressed:
|
||||||
|
_compressor = getattr(agent, 'context_compressor', None)
|
||||||
|
if _compressor and getattr(_compressor, 'compression_count', 0) > 0:
|
||||||
|
_compressed = True
|
||||||
|
# Notify the frontend that compression happened
|
||||||
|
if _compressed:
|
||||||
|
put('compressed', {
|
||||||
|
'message': 'Context auto-compressed to continue the conversation',
|
||||||
|
})
|
||||||
|
|
||||||
# Stamp 'timestamp' on any messages that don't have one yet
|
# Stamp 'timestamp' on any messages that don't have one yet
|
||||||
_now = time.time()
|
_now = time.time()
|
||||||
for _m in s.messages:
|
for _m in s.messages:
|
||||||
|
|||||||
@@ -5,6 +5,7 @@
|
|||||||
const COMMANDS=[
|
const COMMANDS=[
|
||||||
{name:'help', desc:'List available commands', fn:cmdHelp},
|
{name:'help', desc:'List available commands', fn:cmdHelp},
|
||||||
{name:'clear', desc:'Clear conversation messages', fn:cmdClear},
|
{name:'clear', desc:'Clear conversation messages', fn:cmdClear},
|
||||||
|
{name:'compact', desc:'Compress conversation context', fn:cmdCompact},
|
||||||
{name:'model', desc:'Switch model (e.g. /model gpt-4o)', fn:cmdModel, arg:'model_name'},
|
{name:'model', desc:'Switch model (e.g. /model gpt-4o)', fn:cmdModel, arg:'model_name'},
|
||||||
{name:'workspace', desc:'Switch workspace by name', fn:cmdWorkspace, arg:'name'},
|
{name:'workspace', desc:'Switch workspace by name', fn:cmdWorkspace, arg:'name'},
|
||||||
{name:'new', desc:'Start a new chat session', fn:cmdNew},
|
{name:'new', desc:'Start a new chat session', fn:cmdNew},
|
||||||
@@ -99,6 +100,15 @@ async function cmdNew(){
|
|||||||
showToast('New session created');
|
showToast('New session created');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function cmdCompact(){
  // Handler for the /compact slash command.
  // It does not call a dedicated endpoint: it submits an ordinary user
  // message through send(), so the request shows up in the conversation.
  // NOTE(review): the actual compression is expected to be triggered by the
  // agent's run_conversation preflight (_compress_context) -- confirm server-side.
  const compactPrompt='Please compress and summarize the conversation context to free up space.';
  const input=$('msg');
  input.value=compactPrompt;
  send();
  showToast('Requesting context compression...');
}
|
||||||
|
|
||||||
async function cmdUsage(){
|
async function cmdUsage(){
|
||||||
const next=!window._showTokenUsage;
|
const next=!window._showTokenUsage;
|
||||||
window._showTokenUsage=next;
|
window._showTokenUsage=next;
|
||||||
|
|||||||
@@ -177,6 +177,17 @@ async function send(){
|
|||||||
renderSessionList();setBusy(false);setStatus('');
|
renderSessionList();setBusy(false);setStatus('');
|
||||||
});
|
});
|
||||||
|
|
||||||
|
source.addEventListener('compressed',e=>{
  // Server-sent 'compressed' event: the context was compressed during this
  // turn. Append a marker message to the local transcript and show a toast.
  // Ignore the event if the user has switched to another session meanwhile.
  if(!S.session||S.session.session_id!==activeSid) return;
  try{
    const d=JSON.parse(e.data);
    const sysMsg={role:'assistant',content:'*[Context was auto-compressed to continue the conversation]*'};
    S.messages.push(sysMsg);
    // JSON.parse can legally return null or a primitive; guard the property
    // access so the toast still appears with the default text.
    showToast((d&&d.message)||'Context compressed');
  }catch(err){
    // Stay best-effort (never break the stream), but leave a trace instead
    // of swallowing the failure silently.
    console.warn('Failed to handle compressed event',err);
  }
});
|
||||||
|
|
||||||
source.addEventListener('apperror',e=>{
|
source.addEventListener('apperror',e=>{
|
||||||
// Application-level error sent explicitly by the server (rate limit, crash, etc.)
|
// Application-level error sent explicitly by the server (rate limit, crash, etc.)
|
||||||
// This is distinct from the SSE network 'error' event below.
|
// This is distinct from the SSE network 'error' event below.
|
||||||
|
|||||||
Reference in New Issue
Block a user