diff --git a/api/streaming.py b/api/streaming.py
index 52cdf2f..fa08894 100644
--- a/api/streaming.py
+++ b/api/streaming.py
@@ -12,6 +12,7 @@ from pathlib import Path
 
 from api.config import (
     STREAMS, STREAMS_LOCK, CANCEL_FLAGS, CLI_TOOLSETS,
+    LOCK, SESSIONS, SESSION_DIR,
     _get_session_agent_lock, _set_thread_env, _clear_thread_env,
     resolve_model_provider,
 )
@@ -206,6 +207,48 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta
                 persist_user_message=msg_text,
             )
             s.messages = result.get('messages') or s.messages
+
+            # ── Handle context compression side effects ──
+            # If compression fired inside run_conversation, the agent may have
+            # rotated its session_id. Detect and fix the mismatch so the WebUI
+            # continues writing to the correct session file.
+            _agent_sid = getattr(agent, 'session_id', None)
+            _compressed = False
+            if _agent_sid and _agent_sid != session_id:
+                old_sid = session_id
+                new_sid = _agent_sid
+                # Rename the session file
+                old_path = SESSION_DIR / f'{old_sid}.json'
+                new_path = SESSION_DIR / f'{new_sid}.json'
+                s.session_id = new_sid
+                with LOCK:
+                    if old_sid in SESSIONS:
+                        SESSIONS[new_sid] = SESSIONS.pop(old_sid)
+                if old_path.exists() and not new_path.exists():
+                    try:
+                        old_path.rename(new_path)
+                    except OSError as _rename_err:
+                        # Best-effort: a failed rename must not abort the turn, but
+                        # log it so an orphaned session file can be diagnosed.
+                        import logging
+                        logging.getLogger(__name__).warning(
+                            'Could not rename session file %s -> %s: %s',
+                            old_path.name, new_path.name, _rename_err,
+                        )
+                _compressed = True
+            # Also detect compression via the compressor state
+            # NOTE(review): compression_count looks cumulative; if the compressor
+            # outlives a single turn this re-notifies on every turn -- confirm.
+            if not _compressed:
+                _compressor = getattr(agent, 'context_compressor', None)
+                if _compressor and getattr(_compressor, 'compression_count', 0) > 0:
+                    _compressed = True
+            # Notify the frontend that compression happened
+            if _compressed:
+                put('compressed', {
+                    'message': 'Context auto-compressed to continue the conversation',
+                })
+
             # Stamp 'timestamp' on any messages that don't have one yet
             _now = time.time()
             for _m in s.messages:
diff --git a/static/commands.js b/static/commands.js
index 33c24b7..1bfed92 100644
--- a/static/commands.js
+++ b/static/commands.js
@@ -5,6 +5,7 @@
 const COMMANDS=[
   {name:'help', desc:'List available commands', fn:cmdHelp},
   {name:'clear', desc:'Clear conversation messages', fn:cmdClear},
+  {name:'compact', desc:'Compress conversation context', fn:cmdCompact},
   {name:'model', desc:'Switch model (e.g. /model gpt-4o)', fn:cmdModel, arg:'model_name'},
   {name:'workspace', desc:'Switch workspace by name', fn:cmdWorkspace, arg:'name'},
   {name:'new', desc:'Start a new chat session', fn:cmdNew},
@@ -99,6 +100,15 @@ async function cmdNew(){
   showToast('New session created');
 }
 
+function cmdCompact(){
+  // Send as a regular message to the agent -- the agent's run_conversation
+  // preflight will detect the high token count and trigger _compress_context.
+  // We send a user message so it appears in the conversation.
+  $('msg').value='Please compress and summarize the conversation context to free up space.';
+  send();
+  showToast('Requesting context compression...');
+}
+
 async function cmdUsage(){
   const next=!window._showTokenUsage;
   window._showTokenUsage=next;
diff --git a/static/messages.js b/static/messages.js
index 35cf27f..257994e 100644
--- a/static/messages.js
+++ b/static/messages.js
@@ -177,6 +177,18 @@ async function send(){
     renderSessionList();setBusy(false);setStatus('');
   });
 
+  source.addEventListener('compressed',e=>{
+    // Context was auto-compressed during this turn -- show a system message
+    if(!S.session||S.session.session_id!==activeSid) return;
+    // The payload only supplies the toast text, so a malformed payload
+    // must not suppress the notice itself.
+    let d=null;
+    try{d=JSON.parse(e.data);}catch(err){console.warn('compressed: unparseable payload',err);}
+    const sysMsg={role:'assistant',content:'*[Context was auto-compressed to continue the conversation]*'};
+    S.messages.push(sysMsg);
+    showToast((d&&d.message)||'Context compressed');
+  });
+
   source.addEventListener('apperror',e=>{
     // Application-level error sent explicitly by the server (rate limit, crash, etc.)
     // This is distinct from the SSE network 'error' event below.