diff --git a/api/streaming.py b/api/streaming.py
index 52cdf2f..fa08894 100644
--- a/api/streaming.py
+++ b/api/streaming.py
@@ -12,6 +12,7 @@ from pathlib import Path
 
 from api.config import (
     STREAMS, STREAMS_LOCK, CANCEL_FLAGS, CLI_TOOLSETS,
+    LOCK, SESSIONS, SESSION_DIR,
     _get_session_agent_lock, _set_thread_env, _clear_thread_env,
     resolve_model_provider,
 )
@@ -206,6 +207,40 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta
                 persist_user_message=msg_text,
             )
             s.messages = result.get('messages') or s.messages
+
+            # ── Handle context compression side effects ──
+            # If compression fired inside run_conversation, the agent may have
+            # rotated its session_id. Detect and fix the mismatch so the WebUI
+            # continues writing to the correct session file.
+            _agent_sid = getattr(agent, 'session_id', None)
+            _compressed = False
+            if _agent_sid and _agent_sid != session_id:
+                old_sid = session_id
+                new_sid = _agent_sid
+                # Rename the session file
+                old_path = SESSION_DIR / f'{old_sid}.json'
+                new_path = SESSION_DIR / f'{new_sid}.json'
+                s.session_id = new_sid
+                with LOCK:
+                    if old_sid in SESSIONS:
+                        SESSIONS[new_sid] = SESSIONS.pop(old_sid)
+                if old_path.exists() and not new_path.exists():
+                    try:
+                        old_path.rename(new_path)
+                    except OSError:
+                        pass
+                _compressed = True
+            # Also detect compression via the agent's compressor state
+            if not _compressed:
+                _compressor = getattr(agent, 'context_compressor', None)
+                if _compressor and getattr(_compressor, 'compression_count', 0) > 0:
+                    _compressed = True
+            # Notify the frontend that compression happened
+            if _compressed:
+                put('compressed', {
+                    'message': 'Context auto-compressed to continue the conversation',
+                })
+
             # Stamp 'timestamp' on any messages that don't have one yet
             _now = time.time()
             for _m in s.messages:
diff --git a/static/commands.js b/static/commands.js
index 33c24b7..1bfed92 100644
--- a/static/commands.js
+++ b/static/commands.js
@@ -5,6 +5,7 @@
 const COMMANDS=[
   {name:'help',      desc:'List available commands',             fn:cmdHelp},
   {name:'clear',     desc:'Clear conversation messages',         fn:cmdClear},
+  {name:'compact',   desc:'Compress conversation context',       fn:cmdCompact},
   {name:'model',     desc:'Switch model (e.g. /model gpt-4o)',  fn:cmdModel,     arg:'model_name'},
   {name:'workspace', desc:'Switch workspace by name',            fn:cmdWorkspace, arg:'name'},
   {name:'new',       desc:'Start a new chat session',            fn:cmdNew},
@@ -99,6 +100,15 @@ async function cmdNew(){
   showToast('New session created');
 }
 
+function cmdCompact(){
+  // /compact handler: fills the composer with a fixed compression request and
+  // submits it via send(), so the request shows up as a regular user message.
+  // NOTE(review): assumes the agent compresses in response -- confirm backend.
+  $('msg').value='Please compress and summarize the conversation context to free up space.';
+  send();
+  showToast('Requesting context compression...');
+}
+
 async function cmdUsage(){
   const next=!window._showTokenUsage;
   window._showTokenUsage=next;
diff --git a/static/messages.js b/static/messages.js
index 35cf27f..257994e 100644
--- a/static/messages.js
+++ b/static/messages.js
@@ -177,6 +177,17 @@ async function send(){
       renderSessionList();setBusy(false);setStatus('');
     });
 
+    source.addEventListener('compressed',e=>{
+      // Context was auto-compressed during this turn -- append an assistant-role notice and show a toast
+      if(!S.session||S.session.session_id!==activeSid) return;
+      try{
+        const d=JSON.parse(e.data);
+        const sysMsg={role:'assistant',content:'*[Context was auto-compressed to continue the conversation]*'};
+        S.messages.push(sysMsg);
+        showToast(d.message||'Context compressed');
+      }catch(err){}
+    });
+
     source.addEventListener('apperror',e=>{
       // Application-level error sent explicitly by the server (rate limit, crash, etc.)
       // This is distinct from the SSE network 'error' event below.