feat: context window usage indicator with real agent data

The context indicator in the composer footer now shows real data from
the agent's context compressor instead of hardcoded estimates:

- Displays last_prompt_tokens out of context_length (e.g. '12.4k / 200k (6%)')
- Bar color: blue below 50%, yellow from 50% up to 75%, red above 75%
- Hover tooltip shows exact numbers + compression threshold
- Cost appended when available

Backend: streaming.py now reads context_length, threshold_tokens, and
last_prompt_tokens from agent.context_compressor after run_conversation()
and includes them in the usage dict sent with the 'done' SSE event.

This matches the CLI's context window display (the bar that shows
current context vs total window).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Author: Nathan Esquenazi
Date: 2026-04-04 18:50:17 -07:00
parent 429a0ea228
commit 2797e5189b
2 changed files with 16 additions and 12 deletions

View File

@@ -310,6 +310,12 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta
break
s.save()
usage = {'input_tokens': input_tokens, 'output_tokens': output_tokens, 'estimated_cost': estimated_cost}
# Include context window data from the agent's compressor for the UI indicator
_cc = getattr(agent, 'context_compressor', None)
if _cc:
usage['context_length'] = getattr(_cc, 'context_length', 0) or 0
usage['threshold_tokens'] = getattr(_cc, 'threshold_tokens', 0) or 0
usage['last_prompt_tokens'] = getattr(_cc, 'last_prompt_tokens', 0) or 0
put('done', {'session': s.compact() | {'messages': s.messages, 'tool_calls': tool_calls}, 'usage': usage})
finally:
if old_cwd is None: os.environ.pop('TERMINAL_CWD', None)

View File

@@ -88,18 +88,11 @@ function _fmtTokens(n){if(!n||n<0)return'0';if(n>=1e6)return(n/1e6).toFixed(1)+'
function _syncCtxIndicator(usage){
const el=$('ctxIndicator');
if(!el)return;
const inTok=usage.input_tokens||0;
const outTok=usage.output_tokens||0;
const total=inTok+outTok;
if(!total){el.style.display='none';return;}
const promptTok=usage.last_prompt_tokens||usage.input_tokens||0;
const ctxWindow=usage.context_length||0;
if(!promptTok||!ctxWindow){el.style.display='none';return;}
el.style.display='';
// Estimate context window from model name (rough, covers major families)
// TODO: fetch exact values from server or model metadata API
const _CTX={claude:200000,gemini:1000000,'gpt-4o':128000,'gpt-5':128000,o3:200000,o4:200000,deepseek:128000,llama:128000};
const _m=(S.session&&S.session.model||'').toLowerCase();
let ctxWindow=128000;
for(const[k,v]of Object.entries(_CTX)){if(_m.includes(k)){ctxWindow=v;break;}}
const pct=Math.min(100,Math.round((inTok/ctxWindow)*100));
const pct=Math.min(100,Math.round((promptTok/ctxWindow)*100));
const bar=$('ctxBar');
const label=$('ctxLabel');
if(bar){
@@ -108,10 +101,15 @@ function _syncCtxIndicator(usage){
}
if(label){
const cost=usage.estimated_cost;
let text=`${_fmtTokens(inTok)} in \u00b7 ${_fmtTokens(outTok)} out`;
let text=`${_fmtTokens(promptTok)} / ${_fmtTokens(ctxWindow)}`;
if(pct>0) text+=` (${pct}%)`;
if(cost) text+=` \u00b7 $${cost<0.01?cost.toFixed(4):cost.toFixed(2)}`;
label.textContent=text;
}
// Update title with detailed info
const threshold=usage.threshold_tokens||0;
el.title=`Context: ${_fmtTokens(promptTok)} of ${_fmtTokens(ctxWindow)} tokens used`
+(threshold?`\nAuto-compress at ${_fmtTokens(threshold)} (${Math.round(threshold/ctxWindow*100)}%)`:'');
}
function scrollIfPinned(){