feat: context window usage indicator with real agent data

The context indicator in the composer footer now shows real data from
the agent's context compressor instead of hardcoded estimates:

- Displays last_prompt_tokens out of context_length (e.g. '12.4k / 200k (6%)')
- Bar color: blue below 50%, yellow from 50% up to 75%, red above 75%
- Hover tooltip shows exact numbers + compression threshold
- Cost appended when available

Backend: streaming.py now reads context_length, threshold_tokens, and
last_prompt_tokens from agent.context_compressor after run_conversation()
and includes them in the usage dict sent with the 'done' SSE event.

This matches the CLI's context window display (the bar that shows
current context vs total window).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Author: Nathan Esquenazi
Date: 2026-04-04 18:50:17 -07:00
parent 429a0ea228
commit 2797e5189b
2 changed files with 16 additions and 12 deletions

View File

@@ -310,6 +310,12 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta
break
s.save()
usage = {'input_tokens': input_tokens, 'output_tokens': output_tokens, 'estimated_cost': estimated_cost}
# Include context window data from the agent's compressor for the UI indicator
_cc = getattr(agent, 'context_compressor', None)
if _cc:
usage['context_length'] = getattr(_cc, 'context_length', 0) or 0
usage['threshold_tokens'] = getattr(_cc, 'threshold_tokens', 0) or 0
usage['last_prompt_tokens'] = getattr(_cc, 'last_prompt_tokens', 0) or 0
put('done', {'session': s.compact() | {'messages': s.messages, 'tool_calls': tool_calls}, 'usage': usage})
finally:
if old_cwd is None: os.environ.pop('TERMINAL_CWD', None)

View File

@@ -88,18 +88,11 @@ function _fmtTokens(n){if(!n||n<0)return'0';if(n>=1e6)return(n/1e6).toFixed(1)+'
function _syncCtxIndicator(usage){
const el=$('ctxIndicator');
if(!el)return;
const inTok=usage.input_tokens||0;
const outTok=usage.output_tokens||0;
const total=inTok+outTok;
if(!total){el.style.display='none';return;}
const promptTok=usage.last_prompt_tokens||usage.input_tokens||0;
const ctxWindow=usage.context_length||0;
if(!promptTok||!ctxWindow){el.style.display='none';return;}
el.style.display='';
// Estimate context window from model name (rough, covers major families)
// TODO: fetch exact values from server or model metadata API
const _CTX={claude:200000,gemini:1000000,'gpt-4o':128000,'gpt-5':128000,o3:200000,o4:200000,deepseek:128000,llama:128000};
const _m=(S.session&&S.session.model||'').toLowerCase();
let ctxWindow=128000;
for(const[k,v]of Object.entries(_CTX)){if(_m.includes(k)){ctxWindow=v;break;}}
const pct=Math.min(100,Math.round((inTok/ctxWindow)*100));
const pct=Math.min(100,Math.round((promptTok/ctxWindow)*100));
const bar=$('ctxBar');
const label=$('ctxLabel');
if(bar){
@@ -108,10 +101,15 @@ function _syncCtxIndicator(usage){
}
if(label){
const cost=usage.estimated_cost;
let text=`${_fmtTokens(inTok)} in \u00b7 ${_fmtTokens(outTok)} out`;
let text=`${_fmtTokens(promptTok)} / ${_fmtTokens(ctxWindow)}`;
if(pct>0) text+=` (${pct}%)`;
if(cost) text+=` \u00b7 $${cost<0.01?cost.toFixed(4):cost.toFixed(2)}`;
label.textContent=text;
}
// Update title with detailed info
const threshold=usage.threshold_tokens||0;
el.title=`Context: ${_fmtTokens(promptTok)} of ${_fmtTokens(ctxWindow)} tokens used`
+(threshold?`\nAuto-compress at ${_fmtTokens(threshold)} (${Math.round(threshold/ctxWindow*100)}%)`:'');
}
function scrollIfPinned(){