diff --git a/CHANGELOG.md b/CHANGELOG.md index ce10f0b..90189e5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,34 @@ --- +## [v0.32] Auto-Compaction Handling + /compact Command (Issue #90) +*April 5, 2026 | 424 tests* + +### Features +- **Auto-compaction detection.** When the agent's `run_conversation()` triggers + context compression and rotates the session ID, the WebUI detects the mismatch + and renames the session file + cache entry so messages don't split across files. +- **`compressed` SSE event.** Frontend receives a notification when compression + fires, shows a system message ("Context was auto-compressed") and a toast. +- **`/compact` slash command.** Type `/compact` to request the agent compress + the conversation context. Sends a natural-language message that triggers the + agent's compression preflight. +- **Real context window data.** The context usage indicator now uses actual + `context_length`, `threshold_tokens`, and `last_prompt_tokens` from the agent's + compressor instead of the client-side model name lookup. Tooltip shows the + auto-compress threshold. Hides gracefully when the agent has no compressor. + +### Architecture +- `api/streaming.py`: Session ID mismatch detection after `run_conversation()`, + file rename, SESSIONS cache update under lock, `compressed` SSE event, + `context_length`/`threshold_tokens`/`last_prompt_tokens` in usage dict. +- `static/commands.js`: `/compact` command. +- `static/messages.js`: `compressed` SSE event handler. +- `static/ui.js`: `_syncCtxIndicator()` rewritten to use server-side compressor + data instead of client-side model estimates. + +--- + ## [v0.31.2] CLI session delete fix *April 5, 2026 | 424 tests* @@ -1113,4 +1141,4 @@ Three-panel layout: sessions sidebar, chat area, workspace panel. 
--- -*Last updated: v0.31, April 4, 2026 | Tests: 424* +*Last updated: v0.32, April 5, 2026 | Tests: 424* diff --git a/SPRINTS.md b/SPRINTS.md index 1b860cc..daa085a 100644 --- a/SPRINTS.md +++ b/SPRINTS.md @@ -1,6 +1,6 @@ # Hermes Web UI -- Forward Sprint Plan -> Current state: v0.31 | 424 tests | Daily driver ready +> Current state: v0.32 | 424 tests | Daily driver ready > This document plans the path from here to two targets: > > Target A: 1:1 feature parity with the Hermes CLI (everything you can do from the @@ -898,5 +898,5 @@ genuinely differentiating for an open-source project --- *Last updated: April 5, 2026* -*Current version: v0.31.2 | 424 tests* +*Current version: v0.32 | 424 tests* *Next sprint: Sprint 24 (Web Polish + Bug Fix Pass)* diff --git a/api/streaming.py b/api/streaming.py index 52cdf2f..6d7c480 100644 --- a/api/streaming.py +++ b/api/streaming.py @@ -12,6 +12,7 @@ from pathlib import Path from api.config import ( STREAMS, STREAMS_LOCK, CANCEL_FLAGS, CLI_TOOLSETS, + LOCK, SESSIONS, SESSION_DIR, _get_session_agent_lock, _set_thread_env, _clear_thread_env, resolve_model_provider, ) @@ -206,6 +207,40 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta persist_user_message=msg_text, ) s.messages = result.get('messages') or s.messages + + # ── Handle context compression side effects ── + # If compression fired inside run_conversation, the agent may have + # rotated its session_id. Detect and fix the mismatch so the WebUI + # continues writing to the correct session file. 
+ _agent_sid = getattr(agent, 'session_id', None) + _compressed = False + if _agent_sid and _agent_sid != session_id: + old_sid = session_id + new_sid = _agent_sid + # Rename the session file + old_path = SESSION_DIR / f'{old_sid}.json' + new_path = SESSION_DIR / f'{new_sid}.json' + s.session_id = new_sid + with LOCK: + if old_sid in SESSIONS: + SESSIONS[new_sid] = SESSIONS.pop(old_sid) + if old_path.exists() and not new_path.exists(): + try: + old_path.rename(new_path) + except OSError: + pass + _compressed = True + # Also detect compression via the agent's compressor state + if not _compressed: + _compressor = getattr(agent, 'context_compressor', None) + if _compressor and getattr(_compressor, 'compression_count', 0) > 0: + _compressed = True + # Notify the frontend that compression happened + if _compressed: + put('compressed', { + 'message': 'Context auto-compressed to continue the conversation', + }) + # Stamp 'timestamp' on any messages that don't have one yet _now = time.time() for _m in s.messages: @@ -275,6 +310,12 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta break s.save() usage = {'input_tokens': input_tokens, 'output_tokens': output_tokens, 'estimated_cost': estimated_cost} + # Include context window data from the agent's compressor for the UI indicator + _cc = getattr(agent, 'context_compressor', None) + if _cc: + usage['context_length'] = getattr(_cc, 'context_length', 0) or 0 + usage['threshold_tokens'] = getattr(_cc, 'threshold_tokens', 0) or 0 + usage['last_prompt_tokens'] = getattr(_cc, 'last_prompt_tokens', 0) or 0 put('done', {'session': s.compact() | {'messages': s.messages, 'tool_calls': tool_calls}, 'usage': usage}) finally: if old_cwd is None: os.environ.pop('TERMINAL_CWD', None) diff --git a/static/commands.js b/static/commands.js index 33c24b7..1bfed92 100644 --- a/static/commands.js +++ b/static/commands.js @@ -5,6 +5,7 @@ const COMMANDS=[ {name:'help', desc:'List available commands', 
fn:cmdHelp}, {name:'clear', desc:'Clear conversation messages', fn:cmdClear}, + {name:'compact', desc:'Compress conversation context', fn:cmdCompact}, {name:'model', desc:'Switch model (e.g. /model gpt-4o)', fn:cmdModel, arg:'model_name'}, {name:'workspace', desc:'Switch workspace by name', fn:cmdWorkspace, arg:'name'}, {name:'new', desc:'Start a new chat session', fn:cmdNew}, @@ -99,6 +100,15 @@ async function cmdNew(){ showToast('New session created'); } +function cmdCompact(){ + // Send as a regular message to the agent -- the agent's run_conversation + // preflight will detect the high token count and trigger _compress_context. + // We send a user message so it appears in the conversation. + $('msg').value='Please compress and summarize the conversation context to free up space.'; + send(); + showToast('Requesting context compression...'); +} + async function cmdUsage(){ const next=!window._showTokenUsage; window._showTokenUsage=next; diff --git a/static/index.html b/static/index.html index a0526b1..a4baf4b 100644 --- a/static/index.html +++ b/static/index.html @@ -13,7 +13,7 @@