diff --git a/CHANGELOG.md b/CHANGELOG.md index da8d9b9..4d6bcf6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,39 @@ # Hermes Web UI -- Changelog +## [v0.50.42] fix: session display + model UX polish (sprint 42) + +**Context indicator always shows latest usage** (PR #471, fixes #437) +The context ring/indicator in the composer footer was reading token counts and cost +from the stored session snapshot with `||` — meaning stale non-zero values from +previous turns always won over a fresh `0` from the current turn. Replaced all six +field merges with a `_pick(latest, stored, dflt)` helper that correctly prefers the +latest usage when it's a real value (including `0`). + +**System prompt no longer leaks as gateway session title** (PR #472, fixes #441) +Telegram, Discord, and CLI gateway sessions inject a system message before any user +turn. When the session title is set from this message, the sidebar shows +`[SYSTEM: The user has inv...` instead of a meaningful name. Added a guard in +`_renderOneSession()`: if `cleanTitle` starts with `[SYSTEM:`, replace it with the +platform display name (`Telegram session`, `Discord session`, etc.). + +**Thinking/reasoning panel persists across page reload** (PR #473, fixes #427) +The full chain-of-thought from Claude, Gemini, and DeepSeek thinking models was lost +after streaming completed and on every page reload. Two-part fix: +- `api/streaming.py`: `on_reasoning()` now accumulates `_reasoning_text`; before the + session is serialised at stream end, `_reasoning_text` is injected into the last + assistant message so it's stored in the session JSON +- `static/messages.js`: in the `done` SSE handler, `reasoningText` is also patched + onto the last assistant message as a belt-and-suspenders client-side fallback + +**Custom model ID input in model picker** (PR #474, fixes #444) +Users who need a model not in the curated list (~30 models) can now type any model +ID directly in the dropdown. 
A text input at the bottom of the model picker lets +users enter any string (e.g. `openai/gpt-5.4`, `deepseek/deepseek-r2`, or any +provider-prefixed ID) and press Enter or click + to use it immediately. +i18n keys added to en, es, zh. + +- Total tests: 1130 (was 1117) + ## [v0.50.41] feat(ui): render MEDIA: images inline in web UI chat (fixes #450) When the agent outputs `MEDIA:` tokens — screenshots from the browser tool, diff --git a/TESTING.md b/TESTING.md index 8b883ad..b80b22a 100644 --- a/TESTING.md +++ b/TESTING.md @@ -8,7 +8,7 @@ > Prerequisites: SSH tunnel is active on port 8787. Open http://localhost:8787 in browser. > Server health check: curl http://127.0.0.1:8787/health should return {"status":"ok"}. > -> Automated tests: 1117 total (1117 passing, 0 known failures). Includes onboarding coverage for bootstrap/static wizard presence, real provider config persistence (`config.yaml` + `.env`), the `/api/onboarding/*` backend, and the onboarding skip/existing-config guard. +> Automated tests: 1130 total (1130 passing, 0 known failures). Includes onboarding coverage for bootstrap/static wizard presence, real provider config persistence (`config.yaml` + `.env`), the `/api/onboarding/*` backend, and the onboarding skip/existing-config guard. 
> Run: `pytest tests/ -v --timeout=60` --- diff --git a/api/streaming.py b/api/streaming.py index 00135ec..6ad6a47 100644 --- a/api/streaming.py +++ b/api/streaming.py @@ -164,6 +164,7 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta try: _token_sent = False # tracks whether any streamed tokens were sent + _reasoning_text = '' # accumulates reasoning/thinking trace for persistence def on_token(text): nonlocal _token_sent @@ -173,8 +174,10 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta put('token', {'text': text}) def on_reasoning(text): + nonlocal _reasoning_text if text is None: return + _reasoning_text += str(text) put('reasoning', {'text': str(text)}) def on_tool(*cb_args, **cb_kwargs): @@ -546,6 +549,12 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta usage['context_length'] = getattr(_cc, 'context_length', 0) or 0 usage['threshold_tokens'] = getattr(_cc, 'threshold_tokens', 0) or 0 usage['last_prompt_tokens'] = getattr(_cc, 'last_prompt_tokens', 0) or 0 + # Persist reasoning trace in the session so it survives reload + if _reasoning_text and s.messages: + for _rm in reversed(s.messages): + if isinstance(_rm, dict) and _rm.get('role') == 'assistant': + _rm['reasoning'] = _reasoning_text + break raw_session = s.compact() | {'messages': s.messages, 'tool_calls': tool_calls} put('done', {'session': redact_session_data(raw_session), 'usage': usage}) finally: diff --git a/static/i18n.js b/static/i18n.js index 9aef12f..155e7e7 100644 --- a/static/i18n.js +++ b/static/i18n.js @@ -50,6 +50,8 @@ const LOCALES = { model_unavailable_title: 'This model is no longer in your current provider list', provider_mismatch_warning: (m,p)=>`"${m}" may not work with your configured provider (${p}). 
Send anyway, or run \`hermes model\` in your terminal to switch.`, provider_mismatch_label: 'Provider mismatch', + model_custom_label: 'Custom model ID', + model_custom_placeholder: 'e.g. openai/gpt-5.4', // commands.js cmd_help: 'List available commands', cmd_clear: 'Clear conversation messages', @@ -456,6 +458,8 @@ const LOCALES = { model_unavailable_title: 'Este modelo ya no está en tu lista actual de proveedores', provider_mismatch_warning: (m,p)=>`"${m}" puede no funcionar con tu proveedor configurado (${p}). Envía de todas formas, o ejecuta \`hermes model\` en la terminal para cambiar.`, provider_mismatch_label: 'Proveedor incompatible', + model_custom_label: 'ID de modelo personalizado', + model_custom_placeholder: 'p. ej. openai/gpt-5.4', // commands.js cmd_help: 'Listar los comandos disponibles', cmd_clear: 'Borrar los mensajes de la conversación', @@ -1058,6 +1062,8 @@ const LOCALES = { model_unavailable_title: '\u8fd9\u4e2a\u6a21\u578b\u5df2\u7ecf\u4e0d\u5728\u5f53\u524d provider \u5217\u8868\u4e2d', provider_mismatch_warning: (m,p)=>`\"${m}\" \u53ef\u80fd\u65e0\u6cd5\u5728\u5f53\u524d\u914d\u7f6e\u7684\u63d0\u4f9b\u5546 (${p}) \u4e0b\u5de5\u4f5c\u3002\u76f4\u63a5\u53d1\u9001\uff0c\u6216\u5728\u7ec8\u7aef\u8fd0\u884c \`hermes model\` \u5207\u6362\u3002`, provider_mismatch_label: '\u63d0\u4f9b\u5546\u4e0d\u5339\u914d', + model_custom_label: '\u81ea\u5b9a\u4e49\u6a21\u578b ID', + model_custom_placeholder: '\u4f8b\u5982 openai/gpt-5.4', // commands.js cmd_help: '\u67e5\u770b\u53ef\u7528\u547d\u4ee4', cmd_clear: '\u6e05\u7a7a\u5f53\u524d\u5bf9\u8bdd\u6d88\u606f', diff --git a/static/index.html b/static/index.html index 22776aa..e9f1c1a 100644 --- a/static/index.html +++ b/static/index.html @@ -536,7 +536,7 @@
System
Instance version and access controls.
- v0.50.41 + v0.50.42
diff --git a/static/messages.js b/static/messages.js index 4486428..6537afb 100644 --- a/static/messages.js +++ b/static/messages.js @@ -353,8 +353,11 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){ } if(S.session&&S.session.session_id===activeSid){ S.session=d.session;S.messages=d.session.messages||[]; - // Stamp _ts on the last assistant message if it has no timestamp + // Find the last assistant message once for both reasoning persistence and timestamp const lastAsst=[...S.messages].reverse().find(m=>m.role==='assistant'); + // Persist reasoning trace so thinking card survives page reload + if(reasoningText&&lastAsst&&!lastAsst.reasoning) lastAsst.reasoning=reasoningText; + // Stamp _ts on the last assistant message if it has no timestamp if(lastAsst&&!lastAsst._ts&&!lastAsst.timestamp) lastAsst._ts=Date.now()/1000; if(d.usage){S.lastUsage=d.usage;_syncCtxIndicator(d.usage);} if(d.session.tool_calls&&d.session.tool_calls.length){ diff --git a/static/sessions.js b/static/sessions.js index 1c96ec3..9856175 100644 --- a/static/sessions.js +++ b/static/sessions.js @@ -143,7 +143,15 @@ async function loadSession(sid){ const _s=S.session; if(_s&&typeof _syncCtxIndicator==='function'){ const u=S.lastUsage||{}; - _syncCtxIndicator({input_tokens:_s.input_tokens||u.input_tokens||0,output_tokens:_s.output_tokens||u.output_tokens||0,estimated_cost:_s.estimated_cost||u.estimated_cost,context_length:u.context_length||0,last_prompt_tokens:u.last_prompt_tokens||0,threshold_tokens:u.threshold_tokens||0}); + const _pick=(latest,stored,dflt=0)=>latest!=null?latest:(stored!=null?stored:dflt); + _syncCtxIndicator({ + input_tokens: _pick(u.input_tokens, _s.input_tokens), + output_tokens: _pick(u.output_tokens, _s.output_tokens), + estimated_cost: _pick(u.estimated_cost, _s.estimated_cost), + context_length: _pick(u.context_length, _s.context_length), + last_prompt_tokens:_pick(u.last_prompt_tokens,_s.last_prompt_tokens), + threshold_tokens: 
_pick(u.threshold_tokens, _s.threshold_tokens), + }); } } @@ -590,7 +598,12 @@ function renderSessionListFromCache(){ if(isActive&&S.session&&S.session._flash)delete S.session._flash; const rawTitle=s.title||'Untitled'; const tags=(rawTitle.match(/#[\w-]+/g)||[]); - const cleanTitle=tags.length?rawTitle.replace(/#[\w-]+/g,'').trim():rawTitle; + let cleanTitle=tags.length?rawTitle.replace(/#[\w-]+/g,'').trim():rawTitle; + // Guard: system prompt content must never surface as a visible session title + const _SOURCE_DISPLAY={telegram:'Telegram',discord:'Discord',slack:'Slack',cli:'CLI',feishu:'Feishu',weixin:'WeChat'}; + if(cleanTitle.startsWith('[SYSTEM:')){ + cleanTitle=(_SOURCE_DISPLAY[s.source_tag]||s.source_tag||'Gateway')+' session'; + } const sessionText=document.createElement('div'); sessionText.className='session-text'; const titleRow=document.createElement('div'); diff --git a/static/style.css b/static/style.css index 21bc90a..9fb7d3c 100644 --- a/static/style.css +++ b/static/style.css @@ -719,6 +719,12 @@ .model-opt.active{background:rgba(124,185,255,.1);} .model-opt-name{display:block;font-size:13px;color:var(--text);font-weight:500;line-height:1.25;} .model-opt-id{display:block;font-size:10px;color:var(--muted);line-height:1.3;opacity:.72;word-break:break-word;} +.model-custom-sep{padding-top:4px;border-top:1px solid var(--border);margin-top:4px;} +.model-custom-row{display:flex;align-items:center;gap:6px;padding:6px 10px 8px;} +.model-custom-input{flex:1;background:var(--code-bg);border:1px solid var(--border2);border-radius:6px;color:var(--text);padding:5px 8px;font-size:12px;outline:none;font-family:inherit;min-width:0;} +.model-custom-input:focus{border-color:rgba(124,185,255,.5);} +.model-custom-btn{flex-shrink:0;width:24px;height:24px;border:1px solid var(--border2);border-radius:6px;background:transparent;color:var(--muted);cursor:pointer;display:inline-flex;align-items:center;justify-content:center;transition:color .12s,border-color .12s;} 
+.model-custom-btn:hover{color:var(--blue);border-color:rgba(124,185,255,.4);} .ws-opt{padding:10px 14px;cursor:pointer;transition:background .12s;display:flex;flex-direction:column;gap:4px;align-items:flex-start;} .ws-opt:hover{background:rgba(255,255,255,.07);} .ws-opt.active{background:rgba(124,185,255,.1);} diff --git a/static/ui.js b/static/ui.js index f63c380..2bcdbf7 100644 --- a/static/ui.js +++ b/static/ui.js @@ -238,11 +238,37 @@ function renderModelDropdown(){ dd.appendChild(row); } } + // Custom model ID input — lets users type any model not in the curated list + const _custSep=document.createElement('div'); + _custSep.className='model-group model-custom-sep'; + _custSep.textContent=t('model_custom_label')||'Custom model ID'; + dd.appendChild(_custSep); + const _custRow=document.createElement('div'); + _custRow.className='model-custom-row'; + _custRow.innerHTML=`<input type="text" class="model-custom-input" placeholder="${t('model_custom_placeholder')||'e.g. openai/gpt-5.4'}"><button type="button" class="model-custom-btn" aria-label="${t('model_custom_label')||'Custom model ID'}">+</button>`; + const _ci=_custRow.querySelector('.model-custom-input'); + const _cb=_custRow.querySelector('.model-custom-btn'); + const _applyCustom=()=>{const v=_ci.value.trim();if(!v)return;selectModelFromDropdown(v);_ci.value='';}; + _cb.onclick=_applyCustom; + _ci.addEventListener('keydown',e=>{if(e.key==='Enter'){e.preventDefault();_applyCustom();}if(e.key==='Escape'){closeModelDropdown();}}); + _ci.addEventListener('click',e=>e.stopPropagation()); + dd.appendChild(_custRow); } async function selectModelFromDropdown(value){ const sel=$('modelSelect'); if(!sel||sel.value===value) { closeModelDropdown(); return; } + // If the value isn't in the option list (custom model ID), add a temporary option + // so sel.value assignment succeeds and the model chip shows the custom ID. 
+ if(!Array.from(sel.options).some(o=>o.value===value)){ + const opt=document.createElement('option'); + opt.value=value; + opt.textContent=value.split('/').pop()||value; + opt.dataset.custom='1'; + // Remove any previous custom option before adding new one + sel.querySelectorAll('option[data-custom]').forEach(o=>o.remove()); + sel.appendChild(opt); + } sel.value=value; syncModelChip(); closeModelDropdown(); diff --git a/tests/test_sprint42.py b/tests/test_sprint42.py index b618084..465fdba 100644 --- a/tests/test_sprint42.py +++ b/tests/test_sprint42.py @@ -17,6 +17,19 @@ REPO_ROOT = pathlib.Path(__file__).parent.parent STREAMING_PY = (REPO_ROOT / "api" / "streaming.py").read_text() +# ── Shared helpers for sprint-42 additional tests ──────────────────────────── + +REPO = REPO_ROOT # alias used by #427 tests +_SESSIONS_JS = REPO_ROOT / 'static' / 'sessions.js' +_STREAMING_PY = REPO_ROOT / 'api' / 'streaming.py' +_MESSAGES_JS = REPO_ROOT / 'static' / 'messages.js' +_UI_JS = REPO_ROOT / 'static' / 'ui.js' + +def _read_sessions_js(): + return _SESSIONS_JS.read_text(encoding='utf-8') + +# ───────────────────────────────────────────────────────────────────────────── + class TestSessionDBInjection(unittest.TestCase): """Verify SessionDB is initialized and passed to AIAgent in streaming.py.""" @@ -105,3 +118,181 @@ class TestSessionDBAST(unittest.TestCase): src, "SessionDB try/except must NOT be inside _ENV_LOCK body (deadlock risk)", ) + + +class TestModelCustomInput(unittest.TestCase): + """Tests for issue #444 — custom model ID input in model dropdown.""" + + STATIC = pathlib.Path(__file__).parent.parent / 'static' + + def _read(self, filename): + path = self.STATIC / filename + with open(path, 'r', encoding='utf-8') as f: + return f.read() + + def _renderModelDropdown_body(self): + src = self._read('ui.js') + start = src.find('function renderModelDropdown()') + end = src.find('\nasync function selectModelFromDropdown', start) + return src[start:end] + + def 
test_model_custom_input_in_dropdown(self): + body = self._renderModelDropdown_body() + self.assertIn('model-custom-input', body, + 'model-custom-input class must be in renderModelDropdown') + + def test_model_custom_enter_handler(self): + body = self._renderModelDropdown_body() + self.assertIn('_applyCustom', body, + '_applyCustom function must be defined in renderModelDropdown') + + def test_model_custom_css_defined(self): + css = self._read('style.css') + self.assertIn('.model-custom-row', css, + '.model-custom-row must be defined in style.css') + self.assertIn('.model-custom-input', css, + '.model-custom-input must be defined in style.css') + + def test_model_custom_i18n_keys(self): + i18n = self._read('i18n.js') + # Find en locale block (appears first before es) + en_block_start = i18n.find("'en'") + es_block_start = i18n.find("'es'") + en_block = i18n[en_block_start:es_block_start] + self.assertIn('model_custom_label', en_block, + 'model_custom_label must be in en locale') + self.assertIn('model_custom_placeholder', en_block, + 'model_custom_placeholder must be in en locale') + + +# ── Sprint 42 additional tests: context indicator (#437) ───────────────── +def test_context_indicator_uses_pick_helper(): + """The _pick helper must be present in sessions.js to prefer latest over stale values.""" + content = _read_sessions_js() + assert '_pick' in content, "_pick helper not found in static/sessions.js" + + +def test_context_indicator_old_pattern_removed(): + """The old || pattern that preferred stale session data must be gone.""" + content = _read_sessions_js() + assert '_s.input_tokens||u.input_tokens' not in content, \ + "Old stale-data-first pattern '_s.input_tokens||u.input_tokens' still present in static/sessions.js" + + +def test_context_indicator_all_six_fields(): + """All six token/cost fields must appear in the _syncCtxIndicator call.""" + content = _read_sessions_js() + fields = [ + 'input_tokens', + 'output_tokens', + 'estimated_cost', + 
'context_length', + 'last_prompt_tokens', + 'threshold_tokens', + ] + for field in fields: + assert field in content, \ + f"Field '{field}' not found in static/sessions.js _syncCtxIndicator call" + + +# ── Sprint 42 additional tests: system prompt title (#441) ────────────── +def test_system_prompt_title_guard_exists(): + """The guard that detects [SYSTEM: prefixes must be present in sessions.js.""" + content = _read_sessions_js() + assert '[SYSTEM:' in content, \ + "sessions.js must contain the [SYSTEM: guard to intercept system-prompt titles" + # Make sure it appears in an if-condition context, not just a comment + assert "cleanTitle.startsWith('[SYSTEM:')" in content, \ + "sessions.js must have: cleanTitle.startsWith('[SYSTEM:') guard expression" + + +def test_source_display_map_defined(): + """The _SOURCE_DISPLAY lookup map must be present and include core gateway platforms.""" + content = _read_sessions_js() + assert '_SOURCE_DISPLAY' in content, \ + "sessions.js must define _SOURCE_DISPLAY mapping for platform name lookup" + # Verify key platform entries are present + for platform in ("telegram:'Telegram'", "discord:'Discord'", "cli:'CLI'"): + assert platform in content, \ + f"_SOURCE_DISPLAY must include entry for {platform}" + + +def test_cleanTitle_is_let_not_const(): + """cleanTitle must be declared with let (not const) to allow reassignment in the guard.""" + content = _read_sessions_js() + assert 'let cleanTitle' in content, \ + "cleanTitle must be declared with 'let' (not 'const') to allow reassignment" + # Make sure the old const form is gone in this context + # (check the specific assignment line pattern) + assert "const cleanTitle=tags.length" not in content, \ + "Old 'const cleanTitle=tags.length...' 
must be replaced by 'let cleanTitle=...'" + + +# ── Sprint 42 additional tests: thinking panel persistence (#427) ──────── +def test_streaming_persists_reasoning_in_session(): + """streaming.py must accumulate reasoning_text and patch last assistant message.""" + src = (REPO / 'api' / 'streaming.py').read_text() + + # _reasoning_text must be initialised + assert "_reasoning_text = ''" in src, \ + "_reasoning_text variable not initialised in streaming.py" + + # on_reasoning must accumulate into _reasoning_text + assert '_reasoning_text += str(text)' in src, \ + "on_reasoning callback does not accumulate into _reasoning_text" + + # Persistence block must exist before raw_session is built + assert "Persist reasoning trace in the session so it survives reload" in src, \ + "Reasoning persistence comment not found in streaming.py" + + assert "_rm['reasoning'] = _reasoning_text" in src, \ + "Code to set _rm['reasoning'] not found in streaming.py" + + # Persistence block must come BEFORE raw_session assignment + persist_idx = src.index("Persist reasoning trace in the session") + raw_session_idx = src.index("raw_session = s.compact()") + assert persist_idx < raw_session_idx, \ + "Reasoning persistence block must appear before raw_session assignment" + + +def test_done_handler_patches_reasoning_field(): + """messages.js done SSE handler must patch reasoningText onto the last assistant message.""" + src = (REPO / 'static' / 'messages.js').read_text() + + # The persistence comment must be present inside the done handler + assert "Persist reasoning trace so thinking card survives page reload" in src, \ + "Reasoning persistence comment not found in messages.js done handler" + + # The guard and assignment must be present (match the actual guard added in messages.js) + assert "if(reasoningText&&lastAsst&&!lastAsst.reasoning)" in src, \ + "reasoningText guard not found in messages.js" + + assert "lastAsst.reasoning=reasoningText" in src, \ + "lastAsst.reasoning assignment not found in messages.js" + + # Verify the patch is inside the done handler (after
'source.addEventListener' for done) + done_handler_idx = src.index("source.addEventListener('done'") + persist_idx = src.index("Persist reasoning trace so thinking card survives page reload") + assert done_handler_idx < persist_idx, \ + "Reasoning persistence patch must be inside the done SSE handler" + + # The guard must also check !lastAsst.reasoning to avoid overwriting server value + assert "!lastAsst.reasoning" in src, \ + "Guard '!lastAsst.reasoning' missing — would overwrite server-persisted reasoning" + + +def test_rendermessages_reads_reasoning_from_messages(): + """ui.js renderMessages must read m.reasoning to display the thinking card.""" + src = (REPO / 'static' / 'ui.js').read_text() + + # m.reasoning must be read in the render path + assert 'm.reasoning' in src, \ + "m.reasoning not referenced in ui.js — thinking card won't render on reload" + + # The thinking card rendering block must also be present + assert 'thinking-card' in src, \ + "thinking-card CSS class not found in ui.js" + + # Specifically, the fallback that reads from top-level m.reasoning field + assert 'thinkingText=m.reasoning' in src.replace(' ', ''), \ + "thinkingText=m.reasoning assignment not found in ui.js renderMessages"