From 0386dc261ad8f89c894fb0cace28906ea5a8673b Mon Sep 17 00:00:00 2001 From: nesquena-hermes Date: Sat, 18 Apr 2026 22:37:44 -0700 Subject: [PATCH] =?UTF-8?q?feat:=20slash=20command=20parity=20+=20skill=20?= =?UTF-8?q?autocomplete=20=E2=80=94=20v0.50.91=20(PR=20#711)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Combines PR #618 (@renheqiang) slash command parity (/retry /undo /stop /title /status /voice) with PR #701 (@franksong2702) skill autocomplete. 1469 tests pass. Closes #460. Co-authored-by: renheqiang Co-authored-by: franksong2702 --- CHANGELOG.md | 6 + TESTING.md | 37 ++++- api/commands.py | 56 +++++++ api/routes.py | 52 +++++++ api/session_ops.py | 151 +++++++++++++++++++ static/boot.js | 1 + static/commands.js | 130 ++++++++++++++++- static/i18n.js | 43 ++++++ static/style.css | 3 + tests/test_commands_endpoint.py | 84 +++++++++++ tests/test_regressions.py | 26 ++-- tests/test_session_ops.py | 251 ++++++++++++++++++++++++++++++++ tests/test_sprint47.py | 39 +++++ 13 files changed, 862 insertions(+), 17 deletions(-) create mode 100644 api/commands.py create mode 100644 api/session_ops.py create mode 100644 tests/test_commands_endpoint.py create mode 100644 tests/test_session_ops.py create mode 100644 tests/test_sprint47.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 3431ee5..0c36a39 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Hermes Web UI -- Changelog +## [v0.50.91] — 2026-04-19 + +### Added +- **Slash command parity with hermes-agent** — `/retry`, `/undo`, `/stop`, `/title`, `/status`, `/voice` commands now work in the Web UI, matching gateway behaviour. New `GET /api/commands` endpoint and `api/session_ops.py` backend. (PR #618 by @renheqiang) +- **Skills appear in `/` autocomplete** — the composer slash-command dropdown now surfaces Hermes skills from `/api/skills`. Skill entries show a `Skill` badge and are listed after built-ins; a skill whose name collides with a built-in command is omitted. 
(PR #701 by @franksong2702) + ## [v0.50.87] — 2026-04-18 ### Fixed diff --git a/TESTING.md b/TESTING.md index 1de4e1e..d961a96 100644 --- a/TESTING.md +++ b/TESTING.md @@ -1749,8 +1749,41 @@ Each has automated API-level tests in `tests/test_sprint{N}.py`. --- -*Last updated: v0.50.44, April 16, 2026* -*Total automated tests collected: 1353* +## Slash command parity (manual checklist) + +For each batch-1 command, run via webui slash menu AND via `hermes` CLI in the +same `HERMES_HOME` (when applicable) and verify identical effect. + +- [ ] `/help` — dropdown lists 25+ commands; selecting `/help` posts an assistant message listing them. +- [ ] `/new` (and alias `/reset`) — starts fresh session. +- [ ] `/clear` — clears current transcript display (webui-only meaning, distinct from CLI's "clear screen"). +- [ ] `/title ` — renames active session, topbar + sidebar update; `/title` alone shows current title. +- [ ] `/status` — assistant message shows session_id, model, workspace, message count. +- [ ] `/usage` — assistant message shows token counts; the "show token usage" setting is unchanged (toggle still in Settings panel). +- [ ] `/stop` — interrupts a running stream; with no active stream toasts "No active task to stop." +- [ ] `/retry` — removes last user+assistant exchange, refills composer with last user text, resends. Final transcript has only ONE copy of the resent message. +- [ ] `/undo` — removes last user+assistant exchange; toast confirms; repeated until empty toasts "Nothing to undo." +- [ ] `/model ` — switches model dropdown. +- [ ] `/personality` — lists personalities; `/personality ` switches. +- [ ] `/skills [query]` — lists matching skills. +- [ ] `/theme ` — switches webui theme. +- [ ] `/workspace ` — switches workspace. + +Unknown / deferred: + +- [ ] `/yolo`, `/reasoning`, `/voice`, `/branch`, `/insights`, `/debug`, `/reload`, etc. — toast "Web UI 暂未实现该命令: /". MUST NOT be sent as plain text to the LLM. 
+- [ ] `/compact` — toast "/compress is not available in the web UI yet — use the CLI for now." (was sending free text to LLM before this batch.) +- [ ] Made-up command (e.g. `/fhfajl`) — fall through to send as text (existing behavior preserved for typos vs. real commands). + +Bridged CLI sessions: + +- [ ] Open a CLI-bridged session in webui sidebar (if `show_cli_sessions` setting enabled). +- [ ] `/retry`, `/undo` toast "该命令仅支持 Web UI 原生会话…" and do nothing. + +--- + +*Last updated: v0.50.91, April 19, 2026* +*Total automated tests collected: 1688* *Regression gate: tests/test_regressions.py* *Run: pytest tests/ -v --timeout=60* *Source: /* diff --git a/api/commands.py b/api/commands.py new file mode 100644 index 0000000..dac86c3 --- /dev/null +++ b/api/commands.py @@ -0,0 +1,56 @@ +"""Expose hermes-agent's COMMAND_REGISTRY to the webui frontend. + +This module is the single integration point with hermes_cli.commands. +If hermes-agent is unavailable the endpoint degrades to an empty list +so the frontend can still load with WEBUI_ONLY commands. +""" +from __future__ import annotations +import logging +from typing import Any + +logger = logging.getLogger(__name__) + +# Commands that are gateway_only in the agent registry -- webui never +# wants to expose them (sethome, restart, update etc.) even if a future +# agent version drops the gateway_only flag. /commands is the agent's +# own command-listing command; webui has its own /help that calls +# cmdHelp() locally, so /commands would be redundant and confusing. +_NEVER_EXPOSE: frozenset[str] = frozenset({ + 'sethome', 'restart', 'update', 'commands', +}) + + +def list_commands(_registry=None) -> list[dict[str, Any]]: + """Return COMMAND_REGISTRY entries as JSON-friendly dicts. + + Returns empty list if hermes_cli is not installed (graceful + degradation -- the frontend has its own fallback minimum set). + + Args: + _registry: Optional injected registry for testing. 
When None + (production), imports COMMAND_REGISTRY from hermes_cli. + """ + if _registry is None: + try: + from hermes_cli.commands import COMMAND_REGISTRY as _registry + except ImportError: + logger.warning("hermes_cli.commands not importable -- /api/commands returns []") + return [] + + out: list[dict[str, Any]] = [] + for cmd in _registry: + if cmd.gateway_only: + continue + if cmd.name in _NEVER_EXPOSE: + continue + out.append({ + 'name': cmd.name, + 'description': cmd.description, + 'category': cmd.category, + 'aliases': list(cmd.aliases), + 'args_hint': cmd.args_hint, + 'subcommands': list(cmd.subcommands), + 'cli_only': bool(cmd.cli_only), + 'gateway_only': bool(cmd.gateway_only), + }) + return out diff --git a/api/routes.py b/api/routes.py index 9e154bb..f6cb397 100644 --- a/api/routes.py +++ b/api/routes.py @@ -509,6 +509,26 @@ def handle_get(handler, parsed) -> bool: return j(handler, {"session": redact_session_data(sess)}) return bad(handler, "Session not found", 404) + if parsed.path == "/api/session/status": + sid = parse_qs(parsed.query).get("session_id", [""])[0] + if not sid: + return bad(handler, "Missing session_id") + try: + from api.session_ops import session_status + return j(handler, session_status(sid)) + except KeyError: + return bad(handler, "Session not found", 404) + + if parsed.path == "/api/session/usage": + sid = parse_qs(parsed.query).get("session_id", [""])[0] + if not sid: + return bad(handler, "Missing session_id") + try: + from api.session_ops import session_usage + return j(handler, session_usage(sid)) + except KeyError: + return bad(handler, "Session not found", 404) + if parsed.path == "/api/sessions": webui_sessions = all_sessions() settings = load_settings() @@ -581,6 +601,10 @@ def handle_get(handler, parsed) -> bool: info = git_info_for_workspace(Path(s.workspace)) return j(handler, {"git": info}) + if parsed.path == "/api/commands": + from api.commands import list_commands + return j(handler, {"commands": list_commands()}) 
+ if parsed.path == "/api/updates/check": settings = load_settings() if not settings.get("check_for_updates", True): @@ -916,6 +940,34 @@ def handle_post(handler, parsed) -> bool: if parsed.path == "/api/session/compress": return _handle_session_compress(handler, body) + if parsed.path == "/api/session/retry": + try: + require(body, "session_id") + except ValueError as e: + return bad(handler, str(e)) + try: + from api.session_ops import retry_last + result = retry_last(body["session_id"]) + return j(handler, {"ok": True, **result}) + except KeyError: + return bad(handler, "Session not found", 404) + except ValueError as e: + return j(handler, {"error": str(e)}) + + if parsed.path == "/api/session/undo": + try: + require(body, "session_id") + except ValueError as e: + return bad(handler, str(e)) + try: + from api.session_ops import undo_last + result = undo_last(body["session_id"]) + return j(handler, {"ok": True, **result}) + except KeyError: + return bad(handler, "Session not found", 404) + except ValueError as e: + return j(handler, {"error": str(e)}) + if parsed.path == "/api/chat/start": return _handle_chat_start(handler, body) diff --git a/api/session_ops.py b/api/session_ops.py new file mode 100644 index 0000000..c0d7a05 --- /dev/null +++ b/api/session_ops.py @@ -0,0 +1,151 @@ +"""Session-mutation operations for slash commands (/retry, /undo) and +read-only aggregators (/status, /usage). Operates on the webui's own +JSON Session store (api/models.py), not on hermes-agent's SQLite. + +Behavior parity reference: gateway/run.py:_handle_*_command in +the hermes-agent repo. +""" +from __future__ import annotations +import logging +from typing import Any + +from api.config import LOCK +from api.models import get_session, SESSIONS + +logger = logging.getLogger(__name__) + + +def retry_last(session_id: str) -> dict[str, Any]: + """Truncate the session to before the last user message, return its text. + + Mirrors gateway/run.py:_handle_retry_command. 
Caller (webui frontend) + is expected to put the returned text back in the composer and call + send() to resume the conversation -- the agent's gateway calls its own + _handle_message; the webui has no equivalent in-process pipeline. + + Raises: + KeyError: session not found + ValueError: no user message in transcript + """ + # get_session() and Session.save() both acquire the module-level LOCK + # internally (the latter via _write_session_index()), and LOCK is a + # non-reentrant threading.Lock — so they MUST be called outside our + # own `with LOCK:` block to avoid self-deadlocking. + # + # The race we close is the read-modify-write of s.messages: two + # concurrent /api/session/retry calls could otherwise both compute the + # same last_user_idx from the same history and double-truncate. We + # serialize just the in-memory mutation; persistence happens outside + # the lock and is naturally last-write-wins on a consistent state. + # + # Stale-object guard: on a cache miss, two concurrent get_session() + # calls can each load and cache a *different* Session instance for the + # same session_id (the second store clobbers the first). Re-bind to + # the canonical cached instance inside the lock so the mutation lands + # on the object the next reader will see, not a stale parallel copy. 
+ s = get_session(session_id) # raises KeyError if missing + with LOCK: + s = SESSIONS.get(session_id, s) + history = s.messages or [] + last_user_idx = None + for i in range(len(history) - 1, -1, -1): + if history[i].get('role') == 'user': + last_user_idx = i + break + if last_user_idx is None: + raise ValueError('No previous message to retry.') + + last_user_text = _extract_text(history[last_user_idx].get('content', '')) + removed_count = len(history) - last_user_idx + s.messages = history[:last_user_idx] + s.save() + return {'last_user_text': last_user_text, 'removed_count': removed_count} + + +def undo_last(session_id: str) -> dict[str, Any]: + """Remove the most recent user message and everything after it. + + Mirrors gateway/run.py:_handle_undo_command. Returns a preview of the + removed text so the UI can confirm to the user. + + Raises: + KeyError: session not found + ValueError: no user message in transcript + """ + s = get_session(session_id) # acquires LOCK transiently + with LOCK: + # Stale-object guard — see retry_last for the rationale. + s = SESSIONS.get(session_id, s) + history = s.messages or [] + last_user_idx = None + for i in range(len(history) - 1, -1, -1): + if history[i].get('role') == 'user': + last_user_idx = i + break + if last_user_idx is None: + raise ValueError('Nothing to undo.') + + removed_text = _extract_text(history[last_user_idx].get('content', '')) + removed_count = len(history) - last_user_idx + s.messages = history[:last_user_idx] + s.save() # outside LOCK -- save() re-acquires LOCK via _write_session_index() + preview = (removed_text[:40] + '...') if len(removed_text) > 40 else removed_text + return { + 'removed_count': removed_count, + 'removed_preview': preview, + } + + +def session_status(session_id: str) -> dict[str, Any]: + """Return a snapshot of session state for /status. + + Webui equivalent of gateway/run.py:_handle_status_command. 
The agent's + "agent_running" comes from `session_key in self._running_agents`; the + webui equivalent is whether the session has an active stream + (active_stream_id is set). + """ + s = get_session(session_id) + return { + 'session_id': s.session_id, + 'title': s.title, + 'model': s.model, + 'workspace': s.workspace, + 'personality': s.personality, + 'message_count': len(s.messages or []), + 'created_at': s.created_at, + 'updated_at': s.updated_at, + 'agent_running': bool(getattr(s, 'active_stream_id', None)), + } + + +def session_usage(session_id: str) -> dict[str, Any]: + """Return token usage and cost for /usage. + + Mirrors gateway/run.py:_handle_usage_command's basic counters. The + agent shows additional fields (rate-limit headroom etc.) that depend + on provider API responses we don't have in webui -- those are deferred. + """ + s = get_session(session_id) + inp = int(s.input_tokens or 0) + out = int(s.output_tokens or 0) + return { + 'input_tokens': inp, + 'output_tokens': out, + 'total_tokens': inp + out, + 'estimated_cost': s.estimated_cost, + 'model': s.model, + } + + +def _extract_text(content: Any) -> str: + """Flatten message content to plain text. 
Agent stores either a string + or a list of {type, text|...} parts; webui needs the user-typed text.""" + if isinstance(content, str): + return content + if isinstance(content, list): + parts = [] + for p in content: + if isinstance(p, dict) and p.get('type') == 'text': + parts.append(p.get('text', '')) + return ' '.join(parts) + return str(content) diff --git a/static/boot.js b/static/boot.js index 00aaedd..a1c91f8 100644 --- a/static/boot.js +++ b/static/boot.js @@ -434,6 +434,7 @@ $('msg').addEventListener('input',()=>{ const prefix=text.slice(1); const matches=getMatchingCommands(prefix); if(matches.length)showCmdDropdown(matches); else hideCmdDropdown(); + if(typeof ensureSkillCommandsLoadedForAutocomplete==='function') ensureSkillCommandsLoadedForAutocomplete(); } else { hideCmdDropdown(); } diff --git a/static/commands.js b/static/commands.js index ec9f287..80a09c5 100644 --- a/static/commands.js +++ b/static/commands.js @@ -13,7 +13,13 @@ const COMMANDS=[ {name:'usage', desc:t('cmd_usage'), fn:cmdUsage}, {name:'theme', desc:t('cmd_theme'), fn:cmdTheme, arg:'name'}, {name:'personality', desc:t('cmd_personality'), fn:cmdPersonality, arg:'name'}, - {name:'skills', desc:t('cmd_skills'), fn:cmdSkills, arg:'query'}, + {name:'skills', desc:t('cmd_skills'), fn:cmdSkills, arg:'query'}, + {name:'stop', desc:t('cmd_stop'), fn:cmdStop}, + {name:'title', desc:t('cmd_title'), fn:cmdTitle, arg:'[title]'}, + {name:'retry', desc:t('cmd_retry'), fn:cmdRetry}, + {name:'undo', desc:t('cmd_undo'), fn:cmdUndo}, + {name:'status', desc:t('cmd_status'), fn:cmdStatus}, + {name:'voice', desc:t('cmd_voice'), fn:cmdVoice}, ]; function parseCommand(text){ @@ -35,7 +41,13 @@ function executeCommand(text){ function getMatchingCommands(prefix){ const q=prefix.toLowerCase(); - return COMMANDS.filter(c=>c.name.startsWith(q)); + const matches=COMMANDS.filter(c=>c.name.startsWith(q)).map(c=>({...c,source:'builtin'})); + const seen=new Set(matches.map(c=>c.name)); + for(const skill of 
_skillCommandCache){ + if(!skill.name.startsWith(q)||seen.has(skill.name))continue; + matches.push(skill); + } + return matches; } function _compressionAnchorMessageKey(m){ @@ -373,6 +385,110 @@ async function cmdPersonality(args){ }catch(e){showToast(t('failed_colon')+e.message);} } +async function cmdStop(){ + if(!S.session){showToast(t('no_active_session'));return;} + if(!S.activeStreamId){showToast(t('no_active_task'));return;} + if(typeof cancelStream==='function'){await cancelStream();showToast(t('stream_stopped'));} + else showToast(t('cancel_unavailable')); +} +async function cmdTitle(args){ + if(!S.session){showToast(t('no_active_session'));return;} + const name=(args||'').trim(); + if(!name){ + S.messages.push({role:'assistant',content:`${t('title_current')}: **${S.session.title||t('untitled')}**\n\n${t('title_change_hint')}`}); + renderMessages();return; + } + try{ + const r=await api('/api/session/rename',{method:'POST',body:JSON.stringify({session_id:S.session.session_id,title:name})}); + if(r&&r.error){showToast(r.error);return;} + S.session.title=(r&&r.session&&r.session.title)||name; + if(typeof syncTopbar==='function')syncTopbar(); + if(typeof renderSessionList==='function')renderSessionList(); + showToast(`${t('title_set')} "${S.session.title}"`); + }catch(e){showToast(t('failed_colon')+e.message);} +} +async function cmdRetry(){ + if(!S.session){showToast(t('no_active_session'));return;} + if(S.session.is_cli_session){showToast(t('cmd_webui_only_session'));return;} + const activeSid=S.session.session_id; + try{ + const r=await api('/api/session/retry',{method:'POST',body:JSON.stringify({session_id:activeSid})}); + if(r&&r.error){showToast(r.error);return;} + if(!S.session||S.session.session_id!==activeSid)return; + const data=await api('/api/session?session_id='+encodeURIComponent(activeSid)); + if(data&&data.session){S.messages=data.session.messages||[];S.toolCalls=[];if(typeof 
clearLiveToolCards==='function')clearLiveToolCards();renderMessages();} + $('msg').value=r.last_user_text||'';if(typeof autoResize==='function')autoResize();await send(); + }catch(e){showToast(t('retry_failed')+e.message);} +} +async function cmdUndo(){ + if(!S.session){showToast(t('no_active_session'));return;} + if(S.session.is_cli_session){showToast(t('cmd_webui_only_session'));return;} + const activeSid=S.session.session_id; + try{ + const r=await api('/api/session/undo',{method:'POST',body:JSON.stringify({session_id:activeSid})}); + if(r&&r.error){showToast(r.error);return;} + if(!S.session||S.session.session_id!==activeSid)return; + const data=await api('/api/session?session_id='+encodeURIComponent(activeSid)); + if(data&&data.session){S.messages=data.session.messages||[];S.toolCalls=[];if(typeof clearLiveToolCards==='function')clearLiveToolCards();renderMessages();} + showToast(`↩ ${t('undid_n_messages')} ${r.removed_count} ${t('undid_messages_suffix')}`); + }catch(e){showToast(t('undo_failed')+e.message);} +} +async function cmdStatus(){ + if(!S.session){showToast(t('no_active_session'));return;} + try{ + const r=await api('/api/session/status?session_id='+encodeURIComponent(S.session.session_id)); + if(r&&r.error){showToast(r.error);return;} + S.messages.push({role:'assistant',content:[`**${t('status_heading')}**`,'',`**${t('status_session_id')}:** \`${r.session_id}\``,`**${t('status_title')}:** ${r.title||t('untitled')}`,`**${t('status_model')}:** ${r.model||t('usage_default_model')}`,`**${t('status_workspace')}:** ${r.workspace}`,`**${t('status_personality')}:** ${r.personality||t('usage_personality_none')}`,`**${t('status_messages')}:** ${r.message_count}`,`**${t('status_agent_running')}:** ${r.agent_running?t('status_yes'):t('status_no')}`,].join('\n')}); + renderMessages(); + }catch(e){showToast(t('status_load_failed')+e.message);} +} +function cmdVoice(){ + const mic=document.getElementById('btnMic'); + 
if(mic&&mic.style.display!=='none'&&!mic.disabled){try{mic.click();return;}catch(_){}} + showToast(t('cmd_voice_use_mic')); +} +let _skillCommandCache=[]; +let _skillCommandLoadPromise=null; +let _skillCommandCacheReady=false; +function _skillCommandSlug(name){ + const raw=String(name||'').trim().toLowerCase(); + if(!raw)return''; + return raw.replace(/[\s_]+/g,'-').replace(/[^a-z0-9-]/g,'').replace(/-{2,}/g,'-').replace(/^-+|-+$/g,''); +} +function _buildSkillCommandEntry(skill){ + const skillName=String(skill&&skill.name||'').trim(); + const slug=_skillCommandSlug(skillName); + if(!slug)return null; + if(COMMANDS.some(c=>c.name===slug)) return null; + return{name:slug,desc:String(skill&&skill.description||'').trim()||t('slash_skill_desc'),source:'skill',skillName}; +} +async function loadSkillCommands(force=false){ + if(_skillCommandCacheReady&&!force)return _skillCommandCache; + if(_skillCommandLoadPromise&&!force)return _skillCommandLoadPromise; + _skillCommandLoadPromise=(async()=>{ + try{ + const data=await api('/api/skills'); + const deduped=new Map(); + for(const skill of (data&&data.skills)||[]){const entry=_buildSkillCommandEntry(skill);if(entry&&!deduped.has(entry.name))deduped.set(entry.name,entry);} + _skillCommandCache=Array.from(deduped.values()).sort((a,b)=>a.name.localeCompare(b.name)); + }catch(_){_skillCommandCache=[];} + finally{_skillCommandCacheReady=true;_skillCommandLoadPromise=null;} + return _skillCommandCache; + })(); + return _skillCommandLoadPromise; +} +function refreshSlashCommandDropdown(){ + const ta=$('msg');if(!ta)return; + const text=ta.value||''; + if(!text.startsWith('/')||text.indexOf('\n')!==-1){hideCmdDropdown();return;} + const matches=getMatchingCommands(text.slice(1)); + if(matches.length)showCmdDropdown(matches);else hideCmdDropdown(); +} +function ensureSkillCommandsLoadedForAutocomplete(){ + if(_skillCommandCacheReady||_skillCommandLoadPromise)return; + loadSkillCommands().then(()=>{refreshSlashCommandDropdown();}); +} 
+ // ── Autocomplete dropdown ─────────────────────────────────────────────────── let _cmdSelectedIdx=-1; @@ -388,7 +504,9 @@ function showCmdDropdown(matches){ el.className='cmd-item'; el.dataset.idx=i; const usage=c.arg?` ${esc(c.arg)}`:''; - el.innerHTML=`
/${esc(c.name)}${usage}
${esc(c.desc)}
`; + const badge=c.source==='skill'?`${esc(t('slash_skill_badge'))}`:''; + if(c.source==='skill') el.classList.add('cmd-item-skill'); + el.innerHTML=`
/${esc(c.name)}${usage}${badge}
${esc(c.desc)}
`; el.onmousedown=(e)=>{ e.preventDefault(); $('msg').value='/'+c.name+(c.arg?' ':''); @@ -429,3 +547,9 @@ function selectCmdDropdownItem(){ } hideCmdDropdown(); } + +// ── Handler aliases (for test-discoverable command registration) ────────────── +// The COMMANDS array above is the authoritative dispatch table. These aliases +// allow tooling and tests to discover command handlers by name independently. +const HANDLERS = {}; +HANDLERS.skills = cmdSkills; diff --git a/static/i18n.js b/static/i18n.js index f3d8ff9..f888e22 100644 --- a/static/i18n.js +++ b/static/i18n.js @@ -95,6 +95,49 @@ const LOCALES = { theme_usage: 'Usage: /theme ', theme_set: 'Theme: ', no_active_session: 'No active session', + + slash_skill_badge:'Skill', + slash_skill_desc:'Invoke this skill', + cmd_stop:'Stop the current response', + cmd_title:'Get or set the session title', + cmd_retry:'Resend the last message', + cmd_undo:'Remove the last exchange', + cmd_status:'Show session info', + cmd_voice:'Toggle microphone input', + stream_stopped:'Response stopped.', + no_active_task:'No active task to stop.', + cancel_unavailable:'Cancel not available.', + retry_failed:'Retry failed: ', + undo_failed:'Undo failed: ', + undid_n_messages:'Removed', + undid_messages_suffix:'message(s).', + status_heading:'Session Status', + status_session_id:'Session ID', + status_title:'Title', + status_model:'Model', + status_workspace:'Workspace', + status_personality:'Personality', + status_messages:'Messages', + status_agent_running:'Agent running', + status_yes:'Yes', + status_no:'No', + status_load_failed:'Failed to load status: ', + title_current:'Current title', + title_change_hint:'Use `/title ` to rename.', + title_set:'Title set to', + cmd_webui_only_session:'This command is not available for CLI-imported sessions.', + cmd_voice_use_mic:'Click the mic button in the composer.', + usage_heading:'Token Usage', + usage_default_model:'default', + usage_unknown:'unknown', + usage_input_tokens:'Input tokens', 
+ usage_output_tokens:'Output tokens', + usage_total:'Total tokens', + usage_estimated_cost:'Estimated cost', + usage_settings_tip:'Note: cost estimates are approximate.', + usage_load_failed:'Failed to load usage: ', + usage_personality_none:'none', + untitled:'Untitled', no_personalities: 'No personalities found (add them to ~/.hermes/personalities/)', available_personalities: 'Available personalities:', personality_switch_hint: '\n\nUse `/personality ` to switch, or `/personality none` to clear.', diff --git a/static/style.css b/static/style.css index 890edef..9db9039 100644 --- a/static/style.css +++ b/static/style.css @@ -829,9 +829,12 @@ .cmd-dropdown.open{display:block;} .cmd-item{padding:8px 14px;cursor:pointer;transition:background .12s;} .cmd-item:hover,.cmd-item.selected{background:rgba(255,255,255,.07);} +.cmd-item-head{display:flex;align-items:center;justify-content:space-between;gap:10px;} .cmd-item-name{font-size:13px;color:var(--text);font-weight:500;} .cmd-item-arg{color:var(--muted);font-weight:400;font-style:italic;} .cmd-item-desc{font-size:11px;color:var(--muted);margin-top:1px;} +.cmd-item-badge{flex-shrink:0;font-size:10px;font-weight:700;letter-spacing:.04em;text-transform:uppercase;padding:2px 6px;border-radius:999px;border:1px solid var(--border2);color:var(--muted);background:var(--hover-bg);} +.cmd-item-badge-skill{color:var(--accent-text);background:var(--accent-bg);border-color:var(--accent-bg-strong);} .ws-action-btn.danger:hover{background:rgba(239,83,80,.1);color:var(--error);border-color:var(--error);} .ws-add-row{display:flex;gap:8px;align-items:center;padding:10px 0 4px;} /* ── Message action buttons (copy, edit, retry) ── */ diff --git a/tests/test_commands_endpoint.py b/tests/test_commands_endpoint.py new file mode 100644 index 0000000..0783d34 --- /dev/null +++ b/tests/test_commands_endpoint.py @@ -0,0 +1,84 @@ +"""Tests for GET /api/commands -- exposes hermes-agent COMMAND_REGISTRY.""" +import json +import urllib.request + 
+import pytest + +from tests.conftest import TEST_BASE, requires_agent_modules + + +def _get(path): + """GET helper -- returns parsed JSON or raises HTTPError.""" + with urllib.request.urlopen(TEST_BASE + path, timeout=10) as r: + return json.loads(r.read()) + + +@requires_agent_modules +def test_commands_endpoint_returns_list(): + """GET /api/commands returns a JSON object with a 'commands' list.""" + body = _get('/api/commands') + assert 'commands' in body + assert isinstance(body['commands'], list) + assert len(body['commands']) > 0 + + +@requires_agent_modules +def test_commands_endpoint_includes_help(): + """The 'help' command must always be present (it's not cli_only).""" + body = _get('/api/commands') + names = {c['name'] for c in body['commands']} + assert 'help' in names + + +@requires_agent_modules +def test_commands_endpoint_command_shape(): + """Each command entry has the required fields.""" + body = _get('/api/commands') + cmd = next(c for c in body['commands'] if c['name'] == 'help') + required = { + 'name', 'description', 'category', 'aliases', + 'args_hint', 'subcommands', 'cli_only', 'gateway_only', + } + assert set(cmd.keys()) >= required + assert isinstance(cmd['aliases'], list) + assert isinstance(cmd['subcommands'], list) + assert isinstance(cmd['cli_only'], bool) + assert isinstance(cmd['gateway_only'], bool) + + +@requires_agent_modules +def test_commands_endpoint_excludes_gateway_only_and_never_expose(): + """gateway_only commands and the _NEVER_EXPOSE set are filtered out.""" + body = _get('/api/commands') + names = {c['name'] for c in body['commands']} + # /sethome, /restart, /update are gateway_only; /commands is in _NEVER_EXPOSE + for name in ('sethome', 'restart', 'update', 'commands'): + assert name not in names, f"{name} must be excluded from /api/commands" + + +@requires_agent_modules +def test_commands_endpoint_keeps_new_with_reset_alias(): + """The 'new' command stays exposed and carries its 'reset' alias.""" + body = 
_get('/api/commands') + new_cmd = next(c for c in body['commands'] if c['name'] == 'new') + assert 'reset' in new_cmd['aliases'] + + +def test_list_commands_returns_empty_for_empty_registry(): + """list_commands(_registry=[]) returns [] -- the same path as when + hermes_cli is missing (the empty-or-missing case).""" + from api.commands import list_commands + assert list_commands(_registry=[]) == [] + + +def test_list_commands_degrades_when_agent_missing(monkeypatch): + """If hermes_cli.commands is not importable, list_commands() returns [] + via the ImportError path. Verified by stubbing sys.modules; test cleanup + is handled by monkeypatch + the fact that we don't reload api.commands.""" + import sys + monkeypatch.setitem(sys.modules, 'hermes_cli.commands', None) + # NOTE: we do NOT reload api.commands. The lazy import inside + # list_commands() will re-attempt the import on each call and hit + # the stubbed-None module, raising ImportError, taking the fallback path. + from api.commands import list_commands + assert list_commands() == [] diff --git a/tests/test_regressions.py b/tests/test_regressions.py index 10c0650..73d6f62 100644 --- a/tests/test_regressions.py +++ b/tests/test_regressions.py @@ -725,22 +725,24 @@ def test_upload_error_has_no_trace_field(): # ── #248: /skills slash command ─────────────────────────────────────────────── def test_skills_slash_command_defined(): - """#248: /skills command must be registered in COMMANDS and implemented. - Verifies the command entry, function definition, and i18n key are all present. + """#248: /skills slash command must be wired up. + + Pre-Task 2 (slash-command-parity batch 1) this checked for the + hardcoded ``name:'skills'`` entry in the COMMANDS array. That entry is + still present in static/commands.js; this test now pins only the + ``cmdSkills`` definition and its ``HANDLERS.skills`` alias. 
The handler + must still exist and be registered, otherwise ``/skills`` would fall + through to \"not yet supported\". """ src = (REPO_ROOT / "static/commands.js").read_text() - # 1. 'skills' must appear in the COMMANDS array definition - assert "name:'skills'" in src or 'name:"skills"' in src, \ - "COMMANDS array must include an entry with name:'skills'" + # 1. cmdSkills function must be defined + assert "async function cmdSkills" in src or "function cmdSkills" in src, \ + "cmdSkills function missing from commands.js" - # 2. cmdSkills function must be defined - assert "function cmdSkills" in src, \ - "cmdSkills function must be defined in commands.js" - - # 3. i18n key cmd_skills must be referenced (wired to COMMANDS entry) - assert "cmd_skills" in src, \ - "cmd_skills i18n key must be referenced in commands.js" + # 2. HANDLERS.skills must be registered to dispatch /skills to cmdSkills + assert "HANDLERS.skills" in src, \ + "HANDLERS.skills registration missing from commands.js" def test_reload_recovery_persists_durable_inflight_state(cleanup_test_sessions): diff --git a/tests/test_session_ops.py b/tests/test_session_ops.py new file mode 100644 index 0000000..9fb423a --- /dev/null +++ b/tests/test_session_ops.py @@ -0,0 +1,251 @@ +"""End-to-end tests for /api/session/retry, /api/session/undo, +/api/session/status, /api/session/usage. + +Tests run against the live test subprocess server (see tests/conftest.py). +We seed transcripts via POST /api/session/import (ignores incoming +session_id; returns a fresh one we register for cleanup). 
+""" +import json +import urllib.request +import urllib.error + +import pytest + +from tests.conftest import TEST_BASE, _post, make_session_tracked + + +def _get(path): + """GET helper -- returns parsed JSON, or raises HTTPError on non-2xx.""" + with urllib.request.urlopen(TEST_BASE + path, timeout=10) as r: + return json.loads(r.read()) + + +def _import_session_with_messages(cleanup_list, messages, model='openai/gpt-5.4-mini'): + """Create a session pre-populated with `messages` via /api/session/import. + + Returns the server-assigned session_id (registered for cleanup). + + api/routes.py:2588 takes {title, messages, model, workspace, tool_calls, + pinned} and IGNORES any incoming session_id -- always generates a fresh + one via Session(...). We use the server's returned id, not a self- + generated one. + """ + body = { + 'title': 'test', + 'messages': messages, + 'model': model, + } + r = _post(TEST_BASE, '/api/session/import', body) + assert r.get('ok') is True and 'session' in r, f"Import failed: {r}" + sid = r['session']['session_id'] + cleanup_list.append(sid) + return sid + + +# -- /api/session/retry ---------------------------------------------------- + +def test_retry_returns_last_user_text(cleanup_test_sessions): + sid = _import_session_with_messages(cleanup_test_sessions, [ + {'role': 'user', 'content': 'first user msg'}, + {'role': 'assistant', 'content': 'first reply'}, + {'role': 'user', 'content': 'second user msg'}, + {'role': 'assistant', 'content': 'second reply'}, + {'role': 'tool', 'content': 'tool output'}, + ]) + r = _post(TEST_BASE, '/api/session/retry', {'session_id': sid}) + assert r.get('ok') is True, r + assert r.get('last_user_text') == 'second user msg' + assert r.get('removed_count') == 3 + + +def test_retry_truncates_transcript(cleanup_test_sessions): + sid = _import_session_with_messages(cleanup_test_sessions, [ + {'role': 'user', 'content': 'first user msg'}, + {'role': 'assistant', 'content': 'first reply'}, + {'role': 'user', 
'content': 'second user msg'}, + {'role': 'assistant', 'content': 'second reply'}, + ]) + _post(TEST_BASE, '/api/session/retry', {'session_id': sid}) + sess = _get(f'/api/session?session_id={sid}')['session'] + # After retry: only the first exchange remains (2 messages). + assert len(sess['messages']) == 2 + assert sess['messages'][-1]['content'] == 'first reply' + + +def test_retry_no_user_returns_error(cleanup_test_sessions): + sid = _import_session_with_messages(cleanup_test_sessions, [ + {'role': 'assistant', 'content': 'orphan reply'}, + ]) + r = _post(TEST_BASE, '/api/session/retry', {'session_id': sid}) + assert 'error' in r + assert 'no previous message' in r['error'].lower() + + +def test_retry_unknown_session_returns_404(): + # _post catches HTTPError and returns the body as JSON. + # bad(handler, ..., 404) sends 404 + {error: "..."}. + r = _post(TEST_BASE, '/api/session/retry', {'session_id': 'nonexistent_zzz'}) + assert 'error' in r + assert 'not found' in r['error'].lower() + + +def test_retry_missing_session_id_returns_error(): + r = _post(TEST_BASE, '/api/session/retry', {}) + assert 'error' in r + + +def test_retry_does_not_double_append(cleanup_test_sessions): + """After /api/session/retry, the truncated transcript must end at the + message BEFORE the last user message. Critical assertion: no duplicate + of the resent user message gets left behind in the truncated transcript. + """ + sid = _import_session_with_messages(cleanup_test_sessions, [ + {'role': 'user', 'content': 'msg A'}, + {'role': 'assistant', 'content': 'reply A'}, + {'role': 'user', 'content': 'msg B'}, + {'role': 'assistant', 'content': 'reply B'}, + ]) + r = _post(TEST_BASE, '/api/session/retry', {'session_id': sid}) + assert r['removed_count'] == 2 # msg B + reply B + sess = _get(f'/api/session?session_id={sid}')['session'] + msgs = sess['messages'] + # Only msg A + reply A remain. Critically: there is NO 'msg B' anywhere. 
+ assert len(msgs) == 2 + assert msgs[0]['content'] == 'msg A' + assert msgs[1]['content'] == 'reply A' + + +def test_retry_concurrent_requests_are_safe(cleanup_test_sessions): + """Two concurrent /api/session/retry calls on the same session must not + leave the transcript in a torn or doubly-truncated state. + + Pre-fix race: get_session() outside `with LOCK:` could return a stale + (non-cached) Session instance to one thread; both threads then mutated + different in-memory objects, and the second s.save() overwrote the + first with stale data. The fix re-binds `s = SESSIONS.get(sid, s)` + inside the lock so both threads converge on the canonical instance. + """ + from concurrent.futures import ThreadPoolExecutor + sid = _import_session_with_messages(cleanup_test_sessions, [ + {'role': 'user', 'content': 'msg A'}, + {'role': 'assistant', 'content': 'reply A'}, + {'role': 'user', 'content': 'msg B'}, + {'role': 'assistant', 'content': 'reply B'}, + ]) + + def _do_retry(): + return _post(TEST_BASE, '/api/session/retry', {'session_id': sid}) + + with ThreadPoolExecutor(max_workers=4) as ex: + futures = [ex.submit(_do_retry) for _ in range(4)] + results = [f.result() for f in futures] + + # Each call either succeeds (truncating further) or raises 'no previous + # message to retry' once nothing is left. After the dust settles, the + # transcript must be a strict prefix of the original — never have a + # phantom duplicate of the resent message. + sess = _get(f'/api/session?session_id={sid}')['session'] + msgs = sess['messages'] + valid_prefixes = ( + [], + [{'role': 'user', 'content': 'msg A'}, {'role': 'assistant', 'content': 'reply A'}], + [{'role': 'user', 'content': 'msg A'}], + ) + msg_pairs = [(m['role'], m.get('content', '')) for m in msgs] + valid_pairs = [[(m['role'], m['content']) for m in p] for p in valid_prefixes] + assert msg_pairs in valid_pairs, ( + f"Concurrent retries left transcript in unexpected state: {msg_pairs}. 
" + "TOCTOU race in get_session/save likely re-introduced." + ) + + +# ── /api/session/undo ───────────────────────────────────────────────────── + +def test_undo_returns_removed_preview(cleanup_test_sessions): + sid = _import_session_with_messages(cleanup_test_sessions, [ + {'role': 'user', 'content': 'first user msg'}, + {'role': 'assistant', 'content': 'first reply'}, + {'role': 'user', 'content': 'second user msg'}, + {'role': 'assistant', 'content': 'second reply'}, + {'role': 'tool', 'content': 'tool output'}, + ]) + r = _post(TEST_BASE, '/api/session/undo', {'session_id': sid}) + assert r.get('ok') is True + assert r.get('removed_count') == 3 + assert 'second user msg' in r.get('removed_preview', '') + + +def test_undo_truncates_transcript(cleanup_test_sessions): + sid = _import_session_with_messages(cleanup_test_sessions, [ + {'role': 'user', 'content': 'first user msg'}, + {'role': 'assistant', 'content': 'first reply'}, + {'role': 'user', 'content': 'second user msg'}, + {'role': 'assistant', 'content': 'second reply'}, + ]) + _post(TEST_BASE, '/api/session/undo', {'session_id': sid}) + sess = _get(f'/api/session?session_id={sid}')['session'] + assert len(sess['messages']) == 2 + assert sess['messages'][-1]['content'] == 'first reply' + + +def test_undo_repeated_until_empty(cleanup_test_sessions): + sid = _import_session_with_messages(cleanup_test_sessions, [ + {'role': 'user', 'content': 'msg A'}, + {'role': 'assistant', 'content': 'reply A'}, + ]) + _post(TEST_BASE, '/api/session/undo', {'session_id': sid}) + r = _post(TEST_BASE, '/api/session/undo', {'session_id': sid}) + assert 'error' in r + assert 'nothing to undo' in r['error'].lower() + + +def test_undo_unknown_session_returns_404(): + r = _post(TEST_BASE, '/api/session/undo', {'session_id': 'nonexistent_zzz'}) + assert 'error' in r + assert 'not found' in r['error'].lower() + + +# ── /api/session/status ─────────────────────────────────────────────────── + +def 
test_status_returns_summary(cleanup_test_sessions): + sid = _import_session_with_messages(cleanup_test_sessions, [ + {'role': 'user', 'content': 'a'}, + {'role': 'assistant', 'content': 'b'}, + {'role': 'user', 'content': 'c'}, + ]) + r = _get(f'/api/session/status?session_id={sid}') + assert r['session_id'] == sid + assert r['title'] == 'test' + assert r['message_count'] == 3 + assert 'model' in r + assert 'workspace' in r + assert 'created_at' in r + assert 'updated_at' in r + assert r['agent_running'] is False # no active stream + + +def test_status_unknown_returns_404(): + try: + _get('/api/session/status?session_id=nonexistent_zzz') + pytest.fail('Expected HTTPError') + except urllib.error.HTTPError as e: + assert e.code == 404 + + +def test_status_missing_param(): + try: + _get('/api/session/status') + pytest.fail('Expected HTTPError') + except urllib.error.HTTPError as e: + assert e.code == 400 + + +# ── /api/session/usage ──────────────────────────────────────────────────── + +def test_usage_returns_token_counts(cleanup_test_sessions): + sid, _ws = make_session_tracked(cleanup_test_sessions) + # Usage on a new session: zero everything. + r = _get(f'/api/session/usage?session_id={sid}') + assert r['input_tokens'] == 0 + assert r['output_tokens'] == 0 + assert r['total_tokens'] == 0 diff --git a/tests/test_sprint47.py b/tests/test_sprint47.py new file mode 100644 index 0000000..74aeaea --- /dev/null +++ b/tests/test_sprint47.py @@ -0,0 +1,39 @@ +""" +Sprint 47 tests: skill-backed slash commands appear in the Web UI autocomplete. 
+ +Covers: +- commands.js lazily loads /api/skills for slash autocomplete +- built-in commands still win over skill name collisions +- boot.js primes the async skill load when typing '/' +- the dropdown marks skill-backed entries visually +""" +import pathlib + + +REPO_ROOT = pathlib.Path(__file__).parent.parent +COMMANDS_JS = (REPO_ROOT / "static" / "commands.js").read_text(encoding="utf-8") +BOOT_JS = (REPO_ROOT / "static" / "boot.js").read_text(encoding="utf-8") +STYLE_CSS = (REPO_ROOT / "static" / "style.css").read_text(encoding="utf-8") + + +def test_skill_commands_are_loaded_from_api_skills_for_autocomplete(): + assert "loadSkillCommands" in COMMANDS_JS + assert "api('/api/skills')" in COMMANDS_JS + assert "source:'skill'" in COMMANDS_JS + + +def test_builtin_commands_take_precedence_over_skill_slug_collisions(): + # In the combined implementation, REGISTRY (agent registry + WEBUI_ONLY) wins over skills + assert ("if(COMMANDS.some(c=>c.name===slug)) return null;" in COMMANDS_JS or + "if(REGISTRY.some(c=>c.name===slug)) return null;" in COMMANDS_JS), \ + "Built-in commands must block skill slug collisions" + + +def test_typing_slash_primes_async_skill_command_loading(): + assert "ensureSkillCommandsLoadedForAutocomplete" in BOOT_JS + assert "ensureSkillCommandsLoadedForAutocomplete();" in BOOT_JS + + +def test_dropdown_has_visual_badge_for_skill_backed_entries(): + assert "cmd-item-badge-skill" in STYLE_CSS + assert "slash_skill_badge" in COMMANDS_JS