feat: slash command parity + skill autocomplete — v0.50.91 (PR #711)
Combines PR #618 (@renheqiang) slash command parity (/retry /undo /stop /title /status /voice) with PR #701 (@franksong2702) skill autocomplete. 1469 tests pass. Closes #460. Co-authored-by: renheqiang <renheqiang@users.noreply.github.com> Co-authored-by: franksong2702 <franksong2702@users.noreply.github.com>
This commit is contained in:
84
tests/test_commands_endpoint.py
Normal file
84
tests/test_commands_endpoint.py
Normal file
@@ -0,0 +1,84 @@
|
||||
"""Tests for GET /api/commands -- exposes hermes-agent COMMAND_REGISTRY."""
|
||||
import json
|
||||
import urllib.request
|
||||
|
||||
import pytest
|
||||
|
||||
from tests.conftest import TEST_BASE, requires_agent_modules
|
||||
|
||||
|
||||
def _get(path):
|
||||
"""GET helper -- returns parsed JSON or raises HTTPError."""
|
||||
with urllib.request.urlopen(TEST_BASE + path, timeout=10) as r:
|
||||
return json.loads(r.read())
|
||||
|
||||
|
||||
@requires_agent_modules
|
||||
def test_commands_endpoint_returns_list():
|
||||
"""GET /api/commands returns a JSON object with a 'commands' list."""
|
||||
body = _get('/api/commands')
|
||||
assert 'commands' in body
|
||||
assert isinstance(body['commands'], list)
|
||||
assert len(body['commands']) > 0
|
||||
|
||||
|
||||
@requires_agent_modules
|
||||
def test_commands_endpoint_includes_help():
|
||||
"""The 'help' command must always be present (it's not cli_only)."""
|
||||
body = _get('/api/commands')
|
||||
names = {c['name'] for c in body['commands']}
|
||||
assert 'help' in names
|
||||
|
||||
|
||||
@requires_agent_modules
|
||||
def test_commands_endpoint_command_shape():
|
||||
"""Each command entry has the required fields."""
|
||||
body = _get('/api/commands')
|
||||
cmd = next(c for c in body['commands'] if c['name'] == 'help')
|
||||
required = {
|
||||
'name', 'description', 'category', 'aliases',
|
||||
'args_hint', 'subcommands', 'cli_only', 'gateway_only',
|
||||
}
|
||||
assert set(cmd.keys()) >= required
|
||||
assert isinstance(cmd['aliases'], list)
|
||||
assert isinstance(cmd['subcommands'], list)
|
||||
assert isinstance(cmd['cli_only'], bool)
|
||||
assert isinstance(cmd['gateway_only'], bool)
|
||||
|
||||
|
||||
@requires_agent_modules
|
||||
def test_commands_endpoint_excludes_gateway_only_and_never_expose():
|
||||
"""gateway_only commands and the _NEVER_EXPOSE set are filtered out."""
|
||||
body = _get('/api/commands')
|
||||
names = {c['name'] for c in body['commands']}
|
||||
# /sethome, /restart, /update are gateway_only; /commands is in _NEVER_EXPOSE
|
||||
for name in ('sethome', 'restart', 'update', 'commands'):
|
||||
assert name not in names, f"{name} must be excluded from /api/commands"
|
||||
|
||||
|
||||
@requires_agent_modules
|
||||
def test_commands_endpoint_keeps_new_with_reset_alias():
|
||||
"""The 'new' command stays exposed and carries its 'reset' alias."""
|
||||
body = _get('/api/commands')
|
||||
new_cmd = next(c for c in body['commands'] if c['name'] == 'new')
|
||||
assert 'reset' in new_cmd['aliases']
|
||||
|
||||
|
||||
def test_list_commands_returns_empty_for_empty_registry():
|
||||
"""list_commands(_registry=[]) returns [] -- the same path as when
|
||||
hermes_cli is missing (the empty-or-missing case)."""
|
||||
from api.commands import list_commands
|
||||
assert list_commands(_registry=[]) == []
|
||||
|
||||
|
||||
def test_list_commands_degrades_when_agent_missing(monkeypatch):
|
||||
"""If hermes_cli.commands is not importable, list_commands() returns []
|
||||
via the ImportError path. Verified by stubbing sys.modules; test cleanup
|
||||
is handled by monkeypatch + the fact that we don't reload api.commands."""
|
||||
import sys
|
||||
monkeypatch.setitem(sys.modules, 'hermes_cli.commands', None)
|
||||
# NOTE: we do NOT reload api.commands. The lazy import inside
|
||||
# list_commands() will re-attempt the import on each call and hit
|
||||
# the stubbed-None module, raising ImportError, taking the fallback path.
|
||||
from api.commands import list_commands
|
||||
assert list_commands() == []
|
||||
@@ -725,22 +725,24 @@ def test_upload_error_has_no_trace_field():
|
||||
# ── #248: /skills slash command ───────────────────────────────────────────────
|
||||
|
||||
def test_skills_slash_command_defined():
|
||||
"""#248: /skills command must be registered in COMMANDS and implemented.
|
||||
Verifies the command entry, function definition, and i18n key are all present.
|
||||
"""#248: /skills slash command must be wired up.
|
||||
|
||||
Pre-Task 2 (slash-command-parity batch 1) this checked for the
|
||||
hardcoded ``name:'skills'`` entry in the COMMANDS array. The COMMANDS
|
||||
array is now sourced from hermes-agent's ``COMMAND_REGISTRY`` at boot
|
||||
via ``GET /api/commands``, so the literal string is gone. The handler
|
||||
must still exist and be registered, otherwise ``/skills`` would fall
|
||||
through to \"not yet supported\".
|
||||
"""
|
||||
src = (REPO_ROOT / "static/commands.js").read_text()
|
||||
|
||||
# 1. 'skills' must appear in the COMMANDS array definition
|
||||
assert "name:'skills'" in src or 'name:"skills"' in src, \
|
||||
"COMMANDS array must include an entry with name:'skills'"
|
||||
# 1. cmdSkills function must be defined
|
||||
assert "async function cmdSkills" in src or "function cmdSkills" in src, \
|
||||
"cmdSkills function missing from commands.js"
|
||||
|
||||
# 2. cmdSkills function must be defined
|
||||
assert "function cmdSkills" in src, \
|
||||
"cmdSkills function must be defined in commands.js"
|
||||
|
||||
# 3. i18n key cmd_skills must be referenced (wired to COMMANDS entry)
|
||||
assert "cmd_skills" in src, \
|
||||
"cmd_skills i18n key must be referenced in commands.js"
|
||||
# 2. HANDLERS.skills must be registered to dispatch /skills to cmdSkills
|
||||
assert "HANDLERS.skills" in src, \
|
||||
"HANDLERS.skills registration missing from commands.js"
|
||||
|
||||
|
||||
def test_reload_recovery_persists_durable_inflight_state(cleanup_test_sessions):
|
||||
|
||||
251
tests/test_session_ops.py
Normal file
251
tests/test_session_ops.py
Normal file
@@ -0,0 +1,251 @@
|
||||
"""End-to-end tests for /api/session/retry, /api/session/undo,
|
||||
/api/session/status, /api/session/usage.
|
||||
|
||||
Tests run against the live test subprocess server (see tests/conftest.py).
|
||||
We seed transcripts via POST /api/session/import (ignores incoming
|
||||
session_id; returns a fresh one we register for cleanup).
|
||||
"""
|
||||
import json
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
|
||||
import pytest
|
||||
|
||||
from tests.conftest import TEST_BASE, _post, make_session_tracked
|
||||
|
||||
|
||||
def _get(path):
|
||||
"""GET helper -- returns parsed JSON, or raises HTTPError on non-2xx."""
|
||||
with urllib.request.urlopen(TEST_BASE + path, timeout=10) as r:
|
||||
return json.loads(r.read())
|
||||
|
||||
|
||||
def _import_session_with_messages(cleanup_list, messages, model='openai/gpt-5.4-mini'):
|
||||
"""Create a session pre-populated with `messages` via /api/session/import.
|
||||
|
||||
Returns the server-assigned session_id (registered for cleanup).
|
||||
|
||||
api/routes.py:2588 takes {title, messages, model, workspace, tool_calls,
|
||||
pinned} and IGNORES any incoming session_id -- always generates a fresh
|
||||
one via Session(...). We use the server's returned id, not a self-
|
||||
generated one.
|
||||
"""
|
||||
body = {
|
||||
'title': 'test',
|
||||
'messages': messages,
|
||||
'model': model,
|
||||
}
|
||||
r = _post(TEST_BASE, '/api/session/import', body)
|
||||
assert r.get('ok') is True and 'session' in r, f"Import failed: {r}"
|
||||
sid = r['session']['session_id']
|
||||
cleanup_list.append(sid)
|
||||
return sid
|
||||
|
||||
|
||||
# -- /api/session/retry ----------------------------------------------------
|
||||
|
||||
def test_retry_returns_last_user_text(cleanup_test_sessions):
|
||||
sid = _import_session_with_messages(cleanup_test_sessions, [
|
||||
{'role': 'user', 'content': 'first user msg'},
|
||||
{'role': 'assistant', 'content': 'first reply'},
|
||||
{'role': 'user', 'content': 'second user msg'},
|
||||
{'role': 'assistant', 'content': 'second reply'},
|
||||
{'role': 'tool', 'content': 'tool output'},
|
||||
])
|
||||
r = _post(TEST_BASE, '/api/session/retry', {'session_id': sid})
|
||||
assert r.get('ok') is True, r
|
||||
assert r.get('last_user_text') == 'second user msg'
|
||||
assert r.get('removed_count') == 3
|
||||
|
||||
|
||||
def test_retry_truncates_transcript(cleanup_test_sessions):
|
||||
sid = _import_session_with_messages(cleanup_test_sessions, [
|
||||
{'role': 'user', 'content': 'first user msg'},
|
||||
{'role': 'assistant', 'content': 'first reply'},
|
||||
{'role': 'user', 'content': 'second user msg'},
|
||||
{'role': 'assistant', 'content': 'second reply'},
|
||||
])
|
||||
_post(TEST_BASE, '/api/session/retry', {'session_id': sid})
|
||||
sess = _get(f'/api/session?session_id={sid}')['session']
|
||||
# After retry: only the first exchange remains (2 messages).
|
||||
assert len(sess['messages']) == 2
|
||||
assert sess['messages'][-1]['content'] == 'first reply'
|
||||
|
||||
|
||||
def test_retry_no_user_returns_error(cleanup_test_sessions):
|
||||
sid = _import_session_with_messages(cleanup_test_sessions, [
|
||||
{'role': 'assistant', 'content': 'orphan reply'},
|
||||
])
|
||||
r = _post(TEST_BASE, '/api/session/retry', {'session_id': sid})
|
||||
assert 'error' in r
|
||||
assert 'no previous message' in r['error'].lower()
|
||||
|
||||
|
||||
def test_retry_unknown_session_returns_404():
|
||||
# _post catches HTTPError and returns the body as JSON.
|
||||
# bad(handler, ..., 404) sends 404 + {error: "..."}.
|
||||
r = _post(TEST_BASE, '/api/session/retry', {'session_id': 'nonexistent_zzz'})
|
||||
assert 'error' in r
|
||||
assert 'not found' in r['error'].lower()
|
||||
|
||||
|
||||
def test_retry_missing_session_id_returns_error():
|
||||
r = _post(TEST_BASE, '/api/session/retry', {})
|
||||
assert 'error' in r
|
||||
|
||||
|
||||
def test_retry_does_not_double_append(cleanup_test_sessions):
|
||||
"""After /api/session/retry, the truncated transcript must end at the
|
||||
message BEFORE the last user message. Critical assertion: no duplicate
|
||||
of the resent user message gets left behind in the truncated transcript.
|
||||
"""
|
||||
sid = _import_session_with_messages(cleanup_test_sessions, [
|
||||
{'role': 'user', 'content': 'msg A'},
|
||||
{'role': 'assistant', 'content': 'reply A'},
|
||||
{'role': 'user', 'content': 'msg B'},
|
||||
{'role': 'assistant', 'content': 'reply B'},
|
||||
])
|
||||
r = _post(TEST_BASE, '/api/session/retry', {'session_id': sid})
|
||||
assert r['removed_count'] == 2 # msg B + reply B
|
||||
sess = _get(f'/api/session?session_id={sid}')['session']
|
||||
msgs = sess['messages']
|
||||
# Only msg A + reply A remain. Critically: there is NO 'msg B' anywhere.
|
||||
assert len(msgs) == 2
|
||||
assert msgs[0]['content'] == 'msg A'
|
||||
assert msgs[1]['content'] == 'reply A'
|
||||
|
||||
|
||||
def test_retry_concurrent_requests_are_safe(cleanup_test_sessions):
|
||||
"""Two concurrent /api/session/retry calls on the same session must not
|
||||
leave the transcript in a torn or doubly-truncated state.
|
||||
|
||||
Pre-fix race: get_session() outside `with LOCK:` could return a stale
|
||||
(non-cached) Session instance to one thread; both threads then mutated
|
||||
different in-memory objects, and the second s.save() overwrote the
|
||||
first with stale data. The fix re-binds `s = SESSIONS.get(sid, s)`
|
||||
inside the lock so both threads converge on the canonical instance.
|
||||
"""
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
sid = _import_session_with_messages(cleanup_test_sessions, [
|
||||
{'role': 'user', 'content': 'msg A'},
|
||||
{'role': 'assistant', 'content': 'reply A'},
|
||||
{'role': 'user', 'content': 'msg B'},
|
||||
{'role': 'assistant', 'content': 'reply B'},
|
||||
])
|
||||
|
||||
def _do_retry():
|
||||
return _post(TEST_BASE, '/api/session/retry', {'session_id': sid})
|
||||
|
||||
with ThreadPoolExecutor(max_workers=4) as ex:
|
||||
futures = [ex.submit(_do_retry) for _ in range(4)]
|
||||
results = [f.result() for f in futures]
|
||||
|
||||
# Each call either succeeds (truncating further) or raises 'no previous
|
||||
# message to retry' once nothing is left. After the dust settles, the
|
||||
# transcript must be a strict prefix of the original — never have a
|
||||
# phantom duplicate of the resent message.
|
||||
sess = _get(f'/api/session?session_id={sid}')['session']
|
||||
msgs = sess['messages']
|
||||
valid_prefixes = (
|
||||
[],
|
||||
[{'role': 'user', 'content': 'msg A'}, {'role': 'assistant', 'content': 'reply A'}],
|
||||
[{'role': 'user', 'content': 'msg A'}],
|
||||
)
|
||||
msg_pairs = [(m['role'], m.get('content', '')) for m in msgs]
|
||||
valid_pairs = [[(m['role'], m['content']) for m in p] for p in valid_prefixes]
|
||||
assert msg_pairs in valid_pairs, (
|
||||
f"Concurrent retries left transcript in unexpected state: {msg_pairs}. "
|
||||
"TOCTOU race in get_session/save likely re-introduced."
|
||||
)
|
||||
|
||||
|
||||
# ── /api/session/undo ─────────────────────────────────────────────────────
|
||||
|
||||
def test_undo_returns_removed_preview(cleanup_test_sessions):
|
||||
sid = _import_session_with_messages(cleanup_test_sessions, [
|
||||
{'role': 'user', 'content': 'first user msg'},
|
||||
{'role': 'assistant', 'content': 'first reply'},
|
||||
{'role': 'user', 'content': 'second user msg'},
|
||||
{'role': 'assistant', 'content': 'second reply'},
|
||||
{'role': 'tool', 'content': 'tool output'},
|
||||
])
|
||||
r = _post(TEST_BASE, '/api/session/undo', {'session_id': sid})
|
||||
assert r.get('ok') is True
|
||||
assert r.get('removed_count') == 3
|
||||
assert 'second user msg' in r.get('removed_preview', '')
|
||||
|
||||
|
||||
def test_undo_truncates_transcript(cleanup_test_sessions):
|
||||
sid = _import_session_with_messages(cleanup_test_sessions, [
|
||||
{'role': 'user', 'content': 'first user msg'},
|
||||
{'role': 'assistant', 'content': 'first reply'},
|
||||
{'role': 'user', 'content': 'second user msg'},
|
||||
{'role': 'assistant', 'content': 'second reply'},
|
||||
])
|
||||
_post(TEST_BASE, '/api/session/undo', {'session_id': sid})
|
||||
sess = _get(f'/api/session?session_id={sid}')['session']
|
||||
assert len(sess['messages']) == 2
|
||||
assert sess['messages'][-1]['content'] == 'first reply'
|
||||
|
||||
|
||||
def test_undo_repeated_until_empty(cleanup_test_sessions):
|
||||
sid = _import_session_with_messages(cleanup_test_sessions, [
|
||||
{'role': 'user', 'content': 'msg A'},
|
||||
{'role': 'assistant', 'content': 'reply A'},
|
||||
])
|
||||
_post(TEST_BASE, '/api/session/undo', {'session_id': sid})
|
||||
r = _post(TEST_BASE, '/api/session/undo', {'session_id': sid})
|
||||
assert 'error' in r
|
||||
assert 'nothing to undo' in r['error'].lower()
|
||||
|
||||
|
||||
def test_undo_unknown_session_returns_404():
|
||||
r = _post(TEST_BASE, '/api/session/undo', {'session_id': 'nonexistent_zzz'})
|
||||
assert 'error' in r
|
||||
assert 'not found' in r['error'].lower()
|
||||
|
||||
|
||||
# ── /api/session/status ───────────────────────────────────────────────────
|
||||
|
||||
def test_status_returns_summary(cleanup_test_sessions):
|
||||
sid = _import_session_with_messages(cleanup_test_sessions, [
|
||||
{'role': 'user', 'content': 'a'},
|
||||
{'role': 'assistant', 'content': 'b'},
|
||||
{'role': 'user', 'content': 'c'},
|
||||
])
|
||||
r = _get(f'/api/session/status?session_id={sid}')
|
||||
assert r['session_id'] == sid
|
||||
assert r['title'] == 'test'
|
||||
assert r['message_count'] == 3
|
||||
assert 'model' in r
|
||||
assert 'workspace' in r
|
||||
assert 'created_at' in r
|
||||
assert 'updated_at' in r
|
||||
assert r['agent_running'] is False # no active stream
|
||||
|
||||
|
||||
def test_status_unknown_returns_404():
|
||||
try:
|
||||
_get('/api/session/status?session_id=nonexistent_zzz')
|
||||
pytest.fail('Expected HTTPError')
|
||||
except urllib.error.HTTPError as e:
|
||||
assert e.code == 404
|
||||
|
||||
|
||||
def test_status_missing_param():
|
||||
try:
|
||||
_get('/api/session/status')
|
||||
pytest.fail('Expected HTTPError')
|
||||
except urllib.error.HTTPError as e:
|
||||
assert e.code == 400
|
||||
|
||||
|
||||
# ── /api/session/usage ────────────────────────────────────────────────────
|
||||
|
||||
def test_usage_returns_token_counts(cleanup_test_sessions):
|
||||
sid, _ws = make_session_tracked(cleanup_test_sessions)
|
||||
# Usage on a new session: zero everything.
|
||||
r = _get(f'/api/session/usage?session_id={sid}')
|
||||
assert r['input_tokens'] == 0
|
||||
assert r['output_tokens'] == 0
|
||||
assert r['total_tokens'] == 0
|
||||
39
tests/test_sprint47.py
Normal file
39
tests/test_sprint47.py
Normal file
@@ -0,0 +1,39 @@
|
||||
"""
|
||||
Sprint 47 tests: skill-backed slash commands appear in the Web UI autocomplete.
|
||||
|
||||
Covers:
|
||||
- commands.js lazily loads /api/skills for slash autocomplete
|
||||
- built-in commands still win over skill name collisions
|
||||
- boot.js primes the async skill load when typing '/'
|
||||
- the dropdown marks skill-backed entries visually
|
||||
"""
|
||||
import pathlib
|
||||
|
||||
|
||||
REPO_ROOT = pathlib.Path(__file__).parent.parent
|
||||
COMMANDS_JS = (REPO_ROOT / "static" / "commands.js").read_text(encoding="utf-8")
|
||||
BOOT_JS = (REPO_ROOT / "static" / "boot.js").read_text(encoding="utf-8")
|
||||
STYLE_CSS = (REPO_ROOT / "static" / "style.css").read_text(encoding="utf-8")
|
||||
|
||||
|
||||
def test_skill_commands_are_loaded_from_api_skills_for_autocomplete():
|
||||
assert "loadSkillCommands" in COMMANDS_JS
|
||||
assert "api('/api/skills')" in COMMANDS_JS
|
||||
assert "source:'skill'" in COMMANDS_JS
|
||||
|
||||
|
||||
def test_builtin_commands_take_precedence_over_skill_slug_collisions():
|
||||
# In the combined implementation, REGISTRY (agent registry + WEBUI_ONLY) wins over skills
|
||||
assert ("if(COMMANDS.some(c=>c.name===slug)) return null;" in COMMANDS_JS or
|
||||
"if(REGISTRY.some(c=>c.name===slug)) return null;" in COMMANDS_JS), \
|
||||
"Built-in commands must block skill slug collisions"
|
||||
|
||||
|
||||
def test_typing_slash_primes_async_skill_command_loading():
|
||||
assert "ensureSkillCommandsLoadedForAutocomplete" in BOOT_JS
|
||||
assert "ensureSkillCommandsLoadedForAutocomplete();" in BOOT_JS
|
||||
|
||||
|
||||
def test_dropdown_has_visual_badge_for_skill_backed_entries():
|
||||
assert "cmd-item-badge-skill" in STYLE_CSS
|
||||
assert "slash_skill_badge" in COMMANDS_JS
|
||||
Reference in New Issue
Block a user