252 lines
10 KiB
Python
252 lines
10 KiB
Python
"""End-to-end tests for /api/session/retry, /api/session/undo,
|
|
/api/session/status, /api/session/usage.
|
|
|
|
Tests run against the live test subprocess server (see tests/conftest.py).
|
|
We seed transcripts via POST /api/session/import (ignores incoming
|
|
session_id; returns a fresh one we register for cleanup).
|
|
"""
|
|
import json
|
|
import urllib.request
|
|
import urllib.error
|
|
|
|
import pytest
|
|
|
|
from tests.conftest import TEST_BASE, _post, make_session_tracked
|
|
|
|
|
|
def _get(path):
    """Issue a GET against ``TEST_BASE + path`` and return the decoded JSON body.

    An HTTP error status makes ``urlopen`` raise ``urllib.error.HTTPError``;
    that exception is not caught here and propagates to the caller.
    """
    url = TEST_BASE + path
    with urllib.request.urlopen(url, timeout=10) as resp:
        payload = resp.read()
    return json.loads(payload)
|
|
|
|
|
|
def _import_session_with_messages(cleanup_list, messages, model='openai/gpt-5.4-mini'):
    """Seed a new session holding `messages` through /api/session/import.

    The import endpoint (api/routes.py:2588) accepts {title, messages, model,
    workspace, tool_calls, pinned} and IGNORES any incoming session_id -- it
    always generates a fresh one via Session(...). The id used here is
    therefore the one the server hands back, never a self-generated one.

    The returned id is appended to `cleanup_list` (registering the session
    for cleanup) before being returned to the caller.
    """
    payload = {'title': 'test', 'messages': messages, 'model': model}
    resp = _post(TEST_BASE, '/api/session/import', payload)

    # Fail loudly, with the whole response, if the import did not succeed.
    imported_ok = resp.get('ok') is True and 'session' in resp
    assert imported_ok, f"Import failed: {resp}"

    session_id = resp['session']['session_id']
    cleanup_list.append(session_id)
    return session_id
|
|
|
|
|
|
# -- /api/session/retry ----------------------------------------------------
|
|
|
|
def test_retry_returns_last_user_text(cleanup_test_sessions):
    """Retry reports the latest user message and the number of removed messages."""
    transcript = [
        {'role': 'user', 'content': 'first user msg'},
        {'role': 'assistant', 'content': 'first reply'},
        {'role': 'user', 'content': 'second user msg'},
        {'role': 'assistant', 'content': 'second reply'},
        {'role': 'tool', 'content': 'tool output'},
    ]
    sid = _import_session_with_messages(cleanup_test_sessions, transcript)

    resp = _post(TEST_BASE, '/api/session/retry', {'session_id': sid})

    assert resp.get('ok') is True, resp
    assert resp.get('last_user_text') == 'second user msg'
    # Expected to cover the last user message plus the two messages after it.
    assert resp.get('removed_count') == 3
|
|
|
|
|
|
def test_retry_truncates_transcript(cleanup_test_sessions):
    """After a retry, the stored transcript holds only the first exchange."""
    transcript = [
        {'role': 'user', 'content': 'first user msg'},
        {'role': 'assistant', 'content': 'first reply'},
        {'role': 'user', 'content': 'second user msg'},
        {'role': 'assistant', 'content': 'second reply'},
    ]
    sid = _import_session_with_messages(cleanup_test_sessions, transcript)

    _post(TEST_BASE, '/api/session/retry', {'session_id': sid})

    # Re-read the session from the server to inspect what was persisted.
    remaining = _get(f'/api/session?session_id={sid}')['session']['messages']
    # After retry: only the first exchange remains (2 messages).
    assert len(remaining) == 2
    assert remaining[-1]['content'] == 'first reply'
|
|
|
|
|
|
def test_retry_no_user_returns_error(cleanup_test_sessions):
    """Retry on a transcript without any user message yields an error body."""
    assistant_only = [{'role': 'assistant', 'content': 'orphan reply'}]
    sid = _import_session_with_messages(cleanup_test_sessions, assistant_only)

    resp = _post(TEST_BASE, '/api/session/retry', {'session_id': sid})

    assert 'error' in resp
    error_text = resp['error'].lower()
    assert 'no previous message' in error_text
|
|
|
|
|
|
def test_retry_unknown_session_returns_404():
    """Retry on a session id that does not exist yields a 'not found' error.

    Only the JSON error body is inspected here; the HTTP status code itself
    is not asserted by this test.
    """
    # _post catches HTTPError and returns the body as JSON.
    # bad(handler, ..., 404) sends 404 + {error: "..."}.
    payload = {'session_id': 'nonexistent_zzz'}
    resp = _post(TEST_BASE, '/api/session/retry', payload)

    assert 'error' in resp
    error_text = resp['error'].lower()
    assert 'not found' in error_text
|
|
|
|
|
|
def test_retry_missing_session_id_returns_error():
    """Posting an empty body to the retry endpoint yields an error response."""
    empty_body = {}
    resp = _post(TEST_BASE, '/api/session/retry', empty_body)
    assert 'error' in resp
|
|
|
|
|
|
def test_retry_does_not_double_append(cleanup_test_sessions):
    """The transcript left by /api/session/retry must stop at the message
    BEFORE the last user message.

    Critical assertion: no duplicate of the resent user message is left
    behind in the truncated transcript.
    """
    transcript = [
        {'role': 'user', 'content': 'msg A'},
        {'role': 'assistant', 'content': 'reply A'},
        {'role': 'user', 'content': 'msg B'},
        {'role': 'assistant', 'content': 'reply B'},
    ]
    sid = _import_session_with_messages(cleanup_test_sessions, transcript)

    resp = _post(TEST_BASE, '/api/session/retry', {'session_id': sid})
    assert resp['removed_count'] == 2  # msg B + reply B

    remaining = _get(f'/api/session?session_id={sid}')['session']['messages']
    # Only msg A + reply A remain. Critically: there is NO 'msg B' anywhere.
    assert len(remaining) == 2
    first, second = remaining[0], remaining[1]
    assert first['content'] == 'msg A'
    assert second['content'] == 'reply A'
|
|
|
|
|
|
def test_retry_concurrent_requests_are_safe(cleanup_test_sessions):
    """Concurrent /api/session/retry calls on the same session must not
    leave the transcript in a torn or doubly-truncated state.

    Pre-fix race: get_session() outside `with LOCK:` could return a stale
    (non-cached) Session instance to one thread; both threads then mutated
    different in-memory objects, and the second s.save() overwrote the
    first with stale data. The fix re-binds `s = SESSIONS.get(sid, s)`
    inside the lock so both threads converge on the canonical instance.

    Four retries are fired in parallel. Every individual response must be
    either a success or an error body, and the persisted transcript must end
    up as one of the accepted prefixes of the original.
    """
    from concurrent.futures import ThreadPoolExecutor

    sid = _import_session_with_messages(cleanup_test_sessions, [
        {'role': 'user', 'content': 'msg A'},
        {'role': 'assistant', 'content': 'reply A'},
        {'role': 'user', 'content': 'msg B'},
        {'role': 'assistant', 'content': 'reply B'},
    ])

    def _do_retry():
        return _post(TEST_BASE, '/api/session/retry', {'session_id': sid})

    with ThreadPoolExecutor(max_workers=4) as ex:
        futures = [ex.submit(_do_retry) for _ in range(4)]
        # .result() re-raises anything a worker thread raised, so a crashed
        # request fails the test here instead of being dropped silently.
        results = [f.result() for f in futures]

    # Each call either succeeds (truncating further) or reports 'no previous
    # message to retry' once nothing is left. Anything else is a malformed
    # response and must not pass unnoticed.
    for r in results:
        assert r.get('ok') is True or 'error' in r, (
            f"Unexpected retry response under concurrency: {r}"
        )

    # After the dust settles, the transcript must be a strict prefix of the
    # original — never have a phantom duplicate of the resent message.
    sess = _get(f'/api/session?session_id={sid}')['session']
    msg_pairs = [(m['role'], m.get('content', '')) for m in sess['messages']]
    valid_pairs = [
        [],
        [('user', 'msg A'), ('assistant', 'reply A')],
        [('user', 'msg A')],
    ]
    assert msg_pairs in valid_pairs, (
        f"Concurrent retries left transcript in unexpected state: {msg_pairs}. "
        "TOCTOU race in get_session/save likely re-introduced."
    )
|
|
|
|
|
|
# ── /api/session/undo ─────────────────────────────────────────────────────
|
|
|
|
def test_undo_returns_removed_preview(cleanup_test_sessions):
    """Undo reports the removed-message count and a preview of what was dropped."""
    transcript = [
        {'role': 'user', 'content': 'first user msg'},
        {'role': 'assistant', 'content': 'first reply'},
        {'role': 'user', 'content': 'second user msg'},
        {'role': 'assistant', 'content': 'second reply'},
        {'role': 'tool', 'content': 'tool output'},
    ]
    sid = _import_session_with_messages(cleanup_test_sessions, transcript)

    resp = _post(TEST_BASE, '/api/session/undo', {'session_id': sid})

    assert resp.get('ok') is True
    assert resp.get('removed_count') == 3
    # A missing preview is treated as an empty string, so the check fails
    # cleanly rather than raising.
    preview = resp.get('removed_preview', '')
    assert 'second user msg' in preview
|
|
|
|
|
|
def test_undo_truncates_transcript(cleanup_test_sessions):
    """After an undo, the stored transcript holds only the first exchange."""
    transcript = [
        {'role': 'user', 'content': 'first user msg'},
        {'role': 'assistant', 'content': 'first reply'},
        {'role': 'user', 'content': 'second user msg'},
        {'role': 'assistant', 'content': 'second reply'},
    ]
    sid = _import_session_with_messages(cleanup_test_sessions, transcript)

    _post(TEST_BASE, '/api/session/undo', {'session_id': sid})

    # Re-read the session from the server to inspect what was persisted.
    remaining = _get(f'/api/session?session_id={sid}')['session']['messages']
    assert len(remaining) == 2
    assert remaining[-1]['content'] == 'first reply'
|
|
|
|
|
|
def test_undo_repeated_until_empty(cleanup_test_sessions):
    """A second undo on a single-exchange session yields 'nothing to undo'."""
    single_exchange = [
        {'role': 'user', 'content': 'msg A'},
        {'role': 'assistant', 'content': 'reply A'},
    ]
    sid = _import_session_with_messages(cleanup_test_sessions, single_exchange)
    undo_request = {'session_id': sid}

    # The first undo's response is deliberately not inspected; only the
    # second call's outcome is under test.
    _post(TEST_BASE, '/api/session/undo', undo_request)
    second = _post(TEST_BASE, '/api/session/undo', undo_request)

    assert 'error' in second
    error_text = second['error'].lower()
    assert 'nothing to undo' in error_text
|
|
|
|
|
|
def test_undo_unknown_session_returns_404():
    """Undo on a session id that does not exist yields a 'not found' error.

    Only the JSON error body is inspected here; the HTTP status code itself
    is not asserted by this test.
    """
    payload = {'session_id': 'nonexistent_zzz'}
    resp = _post(TEST_BASE, '/api/session/undo', payload)

    assert 'error' in resp
    error_text = resp['error'].lower()
    assert 'not found' in error_text
|
|
|
|
|
|
# ── /api/session/status ───────────────────────────────────────────────────
|
|
|
|
def test_status_returns_summary(cleanup_test_sessions):
    """The status endpoint returns identifying fields, counts and metadata keys."""
    transcript = [
        {'role': 'user', 'content': 'a'},
        {'role': 'assistant', 'content': 'b'},
        {'role': 'user', 'content': 'c'},
    ]
    sid = _import_session_with_messages(cleanup_test_sessions, transcript)

    status = _get(f'/api/session/status?session_id={sid}')

    assert status['session_id'] == sid
    # 'test' is the title the import helper assigns to every seeded session.
    assert status['title'] == 'test'
    assert status['message_count'] == 3
    # These fields only need to be present; their values are not checked.
    for key in ('model', 'workspace', 'created_at', 'updated_at'):
        assert key in status
    assert status['agent_running'] is False  # no active stream
|
|
|
|
|
|
def test_status_unknown_returns_404():
    """Requesting status for a session id that does not exist returns HTTP 404.

    The GET helper lets ``urllib.error.HTTPError`` propagate, so the status
    code is read from the raised exception. ``pytest.raises`` fails the test
    if no HTTPError is raised at all.
    """
    with pytest.raises(urllib.error.HTTPError) as excinfo:
        _get('/api/session/status?session_id=nonexistent_zzz')
    assert excinfo.value.code == 404
|
|
|
|
|
|
def test_status_missing_param():
    """Requesting status without a session_id query parameter returns HTTP 400.

    The GET helper lets ``urllib.error.HTTPError`` propagate, so the status
    code is read from the raised exception. ``pytest.raises`` fails the test
    if no HTTPError is raised at all.
    """
    with pytest.raises(urllib.error.HTTPError) as excinfo:
        _get('/api/session/status')
    assert excinfo.value.code == 400
|
|
|
|
|
|
# ── /api/session/usage ────────────────────────────────────────────────────
|
|
|
|
def test_usage_returns_token_counts(cleanup_test_sessions):
    """A newly created session reports zero input, output and total tokens."""
    sid, _ws = make_session_tracked(cleanup_test_sessions)

    # Usage on a new session: zero everything.
    usage = _get(f'/api/session/usage?session_id={sid}')
    for field in ('input_tokens', 'output_tokens', 'total_tokens'):
        assert usage[field] == 0
|