feat: redesign chat transcript + fix streaming/persistence lifecycle — v0.50.70 (PR #587 by @aronprins)

Redesign chat transcript + fix streaming/persistence lifecycle — v0.50.70

Squash-merges PR #587 by @aronprins (Aron Prins). Full credit to @aronprins for all feature and fix work.

Transcript redesign: unified --msg-rail/--msg-max CSS variables, user turns as tinted cards, thinking cards as bordered panels, error card treatment, day-change separators, composer fade.

Approval/clarify as composer flyouts: cards slide up from behind composer top, overflow:hidden + translateY clip prevents travel visibility, focus({preventScroll:true}).

Streaming lifecycle: DOM order user→thinking→tool cards→response, no mid-stream jump. Live tool cards inserted before [data-live-assistant].

Persistence: reasoning attached before s.save(), _restore_reasoning_metadata on reload, role=tool rows preserved in S.messages, CLI-session tool-result fallback.

Workspace panel FOUC fix: [data-workspace-panel] set at parse time.

Docs: docs/ui-ux/index.html + two-stage-proposal.html.

Maintainer additions (433b867): CHANGELOG v0.50.70, version badge, usage badge loop simplification.

Reviewed and approved by @nesquena (independent review). 1361 tests passing.
This commit is contained in:
Aron Prins
2026-04-16 23:04:42 +02:00
committed by GitHub
parent 25d38a467a
commit 9a3dc10d93
20 changed files with 2770 additions and 469 deletions

View File

@@ -1,216 +1,114 @@
"""
Regression tests for issue #401 / PR #402:
Tool call cards show incorrect/duplicate entries on session load after context compaction.
Regression tests for tool-card persistence on session reload.
Root cause: loadSession() applied its own B9 sanitization (producing a new message array
with different indices) but did not remap the session-level tool_calls.assistant_msg_idx
values to match. It then assigned the broken tool_calls directly to S.toolCalls, bypassing
renderMessages()'s fallback that correctly derives tool calls from per-message tool_calls.
The older loadSession() path rewrote message history on the client:
- dropped role='tool' rows
- dropped empty assistant rows even when they carried tool_calls
- then ignored session.tool_calls on reload
Fix: build origIdxToSanitizedIdx during the B9 pass and remap each tc.assistant_msg_idx;
set S.toolCalls=[] so renderMessages() uses the fallback derivation.
These tests verify the JS logic statically (no server needed).
That broke both durable logging and page refresh for valid tool runs.
"""
import json
import pathlib
import subprocess
import textwrap
import json
REPO_ROOT = pathlib.Path(__file__).parent.parent.resolve()
SESSIONS_JS = (REPO_ROOT / "static" / "sessions.js").read_text(encoding="utf-8")
UI_JS = (REPO_ROOT / "static" / "ui.js").read_text(encoding="utf-8")
# --- Static structural checks ---
def test_loadsession_sets_toolcalls_empty():
"""loadSession must set S.toolCalls=[] instead of pre-filling from session-level tool_calls."""
assert "S.toolCalls=[]" in SESSIONS_JS, (
"loadSession() must set S.toolCalls=[] so renderMessages() uses its fallback "
"derivation from per-message tool_calls with correct sanitized-array indices"
def test_loadsession_preserves_tool_rows():
"""Reload must keep tool rows in S.messages so snippets can be reconstructed."""
assert "if (m.role === 'tool') continue;" not in SESSIONS_JS, (
"loadSession() must not drop role='tool' messages; renderMessages() hides them "
"visually, but it still needs them for snippet reconstruction"
)
def test_loadsession_does_not_assign_broken_tool_calls():
"""loadSession must NOT assign session.tool_calls directly to S.toolCalls (causes index mismatch)."""
# The old broken pattern: S.toolCalls=(data.session.tool_calls||[]).map(tc=>({...tc,done:true}))
assert "S.toolCalls=(data.session.tool_calls" not in SESSIONS_JS, (
"loadSession() must not assign session-level tool_calls directly to S.toolCalls — "
"those indices are relative to the pre-sanitization array and will be wrong after B9 filtering"
)
def test_loadsession_uses_session_toolcalls_only_as_fallback():
"""Session summaries are the fallback, not the primary reload source."""
assert "if(!hasMessageToolMetadata&&data.session.tool_calls&&data.session.tool_calls.length)" in SESSIONS_JS
assert "S.toolCalls=(data.session.tool_calls||[]).map(tc=>({...tc,done:true}));" in SESSIONS_JS
assert "S.toolCalls=[];" in SESSIONS_JS
def test_loadsession_builds_idx_remap():
"""loadSession must build an origIdxToSanitizedIdx map during B9 sanitization."""
assert "origIdxToSanitizedIdx" in SESSIONS_JS, (
"loadSession() must build origIdxToSanitizedIdx during B9 sanitization "
"to remap session-level tool_calls.assistant_msg_idx"
)
def test_rendermessages_treats_openai_toolcall_assistants_as_visible():
"""OpenAI assistant rows with empty content but tool_calls must stay anchorable."""
assert "const hasTc=Array.isArray(m.tool_calls)&&m.tool_calls.length>0;" in UI_JS
assert "if(hasTc||hasTu||_messageHasReasoningPayload(m)) return true;" in UI_JS
def test_loadsession_remaps_assistant_msg_idx():
"""loadSession must remap tc.assistant_msg_idx using the index map."""
assert "tc.assistant_msg_idx" in SESSIONS_JS, (
"loadSession() must update tc.assistant_msg_idx using the sanitized index map"
)
# --- Behavioural Node.js tests ---
def _run_js(script_body: str) -> dict:
"""Run a JS snippet that exercises the B9 sanitization logic extracted from sessions.js."""
# Extract just the B9 + index-remap block from loadSession
# We'll re-implement it inline for testability
script = textwrap.dedent(f"""
// Simulate the B9 sanitization + index remap logic from loadSession()
function sanitizeAndRemap(messages, tool_calls) {{
const allMsgs = messages || [];
const sanitized = [];
const origIdxToSanitizedIdx = {{}};
let lastKeptAsstIdx = -1;
for (let i = 0; i < allMsgs.length; i++) {{
const m = allMsgs[i];
if (!m || !m.role) continue;
if (m.role === 'tool') continue;
if (m.role === 'assistant') {{
let c = m.content || '';
if (Array.isArray(c)) c = c.filter(p => p && p.type === 'text').map(p => p.text || '').join('');
if (!String(c).trim().length) {{ continue; }}
lastKeptAsstIdx = sanitized.length;
}}
origIdxToSanitizedIdx[i] = sanitized.length;
sanitized.push(m);
}}
const remapped = (tool_calls || []).map(tc => {{
if (!tc || tc.assistant_msg_idx === undefined) return tc;
const origIdx = tc.assistant_msg_idx;
const newIdx = (origIdx in origIdxToSanitizedIdx)
? origIdxToSanitizedIdx[origIdx]
: (lastKeptAsstIdx >= 0 ? lastKeptAsstIdx : -1);
return {{ ...tc, assistant_msg_idx: newIdx }};
function loadSessionShape(messages, sessionToolCalls) {{
const filtered = (messages || []).filter(m => m && m.role);
const hasMessageToolMetadata = filtered.some(m => {{
if (!m || m.role !== 'assistant') return false;
const hasTc = Array.isArray(m.tool_calls) && m.tool_calls.length > 0;
const hasTu = Array.isArray(m.content) && m.content.some(p => p && p.type === 'tool_use');
return hasTc || hasTu;
}});
return {{ sanitized, remapped }};
const toolCalls = (!hasMessageToolMetadata && sessionToolCalls && sessionToolCalls.length)
? sessionToolCalls.map(tc => ({{ ...tc, done: true }}))
: [];
return {{ filtered, hasMessageToolMetadata, toolCalls }};
}}
{script_body}
""")
proc = subprocess.run(
["node", "-e", script], check=True, capture_output=True, text=True
)
proc = subprocess.run(["node", "-e", script], check=True, capture_output=True, text=True)
return json.loads(proc.stdout)
def test_b9_remaps_tool_call_idx_after_empty_assistant_filtered():
"""Tool call pointing to index 1 (empty assistant at orig idx 1, kept at idx 0) remaps correctly."""
def test_reload_keeps_empty_assistant_toolcall_anchor():
"""OpenAI-style assistant {content:'', tool_calls:[...]} must survive reload."""
result = _run_js("""
const messages = [
{ role: 'user', content: 'hello' }, // orig 0 -> sanitized 0
{ role: 'assistant', content: '' }, // orig 1 -> FILTERED (empty)
{ role: 'assistant', content: 'done.' }, // orig 2 -> sanitized 1
{ role: 'user', content: 'list files' },
{
role: 'assistant',
content: '',
tool_calls: [{ id: 'call-1', function: { name: 'terminal', arguments: '{}' } }]
},
{ role: 'tool', tool_call_id: 'call-1', content: '{"output":"ok"}' },
{ role: 'assistant', content: 'Done.' }
];
const tool_calls = [
{ name: 'terminal', assistant_msg_idx: 1 }, // pointed to filtered-out empty assistant
{ name: 'read_file', assistant_msg_idx: 2 }, // pointed to kept assistant
];
const { sanitized, remapped } = sanitizeAndRemap(messages, tool_calls);
const loaded = loadSessionShape(messages, [{ name: 'terminal', assistant_msg_idx: 1 }]);
process.stdout.write(JSON.stringify({
sanitized_length: sanitized.length,
tc0_new_idx: remapped[0].assistant_msg_idx, // should attach to lastKeptAsstIdx = 1
tc1_new_idx: remapped[1].assistant_msg_idx, // should remap 2 -> 1
filtered_len: loaded.filtered.length,
has_metadata: loaded.hasMessageToolMetadata,
fallback_len: loaded.toolCalls.length,
assistant_tool_idx: loaded.filtered.findIndex(m => m.role === 'assistant' && m.tool_calls),
tool_idx: loaded.filtered.findIndex(m => m.role === 'tool')
}));
""")
assert result["sanitized_length"] == 2, f"Expected 2 messages after B9, got {result['sanitized_length']}"
assert result["tc0_new_idx"] == 1, (
f"Tool call pointing to filtered empty assistant should attach to last kept assistant (idx 1), got {result['tc0_new_idx']}"
)
assert result["tc1_new_idx"] == 1, (
f"Tool call pointing to orig idx 2 should remap to sanitized idx 1, got {result['tc1_new_idx']}"
)
assert result["filtered_len"] == 4
assert result["has_metadata"] is True
assert result["fallback_len"] == 0
assert result["assistant_tool_idx"] == 1
assert result["tool_idx"] == 2
def test_b9_remaps_multiple_empty_assistants():
"""Multiple consecutive empty assistants all remap to the last (nearest) kept assistant.
Note: the remapping pass runs after the full sanitization loop, so lastKeptAsstIdx
already reflects the final kept-assistant position. This means even empty-assistant
tool calls that came BEFORE the kept assistant get attached to it — which is correct
behavior for context-compacted sessions where all tool calls belong to the one
non-empty assistant response.
"""
def test_reload_uses_session_summary_when_messages_have_no_tool_metadata():
"""Older sessions should still render from session.tool_calls on reload."""
result = _run_js("""
const messages = [
{ role: 'user', content: 'go' }, // orig 0 -> sanitized 0
{ role: 'assistant', content: '' }, // orig 1 -> FILTERED
{ role: 'assistant', content: '' }, // orig 2 -> FILTERED
{ role: 'assistant', content: '' }, // orig 3 -> FILTERED
{ role: 'assistant', content: 'result' }, // orig 4 -> sanitized 1
{ role: 'user', content: 'build site' },
{ role: 'assistant', content: 'Starting.' },
{ role: 'tool', content: '{"bytes_written": 4955}' },
{ role: 'assistant', content: '' }
];
const tool_calls = [
{ name: 'a', assistant_msg_idx: 1 },
{ name: 'b', assistant_msg_idx: 2 },
{ name: 'c', assistant_msg_idx: 3 },
{ name: 'd', assistant_msg_idx: 4 },
const sessionToolCalls = [
{ name: 'write_file', assistant_msg_idx: 1, snippet: 'bytes_written', tid: '' }
];
const { sanitized, remapped } = sanitizeAndRemap(messages, tool_calls);
const loaded = loadSessionShape(messages, sessionToolCalls);
process.stdout.write(JSON.stringify({
sanitized_length: sanitized.length,
tc0_idx: remapped[0].assistant_msg_idx,
tc1_idx: remapped[1].assistant_msg_idx,
tc2_idx: remapped[2].assistant_msg_idx,
tc3_idx: remapped[3].assistant_msg_idx,
has_metadata: loaded.hasMessageToolMetadata,
fallback_len: loaded.toolCalls.length,
done_flag: loaded.toolCalls[0] && loaded.toolCalls[0].done === true
}));
""")
assert result["sanitized_length"] == 2
# Tool calls from filtered empty assistants: after the full loop, lastKeptAsstIdx=1,
# so all filtered-assistant tool calls correctly attach to the kept assistant at idx 1.
assert result["tc0_idx"] == 1, f"Expected 1 (last kept asst), got {result['tc0_idx']}"
assert result["tc1_idx"] == 1
assert result["tc2_idx"] == 1
# Tool call from the kept assistant at orig idx 4 -> sanitized idx 1
assert result["tc3_idx"] == 1, f"Expected 1, got {result['tc3_idx']}"
def test_b9_no_filtering_needed_indices_preserved():
"""When no empty assistant messages exist, indices should pass through unchanged."""
result = _run_js("""
const messages = [
{ role: 'user', content: 'hi' }, // orig 0 -> sanitized 0
{ role: 'assistant', content: 'hello' }, // orig 1 -> sanitized 1
{ role: 'user', content: 'more' }, // orig 2 -> sanitized 2
{ role: 'assistant', content: 'yes' }, // orig 3 -> sanitized 3
];
const tool_calls = [
{ name: 'x', assistant_msg_idx: 1 },
{ name: 'y', assistant_msg_idx: 3 },
];
const { sanitized, remapped } = sanitizeAndRemap(messages, tool_calls);
process.stdout.write(JSON.stringify({
sanitized_length: sanitized.length,
tc0_idx: remapped[0].assistant_msg_idx,
tc1_idx: remapped[1].assistant_msg_idx,
}));
""")
assert result["sanitized_length"] == 4
assert result["tc0_idx"] == 1, f"Expected 1, got {result['tc0_idx']}"
assert result["tc1_idx"] == 3, f"Expected 3, got {result['tc1_idx']}"
def test_b9_tool_role_messages_filtered():
"""Messages with role='tool' must be filtered out and not affect index mapping."""
result = _run_js("""
const messages = [
{ role: 'user', content: 'run' }, // orig 0 -> sanitized 0
{ role: 'tool', content: 'output' }, // orig 1 -> FILTERED (tool role)
{ role: 'assistant', content: 'done' }, // orig 2 -> sanitized 1
];
const tool_calls = [
{ name: 'terminal', assistant_msg_idx: 2 },
];
const { sanitized, remapped } = sanitizeAndRemap(messages, tool_calls);
process.stdout.write(JSON.stringify({
sanitized_length: sanitized.length,
tc0_idx: remapped[0].assistant_msg_idx,
}));
""")
assert result["sanitized_length"] == 2, f"tool-role message must be filtered, got {result['sanitized_length']}"
assert result["tc0_idx"] == 1, f"Expected orig idx 2 -> sanitized idx 1, got {result['tc0_idx']}"
assert result["has_metadata"] is False
assert result["fallback_len"] == 1
assert result["done_flag"] is True