fix: correct tool call card rendering on session load after context compaction (#408)
* fix: correct tool call card rendering on session load Two bugs caused duplicate/incorrect tool call cards when loading sessions (especially after context compaction): 1. loadSession() sanitized messages (B9 filter) but did NOT update the session-level tool_calls array's assistant_msg_idx references. Since compact() returns only sanitized messages and recomputes tool_calls with indices into the compacted array, the original assistant_msg_idx values became stale/misaligned. 2. loadSession() then assigned the broken session-level tool_calls directly to S.toolCalls. This prevented renderMessages()'s fallback path (which derives tool_calls from per-message tool_calls using correct sanitized-array indices) from ever running. Fix: - Keep full sanitization loop with index remapping for session-level tool_calls (in case they're needed by other code paths). - Instead of assigning broken session-level tool_calls to S.toolCalls, set S.toolCalls=[] so renderMessages() uses the fallback derivation from per-message tool_calls, which already have correct indices. * test: add 8 regression tests for issue #401 tool call index remapping * docs: v0.50.29 release — version badge and CHANGELOG --------- Co-authored-by: Frank Song <franksong2702@gmail.com> Co-authored-by: Nathan Esquenazi <nesquena@gmail.com>
This commit is contained in:
216
tests/test_issue401.py
Normal file
216
tests/test_issue401.py
Normal file
@@ -0,0 +1,216 @@
|
||||
"""
|
||||
Regression tests for issue #401 / PR #402:
|
||||
Tool call cards show incorrect/duplicate entries on session load after context compaction.
|
||||
|
||||
Root cause: loadSession() applied its own B9 sanitization (producing a new message array
|
||||
with different indices) but did not remap the session-level tool_calls.assistant_msg_idx
|
||||
values to match. It then assigned the broken tool_calls directly to S.toolCalls, bypassing
|
||||
renderMessages()'s fallback that correctly derives tool calls from per-message tool_calls.
|
||||
|
||||
Fix: build origIdxToSanitizedIdx during the B9 pass and remap each tc.assistant_msg_idx;
|
||||
set S.toolCalls=[] so renderMessages() uses the fallback derivation.
|
||||
|
||||
These tests verify the JS logic statically (no server needed).
|
||||
"""
|
||||
import pathlib
|
||||
import subprocess
|
||||
import textwrap
|
||||
import json
|
||||
|
||||
REPO_ROOT = pathlib.Path(__file__).parent.parent.resolve()
|
||||
SESSIONS_JS = (REPO_ROOT / "static" / "sessions.js").read_text(encoding="utf-8")
|
||||
|
||||
|
||||
# --- Static structural checks ---
|
||||
|
||||
def test_loadsession_sets_toolcalls_empty():
|
||||
"""loadSession must set S.toolCalls=[] instead of pre-filling from session-level tool_calls."""
|
||||
assert "S.toolCalls=[]" in SESSIONS_JS, (
|
||||
"loadSession() must set S.toolCalls=[] so renderMessages() uses its fallback "
|
||||
"derivation from per-message tool_calls with correct sanitized-array indices"
|
||||
)
|
||||
|
||||
|
||||
def test_loadsession_does_not_assign_broken_tool_calls():
|
||||
"""loadSession must NOT assign session.tool_calls directly to S.toolCalls (causes index mismatch)."""
|
||||
# The old broken pattern: S.toolCalls=(data.session.tool_calls||[]).map(tc=>({...tc,done:true}))
|
||||
assert "S.toolCalls=(data.session.tool_calls" not in SESSIONS_JS, (
|
||||
"loadSession() must not assign session-level tool_calls directly to S.toolCalls — "
|
||||
"those indices are relative to the pre-sanitization array and will be wrong after B9 filtering"
|
||||
)
|
||||
|
||||
|
||||
def test_loadsession_builds_idx_remap():
|
||||
"""loadSession must build an origIdxToSanitizedIdx map during B9 sanitization."""
|
||||
assert "origIdxToSanitizedIdx" in SESSIONS_JS, (
|
||||
"loadSession() must build origIdxToSanitizedIdx during B9 sanitization "
|
||||
"to remap session-level tool_calls.assistant_msg_idx"
|
||||
)
|
||||
|
||||
|
||||
def test_loadsession_remaps_assistant_msg_idx():
|
||||
"""loadSession must remap tc.assistant_msg_idx using the index map."""
|
||||
assert "tc.assistant_msg_idx" in SESSIONS_JS, (
|
||||
"loadSession() must update tc.assistant_msg_idx using the sanitized index map"
|
||||
)
|
||||
|
||||
|
||||
# --- Behavioural Node.js tests ---
|
||||
|
||||
def _run_js(script_body: str) -> dict:
|
||||
"""Run a JS snippet that exercises the B9 sanitization logic extracted from sessions.js."""
|
||||
# Extract just the B9 + index-remap block from loadSession
|
||||
# We'll re-implement it inline for testability
|
||||
script = textwrap.dedent(f"""
|
||||
// Simulate the B9 sanitization + index remap logic from loadSession()
|
||||
function sanitizeAndRemap(messages, tool_calls) {{
|
||||
const allMsgs = messages || [];
|
||||
const sanitized = [];
|
||||
const origIdxToSanitizedIdx = {{}};
|
||||
let lastKeptAsstIdx = -1;
|
||||
for (let i = 0; i < allMsgs.length; i++) {{
|
||||
const m = allMsgs[i];
|
||||
if (!m || !m.role) continue;
|
||||
if (m.role === 'tool') continue;
|
||||
if (m.role === 'assistant') {{
|
||||
let c = m.content || '';
|
||||
if (Array.isArray(c)) c = c.filter(p => p && p.type === 'text').map(p => p.text || '').join('');
|
||||
if (!String(c).trim().length) {{ continue; }}
|
||||
lastKeptAsstIdx = sanitized.length;
|
||||
}}
|
||||
origIdxToSanitizedIdx[i] = sanitized.length;
|
||||
sanitized.push(m);
|
||||
}}
|
||||
const remapped = (tool_calls || []).map(tc => {{
|
||||
if (!tc || tc.assistant_msg_idx === undefined) return tc;
|
||||
const origIdx = tc.assistant_msg_idx;
|
||||
const newIdx = (origIdx in origIdxToSanitizedIdx)
|
||||
? origIdxToSanitizedIdx[origIdx]
|
||||
: (lastKeptAsstIdx >= 0 ? lastKeptAsstIdx : -1);
|
||||
return {{ ...tc, assistant_msg_idx: newIdx }};
|
||||
}});
|
||||
return {{ sanitized, remapped }};
|
||||
}}
|
||||
|
||||
{script_body}
|
||||
""")
|
||||
proc = subprocess.run(
|
||||
["node", "-e", script], check=True, capture_output=True, text=True
|
||||
)
|
||||
return json.loads(proc.stdout)
|
||||
|
||||
|
||||
def test_b9_remaps_tool_call_idx_after_empty_assistant_filtered():
|
||||
"""Tool call pointing to index 1 (empty assistant at orig idx 1, kept at idx 0) remaps correctly."""
|
||||
result = _run_js("""
|
||||
const messages = [
|
||||
{ role: 'user', content: 'hello' }, // orig 0 -> sanitized 0
|
||||
{ role: 'assistant', content: '' }, // orig 1 -> FILTERED (empty)
|
||||
{ role: 'assistant', content: 'done.' }, // orig 2 -> sanitized 1
|
||||
];
|
||||
const tool_calls = [
|
||||
{ name: 'terminal', assistant_msg_idx: 1 }, // pointed to filtered-out empty assistant
|
||||
{ name: 'read_file', assistant_msg_idx: 2 }, // pointed to kept assistant
|
||||
];
|
||||
const { sanitized, remapped } = sanitizeAndRemap(messages, tool_calls);
|
||||
process.stdout.write(JSON.stringify({
|
||||
sanitized_length: sanitized.length,
|
||||
tc0_new_idx: remapped[0].assistant_msg_idx, // should attach to lastKeptAsstIdx = 1
|
||||
tc1_new_idx: remapped[1].assistant_msg_idx, // should remap 2 -> 1
|
||||
}));
|
||||
""")
|
||||
assert result["sanitized_length"] == 2, f"Expected 2 messages after B9, got {result['sanitized_length']}"
|
||||
assert result["tc0_new_idx"] == 1, (
|
||||
f"Tool call pointing to filtered empty assistant should attach to last kept assistant (idx 1), got {result['tc0_new_idx']}"
|
||||
)
|
||||
assert result["tc1_new_idx"] == 1, (
|
||||
f"Tool call pointing to orig idx 2 should remap to sanitized idx 1, got {result['tc1_new_idx']}"
|
||||
)
|
||||
|
||||
|
||||
def test_b9_remaps_multiple_empty_assistants():
|
||||
"""Multiple consecutive empty assistants all remap to the last (nearest) kept assistant.
|
||||
|
||||
Note: the remapping pass runs after the full sanitization loop, so lastKeptAsstIdx
|
||||
already reflects the final kept-assistant position. This means even empty-assistant
|
||||
tool calls that came BEFORE the kept assistant get attached to it — which is correct
|
||||
behavior for context-compacted sessions where all tool calls belong to the one
|
||||
non-empty assistant response.
|
||||
"""
|
||||
result = _run_js("""
|
||||
const messages = [
|
||||
{ role: 'user', content: 'go' }, // orig 0 -> sanitized 0
|
||||
{ role: 'assistant', content: '' }, // orig 1 -> FILTERED
|
||||
{ role: 'assistant', content: '' }, // orig 2 -> FILTERED
|
||||
{ role: 'assistant', content: '' }, // orig 3 -> FILTERED
|
||||
{ role: 'assistant', content: 'result' }, // orig 4 -> sanitized 1
|
||||
];
|
||||
const tool_calls = [
|
||||
{ name: 'a', assistant_msg_idx: 1 },
|
||||
{ name: 'b', assistant_msg_idx: 2 },
|
||||
{ name: 'c', assistant_msg_idx: 3 },
|
||||
{ name: 'd', assistant_msg_idx: 4 },
|
||||
];
|
||||
const { sanitized, remapped } = sanitizeAndRemap(messages, tool_calls);
|
||||
process.stdout.write(JSON.stringify({
|
||||
sanitized_length: sanitized.length,
|
||||
tc0_idx: remapped[0].assistant_msg_idx,
|
||||
tc1_idx: remapped[1].assistant_msg_idx,
|
||||
tc2_idx: remapped[2].assistant_msg_idx,
|
||||
tc3_idx: remapped[3].assistant_msg_idx,
|
||||
}));
|
||||
""")
|
||||
assert result["sanitized_length"] == 2
|
||||
# Tool calls from filtered empty assistants: after the full loop, lastKeptAsstIdx=1,
|
||||
# so all filtered-assistant tool calls correctly attach to the kept assistant at idx 1.
|
||||
assert result["tc0_idx"] == 1, f"Expected 1 (last kept asst), got {result['tc0_idx']}"
|
||||
assert result["tc1_idx"] == 1
|
||||
assert result["tc2_idx"] == 1
|
||||
# Tool call from the kept assistant at orig idx 4 -> sanitized idx 1
|
||||
assert result["tc3_idx"] == 1, f"Expected 1, got {result['tc3_idx']}"
|
||||
|
||||
|
||||
def test_b9_no_filtering_needed_indices_preserved():
|
||||
"""When no empty assistant messages exist, indices should pass through unchanged."""
|
||||
result = _run_js("""
|
||||
const messages = [
|
||||
{ role: 'user', content: 'hi' }, // orig 0 -> sanitized 0
|
||||
{ role: 'assistant', content: 'hello' }, // orig 1 -> sanitized 1
|
||||
{ role: 'user', content: 'more' }, // orig 2 -> sanitized 2
|
||||
{ role: 'assistant', content: 'yes' }, // orig 3 -> sanitized 3
|
||||
];
|
||||
const tool_calls = [
|
||||
{ name: 'x', assistant_msg_idx: 1 },
|
||||
{ name: 'y', assistant_msg_idx: 3 },
|
||||
];
|
||||
const { sanitized, remapped } = sanitizeAndRemap(messages, tool_calls);
|
||||
process.stdout.write(JSON.stringify({
|
||||
sanitized_length: sanitized.length,
|
||||
tc0_idx: remapped[0].assistant_msg_idx,
|
||||
tc1_idx: remapped[1].assistant_msg_idx,
|
||||
}));
|
||||
""")
|
||||
assert result["sanitized_length"] == 4
|
||||
assert result["tc0_idx"] == 1, f"Expected 1, got {result['tc0_idx']}"
|
||||
assert result["tc1_idx"] == 3, f"Expected 3, got {result['tc1_idx']}"
|
||||
|
||||
|
||||
def test_b9_tool_role_messages_filtered():
|
||||
"""Messages with role='tool' must be filtered out and not affect index mapping."""
|
||||
result = _run_js("""
|
||||
const messages = [
|
||||
{ role: 'user', content: 'run' }, // orig 0 -> sanitized 0
|
||||
{ role: 'tool', content: 'output' }, // orig 1 -> FILTERED (tool role)
|
||||
{ role: 'assistant', content: 'done' }, // orig 2 -> sanitized 1
|
||||
];
|
||||
const tool_calls = [
|
||||
{ name: 'terminal', assistant_msg_idx: 2 },
|
||||
];
|
||||
const { sanitized, remapped } = sanitizeAndRemap(messages, tool_calls);
|
||||
process.stdout.write(JSON.stringify({
|
||||
sanitized_length: sanitized.length,
|
||||
tc0_idx: remapped[0].assistant_msg_idx,
|
||||
}));
|
||||
""")
|
||||
assert result["sanitized_length"] == 2, f"tool-role message must be filtered, got {result['sanitized_length']}"
|
||||
assert result["tc0_idx"] == 1, f"Expected orig idx 2 -> sanitized idx 1, got {result['tc0_idx']}"
|
||||
Reference in New Issue
Block a user