feat: redesign chat transcript + fix streaming/persistence lifecycle — v0.50.70 (PR #587 by @aronprins)

Redesign chat transcript + fix streaming/persistence lifecycle — v0.50.70 Squash-merges PR #587 by @aronprins (Aron Prins). Full credit to @aronprins for all feature and fix work. Transcript redesign: unified --msg-rail/--msg-max CSS variables, user turns as tinted cards, thinking cards as bordered panels, error card treatment, day-change separators, composer fade. Approval/clarify as composer flyouts: cards slide up from behind composer top, overflow:hidden + translateY clip prevents travel visibility, focus({preventScroll:true}). Streaming lifecycle: DOM order user→thinking→tool cards→response, no mid-stream jump. Live tool cards inserted before [data-live-assistant]. Persistence: reasoning attached before s.save(), _restore_reasoning_metadata on reload, role=tool rows preserved in S.messages, CLI-session tool-result fallback. Workspace panel FOUC fix: [data-workspace-panel] set at parse time. Docs: docs/ui-ux/index.html + two-stage-proposal.html. Maintainer additions (433b867): CHANGELOG v0.50.70, version badge, usage badge loop simplification. Reviewed and approved by @nesquena (independent review). 1361 tests passing.
2026-04-16 23:04:42 +02:00
parent 25d38a467a
commit 9a3dc10d93
20 changed files with 2770 additions and 469 deletions
--- a/tests/test_issue401.py
+++ b/tests/test_issue401.py
@@ -1,216 +1,114 @@
 """
-Regression tests for issue #401 / PR #402:
-Tool call cards show incorrect/duplicate entries on session load after context compaction.
+Regression tests for tool-card persistence on session reload.

-Root cause: loadSession() applied its own B9 sanitization (producing a new message array
-with different indices) but did not remap the session-level tool_calls.assistant_msg_idx
-values to match. It then assigned the broken tool_calls directly to S.toolCalls, bypassing
-renderMessages()'s fallback that correctly derives tool calls from per-message tool_calls.
+The older loadSession() path rewrote message history on the client:
+- dropped role='tool' rows
+- dropped empty assistant rows even when they carried tool_calls
+- then ignored session.tool_calls on reload

-Fix: build origIdxToSanitizedIdx during the B9 pass and remap each tc.assistant_msg_idx;
-set S.toolCalls=[] so renderMessages() uses the fallback derivation.
-
-These tests verify the JS logic statically (no server needed).
+That broke both durable logging and page refresh for valid tool runs.
 """
+import json
 import pathlib
 import subprocess
 import textwrap
-import json

 REPO_ROOT = pathlib.Path(__file__).parent.parent.resolve()
 SESSIONS_JS = (REPO_ROOT / "static" / "sessions.js").read_text(encoding="utf-8")
+UI_JS = (REPO_ROOT / "static" / "ui.js").read_text(encoding="utf-8")


-# --- Static structural checks ---
-
-def test_loadsession_sets_toolcalls_empty():
-    """loadSession must set S.toolCalls=[] instead of pre-filling from session-level tool_calls."""
-    assert "S.toolCalls=[]" in SESSIONS_JS, (
-        "loadSession() must set S.toolCalls=[] so renderMessages() uses its fallback "
-        "derivation from per-message tool_calls with correct sanitized-array indices"
+def test_loadsession_preserves_tool_rows():
+    """Reload must keep tool rows in S.messages so snippets can be reconstructed."""
+    assert "if (m.role === 'tool') continue;" not in SESSIONS_JS, (
+        "loadSession() must not drop role='tool' messages; renderMessages() hides them "
+        "visually, but it still needs them for snippet reconstruction"
    )


-def test_loadsession_does_not_assign_broken_tool_calls():
-    """loadSession must NOT assign session.tool_calls directly to S.toolCalls (causes index mismatch)."""
-    # The old broken pattern: S.toolCalls=(data.session.tool_calls||[]).map(tc=>({...tc,done:true}))
-    assert "S.toolCalls=(data.session.tool_calls" not in SESSIONS_JS, (
-        "loadSession() must not assign session-level tool_calls directly to S.toolCalls — "
-        "those indices are relative to the pre-sanitization array and will be wrong after B9 filtering"
-    )
+def test_loadsession_uses_session_toolcalls_only_as_fallback():
+    """Session summaries are the fallback, not the primary reload source."""
+    assert "if(!hasMessageToolMetadata&&data.session.tool_calls&&data.session.tool_calls.length)" in SESSIONS_JS
+    assert "S.toolCalls=(data.session.tool_calls||[]).map(tc=>({...tc,done:true}));" in SESSIONS_JS
+    assert "S.toolCalls=[];" in SESSIONS_JS


-def test_loadsession_builds_idx_remap():
-    """loadSession must build an origIdxToSanitizedIdx map during B9 sanitization."""
-    assert "origIdxToSanitizedIdx" in SESSIONS_JS, (
-        "loadSession() must build origIdxToSanitizedIdx during B9 sanitization "
-        "to remap session-level tool_calls.assistant_msg_idx"
-    )
+def test_rendermessages_treats_openai_toolcall_assistants_as_visible():
+    """OpenAI assistant rows with empty content but tool_calls must stay anchorable."""
+    assert "const hasTc=Array.isArray(m.tool_calls)&&m.tool_calls.length>0;" in UI_JS
+    assert "if(hasTc||hasTu||_messageHasReasoningPayload(m)) return true;" in UI_JS


-def test_loadsession_remaps_assistant_msg_idx():
-    """loadSession must remap tc.assistant_msg_idx using the index map."""
-    assert "tc.assistant_msg_idx" in SESSIONS_JS, (
-        "loadSession() must update tc.assistant_msg_idx using the sanitized index map"
-    )
-
-
-# --- Behavioural Node.js tests ---
-
 def _run_js(script_body: str) -> dict:
-    """Run a JS snippet that exercises the B9 sanitization logic extracted from sessions.js."""
-    # Extract just the B9 + index-remap block from loadSession
-    # We'll re-implement it inline for testability
    script = textwrap.dedent(f"""
-        // Simulate the B9 sanitization + index remap logic from loadSession()
-        function sanitizeAndRemap(messages, tool_calls) {{
-            const allMsgs = messages || [];
-            const sanitized = [];
-            const origIdxToSanitizedIdx = {{}};
-            let lastKeptAsstIdx = -1;
-            for (let i = 0; i < allMsgs.length; i++) {{
-                const m = allMsgs[i];
-                if (!m || !m.role) continue;
-                if (m.role === 'tool') continue;
-                if (m.role === 'assistant') {{
-                    let c = m.content || '';
-                    if (Array.isArray(c)) c = c.filter(p => p && p.type === 'text').map(p => p.text || '').join('');
-                    if (!String(c).trim().length) {{ continue; }}
-                    lastKeptAsstIdx = sanitized.length;
-                }}
-                origIdxToSanitizedIdx[i] = sanitized.length;
-                sanitized.push(m);
-            }}
-            const remapped = (tool_calls || []).map(tc => {{
-                if (!tc || tc.assistant_msg_idx === undefined) return tc;
-                const origIdx = tc.assistant_msg_idx;
-                const newIdx = (origIdx in origIdxToSanitizedIdx)
-                    ? origIdxToSanitizedIdx[origIdx]
-                    : (lastKeptAsstIdx >= 0 ? lastKeptAsstIdx : -1);
-                return {{ ...tc, assistant_msg_idx: newIdx }};
+        function loadSessionShape(messages, sessionToolCalls) {{
+            const filtered = (messages || []).filter(m => m && m.role);
+            const hasMessageToolMetadata = filtered.some(m => {{
+                if (!m || m.role !== 'assistant') return false;
+                const hasTc = Array.isArray(m.tool_calls) && m.tool_calls.length > 0;
+                const hasTu = Array.isArray(m.content) && m.content.some(p => p && p.type === 'tool_use');
+                return hasTc || hasTu;
            }});
-            return {{ sanitized, remapped }};
+            const toolCalls = (!hasMessageToolMetadata && sessionToolCalls && sessionToolCalls.length)
+                ? sessionToolCalls.map(tc => ({{ ...tc, done: true }}))
+                : [];
+            return {{ filtered, hasMessageToolMetadata, toolCalls }};
        }}

        {script_body}
    """)
-    proc = subprocess.run(
-        ["node", "-e", script], check=True, capture_output=True, text=True
-    )
+    proc = subprocess.run(["node", "-e", script], check=True, capture_output=True, text=True)
    return json.loads(proc.stdout)


-def test_b9_remaps_tool_call_idx_after_empty_assistant_filtered():
-    """Tool call pointing to index 1 (empty assistant at orig idx 1, kept at idx 0) remaps correctly."""
+def test_reload_keeps_empty_assistant_toolcall_anchor():
+    """OpenAI-style assistant {content:'', tool_calls:[...]} must survive reload."""
    result = _run_js("""
        const messages = [
-            { role: 'user', content: 'hello' },          // orig 0 -> sanitized 0
-            { role: 'assistant', content: '' },           // orig 1 -> FILTERED (empty)
-            { role: 'assistant', content: 'done.' },      // orig 2 -> sanitized 1
+            { role: 'user', content: 'list files' },
+            {
+                role: 'assistant',
+                content: '',
+                tool_calls: [{ id: 'call-1', function: { name: 'terminal', arguments: '{}' } }]
+            },
+            { role: 'tool', tool_call_id: 'call-1', content: '{"output":"ok"}' },
+            { role: 'assistant', content: 'Done.' }
        ];
-        const tool_calls = [
-            { name: 'terminal', assistant_msg_idx: 1 },   // pointed to filtered-out empty assistant
-            { name: 'read_file', assistant_msg_idx: 2 },  // pointed to kept assistant
-        ];
-        const { sanitized, remapped } = sanitizeAndRemap(messages, tool_calls);
+        const loaded = loadSessionShape(messages, [{ name: 'terminal', assistant_msg_idx: 1 }]);
        process.stdout.write(JSON.stringify({
-            sanitized_length: sanitized.length,
-            tc0_new_idx: remapped[0].assistant_msg_idx,  // should attach to lastKeptAsstIdx = 1
-            tc1_new_idx: remapped[1].assistant_msg_idx,  // should remap 2 -> 1
+            filtered_len: loaded.filtered.length,
+            has_metadata: loaded.hasMessageToolMetadata,
+            fallback_len: loaded.toolCalls.length,
+            assistant_tool_idx: loaded.filtered.findIndex(m => m.role === 'assistant' && m.tool_calls),
+            tool_idx: loaded.filtered.findIndex(m => m.role === 'tool')
        }));
    """)
-    assert result["sanitized_length"] == 2, f"Expected 2 messages after B9, got {result['sanitized_length']}"
-    assert result["tc0_new_idx"] == 1, (
-        f"Tool call pointing to filtered empty assistant should attach to last kept assistant (idx 1), got {result['tc0_new_idx']}"
-    )
-    assert result["tc1_new_idx"] == 1, (
-        f"Tool call pointing to orig idx 2 should remap to sanitized idx 1, got {result['tc1_new_idx']}"
-    )
+    assert result["filtered_len"] == 4
+    assert result["has_metadata"] is True
+    assert result["fallback_len"] == 0
+    assert result["assistant_tool_idx"] == 1
+    assert result["tool_idx"] == 2


-def test_b9_remaps_multiple_empty_assistants():
-    """Multiple consecutive empty assistants all remap to the last (nearest) kept assistant.
-
-    Note: the remapping pass runs after the full sanitization loop, so lastKeptAsstIdx
-    already reflects the final kept-assistant position. This means even empty-assistant
-    tool calls that came BEFORE the kept assistant get attached to it — which is correct
-    behavior for context-compacted sessions where all tool calls belong to the one
-    non-empty assistant response.
-    """
+def test_reload_uses_session_summary_when_messages_have_no_tool_metadata():
+    """Older sessions should still render from session.tool_calls on reload."""
    result = _run_js("""
        const messages = [
-            { role: 'user', content: 'go' },              // orig 0 -> sanitized 0
-            { role: 'assistant', content: '' },           // orig 1 -> FILTERED
-            { role: 'assistant', content: '' },           // orig 2 -> FILTERED
-            { role: 'assistant', content: '' },           // orig 3 -> FILTERED
-            { role: 'assistant', content: 'result' },     // orig 4 -> sanitized 1
+            { role: 'user', content: 'build site' },
+            { role: 'assistant', content: 'Starting.' },
+            { role: 'tool', content: '{"bytes_written": 4955}' },
+            { role: 'assistant', content: '' }
        ];
-        const tool_calls = [
-            { name: 'a', assistant_msg_idx: 1 },
-            { name: 'b', assistant_msg_idx: 2 },
-            { name: 'c', assistant_msg_idx: 3 },
-            { name: 'd', assistant_msg_idx: 4 },
+        const sessionToolCalls = [
+            { name: 'write_file', assistant_msg_idx: 1, snippet: 'bytes_written', tid: '' }
        ];
-        const { sanitized, remapped } = sanitizeAndRemap(messages, tool_calls);
+        const loaded = loadSessionShape(messages, sessionToolCalls);
        process.stdout.write(JSON.stringify({
-            sanitized_length: sanitized.length,
-            tc0_idx: remapped[0].assistant_msg_idx,
-            tc1_idx: remapped[1].assistant_msg_idx,
-            tc2_idx: remapped[2].assistant_msg_idx,
-            tc3_idx: remapped[3].assistant_msg_idx,
+            has_metadata: loaded.hasMessageToolMetadata,
+            fallback_len: loaded.toolCalls.length,
+            done_flag: loaded.toolCalls[0] && loaded.toolCalls[0].done === true
        }));
    """)
-    assert result["sanitized_length"] == 2
-    # Tool calls from filtered empty assistants: after the full loop, lastKeptAsstIdx=1,
-    # so all filtered-assistant tool calls correctly attach to the kept assistant at idx 1.
-    assert result["tc0_idx"] == 1, f"Expected 1 (last kept asst), got {result['tc0_idx']}"
-    assert result["tc1_idx"] == 1
-    assert result["tc2_idx"] == 1
-    # Tool call from the kept assistant at orig idx 4 -> sanitized idx 1
-    assert result["tc3_idx"] == 1, f"Expected 1, got {result['tc3_idx']}"
-
-
-def test_b9_no_filtering_needed_indices_preserved():
-    """When no empty assistant messages exist, indices should pass through unchanged."""
-    result = _run_js("""
-        const messages = [
-            { role: 'user', content: 'hi' },              // orig 0 -> sanitized 0
-            { role: 'assistant', content: 'hello' },      // orig 1 -> sanitized 1
-            { role: 'user', content: 'more' },            // orig 2 -> sanitized 2
-            { role: 'assistant', content: 'yes' },        // orig 3 -> sanitized 3
-        ];
-        const tool_calls = [
-            { name: 'x', assistant_msg_idx: 1 },
-            { name: 'y', assistant_msg_idx: 3 },
-        ];
-        const { sanitized, remapped } = sanitizeAndRemap(messages, tool_calls);
-        process.stdout.write(JSON.stringify({
-            sanitized_length: sanitized.length,
-            tc0_idx: remapped[0].assistant_msg_idx,
-            tc1_idx: remapped[1].assistant_msg_idx,
-        }));
-    """)
-    assert result["sanitized_length"] == 4
-    assert result["tc0_idx"] == 1, f"Expected 1, got {result['tc0_idx']}"
-    assert result["tc1_idx"] == 3, f"Expected 3, got {result['tc1_idx']}"
-
-
-def test_b9_tool_role_messages_filtered():
-    """Messages with role='tool' must be filtered out and not affect index mapping."""
-    result = _run_js("""
-        const messages = [
-            { role: 'user', content: 'run' },             // orig 0 -> sanitized 0
-            { role: 'tool', content: 'output' },          // orig 1 -> FILTERED (tool role)
-            { role: 'assistant', content: 'done' },       // orig 2 -> sanitized 1
-        ];
-        const tool_calls = [
-            { name: 'terminal', assistant_msg_idx: 2 },
-        ];
-        const { sanitized, remapped } = sanitizeAndRemap(messages, tool_calls);
-        process.stdout.write(JSON.stringify({
-            sanitized_length: sanitized.length,
-            tc0_idx: remapped[0].assistant_msg_idx,
-        }));
-    """)
-    assert result["sanitized_length"] == 2, f"tool-role message must be filtered, got {result['sanitized_length']}"
-    assert result["tc0_idx"] == 1, f"Expected orig idx 2 -> sanitized idx 1, got {result['tc0_idx']}"
+    assert result["has_metadata"] is False
+    assert result["fallback_len"] == 1
+    assert result["done_flag"] is True