perf: rAF-throttled token streaming for smoother rendering

Token events from SSE are now buffered and rendered at most once per animation frame via requestAnimationFrame, instead of calling renderMd() and writing to the DOM on every single token event. Before: ~100 tokens/sec = ~100 DOM writes/sec (causes jank on heavy output). After: ~100 tokens/sec batched to ~60 DOM writes/sec (one per frame). The change is a small wrapper: _scheduleRender() gates rendering behind a rAF flag so that multiple tokens arriving between frames are batched into a single renderMd() + scrollIfPinned() call. Inspired by PR #75 (@MartinNielsenDev). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-04 14:05:51 -07:00
parent e2d24f57ac
commit c0769c50a2
1 changed files with 13 additions and 2 deletions
--- a/static/messages.js
+++ b/static/messages.js
@@ -103,14 +103,25 @@ async function send(){
  // ── Shared SSE handler wiring (used for initial connection and reconnect) ──
  let _reconnectAttempted=false;

+  // rAF-throttled rendering: buffer tokens, render at most once per frame
+  let _renderPending=false; // true while a requestAnimationFrame callback is queued and has not yet run
+  function _scheduleRender(){ // request a render on the next animation frame; no-op if one is already queued
+    if(_renderPending) return; // the queued callback reads assistantText when it runs, so it will include this token
+    _renderPending=true;
+    requestAnimationFrame(()=>{
+      _renderPending=false; // cleared first so any later _scheduleRender() call queues a fresh frame
+      if(assistantBody) assistantBody.innerHTML=renderMd(assistantText); // skipped when assistantBody is falsy at frame time
+      scrollIfPinned(); // runs on every frame callback, whether or not the body was re-rendered
+    });
+  }
+
  function _wireSSE(source){
    source.addEventListener('token',e=>{
      if(!S.session||S.session.session_id!==activeSid) return;
      const d=JSON.parse(e.data);
      assistantText+=d.text;
      ensureAssistantRow();
-      assistantBody.innerHTML=renderMd(assistantText);
-      scrollIfPinned();
+      _scheduleRender();
    });

    source.addEventListener('tool',e=>{