Phase 8: TypeScript migration, i18n rewrite, Activity Tree, Projects API, Heartbeats

2026-04-29 11:50:00 +02:00
parent c705fad626
commit 255914c9f1
43 changed files with 17948 additions and 6899 deletions
--- a/api/streaming.py
+++ b/api/streaming.py
@@ -777,9 +777,13 @@ def _sse(handler, event, data):

 def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, attachments=None, agent=None):
    """Run agent in background thread, writing SSE events to STREAMS[stream_id]."""
+    print(f'[DEBUG streaming] started stream_id={stream_id}', flush=True)
    q = STREAMS.get(stream_id)
+    print(f'[DEBUG streaming] STREAMS keys={list(STREAMS.keys())}', flush=True)
    if q is None:
+        print(f'[DEBUG streaming] queue is None for stream_id={stream_id}', flush=True)
        return
+    print(f'[DEBUG streaming] queue found, agent={agent}', flush=True)
    s = None
    _rt = {}
    old_cwd = None
@@ -937,12 +941,41 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta
            _reasoning_text = ''  # accumulates reasoning/thinking trace for persistence
            _live_tool_calls = []  # tool progress fallback when final messages omit tool IDs

+            _token_buf = []   # token text buffer for batching
+            _token_buf_timer = None   # threading.Timer reference
+            _token_buf_closed = False  # True after sentinel seen
+
+            def _flush_token_buf():  # send everything buffered so far as one 'token' event
+                nonlocal _token_buf_timer
+                if _token_buf_closed or not _token_buf:
+                    return  # stream already closed by the sentinel, or nothing buffered
+                # Snapshot then clear the buffer. NOTE(review): two unlocked steps, so not atomic if the Timer thread and on_token overlap - confirm
+                batch = ''.join(_token_buf)
+                _token_buf.clear()
+                # Cancel any pending timer and reset the reference so on_token can arm a new one
+                if _token_buf_timer is not None:
+                    _token_buf_timer.cancel()
+                    _token_buf_timer = None
+                # Re-check _token_buf_closed so nothing is put after the sentinel has been seen
+                if not _token_buf_closed:
+                    put('token', {'text': batch})
+
             def on_token(text):
-                nonlocal _token_sent
+                nonlocal _token_sent, _token_buf_timer, _token_buf_closed
                 if text is None:
+                    # Flush any remaining buffered text, then mark closed so later flushes return early
+                    _flush_token_buf()
+                    _token_buf_closed = True
                     return  # end-of-stream sentinel
                 _token_sent = True
-                put('token', {'text': text})
+                _token_buf.append(text)
+                if len(_token_buf) >= 20:
+                    # Flush immediately once 20 text chunks (one per on_token call) are buffered
+                    _flush_token_buf()
+                elif _token_buf_timer is None:
+                    # Arm a 100 ms flush timer only when none is pending; later tokens do not restart it
+                    _token_buf_timer = threading.Timer(0.1, _flush_token_buf)
+                    _token_buf_timer.start()

            def on_reasoning(text):
                nonlocal _reasoning_text
@@ -1318,6 +1351,13 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta
                    if isinstance(_rm, dict) and _rm.get('role') == 'assistant':
                        _rm['reasoning'] = _reasoning_text
                        break
+            # Tag the last assistant message with per-turn token usage so the UI
+            # can display it on that specific message instead of the cumulative total.
+            if s.messages:
+                for _rm in reversed(s.messages):
+                    if isinstance(_rm, dict) and _rm.get('role') == 'assistant':
+                        _rm['_usage'] = {'in': input_tokens, 'out': output_tokens}
+                        break
            s.save()
            # Sync to state.db for /insights (opt-in setting)
            try:
@@ -1342,6 +1382,9 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta
                usage['context_length'] = getattr(_cc, 'context_length', 0) or 0
                usage['threshold_tokens'] = getattr(_cc, 'threshold_tokens', 0) or 0
                usage['last_prompt_tokens'] = getattr(_cc, 'last_prompt_tokens', 0) or 0
+            # Send cumulative session totals separately so UI can label them as "session total"
+            usage['_session_input_tokens'] = s.input_tokens or 0
+            usage['_session_output_tokens'] = s.output_tokens or 0
            # (reasoning trace already attached + saved above, before s.save())
            raw_session = s.compact() | {'messages': s.messages, 'tool_calls': tool_calls}
            put('done', {'session': redact_session_data(raw_session), 'usage': usage})