Merge pull request #182 from nesquena/fix/thinking-display-edge-cases

fix: harden thinking display streaming edge cases
This commit is contained in:
Nathan Esquenazi
2026-04-08 18:51:06 -07:00
committed by GitHub
2 changed files with 46 additions and 1 deletion

View File

@@ -84,6 +84,11 @@ async function send(){
// Accumulated assistant reply text plus its lazily-created DOM nodes
// (both stay null until the first token arrives).
let assistantText='';
let assistantRow=null;
let assistantBody=null;
// Thinking tag patterns for streaming display.
// First pair: <think>...</think> (DeepSeek / QwQ style); second pair:
// Gemma channel tokens. _streamDisplay only matches a pair at the very
// start of the reply (it uses startsWith), so mid-reply tags pass through.
const _thinkPairs=[
{open:'<think>',close:'</think>'},
{open:'<|channel>thought\n',close:'<channel|>'}
];
function ensureAssistantRow(){
if(assistantRow)return;
@@ -106,12 +111,36 @@ async function send(){
// rAF-throttled rendering: buffer tokens, render at most once per frame
let _renderPending=false;
// Compute the user-visible portion of assistantText while tokens stream in.
// A completed leading thinking block is stripped; an unterminated thinking
// block — or a partially streamed opening tag — yields '' so the caller can
// show a placeholder instead of raw reasoning text.
function _streamDisplay(){
  const buffered=assistantText;
  for(let i=0;i<_thinkPairs.length;i++){
    const pair=_thinkPairs[i];
    // A tag still being streamed (`<thi`, `<think`, ...) must not flash
    // on screen before the model finishes emitting the token.
    if(pair.open.startsWith(buffered)) return '';
    if(!buffered.startsWith(pair.open)) continue;
    const closeAt=buffered.indexOf(pair.close,pair.open.length);
    // No closing tag yet: everything buffered so far is hidden reasoning.
    if(closeAt===-1) return '';
    // Thinking block finished — surface only the text that follows it.
    return buffered.slice(closeAt+pair.close.length).trimStart();
  }
  return buffered;
}
// rAF-throttled renderer: coalesces token updates so the DOM is touched at
// most once per animation frame. Fix: drop the stale pre-diff line that
// rendered the raw assistantText (thinking tags included) into the DOM and
// called renderMd a second time before being overwritten.
function _scheduleRender(){
  if(_renderPending) return;
  _renderPending=true;
  requestAnimationFrame(()=>{
    _renderPending=false;
    if(assistantBody){
      const txt=_streamDisplay();
      // Non-empty buffer with nothing displayable means we are inside an
      // unterminated thinking block — show a muted placeholder instead.
      const isThinking=!txt&&assistantText.length>0;
      assistantBody.innerHTML=txt?renderMd(txt):(isThinking?'<span style="color:var(--muted);font-size:13px">Thinking\u2026</span>':'');
    }
    scrollIfPinned();
  });
}

View File

@@ -503,6 +503,22 @@ function renderMessages(){
// Fall back to the message's structured reasoning field when nothing was
// extracted earlier — presumably populated by APIs that return reasoning
// separately from content (TODO confirm against the response handler).
if(!thinkingText && m.reasoning){
thinkingText=m.reasoning;
}
// Parse inline thinking tags from plain text: <think>...</think> (DeepSeek, QwQ, etc.)
// and Gemma 4 channel tokens: <|channel>thought\n...<channel|>
if(!thinkingText && typeof content==='string'){
// Anchored at ^ so only a leading block counts as reasoning; the trailing
// \s* also swallows the whitespace separating it from the visible answer.
const thinkMatch=content.match(/^<think>([\s\S]*?)<\/think>\s*/);
if(thinkMatch){
thinkingText=thinkMatch[1].trim();
// Strip the whole matched span so the thinking text is not rendered twice.
content=content.slice(thinkMatch[0].length);
}
// Only try the Gemma pattern when the <think> form did not match
// (an empty <think></think> capture also falls through to here).
if(!thinkingText){
const gemmaMatch=content.match(/^<\|channel>thought\n([\s\S]*?)<channel\|>\s*/);
if(gemmaMatch){
thinkingText=gemmaMatch[1].trim();
content=content.slice(gemmaMatch[0].length);
}
}
}
const isUser=m.role==='user';
// True only for the final entry of the visible-message list.
const isLastAssistant=!isUser&&vi===visWithIdx.length-1;
// Render thinking card before the assistant message (collapsed by default)