fix: strip think tags when model emits leading whitespace before <think> (#327)
Remove the ^ anchor from the think/Gemma regexes in ui.js, and call trimStart() before the startsWith checks in the messages.js streaming path. Fixes MiniMax M2.7 and any other model that emits leading newlines before <think>. Adds 10 new tests (768 total).
This commit is contained in:
@@ -6,6 +6,13 @@
|
|||||||
---
|
---
|
||||||
|
|
||||||
|
|
||||||
|
## [v0.50.5] Think-tag stripping with leading whitespace (PR #327)
|
||||||
|
|
||||||
|
- **Fix think-tag rendering for models that emit leading whitespace** (e.g. MiniMax M2.7): Some models emit one or more newlines before the `<think>` opening tag. The previous regex used a `^` anchor, so it only matched when `<think>` was the very first character. When the anchor failed, the raw `</think>` tag appeared in the rendered message body.
|
||||||
|
- `static/ui.js` (stored messages): removed `^` anchor from `<think>` and Gemma channel-token regexes; switched from `.slice()` to `.replace()` + `.trimStart()` so stripping works regardless of position
|
||||||
|
- `static/messages.js` (live stream): calls `trimStart()` on the streamed text before the `startsWith`/`indexOf` checks; the partial-tag-prefix guard also uses the trimmed text
|
||||||
|
- 10 new tests in `tests/test_sprint38.py`; 768 tests total (up from 758)
|
||||||
|
|
||||||
## [v0.50.3] Onboarding completes gracefully for pre-configured providers (PR #323, fixes #322)
|
## [v0.50.3] Onboarding completes gracefully for pre-configured providers (PR #323, fixes #322)
|
||||||
|
|
||||||
- **OAuth/CLI-configured providers no longer blocked by onboarding** (closes #322): Users with providers already set up via the CLI (`openai-codex`, `copilot`, `nous`, etc.) hit `Unsupported provider for WebUI onboarding` when clicking "Open Hermes" on the finish page. The wizard now marks onboarding complete and lets them through — the agent setup is already done, no wizard steps needed.
|
- **OAuth/CLI-configured providers no longer blocked by onboarding** (closes #322): Users with providers already set up via the CLI (`openai-codex`, `copilot`, `nous`, etc.) hit `Unsupported provider for WebUI onboarding` when clicking "Open Hermes" on the finish page. The wizard now marks onboarding complete and lets them through — the agent setup is already done, no wizard steps needed.
|
||||||
|
|||||||
@@ -526,7 +526,7 @@
|
|||||||
<div class="settings-section-title">System</div>
|
<div class="settings-section-title">System</div>
|
||||||
<div class="settings-section-meta">Instance version and access controls.</div>
|
<div class="settings-section-meta">Instance version and access controls.</div>
|
||||||
</div>
|
</div>
|
||||||
<span class="settings-version-badge">v0.50.3</span>
|
<span class="settings-version-badge">v0.50.5</span>
|
||||||
</div>
|
</div>
|
||||||
<div class="settings-field" style="border-top:1px solid var(--border);padding-top:12px;margin-top:8px">
|
<div class="settings-field" style="border-top:1px solid var(--border);padding-top:12px;margin-top:8px">
|
||||||
<label for="settingsPassword" data-i18n="settings_label_password">Access Password</label>
|
<label for="settingsPassword" data-i18n="settings_label_password">Access Password</label>
|
||||||
|
|||||||
@@ -116,18 +116,21 @@ async function send(){
|
|||||||
function _streamDisplay(){
|
function _streamDisplay(){
|
||||||
const raw=assistantText;
|
const raw=assistantText;
|
||||||
for(const {open,close} of _thinkPairs){
|
for(const {open,close} of _thinkPairs){
|
||||||
if(raw.startsWith(open)){
|
// Trim leading whitespace before checking for the open tag — some models
|
||||||
const ci=raw.indexOf(close,open.length);
|
// (e.g. MiniMax) emit newlines before <think>.
|
||||||
|
const trimmed=raw.trimStart();
|
||||||
|
if(trimmed.startsWith(open)){
|
||||||
|
const ci=trimmed.indexOf(close,open.length);
|
||||||
if(ci!==-1){
|
if(ci!==-1){
|
||||||
// Thinking block complete — strip it, show the rest
|
// Thinking block complete — strip it, show the rest
|
||||||
return raw.slice(ci+close.length).replace(/^\s+/,'');
|
return trimmed.slice(ci+close.length).replace(/^\s+/,'');
|
||||||
}
|
}
|
||||||
// Still inside thinking block — show placeholder
|
// Still inside thinking block — show placeholder
|
||||||
return '';
|
return '';
|
||||||
}
|
}
|
||||||
// Hide partial tag prefixes while streaming so users don't see
|
// Hide partial tag prefixes while streaming so users don't see
|
||||||
// `<thi`, `<think`, etc. before the model finishes the token.
|
// `<thi`, `<think`, etc. before the model finishes the token.
|
||||||
if(open.startsWith(raw)) return '';
|
if(open.startsWith(trimmed)) return '';
|
||||||
}
|
}
|
||||||
return raw;
|
return raw;
|
||||||
}
|
}
|
||||||
|
|||||||
11
static/ui.js
11
static/ui.js
@@ -801,19 +801,20 @@ function renderMessages(){
|
|||||||
if(!thinkingText && m.reasoning){
|
if(!thinkingText && m.reasoning){
|
||||||
thinkingText=m.reasoning;
|
thinkingText=m.reasoning;
|
||||||
}
|
}
|
||||||
// Parse inline thinking tags from plain text: <think>...</think> (DeepSeek, QwQ, etc.)
|
// Parse inline thinking tags from plain text: <think>...</think> (DeepSeek, QwQ, MiniMax, etc.)
|
||||||
// and Gemma 4 channel tokens: <|channel>thought\n...<channel|>
|
// and Gemma 4 channel tokens: <|channel>thought\n...<channel|>
|
||||||
|
// Note: no ^ anchor — some models emit leading whitespace/newlines before <think>.
|
||||||
if(!thinkingText && typeof content==='string'){
|
if(!thinkingText && typeof content==='string'){
|
||||||
const thinkMatch=content.match(/^<think>([\s\S]*?)<\/think>\s*/);
|
const thinkMatch=content.match(/<think>([\s\S]*?)<\/think>/);
|
||||||
if(thinkMatch){
|
if(thinkMatch){
|
||||||
thinkingText=thinkMatch[1].trim();
|
thinkingText=thinkMatch[1].trim();
|
||||||
content=content.slice(thinkMatch[0].length);
|
content=content.replace(/<think>[\s\S]*?<\/think>\s*/,'').trimStart();
|
||||||
}
|
}
|
||||||
if(!thinkingText){
|
if(!thinkingText){
|
||||||
const gemmaMatch=content.match(/^<\|channel>thought\n([\s\S]*?)<channel\|>\s*/);
|
const gemmaMatch=content.match(/<\|channel>thought\n([\s\S]*?)<channel\|>/);
|
||||||
if(gemmaMatch){
|
if(gemmaMatch){
|
||||||
thinkingText=gemmaMatch[1].trim();
|
thinkingText=gemmaMatch[1].trim();
|
||||||
content=content.slice(gemmaMatch[0].length);
|
content=content.replace(/<\|channel>thought\n[\s\S]*?<channel\|>\s*/,'').trimStart();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
111
tests/test_sprint38.py
Normal file
111
tests/test_sprint38.py
Normal file
@@ -0,0 +1,111 @@
|
|||||||
|
"""
|
||||||
|
Sprint 38 Tests: Think-tag stripping with leading whitespace (PR #327).
|
||||||
|
|
||||||
|
Covers the static render path (ui.js regex logic, verified against the JS source)
|
||||||
|
and the streaming render path (messages.js _streamDisplay logic).
|
||||||
|
"""
|
||||||
|
import pathlib
|
||||||
|
import re
|
||||||
|
|
||||||
|
REPO_ROOT = pathlib.Path(__file__).parent.parent
|
||||||
|
UI_JS = (REPO_ROOT / "static" / "ui.js").read_text()
|
||||||
|
MSG_JS = (REPO_ROOT / "static" / "messages.js").read_text()
|
||||||
|
|
||||||
|
|
||||||
|
# ── ui.js: static render path ────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def test_think_regex_has_no_anchor():
|
||||||
|
"""The <think> regex in ui.js must not use a ^ anchor so leading whitespace is allowed."""
|
||||||
|
# Find the thinkMatch line by locating the .match( call on that line
|
||||||
|
idx = UI_JS.find("const thinkMatch=content.match(")
|
||||||
|
assert idx >= 0, "thinkMatch line not found in ui.js"
|
||||||
|
line = UI_JS[idx:idx+100]
|
||||||
|
# The regex must NOT start with ^ right after the opening /
|
||||||
|
assert "/^<think>" not in line and "(/^" not in line, \
|
||||||
|
f"thinkMatch regex must not use ^ anchor — found: {line.strip()}"
|
||||||
|
|
||||||
|
|
||||||
|
def test_gemma_regex_has_no_anchor():
|
||||||
|
"""The Gemma channel-token regex in ui.js must not use a ^ anchor."""
|
||||||
|
match = re.search(r'const gemmaMatch=content\.match\((/[^/]+/)\)', UI_JS)
|
||||||
|
assert match, "gemmaMatch line not found in ui.js"
|
||||||
|
pattern = match.group(1)
|
||||||
|
assert not pattern.startswith('/^'), \
|
||||||
|
f"gemmaMatch regex must not use ^ anchor — got {pattern}"
|
||||||
|
|
||||||
|
|
||||||
|
def test_think_content_removal_uses_replace_not_slice():
|
||||||
|
"""After extracting thinkingText, content must use .replace() not .slice() to remove the tag."""
|
||||||
|
# Find the block that handles thinkMatch
|
||||||
|
idx = UI_JS.find("if(thinkMatch){")
|
||||||
|
assert idx >= 0, "thinkMatch handler block not found"
|
||||||
|
block = UI_JS[idx:idx+200]
|
||||||
|
assert "content.replace(" in block, \
|
||||||
|
"ui.js must use content.replace() to remove <think> block (not .slice())"
|
||||||
|
assert ".trimStart()" in block, \
|
||||||
|
"ui.js must call .trimStart() on content after removing the <think> block"
|
||||||
|
|
||||||
|
|
||||||
|
def test_gemma_content_removal_uses_replace_not_slice():
|
||||||
|
"""Gemma channel token removal must also use .replace() not .slice()."""
|
||||||
|
idx = UI_JS.find("if(gemmaMatch){")
|
||||||
|
assert idx >= 0, "gemmaMatch handler block not found"
|
||||||
|
block = UI_JS[idx:idx+200]
|
||||||
|
assert "content.replace(" in block, \
|
||||||
|
"ui.js must use content.replace() to remove Gemma channel block (not .slice())"
|
||||||
|
assert ".trimStart()" in block, \
|
||||||
|
"ui.js must call .trimStart() on content after removing the Gemma channel block"
|
||||||
|
|
||||||
|
|
||||||
|
# ── messages.js: streaming render path ───────────────────────────────────────
|
||||||
|
|
||||||
|
def test_stream_display_trims_before_startswith():
|
||||||
|
"""_streamDisplay in messages.js must call .trimStart() before .startsWith() check."""
|
||||||
|
fn_idx = MSG_JS.find("function _streamDisplay()")
|
||||||
|
assert fn_idx >= 0, "_streamDisplay function not found in messages.js"
|
||||||
|
fn_end = MSG_JS.find("\n }", fn_idx) + 4
|
||||||
|
fn_body = MSG_JS[fn_idx:fn_end]
|
||||||
|
assert "trimStart()" in fn_body, \
|
||||||
|
"_streamDisplay must call trimStart() to handle models that emit leading whitespace before <think>"
|
||||||
|
|
||||||
|
|
||||||
|
def test_stream_display_uses_trimmed_for_startswith():
|
||||||
|
"""_streamDisplay must check trimmed.startsWith(open), not raw.startsWith(open)."""
|
||||||
|
fn_idx = MSG_JS.find("function _streamDisplay()")
|
||||||
|
fn_end = MSG_JS.find("\n }", fn_idx) + 4
|
||||||
|
fn_body = MSG_JS[fn_idx:fn_end]
|
||||||
|
assert "trimmed.startsWith(open)" in fn_body, \
|
||||||
|
"_streamDisplay must use trimmed.startsWith(open) not raw.startsWith(open)"
|
||||||
|
|
||||||
|
|
||||||
|
def test_stream_display_partial_tag_uses_trimmed():
|
||||||
|
"""The partial-tag guard in _streamDisplay must also use trimmed, not raw."""
|
||||||
|
fn_idx = MSG_JS.find("function _streamDisplay()")
|
||||||
|
fn_end = MSG_JS.find("\n }", fn_idx) + 4
|
||||||
|
fn_body = MSG_JS[fn_idx:fn_end]
|
||||||
|
assert "open.startsWith(trimmed)" in fn_body, \
|
||||||
|
"Partial-tag guard must use open.startsWith(trimmed) not open.startsWith(raw)"
|
||||||
|
|
||||||
|
|
||||||
|
def test_stream_display_trims_return_after_close():
|
||||||
|
"""After stripping a completed think block, _streamDisplay must trim leading whitespace from the result."""
|
||||||
|
fn_idx = MSG_JS.find("function _streamDisplay()")
|
||||||
|
fn_end = MSG_JS.find("\n }", fn_idx) + 4
|
||||||
|
fn_body = MSG_JS[fn_idx:fn_end]
|
||||||
|
# The return after finding close must strip whitespace from the result
|
||||||
|
assert ".replace(/^" in fn_body and "s+/,'')" in fn_body, \
|
||||||
|
"_streamDisplay must strip leading whitespace from content after the closing think tag"
|
||||||
|
|
||||||
|
|
||||||
|
# ── Regression: existing anchored patterns must be gone ──────────────────────
|
||||||
|
|
||||||
|
def test_no_anchored_think_regex_in_ui_js():
|
||||||
|
"""The old anchored regex /^<think>/ must not exist in ui.js."""
|
||||||
|
assert "/^<think>" not in UI_JS, \
|
||||||
|
"Old anchored /^<think>/ regex still present in ui.js — fix not applied"
|
||||||
|
|
||||||
|
|
||||||
|
def test_no_anchored_gemma_regex_in_ui_js():
    """The old anchored Gemma regex must not exist in ui.js.

    The JavaScript regex literal escapes the pipe with a backslash, so the
    text that appears in ui.js is slash, caret, less-than, backslash, pipe,
    "channel>". Searching only for the unescaped spelling can never match the
    old code, which would make this regression check pass vacuously.
    """
    # Check the escaped spelling (what a JS regex literal actually contains)
    # and keep the unescaped spelling so the original check is still enforced.
    for anchored in (r"/^<\|channel>", "/^<|channel>"):
        assert anchored not in UI_JS, \
            "Old anchored /^<|channel>/ regex still present in ui.js — fix not applied"
|
||||||
Reference in New Issue
Block a user