From 9c44d0cf3e7623937dc9c81f3f0562bafa668c6a Mon Sep 17 00:00:00 2001 From: nesquena-hermes Date: Sun, 12 Apr 2026 14:07:00 -0700 Subject: [PATCH] fix: strip think tags when model emits leading whitespace before <think> (#327) Remove ^ anchor from think/Gemma regexes in ui.js; trimStart() before startsWith checks in messages.js streaming path. Fixes MiniMax M2.7 and any model emitting leading newlines before <think>. 10 new tests, 768 total. --- CHANGELOG.md | 7 +++ static/index.html | 2 +- static/messages.js | 11 ++-- static/ui.js | 11 ++-- tests/test_sprint38.py | 111 +++++++++++++++++++++++++++++++++++++++++ 5 files changed, 132 insertions(+), 10 deletions(-) create mode 100644 tests/test_sprint38.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 7d71c95..1fd9112 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,13 @@ --- +## [v0.50.5] Think-tag stripping with leading whitespace (PR #327) + +- **Fix think-tag rendering for models that emit leading whitespace** (e.g. MiniMax M2.7): Some models emit one or more newlines before the `<think>` opening tag. The previous regex used a `^` anchor, so it only matched when `<think>` was the very first character. When the anchor failed, the raw `<think>` tag appeared in the rendered message body. + - `static/ui.js` (stored messages): removed `^` anchor from `<think>` and Gemma channel-token regexes; switched from `.slice()` to `.replace()` + `.trimStart()` so stripping works regardless of position + - `static/messages.js` (live stream): `trimStart()` before `startsWith`/`indexOf` checks; partial-tag-prefix guard also uses trimmed buffer + - 10 new tests in `tests/test_sprint38.py`; 768 tests total (up from 758) + ## [v0.50.3] Onboarding completes gracefully for pre-configured providers (PR #323, fixes #322) - **OAuth/CLI-configured providers no longer blocked by onboarding** (closes #322): Users with providers already set up via the CLI (`openai-codex`, `copilot`, `nous`, etc.) 
hit `Unsupported provider for WebUI onboarding` when clicking "Open Hermes" on the finish page. The wizard now marks onboarding complete and lets them through — the agent setup is already done, no wizard steps needed. diff --git a/static/index.html b/static/index.html index dec6669..2075018 100644 --- a/static/index.html +++ b/static/index.html @@ -526,7 +526,7 @@
System
- v0.50.3 + v0.50.5
diff --git a/static/messages.js b/static/messages.js index 9bdbbbd..85a31a1 100644 --- a/static/messages.js +++ b/static/messages.js @@ -116,18 +116,21 @@ async function send(){ function _streamDisplay(){ const raw=assistantText; for(const {open,close} of _thinkPairs){ - if(raw.startsWith(open)){ - const ci=raw.indexOf(close,open.length); + // Trim leading whitespace before checking for the open tag — some models + // (e.g. MiniMax) emit newlines before . + const trimmed=raw.trimStart(); + if(trimmed.startsWith(open)){ + const ci=trimmed.indexOf(close,open.length); if(ci!==-1){ // Thinking block complete — strip it, show the rest - return raw.slice(ci+close.length).replace(/^\s+/,''); + return trimmed.slice(ci+close.length).replace(/^\s+/,''); } // Still inside thinking block — show placeholder return ''; } // Hide partial tag prefixes while streaming so users don't see // `... (DeepSeek, QwQ, etc.) + // Parse inline thinking tags from plain text: ... (DeepSeek, QwQ, MiniMax, etc.) // and Gemma 4 channel tokens: <|channel>thought\n... + // Note: no ^ anchor — some models emit leading whitespace/newlines before . 
if(!thinkingText && typeof content==='string'){ - const thinkMatch=content.match(/^([\s\S]*?)<\/think>\s*/); + const thinkMatch=content.match(/([\s\S]*?)<\/think>/); if(thinkMatch){ thinkingText=thinkMatch[1].trim(); - content=content.slice(thinkMatch[0].length); + content=content.replace(/[\s\S]*?<\/think>\s*/,'').trimStart(); } if(!thinkingText){ - const gemmaMatch=content.match(/^<\|channel>thought\n([\s\S]*?)\s*/); + const gemmaMatch=content.match(/<\|channel>thought\n([\s\S]*?)/); if(gemmaMatch){ thinkingText=gemmaMatch[1].trim(); - content=content.slice(gemmaMatch[0].length); + content=content.replace(/<\|channel>thought\n[\s\S]*?\s*/,'').trimStart(); } } } diff --git a/tests/test_sprint38.py b/tests/test_sprint38.py new file mode 100644 index 0000000..21e9a5d --- /dev/null +++ b/tests/test_sprint38.py @@ -0,0 +1,111 @@ +""" +Sprint 38 Tests: Think-tag stripping with leading whitespace (PR #327). + +Covers the static render path (ui.js regex logic, verified against the JS source) +and the streaming render path (messages.js _streamDisplay logic). 
+""" +import pathlib +import re + +REPO_ROOT = pathlib.Path(__file__).parent.parent +UI_JS = (REPO_ROOT / "static" / "ui.js").read_text() +MSG_JS = (REPO_ROOT / "static" / "messages.js").read_text() + + +# ── ui.js: static render path ──────────────────────────────────────────────── + +def test_think_regex_has_no_anchor(): + """The regex in ui.js must not use a ^ anchor so leading whitespace is allowed.""" + # Find the thinkMatch line by locating the .match( call on that line + idx = UI_JS.find("const thinkMatch=content.match(") + assert idx >= 0, "thinkMatch line not found in ui.js" + line = UI_JS[idx:idx+100] + # The regex must NOT start with ^ right after the opening / + assert "/^" not in line and "(/^" not in line, \ + f"thinkMatch regex must not use ^ anchor — found: {line.strip()}" + + +def test_gemma_regex_has_no_anchor(): + """The Gemma channel-token regex in ui.js must not use a ^ anchor.""" + match = re.search(r'const gemmaMatch=content\.match\((/[^/]+/)\)', UI_JS) + assert match, "gemmaMatch line not found in ui.js" + pattern = match.group(1) + assert not pattern.startswith('/^'), \ + f"gemmaMatch regex must not use ^ anchor — got {pattern}" + + +def test_think_content_removal_uses_replace_not_slice(): + """After extracting thinkingText, content must use .replace() not .slice() to remove the tag.""" + # Find the block that handles thinkMatch + idx = UI_JS.find("if(thinkMatch){") + assert idx >= 0, "thinkMatch handler block not found" + block = UI_JS[idx:idx+200] + assert "content.replace(" in block, \ + "ui.js must use content.replace() to remove block (not .slice())" + assert ".trimStart()" in block, \ + "ui.js must call .trimStart() on content after removing the block" + + +def test_gemma_content_removal_uses_replace_not_slice(): + """Gemma channel token removal must also use .replace() not .slice().""" + idx = UI_JS.find("if(gemmaMatch){") + assert idx >= 0, "gemmaMatch handler block not found" + block = UI_JS[idx:idx+200] + assert "content.replace(" 
in block, \ + "ui.js must use content.replace() to remove Gemma channel block (not .slice())" + assert ".trimStart()" in block, \ + "ui.js must call .trimStart() on content after removing the Gemma channel block" + + +# ── messages.js: streaming render path ─────────────────────────────────────── + +def test_stream_display_trims_before_startswith(): + """_streamDisplay in messages.js must call .trimStart() before .startsWith() check.""" + fn_idx = MSG_JS.find("function _streamDisplay()") + assert fn_idx >= 0, "_streamDisplay function not found in messages.js" + fn_end = MSG_JS.find("\n }", fn_idx) + 4 + fn_body = MSG_JS[fn_idx:fn_end] + assert "trimStart()" in fn_body, \ + "_streamDisplay must call trimStart() to handle models that emit leading whitespace before " + + +def test_stream_display_uses_trimmed_for_startswith(): + """_streamDisplay must check trimmed.startsWith(open), not raw.startsWith(open).""" + fn_idx = MSG_JS.find("function _streamDisplay()") + fn_end = MSG_JS.find("\n }", fn_idx) + 4 + fn_body = MSG_JS[fn_idx:fn_end] + assert "trimmed.startsWith(open)" in fn_body, \ + "_streamDisplay must use trimmed.startsWith(open) not raw.startsWith(open)" + + +def test_stream_display_partial_tag_uses_trimmed(): + """The partial-tag guard in _streamDisplay must also use trimmed, not raw.""" + fn_idx = MSG_JS.find("function _streamDisplay()") + fn_end = MSG_JS.find("\n }", fn_idx) + 4 + fn_body = MSG_JS[fn_idx:fn_end] + assert "open.startsWith(trimmed)" in fn_body, \ + "Partial-tag guard must use open.startsWith(trimmed) not open.startsWith(raw)" + + +def test_stream_display_trims_return_after_close(): + """After stripping a completed think block, _streamDisplay must trim leading whitespace from the result.""" + fn_idx = MSG_JS.find("function _streamDisplay()") + fn_end = MSG_JS.find("\n }", fn_idx) + 4 + fn_body = MSG_JS[fn_idx:fn_end] + # The return after finding close must strip whitespace from the result + assert ".replace(/^" in fn_body and "s+/,'')" in 
fn_body, \ + "_streamDisplay must strip leading whitespace from content after the closing think tag" + + +# ── Regression: existing anchored patterns must be gone ────────────────────── + +def test_no_anchored_think_regex_in_ui_js(): + """The old anchored regex /^/ must not exist in ui.js.""" + assert "/^" not in UI_JS, \ + "Old anchored /^/ regex still present in ui.js — fix not applied" + + +def test_no_anchored_gemma_regex_in_ui_js(): + """The old anchored Gemma regex must not exist in ui.js.""" + assert "/^<|channel>" not in UI_JS, \ + "Old anchored /^<|channel>/ regex still present in ui.js — fix not applied"