fix(streaming): strip Gemma 4 thinking token delimiter in all paths — closes #607
Corrects the Gemma 4 thinking delimiter to <|turn|>thinking (it was previously mistyped as <|turn>thinking) in api/streaming.py, static/messages.js, and static/ui.js. Adds 13 regression tests. Independent review by @nesquena.
This commit is contained in:
@@ -15,6 +15,10 @@
|
|||||||
- **Topbar border invisible in light mode** — added `:root:not(.dark)` border override. (PR #627)
|
- **Topbar border invisible in light mode** — added `:root:not(.dark)` border override. (PR #627)
|
||||||
- **User message bubble text contrast** — accent-colored bubbles now use skin-aware text colors meeting WCAG AA (Poseidon dark improved from 2.8 → 6.5 ratio). (PR #627)
|
- **User message bubble text contrast** — accent-colored bubbles now use skin-aware text colors meeting WCAG AA (Poseidon dark improved from 2.8 → 6.5 ratio). (PR #627)
|
||||||
- **Settings skin persistence race condition** — save now waits for server confirmation before applying. (PR #627)
|
- **Settings skin persistence race condition** — save now waits for server confirmation before applying. (PR #627)
|
||||||
|
## [v0.50.78] — 2026-04-17
|
||||||
|
|
||||||
|
### Fixed
|
||||||
|
- **Gemma 4 thinking tokens no longer shown raw in chat** — added `<|turn|>thinking\n...<turn|>` to the streaming think-token parser in `static/messages.js`, to `_strip_thinking_markup()` in `api/streaming.py`, and to the reasoning detection and extraction for persisted messages in `static/ui.js`. Previously Gemma 4's reasoning output appeared as raw text prepended to the answer. (Closes #607)
|
||||||
|
|
||||||
## [v0.50.76] — 2026-04-17
|
## [v0.50.76] — 2026-04-17
|
||||||
|
|
||||||
|
|||||||
@@ -68,6 +68,7 @@ def _strip_thinking_markup(text: str) -> str:
|
|||||||
s = str(text)
|
s = str(text)
|
||||||
s = re.sub(r'<think>.*?</think>', ' ', s, flags=re.IGNORECASE | re.DOTALL)
|
s = re.sub(r'<think>.*?</think>', ' ', s, flags=re.IGNORECASE | re.DOTALL)
|
||||||
s = re.sub(r'<\|channel\|>thought.*?<channel\|>', ' ', s, flags=re.IGNORECASE | re.DOTALL)
|
s = re.sub(r'<\|channel\|>thought.*?<channel\|>', ' ', s, flags=re.IGNORECASE | re.DOTALL)
|
||||||
|
s = re.sub(r'<\|turn\|>thinking\n.*?<turn\|>', ' ', s, flags=re.IGNORECASE | re.DOTALL) # Gemma 4
|
||||||
s = re.sub(r'^\s*(the|ther)\s+user\s+is\s+asking.*$', ' ', s, flags=re.IGNORECASE | re.MULTILINE)
|
s = re.sub(r'^\s*(the|ther)\s+user\s+is\s+asking.*$', ' ', s, flags=re.IGNORECASE | re.MULTILINE)
|
||||||
s = re.sub(r'\s+', ' ', s).strip()
|
s = re.sub(r'\s+', ' ', s).strip()
|
||||||
return s
|
return s
|
||||||
@@ -96,7 +97,7 @@ def _looks_invalid_generated_title(text: str) -> bool:
|
|||||||
if not s.strip():
|
if not s.strip():
|
||||||
return True
|
return True
|
||||||
return bool(
|
return bool(
|
||||||
re.search(r'<think>|<\|channel\|>thought', s, flags=re.IGNORECASE)
|
re.search(r'<think>|<\|channel\|>thought|<\|turn\|>thinking', s, flags=re.IGNORECASE)
|
||||||
or re.search(r'^\s*(the|ther)\s+user\s+', s, flags=re.IGNORECASE)
|
or re.search(r'^\s*(the|ther)\s+user\s+', s, flags=re.IGNORECASE)
|
||||||
or re.search(r'^\s*user\s+\w+\s+', s, flags=re.IGNORECASE)
|
or re.search(r'^\s*user\s+\w+\s+', s, flags=re.IGNORECASE)
|
||||||
or re.search(r'\b(they|user)\s+want(s)?\s+me\s+to\b', s, flags=re.IGNORECASE)
|
or re.search(r'\b(they|user)\s+want(s)?\s+me\s+to\b', s, flags=re.IGNORECASE)
|
||||||
|
|||||||
@@ -591,7 +591,7 @@
|
|||||||
<div class="settings-section-title">System</div>
|
<div class="settings-section-title">System</div>
|
||||||
<div class="settings-section-meta">Instance version and access controls.</div>
|
<div class="settings-section-meta">Instance version and access controls.</div>
|
||||||
</div>
|
</div>
|
||||||
<span class="settings-version-badge">v0.50.77</span>
|
<span class="settings-version-badge">v0.50.78</span>
|
||||||
</div>
|
</div>
|
||||||
<div class="settings-field" style="border-top:1px solid var(--border);padding-top:12px;margin-top:8px">
|
<div class="settings-field" style="border-top:1px solid var(--border);padding-top:12px;margin-top:8px">
|
||||||
<label for="settingsPassword" data-i18n="settings_label_password">Access Password</label>
|
<label for="settingsPassword" data-i18n="settings_label_password">Access Password</label>
|
||||||
|
|||||||
@@ -144,7 +144,8 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
|
|||||||
// Thinking tag patterns for streaming display
|
// Thinking tag patterns for streaming display
|
||||||
const _thinkPairs=[
|
const _thinkPairs=[
|
||||||
{open:'<think>',close:'</think>'},
|
{open:'<think>',close:'</think>'},
|
||||||
{open:'<|channel>thought\n',close:'<channel|>'}
|
{open:'<|channel>thought\n',close:'<channel|>'},
|
||||||
|
{open:'<|turn|>thinking\n',close:'<turn|>'} // Gemma 4
|
||||||
];
|
];
|
||||||
|
|
||||||
function _isActiveSession(){
|
function _isActiveSession(){
|
||||||
|
|||||||
11
static/ui.js
11
static/ui.js
@@ -1095,7 +1095,7 @@ function _messageHasReasoningPayload(m){
|
|||||||
if(!m||m.role!=='assistant') return false;
|
if(!m||m.role!=='assistant') return false;
|
||||||
if(m.reasoning) return true;
|
if(m.reasoning) return true;
|
||||||
if(Array.isArray(m.content)) return m.content.some(p=>p&&(p.type==='thinking'||p.type==='reasoning'));
|
if(Array.isArray(m.content)) return m.content.some(p=>p&&(p.type==='thinking'||p.type==='reasoning'));
|
||||||
return /<think>[\s\S]*?<\/think>|<\|channel>thought\n[\s\S]*?<channel\|>/.test(String(m.content||''));
|
return /<think>[\s\S]*?<\/think>|<\|channel>thought\n[\s\S]*?<channel\|>|<\|turn\|>thinking\n[\s\S]*?<turn\|>/.test(String(m.content||''));
|
||||||
}
|
}
|
||||||
function _assistantRoleHtml(tsTitle=''){
|
function _assistantRoleHtml(tsTitle=''){
|
||||||
const _bn=window._botName||'Hermes';
|
const _bn=window._botName||'Hermes';
|
||||||
@@ -1167,12 +1167,21 @@ function renderMessages(){
|
|||||||
content=content.replace(/<think>[\s\S]*?<\/think>\s*/,'').trimStart();
|
content=content.replace(/<think>[\s\S]*?<\/think>\s*/,'').trimStart();
|
||||||
}
|
}
|
||||||
if(!thinkingText){
|
if(!thinkingText){
|
||||||
|
// The variable name "gemmaMatch" is historical; this block matches the MiniMax <|channel>thought format.
|
||||||
const gemmaMatch=content.match(/<\|channel>thought\n([\s\S]*?)<channel\|>/);
|
const gemmaMatch=content.match(/<\|channel>thought\n([\s\S]*?)<channel\|>/);
|
||||||
if(gemmaMatch){
|
if(gemmaMatch){
|
||||||
thinkingText=gemmaMatch[1].trim();
|
thinkingText=gemmaMatch[1].trim();
|
||||||
content=content.replace(/<\|channel>thought\n[\s\S]*?<channel\|>\s*/,'').trimStart();
|
content=content.replace(/<\|channel>thought\n[\s\S]*?<channel\|>\s*/,'').trimStart();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if(!thinkingText){
|
||||||
|
// Gemma 4 uses asymmetric <|turn|>thinking\n...<turn|> delimiters.
|
||||||
|
const gemmaTurnMatch=content.match(/<\|turn\|>thinking\n([\s\S]*?)<turn\|>/);
|
||||||
|
if(gemmaTurnMatch){
|
||||||
|
thinkingText=gemmaTurnMatch[1].trim();
|
||||||
|
content=content.replace(/<\|turn\|>thinking\n[\s\S]*?<turn\|>\s*/,'').trimStart();
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
const isUser=m.role==='user';
|
const isUser=m.role==='user';
|
||||||
const isLastAssistant=!isUser&&vi===visWithIdx.length-1;
|
const isLastAssistant=!isUser&&vi===visWithIdx.length-1;
|
||||||
|
|||||||
98
tests/test_issue607.py
Normal file
98
tests/test_issue607.py
Normal file
@@ -0,0 +1,98 @@
|
|||||||
|
"""Tests for PR #648 — Gemma 4 thinking token stripping (closes #607)."""
|
||||||
|
import re
|
||||||
|
import pathlib
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# _strip_thinking_markup tests
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
from api.streaming import _strip_thinking_markup, _looks_invalid_generated_title
|
||||||
|
|
||||||
|
|
||||||
|
class TestGemma4ThinkingTokenStrip:
|
||||||
|
"""Verify that <|turn|>thinking\n...\n<turn|> blocks are stripped."""
|
||||||
|
|
||||||
|
def test_strip_gemma4_basic(self):
|
||||||
|
"""Basic Gemma 4 thinking block stripped, answer kept."""
|
||||||
|
raw = "<|turn|>thinking\nSome internal reasoning\n<turn|>Final answer"
|
||||||
|
result = _strip_thinking_markup(raw)
|
||||||
|
assert result == "Final answer"
|
||||||
|
|
||||||
|
def test_strip_gemma4_multiline_reasoning(self):
|
||||||
|
"""Multi-line reasoning block stripped cleanly."""
|
||||||
|
raw = "<|turn|>thinking\nLine 1\nLine 2\nLine 3\n<turn|>Answer here"
|
||||||
|
result = _strip_thinking_markup(raw)
|
||||||
|
assert result == "Answer here"
|
||||||
|
|
||||||
|
def test_strip_gemma4_no_thinking_passthrough(self):
|
||||||
|
"""Normal response without thinking tokens passes through unchanged."""
|
||||||
|
raw = "Normal response without thinking tokens"
|
||||||
|
result = _strip_thinking_markup(raw)
|
||||||
|
assert result == raw
|
||||||
|
|
||||||
|
def test_strip_gemma4_with_leading_whitespace(self):
|
||||||
|
"""Leading whitespace before the thinking block is handled."""
|
||||||
|
raw = "\n\n<|turn|>thinking\nReasoning\n<turn|>Answer"
|
||||||
|
result = _strip_thinking_markup(raw)
|
||||||
|
assert result == "Answer"
|
||||||
|
|
||||||
|
def test_strip_gemma4_empty_reasoning(self):
|
||||||
|
"""Empty reasoning block (just delimiters) is stripped."""
|
||||||
|
raw = "<|turn|>thinking\n<turn|>Response"
|
||||||
|
result = _strip_thinking_markup(raw)
|
||||||
|
assert result == "Response"
|
||||||
|
|
||||||
|
def test_strip_gemma4_case_insensitive(self):
|
||||||
|
"""Pattern is case-insensitive (though Gemma 4 uses fixed case)."""
|
||||||
|
raw = "<|TURN|>THINKING\nreasoning\n<TURN|>answer"
|
||||||
|
result = _strip_thinking_markup(raw)
|
||||||
|
# The regex uses re.IGNORECASE — should strip uppercase variant too
|
||||||
|
assert "THINKING" not in result
|
||||||
|
assert "reasoning" not in result
|
||||||
|
|
||||||
|
def test_existing_think_tag_still_works(self):
|
||||||
|
"""Ensure <think>...</think> still stripped (no regression)."""
|
||||||
|
raw = "<think>inner reasoning</think>Final"
|
||||||
|
result = _strip_thinking_markup(raw)
|
||||||
|
assert result == "Final"
|
||||||
|
|
||||||
|
def test_existing_channel_tag_still_works(self):
|
||||||
|
"""Ensure <|channel|>thought...<channel|> still stripped (no regression)."""
|
||||||
|
raw = "<|channel|>thoughtSome reasoning<channel|>Answer"
|
||||||
|
result = _strip_thinking_markup(raw)
|
||||||
|
assert result == "Answer"
|
||||||
|
|
||||||
|
|
||||||
|
class TestGemma4TitleLeakDetection:
|
||||||
|
"""Verify _looks_invalid_generated_title catches Gemma 4 leak."""
|
||||||
|
|
||||||
|
def test_detects_gemma4_leak_in_title(self):
|
||||||
|
raw = "<|turn|>thinking\nUser asked about X\n<turn|>Session Title"
|
||||||
|
assert _looks_invalid_generated_title(raw) is True
|
||||||
|
|
||||||
|
def test_clean_title_not_flagged(self):
|
||||||
|
assert _looks_invalid_generated_title("Python debugging session") is False
|
||||||
|
|
||||||
|
|
||||||
|
class TestGemma4MessagesJsThinkPairs:
|
||||||
|
"""Verify static/messages.js contains the correct Gemma 4 pair."""
|
||||||
|
|
||||||
|
def test_messages_js_has_correct_gemma4_open(self):
|
||||||
|
js = pathlib.Path("static/messages.js").read_text()
|
||||||
|
# Must have double-pipe format: <|turn|>thinking
|
||||||
|
assert "<|turn|>thinking" in js, (
|
||||||
|
"messages.js is missing correct Gemma 4 open delimiter '<|turn|>thinking'"
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_messages_js_no_wrong_gemma4_open(self):
|
||||||
|
js = pathlib.Path("static/messages.js").read_text()
|
||||||
|
# Must NOT have single-pipe wrong format: <|turn>thinking
|
||||||
|
assert "<|turn>thinking" not in js, (
|
||||||
|
"messages.js still contains wrong Gemma 4 delimiter '<|turn>thinking' (missing |)"
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_messages_js_has_gemma4_close(self):
|
||||||
|
js = pathlib.Path("static/messages.js").read_text()
|
||||||
|
assert "<turn|>" in js, "messages.js missing Gemma 4 close delimiter '<turn|>'"
|
||||||
@@ -57,6 +57,35 @@ def test_gemma_content_removal_uses_replace_not_slice():
|
|||||||
"ui.js must call .trimStart() on content after removing the Gemma channel block"
|
"ui.js must call .trimStart() on content after removing the Gemma channel block"
|
||||||
|
|
||||||
|
|
||||||
|
def test_gemma_turn_regex_in_ui_js():
|
||||||
|
"""The Gemma 4 <|turn|>thinking\\n...<turn|> pattern must be extracted from persisted content."""
|
||||||
|
# Detection in _messageHasReasoningPayload (correct double-pipe format)
|
||||||
|
assert "<\\|turn\\|>thinking" in UI_JS, (
|
||||||
|
"ui.js _messageHasReasoningPayload must detect Gemma 4 <|turn|>thinking\\n...<turn|> pattern"
|
||||||
|
" (note: double-pipe: <|turn|> not <|turn>)"
|
||||||
|
)
|
||||||
|
# Extraction block
|
||||||
|
match = re.search(r'const gemmaTurnMatch=content\.match\((/[^/]+/)\)', UI_JS)
|
||||||
|
assert match, "gemmaTurnMatch line not found in ui.js"
|
||||||
|
pattern = match.group(1)
|
||||||
|
assert not pattern.startswith('/^'), (
|
||||||
|
f"gemmaTurnMatch regex must not use ^ anchor — got {pattern}"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_gemma_turn_content_removal_uses_replace_not_slice():
|
||||||
|
"""Gemma 4 turn token removal must use .replace() not .slice()."""
|
||||||
|
idx = UI_JS.find("if(gemmaTurnMatch){")
|
||||||
|
assert idx >= 0, "gemmaTurnMatch handler block not found in ui.js"
|
||||||
|
block = UI_JS[idx:idx+240]
|
||||||
|
assert "content.replace(" in block, (
|
||||||
|
"ui.js must use content.replace() to remove Gemma 4 turn block (not .slice())"
|
||||||
|
)
|
||||||
|
assert ".trimStart()" in block, (
|
||||||
|
"ui.js must call .trimStart() on content after removing the Gemma 4 turn block"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
# ── messages.js: streaming render path ───────────────────────────────────────
|
# ── messages.js: streaming render path ───────────────────────────────────────
|
||||||
|
|
||||||
def test_stream_display_trims_before_startswith():
|
def test_stream_display_trims_before_startswith():
|
||||||
|
|||||||
Reference in New Issue
Block a user