diff --git a/api/streaming.py b/api/streaming.py
index 0e1d6a4..fb0eba7 100644
--- a/api/streaming.py
+++ b/api/streaming.py
@@ -76,8 +76,16 @@ def _strip_thinking_markup(text: str) -> str:
 def _sanitize_generated_title(text: str) -> str:
     """Sanitize LLM-generated title text before persisting to session."""
     s = _strip_thinking_markup(text or '')
+    # Drop a leading "Title:" / "Session Title:" label, optionally wrapped in
+    # markdown emphasis; accept both ASCII ':' and full-width U+FF1A colons.
+    s = re.sub(
+        r'^\s*(?:[*_`~]+\s*)?(?:session\s+title|title)\s*[:\uFF1A]\s*(?:[*_`~]+\s*)?',
+        '',
+        s,
+        flags=re.IGNORECASE,
+    )
     s = re.sub(r'^\s*title\s*:\s*', '', s, flags=re.IGNORECASE)
-    s = s.strip(" \t\r\n\"'`")
+    s = s.strip(" \t\r\n\"'`*_~")
     s = re.sub(r'\s+', ' ', s).strip()
     # Guard against chain-of-thought leakage and meta-reasoning patterns.
     if _looks_invalid_generated_title(s):
@@ -128,7 +136,9 @@ def _first_exchange_snippets(messages):
         if role == 'user' and not user_text:
             user_text = _message_text(m.get('content'))
         elif role == 'assistant' and not asst_text:
-            asst_text = _message_text(m.get('content'))
+            candidate = _message_text(m.get('content'))
+            if candidate:
+                asst_text = candidate
         if user_text and asst_text:
             break
     return user_text[:500], asst_text[:500]
@@ -149,6 +159,7 @@ def _title_prompts(user_text: str, assistant_text: str) -> tuple[str, list[str]]
             "Generate a short session title from this conversation start.\n"
             "Use BOTH the user's question and the assistant's visible answer.\n"
             "Return only the title text, 3-8 words, as a topic label.\n"
+            "Do not use markdown, bullets, labels, or prefixes like Session Title:.\n"
             "Do not output a full sentence.\n"
             "Do not output acknowledgements or completion phrases like OK, done, all set, 测试完成.\n"
             "Do not describe internal reasoning.\n"
@@ -159,6 +170,7 @@ def _title_prompts(user_text: str, assistant_text: str) -> tuple[str, list[str]]
             "Rewrite this conversation start as a concise noun-phrase title.\n"
             "Use the actual topic, not the task outcome.\n"
             "Return title text only.\n"
+            "Do not use markdown, bullets, labels, or prefixes like Session Title:.\n"
             "Never output acknowledgements, completion status, or meta commentary."
         ),
     ]
diff --git a/tests/test_title_sanitization.py b/tests/test_title_sanitization.py
new file mode 100644
index 0000000..89f00f4
--- /dev/null
+++ b/tests/test_title_sanitization.py
@@ -0,0 +1,41 @@
+import unittest
+
+from api.streaming import _first_exchange_snippets, _sanitize_generated_title
+
+
+class TestGeneratedTitleSanitization(unittest.TestCase):
+    def test_strips_session_title_markdown_prefix(self):
+        self.assertEqual(
+            _sanitize_generated_title("**Session Title:** Clarifying Topic for Discussion"),
+            "Clarifying Topic for Discussion",
+        )
+
+    def test_strips_plain_title_prefix(self):
+        self.assertEqual(
+            _sanitize_generated_title("Title: Clarifying Topic for Discussion"),
+            "Clarifying Topic for Discussion",
+        )
+
+    def test_strips_title_prefix_with_fullwidth_colon(self):
+        self.assertEqual(
+            _sanitize_generated_title("Title\uff1a Clarifying Topic for Discussion"),
+            "Clarifying Topic for Discussion",
+        )
+
+    def test_strips_wrapping_markdown_emphasis(self):
+        self.assertEqual(
+            _sanitize_generated_title("**Clarifying Topic for Discussion**"),
+            "Clarifying Topic for Discussion",
+        )
+
+    def test_first_exchange_skips_empty_assistant_tool_call_placeholder(self):
+        messages = [
+            {"role": "user", "content": "What time is it in San Francisco?"},
+            {"role": "assistant", "content": "", "tool_calls": [{"id": "call_1"}]},
+            {"role": "tool", "content": "tool output", "tool_call_id": "call_1"},
+            {"role": "assistant", "content": "It is 6:16 PM in San Francisco."},
+        ]
+        self.assertEqual(
+            _first_exchange_snippets(messages),
+            ("What time is it in San Francisco?", "It is 6:16 PM in San Francisco."),
+        )