From 9220a876bcc8f2a651212ed8a3955ecb602fdcad Mon Sep 17 00:00:00 2001 From: Hermes Agent Date: Wed, 15 Apr 2026 16:57:31 +0000 Subject: [PATCH 1/2] fix: strip orphaned tool messages before sending history to API (fixes #534) Extends _sanitize_messages_for_api() with a two-pass approach: 1. Collect all tool_call_ids declared in assistant messages (handles both OpenAI 'id' and Anthropic 'call_id' field names). 2. Drop any tool-role messages whose tool_call_id was not declared by a preceding assistant message. Strictly-conformant providers (Mercury-2/Inception, newer OpenAI models) reject histories with orphaned tool results with a 400 error: 'Message has tool role, but there was no previous assistant message with a tool call.' This can happen when histories are edited, when switching between providers, or when partial messages are stored. Adds 13 regression tests covering: valid roundtrip preservation, multiple tool calls, partial orphan filtering, Anthropic call_id, edge cases (None tool_calls, missing tool_call_id, non-dict entries). --- api/streaming.py | 26 ++++ tests/test_orphaned_tool_messages.py | 175 +++++++++++++++++++++++++++ 2 files changed, 201 insertions(+) create mode 100644 tests/test_orphaned_tool_messages.py diff --git a/api/streaming.py b/api/streaming.py index 23189a8..f77ce82 100644 --- a/api/streaming.py +++ b/api/streaming.py @@ -64,11 +64,37 @@ def _sanitize_messages_for_api(messages): The webui stores extra metadata on messages (attachments, timestamp, _ts) for display purposes. Some providers (e.g. Z.AI/GLM) reject unknown fields instead of ignoring them, causing HTTP 400 errors on subsequent messages. + + Also strips orphaned tool-role messages whose tool_call_id cannot be linked + to a preceding assistant message with tool_calls. Strictly-conformant providers + (Mercury-2/Inception, newer OpenAI models) reject histories containing dangling + tool results with a 400 error: "Message has tool role, but there was no previous + assistant message with a tool call." """ + # First pass: collect all tool_call_ids declared by assistant messages. + # Handles both OpenAI ('id') and Anthropic ('call_id') field names. + valid_tool_call_ids: set = set() + for msg in messages: + if not isinstance(msg, dict): + continue + if msg.get('role') == 'assistant': + for tc in msg.get('tool_calls') or []: + if isinstance(tc, dict): + tid = tc.get('id') or tc.get('call_id') or '' + if tid: + valid_tool_call_ids.add(tid) + + # Second pass: build the sanitized list, dropping orphaned tool messages. clean = [] for msg in messages: if not isinstance(msg, dict): continue + role = msg.get('role') + if role == 'tool': + tid = msg.get('tool_call_id') or '' + if not tid or tid not in valid_tool_call_ids: + # Orphaned tool result — skip to avoid 400 from strict providers. + continue sanitized = {k: v for k, v in msg.items() if k in _API_SAFE_MSG_KEYS} if sanitized.get('role'): clean.append(sanitized) diff --git a/tests/test_orphaned_tool_messages.py b/tests/test_orphaned_tool_messages.py new file mode 100644 index 0000000..7df71ec --- /dev/null +++ b/tests/test_orphaned_tool_messages.py @@ -0,0 +1,175 @@ +"""Tests for _sanitize_messages_for_api() orphaned-tool-message stripping. + +Regression for issue #534: strictly-conformant providers (Mercury-2/Inception, +newer OpenAI models) reject histories containing tool-role messages whose +tool_call_id has no matching tool_calls entry in a prior assistant message. +""" +import sys +import pathlib + +REPO_ROOT = pathlib.Path(__file__).parent.parent.resolve() +sys.path.insert(0, str(REPO_ROOT)) + +from api.streaming import _sanitize_messages_for_api + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _asst_with_tool_call(call_id="call-1", call_id_key="id"): + return { + "role": "assistant", + "content": None, + "tool_calls": [{"type": "function", call_id_key: call_id, "function": {"name": "terminal", "arguments": "{}"}}], + "_ts": 12345, # extra field that should be stripped + } + + +def _tool_result(call_id="call-1"): + return {"role": "tool", "tool_call_id": call_id, "content": "ok", "_ts": 12345} + + +def _user(text="hello"): + return {"role": "user", "content": text, "_ts": 12345} + + +def _asst(text="hi"): + return {"role": "assistant", "content": text, "_ts": 12345} + + +# --------------------------------------------------------------------------- +# Tests: normal valid histories are preserved +# --------------------------------------------------------------------------- + +def test_valid_tool_roundtrip_preserved(): + """A linked assistant→tool pair must be kept intact.""" + msgs = [_user(), _asst_with_tool_call("call-1"), _tool_result("call-1"), _asst()] + result = _sanitize_messages_for_api(msgs) + roles = [m["role"] for m in result] + assert roles == ["user", "assistant", "tool", "assistant"] + + +def test_extra_fields_stripped(): + """Non-API fields (_ts etc.) are always stripped.""" + msgs = [_user(), _asst()] + result = _sanitize_messages_for_api(msgs) + for m in result: + assert "_ts" not in m + + +def test_valid_history_without_tool_messages_unchanged(): + """Plain user/assistant history with no tool calls is passed through unchanged.""" + msgs = [_user("a"), _asst("b"), _user("c"), _asst("d")] + result = _sanitize_messages_for_api(msgs) + assert len(result) == 4 + assert all(m["role"] in ("user", "assistant") for m in result) + + +def test_multiple_valid_tool_calls_preserved(): + """Multiple linked tool_call_ids in one assistant message are all preserved.""" + asst = { + "role": "assistant", + "content": None, + "tool_calls": [ + {"type": "function", "id": "call-1", "function": {"name": "f1", "arguments": "{}"}}, + {"type": "function", "id": "call-2", "function": {"name": "f2", "arguments": "{}"}}, + ], + } + msgs = [_user(), asst, _tool_result("call-1"), _tool_result("call-2"), _asst()] + result = _sanitize_messages_for_api(msgs) + roles = [m["role"] for m in result] + assert roles == ["user", "assistant", "tool", "tool", "assistant"] + + +# --------------------------------------------------------------------------- +# Tests: orphaned tool messages are dropped +# --------------------------------------------------------------------------- + +def test_orphaned_tool_message_dropped(): + """A tool message with no matching assistant tool_call is dropped.""" + msgs = [_user(), _asst(), _tool_result("call-orphan")] + result = _sanitize_messages_for_api(msgs) + roles = [m["role"] for m in result] + assert "tool" not in roles + assert roles == ["user", "assistant"] + + +def test_tool_message_missing_tool_call_id_dropped(): + """A tool message with no tool_call_id at all is dropped.""" + msg = {"role": "tool", "content": "result"} + msgs = [_user(), _asst_with_tool_call("call-1"), msg] + result = _sanitize_messages_for_api(msgs) + roles = [m["role"] for m in result] + assert "tool" not in roles + + +def test_partially_orphaned_tool_messages(): + """In a mixed batch, only the orphaned tool messages are dropped.""" + asst = _asst_with_tool_call("call-valid") + msgs = [ + _user(), + asst, + _tool_result("call-valid"), # linked → kept + _tool_result("call-ghost"), # orphaned → dropped + _asst(), + ] + result = _sanitize_messages_for_api(msgs) + roles = [m["role"] for m in result] + assert roles == ["user", "assistant", "tool", "assistant"] + # The kept tool message has the right call_id + tool_msgs = [m for m in result if m["role"] == "tool"] + assert tool_msgs[0]["tool_call_id"] == "call-valid" + + +def test_orphaned_tool_only_history(): + """A history consisting only of orphaned tool messages returns empty.""" + msgs = [_tool_result("dangling-1"), _tool_result("dangling-2")] + result = _sanitize_messages_for_api(msgs) + assert result == [] + + +# --------------------------------------------------------------------------- +# Tests: Anthropic 'call_id' field name (not OpenAI 'id') +# --------------------------------------------------------------------------- + +def test_anthropic_call_id_field_recognized(): + """Anthropic tool calls use 'call_id' not 'id' — both must be recognized.""" + asst = _asst_with_tool_call("call-anthropic", call_id_key="call_id") + msgs = [_user(), asst, _tool_result("call-anthropic"), _asst()] + result = _sanitize_messages_for_api(msgs) + roles = [m["role"] for m in result] + assert roles == ["user", "assistant", "tool", "assistant"] + + +# --------------------------------------------------------------------------- +# Tests: edge cases +# --------------------------------------------------------------------------- + +def test_empty_messages_list(): + assert _sanitize_messages_for_api([]) == [] + + +def test_non_dict_messages_skipped(): + """Non-dict items in the messages list are silently ignored.""" + msgs = ["not a dict", None, _user("hi"), 42] + result = _sanitize_messages_for_api(msgs) + assert len(result) == 1 + assert result[0]["role"] == "user" + + +def test_tool_calls_none_does_not_crash(): + """An assistant message with tool_calls=None is handled without crashing.""" + asst = {"role": "assistant", "content": "hello", "tool_calls": None} + msgs = [_user(), asst, _tool_result("call-1")] + result = _sanitize_messages_for_api(msgs) + # call-1 has no valid parent (tool_calls=None → no IDs registered) → dropped + roles = [m["role"] for m in result] + assert "tool" not in roles + + +def test_system_messages_preserved(): + """System messages are always preserved.""" + msgs = [{"role": "system", "content": "You are helpful."}, _user(), _asst()] + result = _sanitize_messages_for_api(msgs) + assert result[0]["role"] == "system" From ce4e01ea9272fa54ca69447731ccf9b4c0afcb28 Mon Sep 17 00:00:00 2001 From: Hermes Agent Date: Wed, 15 Apr 2026 19:06:54 +0000 Subject: [PATCH 2/2] chore: bump version to v0.50.51, update CHANGELOG --- CHANGELOG.md | 5 +++++ static/index.html | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f33c9a2..6602d46 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # Hermes Web UI -- Changelog +## [v0.50.51] — 2026-04-15 + +### Fixed +- **Orphaned tool messages** — conversation histories containing `role: tool` messages with no matching `tool_call_id` in a prior assistant message are now silently stripped before sending to the provider API. Fixes 400 errors from strictly-conformant providers (Mercury-2/Inception, newer OpenAI models). Adds 13 regression tests. (Fixes #534) + ## [v0.50.50] — 2026-04-15 ### Fixed diff --git a/static/index.html b/static/index.html index ef38643..347b366 100644 --- a/static/index.html +++ b/static/index.html @@ -552,7 +552,7 @@
System
- v0.50.50 + v0.50.51