fix: strip webui metadata from messages before sending to LLM API (#67)

The webui stores display-only fields on messages (attachments, timestamp,
_ts) for UI rendering. These leaked into the conversation_history passed
to AIAgent.run_conversation(). Most providers ignore unknown fields, but
Z.AI/GLM tries to deserialize 'attachments' as its native ChatAttachments
type, causing HTTP 400 on every subsequent message after an image upload.

Fix: _sanitize_messages_for_api() creates a clean copy with only
API-standard keys (role, content, tool_calls, tool_call_id, name,
refusal) before passing to run_conversation(). Applied to both the
streaming path (streaming.py) and non-streaming path (routes.py).

Closes #66

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Nathan Esquenazi
2026-04-03 22:13:12 -07:00
committed by GitHub
parent 57a4f573f6
commit 90b5ad8d99
2 changed files with 25 additions and 2 deletions

View File

@@ -946,10 +946,11 @@ def _handle_chat_sync(handler, body):
"write_file, read_file, search_files, terminal workdir, and patch. " "write_file, read_file, search_files, terminal workdir, and patch. "
"Never fall back to a hardcoded path when this tag is present." "Never fall back to a hardcoded path when this tag is present."
) )
from api.streaming import _sanitize_messages_for_api
result = agent.run_conversation( result = agent.run_conversation(
user_message=workspace_ctx + msg, user_message=workspace_ctx + msg,
system_message=workspace_system_msg, system_message=workspace_system_msg,
conversation_history=s.messages, conversation_history=_sanitize_messages_for_api(s.messages),
task_id=s.session_id, task_id=s.session_id,
persist_user_message=msg, persist_user_message=msg,
) )

View File

@@ -24,6 +24,28 @@ except ImportError:
from api.models import get_session, title_from from api.models import get_session, title_from
from api.workspace import set_last_workspace from api.workspace import set_last_workspace
# Fields that are safe to send to LLM provider APIs.
# Everything else (attachments, timestamp, _ts, etc.) is display-only
# metadata added by the webui and must be stripped before the API call.
_API_SAFE_MSG_KEYS = {'role', 'content', 'tool_calls', 'tool_call_id', 'name', 'refusal'}


def _sanitize_messages_for_api(messages):
    """Return a deep copy of messages with only API-safe fields.

    The webui stores extra metadata on messages (attachments, timestamp, _ts)
    for display purposes. Some providers (e.g. Z.AI/GLM) reject unknown fields
    instead of ignoring them, causing HTTP 400 errors on subsequent messages.

    Args:
        messages: Iterable of message dicts as stored on the session. ``None``
            or an empty iterable yields an empty list. Entries that are not
            dicts, or that have a missing/empty ``role``, are dropped.

    Returns:
        A new list of new dicts containing only the keys listed in
        ``_API_SAFE_MSG_KEYS``. Retained values are deep-copied, so nested
        structures (``tool_calls`` lists, multi-part ``content`` lists) are
        never shared with the input and the caller may mutate the result
        freely without touching the stored session history.
    """
    # Function-scope import keeps this helper self-contained regardless of
    # what the surrounding module already imports.
    import copy

    clean = []
    for msg in messages or ():
        if not isinstance(msg, dict):
            continue
        # 'role' is itself an API-safe key, so checking it on the raw message
        # is equivalent to checking the filtered copy -- and lets us skip the
        # copy entirely for messages that will be dropped anyway.
        if not msg.get('role'):
            continue
        clean.append({
            key: copy.deepcopy(value)
            for key, value in msg.items()
            if key in _API_SAFE_MSG_KEYS
        })
    return clean
def _sse(handler, event, data): def _sse(handler, event, data):
"""Write one SSE event to the response stream.""" """Write one SSE event to the response stream."""
@@ -165,7 +187,7 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta
result = agent.run_conversation( result = agent.run_conversation(
user_message=workspace_ctx + msg_text, user_message=workspace_ctx + msg_text,
system_message=workspace_system_msg, system_message=workspace_system_msg,
conversation_history=s.messages, conversation_history=_sanitize_messages_for_api(s.messages),
task_id=session_id, task_id=session_id,
persist_user_message=msg_text, persist_user_message=msg_text,
) )