fix: silent agent errors, stale model list, live model fetching (#377)
* fix: silent errors, stale models, live model fetching (#373, #374, #375)

  - api/streaming.py: detect empty agent response (_assistant_added check), emit apperror(type='no_response' or 'auth_mismatch') instead of silent done
  - api/streaming.py: add _token_sent flag so guard works for streaming agents
  - static/messages.js: done handler belt-and-suspenders guard for zero replies
  - static/messages.js: apperror handler labels 'no_response' type distinctly
  - api/config.py: remove gpt-4o and o3 from _FALLBACK_MODELS and _PROVIDER_MODELS['openai'] (superseded by gpt-5.4-mini and o4-mini)
  - api/routes.py: new /api/models/live?provider= endpoint, fetches /v1/models from provider API with B310 scheme check + SSRF guard
  - static/ui.js: _fetchLiveModels() background fetch after static list loads, appends new models to dropdown, caches per session, skips unsupported providers

  Other:
  - tests/test_issues_373_374_375.py: 25 new structural tests
  - tests/test_regressions.py: extend done-handler window 1500->2500 chars
  - CHANGELOG.md: v0.50.19 entry; 947 tests (up from 922)

* fix: SSRF hostname bypass + auth detection operator precedence

  1. routes.py: SSRF guard used substring matching (any(k in hostname)) which allows bypass via hostnames like evil-ollama.attacker.com. Changed to exact hostname matching against a fixed set of known local hostnames (localhost, 127.0.0.1, 0.0.0.0, ::1).
  2. streaming.py: _is_auth detection had a Python operator precedence bug on the ternary expression. The line: 'AuthenticationError' in type(...).__name__ if _last_err else False parsed as the ternary absorbing the rest of the or-chain when _last_err was falsy. Fixed to: (_last_err and 'AuthenticationError' in ...)

  Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* docs: fix v0.50.20 CHANGELOG version number and test count (949 tests)

---------

Co-authored-by: Nathan Esquenazi <nesquena@gmail.com>
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -163,9 +163,13 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta
|
||||
logger.debug("Approval module not available, falling back to polling")
|
||||
|
||||
try:
|
||||
_token_sent = False # tracks whether any streamed tokens were sent
|
||||
|
||||
def on_token(text):
|
||||
nonlocal _token_sent
|
||||
if text is None:
|
||||
return # end-of-stream sentinel
|
||||
_token_sent = True
|
||||
put('token', {'text': text})
|
||||
|
||||
def on_tool(name, preview, args):
|
||||
@@ -308,6 +312,45 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta
|
||||
)
|
||||
s.messages = result.get('messages') or s.messages
|
||||
|
||||
# ── Detect silent agent failure (no assistant reply produced) ──
|
||||
# When the agent catches an auth/network error internally it may return
|
||||
# an empty final_response without raising — the stream would end with
|
||||
# a done event containing zero assistant messages, leaving the user with
|
||||
# no feedback. Emit an apperror so the client shows an inline error.
|
||||
_assistant_added = any(
|
||||
m.get('role') == 'assistant' and str(m.get('content') or '').strip()
|
||||
for m in (result.get('messages') or [])
|
||||
)
|
||||
# _token_sent is True once on_token() has received real text (the None end-of-stream sentinel does not set it)
|
||||
if not _assistant_added and not _token_sent:
|
||||
_last_err = getattr(agent, '_last_error', None) or result.get('error') or ''
|
||||
_err_str = str(_last_err) if _last_err else ''
|
||||
_is_auth = (
|
||||
'401' in _err_str
|
||||
or (_last_err and 'AuthenticationError' in type(_last_err).__name__)
|
||||
or 'authentication' in _err_str.lower()
|
||||
or 'unauthorized' in _err_str.lower()
|
||||
or 'invalid api key' in _err_str.lower()
|
||||
or 'invalid_api_key' in _err_str.lower()
|
||||
)
|
||||
if _is_auth:
|
||||
put('apperror', {
|
||||
'message': _err_str or 'Authentication failed — check your API key.',
|
||||
'type': 'auth_mismatch',
|
||||
'hint': (
|
||||
'The selected model may not be supported by your configured provider or '
|
||||
'your API key is invalid. Run `hermes model` in your terminal to '
|
||||
'update credentials, then restart the WebUI.'
|
||||
),
|
||||
})
|
||||
else:
|
||||
put('apperror', {
|
||||
'message': _err_str or 'The agent returned no response. Check your API key and model selection.',
|
||||
'type': 'no_response',
|
||||
'hint': 'Verify your API key is valid and the selected model is available for your account.',
|
||||
})
|
||||
return # Don't emit done — the apperror already closes the stream on the client
|
||||
|
||||
# ── Handle context compression side effects ──
|
||||
# If compression fired inside run_conversation, the agent may have
|
||||
# rotated its session_id. Detect and fix the mismatch so the WebUI
|
||||
|
||||
Reference in New Issue
Block a user