Files
webui/tests/test_issues_373_374_375.py
nesquena-hermes 7a80e73eb2 fix: silent agent errors, stale model list, live model fetching (#377)
* fix: silent errors, stale models, live model fetching (#373, #374, #375)

- api/streaming.py: detect empty agent response (_assistant_added check),
  emit apperror(type='no_response' or 'auth_mismatch') instead of silent done
- api/streaming.py: add _token_sent flag so guard works for streaming agents
- static/messages.js: done handler belt-and-suspenders guard for zero replies
- static/messages.js: apperror handler labels 'no_response' type distinctly

- api/config.py: remove gpt-4o and o3 from _FALLBACK_MODELS and
  _PROVIDER_MODELS['openai'] (superseded by gpt-5.4-mini and o4-mini)

- api/routes.py: new /api/models/live?provider= endpoint, fetches /v1/models
  from provider API with B310 scheme check + SSRF guard
- static/ui.js: _fetchLiveModels() background fetch after static list loads,
  appends new models to dropdown, caches per session, skips unsupported providers

Other:
- tests/test_issues_373_374_375.py: 25 new structural tests
- tests/test_regressions.py: extend done-handler window 1500->2500 chars
- CHANGELOG.md: v0.50.19 entry; 947 tests (up from 922)

* fix: SSRF hostname bypass + auth detection operator precedence

1. routes.py: SSRF guard used substring matching (any(k in hostname))
   which allows bypass via hostnames like evil-ollama.attacker.com.
   Changed to exact hostname matching against a fixed set of known
   local hostnames (localhost, 127.0.0.1, 0.0.0.0, ::1).

2. streaming.py: _is_auth detection had a Python operator precedence
   bug on the ternary expression. The line:
     'AuthenticationError' in type(...).__name__ if _last_err else False
   parsed as the ternary absorbing the rest of the or-chain when
   _last_err was falsy. Fixed to: (_last_err and 'AuthenticationError' in ...)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* docs: fix v0.50.20 CHANGELOG version number and test count (949 tests)

---------

Co-authored-by: Nathan Esquenazi <nesquena@gmail.com>
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-13 15:52:35 -07:00

222 lines
11 KiB
Python

"""
Tests for issues #373, #374, and #375.
#373: Chat silently swallows errors — no feedback when agent fails to respond
#374: Remove stale OpenAI models from default list (gpt-4o, o3)
#375: Model dropdown should fetch live models from provider
"""
import pathlib
import re
# Root directory used to locate the sources under test: two directory levels
# above this test module.
REPO = pathlib.Path(__file__).parent.parent
_API_DIR = REPO / "api"
_STATIC_DIR = REPO / "static"

# Each source file is read once at import time; the tests below only perform
# substring searches on these texts and never import or execute the modules.
STREAMING_PY = (_API_DIR / "streaming.py").read_text(encoding="utf-8")
CONFIG_PY = (_API_DIR / "config.py").read_text(encoding="utf-8")
ROUTES_PY = (_API_DIR / "routes.py").read_text(encoding="utf-8")
MESSAGES_JS = (_STATIC_DIR / "messages.js").read_text(encoding="utf-8")
UI_JS = (_STATIC_DIR / "ui.js").read_text(encoding="utf-8")
# ── Issue #373: Silent error detection ──────────────────────────────────────
class TestSilentErrorDetection:
    """Issue #373: a run that yields no assistant reply must surface an error.

    Structural checks only: every test searches the text of
    ``api/streaming.py`` (``STREAMING_PY``) or ``static/messages.js``
    (``MESSAGES_JS``) for identifiers and strings the fix introduces.
    """

    def test_streaming_detects_no_assistant_reply(self):
        """streaming.py must reference the ``_assistant_added`` check."""
        marker_present = "_assistant_added" in STREAMING_PY
        assert marker_present, (
            "streaming.py must check whether an assistant message was produced (#373)"
        )

    def test_streaming_emits_apperror_on_no_response(self):
        """streaming.py must mention the ``no_response`` error type."""
        type_present = "no_response" in STREAMING_PY
        assert type_present, (
            "streaming.py must emit apperror with type='no_response' for silent failures (#373)"
        )

    def test_streaming_returns_early_after_apperror(self):
        """An early ``return`` must follow the quoted ``'no_response'`` literal.

        The return statement is located by its trailing comment text, and the
        search starts at the position of the ``'no_response'`` literal.
        """
        error_type_at = STREAMING_PY.find("'no_response'")
        assert error_type_at != -1, "no_response type not found in streaming.py"

        early_return_at = STREAMING_PY.find("return # Don't emit done", error_type_at)
        assert early_return_at != -1, (
            "streaming.py must return after emitting apperror to prevent also emitting done (#373)"
        )
        assert early_return_at > error_type_at

    def test_streaming_detects_auth_error_in_result(self):
        """streaming.py must contain both ``_is_auth`` and ``auth_mismatch``."""
        detects_auth = "_is_auth" in STREAMING_PY
        assert detects_auth, (
            "streaming.py must detect auth errors in silent failures (#373)"
        )
        emits_mismatch = "auth_mismatch" in STREAMING_PY
        assert emits_mismatch, (
            "streaming.py must emit auth_mismatch type for auth failures (#373)"
        )

    def test_messages_js_done_handler_detects_no_reply(self):
        """messages.js must guard against a ``done`` event with zero replies.

        Either the named flag ``hasAssistantReply`` or the inlined
        ``role==='assistant'`` comparison (together with the user-facing
        message) satisfies the guard check.
        """
        if "hasAssistantReply" in MESSAGES_JS:
            guard_found = True
        else:
            guard_found = (
                "role==='assistant'" in MESSAGES_JS
                and "No response received" in MESSAGES_JS
            )
        assert guard_found, (
            "messages.js done handler must detect zero assistant replies (#373)"
        )

        message_found = "No response received" in MESSAGES_JS
        assert message_found, (
            "messages.js must show 'No response received' inline message (#373)"
        )

    def test_messages_js_handles_no_response_apperror_type(self):
        """messages.js must mention ``isNoResponse`` or ``no_response``."""
        accepted_tokens = ("isNoResponse", "no_response")
        assert any(token in MESSAGES_JS for token in accepted_tokens), (
            "messages.js apperror handler must handle type='no_response' (#373)"
        )

    def test_messages_js_no_response_label(self):
        """messages.js must contain the 'No response received' label text."""
        label_found = "No response received" in MESSAGES_JS
        assert label_found, (
            "messages.js must display 'No response received' label for no_response errors (#373)"
        )
# ── Issue #374: Stale model list cleanup ─────────────────────────────────────
class TestStaleModelListCleanup:
    """Issue #374: gpt-4o and o3 must be gone from the primary OpenAI lists.

    Structural checks against the text of ``api/config.py`` (``CONFIG_PY``).
    The list literals are located by a textual marker and sliced up to the
    next terminator; a marker that cannot be found fails the test instead of
    letting a "not in" assertion pass vacuously on an empty slice.
    """

    @staticmethod
    def _slice_block(text, marker, terminator):
        """Return *text* from *marker* up to (not including) *terminator*.

        Args:
            text: Source text to search.
            marker: Literal that opens the block of interest.
            terminator: Literal that closes it; searched for from *marker* on.

        Returns:
            The slice starting at *marker*, or ``None`` when *marker* does not
            occur. When *terminator* is missing the slice runs to the end of
            *text* (``str.find`` returning -1 must not be used as a slice
            bound, as that would silently drop the final character).
        """
        start = text.find(marker)
        if start == -1:
            return None
        end = text.find(terminator, start)
        if end == -1:
            return text[start:]
        return text[start:end]

    def _fallback_block(self):
        """Return the ``_FALLBACK_MODELS`` list text, failing if it is absent."""
        block = self._slice_block(CONFIG_PY, "_FALLBACK_MODELS = [", "]")
        assert block is not None, (
            "could not locate '_FALLBACK_MODELS = [' in config.py — "
            "the structural check for #374 would pass vacuously"
        )
        return block

    def _openai_block(self):
        """Return the ``"openai": [...]`` list text, failing if it is absent."""
        block = self._slice_block(CONFIG_PY, '"openai": [', "],")
        assert block is not None, (
            "could not locate '\"openai\": [' in config.py — "
            "the structural check for #374 would pass vacuously"
        )
        return block

    def test_gpt4o_removed_from_fallback_models(self):
        """_FALLBACK_MODELS must not contain gpt-4o (issue #374)."""
        assert "gpt-4o" not in self._fallback_block(), (
            "_FALLBACK_MODELS still contains gpt-4o — remove it per issue #374"
        )

    def test_o3_removed_from_fallback_models(self):
        """_FALLBACK_MODELS must not contain o3 as a quoted entry (issue #374)."""
        fallback_block = self._fallback_block()
        # Match the quoted forms only, so longer ids that merely contain "o3"
        # are not flagged.
        assert '"o3"' not in fallback_block and "'o3'" not in fallback_block, (
            "_FALLBACK_MODELS still contains o3 — remove it per issue #374"
        )

    def test_gpt4o_removed_from_provider_models_openai(self):
        """_PROVIDER_MODELS['openai'] must not contain gpt-4o (issue #374)."""
        assert "gpt-4o" not in self._openai_block(), (
            "_PROVIDER_MODELS['openai'] still contains gpt-4o — remove per issue #374"
        )

    def test_o3_removed_from_provider_models_openai(self):
        """_PROVIDER_MODELS['openai'] must not contain o3 as a quoted entry (issue #374)."""
        openai_block = self._openai_block()
        assert '"o3"' not in openai_block and "'o3'" not in openai_block, (
            "_PROVIDER_MODELS['openai'] still contains o3 — remove per issue #374"
        )

    def test_fallback_still_has_gpt54_mini(self):
        """config.py must still mention gpt-5.4-mini (not over-trimmed).

        The check is file-wide; it is not restricted to ``_FALLBACK_MODELS``.
        """
        assert "gpt-5.4-mini" in CONFIG_PY, (
            "_FALLBACK_MODELS must keep gpt-5.4-mini as primary OpenAI model (#374)"
        )

    def test_fallback_still_has_o4_mini(self):
        """config.py must still mention o4-mini (reasoning model).

        The check is file-wide; it is not restricted to ``_FALLBACK_MODELS``.
        """
        assert "o4-mini" in CONFIG_PY, (
            "_FALLBACK_MODELS must keep o4-mini as reasoning model (#374)"
        )

    def test_copilot_list_unchanged(self):
        """Copilot provider model list should still include gpt-4o (it's a valid Copilot model)."""
        copilot_block = self._slice_block(CONFIG_PY, '"copilot": [', "],")
        if copilot_block is None:
            return  # No copilot list — that's fine
        assert "gpt-4o" in copilot_block, (
            "Copilot provider model list should keep gpt-4o (it's available via Copilot) (#374)"
        )
# ── Issue #375: Live model fetching ─────────────────────────────────────────
class TestLiveModelFetching:
    """Issue #375: live model fetching must exist in backend and frontend.

    Structural checks only: each test searches the text of ``api/routes.py``
    (``ROUTES_PY``) or ``static/ui.js`` (``UI_JS``) for expected identifiers.
    """

    def test_live_models_endpoint_exists_in_routes(self):
        """routes.py must mention the /api/models/live path (#375)."""
        endpoint_present = "/api/models/live" in ROUTES_PY
        assert endpoint_present, (
            "routes.py must define /api/models/live endpoint (#375)"
        )

    def test_live_models_handler_function_exists(self):
        """routes.py must contain a ``def _handle_live_models(`` definition (#375)."""
        handler_present = "def _handle_live_models(" in ROUTES_PY
        assert handler_present, (
            "routes.py must define _handle_live_models() for live model fetching (#375)"
        )

    def test_live_models_handler_validates_scheme(self):
        """routes.py must show evidence of URL scheme validation (B310).

        Accepts either a ``nosec B310`` annotation or the presence of both
        ``scheme`` and ``http`` anywhere in the file.
        """
        annotated = "nosec B310" in ROUTES_PY
        checks_scheme = all(token in ROUTES_PY for token in ("scheme", "http"))
        assert annotated or checks_scheme, (
            "_handle_live_models must validate URL scheme before urlopen (#375)"
        )

    def test_live_models_handler_has_ssrf_guard(self):
        """routes.py must show evidence of an SSRF guard.

        Accepts either the ``ssrf_blocked`` marker or the presence of both
        ``is_private`` and ``live`` anywhere in the file.
        """
        blocked_marker = "ssrf_blocked" in ROUTES_PY
        private_ip_check = all(token in ROUTES_PY for token in ("is_private", "live"))
        assert blocked_marker or private_ip_check, (
            "_handle_live_models must have SSRF protection for private IP ranges (#375)"
        )

    def test_live_models_unsupported_providers_gracefully_handled(self):
        """routes.py must mention the ``not_supported`` result."""
        marker_present = "not_supported" in ROUTES_PY
        assert marker_present, (
            "_handle_live_models must return not_supported for Anthropic/Google (#375)"
        )

    def test_frontend_has_fetch_live_models_function(self):
        """ui.js must define _fetchLiveModels(), sync or async (#375)."""
        # "async function _fetchLiveModels(" contains this literal as a
        # substring, so one search covers both the sync and async spellings.
        defined = "function _fetchLiveModels(" in UI_JS
        assert defined, (
            "ui.js must define _fetchLiveModels() function (#375)"
        )

    def test_frontend_live_models_cache_exists(self):
        """ui.js must reference ``_liveModelCache`` (#375)."""
        cache_present = "_liveModelCache" in UI_JS
        assert cache_present, (
            "ui.js must use _liveModelCache to avoid re-fetching on every dropdown open (#375)"
        )

    def test_frontend_calls_live_models_after_static_load(self):
        """ui.js must reference ``_fetchLiveModels`` (#375)."""
        referenced = "_fetchLiveModels" in UI_JS
        assert referenced, (
            "populateModelDropdown must call _fetchLiveModels for background update (#375)"
        )

    def test_frontend_live_fetch_only_adds_new_models(self):
        """ui.js must reference ``existingIds`` (#375)."""
        tracks_ids = "existingIds" in UI_JS
        assert tracks_ids, (
            "_fetchLiveModels must track existing model IDs to avoid duplicates (#375)"
        )

    def test_frontend_live_fetch_skips_unsupported_providers(self):
        """ui.js must mention both ``anthropic`` and ``google`` (#375)."""
        unsupported = ("anthropic", "google")
        assert all(provider in UI_JS for provider in unsupported), (
            "_fetchLiveModels must skip Anthropic and Google (no /v1/models support) (#375)"
        )

    def test_live_models_endpoint_wired_in_routes(self):
        """The quoted route literal must appear after ``def handle_get(``.

        This compares text positions only; it does not verify that the route
        is inside the body of ``handle_get``.
        """
        handler_at = ROUTES_PY.find("def handle_get(")
        route_at = ROUTES_PY.find('"/api/models/live"')
        assert -1 not in (handler_at, route_at)
        assert route_at > handler_at, (
            "/api/models/live must be inside handle_get() (#375)"
        )