fix: silent agent errors, stale model list, live model fetching (#377)

* fix: silent errors, stale models, live model fetching (#373, #374, #375)

- api/streaming.py: detect empty agent response (_assistant_added check),
  emit apperror(type='no_response' or 'auth_mismatch') instead of silent done
- api/streaming.py: add _token_sent flag so guard works for streaming agents
- static/messages.js: done handler belt-and-suspenders guard for zero replies
- static/messages.js: apperror handler labels 'no_response' type distinctly

- api/config.py: remove gpt-4o and o3 from _FALLBACK_MODELS and
  _PROVIDER_MODELS['openai'] (superseded by gpt-5.4-mini and o4-mini)

- api/routes.py: new /api/models/live?provider= endpoint, fetches /v1/models
  from provider API with B310 scheme check + SSRF guard
- static/ui.js: _fetchLiveModels() background fetch after static list loads,
  appends new models to dropdown, caches per session, skips unsupported providers

Other:
- tests/test_issues_373_374_375.py: 25 new structural tests
- tests/test_regressions.py: extend done-handler window 1500->2500 chars
- CHANGELOG.md: v0.50.19 entry; 947 tests (up from 922)

* fix: SSRF hostname bypass + auth detection operator precedence

1. routes.py: SSRF guard used substring matching (any(k in hostname))
   which allows bypass via hostnames like evil-ollama.attacker.com.
   Changed to exact hostname matching against a fixed set of known
   local hostnames (localhost, 127.0.0.1, 0.0.0.0, ::1).

2. streaming.py: _is_auth detection had a Python operator precedence
   bug on the ternary expression. The line:
     'AuthenticationError' in type(...).__name__ if _last_err else False
   parsed with the conditional expression absorbing the preceding terms
   of the or-chain (a ternary binds more loosely than `or`), so when
   _last_err was falsy the earlier checks were skipped entirely.
   Fixed to: (_last_err and 'AuthenticationError' in ...)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* docs: fix v0.50.20 CHANGELOG version number and test count (949 tests)

---------

Co-authored-by: Nathan Esquenazi <nesquena@gmail.com>
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
nesquena-hermes
2026-04-13 15:52:35 -07:00
committed by GitHub
parent 78de40e015
commit 7a80e73eb2
8 changed files with 485 additions and 6 deletions

View File

@@ -406,8 +406,6 @@ CLI_TOOLSETS = get_config().get("platform_toolsets", {}).get("cli", _DEFAULT_TOO
# Hardcoded fallback models (used when no config.yaml or agent is available)
_FALLBACK_MODELS = [
{"provider": "OpenAI", "id": "openai/gpt-5.4-mini", "label": "GPT-5.4 Mini"},
{"provider": "OpenAI", "id": "openai/gpt-4o", "label": "GPT-4o"},
{"provider": "OpenAI", "id": "openai/o3", "label": "o3"},
{"provider": "OpenAI", "id": "openai/o4-mini", "label": "o4-mini"},
{
"provider": "Anthropic",
@@ -463,8 +461,6 @@ _PROVIDER_MODELS = {
],
"openai": [
{"id": "gpt-5.4-mini", "label": "GPT-5.4 Mini"},
{"id": "gpt-4o", "label": "GPT-4o"},
{"id": "o3", "label": "o3"},
{"id": "o4-mini", "label": "o4-mini"},
],
"openai-codex": [

View File

@@ -341,6 +341,9 @@ def handle_get(handler, parsed) -> bool:
if parsed.path == "/api/models":
return j(handler, get_available_models())
if parsed.path == "/api/models/live":
return _handle_live_models(handler, parsed)
if parsed.path == "/api/settings":
settings = load_settings()
# Never expose the stored password hash to clients
@@ -1410,6 +1413,144 @@ def _handle_approval_inject(handler, parsed):
return j(handler, {"error": "session_id required"}, status=400)
def _handle_live_models(handler, parsed):
    """Fetch the live model list from a provider's /v1/models endpoint.

    Returns the provider's actual model catalog so the UI can show all
    available models, not just the hardcoded fallback list.

    Query params:
        provider (optional) — provider ID to fetch for; defaults to the
            active provider from config.
        base_url (optional) — override the base URL for the provider.

    Providers that don't expose a /v1/models endpoint (Anthropic, Google)
    are not supported here — the caller should fall back to the static
    list. Supported: openai, openrouter, custom (any OpenAI-compatible
    endpoint).
    """
    import urllib.request as _ur
    import ipaddress as _ip
    import socket as _sock
    from urllib.parse import urlparse as _up

    qs = parse_qs(parsed.query)
    provider = (qs.get("provider", [""])[0] or "").lower().strip()
    base_url_override = (qs.get("base_url", [""])[0] or "").strip()
    try:
        from api.config import get_config as _gc

        cfg = _gc()
        active_provider = cfg.get("model", {}).get("provider") or ""
        if not provider:
            provider = active_provider

        # Resolve API key and base URL for this provider (best-effort —
        # on failure we fall back to the well-known default base URLs).
        api_key = None
        base_url = base_url_override or ""
        try:
            from hermes_cli.runtime_provider import resolve_runtime_provider

            rt = resolve_runtime_provider(requested=provider)
            api_key = rt.get("api_key")
            if not base_url:
                base_url = rt.get("base_url") or ""
        except Exception:
            pass

        # Determine the /v1/models endpoint URL.
        if not base_url:
            if provider in ("openai", "openai-codex", "copilot"):
                base_url = "https://api.openai.com/v1"
            elif provider == "openrouter":
                base_url = "https://openrouter.ai/api/v1"
            elif provider in ("anthropic", "google", "gemini"):
                # These providers don't expose an OpenAI-style /v1/models
                # endpoint in a standard way.
                return j(handler, {"error": "not_supported", "models": []})
            else:
                # Generic OpenAI-compatible provider with no known default.
                return j(handler, {"error": "no_base_url", "models": []})

        # Build the endpoint URL. After rstrip("/"), a base that already
        # contains a /v1 segment (".../v1", ".../api/v1", ...) just gets
        # "/models" appended; otherwise append the full "/v1/models".
        # (The original endswith("/v1") / "in" branches were identical.)
        base_url = base_url.rstrip("/")
        if "/v1" in base_url:
            endpoint_url = base_url + "/models"
        else:
            endpoint_url = base_url + "/v1/models"

        # Validate scheme (B310 guard): only plain HTTP(S) is allowed.
        parsed_ep = _up(endpoint_url)
        if parsed_ep.scheme not in ("http", "https"):
            return j(handler, {"error": "invalid_scheme", "models": []}, status=400)

        # SSRF guard: block URLs resolving to private/loopback addresses
        # unless the hostname is an EXACT match for a known local host.
        # Exact matching — NOT substring — prevents bypass via hostnames
        # like evil-ollama.attacker.com.
        _KNOWN_LOCAL_HOSTS = {"localhost", "127.0.0.1", "0.0.0.0", "::1"}
        if parsed_ep.hostname:
            hostname_lower = parsed_ep.hostname.lower()
            if hostname_lower not in _KNOWN_LOCAL_HOSTS:
                try:
                    for _, _, _, _, addr in _sock.getaddrinfo(parsed_ep.hostname, None):
                        # Strip any IPv6 zone id ("fe80::1%eth0") so
                        # ip_address() doesn't raise ValueError.
                        addr_obj = _ip.ip_address(str(addr[0]).split("%", 1)[0])
                        if addr_obj.is_private or addr_obj.is_loopback:
                            return j(handler, {"error": "ssrf_blocked", "models": []}, status=400)
                except _sock.gaierror:
                    # Unresolvable hostname: let urlopen surface the error.
                    pass

        # Fetch the provider's model catalog.
        req = _ur.Request(endpoint_url, method="GET")
        req.add_header("User-Agent", "HermesWebUI/1.0")
        if api_key:
            req.add_header("Authorization", f"Bearer {api_key}")
        with _ur.urlopen(req, timeout=8) as resp:  # nosec B310
            raw = resp.read().decode("utf-8")

        import json as _json

        data = _json.loads(raw)
        raw_models = data.get("data") or data.get("models") or []

        # Normalise to a deduplicated {id, label} list, skipping obvious
        # non-chat models (embeddings, audio, image, legacy completions).
        _SKIP_SUBSTRINGS = (
            "embed", "tts", "whisper", "dall-e",
            "davinci-edit", "babbage", "ada", "curie",
        )
        models = []
        seen = set()
        for m in raw_models:
            if not isinstance(m, dict):
                continue
            mid = m.get("id") or m.get("name") or ""
            if not mid or mid in seen:
                continue
            # OpenAI marks every entry object="model"; skip anything else
            # (missing "object" is tolerated for other providers).
            obj_type = (m.get("object") or "").lower()
            if obj_type and obj_type != "model":
                continue
            mid_lower = mid.lower()
            if any(skip in mid_lower for skip in _SKIP_SUBSTRINGS):
                continue
            seen.add(mid)
            label = m.get("name") or m.get("display_name") or mid
            if label == mid:
                # Bare id (OpenAI style): derive a readable label, then
                # restore canonical casing of well-known name tokens.
                # (Dropped a no-op .replace(".", ".") and a redundant
                # "gpt" entry that was dead after the "GPT" pass.)
                label = mid.replace("-", " ").title()
                for known in ("GPT", "o1", "o3", "o4"):
                    label = label.replace(known.title(), known)
            models.append({"id": mid, "label": label})

        # Newest-first: higher version numbers sort later lexicographically,
        # so a reverse sort on the id puts the newest models on top.
        models.sort(key=lambda entry: entry["id"], reverse=True)
        return j(handler, {"provider": provider, "models": models, "count": len(models)})
    except Exception as _e:
        # Never 500 the UI over a live-fetch failure — report the error so
        # the client can fall back to the static model list.
        logger.debug("Failed to fetch live models for %s: %s", provider, _e)
        return j(handler, {"error": str(_e), "models": []})
def _handle_cron_output(handler, parsed):
from cron.jobs import OUTPUT_DIR as CRON_OUT

View File

@@ -163,9 +163,13 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta
logger.debug("Approval module not available, falling back to polling")
try:
_token_sent = False # tracks whether any streamed tokens were sent
def on_token(text):
nonlocal _token_sent
if text is None:
return # end-of-stream sentinel
_token_sent = True
put('token', {'text': text})
def on_tool(name, preview, args):
@@ -308,6 +312,45 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta
)
s.messages = result.get('messages') or s.messages
# ── Detect silent agent failure (no assistant reply produced) ──
# When the agent catches an auth/network error internally it may return
# an empty final_response without raising — the stream would end with
# a done event containing zero assistant messages, leaving the user with
# no feedback. Emit an apperror so the client shows an inline error.
_assistant_added = any(
m.get('role') == 'assistant' and str(m.get('content') or '').strip()
for m in (result.get('messages') or [])
)
# _token_sent tracks whether on_token() was called (any streamed text)
if not _assistant_added and not _token_sent:
_last_err = getattr(agent, '_last_error', None) or result.get('error') or ''
_err_str = str(_last_err) if _last_err else ''
_is_auth = (
'401' in _err_str
or (_last_err and 'AuthenticationError' in type(_last_err).__name__)
or 'authentication' in _err_str.lower()
or 'unauthorized' in _err_str.lower()
or 'invalid api key' in _err_str.lower()
or 'invalid_api_key' in _err_str.lower()
)
if _is_auth:
put('apperror', {
'message': _err_str or 'Authentication failed — check your API key.',
'type': 'auth_mismatch',
'hint': (
'The selected model may not be supported by your configured provider or '
'your API key is invalid. Run `hermes model` in your terminal to '
'update credentials, then restart the WebUI.'
),
})
else:
put('apperror', {
'message': _err_str or 'The agent returned no response. Check your API key and model selection.',
'type': 'no_response',
'hint': 'Verify your API key is valid and the selected model is available for your account.',
})
return # Don't emit done — the apperror already closes the stream on the client
# ── Handle context compression side effects ──
# If compression fired inside run_conversation, the agent may have
# rotated its session_id. Detect and fix the mismatch so the WebUI