fix: silent agent errors, stale model list, live model fetching (#377)

* fix: silent errors, stale models, live model fetching (#373, #374, #375)

- api/streaming.py: detect empty agent response (_assistant_added check),
  emit apperror(type='no_response' or 'auth_mismatch') instead of silent done
- api/streaming.py: add _token_sent flag so guard works for streaming agents
- static/messages.js: done handler belt-and-suspenders guard for zero replies
- static/messages.js: apperror handler labels 'no_response' type distinctly

- api/config.py: remove gpt-4o and o3 from _FALLBACK_MODELS and
  _PROVIDER_MODELS['openai'] (superseded by gpt-5.4-mini and o4-mini)

- api/routes.py: new /api/models/live?provider= endpoint, fetches /v1/models
  from provider API with B310 scheme check + SSRF guard
- static/ui.js: _fetchLiveModels() background fetch after static list loads,
  appends new models to dropdown, caches per session, skips unsupported providers

Other:
- tests/test_issues_373_374_375.py: 25 new structural tests
- tests/test_regressions.py: extend done-handler window 1500->2500 chars
- CHANGELOG.md: v0.50.19 entry; 947 tests (up from 922)

* fix: SSRF hostname bypass + auth detection operator precedence

1. routes.py: SSRF guard used substring matching (any(k in hostname))
   which allows bypass via hostnames like evil-ollama.attacker.com.
   Changed to exact hostname matching against a fixed set of known
   local hostnames (localhost, 127.0.0.1, 0.0.0.0, ::1).

2. streaming.py: _is_auth detection had a Python operator precedence
   bug on the ternary expression. The line:
     'AuthenticationError' in type(...).__name__ if _last_err else False
   parsed as the ternary absorbing the rest of the or-chain when
   _last_err was falsy. Fixed to: (_last_err and 'AuthenticationError' in ...)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* docs: fix v0.50.20 CHANGELOG version number and test count (949 tests)

---------

Co-authored-by: Nathan Esquenazi <nesquena@gmail.com>
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
nesquena-hermes
2026-04-13 15:52:35 -07:00
committed by GitHub
parent 78de40e015
commit 7a80e73eb2
8 changed files with 485 additions and 6 deletions

View File

@@ -341,6 +341,9 @@ def handle_get(handler, parsed) -> bool:
if parsed.path == "/api/models":
return j(handler, get_available_models())
if parsed.path == "/api/models/live":
return _handle_live_models(handler, parsed)
if parsed.path == "/api/settings":
settings = load_settings()
# Never expose the stored password hash to clients
@@ -1410,6 +1413,144 @@ def _handle_approval_inject(handler, parsed):
return j(handler, {"error": "session_id required"}, status=400)
def _handle_live_models(handler, parsed):
    """Fetch the live model list from a provider's /v1/models endpoint.

    Returns the provider's actual model catalog so the UI can show all
    available models, not just the hardcoded fallback list.

    Query params:
        provider (optional) — provider ID to fetch for; defaults to active
        base_url (optional) — override the base URL for the provider

    Providers that don't expose a /v1/models endpoint (Anthropic, Google)
    are not supported here — the caller should fall back to the static list.
    Supported: openai, openrouter, custom (any OpenAI-compatible endpoint).

    Always returns JSON via j(); on any failure the payload carries an
    "error" key and an empty "models" list so the UI can degrade gracefully.
    """
    import urllib.request as _ur
    import ipaddress as _ip
    import socket as _sock
    from urllib.parse import urlparse as _up

    qs = parse_qs(parsed.query)
    provider = (qs.get("provider", [""])[0] or "").lower().strip()
    base_url_override = (qs.get("base_url", [""])[0] or "").strip()
    try:
        from api.config import get_config as _gc

        cfg = _gc()
        active_provider = cfg.get("model", {}).get("provider") or ""
        if not provider:
            provider = active_provider

        # Resolve API key and base URL for this provider. Best-effort: a
        # missing/unresolvable runtime provider just means we fall back to
        # the hardcoded defaults below.
        api_key = None
        base_url = base_url_override or ""
        try:
            from hermes_cli.runtime_provider import resolve_runtime_provider

            rt = resolve_runtime_provider(requested=provider)
            api_key = rt.get("api_key")
            if not base_url:
                base_url = rt.get("base_url") or ""
        except Exception:
            pass

        # Determine the /v1/models endpoint URL from well-known defaults
        # when neither the override nor the runtime provider supplied one.
        if not base_url:
            if provider in ("openai", "openai-codex", "copilot"):
                base_url = "https://api.openai.com/v1"
            elif provider == "openrouter":
                base_url = "https://openrouter.ai/api/v1"
            elif provider in ("anthropic",):
                # Anthropic doesn't support /v1/models in a standard way
                return j(handler, {"error": "not_supported", "models": []})
            elif provider in ("google", "gemini"):
                return j(handler, {"error": "not_supported", "models": []})
        if not base_url:
            # Generic OpenAI-compatible provider with no known base URL.
            return j(handler, {"error": "no_base_url", "models": []})

        # Build the endpoint URL without doubling up path segments.
        base_url = base_url.rstrip("/")
        if base_url.endswith("/v1"):
            endpoint_url = base_url + "/models"
        elif "/v1" in base_url:
            endpoint_url = base_url + "/models"
        else:
            endpoint_url = base_url + "/v1/models"

        # Validate scheme (B310 guard): only plain HTTP(S), never file:// etc.
        parsed_ep = _up(endpoint_url)
        if parsed_ep.scheme not in ("http", "https"):
            return j(handler, {"error": "invalid_scheme", "models": []}, status=400)

        # SSRF guard: block private IPs (allow known local provider hostnames).
        # Use exact hostname match — NOT substring — to prevent bypass via
        # hostnames like evil-ollama.attacker.com containing "ollama".
        _KNOWN_LOCAL_HOSTS = {"localhost", "127.0.0.1", "0.0.0.0", "::1"}
        if parsed_ep.hostname:
            hostname_lower = (parsed_ep.hostname or "").lower()
            try:
                for _, _, _, _, addr in _sock.getaddrinfo(parsed_ep.hostname, None):
                    addr_obj = _ip.ip_address(addr[0])
                    if addr_obj.is_private or addr_obj.is_loopback:
                        if hostname_lower not in _KNOWN_LOCAL_HOSTS:
                            return j(handler, {"error": "ssrf_blocked", "models": []}, status=400)
            except _sock.gaierror:
                # Unresolvable hostname — let the fetch below fail naturally.
                pass

        # Fetch the model catalog.
        req = _ur.Request(endpoint_url, method="GET")
        req.add_header("User-Agent", "HermesWebUI/1.0")
        if api_key:
            req.add_header("Authorization", f"Bearer {api_key}")
        with _ur.urlopen(req, timeout=8) as resp:  # nosec B310
            raw = resp.read().decode("utf-8")
        import json as _json

        data = _json.loads(raw)
        # OpenAI-style responses put the list under "data"; some
        # compatible servers use "models".
        raw_models = data.get("data") or data.get("models") or []

        # Normalise to {id, label} list; filter to text-generation models.
        models = []
        seen = set()
        for m in raw_models:
            if not isinstance(m, dict):
                continue
            mid = m.get("id") or m.get("name") or ""
            if not mid or mid in seen:
                continue
            # Skip entries whose declared object type isn't a model.
            obj_type = (m.get("object") or "").lower()
            if obj_type and obj_type not in ("model",):
                continue
            # Heuristic: skip obvious non-chat models.
            if any(skip in mid.lower() for skip in ("embed", "tts", "whisper", "dall-e", "davinci-edit", "babbage", "ada", "curie")):
                continue
            seen.add(mid)
            label = m.get("name") or m.get("display_name") or mid
            # For OpenAI, the id IS the label — clean it up.
            # (Fixed: dropped the former no-op `.replace(".", ".")`.)
            if label == mid:
                label = mid.replace("-", " ").title()
                # Restore original casing for well-known names. Note the
                # former trailing "gpt" entry was dead code: "gpt".title()
                # is "Gpt", already consumed by the "GPT" entry first.
                for known in ("GPT", "o1", "o3", "o4"):
                    label = label.replace(known.title(), known)
            models.append({"id": mid, "label": label})

        # Sort reverse-lexicographically on id so higher version numbers
        # (which sort later) come first.
        models.sort(key=lambda m: m["id"], reverse=True)
        return j(handler, {"provider": provider, "models": models, "count": len(models)})
    except Exception as _e:
        # Boundary handler: surface the error to the client rather than 500.
        logger.debug("Failed to fetch live models for %s: %s", provider, _e)
        return j(handler, {"error": str(_e), "models": []})
def _handle_cron_output(handler, parsed):
from cron.jobs import OUTPUT_DIR as CRON_OUT