🔧 Initial dev copy from live

2026-04-20 10:43:30 +02:00
commit 96977b576a
284 changed files with 95780 additions and 0 deletions
--- a/tests/test_issues_373_374_375.py
+++ b/tests/test_issues_373_374_375.py
@@ -0,0 +1,317 @@
+"""
+Tests for issues #373, #374, and #375.
+
+#373: Chat silently swallows errors — no feedback when agent fails to respond
+#374: Remove stale OpenAI models from default list (gpt-4o, o3)
+#375: Model dropdown should fetch live models from provider
+"""
+import pathlib
+import re
+
+REPO = pathlib.Path(__file__).parent.parent
+STREAMING_PY = (REPO / "api" / "streaming.py").read_text(encoding="utf-8")
+CONFIG_PY    = (REPO / "api" / "config.py").read_text(encoding="utf-8")
+ROUTES_PY    = (REPO / "api" / "routes.py").read_text(encoding="utf-8")
+MESSAGES_JS  = (REPO / "static" / "messages.js").read_text(encoding="utf-8")
+UI_JS        = (REPO / "static" / "ui.js").read_text(encoding="utf-8")
+
+
+# ── Issue #373: Silent error detection ──────────────────────────────────────
+
+class TestSilentErrorDetection:
+    """streaming.py must emit apperror when agent returns no assistant reply."""
+
+    def test_streaming_detects_no_assistant_reply(self):
+        """streaming.py must check if any assistant message was produced."""
+        assert "_assistant_added" in STREAMING_PY, (
+            "streaming.py must check whether an assistant message was produced (#373)"
+        )
+
+    def test_streaming_emits_apperror_on_no_response(self):
+        """streaming.py must emit apperror event when agent produced no reply."""
+        assert "no_response" in STREAMING_PY, (
+            "streaming.py must emit apperror with type='no_response' for silent failures (#373)"
+        )
+
+    def test_streaming_returns_early_after_apperror(self):
+        """streaming.py must return after emitting apperror (not also emit done)."""
+        # The return statement must come after the put('apperror') for no_response
+        no_resp_pos = STREAMING_PY.find("'no_response'")
+        return_pos = STREAMING_PY.find("return  # Don't emit done", no_resp_pos)
+        assert no_resp_pos != -1, "no_response type not found in streaming.py"
+        assert return_pos != -1, (
+            "streaming.py must return after emitting apperror to prevent also emitting done (#373)"
+        )
+        assert return_pos > no_resp_pos
+
+    def test_streaming_detects_auth_error_in_result(self):
+        """streaming.py must detect auth errors from the result object."""
+        assert "_is_auth" in STREAMING_PY, (
+            "streaming.py must detect auth errors in silent failures (#373)"
+        )
+        assert "auth_mismatch" in STREAMING_PY, (
+            "streaming.py must emit auth_mismatch type for auth failures (#373)"
+        )
+
+    def test_messages_js_done_handler_detects_no_reply(self):
+        """messages.js done handler must show an error if no assistant reply arrived."""
+        # Check for either the variable name or the inlined check pattern
+        has_no_reply_guard = (
+            "hasAssistantReply" in MESSAGES_JS
+            or ("role==='assistant'" in MESSAGES_JS and "No response received" in MESSAGES_JS)
+        )
+        assert has_no_reply_guard, (
+            "messages.js done handler must detect zero assistant replies (#373)"
+        )
+        assert "No response received" in MESSAGES_JS, (
+            "messages.js must show 'No response received' inline message (#373)"
+        )
+
+    def test_messages_js_handles_no_response_apperror_type(self):
+        """messages.js apperror handler must recognise the no_response type."""
+        assert "isNoResponse" in MESSAGES_JS or "no_response" in MESSAGES_JS, (
+            "messages.js apperror handler must handle type='no_response' (#373)"
+        )
+
+    def test_messages_js_no_response_label(self):
+        """messages.js must show a distinct label for no_response errors."""
+        assert "No response received" in MESSAGES_JS, (
+            "messages.js must display 'No response received' label for no_response errors (#373)"
+        )
+
+
+# ── Issue #374: Stale model list cleanup ─────────────────────────────────────
+
+class TestStaleModelListCleanup:
+    """gpt-4o and o3 must be removed from the primary OpenAI model lists."""
+
+    def test_gpt4o_removed_from_fallback_models(self):
+        """_FALLBACK_MODELS must not contain gpt-4o (issue #374)."""
+        fallback_block_start = CONFIG_PY.find("_FALLBACK_MODELS = [")
+        fallback_block_end = CONFIG_PY.find("]", fallback_block_start)
+        fallback_block = CONFIG_PY[fallback_block_start:fallback_block_end]
+        assert "gpt-4o" not in fallback_block, (
+            "_FALLBACK_MODELS still contains gpt-4o — remove it per issue #374"
+        )
+
+    def test_o3_removed_from_fallback_models(self):
+        """_FALLBACK_MODELS must not contain o3 (issue #374)."""
+        fallback_block_start = CONFIG_PY.find("_FALLBACK_MODELS = [")
+        fallback_block_end = CONFIG_PY.find("]", fallback_block_start)
+        fallback_block = CONFIG_PY[fallback_block_start:fallback_block_end]
+        assert '"o3"' not in fallback_block and "'o3'" not in fallback_block, (
+            "_FALLBACK_MODELS still contains o3 — remove it per issue #374"
+        )
+
+    def test_gpt4o_removed_from_provider_models_openai(self):
+        """_PROVIDER_MODELS['openai'] must not contain gpt-4o (issue #374)."""
+        openai_start = CONFIG_PY.find('"openai": [')
+        openai_end = CONFIG_PY.find("],", openai_start)
+        openai_block = CONFIG_PY[openai_start:openai_end]
+        assert "gpt-4o" not in openai_block, (
+            "_PROVIDER_MODELS['openai'] still contains gpt-4o — remove per issue #374"
+        )
+
+    def test_o3_removed_from_provider_models_openai(self):
+        """_PROVIDER_MODELS['openai'] must not contain o3 (issue #374)."""
+        openai_start = CONFIG_PY.find('"openai": [')
+        openai_end = CONFIG_PY.find("],", openai_start)
+        openai_block = CONFIG_PY[openai_start:openai_end]
+        assert '"o3"' not in openai_block and "'o3'" not in openai_block, (
+            "_PROVIDER_MODELS['openai'] still contains o3 — remove per issue #374"
+        )
+
+    def test_fallback_still_has_gpt54_mini(self):
+        """_FALLBACK_MODELS must still contain gpt-5.4-mini (not over-trimmed)."""
+        assert "gpt-5.4-mini" in CONFIG_PY, (
+            "_FALLBACK_MODELS must keep gpt-5.4-mini as primary OpenAI model (#374)"
+        )
+
+    def test_fallback_has_gpt54(self):
+        """_FALLBACK_MODELS must contain gpt-5.4-mini as the primary OpenAI option."""
+        from api.config import _FALLBACK_MODELS
+        ids = [m["id"] for m in _FALLBACK_MODELS]
+        assert any("gpt-5.4-mini" in mid for mid in ids), (
+            "_FALLBACK_MODELS must include gpt-5.4-mini as the primary OpenAI option"
+        )
+
+    def test_copilot_list_unchanged(self):
+        """Copilot provider model list should still include gpt-4o (it's a valid Copilot model)."""
+        copilot_start = CONFIG_PY.find('"copilot": [')
+        copilot_end = CONFIG_PY.find("],", copilot_start)
+        if copilot_start == -1:
+            return  # No copilot list — that's fine
+        copilot_block = CONFIG_PY[copilot_start:copilot_end]
+        assert "gpt-4o" in copilot_block, (
+            "Copilot provider model list should keep gpt-4o (it's available via Copilot) (#374)"
+        )
+
+
+# ── Issue #375: Live model fetching ─────────────────────────────────────────
+
+class TestLiveModelFetching:
+    """Backend and frontend must support live model fetching from provider APIs."""
+
+    def test_live_models_endpoint_exists_in_routes(self):
+        """routes.py must have a /api/models/live endpoint (#375)."""
+        assert "/api/models/live" in ROUTES_PY, (
+            "routes.py must define /api/models/live endpoint (#375)"
+        )
+
+    def test_live_models_handler_function_exists(self):
+        """routes.py must define _handle_live_models() function (#375)."""
+        assert "def _handle_live_models(" in ROUTES_PY, (
+            "routes.py must define _handle_live_models() for live model fetching (#375)"
+        )
+
+    def test_live_models_handler_validates_scheme(self):
+        """_handle_live_models must validate URL scheme to prevent file:// injection (B310)."""
+        assert "nosec B310" in ROUTES_PY or ("scheme" in ROUTES_PY and "http" in ROUTES_PY), (
+            "_handle_live_models must validate URL scheme before urlopen (#375)"
+        )
+
+    def test_live_models_handler_has_ssrf_guard(self):
+        """_handle_live_models must guard against SSRF (private IP access)."""
+        assert "ssrf_blocked" in ROUTES_PY or ("is_private" in ROUTES_PY and "live" in ROUTES_PY), (
+            "_handle_live_models must have SSRF protection for private IP ranges (#375)"
+        )
+
+    def test_live_models_all_providers_handled_via_agent(self):
+        """_handle_live_models must delegate to provider_model_ids() which handles all
+        providers gracefully — live fetch where possible, static fallback otherwise.
+        The old 'not_supported' return for Anthropic/Google is superseded: those
+        providers now return live or static model lists via the agent delegate."""
+        assert "provider_model_ids" in ROUTES_PY, (
+            "_handle_live_models must delegate to hermes_cli.models.provider_model_ids() "
+            "so all providers are handled uniformly (#375 upgrade)"
+        )
+
+    def test_frontend_has_fetch_live_models_function(self):
+        """ui.js must define _fetchLiveModels() for background live model loading (#375)."""
+        assert "function _fetchLiveModels(" in UI_JS or "async function _fetchLiveModels(" in UI_JS, (
+            "ui.js must define _fetchLiveModels() function (#375)"
+        )
+
+    def test_frontend_live_models_cache_exists(self):
+        """ui.js must cache live model responses to avoid redundant API calls (#375)."""
+        assert "_liveModelCache" in UI_JS, (
+            "ui.js must use _liveModelCache to avoid re-fetching on every dropdown open (#375)"
+        )
+
+    def test_frontend_calls_live_models_after_static_load(self):
+        """populateModelDropdown must call _fetchLiveModels after rendering the static list (#375)."""
+        assert "_fetchLiveModels" in UI_JS, (
+            "populateModelDropdown must call _fetchLiveModels for background update (#375)"
+        )
+
+    def test_frontend_live_fetch_only_adds_new_models(self):
+        """_fetchLiveModels must not duplicate models already in the static list (#375)."""
+        assert "existingIds" in UI_JS, (
+            "_fetchLiveModels must track existing model IDs to avoid duplicates (#375)"
+        )
+
+    def test_frontend_live_fetch_covers_all_providers(self):
+        """_fetchLiveModels no longer skips any provider — all providers return
+        live or fallback models via provider_model_ids() on the backend (#375 upgrade)."""
+        # The old skip list (anthropic, google, gemini) must be gone from the guard
+        skip_guard_pos = UI_JS.find("includes(provider)")
+        if skip_guard_pos != -1:
+            guard_line = UI_JS[max(0,skip_guard_pos-100):skip_guard_pos+50]
+            assert "anthropic" not in guard_line, (
+                "_fetchLiveModels must not skip anthropic — backend now handles it (#375 upgrade)"
+            )
+
+    def test_live_models_endpoint_wired_in_routes(self):
+        """The /api/models/live path must be handled in handle_get()."""
+        # Find handle_get and check our route appears inside it
+        handle_get_pos = ROUTES_PY.find("def handle_get(")
+        live_route_pos = ROUTES_PY.find('"/api/models/live"')
+        assert handle_get_pos != -1 and live_route_pos != -1
+        assert live_route_pos > handle_get_pos, (
+            "/api/models/live must be inside handle_get() (#375)"
+        )
+
+
+# ── #669: Gemini model IDs must be valid for Google AI Studio endpoint ────────
+
+class TestGeminiModelIds:
+    """Gemini 3.x model IDs must be valid for the native Google AI Studio provider.
+
+    The original code had gemini-3.1-flash-lite-preview missing from the
+    dropdown. The fallback list also erroneously used gemini-3.1-pro-preview
+    in some provider sections while omitting gemini-3.1-flash-lite-preview.
+    All provider sections must now include the full current Gemini 3.x lineup.
+    """
+
+    VALID_GEMINI_3 = [
+        "gemini-3.1-pro-preview",
+        "gemini-3-flash-preview",
+        "gemini-3.1-flash-lite-preview",
+    ]
+
+    def test_gemini_provider_models_has_3x(self):
+        """_PROVIDER_MODELS['gemini'] must contain valid Gemini 3.x model IDs (#669)."""
+        gemini_block_start = CONFIG_PY.find('"gemini": [')
+        assert gemini_block_start != -1, "_PROVIDER_MODELS['gemini'] block not found"
+        gemini_block = CONFIG_PY[gemini_block_start:gemini_block_start + 600]
+        for mid in self.VALID_GEMINI_3:
+            assert mid in gemini_block, (
+                f"_PROVIDER_MODELS['gemini'] must contain {mid!r} — "
+                f"this is a valid Google AI Studio model ID (#669)"
+            )
+
+    def test_gemini_provider_models_has_flash_lite(self):
+        """_PROVIDER_MODELS['gemini'] must contain gemini-3.1-flash-lite-preview (#669).
+
+        This was the model the reporter selected from the wizard — it must appear
+        in the native gemini provider model list so users can select it.
+        """
+        gemini_block_start = CONFIG_PY.find('"gemini": [')
+        assert gemini_block_start != -1
+        gemini_block = CONFIG_PY[gemini_block_start:gemini_block_start + 600]
+        assert "gemini-3.1-flash-lite-preview" in gemini_block, (
+            "_PROVIDER_MODELS['gemini'] missing gemini-3.1-flash-lite-preview — "
+            "this was the exact model the #669 reporter tried and got API_KEY_INVALID"
+        )
+
+    def test_fallback_models_has_gemini_3x(self):
+        """_FALLBACK_MODELS must contain valid Gemini 3.x OpenRouter model IDs (#669)."""
+        fallback_start = CONFIG_PY.find("_FALLBACK_MODELS = [")
+        fallback_end = CONFIG_PY.find("]", fallback_start + len("_FALLBACK_MODELS = ["))
+        # Find the closing bracket for the list (multi-line)
+        depth = 0
+        pos = fallback_start + len("_FALLBACK_MODELS = [")
+        for i, ch in enumerate(CONFIG_PY[pos:], start=pos):
+            if ch == '[':
+                depth += 1
+            elif ch == ']':
+                if depth == 0:
+                    fallback_end = i
+                    break
+                depth -= 1
+        fallback_block = CONFIG_PY[fallback_start:fallback_end]
+        for mid in ("google/gemini-3.1-pro-preview", "google/gemini-3-flash-preview"):
+            assert mid in fallback_block, (
+                f"_FALLBACK_MODELS must contain {mid!r} for OpenRouter Google models (#669)"
+            )
+
+    def test_gemini_provider_also_has_stable_25(self):
+        """_PROVIDER_MODELS['gemini'] must retain stable Gemini 2.5 models (#669)."""
+        gemini_block_start = CONFIG_PY.find('"gemini": [')
+        assert gemini_block_start != -1
+        gemini_block = CONFIG_PY[gemini_block_start:gemini_block_start + 600]
+        assert "gemini-2.5-pro" in gemini_block, (
+            "_PROVIDER_MODELS['gemini'] must keep gemini-2.5-pro as a stable fallback"
+        )
+
+    def test_no_invalid_gemini_3_pro_model(self):
+        """gemini-3-pro-preview must not appear — it was shut down March 9 2026 (#669)."""
+        assert "gemini-3-pro-preview" not in CONFIG_PY or "gemini-3.1-pro-preview" in CONFIG_PY, (
+            "gemini-3-pro-preview was shut down — use gemini-3.1-pro-preview instead (#669)"
+        )
+        # More precise: ensure the bare (non-.1) version isn't the only one present
+        count_bare = CONFIG_PY.count('"gemini-3-pro-preview"')
+        assert count_bare == 0, (
+            f"gemini-3-pro-preview appears {count_bare} time(s) in config.py — "
+            "it was shut down March 9 2026, use gemini-3.1-pro-preview (#669)"
+        )