diff --git a/api/streaming.py b/api/streaming.py
index 99bc531..d087fd0 100644
--- a/api/streaming.py
+++ b/api/streaming.py
@@ -112,13 +112,38 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta
             if AIAgent is None:
                 raise ImportError("AIAgent not available -- check that hermes-agent is on sys.path")
             resolved_model, resolved_provider, resolved_base_url = resolve_model_provider(model)
+
+            # Read per-profile config at call time (not module-level snapshot)
+            from api.config import get_config as _get_config
+            _cfg = _get_config()
+
+            # Per-profile toolsets: use platform_toolsets['cli'] when platform_toolsets is a dict with that key, otherwise CLI_TOOLSETS
+            _pt = _cfg.get('platform_toolsets', {})
+            _toolsets = _pt.get('cli', CLI_TOOLSETS) if isinstance(_pt, dict) else CLI_TOOLSETS
+
+            # Fallback model from profile config (e.g. for rate-limit recovery)
+            _fallback = _cfg.get('fallback_model') or None
+            if isinstance(_fallback, dict):
+                # Forward the configured fields unchanged; a non-dict value is ignored rather than crashing on .get()
+                fb_model = _fallback.get('model', '')
+                fb_provider = _fallback.get('provider', '')
+                fb_base_url = _fallback.get('base_url')
+                _fallback_resolved = {
+                    'model': fb_model,
+                    'provider': fb_provider,
+                    'base_url': fb_base_url,
+                }
+            else:
+                _fallback_resolved = None
+
             agent = AIAgent(
                 model=resolved_model,
                 provider=resolved_provider,
                 base_url=resolved_base_url,
                 platform='cli',
                 quiet_mode=True,
-                enabled_toolsets=CLI_TOOLSETS,
+                enabled_toolsets=_toolsets,
+                fallback_model=_fallback_resolved,
                 session_id=session_id,
                 stream_delta_callback=on_token,
                 tool_progress_callback=on_tool,
@@ -203,7 +228,18 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta
 
     except Exception as e:
         print('[webui] stream error:\n' + traceback.format_exc(), flush=True)
-        put('error', {'message': str(e)})
+        err_str = str(e)
+        # Heuristic rate-limit detection ('rate limit' text, a plain '429' substring, or the class name)
+        # so the client can show a helpful card rather than the generic "Connection lost" message
+        is_rate_limit = 'rate limit' in err_str.lower() or '429' in err_str or 'RateLimitError' in type(e).__name__
+        if is_rate_limit:
+            put('apperror', {
+                'message': err_str,
+                'type': 'rate_limit',
+                'hint': 'Rate limit reached. The fallback model (if configured) was also exhausted. Try again in a moment.',
+            })
+        else:
+            put('apperror', {'message': err_str, 'type': 'error'})
     finally:
         _clear_thread_env()  # TD1: always clear thread-local context
         with STREAMS_LOCK:
diff --git a/static/messages.js b/static/messages.js
index fd37cb8..6ba2c80 100644
--- a/static/messages.js
+++ b/static/messages.js
@@ -162,6 +162,46 @@ async function send(){
       renderSessionList();setBusy(false);setStatus('');
     });
 
+    source.addEventListener('apperror',e=>{
+      // Application-level error sent explicitly by the server (rate limit, crash, etc.)
+      // This is distinct from the SSE network 'error' event below.
+      source.close();
+      delete INFLIGHT[activeSid];clearInflight();stopApprovalPolling();
+      if(!_approvalSessionId||_approvalSessionId===activeSid) hideApprovalCard();
+      if(S.session&&S.session.session_id===activeSid){
+        S.activeStreamId=null;const _cbe=$('btnCancel');if(_cbe)_cbe.style.display='none';
+        clearLiveToolCards();if(!assistantText)removeThinking();
+        try{
+          const d=JSON.parse(e.data);
+          const isRateLimit=d.type==='rate_limit';
+          const icon=isRateLimit?'⏱️':'⚠️';
+          const label=isRateLimit?'Rate limit reached':'Error';
+          const hint=d.hint?`\n\n*${d.hint}*`:'';
+          S.messages.push({role:'assistant',content:`**${icon} ${label}:** ${d.message}${hint}`});
+        }catch(_){
+          S.messages.push({role:'assistant',content:'**⚠️ Error:** An error occurred. Check server logs.'});
+        }
+        renderMessages();
+      }else if(typeof trackBackgroundError==='function'){ // the failed stream's session is not the one on screen
+        const _errTitle=(typeof _allSessions!=='undefined'&&_allSessions.find(s=>s.session_id===activeSid)||{}).title||null;
+        try{const d=JSON.parse(e.data);trackBackgroundError(activeSid,_errTitle,d.message||'Error');}
+        catch(_){trackBackgroundError(activeSid,_errTitle,'Error');}
+      }
+      if(!S.session||!INFLIGHT[S.session.session_id]){setBusy(false);setStatus('');} // keep busy state while the viewed session still has a stream in flight
+    });
+
+    source.addEventListener('warning',e=>{
+      // Non-fatal warning from server (e.g. fallback activated, retrying); ignored unless this stream's session is on screen
+      if(!S.session||S.session.session_id!==activeSid) return;
+      try{
+        const d=JSON.parse(e.data);
+        // Show as a status-line notice, not a full error message; falls back to 'Warning' when no message is given
+        setStatus(`⚠️ ${d.message||'Warning'}`);
+        // Fallback notices auto-clear after 4s. NOTE(review): the timer blanks whatever status is showing by then
+        if(d.type==='fallback') setTimeout(()=>setStatus(''),4000);
+      }catch(_){}
+    });
+
     source.addEventListener('error',e=>{
       source.close();
       // Attempt one reconnect if the stream is still active server-side