Phase 3: Health Check + Task Queue for Agent Tab

Backend: _get_agent_health() reports CPU, memory, and thread metrics taken from ps output; get_agent_tasks() reads the agent's tasks.json. API: GET /api/agents/{id}/health and GET /api/agents/{id}/tasks. Frontend: a health metrics block in the Overview tab (CPU, memory bar, threads) and a Tasks tab with a status-colored task list.
This commit is contained in:
Rose
2026-04-20 13:45:20 +02:00
parent e5b55c6f3a
commit 8b8a507ace
4 changed files with 250 additions and 1 deletions

View File

@@ -597,3 +597,118 @@ def get_agent_chat_history(agent_id: str, limit: int = 20) -> dict:
"agent_id": agent_id,
"sessions": history,
}
# ── Health Check ───────────────────────────────────────────────────────────────
def _get_agent_health(agent_id: str) -> dict:
    """
    Return best-effort health metrics for an agent.

    The process table is scanned with ``ps aux`` for the first command line
    that contains "hermes" (case-insensitive, skipping ``grep`` itself); that
    process supplies the metrics. Every failure along the way leaves the
    "offline" defaults in place instead of raising.

    Args:
        agent_id: "rose" or a member of ``TIER2_AGENTS``.

    Returns:
        ``{"error": ...}`` for an unknown agent, otherwise a dict with:
        - agent_id: the id that was asked for
        - status: "active" if a matching process was found, else "offline"
        - pid: integer from the agent's ``active_session.txt`` when that file
          parses, otherwise the PID of the matched process, otherwise None
        - uptime_seconds: elapsed run time of the matched process (0 if
          unavailable)
        - cpu_percent: the ``%CPU`` column of ``ps aux``, rounded to 0.1
        - memory_mb: resident set size converted from KB, rounded to 0.1
        - threads: thread count of the matched process; 1 when ``ps`` cannot
          report it, 0 when no process was found
    """
    if agent_id not in TIER2_AGENTS and agent_id != "rose":
        return {"error": f"Unknown agent: {agent_id}"}

    # Kept function-local, as in the original, so the block does not rely on
    # a module-level import.
    import subprocess

    status = "offline"
    pid = None
    uptime_seconds = 0
    cpu_percent = 0.0
    memory_mb = 0.0
    threads = 0

    try:
        ps_result = subprocess.run(
            ["ps", "aux"],
            capture_output=True, text=True, timeout=5,
        )
        ps_lines = ps_result.stdout.split("\n")
    except (OSError, ValueError, subprocess.SubprocessError):
        # ps missing, timed out, or produced undecodable output.
        ps_lines = []

    # NOTE(review): the scan matches any "hermes" command line and does not
    # depend on agent_id, so every agent reports the same process — confirm
    # whether Tier-2 agents need their own match pattern.
    for line in ps_lines:
        if "hermes" not in line.lower() or "grep" in line:
            continue
        parts = line.split()
        if len(parts) < 11:
            continue
        try:
            # `ps aux` columns: USER PID %CPU %MEM VSZ RSS ...
            found_pid = int(parts[1])
            cpu = float(parts[2])
            rss_kb = int(parts[5])
        except ValueError:
            # One malformed line must not abort the scan or leave a
            # half-filled result; try the next candidate.
            continue
        pid = found_pid
        cpu_percent = cpu
        memory_mb = rss_kb / 1024  # RSS is reported in KB
        status = "active"
        uptime_seconds = _parse_ps_etime(_ps_field(found_pid, "etime"))
        try:
            # `nlwp` is not available on every ps implementation.
            threads = int(_ps_field(found_pid, "nlwp"))
        except ValueError:
            threads = 1  # a running process has at least one thread
        break

    # NOTE(review): an integer read from active_session.txt replaces the PID
    # found by the scan, while the metrics above still describe the scanned
    # process — confirm that this file really stores a PID.
    agent_dir = _HERMES_DIR if agent_id == "rose" else _AGENTS_DIR / agent_id
    pid_file = agent_dir / "active_session.txt"
    if pid_file.exists():
        try:
            pid = int(pid_file.read_text().strip().split()[0])
        except (OSError, ValueError, IndexError):
            # Unreadable, empty, or non-numeric file: keep the scanned PID.
            pass

    return {
        "agent_id": agent_id,
        "status": status,
        "pid": pid,
        "uptime_seconds": uptime_seconds,
        "cpu_percent": round(cpu_percent, 1),
        "memory_mb": round(memory_mb, 1),
        "threads": threads,
    }


def _ps_field(pid: int, keyword: str) -> str:
    """Return one ``ps -o`` column for *pid*, or "" if it cannot be read."""
    import subprocess

    try:
        result = subprocess.run(
            ["ps", "-o", f"{keyword}=", "-p", str(pid)],
            capture_output=True, text=True, timeout=5,
        )
    except (OSError, ValueError, subprocess.SubprocessError):
        return ""
    if result.returncode != 0:
        # Unknown keyword on this platform, or the process already exited.
        return ""
    return result.stdout.strip()


def _parse_ps_etime(text: str) -> int:
    """Convert a ps ``[[dd-]hh:]mm:ss`` elapsed time to seconds (0 if invalid)."""
    days, _, clock = text.strip().rpartition("-")
    try:
        day_count = int(days) if days else 0
        fields = [int(field) for field in clock.split(":")]
    except ValueError:
        return 0
    if not 2 <= len(fields) <= 3:
        return 0
    seconds = 0
    for field in fields:
        seconds = seconds * 60 + field
    return day_count * 86400 + seconds
def get_agent_health(agent_id: str) -> dict:
    """
    API handler for GET /api/agents/{id}/health.

    Returns an ``{"error": ...}`` dict when *agent_id* is neither "rose" nor
    a member of ``TIER2_AGENTS``; otherwise returns whatever
    ``_get_agent_health`` produces for that agent.
    """
    is_known = agent_id in TIER2_AGENTS or agent_id == "rose"
    if is_known:
        return _get_agent_health(agent_id)
    return {"error": f"Unknown agent: {agent_id}"}
# ── Task Queue ─────────────────────────────────────────────────────────────────
def _get_task_queue(agent_id: str) -> list[dict]:
    """
    Read an agent's task queue from ``_AGENTS_DIR/<agent_id>/tasks.json``.

    Args:
        agent_id: "rose" or a member of ``TIER2_AGENTS``.

    Returns:
        The parsed JSON document when its top-level value is a list.
        An empty list when the agent is unknown, the file is missing or
        unreadable, the content is not valid JSON, or the top-level value
        is not a list. Entries are returned exactly as stored; their keys
        are not validated here.
    """
    if agent_id not in TIER2_AGENTS and agent_id != "rose":
        return []

    # Kept function-local, as in the original.
    import json as _json

    tasks_file = _AGENTS_DIR / agent_id / "tasks.json"
    try:
        # Passing bytes lets json.loads detect UTF-8/16/32 itself instead of
        # decoding with the platform's locale encoding. Reading directly
        # (no exists() pre-check) also avoids a race with file removal.
        data = _json.loads(tasks_file.read_bytes())
    except (OSError, ValueError, RecursionError):
        # Missing/unreadable file, malformed or undecodable JSON, or
        # pathologically nested input: treat as "no tasks".
        return []
    return data if isinstance(data, list) else []
def get_agent_tasks(agent_id: str) -> dict:
    """
    API handler for GET /api/agents/{id}/tasks.

    Returns an ``{"error": ...}`` dict when *agent_id* is neither "rose" nor
    a member of ``TIER2_AGENTS``. Otherwise returns the agent id, the task
    list from ``_get_task_queue``, and the number of tasks in it.
    """
    is_known = agent_id in TIER2_AGENTS or agent_id == "rose"
    if not is_known:
        return {"error": f"Unknown agent: {agent_id}"}

    queue = _get_task_queue(agent_id)
    return dict(agent_id=agent_id, tasks=queue, count=len(queue))

View File

@@ -867,6 +867,20 @@ def handle_get(handler, parsed) -> bool:
limit = int(parse_qs(parsed.query).get("limit", ["20"])[0])
return j(handler, _agents.get_agent_chat_history(agent_id, limit=limit))
# GET /api/agents/{id}/health
if parsed.path.startswith("/api/agents/") and "/health" in parsed.path:
parts = parsed.path.split("/")
if len(parts) == 5 and parts[4] == "health":
agent_id = parts[3]
return j(handler, _agents.get_agent_health(agent_id))
# GET /api/agents/{id}/tasks
if parsed.path.startswith("/api/agents/") and "/tasks" in parsed.path:
parts = parsed.path.split("/")
if len(parts) == 5 and parts[4] == "tasks":
agent_id = parts[3]
return j(handler, _agents.get_agent_tasks(agent_id))
# ── Profile API (GET) ──
if parsed.path == "/api/profiles":
from api.profiles import list_profiles_api, get_active_profile_name