Phase 3: Health Check + Task Queue for Agent Tab
Backend: _get_agent_health() with CPU/Memory/Threads from ps, get_agent_tasks() reads tasks.json. API: GET /api/agents/{id}/health + /tasks. Frontend: Health metrics block in Overview tab (CPU, Memory bar, Threads), Tasks tab with status-colored task list.
This commit is contained in:
115
api/agents.py
115
api/agents.py
@@ -597,3 +597,118 @@ def get_agent_chat_history(agent_id: str, limit: int = 20) -> dict:
|
||||
"agent_id": agent_id,
|
||||
"sessions": history,
|
||||
}
|
||||
|
||||
|
||||
# ── Health Check ───────────────────────────────────────────────────────────────
|
||||
|
||||
def _get_agent_health(agent_id: str) -> dict:
    """
    Return best-effort health metrics for an agent.

    The process table is scanned with ``ps aux`` and the first line that
    contains "hermes" (case-insensitive) and does not contain "grep" is
    taken as the agent's process.

    NOTE(review): the match is not agent-specific, so every agent reports
    the same first "hermes" process; confirm whether that is intended.

    Returned keys:
    - agent_id: the id that was passed in
    - status: "active" when a matching process was found, else "offline"
    - pid: PID of the matched process; when no process matched, the first
      integer token of the agent's active_session.txt if readable, else None
    - uptime_seconds: elapsed run time of the matched process (ps ``etime``),
      0 when unavailable
    - cpu_percent: the %CPU column reported by ps, rounded to 1 decimal
    - memory_mb: RSS from ps converted from KB to MB, rounded to 1 decimal
    - threads: thread count (ps ``nlwp``) when the platform's ps supports it,
      otherwise 1 for a running process and 0 when offline

    For an unknown agent_id, ``{"error": ...}`` is returned instead.
    """
    import subprocess

    if agent_id not in TIER2_AGENTS and agent_id != "rose":
        return {"error": f"Unknown agent: {agent_id}"}

    status = "offline"
    pid = None
    uptime_seconds = 0
    cpu_percent = 0.0
    memory_mb = 0.0
    threads = 0

    # Rose runs as a 'hermes' process; Tier-2 agents may run as
    # 'python server.py' or similar.
    try:
        ps_result = subprocess.run(
            ["ps", "aux"],
            capture_output=True, text=True, timeout=5,
        )
        ps_lines = ps_result.stdout.splitlines()
    except (OSError, subprocess.SubprocessError):
        # ps is missing, not executable, or timed out: report "offline".
        ps_lines = []

    for line in ps_lines:
        if "hermes" not in line.lower() or "grep" in line:
            continue
        parts = line.split()
        # `ps aux` columns: USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
        if len(parts) < 11:
            continue
        try:
            found_pid = int(parts[1])
            cpu = float(parts[2])
            rss_kb = int(parts[5])
        except ValueError:
            # Skip an unparsable line instead of abandoning the whole scan.
            continue

        pid = found_pid
        cpu_percent = cpu
        memory_mb = rss_kb / 1024  # RSS is in KB, convert to MB
        status = "active"

        try:
            uptime_seconds = _parse_ps_etime(_ps_query_field(found_pid, "etime"))
        except ValueError:
            uptime_seconds = 0

        # `ps aux` has no thread column; ask for nlwp separately and fall
        # back to 1 where ps does not know that keyword.
        nlwp = _ps_query_field(found_pid, "nlwp")
        threads = int(nlwp) if nlwp.isdigit() else 1
        break

    if pid is None:
        # No live process matched: fall back to the PID recorded in the
        # agent's active_session.txt. The file is consulted only as a
        # fallback so the reported pid always belongs to the process whose
        # metrics are returned.
        if agent_id == "rose":
            agent_dir = _HERMES_DIR
        else:
            agent_dir = _AGENTS_DIR / agent_id
        pid_file = agent_dir / "active_session.txt"
        try:
            pid = int(pid_file.read_text().strip().split()[0])
        except (OSError, ValueError, IndexError):
            # Missing/unreadable file, empty file, or non-numeric first token.
            pass

    return {
        "agent_id": agent_id,
        "status": status,
        "pid": pid,
        "uptime_seconds": uptime_seconds,
        "cpu_percent": round(cpu_percent, 1),
        "memory_mb": round(memory_mb, 1),
        "threads": threads,
    }


def _ps_query_field(pid: int, field: str) -> str:
    """Return one ps output field for *pid* (e.g. "etime"), or "" on any failure."""
    import subprocess

    try:
        result = subprocess.run(
            ["ps", "-o", f"{field}=", "-p", str(pid)],
            capture_output=True, text=True, timeout=5,
        )
    except (OSError, subprocess.SubprocessError):
        return ""
    if result.returncode != 0:
        # Process already gone, or this ps does not support the keyword.
        return ""
    return result.stdout.strip()


def _parse_ps_etime(raw: str) -> int:
    """Convert a ps ``etime`` value (``[[dd-]hh:]mm:ss``) to whole seconds.

    Returns 0 for an empty string; raises ValueError for malformed input.
    """
    raw = raw.strip()
    if not raw:
        return 0
    days = 0
    if "-" in raw:
        day_part, raw = raw.split("-", 1)
        days = int(day_part)
    seconds = 0
    for chunk in raw.split(":"):
        seconds = seconds * 60 + int(chunk)
    return days * 86400 + seconds
|
||||
|
||||
|
||||
def get_agent_health(agent_id: str) -> dict:
    """API: GET /api/agents/{id}/health — return health metrics.

    Agents that are neither listed in TIER2_AGENTS nor named "rose" get an
    ``{"error": ...}`` payload; all others are delegated to
    ``_get_agent_health``.
    """
    is_known = agent_id in TIER2_AGENTS or agent_id == "rose"
    if is_known:
        return _get_agent_health(agent_id)
    return {"error": f"Unknown agent: {agent_id}"}
|
||||
|
||||
|
||||
# ── Task Queue ─────────────────────────────────────────────────────────────────
|
||||
|
||||
def _get_task_queue(agent_id: str) -> list[dict]:
    """
    Read the task queue from the agent's tasks.json under _AGENTS_DIR.

    Returns the parsed JSON when the file holds a top-level list. An empty
    list is returned when the agent is unknown, the file is missing or
    unreadable, the content is not valid JSON, or the top-level value is not
    a list.

    Presumably each task looks like {id, description, status, created_at};
    entries are passed through without validation.
    """
    import json as _json

    if agent_id not in TIER2_AGENTS and agent_id != "rose":
        return []

    tasks_file = _AGENTS_DIR / agent_id / "tasks.json"
    try:
        # JSON files are UTF-8; do not depend on the process locale.
        # Reading directly (instead of exists() + read) also avoids a race
        # with the file being removed in between.
        raw = tasks_file.read_text(encoding="utf-8")
        data = _json.loads(raw)
    except (OSError, ValueError, RecursionError):
        # OSError: missing/unreadable file. ValueError: bad JSON or bad
        # UTF-8. RecursionError: pathologically nested JSON.
        return []

    return data if isinstance(data, list) else []
|
||||
|
||||
|
||||
def get_agent_tasks(agent_id: str) -> dict:
    """API: GET /api/agents/{id}/tasks — return task queue.

    Unknown agents (not in TIER2_AGENTS and not "rose") yield an
    ``{"error": ...}`` payload. Otherwise the response carries the agent id,
    the list from ``_get_task_queue`` and its length.
    """
    recognised = agent_id in TIER2_AGENTS or agent_id == "rose"
    if not recognised:
        return {"error": f"Unknown agent: {agent_id}"}

    queue = _get_task_queue(agent_id)
    payload = {"agent_id": agent_id}
    payload["tasks"] = queue
    payload["count"] = len(queue)
    return payload
|
||||
|
||||
@@ -867,6 +867,20 @@ def handle_get(handler, parsed) -> bool:
|
||||
limit = int(parse_qs(parsed.query).get("limit", ["20"])[0])
|
||||
return j(handler, _agents.get_agent_chat_history(agent_id, limit=limit))
|
||||
|
||||
# GET /api/agents/{id}/health
|
||||
if parsed.path.startswith("/api/agents/") and "/health" in parsed.path:
|
||||
parts = parsed.path.split("/")
|
||||
if len(parts) == 5 and parts[4] == "health":
|
||||
agent_id = parts[3]
|
||||
return j(handler, _agents.get_agent_health(agent_id))
|
||||
|
||||
# GET /api/agents/{id}/tasks
|
||||
if parsed.path.startswith("/api/agents/") and "/tasks" in parsed.path:
|
||||
parts = parsed.path.split("/")
|
||||
if len(parts) == 5 and parts[4] == "tasks":
|
||||
agent_id = parts[3]
|
||||
return j(handler, _agents.get_agent_tasks(agent_id))
|
||||
|
||||
# ── Profile API (GET) ──
|
||||
if parsed.path == "/api/profiles":
|
||||
from api.profiles import list_profiles_api, get_active_profile_name
|
||||
|
||||
Reference in New Issue
Block a user