commit 96977b576a31e26661c60af28dcf9fe078992336 Author: Rose Date: Mon Apr 20 10:43:30 2026 +0200 🔧 Initial dev copy from live diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..218f5d2 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,7 @@ +.git +.pytest_cache +__pycache__ +*.pyc +*.pyo +tests/ +.env* diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..938f023 --- /dev/null +++ b/.env.example @@ -0,0 +1,31 @@ +# Hermes Web UI -- local machine config template +# Copy this to .env and fill in your values. +# start.sh sources .env automatically if present. +# All values are optional -- auto-discovery will fill in anything left blank. + +# Path to your hermes-agent checkout (the repo that contains run_agent.py) +# HERMES_WEBUI_AGENT_DIR=/path/to/hermes-agent + +# Python executable to use (defaults to the agent venv if found) +# HERMES_WEBUI_PYTHON=/path/to/python + +# Bind address (default: 127.0.0.1 -- loopback only, safe default) +# HERMES_WEBUI_HOST=127.0.0.1 + +# Port to listen on (default: 8787) +# HERMES_WEBUI_PORT=8787 + +# Where to store sessions, workspaces, and other state (default: ~/.hermes/webui-mvp) +# HERMES_WEBUI_STATE_DIR=~/.hermes/webui-mvp + +# Default workspace directory shown on first launch +# HERMES_WEBUI_DEFAULT_WORKSPACE=~/workspace + +# Base directory for all Hermes state (affects all paths above if set) +# HERMES_HOME=~/.hermes + +# Path to your Hermes config.yaml (for toolsets and model config) +# HERMES_CONFIG_PATH=~/.hermes/config.yaml + +# Display name for the assistant in the UI (default: Hermes) +# HERMES_WEBUI_BOT_NAME=Hermes diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..5c31119 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,56 @@ +name: Release & Docker + +on: + push: + tags: + - 'v*' + +jobs: + release: + runs-on: ubuntu-latest + permissions: + contents: write # required: create GitHub Release + packages: 
write # required: push to ghcr.io + + steps: + - uses: actions/checkout@v4 + + # Create GitHub Release from tag with auto-generated notes + - name: Create GitHub Release + uses: softprops/action-gh-release@v2 + with: + generate_release_notes: true + + # Set up multi-arch build (QEMU + Buildx) + - uses: docker/setup-qemu-action@v3 + - uses: docker/setup-buildx-action@v3 + + # Log in to GitHub Container Registry + - name: Log in to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + # Extract tags from the git ref (supports vX.Y and vX.Y.Z formats) + - name: Extract metadata + id: meta + uses: docker/metadata-action@v5 + with: + images: ghcr.io/${{ github.repository }} + tags: | + type=match,pattern=v(\d+\.\d+(?:\.\d+)?),group=1 + type=raw,value=latest + + # Build and push multi-arch image (amd64 + arm64) + - name: Build and push Docker image + uses: docker/build-push-action@v6 + with: + context: . 
+ platforms: linux/amd64,linux/arm64 + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + cache-from: type=gha + cache-to: type=gha,mode=max diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 0000000..b16b1d5 --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,30 @@ +name: Tests + +on: + pull_request: + branches: [master] + push: + branches: [master] + +jobs: + test: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ['3.11', '3.12', '3.13'] + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install pyyaml>=6.0 pytest pytest-timeout + + - name: Run tests + run: pytest tests/ -v --timeout=60 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..20373fa --- /dev/null +++ b/.gitignore @@ -0,0 +1,38 @@ +# Python cache +__pycache__/ +*.pyc +*.pyo +*.pyd + +# Backup and temporary files +*.bak +*.swp +*.swo + +# Archive directory (pre-git backups, kept on disk but not tracked) +archive/ + +# Local environment and secrets (but keep the example template) +.env +.env.* +!.env.example +.claude/ +CLAUDE.md +AGENTS.md +.cursorrules +.windsurfrules +.aider* +copilot-instructions.md + +# Generated screenshots and transient artifacts +screenshot-*.png +full-UI.png + +# OS files +.DS_Store +Thumbs.db + +# Local reference clones — never committed (except tracked design/UI-UX reference pages) +docs/* +!docs/ui-ux/ +!docs/ui-ux/** diff --git a/.signing_key b/.signing_key new file mode 100644 index 0000000..52c135b --- /dev/null +++ b/.signing_key @@ -0,0 +1 @@ +)DÈ·:ª§Þ˜û®m§¹ÌgývàˆssuÛEH0Óç \ No newline at end of file diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md new file mode 100644 index 0000000..45e18bb --- /dev/null +++ b/ARCHITECTURE.md 
@@ -0,0 +1,1631 @@ +# Hermes Web UI: Developer and Architecture Guide + +> This document is the canonical reference for anyone (human or agent) working on the +> Hermes Web UI. It covers the exact current state of the code, every design decision and +> quirk discovered during development, and a phased architecture improvement roadmap that +> runs in parallel with the feature roadmap in ROADMAP.md. +> +> Keep this document updated as architecture changes are made. + +> Current shipped build: `v0.50.36-local.1` (April 16, 2026). +> Baseline: upstream `nesquena/hermes-webui` `v0.50.36`. +> Intentional local delta: first-time password enablement from Settings immediately issues a `hermes_session` cookie so the current browser remains signed in. The previous `Assistant Reply Language` customization has been removed, legacy `assistant_language` settings are filtered out on load/save, the workspace panel closed/open state is preloaded via a `documentElement` dataset marker before `style.css` paints to avoid a first-load desktop flash, transcript disclosure cards now animate caret rotation and body expansion with transitionable `max-height`/`opacity` states instead of `display:none/block`, and thinking cards now share the same rounded bordered card chrome as tool cards while keeping their gold palette. +> Automated coverage: 1353 tests collected (`pytest tests/ --collect-only -q`). + +--- + +## 1. Overview and Purpose + +The Hermes Web UI is a lightweight web application that gives you a browser-based +interface to the Hermes agent that is functionally equivalent to the CLI. It is modeled on +the Claude-style interface: a sidebar for session management, a central chat area, +and a demand-driven right panel used for workspace browsing and preview surfaces. +The right panel is closed by default on desktop and opens only when it is actively +being used for browsing or previewing content. 
+ +To prevent a visible first-paint mismatch on refresh, `static/index.html` preloads the +saved workspace panel state into `document.documentElement.dataset.workspacePanel` +before the main stylesheet loads. Desktop CSS honors that preload marker immediately, +and `static/boot.js` keeps the dataset synchronized with the runtime panel state machine. + +The design philosophy is deliberately minimal. There is no build step, no bundler, no +frontend framework. The Python server is split into a routing shell (server.py) and +business logic modules (api/). The frontend is seven vanilla JS modules loaded from static/. +This makes the code easy to modify from a terminal or by an agent. + +For the current local build, the codebase is intentionally as close to upstream as possible: +the app now tracks upstream `v0.50.36`, keeps the password-session continuity patch in the +settings/onboarding flow, and does not carry forward the prior reply-language preference +feature. + +Hermes-level chrome is intentionally consolidated: the sidebar has no dedicated brand header. +Instead, the footer exposes a single "Hermes WebUI" launch button that opens one tabbed +control-center modal for global preferences, conversation import/export, and clear-conversation +actions. The topbar remains focused on conversation context and the workspace/files toggle. + +--- + +## 2. File Inventory + + / + server.py Thin routing shell + HTTP Handler + auth middleware. ~81 lines. + Delegates all route handling to api/routes.py. + bootstrap.py One-shot launcher: optional agent install, deps, health wait, browser open. + start.sh Thin wrapper around bootstrap.py for shell-based startup. 
+ Dockerfile python:3.12-slim container image (~23 lines) + docker-compose.yml Compose config with named volume and optional auth (~22 lines) + .dockerignore Excludes .git, tests/, .env* from Docker builds + api/ + __init__.py Package marker + auth.py Optional password authentication, signed cookies (~149 lines) + config.py Discovery, globals, model detection, reloadable config (~701 lines) + helpers.py HTTP helpers: j(), bad(), require(), safe_resolve(), security headers (~71 lines) + models.py Session model + CRUD, per-session profile tracking (~137 lines) + profiles.py Profile state management, hermes_cli wrapper (~246 lines) + onboarding.py First-run onboarding status, real provider config writes, and readiness detection. + routes.py All GET + POST route handlers (~1180 lines) + startup.py Startup helpers: auto_install_agent_deps() (~50 lines) + streaming.py SSE engine, run_agent, cancel, HERMES_HOME save/restore (~236 lines) + upload.py Multipart parser, file upload handler (~78 lines) + workspace.py File ops: list_dir, read_file_content, workspace helpers (~77 lines) + static/ + index.html HTML template (~364 lines) + style.css All CSS incl. mobile responsive (~670 lines) + ui.js DOM helpers, renderMd, tool cards, model dropdown, file tree (~977 lines) + workspace.js File preview, file ops, loadDir, clearPreview (~185 lines) + sessions.js Session CRUD, list rendering, search, SVG icons, dropdown actions (~533 lines) + messages.js send(), SSE event handlers, approval, transcript (~297 lines) + panels.js Cron, skills, memory, workspace, profiles, todo, settings (~974 lines) + commands.js Slash command registry, parser, autocomplete dropdown (~156 lines) + onboarding.js First-run wizard overlay, provider setup flow, and settings/workspace orchestration. 
+ boot.js Event wiring, mobile sidebar/workspace nav, voice input, boot IIFE (~338 lines) + tests/ + conftest.py Isolated test server (port 8788, separate HERMES_HOME) (~240 lines) + test_sprint{1-20b}.py Feature tests per sprint (21 files, 415 test functions) + test_regressions.py Permanent regression gate (23 tests) + AGENTS.md Instruction file for agents working in this directory. + ROADMAP.md Feature and product roadmap document. + SPRINTS.md Forward sprint plan with CLI + Claude parity targets. + ARCHITECTURE.md THIS FILE. + TESTING.md Manual browser test plan and automated coverage reference. + CHANGELOG.md Release notes per sprint. + BUGS.md Bug backlog and fixed items tracker. + requirements.txt Python dependencies. + .env.example Sample environment variable overrides. + +State directory (runtime data, separate from source): + + ~/.hermes/webui-mvp/ + sessions/ One JSON file per session: {session_id}.json + workspaces.json Registered workspaces list + last_workspace.txt Last-used workspace path + settings.json User settings (default model, workspace, send key, password hash) + projects.json Session project groups (name, color, id) + +Log file: + + /tmp/webui-mvp.log stdout/stderr from the background server process + +--- + +## 3. 
Runtime Environment + +- Python interpreter: <hermes-agent-dir>/venv/bin/python +- The venv has all Hermes agent dependencies (run_agent, tools/*, cron/*) +- Server binds to 127.0.0.1:8787 (localhost only, not public internet) +- Access from Mac: SSH tunnel: ssh -N -L 8787:127.0.0.1:8787 <user>@<server-host> +- The server imports Hermes modules via sys.path.insert(0, parent_dir) + +Environment variables controlling behavior: + + HERMES_WEBUI_HOST Bind address (default: 127.0.0.1) + HERMES_WEBUI_PORT Port (default: 8787) + HERMES_WEBUI_DEFAULT_WORKSPACE Default workspace path for new sessions + HERMES_WEBUI_STATE_DIR Where sessions/ folder lives + HERMES_CONFIG_PATH Path to ~/.hermes/config.yaml + HERMES_WEBUI_DEFAULT_MODEL Default LLM model string + HERMES_WEBUI_PASSWORD Optional: enable password auth (off by default) + HERMES_HOME Base directory for Hermes state (~/.hermes by default) + +Test isolation environment variables (set by conftest.py): + + HERMES_WEBUI_PORT=8788 Isolated test port + HERMES_WEBUI_STATE_DIR=~/.hermes/webui-mvp-test Isolated test state + HERMES_WEBUI_DEFAULT_WORKSPACE=.../test-workspace Isolated test workspace + +Tests NEVER talk to the production server (port 8787). +The test state dir is wiped before each test session and deleted after. +See: <repo>/tests/conftest.py + +Per-request environment variables (set by chat handler, restored after): + + TERMINAL_CWD Set to session.workspace before running agent. + The terminal tool reads this to default cwd. + HERMES_EXEC_ASK Set to "1" to enable approval gate for dangerous commands. + HERMES_SESSION_KEY Set to session_id. The approval tool keys pending entries + by this value, enabling per-session approval state. + HERMES_HOME Set to the active profile's directory before running agent. + Saved and restored around each agent run. + +WARNING: These env vars are process-global. Two concurrent chat requests will overwrite +each other's values. This is safe only for single-user, single-concurrent-request use. +See Architecture Phase B for the fix. 
+ +--- + +## 4. Server Architecture: Current State + +### 4.1 HTTP Server Layer + +Python stdlib ThreadingHTTPServer (from http.server). Each HTTP request runs in its own +thread. The Handler class subclasses BaseHTTPRequestHandler with two methods: + + do_GET Routes: /, /health, /api/session, /api/sessions, /api/list, + /api/chat/stream, /api/file, /api/approval/pending + do_POST Routes: /api/upload, /api/session/new, /api/session/update, + /api/session/delete, /api/chat/start, /api/chat, + /api/approval/respond + +Routing is a flat if/elif chain inside each method. No routing framework. + +Helper functions used by all handlers: + + j(handler, payload, status=200) Sends JSON response with correct headers + t(handler, payload, status=200, ct) Sends plain text or HTML response + read_body(handler) Reads and JSON-parses the POST body + +CRITICAL ORDERING RULE in do_POST: +The /api/upload check MUST appear BEFORE calling read_body(). read_body() calls +handler.rfile.read() which consumes the HTTP body stream. The upload handler also +needs rfile (to read the multipart payload). If read_body() runs first on a multipart +request, the upload handler receives an empty body and the upload silently fails. + +### 4.2 Session Model + +Session is a plain Python class (not a dataclass, not SQLAlchemy): + + Fields: + session_id hex string, 12 chars (uuid4().hex[:12]) + title string, auto-set from first user message + workspace absolute path string, resolved at creation + model model ID string (e.g. 
"anthropic/claude-sonnet-4.6") + messages list of OpenAI-format message dicts + created_at float Unix timestamp + updated_at float Unix timestamp, updated on every save() + pinned bool, default False (Sprint 12) + archived bool, default False (Sprint 14) + project_id string or null, FK to projects.json (Sprint 15) + tool_calls list of tool call dicts (Sprint 10) + + Key methods: + path (property) Returns SESSION_DIR/{session_id}.json + save() Writes __dict__ as pretty JSON to path, updates updated_at + load(cls, sid) Class method: reads JSON from disk, returns Session or None + compact() Returns metadata-only dict (no messages) for the session list + + In-memory cache: + SESSIONS = {} dict: session_id -> Session object + LOCK = threading.Lock() defined but NOT currently used around SESSIONS access + + get_session(sid): checks SESSIONS cache, loads from disk on miss, raises KeyError + new_session(workspace, model): creates Session, caches in SESSIONS, saves, returns + all_sessions(): scans SESSION_DIR/*.json + SESSIONS, deduplicates, sorts by updated_at, + returns list of compact() dicts + + all_sessions() does a full directory scan on every call. + With 10 sessions: negligible. With 1000+: will be slow. + See Architecture Phase C for the index file fix. + +title_from(): takes messages list, finds first user message, returns first 64 chars. +Called after run_conversation() completes to set the session title retroactively. + +### 4.3 SSE Streaming Engine + +This is the most architecturally interesting part. Two endpoints cooperate: + + POST /api/chat/start Receives the user message. Creates a queue.Queue, stores it + in STREAMS[stream_id], spawns a daemon thread running + _run_agent_streaming(), returns {stream_id} immediately. + + GET /api/chat/stream Long-lived SSE connection. Reads from STREAMS[stream_id] + and forwards events to the browser until 'done' or 'error'. 
+ +Queue registry: + + STREAMS = {} dict: stream_id -> queue.Queue + STREAMS_LOCK = threading.Lock() + +SSE event types and their data shapes: + + token {"text": "..."} LLM token delta + tool {"name": "...", "preview": "..."} Tool invocation started + approval {"command": "...", "description": "...", "pattern_keys": [...]} + done {"session": {compact_fields + messages}} Agent finished successfully + error {"message": "...", "trace": "..."} Agent threw exception + +The SSE handler loop: + - Blocks on queue.get(timeout=30) + - On timeout (no events in 30s): sends a heartbeat comment (": heartbeat\n\n") + to keep the connection alive through proxies and firewalls + - On 'done' or 'error' event: breaks the loop and returns + - Catches BrokenPipeError and ConnectionResetError silently (browser disconnected) + +Stream cleanup: _run_agent_streaming() pops its stream_id from STREAMS in a finally +block. If the browser disconnects mid-stream, the daemon thread runs to completion and +then cleans up. The queue fills and the put_nowait() calls fail silently (queue.Full +is caught). + +Fallback sync endpoint: POST /api/chat still exists and holds the connection open until +the agent finishes. The frontend never uses it but it can be useful for debugging. + +### 4.4 Agent Invocation (_run_agent_streaming) + + def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id): + +1. Fetches session from SESSIONS (not from disk -- session was just updated by /api/chat/start) +2. Sets TERMINAL_CWD, HERMES_EXEC_ASK, HERMES_SESSION_KEY env vars +3. Creates AIAgent with: + - model=model, platform='cli', quiet_mode=True + - enabled_toolsets=CLI_TOOLSETS (from config.yaml or hardcoded default) + - session_id=session_id + - stream_delta_callback=on_token (fires per token) + - tool_progress_callback=on_tool (fires per tool invocation) +4. 
Calls agent.run_conversation(user_message=msg_text, conversation_history=s.messages, + task_id=session_id) + NOTE: keyword is task_id NOT session_id (common mistake, documented in skill) +5. On return: updates s.messages, calls title_from(), saves session +6. Puts ('done', {session: ...}) into queue +7. Finally block: restores env vars, pops stream_id from STREAMS + +on_token callback: + if text is None: return # end-of-stream sentinel from AIAgent + put('token', {'text': text}) + +on_tool callback: + put('tool', {'name': name, 'preview': preview}) + # Also immediately surface any pending approval: + if has_pending(session_id): + with _lock: p = dict(_pending.get(session_id, {})) + if p: put('approval', p) + +The approval surface-on-tool logic means approvals appear immediately after the tool +fires (within the same SSE stream), without waiting for the next poll cycle. + +### 4.5 Approval System Integration + +The approval system uses the existing Hermes gateway module at tools/approval.py. +All state lives in module-level variables in that file: + + _pending = {} dict: session_key -> pending_entry_dict + _lock = Lock() protects _pending + _permanent_approved set of permanently approved pattern keys + +Because server.py imports tools.approval at module load time and everything runs in the +same process, this state IS shared between HTTP threads and agent daemon threads. + +Important: this only works because Python imports are cached (sys.modules). The same +module object is used everywhere. If the approval module were ever imported in a subprocess +or via importlib.reload(), this would break. 
+ +GET /api/approval/pending: + - Peeks at _pending[sid] without removing it + - Returns {pending: entry} or {pending: null} + - Called by the browser every 1500ms while S.busy is true (polling fallback) + +POST /api/approval/respond: + - Pops _pending[sid] (removes it) + - For choice "once" or "session": calls approve_session(sid, pattern_key) for each key + - For choice "always": calls approve_session + approve_permanent + save_permanent_allowlist + - For choice "deny": just pops, does nothing (agent gets denied result) + - Returns {ok: true, choice: choice} + +### 4.6 File Upload Parser + +parse_multipart(rfile, content_type, content_length): + - Reads all content_length bytes from rfile into memory (up to MAX_UPLOAD_BYTES = 20MB) + - Extracts boundary from Content-Type header + - Splits raw bytes on b'--' + boundary + - For each part: parses MIME headers via email.parser.HeaderParser + - Returns (fields, files) where fields is {name: value} and files is {name: (filename, bytes)} + +handle_upload(handler): + - Calls parse_multipart() + - Validates: file field present, filename present, session exists + - Sanitizes filename: replaces non-word chars with _, truncates to 200 chars + - Writes bytes to session.workspace / safe_name + - Returns {filename, path, size} + +Why not cgi.FieldStorage: + - Deprecated in Python 3.11+ + - Broken for binary files (silently corrupts or throws) + - The manual parser handles all file types correctly + +### 4.7 File System Operations + +safe_resolve(root, requested): + - Resolves requested path relative to root + - Calls .relative_to(root) to assert the result is inside root + - Raises ValueError on path traversal (../../etc/passwd) + +list_dir(workspace, rel='.'): + - Calls safe_resolve, then iterdir() + - Sorts: directories first, then files, case-insensitive alpha within each group + - Returns up to 200 entries with {name, path, type, size} + +read_file_content(workspace, rel): + - Calls safe_resolve + - Enforces MAX_FILE_BYTES 
= 200KB size limit + - Reads as UTF-8 with errors='replace' (binary files show replacement chars) + - Returns {path, content, size, lines} + +--- + +## 5. Frontend Architecture: Current State + +### 5.1 Structure + +The frontend is served from static/ as separate files: one HTML template, one CSS file, +and six JavaScript modules (~2,786 lines total). External dependencies: Prism.js (syntax +highlighting) and Mermaid.js (diagrams) from CDN, both loaded async/deferred with SRI hashes. + +Six JS modules loaded in order at end of `<body>`: + 1. ui.js (~846 lines) DOM helpers, renderMd, tool card rendering, global state + 2. workspace.js (~169 lines) File tree, preview, file operations + 3. sessions.js (~532 lines) Session CRUD, list rendering, search, SVG icons, dropdown actions, project picker + 4. messages.js (~293 lines) send(), SSE event handlers, approval, transcript + 5. panels.js (~771 lines) Cron, skills, memory, workspace, todo, switchPanel + 6. boot.js (~175 lines) Event wiring + boot IIFE + +sessions.js defines an `ICONS` constant at module level with hardcoded SVG strings for all +session action buttons (pin, unpin, folder, archive, unarchive, duplicate, trash). All icons +inherit `currentColor` for consistent theming. + +Three-panel layout (in static/index.html): +