From 4422a87de958508947caf991780e17331ed2a5a3 Mon Sep 17 00:00:00 2001 From: Nathan Esquenazi Date: Wed, 8 Apr 2026 14:22:39 +0000 Subject: [PATCH 1/2] fix: resolve _ENV_LOCK deadlock that blocks chat after first message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The v0.39.0 security sprint introduced _ENV_LOCK to protect env var mutations in the streaming path. The implementation held the lock for the entire agent run (potentially minutes), then tried to re-acquire it in the finally block — a guaranteed deadlock on any non-reentrant threading.Lock(). Result: first message completes (done event fires before finally hits), but the lock is never released. Every subsequent chat/start POST blocks forever waiting for that lock. Fix: narrow the lock scope to just the env mutation. Set the vars inside the with block, then let the lock release before the agent starts. The finally block re-acquires cleanly since it no longer re-enters an already-held lock. No logic change — only the critical section boundary moves. --- api/streaming.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/api/streaming.py b/api/streaming.py index 7d5c455..1f1f6f6 100644 --- a/api/streaming.py +++ b/api/streaming.py @@ -107,6 +107,9 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta HERMES_HOME=_profile_home, ) # Still set process-level env as fallback for tools that bypass thread-local + # Acquire lock only for the env mutation, then release before the agent runs. + # The finally block re-acquires to restore — keeping critical sections short + # and preventing a deadlock where the restore would re-enter the same lock. 
with _ENV_LOCK: old_cwd = os.environ.get('TERMINAL_CWD') old_exec_ask = os.environ.get('HERMES_EXEC_ASK') @@ -117,8 +120,8 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta os.environ['HERMES_SESSION_KEY'] = session_id if _profile_home: os.environ['HERMES_HOME'] = _profile_home - - try: + # Lock released — agent runs without holding it + try: def on_token(text): if text is None: return # end-of-stream sentinel @@ -378,16 +381,16 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta usage['threshold_tokens'] = getattr(_cc, 'threshold_tokens', 0) or 0 usage['last_prompt_tokens'] = getattr(_cc, 'last_prompt_tokens', 0) or 0 put('done', {'session': s.compact() | {'messages': s.messages, 'tool_calls': tool_calls}, 'usage': usage}) - finally: + finally: with _ENV_LOCK: - if old_cwd is None: os.environ.pop('TERMINAL_CWD', None) - else: os.environ['TERMINAL_CWD'] = old_cwd - if old_exec_ask is None: os.environ.pop('HERMES_EXEC_ASK', None) - else: os.environ['HERMES_EXEC_ASK'] = old_exec_ask - if old_session_key is None: os.environ.pop('HERMES_SESSION_KEY', None) - else: os.environ['HERMES_SESSION_KEY'] = old_session_key - if old_hermes_home is None: os.environ.pop('HERMES_HOME', None) - else: os.environ['HERMES_HOME'] = old_hermes_home + if old_cwd is None: os.environ.pop('TERMINAL_CWD', None) + else: os.environ['TERMINAL_CWD'] = old_cwd + if old_exec_ask is None: os.environ.pop('HERMES_EXEC_ASK', None) + else: os.environ['HERMES_EXEC_ASK'] = old_exec_ask + if old_session_key is None: os.environ.pop('HERMES_SESSION_KEY', None) + else: os.environ['HERMES_SESSION_KEY'] = old_session_key + if old_hermes_home is None: os.environ.pop('HERMES_HOME', None) + else: os.environ['HERMES_HOME'] = old_hermes_home except Exception as e: print('[webui] stream error:\n' + traceback.format_exc(), flush=True) From d919b584c6c527f7c32d98a242ddf066bb9c0f5a Mon Sep 17 00:00:00 2001 From: Nathan Esquenazi Date: Wed, 8 Apr 2026 
07:26:41 -0700 Subject: [PATCH 2/2] docs: v0.39.1 release notes for ENV_LOCK deadlock fix Co-Authored-By: Claude Opus 4.6 (1M context) --- CHANGELOG.md | 11 +++++++++++ static/index.html | 2 +- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3b7af9d..7e96f52 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,17 @@ --- +## [v0.39.1] — 2026-04-08 + +### Bug Fixes +- **_ENV_LOCK deadlock resolved.** The environment variable lock was held for + the entire agent run (including all tool calls and streaming) and then re-acquired + in the finally block while still held, so it was never released and every later + request blocked. Now the lock is held only for the brief env variable read/write, + released before the agent runs, and re-acquired in the finally block to restore. + +--- + ## [v0.39.0] — 2026-04-08 ### Security (12 fixes — PR #171 by @betamod, reviewed by @nesquena-hermes) diff --git a/static/index.html b/static/index.html index 3af9c3a..67ad9f1 100644 --- a/static/index.html +++ b/static/index.html @@ -14,7 +14,7 @@