From f948baceb63f6f8f1440c3fd6ac3fac4495625cf Mon Sep 17 00:00:00 2001 From: nesquena-hermes Date: Mon, 13 Apr 2026 12:23:16 -0700 Subject: [PATCH] fix: CSRF check fails behind reverse proxy on non-standard ports (#360) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: CSRF check fails behind reverse proxy on non-standard ports When serving behind a reverse proxy (e.g. Nginx Proxy Manager) on a non-standard port like 8000, the browser sends `Origin: https://example.com:8000` but the proxy forwards `Host: example.com` (without the port). The existing CSRF check compared these as raw strings, causing all POST requests to be rejected with 403. This commit: - Adds `_normalize_host_port()` to properly parse host:port pairs (incl. IPv6) - Adds `_ports_match()` that treats absent port as equivalent to 80/443 - Adds `HERMES_WEBUI_ALLOWED_ORIGINS` env var for explicitly trusting origins when port normalization alone isn't sufficient (e.g. port 8000) - Adds unit tests covering port normalization, allowlist, and rejection cases Co-Authored-By: Claude Opus 4.6 (1M context) * fix: CSRF port normalization — scheme-aware, allowlist validation, 29 tests (#360) api/routes.py: - _normalize_host_port(): parse host:port including IPv6 bracket notation - _ports_match(scheme, origin_port, allowed_port): scheme-aware — http absent=:80, https absent=:443; prevents cross-protocol false match (http://host:80 no longer passes for https://host:443 server) - _allowed_public_origins(): parse HERMES_WEBUI_ALLOWED_ORIGINS env var; warn and skip entries missing scheme prefix - _check_csrf(): extract origin scheme, pass to _ports_match; add origin_scheme tests/test_sprint29.py: 29 new tests (5 from PR + 24 added in review) - Unit tests for _normalize_host_port and _ports_match helpers - Cross-protocol rejection (http vs https default ports) - Explicit :80 / :443 same-origin pass - Non-default port rejection - Bug scenario with/without allowlist - Comma-separated allowlist - No-scheme allowlist warning - Trailing-slash normalization CHANGELOG.md: v0.50.16 entry; 900 tests total (up from 871) --------- Co-authored-by: liangxu.5 Co-authored-by: Claude Opus 4.6 (1M context) Co-authored-by: Nathan Esquenazi --- CHANGELOG.md | 9 ++ api/routes.py | 78 ++++++++++++++- tests/test_sprint29.py | 214 ++++++++++++++++++++++++++++++++++++++++- 3 files changed, 296 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a8e5331..702d22b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,15 @@ --- +## [v0.50.16] Fix CSRF check failing behind reverse proxy on non-standard ports (PR #360) + +- **CSRF no longer rejects POST requests from reverse-proxied deployments on non-standard ports** (`api/routes.py`, fixes #355): When serving behind Nginx Proxy Manager or similar on a port like `:8000`, browsers send `Origin: https://app.example.com:8000` while the proxy forwards `Host: app.example.com` (port stripped). The old string comparison failed this as cross-origin. Two changes fix it: + - `_normalize_host_port()`: properly splits host:port strings including IPv6 bracket notation (`[::1]:8080`) + - `_ports_match(scheme, origin_port, allowed_port)`: scheme-aware port equivalence — absent port equals `:80` for `http://` and `:443` for `https://`. This prevents the previous cross-protocol confusion where `http://host` could incorrectly match an `https://host:443` server (security fix applied on top of the original PR) + - `HERMES_WEBUI_ALLOWED_ORIGINS` env var: comma-separated explicit origin allowlist for cases where port normalization alone isn't sufficient (e.g. non-standard ports like `:8000` where the proxy strips the port entirely). Entries without a scheme (`https://`) are rejected with a startup warning. +- **Security fix applied during review**: the original `_ports_match` treated both port 80 and port 443 as interchangeable with "absent port", which is scheme-unaware. An `http://host` origin would pass for an `https://host:443` server. Fixed by making the default-port lookup scheme-specific. + - 29 new tests in `tests/test_sprint29.py` (5 from PR + 24 added during review): cover scheme-aware port matching, cross-protocol rejection, unit tests for `_normalize_host_port` and `_ports_match`, allowlist validation, comma-separated origins, no-scheme allowlist warning, the bug scenario with and without the allowlist; 900 tests total (up from 871) + ## [v0.50.15] KaTeX math rendering for LaTeX in chat and workspace previews (fixes #347) - **LaTeX / KaTeX math now renders in chat messages and workspace file previews** (`static/ui.js`, `static/workspace.js`, `static/style.css`, `static/index.html`): Inline math (`$...$`, `\(...\)`) and display math (`$$...$$`, `\[...\]`) are rendered via KaTeX instead of displaying as raw text. Follows the existing mermaid lazy-load pattern: delimiters are stashed before markdown processing, placeholder elements are emitted, and KaTeX JS is loaded from CDN on first use — no KaTeX JS is loaded unless math is present. diff --git a/api/routes.py b/api/routes.py index 3322cab..4875ffe 100644 --- a/api/routes.py +++ b/api/routes.py @@ -58,6 +58,68 @@ from api.helpers import ( import re as _re +def _normalize_host_port(value: str) -> tuple[str, str | None]: + """Split a host or host:port string into (hostname, port|None). + Handles IPv6 bracket notation, e.g. [::1]:8080.""" + value = value.strip().lower() + if not value: + return '', None + if value.startswith('['): + end = value.find(']') + if end != -1: + host = value[1:end] + rest = value[end + 1 :] + if rest.startswith(':') and rest[1:].isdigit(): + return host, rest[1:] + return host, None + if value.count(':') == 1: + host, port = value.rsplit(':', 1) + if port.isdigit(): + return host, port + return value, None + + +def _ports_match(origin_scheme: str, origin_port: str | None, allowed_port: str | None) -> bool: + """Return True when two ports should be considered equivalent, scheme-aware. + + Treats an absent port as the scheme default: port 80 for http, port 443 for https. + Port 80 is NOT treated as equivalent to 443 (different protocols = different origins). + """ + if origin_port == allowed_port: + return True + # Determine the default port for the origin's scheme + default = '443' if origin_scheme == 'https' else '80' + if not origin_port and allowed_port == default: + return True + if not allowed_port and origin_port == default: + return True + return False + + +def _allowed_public_origins() -> set[str]: + """Parse HERMES_WEBUI_ALLOWED_ORIGINS env var (comma-separated) into a set. + + Each entry must include the scheme, e.g. https://myapp.example.com:8000. + Entries without a scheme are silently skipped and a warning is printed. + """ + raw = os.getenv('HERMES_WEBUI_ALLOWED_ORIGINS', '') + result = set() + for value in raw.split(','): + value = value.strip().rstrip('/').lower() + if not value: + continue + if not (value.startswith('http://') or value.startswith('https://')): + import sys + print( + f"[webui] WARNING: HERMES_WEBUI_ALLOWED_ORIGINS entry {value!r} is missing " + f"the scheme (expected https://hostname or http://hostname). Entry ignored.", + flush=True, file=sys.stderr, + ) + continue + result.add(value) + return result + + def _check_csrf(handler) -> bool: """Reject cross-origin POST requests. Returns True if OK.""" origin = handler.headers.get("Origin", "") @@ -71,10 +133,16 @@ def _check_csrf(handler) -> bool: if not m: return False origin_host = m.group(1) + origin_scheme = m.group(0).split('://')[0].lower() # 'http' or 'https' + origin_name, origin_port = _normalize_host_port(origin_host) + # Check against explicitly allowed public origins (env var) + origin_value = m.group(0).rstrip('/').lower() + if origin_value in _allowed_public_origins(): + return True # Allow same-origin: check Host, X-Forwarded-Host (reverse proxy), and # X-Real-Host against the origin. Reverse proxies (Caddy, nginx) set # X-Forwarded-Host to the client's original Host header. - allowed_hosts = { + allowed_hosts = [ h.strip() for h in [ host, @@ -82,9 +150,11 @@ def _check_csrf(handler) -> bool: handler.headers.get("X-Real-Host", ""), ] if h.strip() - } - if origin_host in allowed_hosts: - return True + ] + for allowed in allowed_hosts: + allowed_name, allowed_port = _normalize_host_port(allowed) + if origin_name == allowed_name and _ports_match(origin_scheme, origin_port, allowed_port): + return True return False diff --git a/tests/test_sprint29.py b/tests/test_sprint29.py index c554185..c80903e 100644 --- a/tests/test_sprint29.py +++ b/tests/test_sprint29.py @@ -55,6 +55,13 @@ def post(path, body=None, headers=None): class TestCSRF: + @staticmethod + def _csrf_allowed(headers): + from types import SimpleNamespace + from api.routes import _check_csrf + + return _check_csrf(SimpleNamespace(headers=headers)) + def test_no_origin_no_referer_allowed(self): """Curl-style request with no Origin/Referer must pass CSRF check.""" body, status = post("/api/sessions/new", {}) @@ -87,7 +94,212 @@ class TestCSRF: {}, headers={"Referer": "http://127.0.0.1:8788/", "Host": "127.0.0.1:8788"}, ) - assert status != 403, f"Expected non-403 for same-referer request, got {status}: {body}" + assert status != 403, f"Expected non-403 for same-referer request, got {status}" + + def test_proxy_host_default_https_port_matches_http_origin(self): + """http:// origin without port must NOT match X-Forwarded-Host with :443. + + After the scheme-aware _ports_match fix: http:// absent port = :80, + which is not equal to :443. These are different protocols/ports and + should be rejected. In real reverse proxy scenarios where the external + URL is HTTPS, the browser sends Origin: https://... not http://... + See test_proxy_host_default_https_port_matches_https_origin for the + real-world proxy case that should pass. + """ + assert not self._csrf_allowed({ + "Origin": "http://example.com", + "X-Forwarded-Host": "example.com:443", + }), 'http origin (port :80) must not match https host (:443)' + + def test_proxy_host_default_https_port_matches_https_origin(self): + """HTTPS Origin without port should match X-Forwarded-Host with explicit :443.""" + assert self._csrf_allowed({ + "Origin": "https://example.com", + "X-Forwarded-Host": "example.com:443", + }) + + def test_proxy_host_port_normalization_still_rejects_other_host(self): + """Port normalization must not allow different hosts through.""" + assert not self._csrf_allowed({ + "Origin": "https://evil.com", + "X-Forwarded-Host": "example.com:443", + }) + + def test_allowed_public_origin_bypasses_missing_proxy_port(self, monkeypatch): + """Explicitly configured public origins should pass even if proxy strips :port from Host.""" + monkeypatch.setenv('HERMES_WEBUI_ALLOWED_ORIGINS', 'https://myapp.example.com:8000') + assert self._csrf_allowed({ + 'Origin': 'https://myapp.example.com:8000', + 'Host': 'myapp.example.com', + 'X-Forwarded-Proto': 'https', + }) + + def test_other_origin_not_allowed_by_public_origin_allowlist(self, monkeypatch): + """Allowlist must stay exact; unrelated origins must still be rejected.""" + monkeypatch.setenv('HERMES_WEBUI_ALLOWED_ORIGINS', 'https://myapp.example.com:8000') + assert not self._csrf_allowed({ + 'Origin': 'https://evil.com:8000', + 'Host': 'myapp.example.com', + 'X-Forwarded-Proto': 'https', + }) + + # ── Port normalization: scheme-aware (M-1 fix) ──────────────────────────── + + def test_cross_protocol_port_not_confused_http_origin_https_host(self): + """http:// origin must NOT match a host with :443 (HTTPS default). + + Before M-1 fix, _ports_match treated both 80 and 443 as equivalent to + absent port, allowing http://host to match https://host:443 servers. + """ + assert not self._csrf_allowed({ + 'Origin': 'http://example.com', # http, no port = :80 + 'X-Forwarded-Host': 'example.com:443', # HTTPS port + }), 'http origin should NOT match host advertising port 443' + + def test_cross_protocol_port_not_confused_https_origin_http_host(self): + """https:// origin must NOT match a host with :80 (HTTP default).""" + assert not self._csrf_allowed({ + 'Origin': 'https://example.com', # https, no port = :443 + 'X-Forwarded-Host': 'example.com:80', # HTTP port + }), 'https origin should NOT match host advertising port 80' + + def test_http_explicit_port_80_matches_host_without_port(self): + """http://example.com:80 is the same origin as http://example.com.""" + assert self._csrf_allowed({ + 'Origin': 'http://example.com:80', + 'Host': 'example.com', + }) + + def test_https_explicit_port_443_matches_host_without_port(self): + """https://example.com:443 is the same origin as https://example.com.""" + assert self._csrf_allowed({ + 'Origin': 'https://example.com:443', + 'Host': 'example.com', + }) + + def test_non_default_port_not_waived(self): + """Non-default ports (e.g. :8000) must not be treated as equivalent to absent.""" + assert not self._csrf_allowed({ + 'Origin': 'https://example.com:8000', + 'Host': 'example.com', + }) + + # ── Bug scenario: proxy strips non-standard port ────────────────────────── + + def test_bug_origin_8000_host_without_port_rejected_without_allowlist(self, monkeypatch): + """Without the allowlist, origin with :8000 must be rejected when proxy strips port. + + This documents the original bug: Origin: https://app.com:8000 with + Host: app.com (proxy stripped the port). Before this PR that returned 403. + The fix (HERMES_WEBUI_ALLOWED_ORIGINS) handles it; without the env var + the request is still rejected, which is the safe default. + """ + monkeypatch.delenv('HERMES_WEBUI_ALLOWED_ORIGINS', raising=False) + assert not self._csrf_allowed({ + 'Origin': 'https://myapp.example.com:8000', + 'Host': 'myapp.example.com', + }), 'without allowlist, port mismatch must be rejected (safe default)' + + def test_allowed_origins_comma_separated(self, monkeypatch): + """HERMES_WEBUI_ALLOWED_ORIGINS accepts multiple comma-separated origins.""" + monkeypatch.setenv( + 'HERMES_WEBUI_ALLOWED_ORIGINS', + 'https://app1.example.com:8000, https://app2.example.com:9000', + ) + assert self._csrf_allowed({'Origin': 'https://app1.example.com:8000', 'Host': 'proxy.internal'}) + assert self._csrf_allowed({'Origin': 'https://app2.example.com:9000', 'Host': 'proxy.internal'}) + assert not self._csrf_allowed({'Origin': 'https://evil.com:8000', 'Host': 'proxy.internal'}) + + def test_allowed_origins_without_scheme_ignored(self, monkeypatch, capsys): + """Allowlist entries missing the scheme are skipped and a warning is printed.""" + monkeypatch.setenv('HERMES_WEBUI_ALLOWED_ORIGINS', 'myapp.example.com:8000') + from api.routes import _allowed_public_origins + result = _allowed_public_origins() + assert len(result) == 0, 'entry without scheme must be ignored' + captured = capsys.readouterr() + assert 'WARNING' in captured.err and 'scheme' in captured.err.lower() + + def test_allowed_origins_trailing_slash_normalized(self, monkeypatch): + """Trailing slash in allowlist entry is stripped before comparison.""" + monkeypatch.setenv('HERMES_WEBUI_ALLOWED_ORIGINS', 'https://myapp.example.com:8000/') + assert self._csrf_allowed({ + 'Origin': 'https://myapp.example.com:8000', + 'Host': 'proxy.internal', + }) + + +# ── CSRF helpers: unit tests ───────────────────────────────────────────────── + + +class TestCSRFHelpers: + """Direct unit tests for _normalize_host_port and _ports_match.""" + def test_normalize_host_only(self): + from api.routes import _normalize_host_port + assert _normalize_host_port('example.com') == ('example.com', None) + + def test_normalize_host_with_port(self): + from api.routes import _normalize_host_port + assert _normalize_host_port('example.com:8000') == ('example.com', '8000') + + def test_normalize_ipv6_no_port(self): + from api.routes import _normalize_host_port + assert _normalize_host_port('[::1]') == ('::1', None) + + def test_normalize_ipv6_with_port(self): + from api.routes import _normalize_host_port + assert _normalize_host_port('[::1]:8080') == ('::1', '8080') + + def test_normalize_empty(self): + from api.routes import _normalize_host_port + assert _normalize_host_port('') == ('', None) + + def test_normalize_whitespace_stripped(self): + from api.routes import _normalize_host_port + assert _normalize_host_port(' example.com ') == ('example.com', None) + + def test_normalize_lowercases(self): + from api.routes import _normalize_host_port + assert _normalize_host_port('EXAMPLE.COM:80') == ('example.com', '80') + + def test_ports_match_identical(self): + from api.routes import _ports_match + assert _ports_match('https', '8000', '8000') is True + + def test_ports_match_both_absent(self): + from api.routes import _ports_match + assert _ports_match('https', None, None) is True + + def test_ports_match_https_absent_vs_443(self): + from api.routes import _ports_match + assert _ports_match('https', None, '443') is True + assert _ports_match('https', '443', None) is True + + def test_ports_match_http_absent_vs_80(self): + from api.routes import _ports_match + assert _ports_match('http', None, '80') is True + assert _ports_match('http', '80', None) is True + + def test_ports_match_http_absent_vs_443_rejected(self): + """http:// scheme: absent port is :80, not :443.""" + from api.routes import _ports_match + assert _ports_match('http', None, '443') is False + assert _ports_match('http', '443', None) is False + + def test_ports_match_https_absent_vs_80_rejected(self): + """https:// scheme: absent port is :443, not :80.""" + from api.routes import _ports_match + assert _ports_match('https', None, '80') is False + assert _ports_match('https', '80', None) is False + + def test_ports_match_non_default_never_waived(self): + from api.routes import _ports_match + assert _ports_match('https', None, '8000') is False + assert _ports_match('https', '8000', None) is False + assert _ports_match('http', None, '8080') is False + + def test_ports_match_different_non_default(self): + from api.routes import _ports_match + assert _ports_match('https', '8000', '9000') is False # ── 2. Login Rate Limiting ─────────────────────────────────────────────────