fix: unicode filenames in Content-Disposition headers (#378)

* Fix unicode filenames in file download headers
* docs: v0.50.19 CHANGELOG entry for unicode filename fix (PR #378)
* docs: fix test count in v0.50.19 CHANGELOG (924 not 926)

---------

Co-authored-by: shaoxianbilly <40623436+shaoxianbilly@users.noreply.github.com>
Co-authored-by: Nathan Esquenazi <nesquena@gmail.com>
2026-04-13 15:43:01 -07:00
parent a71047bbc3
commit 00eb13b316
3 changed files with 83 additions and 5 deletions
--- a/tests/test_sprint29.py
+++ b/tests/test_sprint29.py
@@ -21,6 +21,7 @@ import pathlib
 import sys
 import time
 import urllib.error
+import urllib.parse
 import urllib.request

 sys.path.insert(0, str(pathlib.Path(__file__).parent))
@@ -51,6 +52,12 @@ def post(path, body=None, headers=None):
        return json.loads(e.read()), e.code


+def get_raw_with_headers(path):  # GET BASE + path -> (body bytes, headers dict, HTTP status)
+    req = urllib.request.Request(BASE + path)
+    with urllib.request.urlopen(req, timeout=10) as r:  # urlopen raises HTTPError on 4xx/5xx responses
+        return r.read(), dict(r.headers.items()), r.status  # plain dict: header-name lookups are case-sensitive
+
+
 # ── 1. CSRF Protection ─────────────────────────────────────────────────────


@@ -550,6 +557,52 @@ class TestContentDisposition:
        assert "image/svg+xml" in src
        assert "dangerous_types" in src

+    def test_unicode_filename_download_header_is_latin1_safe(self, cleanup_test_sessions):
+        """Unicode filenames must not crash download responses."""
+        body, status = post("/api/session/new", {})
+        assert status == 200, body
+        sid = body["session"]["session_id"]
+        cleanup_test_sessions.append(sid)  # register the session with the cleanup fixture
+        ws = pathlib.Path(body["session"]["workspace"])
+        filename = "中文对照表.pdf"  # non-ASCII name: cannot be represented in latin-1
+        pdf_bytes = b"%PDF-1.3\n1 0 obj\n<<>>\nendobj\ntrailer\n<<>>\n%%EOF\n"
+        (ws / filename).write_bytes(pdf_bytes)  # create the file inside the session workspace
+
+        encoded = urllib.parse.quote(filename)  # percent-encode the UTF-8 bytes for use in the query string
+        raw, headers, raw_status = get_raw_with_headers(
+            f"/api/file/raw?session_id={sid}&path={encoded}&download=1"  # download=1: attachment disposition is asserted below
+        )
+
+        assert raw_status == 200
+        assert raw == pdf_bytes  # body must be served byte-for-byte
+        disp = headers["Content-Disposition"]
+        assert disp.startswith("attachment; ")
+        assert "filename*=UTF-8''" in disp  # extended filename* parameter (RFC 6266 / RFC 5987 syntax)
+        disp.encode("latin-1")  # raises UnicodeEncodeError if the header value contains non-latin-1 characters
+
+    def test_unicode_filename_inline_header_is_latin1_safe(self, cleanup_test_sessions):
+        """Inline responses must also work for unicode filenames."""
+        body, status = post("/api/session/new", {})
+        assert status == 200, body
+        sid = body["session"]["session_id"]
+        cleanup_test_sessions.append(sid)  # register the session with the cleanup fixture
+        ws = pathlib.Path(body["session"]["workspace"])
+        filename = "预览.pdf"  # non-ASCII name: cannot be represented in latin-1
+        pdf_bytes = b"%PDF-1.3\n1 0 obj\n<<>>\nendobj\ntrailer\n<<>>\n%%EOF\n"
+        (ws / filename).write_bytes(pdf_bytes)  # create the file inside the session workspace
+
+        encoded = urllib.parse.quote(filename)  # percent-encode the UTF-8 bytes for use in the query string
+        raw, headers, raw_status = get_raw_with_headers(
+            f"/api/file/raw?session_id={sid}&path={encoded}"  # no download flag: inline disposition is asserted below
+        )
+
+        assert raw_status == 200
+        assert raw == pdf_bytes  # body must be served byte-for-byte
+        disp = headers["Content-Disposition"]
+        assert disp.startswith("inline; ")
+        assert "filename*=UTF-8''" in disp  # extended filename* parameter (RFC 6266 / RFC 5987 syntax)
+        disp.encode("latin-1")  # raises UnicodeEncodeError if the header value contains non-latin-1 characters
+

 # ── 9. PBKDF2 Password Hashing ───────────────────────────────────────────