From 2810233af42a6c4e7dc1cbd71c5eec0b38729579 Mon Sep 17 00:00:00 2001 From: Hermes Agent Date: Tue, 14 Apr 2026 22:09:36 +0000 Subject: [PATCH] fix(renderer): extend _al_stash to include <img> tags, preventing autolink from mangling src= URLs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bug: the autolink pass stashed <a> tags (via _al_stash) before running, but did not stash <img> tags. When ![alt](url) was converted to an <img> tag by the image pass, the subsequent autolink regex matched the URL inside src="..." and wrapped it in <a href="url">url</a>, producing src="<a href="url">url</a>" — a completely broken image source. Fix: extend the _al_stash regex from: (<a\b[^>]*>[\s\S]*?<\/a>) to: (<a\b[^>]*>[\s\S]*?<\/a>|<img\b[^>]*>) This stashes both <a> and self-closing <img> tags before autolink runs, then restores them after, so the URL inside src= is never touched. Adds 7 regression tests in tests/test_issue487b.py. --- static/ui.js | 2 +- tests/test_issue487b.py | 131 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 132 insertions(+), 1 deletion(-) create mode 100644 tests/test_issue487b.py diff --git a/static/ui.js b/static/ui.js index 278cda1..3eb479c 100644 --- a/static/ui.js +++ b/static/ui.js @@ -552,7 +552,7 @@ function renderMd(raw){ // Autolink: convert plain URLs to clickable links. // Stash existing <a> tags first so we never re-link a URL already inside href="...". 
const _al_stash=[]; - s=s.replace(/(]*>[\s\S]*?<\/a>)/g,m=>{_al_stash.push(m);return `\x00B${_al_stash.length-1}\x00`;}); + s=s.replace(/(]*>[\s\S]*?<\/a>|]*>)/g,m=>{_al_stash.push(m);return `\x00B${_al_stash.length-1}\x00`;}); s=s.replace(/(https?:\/\/[^\s<>"'\)\]]+)/g,(url)=>{ // Strip trailing punctuation that was likely not part of the URL const trail=url.match(/[.,;:!?)]$/)?url.slice(-1):''; diff --git a/tests/test_issue487b.py b/tests/test_issue487b.py new file mode 100644 index 0000000..097f043 --- /dev/null +++ b/tests/test_issue487b.py @@ -0,0 +1,131 @@ +""" +Regression test for image src URL corruption by the autolink pass. + +Bug: the _al_stash before the autolink pass only stashed tags. + tags produced by the ![alt](url) image pass were NOT stashed, +so the autolink regex matched the URL inside src="..." and wrapped it +in url, producing src="url" — +a completely broken image source. + +Fix: extend _al_stash regex to also stash tags: + (]*>[\s\S]*?<\/a>|]*>) +""" +import pathlib +import re + +REPO_ROOT = pathlib.Path(__file__).parent.parent +UI_JS = (REPO_ROOT / "static" / "ui.js").read_text() + + +# ── Source-level check ──────────────────────────────────────────────────────── + +def test_al_stash_includes_img_tags(): + """_al_stash regex must stash both and tags to protect src= from autolink.""" + assert ']*>' in UI_JS or ']*>' in UI_JS, ( + "_al_stash should include tag pattern to prevent autolink mangling src= URLs" + ) + + +# ── Behaviour tests (Python mirror of fixed pipeline) ───────────────────────── + +import html as _html +def esc(s): return _html.escape(str(s), quote=True) + +SAFE_TAGS = re.compile( + r'^]|$)', re.I +) + + +def render_with_image_and_autolink(raw): + """Simulate the image pass + SAFE_TAGS + _al_stash + autolink pipeline.""" + s = raw + # Image pass + s = re.sub( + r'!\[([^\]]*)\]\((https?://[^\)]+)\)', + lambda m: ( + f'' + ), + s, + ) + # SAFE_TAGS + s = re.sub( + r']*>', + lambda m: m.group() if SAFE_TAGS.match(m.group()) 
else esc(m.group()), + s, + ) + # _al_stash (fixed: stashes both and ) + al_stash = [] + s = re.sub( + r'(]*>[\s\S]*?<\/a>|]*>)', + lambda m: (al_stash.append(m.group(1)) or f'\x00B{len(al_stash)-1}\x00'), + s, + ) + # Autolink + def autolink(m): + url = m.group(1) + trail = url[-1] if url[-1] in '.,;:!?)' else '' + clean = url[:-1] if trail else url + return f'{esc(clean)}{trail}' + s = re.sub(r'(https?://[^\s<>"\')\]]+)', autolink, s) + # Restore + s = re.sub(r'\x00B(\d+)\x00', lambda m: al_stash[int(m.group(1))], s) + return s + + +def test_image_src_not_mangled_by_autolink(): + """The URL inside src= of a rendered must not be wrapped in by autolink.""" + url = 'https://upload.wikimedia.org/wikipedia/commons/thumb/4/47/PNG_transparency_demonstration_1.png/280px-PNG_transparency_demonstration_1.png' + result = render_with_image_and_autolink(f'![alt]({url})') + assert f'src="{url}"' in result, f"src= URL should be intact, got: {result[:200]}" + # The URL inside src= must NOT be wrapped in + src_part = result.split('src="')[1].split('"')[0] + assert ' tag, got: {src_part}" + assert src_part == url, f"src= URL mangled: expected {url}, got {src_part}" + + +def test_image_tag_renders_as_img(): + """![alt](url) must produce an tag, not a plain link.""" + result = render_with_image_and_autolink('![Test image](https://example.com/img.png)') + assert ' tag, got: {result}" + assert 'src="https://example.com/img.png"' in result + assert ' + src_part = result.split('src="')[1].split('"')[0] + assert ' tag.""" + result = render_with_image_and_autolink('![test](https://example.com/x.png)') + assert result.count(', got {result.count('