r"""
Regression test for image src URL corruption by the autolink pass.

Bug: the _al_stash before the autolink pass only stashed <a> tags.
<img> tags produced by the ![alt](url) image pass were NOT stashed, so the
autolink regex matched the URL inside src="..." and wrapped it in
<a href="url">url</a>, producing src="<a href="url">url</a>" — a completely
broken image source.

Fix: extend _al_stash regex to also stash <img> tags:
  (<a\b[^>]*>[\s\S]*?<\/a>|<img\b[^>]*>)

NOTE(review): this module was restored from a copy in which every HTML-like
``<...>`` span had been stripped and all newlines collapsed. Literals that
could not be recovered verbatim are marked NOTE(review) below — confirm them
against the upstream test file and static/ui.js.
"""
import html as _html
import pathlib
import re

REPO_ROOT = pathlib.Path(__file__).parent.parent
# Explicit encoding so the check does not depend on the platform default.
UI_JS = (REPO_ROOT / "static" / "ui.js").read_text(encoding="utf-8")


# ── Source-level check ────────────────────────────────────────────────────────

def test_al_stash_includes_img_tags():
    """_al_stash regex must stash both <a> and <img> tags to protect src=."""
    # NOTE(review): the two needles were reconstructed from the surviving
    # ``]*>`` tails; confirm the exact spellings against the upstream test.
    assert '<img\\b[^>]*>' in UI_JS or '<img[^>]*>' in UI_JS, (
        "_al_stash should include <img> tag pattern to prevent autolink "
        "mangling src= URLs"
    )


# ── Behaviour tests (Python mirror of fixed pipeline) ─────────────────────────

def esc(s):
    """Return ``str(s)`` HTML-escaped, including single and double quotes."""
    return _html.escape(str(s), quote=True)


# Tags allowed to pass through unescaped. Only the ``^<\/?(`` prefix shape and
# the ``([\s>]|$)`` suffix are certain.
# NOTE(review): the tag alternation was lost in extraction; this list is a
# placeholder that must be replaced by the one in static/ui.js. It has to
# contain at least ``a`` and ``img`` for the pipeline below to work.
SAFE_TAGS = re.compile(
    r'^<\/?(strong|em|code|pre|h[1-6]|ul|ol|li|table|thead|tbody|tr|th|td'
    r'|hr|blockquote|p|br|a|img)([\s>]|$)',
    re.I,
)

# NOTE(review): pattern reconstructed from a surviving ``]*>`` tail — confirm.
_ANY_TAG = re.compile(r'</?[a-zA-Z][^>]*>')

# Taken verbatim from the fix described in the module docstring.
_AL_STASH = re.compile(r'(<a\b[^>]*>[\s\S]*?<\/a>|<img\b[^>]*>)')

_MD_IMAGE = re.compile(r'!\[([^\]]*)\]\((https?://[^\)]+)\)')
_BARE_URL = re.compile(r'(https?://[^\s<>"\')\]]+)')
_STASH_TOKEN = re.compile(r'\x00B(\d+)\x00')


def render_with_image_and_autolink(raw):
    """Simulate the image pass + SAFE_TAGS + _al_stash + autolink pipeline.

    Args:
        raw: Markdown-ish source text.

    Returns:
        The text after the four passes have run and the stashed tags have
        been restored.
    """
    s = raw

    # Image pass: ![alt](url) -> <img ...>
    # NOTE(review): the emitted attributes were lost in extraction; only
    # ``src="..."`` is pinned by the tests below. Confirm the full tag.
    s = _MD_IMAGE.sub(
        lambda m: f'<img src="{esc(m.group(2))}" alt="{esc(m.group(1))}">',
        s,
    )

    # SAFE_TAGS: keep allow-listed tags, escape every other tag-like span.
    s = _ANY_TAG.sub(
        lambda m: m.group() if SAFE_TAGS.match(m.group()) else esc(m.group()),
        s,
    )

    # _al_stash (fixed: stashes both <a> and <img>) so that URLs inside
    # attributes are invisible to the autolink regex.
    al_stash = []

    def stash(m):
        al_stash.append(m.group(1))
        return f'\x00B{len(al_stash) - 1}\x00'

    s = _AL_STASH.sub(stash, s)

    # Autolink: wrap bare URLs, leaving one trailing punctuation mark outside.
    def autolink(m):
        url = m.group(1)
        trail = url[-1] if url[-1] in '.,;:!?)' else ''
        clean = url[:-1] if trail else url
        # NOTE(review): anchor attributes other than the link text were lost
        # in extraction — confirm against static/ui.js.
        return f'<a href="{esc(clean)}">{esc(clean)}</a>{trail}'

    s = _BARE_URL.sub(autolink, s)

    # Restore the stashed tags in place of their placeholders.
    s = _STASH_TOKEN.sub(lambda m: al_stash[int(m.group(1))], s)
    return s


def test_image_src_not_mangled_by_autolink():
    """The URL inside src= of a rendered <img> must not be wrapped in <a>."""
    url = (
        'https://upload.wikimedia.org/wikipedia/commons/thumb/4/47/'
        'PNG_transparency_demonstration_1.png/'
        '280px-PNG_transparency_demonstration_1.png'
    )
    result = render_with_image_and_autolink(f'![alt]({url})')
    assert f'src="{url}"' in result, (
        f"src= URL should be intact, got: {result[:200]}"
    )
    # The URL inside src= must NOT be wrapped in <a href=...>
    src_part = result.split('src="')[1].split('"')[0]
    assert '<a' not in src_part, f"src= contains <a> tag, got: {src_part}"
    assert src_part == url, (
        f"src= URL mangled: expected {url}, got {src_part}"
    )


def test_image_tag_renders_as_img():
    """![alt](url) must produce an <img> tag, not a plain link."""
    result = render_with_image_and_autolink(
        '![Test image](https://example.com/img.png)'
    )
    assert '<img' in result, f"Expected <img> tag, got: {result}"
    assert 'src="https://example.com/img.png"' in result
    # NOTE(review): the original assertions between here and the next test
    # were partially lost in extraction; this is the recoverable remainder.
    src_part = result.split('src="')[1].split('"')[0]
    assert '<a' not in src_part


# NOTE(review): the name and docstring of this test were lost in extraction;
# only the docstring's closing word "tag." and the body survived. Restore the
# upstream name and wording.
def test_image_renders_exactly_one_img_tag():
    """A single image must render exactly one <img> tag."""
    result = render_with_image_and_autolink('![test](https://example.com/x.png)')
    assert result.count('<img') == 1, (
        f"Expected exactly one <img>, got {result.count('<img')}"
    )