"""
Regression test for image src URL corruption by the autolink pass.

Bug: the _al_stash before the autolink pass only stashed <a> tags.
<img> tags produced by the ![alt](url) image pass were NOT stashed,
so the autolink regex matched the URL inside src="..." and wrapped it
in <a href="url">url</a>, producing src="<a href="url">url</a>" —
a completely broken image source.

Fix: extend the _al_stash regex to also stash <img> tags:
    (<a\b[^>]*>[\s\S]*?<\/a>|<img\b[^>]*>)
"""
import pathlib
import re
# Repository root, taken as two directory levels above this test file.
REPO_ROOT = pathlib.Path(__file__).parent.parent
# Front-end source under test. Decoded explicitly as UTF-8 so the read does not
# depend on the platform's default locale encoding.
# NOTE(review): assumes static/ui.js is stored as UTF-8 — confirm.
UI_JS = (REPO_ROOT / "static" / "ui.js").read_text(encoding="utf-8")
# ── Source-level check ────────────────────────────────────────────────────────
def test_al_stash_includes_img_tags():
    """_al_stash regex must stash both <a> and <img> tags to protect src= from autolink."""
    # Accept the <img> alternative with or without a \b word boundary after "img".
    assert '<img\\b[^>]*>' in UI_JS or '<img[^>]*>' in UI_JS, (
        "_al_stash should include <img> tag pattern to prevent autolink mangling src= URLs"
    )
# ── Behaviour tests (Python mirror of fixed pipeline) ─────────────────────────
import html as _html
def esc(s):
    """Return ``str(s)`` HTML-escaped, including both quote characters."""
    return _html.escape(str(s), quote=True)


# Allow-list of tag names that are passed through unescaped. Matches an opening
# or closing tag whose name is followed by whitespace, ">" or end of string.
SAFE_TAGS = re.compile(
    r'^<\/?(strong|em|code|pre|h[1-6]|ul|ol|li|table|thead|tbody|tr|th|td'
    r'|hr|blockquote|p|br|a|img|div|span)([\s>]|$)', re.I
)


def render_with_image_and_autolink(raw):
    """Simulate the image pass + SAFE_TAGS + _al_stash + autolink pipeline.

    Steps, in order:
      1. Markdown images ``![alt](http…)`` become ``<img>`` tags.
      2. Any tag not on the SAFE_TAGS allow-list is HTML-escaped.
      3. Existing ``<a>…</a>`` elements and ``<img>`` tags are swapped for
         NUL-delimited placeholders so the next step cannot touch them.
      4. Remaining bare http(s) URLs are wrapped in ``<a href>``.
      5. Placeholders are replaced by the stashed markup.

    Args:
        raw: Input text, possibly containing Markdown images, HTML and URLs.

    Returns:
        The rendered HTML string.
    """
    s = raw

    # Image pass.
    # NOTE(review): the attribute set here is a minimal stand-in; confirm it
    # against the markup the production renderer emits.
    s = re.sub(
        r'!\[([^\]]*)\]\((https?://[^\)]+)\)',
        lambda m: f'<img src="{esc(m.group(2))}" alt="{esc(m.group(1))}">',
        s,
    )

    # SAFE_TAGS: keep allow-listed tags verbatim, escape everything else.
    s = re.sub(
        r'<\/?[a-zA-Z][^>]*>',
        lambda m: m.group() if SAFE_TAGS.match(m.group()) else esc(m.group()),
        s,
    )

    # _al_stash (fixed: stashes both <a>…</a> and <img>) so URLs inside
    # href= / src= attributes are hidden from the autolink regex.
    al_stash = []

    def stash(m):
        al_stash.append(m.group(1))
        return f'\x00B{len(al_stash) - 1}\x00'

    s = re.sub(r'(<a\b[^>]*>[\s\S]*?<\/a>|<img\b[^>]*>)', stash, s)

    # Autolink: wrap bare URLs, keeping one trailing punctuation character
    # outside the link.
    # NOTE(review): the anchor attributes are a minimal stand-in; confirm
    # against the production renderer.
    def autolink(m):
        url = m.group(1)
        trail = url[-1] if url[-1] in '.,;:!?)' else ''
        clean = url[:-1] if trail else url
        return f'<a href="{esc(clean)}">{esc(clean)}</a>{trail}'

    s = re.sub(r'(https?://[^\s<>"\')\]]+)', autolink, s)

    # Restore the stashed markup in place of its placeholders.
    s = re.sub(r'\x00B(\d+)\x00', lambda m: al_stash[int(m.group(1))], s)
    return s
def test_image_src_not_mangled_by_autolink():
    """The URL inside src= of a rendered <img> must not be wrapped in <a> by autolink."""
    url = 'https://upload.wikimedia.org/wikipedia/commons/thumb/4/47/PNG_transparency_demonstration_1.png/280px-PNG_transparency_demonstration_1.png'
    result = render_with_image_and_autolink(f'![PNG transparency demo]({url})')
    assert f'src="{url}"' in result, f"src= URL should be intact, got: {result[:200]}"
    # The URL inside src= must NOT be wrapped in <a href>.
    src_part = result.split('src="')[1].split('"')[0]
    assert '<a' not in src_part, f"src= contains <a> tag, got: {src_part}"
    assert src_part == url, f"src= URL mangled: expected {url}, got {src_part}"
def test_image_tag_renders_as_img():
""" must produce an
tag, not a plain link."""
result = render_with_image_and_autolink('')
assert '
tag, got: {result}"
assert 'src="https://example.com/img.png"' in result
assert '
src_part = result.split('src="')[1].split('"')[0]
assert ' tag."""
result = render_with_image_and_autolink('')
assert result.count('
, got {result.count('