""" Tests for issue #470 — markdown link rendering bugs in renderMd(): 1. Double-linking: [label](url) converted to , then autolink re-matches the URL inside href="..." and wraps it in a second . 2. esc() applied to URLs in href attributes turns & → &, breaking URLs with query strings and producing & in displayed link text. 3. Same double-linking bug inside table cells via inlineMd(). These tests verify the fixes by asserting against the rendered HTML that ui.js serves, using a live server request to evaluate the actual JS output indirectly (via checking ui.js source for the fixed patterns) AND by running a lightweight Python mirror of the fixed renderMd logic. Strategy: verify the fix is present in the JS source, then test the expected rendering behaviour through the Python mirror. """ import pathlib import re import html as _html REPO_ROOT = pathlib.Path(__file__).parent.parent UI_JS = (REPO_ROOT / "static" / "ui.js").read_text() # ── Helpers ────────────────────────────────────────────────────────────────── def esc(s): return _html.escape(str(s), quote=True) def _make_link(url, label): """Expected output for a [label](url) link after fix: href is NOT esc()-ed.""" return f'{esc(label)}' # Minimal Python mirror of the FIXED renderMd() — enough to test link behaviour. # Mirrors the stash-based approach introduced by the fix. def render_links_only(text): """ Simplified render that only applies the link-related passes from the fixed renderMd(): [label](url) conversion + autolink, with the stash protection. Sufficient for testing that links render correctly without double-linking. """ s = text # Stash [label](url) links (fix: store href as raw URL, not esc(url)) link_stash = [] def stash_link(m): label, url = m.group(1), m.group(2) link_stash.append(f'{esc(label)}') return f'\x00L{len(link_stash)-1}\x00' s = re.sub(r'\[([^\]]+)\]\((https?://[^\)]+)\)', stash_link, s) # Autolink bare URLs (should NOT match inside already-stashed placeholders) def autolink(m): url = m.group(1) trail = url[-1] if url[-1] in '.,;:!?)' else '' clean = url[:-1] if trail else url return f'{esc(clean)}{trail}' s = re.sub(r'(https?://[^\s<>"\')\]]+)', autolink, s) # Restore stashed links s = re.sub(r'\x00L(\d+)\x00', lambda m: link_stash[int(m.group(1))], s) return s def render_table_with_links(md): """ Render a markdown table that may contain [label](url) cells. Mirrors the fixed inlineMd() + table rendering. """ lines = md.strip().split('\n') if len(lines) < 2: return md def is_sep(r): return bool(re.match(r'^\|[\s|:-]+\|$', r.strip())) if not is_sep(lines[1]): return md def inline_md_fixed(t): """Fixed inlineMd: stash links before autolink.""" stash = [] def stash_fn(m): lb, u = m.group(1), m.group(2) stash.append(f'{esc(lb)}') return f'\x00L{len(stash)-1}\x00' t = re.sub(r'\[([^\]]+)\]\((https?://[^\)]+)\)', stash_fn, t) # autolink remaining bare URLs def autolink(m): url = m.group(1) trail = url[-1] if url[-1] in '.,;:!?)' else '' clean = url[:-1] if trail else url return f'{esc(clean)}{trail}' t = re.sub(r'(https?://[^\s<>"\')\]]+)', autolink, t) t = re.sub(r'\x00L(\d+)\x00', lambda m: stash[int(m.group(1))], t) return t def parse_row(r): cells = r.strip().lstrip('|').rstrip('|').split('|') return ''.join(f'{inline_md_fixed(c.strip())}' for c in cells) def parse_header(r): cells = r.strip().lstrip('|').rstrip('|').split('|') return ''.join(f'{inline_md_fixed(c.strip())}' for c in cells) header = f'{parse_header(lines[0])}' body = ''.join(f'{parse_row(r)}' for r in lines[2:]) return f'{header}{body}
' # ── Source-level checks (verify fix is in the JS) ───────────────────────────── def test_inlinemd_uses_link_stash(): """Fixed inlineMd() must stash [label](url) links before autolink runs.""" assert '_link_stash' in UI_JS, ( "inlineMd() should use _link_stash to prevent double-linking" ) def test_inlinemd_no_esc_on_href(): """Fixed inlineMd() must not call esc() on the URL in href.""" # The old broken pattern had esc(u) inside the href assert 'href="${esc(u)}"' not in UI_JS, ( "inlineMd() should not call esc() on href URL — it breaks & in query strings" ) def test_outer_link_pass_uses_a_stash(): """Fixed outer link pass must stash existing tags before running.""" assert '_a_stash' in UI_JS, ( "Outer [label](url) pass should stash existing tags to prevent autolink re-matching" ) def test_autolink_pass_uses_al_stash(): """Fixed autolink pass must stash existing tags before running.""" assert '_al_stash' in UI_JS, ( "Autolink pass should stash existing tags to prevent double-linking" ) def test_autolink_no_esc_on_href(): """Fixed autolink pass must not call esc() on href URL.""" idx = UI_JS.find('// Autolink: convert plain URLs to clickable links.') assert idx != -1, "New autolink comment not found" autolink_section = UI_JS[idx:idx+600] # The return line should have href="${clean}" (JS template literal, no esc call) assert 'href="${clean}"' in autolink_section, ( 'Autolink should use href="${clean}" not href="${esc(clean)}"' ) assert 'href="${esc(clean)}"' not in autolink_section, ( "Autolink should not esc() the URL in href" ) # ── Behaviour tests (Python mirror of fixed renderMd) ───────────────────────── def test_labeled_link_renders_as_single_anchor(): """[#461](https://github.com/.../461) must produce exactly one tag.""" url = 'https://github.com/nesquena/hermes-webui/issues/461' md = f'[#461]({url})' result = render_links_only(md) assert result.count(' tag, got: {result}" assert result.count('') == 1 assert f'href="{url}"' in result assert '#461' in result # Must not contain the raw brackets assert '[#461]' not in result assert f']({url})' not in result def test_href_not_html_escaped(): """URLs with & must appear as literal & in href, not &.""" url = 'https://example.com/search?q=foo&bar=baz' md = f'[Search]({url})' result = render_links_only(md) assert f'href="{url}"' in result, ( f"& in URL should not be escaped to & in href. Got: {result}" ) assert '&' not in result def test_bare_url_not_double_linked(): """A bare https:// URL must produce exactly one tag.""" url = 'https://github.com/nesquena/hermes-webui/issues/461' result = render_links_only(url) assert result.count(' tag, got: {result}" assert result.count('') == 1 def test_labeled_link_in_table_cell_single_anchor(): """[#461](url) inside a markdown table cell must produce exactly one tag.""" url = 'https://github.com/nesquena/hermes-webui/issues/461' md = f'| Issue | Title |\n|---|---|\n| [#461]({url}) | Reasoning effort |' result = render_table_with_links(md) assert result.count(' in table, got: {result}" assert f'href="{url}"' in result assert '#461' in result # No raw brackets should appear in output assert '[#461]' not in result def test_multiple_links_in_table_no_double_linking(): """Multiple [label](url) links in a table must each produce exactly one .""" urls = [ 'https://github.com/nesquena/hermes-webui/issues/461', 'https://github.com/nesquena/hermes-webui/issues/462', 'https://github.com/nesquena/hermes-webui/issues/463', ] rows = '\n'.join(f'| [#{461+i}]({url}) | Title {i} |' for i, url in enumerate(urls)) md = f'| Issue | Title |\n|---|---|\n{rows}' result = render_table_with_links(md) assert result.count(' tags, got {result.count('') == 3 for url in urls: assert f'href="{url}"' in result def test_link_label_is_escaped(): """The label text (not the URL) must still be HTML-escaped.""" url = 'https://example.com' md = f'[Click ]({url})' result = render_links_only(md) assert '<here>' in result, "Label text should be HTML-escaped" assert '' not in result def test_link_not_broken_by_prior_autolink(): """A [label](url) followed by a bare URL must each produce one clean .""" url1 = 'https://github.com/issues/461' url2 = 'https://github.com/issues/462' md = f'See [#461]({url1}) and also {url2}' result = render_links_only(md) assert result.count('..., not with escaped <code> tags visible on screen.""" # This was the pre-existing bug: **`esc()`** → <code>esc()</code> text = '**`esc()` on `href`**: breaks URLs' # Simulate the fixed inlineMd() code_stash = [] t = text t = re.sub(r'`([^`\n]+)`', lambda m: (code_stash.append(f'{esc(m.group(1))}') or f'\x00C{len(code_stash)-1}\x00'), t) t = re.sub(r'\*\*(.+?)\*\*', lambda m: f'{esc(m.group(1))}', t) t = re.sub(r'\x00C(\d+)\x00', lambda m: code_stash[int(m.group(1))], t) assert '<code>' not in t, ( f"Code tags should not be HTML-escaped inside bold. Got: {t}" ) assert 'esc()' in t, ( f"Code tags should render as elements inside bold. Got: {t}" ) assert '' in t, "Bold should still render" def test_code_and_bold_mixed_no_escaping(): """Bold text containing multiple backtick spans must render all code tags correctly.""" cases = [ ('**`esc()` on `href`**', '', 'esc()', 'href'), ('***`code` in bold-italic***', '', 'code'), ('`code` then **bold**', 'code', 'bold'), ] for args in cases: text = args[0] expected_fragments = args[1:] code_stash = [] t = text t = re.sub(r'`([^`\n]+)`', lambda m: (code_stash.append(f'{esc(m.group(1))}') or f'\x00C{len(code_stash)-1}\x00'), t) t = re.sub(r'\*\*\*(.+?)\*\*\*', lambda m: f'{esc(m.group(1))}', t) t = re.sub(r'\*\*(.+?)\*\*', lambda m: f'{esc(m.group(1))}', t) t = re.sub(r'\x00C(\d+)\x00', lambda m: code_stash[int(m.group(1))], t) assert '<code>' not in t, f"Escaped code tag in: {text!r} → {t}" for frag in expected_fragments: assert frag in t, f"Expected {frag!r} in output of {text!r}, got: {t}"