diff --git a/static/ui.js b/static/ui.js
index 2bcdbf7..2a27d1c 100644
--- a/static/ui.js
+++ b/static/ui.js
@@ -463,8 +463,11 @@ function renderMd(raw){
t=t.replace(/\*\*(.+?)\*\*/g,(_,x)=>`${esc(x)}`);
t=t.replace(/\*([^*\n]+)\*/g,(_,x)=>`${esc(x)}`);
t=t.replace(/`([^`\n]+)`/g,(_,x)=>`${esc(x)}`);
- t=t.replace(/\[([^\]]+)\]\((https?:\/\/[^\)]+)\)/g,(_,lb,u)=>`${esc(lb)}`);
- t=t.replace(/(https?:\/\/[^\s<>"')\]]+)/g,(url)=>{const trail=url.match(/[.,;:!?)]$/)?url.slice(-1):'';const clean=trail?url.slice(0,-1):url;return `${esc(clean)}${trail}`;});
+ // Stash [label](url) links before autolink so the URL in href= is not re-linked
+ const _link_stash=[];
+ t=t.replace(/\[([^\]]+)\]\((https?:\/\/[^\)]+)\)/g,(_,lb,u)=>{_link_stash.push(`${esc(lb)}`);return `\x00L${_link_stash.length-1}\x00`;});
+ t=t.replace(/(https?:\/\/[^\s<>"')\]]+)/g,(url)=>{const trail=url.match(/[.,;:!?)]$/)?url.slice(-1):'';const clean=trail?url.slice(0,-1):url;return `${esc(clean)}${trail}`;});
+ t=t.replace(/\x00L(\d+)\x00/g,(_,i)=>_link_stash[+i]);;
// Escape any plain text that isn't already wrapped in a tag we produced
// by escaping bare < > that aren't part of our own tags
const SAFE_INLINE=/^<\/?(strong|em|code|a)([\s>]|$)/i;
@@ -498,7 +501,11 @@ function renderMd(raw){
}
return html+'';
});
- s=s.replace(/\[([^\]]+)\]\((https?:\/\/[^\)]+)\)/g,(_,label,url)=>`${esc(label)}`);
+ // Stash existing tags before link pass so autolink never re-links already-linked URLs
+ const _a_stash=[];
+ s=s.replace(/(]*>[\s\S]*?<\/a>)/g,m=>{_a_stash.push(m);return `\x00A${_a_stash.length-1}\x00`;});
+ s=s.replace(/\[([^\]]+)\]\((https?:\/\/[^\)]+)\)/g,(_,label,url)=>`${esc(label)}`);
+ s=s.replace(/\x00A(\d+)\x00/g,(_,i)=>_a_stash[+i]);
// Tables: | col | col | header row followed by | --- | --- | separator then data rows
s=s.replace(/((?:^\|.+\|\n?)+)/gm,block=>{
const rows=block.trim().split('\n').filter(r=>r.trim());
@@ -517,13 +524,17 @@ function renderMd(raw){
//
(mermaid/pre-header). Everything else is untrusted input.
const SAFE_TAGS=/^<\/?(strong|em|code|pre|h[1-6]|ul|ol|li|table|thead|tbody|tr|th|td|hr|blockquote|p|br|a|div|span)([\s>]|$)/i;
s=s.replace(/<\/?[a-z][^>]*>/gi,tag=>SAFE_TAGS.test(tag)?tag:esc(tag));
- // Autolink: convert plain URLs to clickable links (not inside existing
tags, not in code)
- s=s.replace(/(https?:\/\/[^\s<>"')\]]+)/g,(url)=>{
+ // Autolink: convert plain URLs to clickable links.
+ // Stash existing tags first so we never re-link a URL already inside href="...".
+ const _al_stash=[];
+ s=s.replace(/(]*>[\s\S]*?<\/a>)/g,m=>{_al_stash.push(m);return `\x00B${_al_stash.length-1}\x00`;});
+ s=s.replace(/(https?:\/\/[^\s<>"'\)\]]+)/g,(url)=>{
// Strip trailing punctuation that was likely not part of the URL
const trail=url.match(/[.,;:!?)]$/)?url.slice(-1):'';
const clean=trail?url.slice(0,-1):url;
- return `${esc(clean)}${trail}`;
+ return `
${esc(clean)}${trail}`;
});
+ s=s.replace(/\x00B(\d+)\x00/g,(_,i)=>_al_stash[+i]);
// Restore math stash → katex placeholder spans/divs
// These will be rendered by renderKatexBlocks() after DOM insertion
s=s.replace(/\x00M(\d+)\x00/g,(_,i)=>{
diff --git a/tests/test_issue342.py b/tests/test_issue342.py
index d095c79..be3b165 100644
--- a/tests/test_issue342.py
+++ b/tests/test_issue342.py
@@ -38,15 +38,23 @@ def test_autolink_regex_in_rendermd():
def test_autolink_uses_esc_for_xss_safety():
- """The autolink code must use esc() to escape URLs, preventing XSS."""
+ """The autolink code must use esc() to escape the display text of URLs, preventing XSS.
+ Note: esc() is intentionally NOT applied to the href value (that would corrupt & in
+ query strings). It IS applied to the visible link text (esc(clean)) to prevent XSS."""
content = read_ui_js()
# Find the autolink section (between the SAFE_TAGS pass and paragraph wrap)
autolink_idx = content.find('// Autolink: convert plain URLs')
assert autolink_idx != -1, "Autolink comment not found in ui.js"
- # Extract the autolink block (next ~300 chars after the comment)
- autolink_block = content[autolink_idx:autolink_idx + 400]
+ # Extract the autolink block (next ~600 chars after the comment)
+ autolink_block = content[autolink_idx:autolink_idx + 600]
+ # esc() must be used on the visible link text to prevent XSS
assert 'esc(clean)' in autolink_block, (
- "Autolink block should use esc(clean) for XSS-safe URL escaping, but it was not found."
+ "Autolink block should use esc(clean) for the link display text (XSS safety), "
+ "but it was not found."
+ )
+ # esc() must NOT be used on the href value — that breaks URLs containing &
+ assert 'href="${esc(clean)}"' not in autolink_block, (
+ "Autolink block should use href=\"${clean}\" (not esc'd) to preserve & in query strings."
)
@@ -87,12 +95,13 @@ def test_autolink_target_blank_and_rel():
content = read_ui_js()
autolink_idx = content.find('// Autolink: convert plain URLs')
assert autolink_idx != -1, "Autolink comment not found"
- autolink_block = content[autolink_idx:autolink_idx + 400]
+ # Use a larger window to account for the stash preamble added by the fix
+ autolink_block = content[autolink_idx:autolink_idx + 700]
assert 'target="_blank"' in autolink_block, (
- "Autolinked URLs should have target=\"_blank\""
+ 'Autolinked URLs should have target="_blank"'
)
assert 'rel="noopener"' in autolink_block, (
- "Autolinked URLs should have rel=\"noopener\" for security"
+ 'Autolinked URLs should have rel="noopener" for security'
)
diff --git a/tests/test_issue470.py b/tests/test_issue470.py
new file mode 100644
index 0000000..ecb18fb
--- /dev/null
+++ b/tests/test_issue470.py
@@ -0,0 +1,238 @@
+"""
+Tests for issue #470 — markdown link rendering bugs in renderMd():
+ 1. Double-linking: [label](url) converted to
, then autolink re-matches
+ the URL inside href="..." and wraps it in a second .
+ 2. esc() applied to URLs in href attributes turns & → &, breaking
+ URLs with query strings and producing & in displayed link text.
+ 3. Same double-linking bug inside table cells via inlineMd().
+
+These tests verify the fixes by asserting against the rendered HTML that
+ui.js serves, using a live server request to evaluate the actual JS output
+indirectly (via checking ui.js source for the fixed patterns) AND by
+running a lightweight Python mirror of the fixed renderMd logic.
+
+Strategy: verify the fix is present in the JS source, then test the
+expected rendering behaviour through the Python mirror.
+"""
+import pathlib
+import re
+import html as _html
+
+REPO_ROOT = pathlib.Path(__file__).parent.parent
+UI_JS = (REPO_ROOT / "static" / "ui.js").read_text()
+
+
+# ── Helpers ──────────────────────────────────────────────────────────────────
+
+def esc(s):
+ return _html.escape(str(s), quote=True)
+
+
+def _make_link(url, label):
+ """Expected output for a [label](url) link after fix: href is NOT esc()-ed."""
+ return f'{esc(label)}'
+
+
+# Minimal Python mirror of the FIXED renderMd() — enough to test link behaviour.
+# Mirrors the stash-based approach introduced by the fix.
+
+def render_links_only(text):
+ """
+ Simplified render that only applies the link-related passes from the fixed
+ renderMd(): [label](url) conversion + autolink, with the stash protection.
+ Sufficient for testing that links render correctly without double-linking.
+ """
+ s = text
+
+ # Stash [label](url) links (fix: store href as raw URL, not esc(url))
+ link_stash = []
+ def stash_link(m):
+ label, url = m.group(1), m.group(2)
+ link_stash.append(f'
{esc(label)}')
+ return f'\x00L{len(link_stash)-1}\x00'
+ s = re.sub(r'\[([^\]]+)\]\((https?://[^\)]+)\)', stash_link, s)
+
+ # Autolink bare URLs (should NOT match inside already-stashed placeholders)
+ def autolink(m):
+ url = m.group(1)
+ trail = url[-1] if url[-1] in '.,;:!?)' else ''
+ clean = url[:-1] if trail else url
+ return f'
{esc(clean)}{trail}'
+ s = re.sub(r'(https?://[^\s<>"\')\]]+)', autolink, s)
+
+ # Restore stashed links
+ s = re.sub(r'\x00L(\d+)\x00', lambda m: link_stash[int(m.group(1))], s)
+ return s
+
+
+def render_table_with_links(md):
+ """
+ Render a markdown table that may contain [label](url) cells.
+ Mirrors the fixed inlineMd() + table rendering.
+ """
+ lines = md.strip().split('\n')
+ if len(lines) < 2:
+ return md
+ def is_sep(r):
+ return bool(re.match(r'^\|[\s|:-]+\|$', r.strip()))
+ if not is_sep(lines[1]):
+ return md
+
+ def inline_md_fixed(t):
+ """Fixed inlineMd: stash links before autolink."""
+ stash = []
+ def stash_fn(m):
+ lb, u = m.group(1), m.group(2)
+ stash.append(f'
{esc(lb)}')
+ return f'\x00L{len(stash)-1}\x00'
+ t = re.sub(r'\[([^\]]+)\]\((https?://[^\)]+)\)', stash_fn, t)
+ # autolink remaining bare URLs
+ def autolink(m):
+ url = m.group(1)
+ trail = url[-1] if url[-1] in '.,;:!?)' else ''
+ clean = url[:-1] if trail else url
+ return f'
{esc(clean)}{trail}'
+ t = re.sub(r'(https?://[^\s<>"\')\]]+)', autolink, t)
+ t = re.sub(r'\x00L(\d+)\x00', lambda m: stash[int(m.group(1))], t)
+ return t
+
+ def parse_row(r):
+ cells = r.strip().lstrip('|').rstrip('|').split('|')
+ return ''.join(f'
{inline_md_fixed(c.strip())} | ' for c in cells)
+
+ def parse_header(r):
+ cells = r.strip().lstrip('|').rstrip('|').split('|')
+ return ''.join(f'
{inline_md_fixed(c.strip())} | ' for c in cells)
+
+ header = f'
{parse_header(lines[0])}
'
+ body = ''.join(f'
{parse_row(r)}
' for r in lines[2:])
+ return f'
'
+
+
+# ── Source-level checks (verify fix is in the JS) ─────────────────────────────
+
+def test_inlinemd_uses_link_stash():
+ """Fixed inlineMd() must stash [label](url) links before autolink runs."""
+ assert '_link_stash' in UI_JS, (
+ "inlineMd() should use _link_stash to prevent double-linking"
+ )
+
+
+def test_inlinemd_no_esc_on_href():
+ """Fixed inlineMd() must not call esc() on the URL in href."""
+ # The old broken pattern had esc(u) inside the href
+ assert 'href="${esc(u)}"' not in UI_JS, (
+ "inlineMd() should not call esc() on href URL — it breaks & in query strings"
+ )
+
+
+def test_outer_link_pass_uses_a_stash():
+ """Fixed outer link pass must stash existing
tags before running."""
+ assert '_a_stash' in UI_JS, (
+ "Outer [label](url) pass should stash existing tags to prevent autolink re-matching"
+ )
+
+
+def test_autolink_pass_uses_al_stash():
+ """Fixed autolink pass must stash existing tags before running."""
+ assert '_al_stash' in UI_JS, (
+ "Autolink pass should stash existing tags to prevent double-linking"
+ )
+
+
+def test_autolink_no_esc_on_href():
+ """Fixed autolink pass must not call esc() on href URL."""
+ idx = UI_JS.find('// Autolink: convert plain URLs to clickable links.')
+ assert idx != -1, "New autolink comment not found"
+ autolink_section = UI_JS[idx:idx+600]
+ # The return line should have href="${clean}" (JS template literal, no esc call)
+ assert 'href="${clean}"' in autolink_section, (
+ 'Autolink should use href="${clean}" not href="${esc(clean)}"'
+ )
+ assert 'href="${esc(clean)}"' not in autolink_section, (
+ "Autolink should not esc() the URL in href"
+ )
+
+
+# ── Behaviour tests (Python mirror of fixed renderMd) ─────────────────────────
+
+def test_labeled_link_renders_as_single_anchor():
+ """[#461](https://github.com/.../461) must produce exactly one tag."""
+ url = 'https://github.com/nesquena/hermes-webui/issues/461'
+ md = f'[#461]({url})'
+ result = render_links_only(md)
+ assert result.count(' tag, got: {result}"
+ assert result.count('') == 1
+ assert f'href="{url}"' in result
+ assert '#461' in result
+ # Must not contain the raw brackets
+ assert '[#461]' not in result
+ assert f']({url})' not in result
+
+
+def test_href_not_html_escaped():
+ """URLs with & must appear as literal & in href, not &."""
+ url = 'https://example.com/search?q=foo&bar=baz'
+ md = f'[Search]({url})'
+ result = render_links_only(md)
+ assert f'href="{url}"' in result, (
+ f"& in URL should not be escaped to & in href. Got: {result}"
+ )
+ assert '&' not in result
+
+
+def test_bare_url_not_double_linked():
+ """A bare https:// URL must produce exactly one
tag."""
+ url = 'https://github.com/nesquena/hermes-webui/issues/461'
+ result = render_links_only(url)
+ assert result.count(' tag, got: {result}"
+ assert result.count('') == 1
+
+
+def test_labeled_link_in_table_cell_single_anchor():
+ """[#461](url) inside a markdown table cell must produce exactly one
tag."""
+ url = 'https://github.com/nesquena/hermes-webui/issues/461'
+ md = f'| Issue | Title |\n|---|---|\n| [#461]({url}) | Reasoning effort |'
+ result = render_table_with_links(md)
+ assert result.count(' in table, got: {result}"
+ assert f'href="{url}"' in result
+ assert '#461' in result
+ # No raw brackets should appear in output
+ assert '[#461]' not in result
+
+
+def test_multiple_links_in_table_no_double_linking():
+ """Multiple [label](url) links in a table must each produce exactly one ."""
+ urls = [
+ 'https://github.com/nesquena/hermes-webui/issues/461',
+ 'https://github.com/nesquena/hermes-webui/issues/462',
+ 'https://github.com/nesquena/hermes-webui/issues/463',
+ ]
+ rows = '\n'.join(f'| [#{461+i}]({url}) | Title {i} |' for i, url in enumerate(urls))
+ md = f'| Issue | Title |\n|---|---|\n{rows}'
+ result = render_table_with_links(md)
+ assert result.count(' tags, got {result.count('') == 3
+ for url in urls:
+ assert f'href="{url}"' in result
+
+
+def test_link_label_is_escaped():
+ """The label text (not the URL) must still be HTML-escaped."""
+ url = 'https://example.com'
+ md = f'[Click ]({url})'
+ result = render_links_only(md)
+ assert '<here>' in result, "Label text should be HTML-escaped"
+ assert '' not in result
+
+
+def test_link_not_broken_by_prior_autolink():
+ """A [label](url) followed by a bare URL must each produce one clean ."""
+ url1 = 'https://github.com/issues/461'
+ url2 = 'https://github.com/issues/462'
+ md = f'See [#461]({url1}) and also {url2}'
+ result = render_links_only(md)
+ assert result.count('