feat: autolink plain URLs in chat messages (fixes #342) (#346)

Co-authored-by: Nathan Esquenazi <nesquena@gmail.com>
2026-04-13 00:05:04 -07:00
parent 3c771c4d2c
commit 05f09012a5
3 changed files with 132 additions and 0 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,15 @@
 ---


+## [v0.50.11] Auto-link plain URLs in chat messages (fixes #342)
+
+- **Plain URLs in chat messages are now clickable** (`static/ui.js`): The `renderMd()` function previously only converted Markdown-style `[label](url)` links to `<a>` tags. Bare URLs like `https://example.com` were rendered as plain text. A new autolink pass converts `https?://...` URLs to `<a href="..." target="_blank" rel="noopener">` tags automatically.
+  - The pass runs after the `[label](url)` link pass and the `SAFE_TAGS` HTML-escape pass (protecting code blocks in `<pre>`), but before the paragraph-wrapping stage.
+  - The same autolink pass is applied inside `inlineMd()` so URLs in list items, blockquotes, and table cells are linked too.
+  - Trailing punctuation (`.`, `,`, `;`, `:`, `!`, `?`, `)`) is stripped from the end of URLs to avoid linking sentence-ending characters.
+  - URLs are passed through `esc()` before placement in `href` and link text — no XSS risk.
+  - 7 new structural tests in `tests/test_issue342.py`; 809 tests total (up from 802)
+
 ## [v0.50.10] Title auto-generation fix + mobile close button (PR #333)

 - **Session title now auto-generates for all default title values** (`'Untitled'`, `'New Chat'`, empty string): The condition in `api/streaming.py` that triggers `title_from()` previously only matched `'Untitled'`. It now also covers `'New Chat'` (used by some external clients/forks) and any empty/falsy title, so sessions started from those states get a proper auto-generated title after the first message.
--- a/static/ui.js
+++ b/static/ui.js
@@ -331,6 +331,7 @@ function renderMd(raw){
    t=t.replace(/\*([^*\n]+)\*/g,(_,x)=>`<em>${esc(x)}</em>`);
    t=t.replace(/`([^`\n]+)`/g,(_,x)=>`<code>${esc(x)}</code>`);
    t=t.replace(/\[([^\]]+)\]\((https?:\/\/[^\)]+)\)/g,(_,lb,u)=>`<a href="${esc(u)}" target="_blank" rel="noopener">${esc(lb)}</a>`);
+    t=t.replace(/(https?:\/\/[^\s<>"')\]]+)/g,(url)=>{const trail=url.match(/[.,;:!?)]$/)?url.slice(-1):'';const clean=trail?url.slice(0,-1):url;return `<a href="${esc(clean)}" target="_blank" rel="noopener">${esc(clean)}</a>${trail}`;});
    // Escape any plain text that isn't already wrapped in a tag we produced
    // by escaping bare < > that aren't part of our own tags
    const SAFE_INLINE=/^<\/?(strong|em|code|a)([\s>]|$)/i;
@@ -383,6 +384,13 @@ function renderMd(raw){
  // <div class="..."> (mermaid/pre-header). Everything else is untrusted input.
  const SAFE_TAGS=/^<\/?(strong|em|code|pre|h[1-6]|ul|ol|li|table|thead|tbody|tr|th|td|hr|blockquote|p|br|a|div)([\s>]|$)/i;
  s=s.replace(/<\/?[a-z][^>]*>/gi,tag=>SAFE_TAGS.test(tag)?tag:esc(tag));
+  // Autolink: convert plain URLs to clickable links (not inside existing <a> tags, not in code)
+  s=s.replace(/(https?:\/\/[^\s<>"')\]]+)/g,(url)=>{
+    // Strip trailing punctuation that was likely not part of the URL
+    const trail=url.match(/[.,;:!?)]$/)?url.slice(-1):'';
+    const clean=trail?url.slice(0,-1):url;
+    return `<a href="${esc(clean)}" target="_blank" rel="noopener">${esc(clean)}</a>${trail}`;
+  });
  const parts=s.split(/\n{2,}/);
  s=parts.map(p=>{p=p.trim();if(!p)return '';if(/^<(h[1-6]|ul|ol|pre|hr|blockquote)/.test(p))return p;return `<p>${p.replace(/\n/g,'<br>')}</p>`;}).join('\n');
  return s;
--- a/tests/test_issue342.py
+++ b/tests/test_issue342.py
@@ -0,0 +1,115 @@
+"""
+Tests for GitHub issue #342: auto-link plain URLs in chat messages.
+
+These are structural tests that verify the fix is present in static/ui.js
+without requiring a running server or JavaScript engine.
+"""
+import os
+import re
+
+UI_JS = os.path.join(os.path.dirname(__file__), '..', 'static', 'ui.js')
+
+
+def read_ui_js():
+    with open(UI_JS, 'r') as f:
+        return f.read()
+
+
+def test_autolink_comment_present():
+    """The Autolink comment should be present in renderMd() to document the feature."""
+    content = read_ui_js()
+    assert 'Autolink: convert plain URLs' in content, (
+        "Expected 'Autolink: convert plain URLs' comment not found in static/ui.js. "
+        "Did the autolink pass get added?"
+    )
+
+
+def test_autolink_regex_in_rendermd():
+    """The autolink regex pattern (https?://) should appear in renderMd()."""
+    content = read_ui_js()
+    # Locate the renderMd function body
+    rendermd_start = content.find('function renderMd(raw){')
+    assert rendermd_start != -1, "renderMd function not found in ui.js"
+    # Find the closing brace after renderMd (look for the autolink pattern within it)
+    rendermd_body = content[rendermd_start:rendermd_start + 5000]
+    assert 'https?:\\/\\/' in rendermd_body, (
+        "Autolink regex (https?:\\/\\/) not found inside renderMd() body."
+    )
+
+
+def test_autolink_uses_esc_for_xss_safety():
+    """The autolink code must use esc() to escape URLs, preventing XSS."""
+    content = read_ui_js()
+    # Find the autolink section (between the SAFE_TAGS pass and paragraph wrap)
+    autolink_idx = content.find('// Autolink: convert plain URLs')
+    assert autolink_idx != -1, "Autolink comment not found in ui.js"
+    # Extract the autolink block (next ~300 chars after the comment)
+    autolink_block = content[autolink_idx:autolink_idx + 400]
+    assert 'esc(clean)' in autolink_block, (
+        "Autolink block should use esc(clean) for XSS-safe URL escaping, but it was not found."
+    )
+
+
+def test_autolink_in_inline_md():
+    """The autolink pass should also be present inside the inlineMd() helper."""
+    content = read_ui_js()
+    # Find inlineMd function
+    inline_start = content.find('function inlineMd(t){')
+    assert inline_start != -1, "inlineMd function not found in ui.js"
+    # Find closing brace of inlineMd by looking for 'return t;' followed by '}'
+    inline_end = content.find('return t;\n  }', inline_start)
+    assert inline_end != -1, "Could not locate end of inlineMd function"
+    inline_body = content[inline_start:inline_end + 20]
+    assert 'https?:\\/\\/' in inline_body, (
+        "Autolink regex not found inside inlineMd() — plain URLs in list items "
+        "and blockquotes won't be autolinked."
+    )
+
+
+def test_autolink_after_safe_tags_pass():
+    """The autolink pass must come AFTER the SAFE_TAGS escape pass (ordering matters)."""
+    content = read_ui_js()
+    safe_tags_idx = content.find('s=s.replace(/<\\/?[a-z][^>]*>/gi,tag=>SAFE_TAGS.test(tag)?tag:esc(tag));')
+    autolink_idx = content.find('// Autolink: convert plain URLs')
+    parts_idx = content.find('const parts=s.split(/\\n{2,}/);')
+    assert safe_tags_idx != -1, "SAFE_TAGS pass not found"
+    assert autolink_idx != -1, "Autolink pass not found"
+    assert parts_idx != -1, "Paragraph-wrap parts line not found"
+    assert safe_tags_idx < autolink_idx < parts_idx, (
+        f"Ordering wrong: SAFE_TAGS at {safe_tags_idx}, autolink at {autolink_idx}, "
+        f"parts (paragraph wrap) at {parts_idx}. "
+        "Autolink must come between SAFE_TAGS pass and paragraph wrap."
+    )
+
+
+def test_autolink_target_blank_and_rel():
+    """Autolinked URLs should open in a new tab with rel=noopener for security."""
+    content = read_ui_js()
+    autolink_idx = content.find('// Autolink: convert plain URLs')
+    assert autolink_idx != -1, "Autolink comment not found"
+    autolink_block = content[autolink_idx:autolink_idx + 400]
+    assert 'target="_blank"' in autolink_block, (
+        "Autolinked URLs should have target=\"_blank\""
+    )
+    assert 'rel="noopener"' in autolink_block, (
+        "Autolinked URLs should have rel=\"noopener\" for security"
+    )
+
+
+def test_safe_tags_includes_anchor():
+    """SAFE_TAGS regex must include 'a' so <a> tags from autolink are not escaped."""
+    content = read_ui_js()
+    # Find the SAFE_TAGS definition line — the pattern contains slashes so we
+    # search for the line directly rather than extracting the regex literal.
+    safe_tags_line = None
+    for line in content.splitlines():
+        if 'const SAFE_TAGS=' in line:
+            safe_tags_line = line
+            break
+    assert safe_tags_line is not None, "SAFE_TAGS const definition not found in ui.js"
+    # The pattern should include 'a' as a tag alternative (e.g. |a|)
+    assert '|a|' in safe_tags_line or '|a)' in safe_tags_line, (
+        f"SAFE_TAGS line does not include 'a' tag — "
+        "<a> tags emitted by autolink would be escaped!\n"
+        f"Line: {safe_tags_line}"
+    )