feat: autolink plain URLs in chat messages (fixes #342) (#346)

Co-authored-by: Nathan Esquenazi <nesquena@gmail.com>
This commit is contained in:
nesquena-hermes
2026-04-13 00:05:04 -07:00
committed by GitHub
parent 3c771c4d2c
commit 05f09012a5
3 changed files with 132 additions and 0 deletions

View File

@@ -6,6 +6,15 @@
---
## [v0.50.11] Auto-link plain URLs in chat messages (fixes #342)
- **Plain URLs in chat messages are now clickable** (`static/ui.js`): The `renderMd()` function previously only converted Markdown-style `[label](url)` links to `<a>` tags. Bare URLs like `https://example.com` were rendered as plain text. A new autolink pass converts `https?://...` URLs to `<a href="..." target="_blank" rel="noopener">` tags automatically.
- The pass runs after the `[label](url)` link pass and the `SAFE_TAGS` HTML-escape pass (protecting code blocks in `<pre>`), but before the paragraph-wrapping stage.
- The same autolink pass is applied inside `inlineMd()` so URLs in list items, blockquotes, and table cells are linked too.
- Trailing punctuation (`.`, `,`, `;`, `:`, `!`, `?`, `)`) is stripped from the end of URLs to avoid linking sentence-ending characters.
- URLs are passed through `esc()` before placement in `href` and link text — no XSS risk.
- 7 new structural tests in `tests/test_issue342.py`; 809 tests total (up from 802)
## [v0.50.10] Title auto-generation fix + mobile close button (PR #333)
- **Session title now auto-generates for all default title values** (`'Untitled'`, `'New Chat'`, empty string): The condition in `api/streaming.py` that triggers `title_from()` previously only matched `'Untitled'`. It now also covers `'New Chat'` (used by some external clients/forks) and any empty/falsy title, so sessions started from those states get a proper auto-generated title after the first message.

View File

@@ -331,6 +331,7 @@ function renderMd(raw){
t=t.replace(/\*([^*\n]+)\*/g,(_,x)=>`<em>${esc(x)}</em>`);
t=t.replace(/`([^`\n]+)`/g,(_,x)=>`<code>${esc(x)}</code>`);
t=t.replace(/\[([^\]]+)\]\((https?:\/\/[^\)]+)\)/g,(_,lb,u)=>`<a href="${esc(u)}" target="_blank" rel="noopener">${esc(lb)}</a>`);
t=t.replace(/(https?:\/\/[^\s<>"')\]]+)/g,(url)=>{const trail=url.match(/[.,;:!?)]$/)?url.slice(-1):'';const clean=trail?url.slice(0,-1):url;return `<a href="${esc(clean)}" target="_blank" rel="noopener">${esc(clean)}</a>${trail}`;});
// Escape any plain text that isn't already wrapped in a tag we produced
// by escaping bare < > that aren't part of our own tags
const SAFE_INLINE=/^<\/?(strong|em|code|a)([\s>]|$)/i;
@@ -383,6 +384,13 @@ function renderMd(raw){
// <div class="..."> (mermaid/pre-header). Everything else is untrusted input.
const SAFE_TAGS=/^<\/?(strong|em|code|pre|h[1-6]|ul|ol|li|table|thead|tbody|tr|th|td|hr|blockquote|p|br|a|div)([\s>]|$)/i;
s=s.replace(/<\/?[a-z][^>]*>/gi,tag=>SAFE_TAGS.test(tag)?tag:esc(tag));
// Autolink: convert plain URLs to clickable links (not inside existing <a> tags, not in code)
s=s.replace(/(https?:\/\/[^\s<>"')\]]+)/g,(url)=>{
// Strip trailing punctuation that was likely not part of the URL
const trail=url.match(/[.,;:!?)]$/)?url.slice(-1):'';
const clean=trail?url.slice(0,-1):url;
return `<a href="${esc(clean)}" target="_blank" rel="noopener">${esc(clean)}</a>${trail}`;
});
const parts=s.split(/\n{2,}/);
s=parts.map(p=>{p=p.trim();if(!p)return '';if(/^<(h[1-6]|ul|ol|pre|hr|blockquote)/.test(p))return p;return `<p>${p.replace(/\n/g,'<br>')}</p>`;}).join('\n');
return s;

115
tests/test_issue342.py Normal file
View File

@@ -0,0 +1,115 @@
"""
Tests for GitHub issue #342: auto-link plain URLs in chat messages.
These are structural tests that verify the fix is present in static/ui.js
without requiring a running server or JavaScript engine.
"""
import os
import re
UI_JS = os.path.join(os.path.dirname(__file__), '..', 'static', 'ui.js')
def read_ui_js():
with open(UI_JS, 'r') as f:
return f.read()
def test_autolink_comment_present():
"""The Autolink comment should be present in renderMd() to document the feature."""
content = read_ui_js()
assert 'Autolink: convert plain URLs' in content, (
"Expected 'Autolink: convert plain URLs' comment not found in static/ui.js. "
"Did the autolink pass get added?"
)
def test_autolink_regex_in_rendermd():
"""The autolink regex pattern (https?://) should appear in renderMd()."""
content = read_ui_js()
# Locate the renderMd function body
rendermd_start = content.find('function renderMd(raw){')
assert rendermd_start != -1, "renderMd function not found in ui.js"
# Find the closing brace after renderMd (look for the autolink pattern within it)
rendermd_body = content[rendermd_start:rendermd_start + 5000]
assert 'https?:\\/\\/' in rendermd_body, (
"Autolink regex (https?:\\/\\/) not found inside renderMd() body."
)
def test_autolink_uses_esc_for_xss_safety():
"""The autolink code must use esc() to escape URLs, preventing XSS."""
content = read_ui_js()
# Find the autolink section (between the SAFE_TAGS pass and paragraph wrap)
autolink_idx = content.find('// Autolink: convert plain URLs')
assert autolink_idx != -1, "Autolink comment not found in ui.js"
# Extract the autolink block (next ~300 chars after the comment)
autolink_block = content[autolink_idx:autolink_idx + 400]
assert 'esc(clean)' in autolink_block, (
"Autolink block should use esc(clean) for XSS-safe URL escaping, but it was not found."
)
def test_autolink_in_inline_md():
"""The autolink pass should also be present inside the inlineMd() helper."""
content = read_ui_js()
# Find inlineMd function
inline_start = content.find('function inlineMd(t){')
assert inline_start != -1, "inlineMd function not found in ui.js"
# Find closing brace of inlineMd by looking for 'return t;' followed by '}'
inline_end = content.find('return t;\n }', inline_start)
assert inline_end != -1, "Could not locate end of inlineMd function"
inline_body = content[inline_start:inline_end + 20]
assert 'https?:\\/\\/' in inline_body, (
"Autolink regex not found inside inlineMd() — plain URLs in list items "
"and blockquotes won't be autolinked."
)
def test_autolink_after_safe_tags_pass():
"""The autolink pass must come AFTER the SAFE_TAGS escape pass (ordering matters)."""
content = read_ui_js()
safe_tags_idx = content.find('s=s.replace(/<\\/?[a-z][^>]*>/gi,tag=>SAFE_TAGS.test(tag)?tag:esc(tag));')
autolink_idx = content.find('// Autolink: convert plain URLs')
parts_idx = content.find('const parts=s.split(/\\n{2,}/);')
assert safe_tags_idx != -1, "SAFE_TAGS pass not found"
assert autolink_idx != -1, "Autolink pass not found"
assert parts_idx != -1, "Paragraph-wrap parts line not found"
assert safe_tags_idx < autolink_idx < parts_idx, (
f"Ordering wrong: SAFE_TAGS at {safe_tags_idx}, autolink at {autolink_idx}, "
f"parts (paragraph wrap) at {parts_idx}. "
"Autolink must come between SAFE_TAGS pass and paragraph wrap."
)
def test_autolink_target_blank_and_rel():
"""Autolinked URLs should open in a new tab with rel=noopener for security."""
content = read_ui_js()
autolink_idx = content.find('// Autolink: convert plain URLs')
assert autolink_idx != -1, "Autolink comment not found"
autolink_block = content[autolink_idx:autolink_idx + 400]
assert 'target="_blank"' in autolink_block, (
"Autolinked URLs should have target=\"_blank\""
)
assert 'rel="noopener"' in autolink_block, (
"Autolinked URLs should have rel=\"noopener\" for security"
)
def test_safe_tags_includes_anchor():
"""SAFE_TAGS regex must include 'a' so <a> tags from autolink are not escaped."""
content = read_ui_js()
# Find the SAFE_TAGS definition line — the pattern contains slashes so we
# search for the line directly rather than extracting the regex literal.
safe_tags_line = None
for line in content.splitlines():
if 'const SAFE_TAGS=' in line:
safe_tags_line = line
break
assert safe_tags_line is not None, "SAFE_TAGS const definition not found in ui.js"
# The pattern should include 'a' as a tag alternative (e.g. |a|)
assert '|a|' in safe_tags_line or '|a)' in safe_tags_line, (
f"SAFE_TAGS line does not include 'a' tag — "
"<a> tags emitted by autolink would be escaped!\n"
f"Line: {safe_tags_line}"
)