feat: KaTeX math rendering for LaTeX in chat + workspace previews (#352)

* feat: KaTeX math rendering for $..$ and $$..$$ in chat and previews (fixes #347)

- Stash math delimiters before markdown pipeline, restore as .katex-block/.katex-inline elements
- KaTeX JS lazy-loaded from CDN on first math block (mirrors mermaid pattern)
- KaTeX CSS loaded eagerly in <head> to prevent layout shift
- SRI hashes on both CDN tags
- throwOnError:false — invalid LaTeX is rendered in place as a KaTeX error instead of throwing; any other render failure falls back to a code span
- Supports $$, $, \\(...\\), \\[...\\] delimiters
- 18 new tests, 831/831 passing

* fix: remove invalid \' escape sequences in math stash lines

Lines 311, 314, 316, 317 had \' (backslash-quote) instead of plain '
in the arrow function bodies. This is a JS syntax error — node --check
fails with 'Invalid or unexpected token'. Likely caused by a
serialization artifact during code generation.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix: swap stash order (fence before math) to protect code spans; add renderKatexBlocks to workspace preview

- static/ui.js: fence_stash now runs BEFORE math_stash so dollar signs
  inside backtick code spans are not extracted as math. Previously
  `$x$` would render as KaTeX inside a <code> tag instead of
  showing the literal string $x$.
- static/workspace.js: add requestAnimationFrame(renderKatexBlocks)
  after markdown preview renders so math works in workspace file
  previews, not only in chat messages.

* feat: KaTeX math rendering + stash order fix + workspace wiring (#352)

- tests/test_issue347.py: 11 new tests (29 total) covering fence-before-math
  ordering, workspace.js renderKatexBlocks call, stash token distinctness,
  false-positive prevention, safe-tags boundary check
- CHANGELOG.md: v0.50.15 entry; 870 tests total (up from 841)

* fix: use literal null byte (\x00M) in math stash token — matches restore regex

The original PR's second commit (fix: remove invalid \' escapes) accidentally
doubled the backslash in the math stash tokens: '\\x00M' is a 5-char string
(backslash + x + 0 + 0 + M) but the restore regex /\x00M/ expects a null byte.
Result: $...$ in messages produced visible \x00M0\x00 tokens instead of
KaTeX spans.

Changed all 4 math stash return statements to use '\x00M' (single backslash =
null byte, same convention as fence_stash's '\x00F').

Also updates test_stash_tokens_distinct to check for the correct pattern.

* fix: add null-byte token test; update CHANGELOG to v0.50.15 with fixes

- tests/test_issue347.py: add test_math_stash_token_uses_single_backslash_null_byte
  to catch the \\x00M double-backslash regression; 30 tests total (up from 29)
- CHANGELOG.md: v0.50.15 entry documents all fixes including the token bug
  and workspace preview wiring; 871 tests total

---------

Co-authored-by: Nathan Esquenazi <nesquena@gmail.com>
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
nesquena-hermes
2026-04-13 11:40:15 -07:00
committed by GitHub
parent dd17a0e9b7
commit d0e08fee88
6 changed files with 436 additions and 3 deletions

View File

@@ -304,8 +304,21 @@ function renderMd(raw){
// Only runs OUTSIDE fenced code blocks and backtick spans (stash + restore).
// Unsafe tags (anything not in the allowlist) are left as-is and will be
// HTML-escaped by esc() when they reach an innerHTML assignment -- no XSS risk.
// Fence stash: protect code blocks and backtick spans from all further processing
// Must run BEFORE math_stash so $..$ inside code spans is not extracted as math
const fence_stash=[];
s=s.replace(/(```[\s\S]*?```|`[^`\n]+`)/g,m=>{fence_stash.push(m);return '\x00F'+(fence_stash.length-1)+'\x00';});
// Math stash: protect $$..$$ and $..$ from markdown processing
// Runs AFTER fence_stash so backtick code spans protect their dollar-sign contents
const math_stash=[];
// Display math: $$...$$ (must come before inline to avoid mis-parsing)
s=s.replace(/\$\$([\s\S]+?)\$\$/g,(_,m)=>{math_stash.push({type:'display',src:m});return '\x00M'+(math_stash.length-1)+'\x00';});
// Inline math: $...$ — require non-space at boundaries to avoid false positives
// e.g. "costs $5 and $10" should not trigger (space after opening $)
s=s.replace(/\$([^\s$\n][^$\n]*?[^\s$\n]|\S)\$/g,(_,m)=>{math_stash.push({type:'inline',src:m});return '\x00M'+(math_stash.length-1)+'\x00';});
// Also stash \(...\) and \[...\] LaTeX delimiters
s=s.replace(/\\\\\((.+?)\\\\\)/g,(_,m)=>{math_stash.push({type:'inline',src:m});return '\x00M'+(math_stash.length-1)+'\x00';});
s=s.replace(/\\\\\[(.+?)\\\\\]/gs,(_,m)=>{math_stash.push({type:'display',src:m});return '\x00M'+(math_stash.length-1)+'\x00';});
// Safe tag → markdown equivalent (these produce the same output as **text** etc.)
s=s.replace(/<strong>([\s\S]*?)<\/strong>/gi,(_,t)=>'**'+t+'**');
s=s.replace(/<b>([\s\S]*?)<\/b>/gi,(_,t)=>'**'+t+'**');
@@ -382,7 +395,7 @@ function renderMd(raw){
// Our pipeline only emits: <strong>,<em>,<code>,<pre>,<h1-6>,<ul>,<ol>,<li>,
// <table>,<thead>,<tbody>,<tr>,<th>,<td>,<hr>,<blockquote>,<p>,<br>,<a>,
// <div class="..."> (mermaid/pre-header). Everything else is untrusted input.
const SAFE_TAGS=/^<\/?(strong|em|code|pre|h[1-6]|ul|ol|li|table|thead|tbody|tr|th|td|hr|blockquote|p|br|a|div)([\s>]|$)/i;
const SAFE_TAGS=/^<\/?(strong|em|code|pre|h[1-6]|ul|ol|li|table|thead|tbody|tr|th|td|hr|blockquote|p|br|a|div|span)([\s>]|$)/i;
s=s.replace(/<\/?[a-z][^>]*>/gi,tag=>SAFE_TAGS.test(tag)?tag:esc(tag));
// Autolink: convert plain URLs to clickable links (not inside existing <a> tags, not in code)
s=s.replace(/(https?:\/\/[^\s<>"')\]]+)/g,(url)=>{
@@ -391,6 +404,15 @@ function renderMd(raw){
const clean=trail?url.slice(0,-1):url;
return `<a href="${esc(clean)}" target="_blank" rel="noopener">${esc(clean)}</a>${trail}`;
});
// Restore math stash → katex placeholder spans/divs
// These will be rendered by renderKatexBlocks() after DOM insertion
s=s.replace(/\x00M(\d+)\x00/g,(_,i)=>{
const item=math_stash[+i];
if(item.type==='display'){
return `<div class="katex-block" data-katex="display">${esc(item.src)}</div>`;
}
return `<span class="katex-inline" data-katex="inline">${esc(item.src)}</span>`;
});
const parts=s.split(/\n{2,}/);
s=parts.map(p=>{p=p.trim();if(!p)return '';if(/^<(h[1-6]|ul|ol|pre|hr|blockquote)/.test(p))return p;return `<p>${p.replace(/\n/g,'<br>')}</p>`;}).join('\n');
return s;
@@ -963,7 +985,7 @@ function renderMessages(){
}
scrollToBottom();
// Apply syntax highlighting after DOM is built
requestAnimationFrame(()=>{highlightCode();addCopyButtons();renderMermaidBlocks();});
requestAnimationFrame(()=>{highlightCode();addCopyButtons();renderMermaidBlocks();renderKatexBlocks();});
// Refresh todo panel if it's currently open
if(typeof loadTodos==='function' && document.getElementById('panelTodos') && document.getElementById('panelTodos').classList.contains('active')){
loadTodos();
@@ -1237,6 +1259,47 @@ function renderMermaidBlocks(){
});
}
// True while the KaTeX <script> tag has been injected and has not yet failed.
let _katexLoading=false;
// True once the KaTeX script has loaded and the global `katex` is available.
let _katexReady=false;
/**
 * Render every not-yet-rendered math placeholder in the document with KaTeX.
 *
 * Placeholders are elements matching `.katex-block` or `.katex-inline` that do
 * not carry a `data-rendered` attribute; their text content is the LaTeX source
 * and `data-katex="display"` selects display mode.
 *
 * The KaTeX script is injected on the first call that finds placeholders. While
 * it is loading, calls return without rendering; the script's `onload` handler
 * re-invokes this function. If the script fails to load, the loading flag is
 * reset so that a later call can try again instead of being blocked forever.
 *
 * @returns {void}
 */
function renderKatexBlocks(){
  const blocks=document.querySelectorAll('.katex-block:not([data-rendered]),.katex-inline:not([data-rendered])');
  if(!blocks.length) return;
  if(!_katexReady){
    if(!_katexLoading){
      _katexLoading=true;
      const script=document.createElement('script');
      script.src='https://cdn.jsdelivr.net/npm/katex@0.16.22/dist/katex.min.js';
      script.integrity='sha384-cMkvdD8LoxVzGF/RPUKAcvmm49FQ0oxwDF3BGKtDXcEc+T1b2N+teh/OJfpU0jr6';
      script.crossOrigin='anonymous';
      script.onload=()=>{
        if(typeof katex!=='undefined'){
          _katexReady=true;
          // Render the placeholders that were waiting for the library.
          renderKatexBlocks();
        }
      };
      script.onerror=()=>{
        // Load failed (network error, blocked CDN, integrity mismatch, ...).
        // Clear the flag and drop the dead tag so the next call retries the
        // load rather than returning early for the rest of the session.
        _katexLoading=false;
        script.remove();
      };
      document.head.appendChild(script);
    }
    return;
  }
  blocks.forEach(el=>{
    // Mark first so the element is never picked up again, even if rendering fails.
    el.dataset.rendered='true';
    const src=el.textContent||'';
    const displayMode=el.dataset.katex==='display';
    try{
      katex.render(src,el,{
        displayMode,
        throwOnError:false,
        trust:false,
        strict:'ignore',
      });
    }catch(e){
      // katex.render still threw despite throwOnError:false:
      // replace the placeholder with the escaped raw source in a code span.
      el.outerHTML=`<code>${esc(src)}</code>`;
    }
  });
}
function appendThinking(){
$('emptyState').style.display='none';
const row=document.createElement('div');row.className='msg-row';row.id='thinkingRow';