Skip to content

Commit

Permalink
feat: add KaTeX support for block and inline math rendering in Markdo…
Browse files Browse the repository at this point in the history
…wn (#1643)

* feat: add KaTeX support for block and inline math rendering in Markdown

* fix: Reintroduce escapeHTML to prevent raw HTML injection

- Properly escape user-generated HTML to avoid DOM injection vulnerabilities.
- Ensures KaTeX rendering remains unaffected while blocking malicious content.

* feat: add the KaTeX mhchem extension to enable the \ce and \pu functions from the mhchem package in KaTeX.

- https://github.com/KaTeX/KaTeX/tree/main/contrib/mhchem

---------

Co-authored-by: Nathan Sarrazin <[email protected]>
  • Loading branch information
calycekr and nsarrazin authored Jan 16, 2025
1 parent 795bf39 commit 03a28b2
Showing 1 changed file with 135 additions and 53 deletions.
188 changes: 135 additions & 53 deletions src/lib/components/chat/MarkdownRenderer.svelte
Original file line number Diff line number Diff line change
@@ -1,13 +1,144 @@
<script lang="ts">
import type { WebSearchSource } from "$lib/types/WebSearch";
import katex from "katex";
import "katex/dist/contrib/mhchem.mjs";
import DOMPurify from "isomorphic-dompurify";
import { Marked } from "marked";
import type { Tokens, TokenizerExtension, RendererExtension } from "marked";
import CodeBlock from "../CodeBlock.svelte";
export let content: string;
export let sources: WebSearchSource[] = [];
/**
 * Token shape for display-mode (block) math.
 *
 * `raw` carries the source text the token consumed and `text` the LaTeX
 * body handed to KaTeX; `displayMode` is pinned to `true` for this variant.
 */
interface katexBlockToken extends Tokens.Generic {
	displayMode: true;
	type: "katexBlock";
	text: string;
	raw: string;
}
/**
 * Token shape for inline math.
 *
 * `raw` carries the source text the token consumed and `text` the LaTeX
 * body handed to KaTeX; `displayMode` is pinned to `false` for this variant.
 */
interface katexInlineToken extends Tokens.Generic {
	displayMode: false;
	type: "katexInline";
	text: string;
	raw: string;
}
/**
 * Marked block-level extension that recognises display math written as
 * `$$ ... $$` or `\[ ... \]` and renders it with KaTeX in display mode.
 */
export const katexBlockExtension: TokenizerExtension & RendererExtension = {
	name: "katexBlock",
	level: "block",

	/** Index of the first `$$` or `\[` in `src`, or -1 when neither occurs. */
	start(src: string): number | undefined {
		const opener = /(\${2}|\\\[)/.exec(src);
		return opener ? opener.index : -1;
	},

	/**
	 * Builds a block token when `src` begins with one of the supported
	 * delimiter pairs; returns `undefined` otherwise.
	 */
	tokenizer(src: string): katexBlockToken | undefined {
		// Tried in order: `$$ ... $$` first, then `\[ ... \]`.
		const delimiterRules = [/^\${2}([\s\S]+?)\${2}/, /^\\\[([\s\S]+?)\\\]/];

		for (const rule of delimiterRules) {
			const found = rule.exec(src);
			if (found) {
				return {
					type: "katexBlock",
					raw: found[0],
					text: found[1].trim(),
					displayMode: true,
				};
			}
		}
		return undefined;
	},

	/** Renders a `katexBlock` token to an HTML string; ignores other tokens. */
	renderer(token) {
		if (token.type !== "katexBlock") {
			return undefined;
		}
		// throwOnError: false is passed so KaTeX does not throw on invalid LaTeX.
		const options = { throwOnError: false, displayMode: token.displayMode };
		return katex.renderToString(token.text, options);
	},
};
/**
 * Marked inline-level extension that recognises inline math written as
 * `$...$` or `\(...\)` and renders it with KaTeX in inline mode.
 */
const katexInlineExtension: TokenizerExtension & RendererExtension = {
	name: "katexInline",
	level: "inline",

	/** Index of the first `$` or `\(` in `src`, or -1 when neither occurs. */
	start(src: string): number | undefined {
		const opener = /(\$|\\\()/.exec(src);
		return opener ? opener.index : -1;
	},

	/**
	 * Builds an inline token when `src` begins with one of the supported
	 * delimiter pairs; returns `undefined` otherwise.
	 */
	tokenizer(src: string): katexInlineToken | undefined {
		// `$...$` (body may not contain `$`) is tried before `\(...\)`.
		const dollarHit = /^\$([^$]+?)\$/.exec(src);
		const hit = dollarHit ? dollarHit : /^\\\(([\s\S]+?)\\\)/.exec(src);
		if (!hit) {
			return undefined;
		}

		const [consumed, body] = hit;
		return {
			type: "katexInline",
			raw: consumed,
			text: body.trim(),
			displayMode: false,
		};
	},

	/** Renders a `katexInline` token to an HTML string; ignores other tokens. */
	renderer(token) {
		if (token.type !== "katexInline") {
			return undefined;
		}
		// throwOnError: false is passed so KaTeX does not throw on invalid LaTeX.
		const options = { throwOnError: false, displayMode: token.displayMode };
		return katex.renderToString(token.text, options);
	},
};
/**
 * Replaces the HTML-significant characters `<`, `>`, `&`, `"` and `'`
 * with their entity equivalents so the text cannot be parsed as markup.
 *
 * @param content - Raw text to escape.
 * @returns The escaped string; all other characters are left untouched.
 */
function escapeHTML(content: string) {
	const entityFor: Record<string, string> = {
		"<": "&lt;",
		">": "&gt;",
		"&": "&amp;",
		"'": "&#39;",
		'"': "&quot;",
	};
	return content.replace(/[<>&"']/g, (ch) => entityFor[ch] || ch);
}
function addInlineCitations(md: string, webSearchSources: WebSearchSource[] = []): string {
const linkStyle =
"color: rgb(59, 130, 246); text-decoration: none; hover:text-decoration: underline;";
Expand All @@ -30,63 +161,14 @@
});
}
/**
 * Escapes `<`, `>` and `&` as HTML entities.
 *
 * The pattern also matches newlines, but they have no replacement and are
 * therefore emitted unchanged. Quotes are not escaped by this variant.
 *
 * @param content - Raw text to escape.
 * @returns The escaped string.
 */
function escapeHTML(content: string) {
	return content.replace(/[<>&\n]/g, (ch) => {
		switch (ch) {
			case "<":
				return "&lt;";
			case ">":
				return "&gt;";
			case "&":
				return "&amp;";
			default:
				// Newline: matched by the pattern but passed through as-is.
				return ch;
		}
	});
}
/**
 * Scans already-rendered HTML for LaTeX delimiter pairs and replaces each
 * delimited span with KaTeX output.
 *
 * Delimiter pairs are applied one after another in a fixed order
 * (`$$…$$`, `$…$`, `( … )`, `[ … ]`); each pass runs over the output of
 * the previous one. Display-mode results are wrapped in a centered
 * paragraph. If KaTeX throws, the error is logged and the original
 * matched text is kept.
 *
 * @param parsed - HTML string to post-process.
 * @returns The HTML with recognised math spans rendered.
 */
function processLatex(parsed: string) {
	// Backslash-escape regex metacharacters so a delimiter matches literally.
	const escapeForRegex = (literal: string) => literal.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

	// [opening delimiter, closing delimiter, render in display mode?]
	const delimiterPairs: Array<[string, string, boolean]> = [
		["$$", "$$", true],
		["$", "$", false],
		["( ", " )", false],
		["[ ", " ]", true],
	];

	return delimiterPairs.reduce((html, [open, close, isDisplay]) => {
		// A delimiter only counts when it is not glued to a word character.
		const matcher = new RegExp(
			`(?<!\\w)${escapeForRegex(open)}([^]*?)${escapeForRegex(close)}(?!\\w)`,
			"g"
		);

		return html.replace(matcher, (whole, body) => {
			try {
				const output = katex.renderToString(body.trim(), { displayMode: isDisplay });
				return isDisplay ? `<p style="width:100%;text-align:center;">${output}</p>` : output;
			} catch (error) {
				console.error("KaTeX error:", error);
				// Leave the source text untouched when rendering fails.
				return whole;
			}
		});
	}, parsed);
}
const marked = new Marked({
hooks: {
preprocess: (md) => addInlineCitations(escapeHTML(md), sources),
postprocess: (html) => {
return DOMPurify.sanitize(processLatex(html));
},
preprocess: (md) => addInlineCitations(md, sources),
postprocess: (html) => DOMPurify.sanitize(html),
},
extensions: [katexBlockExtension, katexInlineExtension],
renderer: {
codespan: (code) => `<code>${code.replaceAll("&amp;", "&")}</code>`,
html: (html) => escapeHTML(html),
link: (href, title, text) =>
`<a href="${href?.replace(/>$/, "")}" target="_blank" rel="noreferrer">${text}</a>`,
},
Expand Down

0 comments on commit 03a28b2

Please sign in to comment.