// Review notes (this text sits before the first `diff --git` header, outside every hunk):
// - NOTE(review): this copy of the patch arrived with whitespace collapsed; the line breaks, indentation
//   and blank context lines below are reconstructed from the hunk counts — verify against the target files.
// - NOTE(review): the `escapeHtml` replacement strings and the inline `renderer` template in the context
//   lines look like they lost HTML entities/markup in this copy — confirm before relying on them.
// - NOTE(review): the changeset says "talking about math", while the added tests cover dollar amounts
//   such as "$10 ... $20" — confirm the intended wording.
diff --git a/.changeset/fix-greedy-math.md b/.changeset/fix-greedy-math.md
new file mode 100644
index 000000000..7c065cc93
--- /dev/null
+++ b/.changeset/fix-greedy-math.md
@@ -0,0 +1,5 @@
+---
+default: patch
+---
+
+Updated the math detection to avoid accidental detection when talking about math or spamming dollar signs.
diff --git a/src/app/plugins/markdown/extensions/matrix-math.ts b/src/app/plugins/markdown/extensions/matrix-math.ts
index 7ceece744..42abbae67 100644
--- a/src/app/plugins/markdown/extensions/matrix-math.ts
+++ b/src/app/plugins/markdown/extensions/matrix-math.ts
@@ -1,6 +1,6 @@
 import type { TokenizerExtension, RendererExtension } from 'marked';
 
-/** Private-use char so math extensions do not match `$` / `$$` inside code spans. Not U+E000–U+E002 (emoticon placeholders). */
+/** Private-use char so math extensions do not match `$` / `$$` inside code spans. Not U+E000–U+E002 (emoticon placeholders). {@link shieldDollarRunsForMarked} uses U+E021–U+E022. */
 export const MATH_CODE_DOLLAR_MASK = '\uE020';
 
 function findSameLineFenceClose(md: string, from: number, tick: string, minLen: number): number {
@@ -155,6 +155,31 @@ export function unmaskMathCodeDollarPlaceholders(html: string): string {
   return html.replaceAll(MATH_CODE_DOLLAR_MASK, '$');
 }
 
+const MARKED_MATH_BLOCK_SHIELD = '\uE021'; // opens a numbered stand-in for a complete `$$…$$` block
+const MARKED_MATH_BLOCK_SHIELD_END = '\uE022'; // closes that stand-in
+
+export function shieldDollarRunsForMarked(markdown: string): string { // backslash-escapes `$$…` runs that are not part of a complete `$$…$$` block
+  const blocks: string[] = []; // lifted-out blocks, addressed by index
+  const blockRe = /\$\$([^$]+)\$\$\n?/g; // `$$`, one or more non-`$` chars, `$$`, optional newline
+  let m: RegExpExecArray | null;
+  let shielded = '';
+  let last = 0; // end offset of the previous match
+  while ((m = blockRe.exec(markdown)) !== null) {
+    shielded += markdown.slice(last, m.index); // text between blocks is copied through unchanged
+    blocks.push(m[0]); // keep the whole match, including any trailing newline
+    shielded += `${MARKED_MATH_BLOCK_SHIELD}${blocks.length - 1}${MARKED_MATH_BLOCK_SHIELD_END}`; // block index wrapped in the two sentinels
+    last = m.index + m[0].length;
+  }
+  shielded += markdown.slice(last); // tail after the last block
+
+  shielded = shielded.replace(/\${2,}/g, (run) => run.replace(/\$/g, () => '\\$')); // each `$` in a remaining run of two or more becomes `\$`
+
+  return shielded.replace(
+    new RegExp(`${MARKED_MATH_BLOCK_SHIELD}(\\d+)${MARKED_MATH_BLOCK_SHIELD_END}`, 'g'),
+    (_, i) => blocks[parseInt(i, 10)] ?? '' // put each block back verbatim; an unknown index yields ''
+  );
+}
+
 function escapeHtml(text: string): string {
   return text
     .replace(/&/g, '&')
@@ -163,6 +188,49 @@ function escapeHtml(text: string): string {
     .replace(/"/g, '"');
 }
 
+function isIgnorableMathContent(latex: string): boolean { // true when the content is blank or consists only of `$`
+  const t = latex.replace(/[\u200B-\u200D\uFEFF]/g, '').trim(); // drop U+200B–U+200D and U+FEFF before trimming
+  if (t === '') return true;
+  return /^\$+$/.test(t); // nothing but dollar signs
+}
+
+/**
+ * Inline math delimiters use `$...$` but must not greedily pair across dollar amounts
+ * (e.g. "$10 ... $20"). We only treat a pair as math when:
+ * - the opening `$` is not followed by whitespace, and
+ * - the closing `$` is not preceded by whitespace, and
+ * - the closing `$` is not immediately followed by an ASCII digit.
+ */
+function tryTokenizeInlineMath(
+  src: string
+): { type: 'math'; raw: string; latex: string } | undefined {
+  if (!src.startsWith('$')) {
+    return undefined;
+  }
+  if (src.startsWith('$$') && (src.length < 3 || src.charAt(2) !== '$')) { // `$$` not followed by a third `$` is rejected here
+    return undefined;
+  }
+  if (src.length < 3 || /\s/.test(src.charAt(1))) { // too short for `$x$`, or whitespace right after the opening `$`
+    return undefined;
+  }
+  for (let j = 1; j < src.length; j++) { // the first `$` that passes every check below closes the token
+    if (src.charAt(j) !== '$') continue;
+    const before = src.charAt(j - 1);
+    if (/\s/.test(before)) continue; // candidate closer follows whitespace
+    const after = j + 1 < src.length ? src.charAt(j + 1) : ''; // '' when the `$` is the last character
+    if (after !== '' && /[0-9]/.test(after)) continue; // a digit directly after the `$` disqualifies it
+    const latex = src.slice(1, j);
+    if (isIgnorableMathContent(latex)) continue; // blank or dollar-only content: try a later `$`
+    if (latex.trimStart().startsWith('$$')) continue; // content may not begin with `$$`
+    return {
+      type: 'math',
+      raw: src.slice(0, j + 1), // both delimiters included
+      latex,
+    };
+  }
+  return undefined;
+}
+
 // Inline math: $...$
 export const matrixMathExtension = {
   name: 'math',
@@ -171,15 +239,7 @@ export const matrixMathExtension = {
     return src.indexOf('$');
   },
   tokenizer(src: string) {
-    const match = /^\$([^$]+)\$/.exec(src);
-    if (match) {
-      return {
-        type: 'math',
-        raw: match[0],
-        latex: match[1],
-      };
-    }
-    return undefined;
+    return tryTokenizeInlineMath(src); // replaces the removed `/^\$([^$]+)\$/` match
   },
   renderer(token) {
     return `${token.latex}`;
@@ -196,10 +256,12 @@ export const matrixMathBlockExtension = {
   tokenizer(src: string) {
     const match = /^\$\$([^$]+)\$\$\n?/.exec(src);
     if (match) {
+      const latex = match[1]?.trim() ?? '';
+      if (isIgnorableMathContent(latex)) return undefined; // no mathBlock token for blank or dollar-only content
       return {
         type: 'mathBlock',
         raw: match[0],
-        latex: match[1]?.trim() ?? '',
+        latex,
       };
     }
     return undefined;
diff --git a/src/app/plugins/markdown/extensions/matrix.test.ts b/src/app/plugins/markdown/extensions/matrix.test.ts
index ed1b254a8..707a3f003 100644
--- a/src/app/plugins/markdown/extensions/matrix.test.ts
+++ b/src/app/plugins/markdown/extensions/matrix.test.ts
@@ -1,7 +1,11 @@
 import { describe, expect, it } from 'vitest';
 import { marked } from 'marked';
 import { matrixSpoilerExtension } from './matrix-spoiler';
-import { matrixMathExtension, matrixMathBlockExtension } from './matrix-math';
+import {
+  matrixMathBlockExtension,
+  matrixMathExtension,
+  shieldDollarRunsForMarked,
+} from './matrix-math';
 import { matrixSubscriptExtension } from './matrix-subscript';
 
 function parse(input: string): string {
@@ -13,7 +17,7 @@ function parse(input: string): string {
       matrixSubscriptExtension,
     ],
   });
-  return processor.parse(input) as string;
+  return processor.parse(shieldDollarRunsForMarked(input)) as string; // shield first, then parse
 }
 
 describe('matrixSpoilerExtension', () => {
@@ -48,6 +52,36 @@ describe('matrixMathExtension (inline)', () => {
   it('does not parse unmatched $', () => {
     expect(parse('No $ math here')).not.toContain('data-mx-maths');
   });
+
+  it('does not parse dollar amounts in a sentence as inline math', () => {
+    const input = 'I just bought something for $10 on sale, it was originally $20!';
+    const result = parse(input);
+    expect(result).not.toContain('data-mx-maths');
+    expect(result).toContain('$10');
+    expect(result).toContain('$20');
+  });
+
+  it('does not treat $ as math when the opening is followed by whitespace', () => {
+    expect(parse('$ E = mc^2$')).not.toContain('data-mx-maths');
+  });
+
+  it('still parses valid inline math', () => {
+    expect(parse('$E = mc^2$')).toContain('data-mx-maths');
+    expect(parse('$2+2$')).toContain('data-mx-maths');
+  });
+
+  it('does not parse inline math when inner trims to empty (e.g. zero-width only)', () => {
+    expect(parse(`empty $\u200B$ here`)).not.toContain('data-mx-maths');
+  });
+
+  it('does not parse long runs of dollar signs as inline math', () => {
+    expect(parse('hey $$$$$$$ there')).not.toContain('data-mx-maths');
+  });
+
+  it('does not parse block math when inner is only whitespace or dollars', () => {
+    expect(parse('$$ $$')).not.toContain('data-mx-maths');
+    expect(parse('$$ $ $$')).not.toContain('data-mx-maths');
+  });
 });
 
 describe('matrixMathBlockExtension (block)', () => {
diff --git a/src/app/plugins/markdown/markdownToHtml.test.ts b/src/app/plugins/markdown/markdownToHtml.test.ts
index 14eb8bc9d..0352b71b0 100644
--- a/src/app/plugins/markdown/markdownToHtml.test.ts
+++ b/src/app/plugins/markdown/markdownToHtml.test.ts
@@ -48,6 +48,26 @@ describe('markdownToHtml', () => {
     expect(result).toContain('E = mc^2');
   });
 
+  it('does not mangle messages with dollar amounts', () => {
+    const result = markdownToHtml(
+      'I just bought something for $10 on sale, it was originally $20!'
+    );
+    expect(result).not.toContain('data-mx-maths');
+    expect(result).toContain('$10');
+    expect(result).toContain('$20');
+  });
+
+  it('does not treat empty or dollar-only block math as KaTeX', () => {
+    expect(markdownToHtml('$$ $$')).not.toContain('data-mx-maths');
+    expect(markdownToHtml('$$ $ $$')).not.toContain('data-mx-maths');
+  });
+
+  it('does not parse five consecutive dollar signs in a sentence as math', () => {
+    const result = markdownToHtml('hey $$$$$ there');
+    expect(result).not.toContain('data-mx-maths');
+    expect(result).toContain('$$$$$');
+  });
+
   it('does not parse dollars inside fenced code as math', () => {
     expect(markdownToHtml('```\n$$test$$\n```')).not.toContain('data-mx-maths');
     expect(markdownToHtml('```\n$$test$$\n```')).toContain('$$test$$');
diff --git a/src/app/plugins/markdown/markdownToHtml.ts b/src/app/plugins/markdown/markdownToHtml.ts
index 78356271f..8e4eb6d17 100644
--- a/src/app/plugins/markdown/markdownToHtml.ts
+++ b/src/app/plugins/markdown/markdownToHtml.ts
@@ -5,6 +5,7 @@ import {
   matrixMathExtension,
   matrixMathBlockExtension,
   maskDollarSignsInsideMarkdownCode,
+  shieldDollarRunsForMarked,
   unmaskMathCodeDollarPlaceholders,
 } from './extensions/matrix-math';
 import { matrixSubscriptExtension } from './extensions/matrix-subscript';
@@ -65,7 +66,7 @@ export function markdownToHtml(markdown: string): string {
 
   const preprocessed = preprocessEmoticon(blockquotePrefixed);
 
-  const mathInput = maskDollarSignsInsideMarkdownCode(preprocessed);
+  const mathInput = shieldDollarRunsForMarked(maskDollarSignsInsideMarkdownCode(preprocessed)); // code-span masking runs first, then dollar-run shielding
 
   // Parse markdown to HTML using marked with our Matrix extensions
   const html = processor.parse(mathInput) as string;