From af8fc5073361de40a84901518ca08158b8d3b107 Mon Sep 17 00:00:00 2001 From: Victor Giers Date: Fri, 5 Dec 2025 17:39:35 +0100 Subject: [PATCH] auto-git: [change] markdown.js --- markdown.js | 171 +++++++++++++++++++++++++++++++--------------------- 1 file changed, 101 insertions(+), 70 deletions(-) diff --git a/markdown.js b/markdown.js index 6e52638..c5ab6bc 100644 --- a/markdown.js +++ b/markdown.js @@ -1,11 +1,8 @@ export function markdownToHTML(text) { // 0) Remove .../... blocks - // This regex will match an an opening or tag, - // followed by any characters (non-greedy), until either a closing - // or tag is found, OR the end of the string ($). text = text.replace(/[\s\S]*?(?:<\/think(?:ing)?>|$)/gi, ''); - // Normalize exotic spaces (narrow/non-breaking) to regular spaces for consistent rendering + // Normalize exotic spaces (narrow/non-breaking) to regular spaces text = text.replace(/[\u00a0\u202f\u2007]/g, ' '); text = balanceStreamingCodeFence(text); @@ -15,10 +12,12 @@ export function markdownToHTML(text) { .replace(/&/g, '&') .replace(//g, '>'); + const escapeAttr = (value = '') => escapeHtml(value) .replace(/"/g, '"') .replace(/'/g, '''); + const applyInline = (source) => { const codeRuns = []; let tmp = source.replace(/`([^`]+?)`/g, (_, code) => { @@ -40,7 +39,7 @@ export function markdownToHTML(text) { return tmp .replace(/@@STRONG(\d+)@@/g, (_, idx) => `${strongRuns[+idx]}`) - .replace(/@@EM(\d+)@@/g, (_, idx) => `${emphasisRuns[+idx]}`) + .replace(/@@EM(\d+)@@/g, (_, idx) => `${emphasisRuns[+idx]}`) .replace(/@@CODEINLINE(\d+)@@/g, (_, idx) => `${codeRuns[+idx]}`); }; @@ -49,9 +48,9 @@ export function markdownToHTML(text) { // 2) Extract code blocks and replace with placeholders (protect from all formatting) const codeblocks = []; - const placeholder = idx => `@@CODEBLOCK${idx}@@`; + const placeholder = (idx) => `@@CODEBLOCK${idx}@@`; + tmp = tmp.replace(/```([^\n]*)\n([\s\S]*?)```/g, (_, lang, code) => { - // Strip trailing whitespace-only lines 
at the end of the block let cleaned = (code || '').replace(/\r\n/g, '\n').replace(/\r/g, '\n'); const lines = cleaned.split('\n'); while (lines.length > 0 && /^\s*$/.test(lines[lines.length - 1])) lines.pop(); @@ -60,15 +59,33 @@ export function markdownToHTML(text) { return placeholder(codeblocks.length - 1); }); - // 3) HTML-escape special characters (outside of code blocks) + // 3) HTML-escape special characters (outside of fenced code blocks) let escaped = escapeHtml(tmp); - // 4) Headings + // 4) Headings (with consistent hooks) escaped = escaped - .replace(/^#### (.+)$/gm, "

$1

") - .replace(/^### (.+)$/gm, "

$1

") - .replace(/^## (.+)$/gm, "

$1

") - .replace(/^# (.+)$/gm, "

$1

"); + .replace(/^#### (.+)$/gm, '

$1

') + .replace(/^### (.+)$/gm, '

$1

') + .replace(/^## (.+)$/gm, '

$1

') + .replace(/^# (.+)$/gm, '

$1

'); + + // 4.1) Horizontal rules: --- or *** or ___ on a line + escaped = escaped.replace(/^(?:-{3,}|\*{3,}|_{3,})\s*$/gm, '
'); + + // 4.2) Ordered lists: lines starting with "1. ", "2. ", ... + escaped = escaped.replace( + /(^|\n)([ \t]*\d+\. .+(?:\n[ \t]*\d+\. .+)*)/g, + (_, lead, listBlock) => { + const items = listBlock + .split(/\n/) + .map((line) => line.replace(/^[ \t]*\d+\.\s+/, '').trim()) + .filter((item) => item.length > 0) + .map((item) => `
  • ${item}
  • `) + .join(''); + if (!items) return listBlock; + return `${lead}
      ${items}
    `; + } + ); // 4.3) Blockquotes escaped = escaped.replace( @@ -76,9 +93,9 @@ export function markdownToHTML(text) { (_, lead, blockquoteBlock) => { const lines = blockquoteBlock .split(/\n/) - .map(line => line.replace(/^[ \t]*>\s*/, '').trim()) + .map((line) => line.replace(/^[ \t]*>\s*/, '').trim()) .join('\n'); - return `${lead}
    ${lines}
    `; + return `${lead}
    ${lines}
    `; } ); @@ -88,10 +105,12 @@ export function markdownToHTML(text) { (_, lead, listBlock) => { const items = listBlock .split(/\n/) - .map(line => line.replace(/^[ \t]*[-*]\s+/, '').trim()) - .map(item => `
  • ${item}
  • `) + .map((line) => line.replace(/^[ \t]*[-*]\s+/, '').trim()) + .filter((item) => item.length > 0) + .map((item) => `
  • ${item}
  • `) .join(''); - return `${lead}`; + if (!items) return listBlock; + return `${lead}`; } ); @@ -103,15 +122,15 @@ export function markdownToHTML(text) { const hadTrailingNewline = /\n$/.test(block); const lines = block.replace(/\n$/, '').split('\n'); - const split = (line) => line.replace(/^\||\|$/g, '').split('|').map(s => s.trim()); + const split = (line) => line.replace(/^\||\|$/g, '').split('|').map((s) => s.trim()); const headers = split(lines[0]); - const seps = split(lines[1]); + const seps = split(lines[1]); if (headers.length < 2 || seps.length < 2) return block; - if (!seps.every(s => /^[ :\-]+$/.test(s) && /-/.test(s))) return block; + if (!seps.every((s) => /^[ :\-]+$/.test(s) && /-/.test(s))) return block; - const aligns = seps.map(seg => { - const s = seg.replace(/\s+/g,''); + const aligns = seps.map((seg) => { + const s = seg.replace(/\s+/g, ''); const left = s.startsWith(':'); const right = s.endsWith(':'); if (left && right) return 'center'; @@ -119,25 +138,35 @@ export function markdownToHTML(text) { return 'left'; }); - const bodyLines = lines.slice(2).filter(l => /^\|/.test(l.trim())); + const bodyLines = lines.slice(2).filter((l) => /^\|/.test(l.trim())); const alignClass = (i) => `md-align-${aligns[i] || 'left'}`; - const ths = headers.map((h,i)=>`${h}`).join(''); - const rows = bodyLines.map(line => { - const cells = split(line); - const tds = cells.map((c,i)=>`${c}`).join(''); - return `${tds}`; - }).join(''); + const ths = headers + .map( + (h, i) => + `${h}` + ) + .join(''); + + const rows = bodyLines + .map((line) => { + const cells = split(line); + const tds = cells + .map( + (c, i) => + `${c}` + ) + .join(''); + return `${tds}`; + }) + .join(''); const table = `${ths}${rows}
    `; return table + (hadTrailingNewline ? '\n' : ''); }); - // 4.75) Horizontal rules - escaped = escaped.replace(/^---\s*$/gm, "
    "); - - // 5) Bold, italic, inline code (inline code only; fenced were extracted) + // 5) Bold, italic, inline code let html = applyInline(escaped); // 5.5) Links @@ -150,95 +179,97 @@ export function markdownToHTML(text) { return ''; }; - html = html.replace(/\[([^\]]+?)\]\(([^)]+?)\)/g, (_, label, href) => { + html = html.replace(/\[([^\]]+?)\]\(([^)]+?)\)/g, (_, label, href) => { const url = safeLink(href); const tooltip = escapeHtml(href || ''); if (!url) return label; - return `${label} ${tooltip}`; + return `${label} ${tooltip}`; }); - // 6) Convert line-breaks to
    for NON-code content (code is still placeholdered) - html = html.replace(/\n/g, "
    "); + // 6) Convert line-breaks to
    for NON-code content + html = html.replace(/\n/g, '
    '); - // 6.1) Light cleanup: collapse 3+ consecutive
    into a double-break, keep intentional spacing otherwise - html = html.replace(/(?:
    [\s]*){3,}/g, "

    "); + // 6.1) Collapse 3+ consecutive
    into a double-break + html = html.replace(/(?:
    [\s]*){3,}/g, '

    '); - // 6.2) Normalize spacing around block elements: - // - keep a single break before block elements when breaks exist - // - after a block element, keep at most one break + // 6.2) Normalize spacing around block elements html = html - .replace(/(
    \s*)+(<(?:h[1-4]|hr|table|ul|ol|blockquote)\b[^>]*>)/g, "
    $2") - .replace(/(<\/(?:h[1-4]|table|ul|ol|blockquote)>\s*)(
    \s*)+/g, "$1
    "); + .replace( + /(
    \s*)+(<(?:h[1-4]|hr|table|ul|ol|blockquote)\b[^>]*>)/g, + '
    $2' + ) + .replace( + /(<\/(?:h[1-4]|table|ul|ol|blockquote)>\s*)(
    \s*)+/g, + '$1
    ' + ); - // 6.3) Trim stray breaks immediately after headings but leave at most one (tighter) - html = html.replace(/(<\/h[1-4]>)(
    \s*)+/g, "$1"); - // 6.4) Trim trailing breaks after lists; rely on CSS margins for spacing - html = html.replace(/(<\/(?:ul|ol)>)(
    \s*)+/g, "$1"); - // 7) Restore code blocks with title bar (language) + copy button (no inline handlers) + // 6.3) Trim breaks after headings + html = html.replace(/(<\/h[1-4]>)(
    \s*)+/g, '$1'); + + // 6.4) Trim trailing breaks after lists + html = html.replace(/(<\/(?:ul|ol)>)(
    \s*)+/g, '$1'); + + // 7) Restore code blocks with header + copy button html = html.replace(/@@CODEBLOCK(\d+)@@/g, (_, idx) => { const { lang, code } = codeblocks[+idx]; const title = (lang && lang.trim()) ? lang.trim() : 'code'; const titleLabel = escapeHtml(title); const languageClass = title.toLowerCase().replace(/[^a-z0-9_-]/g, '') || 'code'; - // Escape only for HTML rendering inside ; keep raw \n (no
    here!) const escapedCode = escapeHtml(code); - - // Single-line header to avoid global
    interference const encodedForCopy = encodeURIComponent(code); - const head = `
    ${titleLabel}
    `; - // Ensure wrapping inside container and preserve newlines for copy/paste + const head = `
    ${titleLabel}
    `; + const body = `
    ${escapedCode}
    `; return `
    ${head}${body}
    `; }); - // 8) Final cleanup around codeblocks specifically (remove stray
    added next to placeholders) + // 8) Cleanup around codeblocks html = html - .replace(/
    \s*(?=
    before opening - .replace(/(
    ]*>[\s\S]*?<\/div>)\s*
    /g, "$1"); //
    right after closing + .replace(/
    \s*(?=
    ]*>[\s\S]*?<\/div>)\s*
    /g, + '$1' + ); return html; } // Virtually close an unfinished fenced code block so it renders during streaming. function balanceStreamingCodeFence(md) { - // Split into lines; we only consider fences that start a line. const lines = md.split(/\r?\n/); - - // Track the last unmatched opening fence we see while scanning. - // { fenceChar: '`' or '~', fenceLen: number } let open = null; for (let i = 0; i < lines.length; i++) { const line = lines[i]; - // Opening fence? ^\s*([`~]{3,})(.*)$ if (!open) { const m = line.match(/^\s*([`~]{3,})([^\s]*)?.*$/); if (m) { - // Treat as an opening fence open = { fenceChar: m[1][0], fenceLen: m[1].length }; continue; } } else { - // Closing fence: must match same char and length or longer - const re = new RegExp(`^\\s*(${open.fenceChar}{${open.fenceLen},})\\s*$`); + const re = new RegExp( + `^\\s*(${open.fenceChar}{${open.fenceLen},})\\s*$` + ); if (re.test(line)) { - // Closed open = null; continue; } - // Otherwise still inside the code block; keep scanning } } if (open) { - // Virtually close with the same fence so the block renders now const virtual = `${open.fenceChar.repeat(open.fenceLen)}`; return md.endsWith('\n') ? md + virtual : md + '\n' + virtual; } return md; -} +} \ No newline at end of file