From 987a58000f940661e69d6e1d4b5270f61c9e8a2e Mon Sep 17 00:00:00 2001 From: Victor Giers Date: Fri, 5 Dec 2025 17:30:37 +0100 Subject: [PATCH] auto-git: [add] markdown.js --- markdown.js | 244 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 244 insertions(+) create mode 100644 markdown.js diff --git a/markdown.js b/markdown.js new file mode 100644 index 0000000..6e52638 --- /dev/null +++ b/markdown.js @@ -0,0 +1,244 @@ +export function markdownToHTML(text) { + // 0) Remove .../... blocks + // This regex will match an an opening or tag, + // followed by any characters (non-greedy), until either a closing + // or tag is found, OR the end of the string ($). + text = text.replace(/[\s\S]*?(?:<\/think(?:ing)?>|$)/gi, ''); + + // Normalize exotic spaces (narrow/non-breaking) to regular spaces for consistent rendering + text = text.replace(/[\u00a0\u202f\u2007]/g, ' '); + + text = balanceStreamingCodeFence(text); + + const escapeHtml = (value = '') => + value + .replace(/&/g, '&') + .replace(//g, '>'); + const escapeAttr = (value = '') => + escapeHtml(value) + .replace(/"/g, '"') + .replace(/'/g, '''); + const applyInline = (source) => { + const codeRuns = []; + let tmp = source.replace(/`([^`]+?)`/g, (_, code) => { + const idx = codeRuns.push(code) - 1; + return `@@CODEINLINE${idx}@@`; + }); + + const strongRuns = []; + tmp = tmp.replace(/\*\*([\s\S]+?)\*\*/g, (_, content) => { + const idx = strongRuns.push(content) - 1; + return `@@STRONG${idx}@@`; + }); + + const emphasisRuns = []; + tmp = tmp.replace(/(? { + const idx = emphasisRuns.push(content) - 1; + return `@@EM${idx}@@`; + }); + + return tmp + .replace(/@@STRONG(\d+)@@/g, (_, idx) => `${strongRuns[+idx]}`) + .replace(/@@EM(\d+)@@/g, (_, idx) => `${emphasisRuns[+idx]}`) + .replace(/@@CODEINLINE(\d+)@@/g, (_, idx) => `${codeRuns[+idx]}`); + }; + + // 1) Normalize line endings + let tmp = text.replace(/\r\n/g, '\n').replace(/\r/g, '\n'); + + // 2) Extract code blocks and replace with placeholders (protect from all formatting) + const codeblocks = []; + const placeholder = idx => `@@CODEBLOCK${idx}@@`; + tmp = tmp.replace(/```([^\n]*)\n([\s\S]*?)```/g, (_, lang, code) => { + // Strip trailing whitespace-only lines at the end of the block + let cleaned = (code || '').replace(/\r\n/g, '\n').replace(/\r/g, '\n'); + const lines = cleaned.split('\n'); + while (lines.length > 0 && /^\s*$/.test(lines[lines.length - 1])) lines.pop(); + cleaned = lines.join('\n'); + codeblocks.push({ lang: (lang || '').trim(), code: cleaned }); + return placeholder(codeblocks.length - 1); + }); + + // 3) HTML-escape special characters (outside of code blocks) + let escaped = escapeHtml(tmp); + + // 4) Headings + escaped = escaped + .replace(/^#### (.+)$/gm, "

$1

") + .replace(/^### (.+)$/gm, "

$1

") + .replace(/^## (.+)$/gm, "

$1

") + .replace(/^# (.+)$/gm, "

$1

"); + + // 4.3) Blockquotes + escaped = escaped.replace( + /(^|\n)([ \t]*> .+(?:\n[ \t]*> .+)*)/g, + (_, lead, blockquoteBlock) => { + const lines = blockquoteBlock + .split(/\n/) + .map(line => line.replace(/^[ \t]*>\s*/, '').trim()) + .join('\n'); + return `${lead}
${lines}
`; + } + ); + + // 4.5) Unordered lists + escaped = escaped.replace( + /(^|\n)([ \t]*[-*] .+(?:\n[ \t]*[-*] .+)*)/g, + (_, lead, listBlock) => { + const items = listBlock + .split(/\n/) + .map(line => line.replace(/^[ \t]*[-*]\s+/, '').trim()) + .map(item => `
  • ${item}
  • `) + .join(''); + return `${lead}`; + } + ); + + // 4.6) Markdown tables (GitHub-style). Strict: requires header, separator, ≥2 cols. + const mdTableBlockRe = + /(^\|[^\n]*\|?\s*\n\|\s*[:\-]+(?:\s*\|\s*[:\-]+)+\s*\|?\s*\n(?:\|[^\n]*\|?\s*(?:\n|$))*)/gm; + + escaped = escaped.replace(mdTableBlockRe, (block) => { + const hadTrailingNewline = /\n$/.test(block); + const lines = block.replace(/\n$/, '').split('\n'); + + const split = (line) => line.replace(/^\||\|$/g, '').split('|').map(s => s.trim()); + + const headers = split(lines[0]); + const seps = split(lines[1]); + if (headers.length < 2 || seps.length < 2) return block; + if (!seps.every(s => /^[ :\-]+$/.test(s) && /-/.test(s))) return block; + + const aligns = seps.map(seg => { + const s = seg.replace(/\s+/g,''); + const left = s.startsWith(':'); + const right = s.endsWith(':'); + if (left && right) return 'center'; + if (right) return 'right'; + return 'left'; + }); + + const bodyLines = lines.slice(2).filter(l => /^\|/.test(l.trim())); + const alignClass = (i) => `md-align-${aligns[i] || 'left'}`; + + const ths = headers.map((h,i)=>`${h}`).join(''); + const rows = bodyLines.map(line => { + const cells = split(line); + const tds = cells.map((c,i)=>`${c}`).join(''); + return `${tds}`; + }).join(''); + + const table = `${ths}${rows}
    `; + + return table + (hadTrailingNewline ? '\n' : ''); + }); + + // 4.75) Horizontal rules + escaped = escaped.replace(/^---\s*$/gm, "
    "); + + // 5) Bold, italic, inline code (inline code only; fenced were extracted) + let html = applyInline(escaped); + + // 5.5) Links + const safeLink = (hrefRaw) => { + const href = (hrefRaw || '').trim(); + if (!href) return ''; + if (/^https?:\/\//i.test(href)) return href; + if (/^mailto:/i.test(href) || /^tel:/i.test(href)) return href; + if (href.startsWith('/') || href.startsWith('#')) return href; + return ''; + }; + + html = html.replace(/\[([^\]]+?)\]\(([^)]+?)\)/g, (_, label, href) => { + const url = safeLink(href); + const tooltip = escapeHtml(href || ''); + if (!url) return label; + return `${label} ${tooltip}`; + }); + + // 6) Convert line-breaks to
    for NON-code content (code is still placeholdered) + html = html.replace(/\n/g, "
    "); + + // 6.1) Light cleanup: collapse 3+ consecutive
    into a double-break, keep intentional spacing otherwise + html = html.replace(/(?:
    [\s]*){3,}/g, "

    "); + + // 6.2) Normalize spacing around block elements: + // - keep a single break before block elements when breaks exist + // - after a block element, keep at most one break + html = html + .replace(/(
    \s*)+(<(?:h[1-4]|hr|table|ul|ol|blockquote)\b[^>]*>)/g, "
    $2") + .replace(/(<\/(?:h[1-4]|table|ul|ol|blockquote)>\s*)(
    \s*)+/g, "$1
    "); + + // 6.3) Trim stray breaks immediately after headings but leave at most one (tighter) + html = html.replace(/(<\/h[1-4]>)(
    \s*)+/g, "$1"); + // 6.4) Trim trailing breaks after lists; rely on CSS margins for spacing + html = html.replace(/(<\/(?:ul|ol)>)(
    \s*)+/g, "$1"); + // 7) Restore code blocks with title bar (language) + copy button (no inline handlers) + html = html.replace(/@@CODEBLOCK(\d+)@@/g, (_, idx) => { + const { lang, code } = codeblocks[+idx]; + const title = (lang && lang.trim()) ? lang.trim() : 'code'; + const titleLabel = escapeHtml(title); + const languageClass = title.toLowerCase().replace(/[^a-z0-9_-]/g, '') || 'code'; + + // Escape only for HTML rendering inside ; keep raw \n (no
    here!) + const escapedCode = escapeHtml(code); + + // Single-line header to avoid global
    interference + const encodedForCopy = encodeURIComponent(code); + const head = `
    ${titleLabel}
    `; + + // Ensure wrapping inside container and preserve newlines for copy/paste + const body = `
    ${escapedCode}
    `; + + return `
    ${head}${body}
    `; + }); + + // 8) Final cleanup around codeblocks specifically (remove stray
    added next to placeholders) + html = html + .replace(/
    \s*(?=
    before opening + .replace(/(
    ]*>[\s\S]*?<\/div>)\s*
    /g, "$1"); //
    right after closing + + return html; +} + +// Virtually close an unfinished fenced code block so it renders during streaming. +function balanceStreamingCodeFence(md) { + // Split into lines; we only consider fences that start a line. + const lines = md.split(/\r?\n/); + + // Track the last unmatched opening fence we see while scanning. + // { fenceChar: '`' or '~', fenceLen: number } + let open = null; + + for (let i = 0; i < lines.length; i++) { + const line = lines[i]; + + // Opening fence? ^\s*([`~]{3,})(.*)$ + if (!open) { + const m = line.match(/^\s*([`~]{3,})([^\s]*)?.*$/); + if (m) { + // Treat as an opening fence + open = { fenceChar: m[1][0], fenceLen: m[1].length }; + continue; + } + } else { + // Closing fence: must match same char and length or longer + const re = new RegExp(`^\\s*(${open.fenceChar}{${open.fenceLen},})\\s*$`); + if (re.test(line)) { + // Closed + open = null; + continue; + } + // Otherwise still inside the code block; keep scanning + } + } + + if (open) { + // Virtually close with the same fence so the block renders now + const virtual = `${open.fenceChar.repeat(open.fenceLen)}`; + return md.endsWith('\n') ? md + virtual : md + '\n' + virtual; + } + + return md; +}