// Render a Markdown string as an HTML string.
//
// Parameter: text - the Markdown source (string methods are called on it directly).
// Returns: the string assembled in the local html variable after all passes.
//
// Pipeline, in the order the code runs it:
//   0. Strip reasoning blocks, replace U+00A0 / U+202F / U+2007 with a plain space,
//      and run balanceStreamingCodeFence so an unfinished fence still renders.
//   1. Normalize CRLF and lone CR to LF.
//   2. Move fenced code blocks into the codeblocks array (language + code with
//      trailing blank lines removed) and leave @@CODEBLOCKn@@ placeholders behind.
//   3. Run escapeHtml over the remaining text.
//   4. Block-level rewrites via regex: headings (one to four #), horizontal rules,
//      ordered lists, blockquotes, unordered lists (- or *), and pipe tables that
//      have a header row, a separator row and at least two columns. Column
//      alignment is derived from the colons in the separator row.
//   5. applyInline for inline code, strong and emphasis; then links, whose href
//      must pass the safeLink allow-list (http(s), mailto, tel, or a leading
//      slash or hash). A rejected href leaves only the label text.
//   6. Group the remaining lines into paragraphs. Blank lines end a paragraph,
//      block lines are emitted on their own, and blank lines beyond the first
//      add extra break entries once some content has been emitted.
//   7. Replace each code-block placeholder with the rendered block. The title
//      falls back to code when no language was given, the language class keeps
//      only a-z, 0-9, underscore and hyphen, and the code is both HTML-escaped
//      and URI-encoded (the latter stored in encodedForCopy).
//
// NOTE(review): in this copy the HTML markup inside string and template literals
// appears to have been stripped (several replacements are identities, some
// literals are split across lines, and two regex literals are truncated).
// Confirm every literal against the original file before relying on this code.
export function markdownToHTML(text) {
// 0) Remove think/thinking reasoning blocks (the pattern ends at the closing tag or at end of input)
// NOTE(review): the opening-tag part of this pattern looks like it is missing — confirm.
text = text.replace(/[\s\S]*?(?:<\/think(?:ing)?>|$)/gi, '');
// Normalize exotic spaces (narrow/non-breaking) to regular spaces
text = text.replace(/[\u00a0\u202f\u2007]/g, ' ');
text = balanceStreamingCodeFence(text);
// Escape helper applied to text before it is placed into the output.
// NOTE(review): the replacement pairs below look altered (for example & maps to itself) — confirm.
const escapeHtml = (value = '') =>
value
.replace(/&/g, '&')
.replace(//g, '>');
// Attribute variant: runs escapeHtml first, then handles both quote characters.
const escapeAttr = (value = '') =>
escapeHtml(value)
.replace(/"/g, '"')
.replace(/'/g, ''');
// Inline pass: stash inline code, strong and emphasis runs behind @@...@@ tokens,
// then substitute the stashed content back in (strong, emphasis, inline code).
const applyInline = (source) => {
const codeRuns = [];
let tmp = source.replace(/`([^`]+?)`/g, (_, code) => {
const idx = codeRuns.push(code) - 1;
return `@@CODEINLINE${idx}@@`;
});
const strongRuns = [];
tmp = tmp.replace(/\*\*([\s\S]+?)\*\*/g, (_, content) => {
const idx = strongRuns.push(content) - 1;
return `@@STRONG${idx}@@`;
});
const emphasisRuns = [];
tmp = tmp.replace(/(? {
const idx = emphasisRuns.push(content) - 1;
return `@@EM${idx}@@`;
});
return tmp
.replace(/@@STRONG(\d+)@@/g, (_, idx) => `${strongRuns[+idx]}`)
.replace(/@@EM(\d+)@@/g, (_, idx) => `${emphasisRuns[+idx]}`)
.replace(/@@CODEINLINE(\d+)@@/g, (_, idx) => `${codeRuns[+idx]}`);
};
// 1) Normalize line endings
let tmp = text.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
// 2) Extract code blocks and replace with placeholders (protect from all formatting)
const codeblocks = [];
const placeholder = (idx) => `@@CODEBLOCK${idx}@@`;
tmp = tmp.replace(/```([^\n]*)\n([\s\S]*?)```/g, (_, lang, code) => {
let cleaned = (code || '').replace(/\r\n/g, '\n').replace(/\r/g, '\n');
const lines = cleaned.split('\n');
while (lines.length > 0 && /^\s*$/.test(lines[lines.length - 1])) lines.pop();
cleaned = lines.join('\n');
codeblocks.push({ lang: (lang || '').trim(), code: cleaned });
return placeholder(codeblocks.length - 1);
});
// 3) HTML-escape special characters (outside of fenced code blocks)
let escaped = escapeHtml(tmp);
// 4) Headings (with consistent hooks)
escaped = escaped
.replace(/^#### (.+)$/gm, '$1
')
.replace(/^### (.+)$/gm, '$1
')
.replace(/^## (.+)$/gm, '$1
')
.replace(/^# (.+)$/gm, '$1
');
// 4.1) Horizontal rules: --- or *** or ___ on a line
escaped = escaped.replace(/^(?:-{3,}|\*{3,}|_{3,})\s*$/gm, '
');
// 4.2) Ordered lists: lines starting with "1. ", "2. ", ...
escaped = escaped.replace(
/(^|\n)([ \t]*\d+\. .+(?:\n[ \t]*\d+\. .+)*)/g,
(_, lead, listBlock) => {
const items = listBlock
.split(/\n/)
.map((line) => line.replace(/^[ \t]*\d+\.\s+/, '').trim())
.filter((item) => item.length > 0)
.map((item) => `${item}`)
.join('');
if (!items) return listBlock;
return `${lead}${items}
`;
}
);
// 4.3) Blockquotes
escaped = escaped.replace(
/(^|\n)([ \t]*> .+(?:\n[ \t]*> .+)*)/g,
(_, lead, blockquoteBlock) => {
const lines = blockquoteBlock
.split(/\n/)
.map((line) => line.replace(/^[ \t]*>\s*/, '').trim())
.join('
');
return `${lead}${lines}
`;
}
);
// 4.5) Unordered lists
escaped = escaped.replace(
/(^|\n)([ \t]*[-*] .+(?:\n[ \t]*[-*] .+)*)/g,
(_, lead, listBlock) => {
const items = listBlock
.split(/\n/)
.map((line) => line.replace(/^[ \t]*[-*]\s+/, '').trim())
.filter((item) => item.length > 0)
.map((item) => `${item}`)
.join('');
if (!items) return listBlock;
return `${lead}`;
}
);
// 4.6) Markdown tables (GitHub-style). Strict: requires header, separator, ≥2 cols.
const mdTableBlockRe =
/(^\|[^\n]*\|?\s*\n\|\s*[:\-]+(?:\s*\|\s*[:\-]+)+\s*\|?\s*\n(?:\|[^\n]*\|?\s*(?:\n|$))*)/gm;
escaped = escaped.replace(mdTableBlockRe, (block) => {
const hadTrailingNewline = /\n$/.test(block);
const lines = block.replace(/\n$/, '').split('\n');
const split = (line) => line.replace(/^\||\|$/g, '').split('|').map((s) => s.trim());
const headers = split(lines[0]);
const seps = split(lines[1]);
if (headers.length < 2 || seps.length < 2) return block;
if (!seps.every((s) => /^[ :\-]+$/.test(s) && /-/.test(s))) return block;
const aligns = seps.map((seg) => {
const s = seg.replace(/\s+/g, '');
const left = s.startsWith(':');
const right = s.endsWith(':');
if (left && right) return 'center';
if (right) return 'right';
return 'left';
});
const bodyLines = lines.slice(2).filter((l) => /^\|/.test(l.trim()));
const alignClass = (i) => `md-align-${aligns[i] || 'left'}`;
const ths = headers
.map(
(h, i) =>
`${h} | `
)
.join('');
const rows = bodyLines
.map((line) => {
const cells = split(line);
const tds = cells
.map(
(c, i) =>
`${c} | `
)
.join('');
return `${tds}
`;
})
.join('');
const table = ``;
return table + (hadTrailingNewline ? '\n' : '');
});
// 5) Bold, italic, inline code
let html = applyInline(escaped);
// 5.5) Links
const safeLink = (hrefRaw) => {
const href = (hrefRaw || '').trim();
if (!href) return '';
if (/^https?:\/\//i.test(href)) return href;
if (/^mailto:/i.test(href) || /^tel:/i.test(href)) return href;
if (href.startsWith('/') || href.startsWith('#')) return href;
return '';
};
html = html.replace(/\[([^\]]+?)\]\(([^)]+?)\)/g, (_, label, href) => {
const url = safeLink(href);
const tooltip = escapeHtml(href || '');
if (!url) return label;
return `${label} ${tooltip}`;
});
// 6) Convert line-breaks to HTML paragraphs and
inside paragraphs
const linesWithHtml = html.split("\n");
const htmlLines = [];
let paragraph = [];
let emptyCount = 0;
let seenContent = false;
const flushParagraph = () => {
if (paragraph.length === 0) return;
htmlLines.push(`${paragraph.join("
")}
`);
paragraph = [];
seenContent = true;
};
const pushExtraBreaks = () => {
if (!seenContent) {
emptyCount = 0;
return;
}
const extraBreaks = Math.max(0, emptyCount - 1);
for (let j = 0; j < extraBreaks; j += 1) {
htmlLines.push("
");
}
emptyCount = 0;
};
const isBlockLine = (value) => {
const trimmed = value.trim();
if (!trimmed) return false;
if (/^@@CODEBLOCK\d+@@$/.test(trimmed)) return true;
if (!trimmed.startsWith("<")) return false;
return /^()/.test(
trimmed
);
};
for (let i = 0; i < linesWithHtml.length; i += 1) {
const line = linesWithHtml[i] ?? "";
if (line.trim().length === 0) {
if (paragraph.length > 0) {
flushParagraph();
}
emptyCount += 1;
continue;
}
if (isBlockLine(line)) {
if (paragraph.length > 0) {
flushParagraph();
}
pushExtraBreaks();
htmlLines.push(line);
seenContent = true;
continue;
}
if (emptyCount > 0) {
pushExtraBreaks();
}
paragraph.push(line);
}
flushParagraph();
html = htmlLines.join("");
// 7) Restore code blocks with header + copy button
html = html.replace(/@@CODEBLOCK(\d+)@@/g, (_, idx) => {
const { lang, code } = codeblocks[+idx];
const title = (lang && lang.trim()) ? lang.trim() : 'code';
const titleLabel = escapeHtml(title);
const languageClass = title.toLowerCase().replace(/[^a-z0-9_-]/g, '') || 'code';
const escapedCode = escapeHtml(code);
const encodedForCopy = encodeURIComponent(code);
const head = ``;
const body = `${escapedCode}
`;
return `${head}${body}
`;
});
return html;
}
/**
 * Virtually close an unfinished fenced code block so it renders during streaming.
 *
 * Scans the text line by line, tracking whether a fence is currently open.
 * If the input ends while a fence is still open, a matching closing fence
 * (same character, same length as the opener) is appended on its own line.
 *
 * An opening fence is a run of three or more backticks OR three or more
 * tildes (never a mix), optionally indented and optionally followed by an
 * info string. Following CommonMark, a backtick fence whose info string
 * contains a backtick is not a fence (it is a one-line code span), so it
 * never triggers a virtual closer.
 *
 * @param {string} md Markdown text, possibly truncated mid-stream.
 * @returns {string} The input unchanged when all fences are balanced,
 *   otherwise the input with a closing fence appended.
 */
function balanceStreamingCodeFence(md) {
  // Alternation (not a character class) so mixed runs are not mistaken for a fence.
  const openFenceRe = /^\s*(`{3,}|~{3,})(.*)$/;
  let openFence = null;

  for (const line of md.split(/\r?\n/)) {
    if (openFence) {
      // Inside a block: only a bare run of the same character, at least as
      // long as the opener, closes it.
      if (openFence.closeRe.test(line)) openFence = null;
      continue;
    }

    const match = openFenceRe.exec(line);
    if (!match) continue;

    const [, run, info] = match;
    // Backtick fences may not carry backticks in their info string.
    if (run[0] === '`' && info.includes('`')) continue;

    openFence = {
      marker: run,
      // Built once per opener instead of once per scanned line.
      closeRe: new RegExp(`^\\s*${run[0]}{${run.length},}\\s*$`),
    };
  }

  if (!openFence) return md;
  return md.endsWith('\n') ? md + openFence.marker : `${md}\n${openFence.marker}`;
}