diff --git a/packages/markdown-html/src/rules.js b/packages/markdown-html/src/rules.js index 2d72f9bd..06bcbb2b 100644 --- a/packages/markdown-html/src/rules.js +++ b/packages/markdown-html/src/rules.js @@ -498,42 +498,155 @@ const HTML_BLOCK_RULE = { } }; +const CAPTION_RULE = { + deserialize(el, next, ignoreSpace) { + if (el.tagName && el.tagName.toLowerCase() === 'caption') { + return { + type: 'caption', + nodes: next(el.childNodes, ignoreSpace) + }; + } + } +}; + +/** + * Clean table cell nodes by removing Softbreaks and normalizing whitespace. + * @param {Array} nodes - the list of nodes + * @returns {Array} - the cleaned list of nodes + */ +function cleanTableNodes(nodes) { + const NS = CommonMarkModel.NAMESPACE; + const TEXT = `${NS}.Text`; + const SOFT = `${NS}.Softbreak`; + + if (!nodes) return []; + nodes = Array.isArray(nodes) ? nodes : [nodes]; + + const merged = nodes.reduce((acc, node) => { + if (!node) return acc; + + let newNode = { ...node }; + if (newNode.nodes) newNode = { ...newNode, nodes: cleanTableNodes(newNode.nodes) }; + + if (newNode.$class === SOFT) { + newNode = { $class: TEXT, text: ' ' }; + } + + const last = acc[acc.length - 1]; + if (last && last.$class === TEXT && newNode.$class === TEXT) + { + last.text += newNode.text; + } + else + { + acc.push(newNode); + } + + return acc; + }, []); + + // Normalize whitespace inside Text nodes + merged.forEach(n => { + if (n.$class === TEXT) n.text = n.text.replace(/\s+/g, ' '); + }); + + + if (merged.length > 0 && merged[0].$class === TEXT) { + merged[0].text = merged[0].text.replace(/^\s+/, ''); + } + if (merged.length > 0 && merged[merged.length - 1].$class === TEXT) { + merged[merged.length - 1].text = merged[merged.length - 1].text.replace(/\s+$/, ''); + } + + return merged.filter(n => n.$class !== TEXT || n.text.length > 0); +} + + const TABLE_RULE = { deserialize(el, next, ignoreSpace) { if (el.tagName && el.tagName.toLowerCase() === 'table') { - return { + const children = next(el.childNodes, ignoreSpace); + const captionNode = children.find(c => c.type === 'caption'); + let tableNodes = children.filter(node => + node.$class === `${CommonMarkModel.NAMESPACE}.TableHead` || + node.$class === `${CommonMarkModel.NAMESPACE}.TableBody` + ); + + let head = tableNodes.find(n => n.$class === `${CommonMarkModel.NAMESPACE}.TableHead`); + const body = tableNodes.find(n => n.$class === `${CommonMarkModel.NAMESPACE}.TableBody`); + + if (!head && body && body.nodes && body.nodes.length > 0) { + const firstRow = body.nodes[0]; + const hasHeaderCells = firstRow.nodes && firstRow.nodes.some(n => n.$class === `${CommonMarkModel.NAMESPACE}.HeaderCell`); + + if (hasHeaderCells) { + head = { + $class: `${CommonMarkModel.NAMESPACE}.TableHead`, + nodes: [firstRow] + }; + const newBody = { + $class: `${CommonMarkModel.NAMESPACE}.TableBody`, + nodes: body.nodes.slice(1) + }; + tableNodes = [head, newBody]; + } + } + + const table = { $class: `${CommonMarkModel.NAMESPACE}.Table`, - nodes: next(el.childNodes), + nodes: tableNodes, }; + + if (captionNode) { + const captionParagraph = { + $class: `${CommonMarkModel.NAMESPACE}.Paragraph`, + nodes: [ + { + $class: `${CommonMarkModel.NAMESPACE}.Strong`, + nodes: cleanTableNodes(captionNode.nodes) + }, + { + $class: `${CommonMarkModel.NAMESPACE}.Text`, + text: '\n\n' + } + ] + }; + return [captionParagraph, table]; + } + + return table; } if (el.tagName && el.tagName.toLowerCase() === 'thead') { + const nodes = next(el.childNodes); return { $class: `${CommonMarkModel.NAMESPACE}.TableHead`, - nodes: next(el.childNodes), + nodes: nodes.filter(n => n.$class === `${CommonMarkModel.NAMESPACE}.TableRow`), }; } if (el.tagName && el.tagName.toLowerCase() === 'tbody') { + const nodes = next(el.childNodes); return { $class: `${CommonMarkModel.NAMESPACE}.TableBody`, - nodes: next(el.childNodes), + nodes: nodes.filter(n => n.$class === `${CommonMarkModel.NAMESPACE}.TableRow`), }; } if (el.tagName && el.tagName.toLowerCase() === 'tr') { + const nodes = next(el.childNodes); return { $class: `${CommonMarkModel.NAMESPACE}.TableRow`, - nodes: next(el.childNodes), + nodes: nodes.filter(n => n.$class === `${CommonMarkModel.NAMESPACE}.HeaderCell` || n.$class === `${CommonMarkModel.NAMESPACE}.TableCell`), }; } if (el.tagName && el.tagName.toLowerCase() === 'th') { return { $class: `${CommonMarkModel.NAMESPACE}.HeaderCell`, - nodes: next(el.childNodes), + nodes: cleanTableNodes(next(el.childNodes)), }; } if (el.tagName && el.tagName.toLowerCase() === 'td') { return { $class: `${CommonMarkModel.NAMESPACE}.TableCell`, - nodes: next(el.childNodes), + nodes: cleanTableNodes(next(el.childNodes)), }; } }, @@ -560,7 +673,8 @@ const rules = [ HTML_INLINE_RULE, HTML_BLOCK_RULE, IMAGE_RULE, - TABLE_RULE + TABLE_RULE, + CAPTION_RULE ];