diff --git a/packages/super-editor/src/core/super-converter/field-references/preProcessNodesForFldChar.js b/packages/super-editor/src/core/super-converter/field-references/preProcessNodesForFldChar.js index 86ff899194..e6f21ef847 100644 --- a/packages/super-editor/src/core/super-converter/field-references/preProcessNodesForFldChar.js +++ b/packages/super-editor/src/core/super-converter/field-references/preProcessNodesForFldChar.js @@ -35,6 +35,7 @@ export const preProcessNodesForFldChar = (nodes = [], docx) => { let currentFieldStack = []; let unpairedEnd = null; let collecting = false; + const rawNodeSourceTokens = new WeakMap(); /** * Finalizes the current field. If collecting nodes, it processes them. @@ -66,18 +67,50 @@ export const preProcessNodesForFldChar = (nodes = [], docx) => { } }; - for (const node of nodes) { - const rawNode = carbonCopy(node); + /** + * Captures the original raw node at most once for the currently active field. + * @param {OpenXmlNode} rawNode + * @param {Set} capturedRawNodes + * @param {object} rawSourceToken + */ + const captureRawNodeForCurrentField = (rawNode, capturedRawNodes, rawSourceToken) => { + if (rawCollectedNodesStack.length === 0) return; + if (capturedRawNodes.has(rawNode)) return; + const currentRawStack = rawCollectedNodesStack[rawCollectedNodesStack.length - 1]; + const lastRawNode = currentRawStack[currentRawStack.length - 1]; + const canMergeIntoLastNode = + lastRawNode?.name === 'w:r' && + rawNode?.name === 'w:r' && + rawNodeSourceTokens.get(lastRawNode) === rawSourceToken && + Array.isArray(lastRawNode.elements) && + Array.isArray(rawNode.elements); + if (canMergeIntoLastNode) { + lastRawNode.elements.push(...carbonCopy(rawNode.elements)); + } else { + currentRawStack.push(rawNode); + rawNodeSourceTokens.set(rawNode, rawSourceToken); + } + capturedRawNodes.add(rawNode); + }; + + /** + * Processes a single logical node against the fldChar state machine. + * @param {OpenXmlNode} node + * @param {OpenXmlNode} rawNode + * @param {Set} capturedRawNodes + * @param {object} rawSourceToken + */ + const processNode = (node, rawNode, capturedRawNodes, rawSourceToken) => { collecting = collectedNodesStack.length > 0; if (shouldSkipFieldProcessing(node)) { if (collecting) { collectedNodesStack[collectedNodesStack.length - 1].push(node); - rawCollectedNodesStack[collectedNodesStack.length - 1].push(rawNode); + captureRawNodeForCurrentField(rawNode, capturedRawNodes, rawSourceToken); } else { processedNodes.push(node); } - continue; + return; } const fldCharEl = node.elements?.find((el) => el.name === 'w:fldChar'); @@ -97,16 +130,19 @@ export const preProcessNodesForFldChar = (nodes = [], docx) => { } else { processedNodes.push(...processed); } - continue; + return; } } } if (fldType === 'begin') { collectedNodesStack.push([]); - rawCollectedNodesStack.push([rawNode]); + const rawStack = [rawNode]; + rawCollectedNodesStack.push(rawStack); + rawNodeSourceTokens.set(rawNode, rawSourceToken); + capturedRawNodes.add(rawNode); currentFieldStack.push({ instrText: '', instructionTokens: [], afterSeparate: false }); - continue; + return; } // If collecting and still in instruction run, aggregate instruction tokens/text. @@ -115,7 +151,7 @@ export const preProcessNodesForFldChar = (nodes = [], docx) => { if (!currentField.afterSeparate) { const instructionTokens = extractInstructionTokensFromNode(node); if (instructionTokens.length > 0) { - rawCollectedNodesStack[rawCollectedNodesStack.length - 1].push(rawNode); + captureRawNodeForCurrentField(rawNode, capturedRawNodes, rawSourceToken); currentField.instructionTokens.push(...instructionTokens); const instrTextValue = instrTextEl?.elements?.[0]?.text; if (instrTextValue != null) { @@ -125,27 +161,27 @@ export const preProcessNodesForFldChar = (nodes = [], docx) => { currentField.instrText += '\t'; } // We can ignore instruction nodes - continue; + return; } } } if (fldType === 'end') { if (collecting) { - rawCollectedNodesStack[rawCollectedNodesStack.length - 1].push(rawNode); + captureRawNodeForCurrentField(rawNode, capturedRawNodes, rawSourceToken); } finalizeField(); - continue; + return; } else if (fldType === 'separate') { if (collecting) { - rawCollectedNodesStack[rawCollectedNodesStack.length - 1].push(rawNode); + captureRawNodeForCurrentField(rawNode, capturedRawNodes, rawSourceToken); const currentField = currentFieldStack[currentFieldStack.length - 1]; if (currentField) { currentField.afterSeparate = true; } } // We can ignore the 'fldChar' nodes - continue; + return; } if (Array.isArray(node.elements)) { @@ -161,27 +197,41 @@ export const preProcessNodesForFldChar = (nodes = [], docx) => { // The current node should be added to the collected nodes collectedNodesStack.push([node]); - rawCollectedNodesStack.push([rawNode]); + const rawStack = [rawNode]; + rawCollectedNodesStack.push(rawStack); + rawNodeSourceTokens.set(rawNode, rawSourceToken); + capturedRawNodes.add(rawNode); }); } else if (childResult.unpairedEnd) { // A field from this level or higher ended in the children. collectedNodesStack[collectedNodesStack.length - 1].push(node); - rawCollectedNodesStack[rawCollectedNodesStack.length - 1].push(rawNode); + captureRawNodeForCurrentField(rawNode, capturedRawNodes, rawSourceToken); finalizeField(); } else if (collecting) { // This node is part of a field being collected at this level. collectedNodesStack[collectedNodesStack.length - 1].push(node); - rawCollectedNodesStack[rawCollectedNodesStack.length - 1].push(rawNode); + captureRawNodeForCurrentField(rawNode, capturedRawNodes, rawSourceToken); } else { // This node is not part of any field. processedNodes.push(node); } } else if (collecting) { collectedNodesStack[collectedNodesStack.length - 1].push(node); - rawCollectedNodesStack[rawCollectedNodesStack.length - 1].push(rawNode); + captureRawNodeForCurrentField(rawNode, capturedRawNodes, rawSourceToken); } else { processedNodes.push(node); } + }; + + for (const node of nodes) { + const rawNode = carbonCopy(node); + const logicalNodes = expandNodeForFieldProcessing(node); + const rawLogicalNodes = expandNodeForFieldProcessing(rawNode); + const capturedRawNodes = new Set(); + const rawSourceToken = {}; + logicalNodes.forEach((logicalNode, index) => { + processNode(logicalNode, rawLogicalNodes[index] ?? rawNode, capturedRawNodes, rawSourceToken); + }); } let unpairedBegin = null; @@ -262,3 +312,74 @@ const extractInstructionTokensFromNode = (node) => { }); return tokens; }; + +const FIELD_CONTROL_ELEMENT_NAMES = new Set(['w:fldChar']); +const INSTRUCTION_ELEMENT_NAMES = new Set(['w:instrText', 'w:tab']); + +const cloneNodeWithElements = (node, elements) => ({ + ...node, + elements: carbonCopy(elements), +}); + +/** + * Expands mixed-content runs into logical subnodes so the fldChar state machine + * can process multiple field markers stored inside a single w:r in document order. + * + * @param {OpenXmlNode} node + * @returns {OpenXmlNode[]} + */ +const expandNodeForFieldProcessing = (node) => { + const elements = Array.isArray(node?.elements) ? node.elements : null; + if (node?.name !== 'w:r' || !elements || elements.length === 0) { + return [node]; + } + + const runProperties = elements.filter((el) => el?.name === 'w:rPr'); + const contentElements = elements.filter((el) => el?.name !== 'w:rPr'); + const logicalNodes = []; + let currentKind = null; + let currentElements = []; + + const flushCurrentGroup = () => { + if (currentElements.length === 0) return; + logicalNodes.push(cloneNodeWithElements(node, [...runProperties, ...currentElements])); + currentElements = []; + currentKind = null; + }; + + contentElements.forEach((element) => { + if (!element?.name) { + if (currentKind !== 'content') { + flushCurrentGroup(); + currentKind = 'content'; + } + currentElements.push(element); + return; + } + + if (FIELD_CONTROL_ELEMENT_NAMES.has(element.name)) { + flushCurrentGroup(); + logicalNodes.push(cloneNodeWithElements(node, [...runProperties, element])); + return; + } + + if (INSTRUCTION_ELEMENT_NAMES.has(element.name)) { + if (currentKind !== 'instruction') { + flushCurrentGroup(); + currentKind = 'instruction'; + } + currentElements.push(element); + return; + } + + if (currentKind !== 'content') { + flushCurrentGroup(); + currentKind = 'content'; + } + currentElements.push(element); + }); + + flushCurrentGroup(); + + return logicalNodes.length > 1 ? logicalNodes : [node]; +}; diff --git a/packages/super-editor/src/core/super-converter/field-references/preProcessNodesForFldChar.test.js b/packages/super-editor/src/core/super-converter/field-references/preProcessNodesForFldChar.test.js index 56dfe75275..1f223ca819 100644 --- a/packages/super-editor/src/core/super-converter/field-references/preProcessNodesForFldChar.test.js +++ b/packages/super-editor/src/core/super-converter/field-references/preProcessNodesForFldChar.test.js @@ -119,6 +119,190 @@ describe('preProcessNodesForFldChar', () => { ]); }); + it('processes TOC fields when begin, instrText, separate, and end share a single run', () => { + const nodes = [ + { + name: 'w:r', + elements: [ + { name: 'w:fldChar', attributes: { 'w:fldCharType': 'begin' } }, + { name: 'w:instrText', elements: [{ type: 'text', text: 'TOC \\o "1-1" \\h \\z \\u' }] }, + { name: 'w:fldChar', attributes: { 'w:fldCharType': 'separate' } }, + { name: 'w:fldChar', attributes: { 'w:fldCharType': 'end' } }, + ], + }, + ]; + + const { processedNodes } = preProcessNodesForFldChar(nodes, mockDocx); + + expect(processedNodes).toEqual([ + { + name: 'sd:tableOfContents', + type: 'element', + attributes: { + instruction: 'TOC \\o "1-1" \\h \\z \\u', + }, + elements: [], + }, + ]); + }); + + it('preserves unknown fields when begin, instrText, separate, and end share a single run', () => { + const nodes = [ + { + name: 'w:r', + elements: [ + { name: 'w:fldChar', attributes: { 'w:fldCharType': 'begin' } }, + { name: 'w:instrText', elements: [{ type: 'text', text: 'CUSTOMFIELD foo' }] }, + { name: 'w:fldChar', attributes: { 'w:fldCharType': 'separate' } }, + { name: 'w:t', elements: [{ type: 'text', text: 'value' }] }, + { name: 'w:fldChar', attributes: { 'w:fldCharType': 'end' } }, + ], + }, + ]; + + const { processedNodes } = preProcessNodesForFldChar(nodes, mockDocx); + + expect(processedNodes).toEqual(nodes); + }); + + it('does not duplicate later fields when an unknown field and a TOC share one run', () => { + const nodes = [ + { + name: 'w:r', + elements: [ + { name: 'w:fldChar', attributes: { 'w:fldCharType': 'begin' } }, + { name: 'w:instrText', elements: [{ type: 'text', text: 'CUSTOMFIELD foo' }] }, + { name: 'w:fldChar', attributes: { 'w:fldCharType': 'separate' } }, + { name: 'w:t', elements: [{ type: 'text', text: 'value' }] }, + { name: 'w:fldChar', attributes: { 'w:fldCharType': 'end' } }, + { name: 'w:fldChar', attributes: { 'w:fldCharType': 'begin' } }, + { name: 'w:instrText', elements: [{ type: 'text', text: 'TOC \\o "1-1" \\h \\z \\u' }] }, + { name: 'w:fldChar', attributes: { 'w:fldCharType': 'separate' } }, + { name: 'w:fldChar', attributes: { 'w:fldCharType': 'end' } }, + ], + }, + ]; + + const { processedNodes } = preProcessNodesForFldChar(nodes, mockDocx); + + expect(processedNodes).toEqual([ + { + name: 'w:r', + elements: [ + { name: 'w:fldChar', attributes: { 'w:fldCharType': 'begin' } }, + { name: 'w:instrText', elements: [{ type: 'text', text: 'CUSTOMFIELD foo' }] }, + { name: 'w:fldChar', attributes: { 'w:fldCharType': 'separate' } }, + { name: 'w:t', elements: [{ type: 'text', text: 'value' }] }, + { name: 'w:fldChar', attributes: { 'w:fldCharType': 'end' } }, + ], + }, + { + name: 'sd:tableOfContents', + type: 'element', + attributes: { + instruction: 'TOC \\o "1-1" \\h \\z \\u', + }, + elements: [], + }, + ]); + }); + + it('preserves w:drawing and w:pict nodes while collecting field content', () => { + const nodes = [ + { name: 'w:r', elements: [{ name: 'w:fldChar', attributes: { 'w:fldCharType': 'begin' } }] }, + { + name: 'w:r', + elements: [{ name: 'w:instrText', elements: [{ type: 'text', text: 'HYPERLINK "http://example.com"' }] }], + }, + { name: 'w:r', elements: [{ name: 'w:fldChar', attributes: { 'w:fldCharType': 'separate' } }] }, + { name: 'w:drawing', elements: [{ name: 'wp:inline', elements: [] }] }, + { name: 'w:pict', elements: [{ name: 'v:shape', elements: [] }] }, + { name: 'w:r', elements: [{ name: 'w:fldChar', attributes: { 'w:fldCharType': 'end' } }] }, + ]; + + const { processedNodes } = preProcessNodesForFldChar(nodes, mockDocx); + + expect(processedNodes).toEqual([ + { + name: 'w:hyperlink', + type: 'element', + attributes: { 'r:id': 'rIdabc12345' }, + elements: [ + { name: 'w:drawing', elements: [{ name: 'wp:inline', elements: [] }] }, + { name: 'w:pict', elements: [{ name: 'v:shape', elements: [] }] }, + ], + }, + ]); + }); + + it('processes fields that begin and end inside child nodes', () => { + const nodes = [ + { + name: 'w:p', + elements: [ + { name: 'w:r', elements: [{ name: 'w:fldChar', attributes: { 'w:fldCharType': 'begin' } }] }, + { + name: 'w:r', + elements: [{ name: 'w:instrText', elements: [{ type: 'text', text: 'HYPERLINK "http://example.com"' }] }], + }, + { name: 'w:r', elements: [{ name: 'w:fldChar', attributes: { 'w:fldCharType': 'separate' } }] }, + { name: 'w:r', elements: [{ name: 'w:t', elements: [{ type: 'text', text: 'link text' }] }] }, + { name: 'w:r', elements: [{ name: 'w:fldChar', attributes: { 'w:fldCharType': 'end' } }] }, + ], + }, + ]; + + const { processedNodes } = preProcessNodesForFldChar(nodes, mockDocx); + + expect(processedNodes).toEqual([ + { + name: 'w:p', + elements: [ + { + name: 'w:hyperlink', + type: 'element', + attributes: { 'r:id': 'rIdabc12345' }, + elements: [{ name: 'w:r', elements: [{ name: 'w:t', elements: [{ type: 'text', text: 'link text' }] }] }], + }, + ], + }, + ]); + }); + + it('processes fields that end inside child nodes after starting at the parent level', () => { + const nodes = [ + { name: 'w:r', elements: [{ name: 'w:fldChar', attributes: { 'w:fldCharType': 'begin' } }] }, + { + name: 'w:r', + elements: [{ name: 'w:instrText', elements: [{ type: 'text', text: 'HYPERLINK "http://example.com"' }] }], + }, + { name: 'w:r', elements: [{ name: 'w:fldChar', attributes: { 'w:fldCharType': 'separate' } }] }, + { + name: 'w:p', + elements: [ + { name: 'w:r', elements: [{ name: 'w:t', elements: [{ type: 'text', text: 'link text' }] }] }, + { name: 'w:r', elements: [{ name: 'w:fldChar', attributes: { 'w:fldCharType': 'end' } }] }, + ], + }, + ]; + + const { processedNodes } = preProcessNodesForFldChar(nodes, mockDocx); + + expect(processedNodes).toEqual([ + { + name: 'w:hyperlink', + type: 'element', + attributes: { 'r:id': 'rIdabc12345' }, + elements: [ + { + name: 'w:p', + elements: [{ name: 'w:r', elements: [{ name: 'w:t', elements: [{ type: 'text', text: 'link text' }] }] }], + }, + ], + }, + ]); + }); + it('should handle unpaired begin', () => { const nodes = [ { name: 'w:r', elements: [{ name: 'w:fldChar', attributes: { 'w:fldCharType': 'begin' } }] }, diff --git a/packages/super-editor/src/core/super-converter/v3/handlers/w/sdt/helpers/handle-doc-part-obj.js b/packages/super-editor/src/core/super-converter/v3/handlers/w/sdt/helpers/handle-doc-part-obj.js index 945ece187a..883a6c9519 100644 --- a/packages/super-editor/src/core/super-converter/v3/handlers/w/sdt/helpers/handle-doc-part-obj.js +++ b/packages/super-editor/src/core/super-converter/v3/handlers/w/sdt/helpers/handle-doc-part-obj.js @@ -41,11 +41,7 @@ export function handleDocPartObj(params) { */ export const tableOfContentsHandler = (params) => { const node = params.nodes[0]; - const translatedContent = params.nodeListHandler.handler({ - ...params, - nodes: node.elements, - path: [...(params.path || []), node], - }); + const translatedContent = translateTocSdtContent(node, params); const normalizedContent = normalizeDocPartContent(translatedContent); const sdtPr = params.extraParams.sdtPr; const id = sdtPr.elements?.find((el) => el.name === 'w:id')?.attributes['w:val'] || ''; @@ -114,11 +110,70 @@ const validGalleryTypeMap = { }; const inlineNodeTypes = new Set(['bookmarkStart', 'bookmarkEnd']); +const SD_TOC_XML_NAME = 'sd:tableOfContents'; +const PARAGRAPH_XML_NAME = 'w:p'; +const PARAGRAPH_PROPERTIES_XML_NAME = 'w:pPr'; const wrapInlineNode = (node) => ({ type: 'paragraph', content: [node], }); +const hasMeaningfulParagraphContent = (elements = []) => + elements.some((element) => element?.name && element.name !== PARAGRAPH_PROPERTIES_XML_NAME); + +const translateNodes = (params, nodes, pathTail = []) => + params.nodeListHandler.handler({ + ...params, + nodes, + path: [...(params.path || []), ...pathTail], + }); + +/** + * Hoists sd:tableOfContents blocks out of their wrapper paragraph so the + * resulting PM tree can represent them as block children of documentPartObject. + * + * @param {Object} sdtContent + * @param {Object} params + * @returns {Array} + */ +const translateTocSdtContent = (sdtContent, params) => { + const translatedContent = []; + const parentPath = [sdtContent]; + + (sdtContent?.elements || []).forEach((child) => { + const childElements = Array.isArray(child?.elements) ? child.elements : []; + const tocElements = + child?.name === PARAGRAPH_XML_NAME ? childElements.filter((el) => el?.name === SD_TOC_XML_NAME) : []; + + if (tocElements.length === 0) { + translatedContent.push(...translateNodes(params, [child], parentPath)); + return; + } + + const remainingElements = childElements.filter((el) => el?.name !== SD_TOC_XML_NAME); + if (hasMeaningfulParagraphContent(remainingElements)) { + translatedContent.push( + ...translateNodes( + params, + [ + { + ...child, + elements: remainingElements, + }, + ], + parentPath, + ), + ); + } + + tocElements.forEach((tocElement) => { + translatedContent.push(...translateNodes(params, [tocElement], [...parentPath, child])); + }); + }); + + return translatedContent; +}; + export const normalizeDocPartContent = (nodes = []) => { const normalized = []; nodes.forEach((node) => { diff --git a/packages/super-editor/src/core/super-converter/v3/handlers/w/sdt/helpers/handle-doc-part-obj.test.js b/packages/super-editor/src/core/super-converter/v3/handlers/w/sdt/helpers/handle-doc-part-obj.test.js index 4ef3822302..5b4ca98ca3 100644 --- a/packages/super-editor/src/core/super-converter/v3/handlers/w/sdt/helpers/handle-doc-part-obj.test.js +++ b/packages/super-editor/src/core/super-converter/v3/handlers/w/sdt/helpers/handle-doc-part-obj.test.js @@ -168,6 +168,153 @@ describe('tableOfContentsHandler', () => { expect(result.attrs.sdtPr).toHaveProperty('elements'); }); + it('hoists nested sd:tableOfContents blocks out of wrapper paragraphs', () => { + const sdtPr = { + name: 'w:sdtPr', + elements: [ + { name: 'w:id', attributes: { 'w:val': '456' } }, + { + name: 'w:docPartObj', + elements: [{ name: 'w:docPartGallery', attributes: { 'w:val': 'Table of Contents' } }], + }, + ], + }; + const contentNode = { + name: 'w:sdtContent', + elements: [ + { + name: 'w:p', + elements: [ + { name: 'w:r', elements: [{ name: 'w:t', elements: [{ type: 'text', text: 'Table of Contents' }] }] }, + ], + }, + { + name: 'w:p', + elements: [ + { name: 'sd:tableOfContents', attributes: { instruction: 'TOC \\o "1-1" \\h \\z \\u' }, elements: [] }, + ], + }, + ], + }; + const handler = vi.fn(({ nodes }) => { + const node = nodes[0]; + if (node.name === 'sd:tableOfContents') { + return [{ type: 'tableOfContents', attrs: { instruction: node.attributes.instruction }, content: [] }]; + } + return [{ type: 'paragraph', content: [{ type: 'text', text: 'Table of Contents' }] }]; + }); + const params = { + nodes: [contentNode], + nodeListHandler: { handler }, + extraParams: { sdtPr }, + path: [], + }; + + const result = tableOfContentsHandler(params); + + expect(result.content).toEqual([ + { type: 'paragraph', content: [{ type: 'text', text: 'Table of Contents' }] }, + { type: 'tableOfContents', attrs: { instruction: 'TOC \\o "1-1" \\h \\z \\u' }, content: [] }, + ]); + expect(handler).toHaveBeenCalledWith( + expect.objectContaining({ + nodes: [{ name: 'sd:tableOfContents', attributes: { instruction: 'TOC \\o "1-1" \\h \\z \\u' }, elements: [] }], + }), + ); + }); + + it('does not emit an empty paragraph when the wrapper only contains pPr and sd:tableOfContents', () => { + const sdtPr = { + name: 'w:sdtPr', + elements: [ + { name: 'w:id', attributes: { 'w:val': '456' } }, + { + name: 'w:docPartObj', + elements: [{ name: 'w:docPartGallery', attributes: { 'w:val': 'Table of Contents' } }], + }, + ], + }; + const contentNode = { + name: 'w:sdtContent', + elements: [ + { + name: 'w:p', + elements: [ + { name: 'w:pPr', elements: [] }, + { name: 'sd:tableOfContents', attributes: { instruction: 'TOC \\o "1-1" \\h \\z \\u' }, elements: [] }, + ], + }, + ], + }; + const handler = vi.fn(({ nodes }) => { + const node = nodes[0]; + if (node.name === 'sd:tableOfContents') { + return [{ type: 'tableOfContents', attrs: { instruction: node.attributes.instruction }, content: [] }]; + } + return [{ type: 'paragraph', content: [] }]; + }); + const params = { + nodes: [contentNode], + nodeListHandler: { handler }, + extraParams: { sdtPr }, + path: [], + }; + + const result = tableOfContentsHandler(params); + + expect(result.content).toEqual([ + { type: 'tableOfContents', attrs: { instruction: 'TOC \\o "1-1" \\h \\z \\u' }, content: [] }, + ]); + expect(handler).toHaveBeenCalledTimes(1); + }); + + it('preserves paragraph content when a wrapper paragraph contains text and sd:tableOfContents', () => { + const sdtPr = { + name: 'w:sdtPr', + elements: [ + { name: 'w:id', attributes: { 'w:val': '456' } }, + { + name: 'w:docPartObj', + elements: [{ name: 'w:docPartGallery', attributes: { 'w:val': 'Table of Contents' } }], + }, + ], + }; + const contentNode = { + name: 'w:sdtContent', + elements: [ + { + name: 'w:p', + elements: [ + { name: 'w:pPr', elements: [] }, + { name: 'w:r', elements: [{ name: 'w:t', elements: [{ type: 'text', text: 'Intro text' }] }] }, + { name: 'sd:tableOfContents', attributes: { instruction: 'TOC \\o "1-1" \\h \\z \\u' }, elements: [] }, + ], + }, + ], + }; + const handler = vi.fn(({ nodes }) => { + const node = nodes[0]; + if (node.name === 'sd:tableOfContents') { + return [{ type: 'tableOfContents', attrs: { instruction: node.attributes.instruction }, content: [] }]; + } + return [{ type: 'paragraph', content: [{ type: 'text', text: 'Intro text' }] }]; + }); + const params = { + nodes: [contentNode], + nodeListHandler: { handler }, + extraParams: { sdtPr }, + path: [], + }; + + const result = tableOfContentsHandler(params); + + expect(result.content).toEqual([ + { type: 'paragraph', content: [{ type: 'text', text: 'Intro text' }] }, + { type: 'tableOfContents', attrs: { instruction: 'TOC \\o "1-1" \\h \\z \\u' }, content: [] }, + ]); + expect(handler).toHaveBeenCalledTimes(2); + }); + it('should handle empty sdtPr.elements array', () => { const sdtPr = { name: 'w:sdtPr', diff --git a/packages/super-editor/src/core/super-converter/v3/handlers/w/sdt/helpers/translate-document-part-obj.js b/packages/super-editor/src/core/super-converter/v3/handlers/w/sdt/helpers/translate-document-part-obj.js index c632821131..d1dc4206e7 100644 --- a/packages/super-editor/src/core/super-converter/v3/handlers/w/sdt/helpers/translate-document-part-obj.js +++ b/packages/super-editor/src/core/super-converter/v3/handlers/w/sdt/helpers/translate-document-part-obj.js @@ -30,6 +30,13 @@ export function translateDocumentPartObj(params) { return result; } +function sanitizeId(id) { + if (typeof id === 'string' && id.trim() !== '') { + return id.trim(); + } + return undefined; +} + /** * Generate sdtPr element for document part object with passthrough support. * Builds core w:id and w:docPartObj elements, then appends any additional @@ -46,7 +53,7 @@ function generateSdtPrForDocPartObj(attrs) { const existingDocPartGallery = existingDocPartObj?.elements?.find((el) => el.name === 'w:docPartGallery') ?.attributes?.['w:val']; const docPartGallery = attrs.docPartGallery ?? existingDocPartGallery ?? null; - const id = attrs.id ?? attrs.sdtPr?.elements?.find((el) => el.name === 'w:id')?.attributes?.['w:val'] ?? ''; + const id = sanitizeId(attrs.id ?? attrs.sdtPr?.elements?.find((el) => el.name === 'w:id')?.attributes?.['w:val']); // Per OOXML spec: presence of w:docPartUnique element = true, absence = false const docPartUnique = attrs.docPartUnique ?? existingDocPartObj?.elements?.some((el) => el.name === 'w:docPartUnique') ?? false; @@ -54,22 +61,32 @@ function generateSdtPrForDocPartObj(attrs) { // If we do not know the gallery type, prefer full passthrough to avoid emitting invalid XML if (docPartGallery === null) { if (attrs.sdtPr) { - return attrs.sdtPr; + // Filter out any w:id elements with empty values to avoid invalid XML, but preserve all other passthrough elements. + const filteredSdtPr = { + ...attrs.sdtPr, + elements: Array.isArray(attrs.sdtPr.elements) + ? attrs.sdtPr.elements.filter((el) => !(el.name === 'w:id' && el.attributes?.['w:val']?.trim() === '')) + : attrs.sdtPr.elements, + }; + return filteredSdtPr; + } + const elements = [ + { + name: 'w:docPartObj', + elements: [], + }, + ]; + if (id != undefined) { + elements.unshift({ + name: 'w:id', + attributes: { + 'w:val': id, + }, + }); } return { name: 'w:sdtPr', - elements: [ - { - name: 'w:id', - attributes: { - 'w:val': id, - }, - }, - { - name: 'w:docPartObj', - elements: [], - }, - ], + elements, }; } @@ -89,18 +106,21 @@ function generateSdtPrForDocPartObj(attrs) { // Start with explicitly managed elements const sdtPrElements = [ - { - name: 'w:id', - attributes: { - 'w:val': id, - }, - }, { name: 'w:docPartObj', elements: docPartObjElements, }, ]; + if (id != undefined) { + sdtPrElements.unshift({ + name: 'w:id', + attributes: { + 'w:val': id, + }, + }); + } + // Passthrough: preserve any sdtPr elements not explicitly managed if (attrs.sdtPr?.elements && Array.isArray(attrs.sdtPr.elements)) { const elementsToExclude = ['w:id', 'w:docPartObj']; diff --git a/packages/super-editor/src/core/super-converter/v3/handlers/w/sdt/helpers/translate-document-part-obj.test.js b/packages/super-editor/src/core/super-converter/v3/handlers/w/sdt/helpers/translate-document-part-obj.test.js index f569363505..702ef8cdb1 100644 --- a/packages/super-editor/src/core/super-converter/v3/handlers/w/sdt/helpers/translate-document-part-obj.test.js +++ b/packages/super-editor/src/core/super-converter/v3/handlers/w/sdt/helpers/translate-document-part-obj.test.js @@ -33,4 +33,63 @@ describe('translateDocumentPartObj', () => { ), ).toBeUndefined(); }); + + it('omits w:id when document part id is empty', () => { + const node = { + type: 'documentPartObject', + content: [], + attrs: { + id: '', + docPartGallery: 'Table of Contents', + docPartUnique: true, + sdtPr: { + name: 'w:sdtPr', + elements: [ + { name: 'w:id', attributes: { 'w:val': '' } }, + { + name: 'w:docPartObj', + elements: [{ name: 'w:docPartGallery', attributes: { 'w:val': 'Table of Contents' } }], + }, + ], + }, + }, + }; + + const result = translateDocumentPartObj({ node }); + const sdtPr = result.elements[0]; + + expect(sdtPr.elements.find((el) => el.name === 'w:id')).toBeUndefined(); + expect(sdtPr.elements.find((el) => el.name === 'w:docPartObj')).toBeDefined(); + }); + + it('strips empty passthrough w:id when docPartGallery is unknown', () => { + const passthroughSdtPr = { + name: 'w:sdtPr', + elements: [ + { name: 'w:id', attributes: { 'w:val': '' } }, + { name: 'w:docPartObj', elements: [] }, + { name: 'w:foo', attributes: { 'w:val': 'bar' } }, + ], + }; + + const node = { + type: 'documentPartObject', + content: [], + attrs: { + id: '', + docPartGallery: null, + docPartUnique: true, + sdtPr: passthroughSdtPr, + }, + }; + + const result = translateDocumentPartObj({ node }); + const sdtPr = result.elements[0]; + + expect(sdtPr).not.toBe(passthroughSdtPr); + expect(sdtPr.elements.find((el) => el.name === 'w:id')).toBeUndefined(); + expect(sdtPr.elements.find((el) => el.name === 'w:docPartObj')).toBeDefined(); + expect(sdtPr.elements.find((el) => el.name === 'w:foo')).toBeDefined(); + expect(passthroughSdtPr.elements.find((el) => el.name === 'w:id')).toBeDefined(); + }); }); diff --git a/packages/super-editor/src/extensions/table-of-contents/table-of-contents.js b/packages/super-editor/src/extensions/table-of-contents/table-of-contents.js index 7f10df5ffc..4090fe89f8 100644 --- a/packages/super-editor/src/extensions/table-of-contents/table-of-contents.js +++ b/packages/super-editor/src/extensions/table-of-contents/table-of-contents.js @@ -5,7 +5,7 @@ export const TableOfContents = Node.create({ group: 'block', - content: 'paragraph+', + content: 'paragraph*', inline: false, diff --git a/packages/super-editor/src/extensions/table-of-contents/table-of-contents.schema.test.js b/packages/super-editor/src/extensions/table-of-contents/table-of-contents.schema.test.js new file mode 100644 index 0000000000..37dfbe04cd --- /dev/null +++ b/packages/super-editor/src/extensions/table-of-contents/table-of-contents.schema.test.js @@ -0,0 +1,27 @@ +import { describe, expect, it } from 'vitest'; +import { initTestEditor } from '@tests/helpers/helpers.js'; + +describe('tableOfContents schema', () => { + it('accepts a tableOfContents node with no paragraph children', () => { + const doc = { + type: 'doc', + content: [ + { + type: 'tableOfContents', + attrs: { + instruction: 'TOC \\o "1-3" \\h \\z \\u', + }, + content: [], + }, + ], + }; + + const { editor } = initTestEditor({ loadFromSchema: true, content: doc, isHeadless: true }); + + expect(editor.state.doc.childCount).toBe(1); + expect(editor.state.doc.firstChild.type.name).toBe('tableOfContents'); + expect(editor.state.doc.firstChild.childCount).toBe(0); + + editor.destroy(); + }); +});