ImportNode(node, true, ImportFormatMode.KeepSourceFormatting)未导入正确的格式

suicheng · March 27, 2025, 3:46am

/// <summary>
/// Copies paragraphs of <paramref name="doc"/> into a newly created document, beginning with
/// <paramref name="startNode"/> and stopping when <paramref name="endNode"/> is reached.
/// </summary>
/// <remarks>
/// NOTE(review): only nodes of type Paragraph are enumerated (deep search) and every match is appended
/// directly to the new body. Paragraphs that sit inside table cells are therefore presumably flattened
/// into plain body paragraphs, which would explain the tables disappearing in the result - confirm.
/// </remarks>
/// <param name="doc">Source document whose paragraphs are scanned.</param>
/// <param name="startNode">First paragraph to copy; it is included in the result.</param>
/// <param name="endNode">Paragraph at which copying stops; the last body child is removed when it is reached.</param>
/// <returns>The new document holding the imported paragraphs.</returns>
private static Aspose.Words.Document ExtractContentBetweenNodes(Aspose.Words.Document doc, Node startNode, Node endNode)
{
    // NOTE(review): a blank Document presumably already contains a section/body with an empty paragraph;
    // imported nodes are appended after whatever is already in FirstSection.Body.
    Aspose.Words.Document newDoc = new Aspose.Words.Document();
    //newDoc.CopyStylesFromTemplate(doc);

    // Becomes true once startNode has been seen; from then on every visited paragraph is copied.
    bool isExtracting = false;
    foreach (Node node in doc.GetChildNodes(NodeType.Paragraph, true))
    {
        try
        {
            if (node == startNode)
            {
                isExtracting = true;
            }

            if (isExtracting)
            {
                // Deep-import the paragraph into the new document, then attach it to the body.
                Node importNode = newDoc.ImportNode(node, true, ImportFormatMode.KeepSourceFormatting);
                newDoc.FirstSection.Body.AppendChild(importNode);
            }

            if (node == endNode)
            {
                // Drops the last body child (the end paragraph just appended when extraction is active)
                // and stops scanning. NOTE(review): if endNode is met before startNode, this removes
                // whatever happens to be last in the body instead.
                newDoc.FirstSection.Body.RemoveChild(newDoc.FirstSection.Body.LastChild);
                break;
            }
        }
        catch (Exception ex)
        {
            // The exception is swallowed: the message is read and discarded, so failures stay silent.
            _ = ex.Message;
        }
    }
    return newDoc;
}

suicheng · March 27, 2025, 3:48am

我想使用 Aspose.Words 将两个 outline 之间的内容复制到新文档中，并保留原文档的格式。但是使用 ImportNode(node, true, ImportFormatMode.KeepSourceFormatting) 方法之后，格式还是变了：源文档中有表格，但新文档中的表格没有了，格式也丢失了。请问这是什么原因造成的？我使用的是 23.8 版本。

suicheng · March 27, 2025, 3:49am

上面的图片是源文档，下面的图片是截取的新文档，可以明显的看到表格的格式没有了

我使用的代码在最上面已经粘贴出来了

vyacheslav.deryushev · March 27, 2025, 6:46am

@suicheng 请尝试使用我们的 "ExtractContentHelper" 类来获取结果。

以下链接可能对您有所帮助：

github.com

aspose-words/Aspose.Words-for-.NET/blob/master/Examples/DocsExamples/DocsExamples/Programming with Documents/Contents Management/Extract content helper.cs

using System;
using System.Collections.Generic;
using Aspose.Words;

namespace DocsExamples.Programming_with_Documents.Contents_Management
{
    internal class ExtractContentHelper
    {
        public static List<Node> ExtractContent(Node startNode, Node endNode, bool isInclusive)
        {
            // First, check that the nodes passed to this method are valid for use.
            VerifyParameterNodes(startNode, endNode);

            // Create a list to store the extracted nodes.
            List<Node> nodes = new List<Node>();
            // If either marker is part of a comment, including the comment itself, we need to move the pointer
            // forward to the Comment Node found after the CommentRangeEnd node.
            if (endNode.NodeType == NodeType.CommentRangeEnd && isInclusive)
            {
                Node node = FindNextNode(NodeType.Comment, endNode.NextSibling);

This file has been truncated. show original

github.com

aspose-words/Aspose.Words-for-.NET/blob/master/Examples/DocsExamples/DocsExamples/Programming with Documents/Contents Management/Extract content.cs#L85


      
          
              // Secondly, extract the content between these nodes without the comment.
              List<Node> extractedNodesExclusive = ExtractContentHelper.ExtractContent(commentStart, commentEnd, false);
              
              dstDoc = ExtractContentHelper.GenerateDocument(doc, extractedNodesExclusive);
              dstDoc.Save(ArtifactsDir + "ExtractContent.ExtractContentBetweenCommentRange.WithoutComment.docx");
              //ExEnd:ExtractContentBetweenCommentRange
          }
          
          [Test]
          public void ExtractContentBetweenParagraphs()
          {
              //ExStart:ExtractContentBetweenParagraphs
              //GistId:1f94e59ea4838ffac2f0edf921f67060
              Document doc = new Document(MyDir + "Extract content.docx");
          
              Paragraph startPara = (Paragraph) doc.FirstSection.Body.GetChild(NodeType.Paragraph, 6, true);
              Paragraph endPara = (Paragraph) doc.FirstSection.Body.GetChild(NodeType.Paragraph, 10, true);
              // Extract the content between these nodes in the document. Include these markers in the extraction.
              List<Node> extractedNodes = ExtractContentHelper.ExtractContent(startPara, endPara, true);

suicheng · March 27, 2025, 8:50am

@vyacheslav.deryushev 非常感谢您提供的代码，这些代码确实是有用的，完整的分割了文档并且保留了样式，感谢您🙏

suicheng · March 27, 2025, 9:23am

@vyacheslav.deryushev 请问如果我想在拆分的时候，保留原文档的headerfooter属性，应该如何做？

vyacheslav.deryushev · March 27, 2025, 10:23am

@suicheng 例如，您可以更新以下方法来获取章节节点，并在创建文档时使用它：

/// <summary>
/// Extracts clones of the block-level content located between two marker nodes. The returned list
/// starts with a deep clone of the section that contains the start marker, so that the consumer
/// can reuse that section's headers and footers.
/// </summary>
/// <param name="startNode">Marker node at which extraction begins.</param>
/// <param name="endNode">Marker node at which extraction ends.</param>
/// <param name="isInclusive">Whether the marker nodes themselves belong to the extracted content.</param>
/// <returns>
/// A list whose first element is the cloned section of the start marker, followed by the cloned
/// block-level nodes between the markers.
/// </returns>
/// <exception cref="System.InvalidOperationException">
/// The sections of the document were exhausted before the end marker's block was reached.
/// </exception>
public static List<Node> ExtractContent(Node startNode, Node endNode, bool isInclusive)
{
    // First, check that the nodes passed to this method are valid for use.
    VerifyParameterNodes(startNode, endNode);

    // Create a list to store the extracted nodes.
    List<Node> nodes = new List<Node>();

    // Include the section of the start marker; it carries the headers and footers.
    Section section = (Section) startNode.GetAncestor(NodeType.Section);
    nodes.Add(section.Clone(true));

    // If the end marker is a CommentRangeEnd and the extraction is inclusive, move the end marker
    // forward to the Comment node found after it, so the comment itself is part of the range.
    if (endNode.NodeType == NodeType.CommentRangeEnd && isInclusive)
    {
        Node node = FindNextNode(NodeType.Comment, endNode.NextSibling);
        if (node != null)
            endNode = node;
    }

    // Keep a record of the original nodes passed to this method to split marker nodes if needed.
    Node originalStartNode = startNode;
    Node originalEndNode = endNode;

    // Extract content based on block-level nodes (paragraphs and tables). Traverse through parent nodes to find them.
    // We will split the first and last nodes' content, depending if the marker nodes are inline.
    startNode = GetAncestorInBody(startNode);
    endNode = GetAncestorInBody(endNode);

    bool isExtracting = true;
    bool isStartingNode = true;
    // The current node we are extracting from the document.
    Node currNode = startNode;

    // Process all block-level nodes; the first and the last one are handed to ProcessMarker,
    // which trims the clone when the marker is nested inside the block.
    while (isExtracting)
    {
        // Clone the current node and its children to obtain a copy.
        Node cloneNode = currNode.Clone(true);
        bool isEndingNode = currNode.Equals(endNode);

        if (isStartingNode || isEndingNode)
        {
            // The end marker is processed first so that child indexes used for the start marker stay valid.
            if (isEndingNode)
            {
                // !isStartingNode: don't add the node twice if the markers are the same node.
                ProcessMarker(cloneNode, nodes, originalEndNode, currNode, isInclusive,
                    false, !isStartingNode, false);
                isExtracting = false;
            }

            // Separate conditional: the block-level start and end markers may be the same node.
            if (isStartingNode)
            {
                ProcessMarker(cloneNode, nodes, originalStartNode, currNode, isInclusive,
                    true, true, false);
                isStartingNode = false;
            }
        }
        else
        {
            // Node is not a start or end marker, simply add the copy to the list.
            nodes.Add(cloneNode);
        }

        if (isExtracting)
        {
            // Advance to the next block. When the current body is exhausted, continue with the first
            // block of the following section, skipping sections whose body has no children.
            Node nextNode = currNode.NextSibling;
            Section scanSection = (Section) currNode.GetAncestor(NodeType.Section);
            while (nextNode == null)
            {
                scanSection = (Section) scanSection.NextSibling;
                if (scanSection == null)
                    throw new System.InvalidOperationException(
                        "The end node was not reached before the last section of the document.");

                if (scanSection.Body != null)
                    nextNode = scanSection.Body.FirstChild;
            }

            currNode = nextNode;
        }
    }

    // For compatibility with mode with inline bookmarks, add the next paragraph (empty).
    if (isInclusive && originalEndNode == endNode && !originalEndNode.IsComposite)
        IncludeNextParagraph(endNode, nodes);

    // Return the nodes between the node markers.
    return nodes;
}

/// <summary>
/// Builds a new document from a list of extracted nodes, keeping the source formatting.
/// </summary>
/// <param name="srcDoc">Document the nodes were extracted from; used as the import source.</param>
/// <param name="nodes">
/// Extracted nodes. A node of type Section contributes only its headers and footers; every other node
/// is imported into the body of the destination document's single section.
/// </param>
/// <returns>The generated document.</returns>
public static Document GenerateDocument(Document srcDoc, List<Node> nodes)
{
    Document dstDoc = new Document();
    // All content goes into the section that a blank document already provides.
    Section currentSection = dstDoc.FirstSection;
    // A blank document starts with an empty placeholder paragraph; drop it so the result
    // begins with the imported content instead of a stray empty paragraph.
    currentSection.Body.RemoveAllChildren();

    // Import each node from the list into the new document. Keep the original formatting of the node.
    NodeImporter importer = new NodeImporter(srcDoc, dstDoc, ImportFormatMode.KeepSourceFormatting);
    foreach (Node node in nodes)
    {
        if (node.NodeType == NodeType.Section)
        {
            // A section node is not imported as a section: only its headers and footers
            // are copied into the destination section.
            Section srcSection = (Section) node;
            foreach (HeaderFooter hf in srcSection.HeadersFooters)
                currentSection.HeadersFooters.Add(importer.ImportNode(hf, true));
        }
        else
        {
            // Import the node into the current section's body.
            Node importNode = importer.ImportNode(node, true);
            currentSection.Body.AppendChild(importNode);
        }
    }

    return dstDoc;
}

vyacheslav.deryushev · March 27, 2025, 2:57pm

@suicheng 下面是改进后的代码，它会收集并使用开始节点和结束节点之间的所有节（Section），而不仅仅是起始节点所在的那一节：

/// <summary>
/// Helper that extracts the content between two marker nodes of a document, together with the
/// sections the content belongs to, and rebuilds a document from the extracted nodes.
/// </summary>
internal class ExtractContentHelper
{
    /// <summary>
    /// Extracts clones of the block-level content located between two marker nodes. Each time the
    /// traversal enters a section, a deep clone of that section is added to the list ahead of the
    /// section's content.
    /// </summary>
    /// <param name="startNode">Marker node at which extraction begins.</param>
    /// <param name="endNode">Marker node at which extraction ends.</param>
    /// <param name="isInclusive">Whether the marker nodes themselves belong to the extracted content.</param>
    /// <returns>Section clones interleaved with the cloned block-level nodes that follow them.</returns>
    /// <exception cref="ArgumentException">The marker nodes fail the checks of VerifyParameterNodes.</exception>
    /// <exception cref="InvalidOperationException">
    /// The sections of the document were exhausted before the end marker's block was reached.
    /// </exception>
    public static List<Node> ExtractContent(Node startNode, Node endNode, bool isInclusive)
    {
        // First, check that the nodes passed to this method are valid for use.
        VerifyParameterNodes(startNode, endNode);

        // Create a list to store the extracted nodes.
        List<Node> nodes = new List<Node>();

        // If the end marker is a CommentRangeEnd and the extraction is inclusive, move the end marker
        // forward to the Comment node found after it, so the comment itself is part of the range.
        if (endNode.NodeType == NodeType.CommentRangeEnd && isInclusive)
        {
            Node node = FindNextNode(NodeType.Comment, endNode.NextSibling);
            if (node != null)
                endNode = node;
        }

        // Keep a record of the original nodes passed to this method to split marker nodes if needed.
        Node originalStartNode = startNode;
        Node originalEndNode = endNode;

        // Extract content based on block-level nodes (paragraphs and tables). Traverse through parent nodes to find them.
        // We will split the first and last nodes' content, depending if the marker nodes are inline.
        startNode = GetAncestorInBody(startNode);
        endNode = GetAncestorInBody(endNode);

        bool isExtracting = true;
        bool isStartingNode = true;
        // The current node we are extracting from the document.
        Node currNode = startNode;
        // The source section whose clone was added most recently. Sections are visited in document
        // order, so comparing against the latest one by reference is enough to add each section once.
        Section currSection = null;

        // Process all block-level nodes; the first and the last one are handed to ProcessMarker,
        // which trims the clone when the marker is nested inside the block.
        while (isExtracting)
        {
            Section section = (Section) currNode.GetAncestor(NodeType.Section);
            if (section != currSection)
            {
                nodes.Add(section.Clone(true));
                currSection = section;
            }

            // Clone the current node and its children to obtain a copy.
            Node cloneNode = currNode.Clone(true);
            bool isEndingNode = currNode.Equals(endNode);

            if (isStartingNode || isEndingNode)
            {
                // The end marker is processed first so that child indexes used for the start marker stay valid.
                if (isEndingNode)
                {
                    // !isStartingNode: don't add the node twice if the markers are the same node.
                    ProcessMarker(cloneNode, nodes, originalEndNode, currNode, isInclusive,
                        false, !isStartingNode, false);
                    isExtracting = false;
                }

                // Separate conditional: the block-level start and end markers may be the same node.
                if (isStartingNode)
                {
                    ProcessMarker(cloneNode, nodes, originalStartNode, currNode, isInclusive,
                        true, true, false);
                    isStartingNode = false;
                }
            }
            else
            {
                // Node is not a start or end marker, simply add the copy to the list.
                nodes.Add(cloneNode);
            }

            // Move to the next block-level node, crossing section boundaries when needed.
            if (isExtracting)
                currNode = GetNextBlockNode(currNode);
        }

        // For compatibility with mode with inline bookmarks, add the next paragraph (empty).
        if (isInclusive && originalEndNode == endNode && !originalEndNode.IsComposite)
            IncludeNextParagraph(endNode, nodes);

        // Return the nodes between the node markers.
        return nodes;
    }

    /// <summary>
    /// Returns the block that follows <paramref name="currNode"/>: its next sibling, or the first body
    /// child of the nearest following section that has one.
    /// </summary>
    /// <exception cref="InvalidOperationException">No following section contains a block.</exception>
    private static Node GetNextBlockNode(Node currNode)
    {
        Node nextNode = currNode.NextSibling;
        Section section = (Section) currNode.GetAncestor(NodeType.Section);

        // The current body is exhausted: continue in the following sections, skipping empty bodies.
        while (nextNode == null)
        {
            section = (Section) section.NextSibling;
            if (section == null)
                throw new InvalidOperationException(
                    "The end node was not reached before the last section of the document.");

            if (section.Body != null)
                nextNode = section.Body.FirstChild;
        }

        return nextNode;
    }

    /// <summary>
    /// Validates the marker nodes: both must be non-null, belong to the same document, live inside a
    /// body, and the end marker must not precede the start marker.
    /// </summary>
    /// <exception cref="ArgumentException">One of the checks failed.</exception>
    private static void VerifyParameterNodes(Node startNode, Node endNode)
    {
        // The order in which these checks are done is important.
        if (startNode == null)
            throw new ArgumentException("Start node cannot be null");
        if (endNode == null)
            throw new ArgumentException("End node cannot be null");

        if (!startNode.Document.Equals(endNode.Document))
            throw new ArgumentException("Start node and end node must belong to the same document");

        if (startNode.GetAncestor(NodeType.Body) == null || endNode.GetAncestor(NodeType.Body) == null)
            throw new ArgumentException("Start node and end node must be a child or descendant of a body");

        // Check the end node is after the start node in the DOM tree.
        // First, check if they are in different sections, then if they're not,
        // check their position in the body of the same section.
        Section startSection = (Section) startNode.GetAncestor(NodeType.Section);
        Section endSection = (Section) endNode.GetAncestor(NodeType.Section);

        int startIndex = startSection.ParentNode.IndexOf(startSection);
        int endIndex = endSection.ParentNode.IndexOf(endSection);

        if (startIndex == endIndex)
        {
            if (startSection.Body.IndexOf(GetAncestorInBody(startNode)) >
                endSection.Body.IndexOf(GetAncestorInBody(endNode)))
                throw new ArgumentException("The end node must be after the start node in the body");
        }
        else if (startIndex > endIndex)
            throw new ArgumentException("The section of end node must be after the section start node");
    }

    /// <summary>
    /// Depth-first search for a node of the given type, starting at <paramref name="fromNode"/> itself
    /// and continuing through its descendants and following siblings.
    /// </summary>
    /// <returns>The first matching node, or null when none is found.</returns>
    private static Node FindNextNode(NodeType nodeType, Node fromNode)
    {
        if (fromNode == null || fromNode.NodeType == nodeType)
            return fromNode;

        if (fromNode.IsComposite)
        {
            Node node = FindNextNode(nodeType, ((CompositeNode) fromNode).FirstChild);
            if (node != null)
                return node;
        }

        return FindNextNode(nodeType, fromNode.NextSibling);
    }

    /// <summary>
    /// Handles the block that contains a start or end marker: trims the cloned block so that only the
    /// part inside the extracted range remains, and adds it to the list when allowed.
    /// </summary>
    /// <param name="cloneNode">Clone of <paramref name="blockLevelAncestor"/> that is trimmed and added.</param>
    /// <param name="nodes">List that receives the processed clone.</param>
    /// <param name="node">The marker node.</param>
    /// <param name="blockLevelAncestor">The block-level node that is, or contains, the marker.</param>
    /// <param name="isInclusive">Whether the marker itself is kept.</param>
    /// <param name="isStartMarker">True for the start marker, false for the end marker.</param>
    /// <param name="canAdd">Whether the clone may be added to <paramref name="nodes"/>.</param>
    /// <param name="forceAdd">Add the clone even when trimming left it without children.</param>
    private static void ProcessMarker(Node cloneNode, List<Node> nodes, Node node, Node blockLevelAncestor,
        bool isInclusive, bool isStartMarker, bool canAdd, bool forceAdd)
    {
        // If we are dealing with a block-level node, see if it should be included and add it to the list.
        if (node == blockLevelAncestor)
        {
            if (canAdd && isInclusive)
                nodes.Add(cloneNode);
            return;
        }

        // cloneNode is a clone of blockLevelAncestor. If node != blockLevelAncestor, the latter
        // is the node's ancestor, which means it is a composite node.
        System.Diagnostics.Debug.Assert(cloneNode.IsComposite);

        // If a marker is a FieldStart node check if it's to be included or not.
        // We assume for simplicity that the FieldStart and FieldEnd appear in the same paragraph.
        if (node.NodeType == NodeType.FieldStart)
        {
            // If the marker is a start node and is not included, skip to the end of the field.
            // If the marker is an end node and is to be included, then move to the end field so the field will not be removed.
            if (isStartMarker && !isInclusive || !isStartMarker && isInclusive)
            {
                while (node.NextSibling != null && node.NodeType != NodeType.FieldEnd)
                    node = node.NextSibling;
            }
        }

        // Support a case if the marker node is on the third level of the document body or lower.
        List<Node> nodeBranch = FillSelfAndParents(node, blockLevelAncestor);

        // Walk from the outermost ancestor down to the marker and locate the matching node in the
        // clone by child index, trimming the siblings outside of the range on every level.
        Node currentCloneNode = cloneNode;
        for (int i = nodeBranch.Count - 1; i >= 0; i--)
        {
            Node currentNode = nodeBranch[i];
            int nodeIndex = currentNode.ParentNode.IndexOf(currentNode);
            currentCloneNode = ((CompositeNode) currentCloneNode).GetChildNodes(NodeType.Any, false)[nodeIndex];

            // Ancestors of the marker (i > 0) are always kept; only the marker itself honors isInclusive.
            RemoveNodesOutsideOfRange(currentCloneNode, isInclusive || (i > 0), isStartMarker);
        }

        // After processing, the composite node may have become empty if the marker was not included.
        if (canAdd &&
            (forceAdd || ((CompositeNode) cloneNode).HasChildNodes))
            nodes.Add(cloneNode);
    }

    /// <summary>
    /// Removes the siblings of <paramref name="markerNode"/> that fall outside of the range: the ones
    /// before a start marker, or the ones after an end marker. The marker itself is removed as well
    /// unless <paramref name="isInclusive"/> is true.
    /// </summary>
    private static void RemoveNodesOutsideOfRange(Node markerNode, bool isInclusive, bool isStartMarker)
    {
        bool isProcessing = true;
        // For a start marker removal is active from the first sibling; for an end marker it
        // switches on once the marker has been reached.
        bool isRemoving = isStartMarker;
        Node nextNode = markerNode.ParentNode.FirstChild;

        while (isProcessing && nextNode != null)
        {
            Node currentNode = nextNode;
            bool isSkip = false;

            if (currentNode.Equals(markerNode))
            {
                if (isStartMarker)
                {
                    isProcessing = false;
                    if (isInclusive)
                        isRemoving = false;
                }
                else
                {
                    isRemoving = true;
                    if (isInclusive)
                        isSkip = true;
                }
            }

            // Read the next sibling before removing the current node from its parent.
            nextNode = nextNode.NextSibling;
            if (isRemoving && !isSkip)
                currentNode.Remove();
        }
    }

    /// <summary>
    /// Collects <paramref name="node"/> and its ancestors up to, but not including,
    /// <paramref name="tillNode"/>, ordered from the node outwards.
    /// </summary>
    private static List<Node> FillSelfAndParents(Node node, Node tillNode)
    {
        List<Node> list = new List<Node>();
        Node currentNode = node;

        while (currentNode != tillNode)
        {
            list.Add(currentNode);
            currentNode = currentNode.ParentNode;
        }

        return list;
    }

    /// <summary>
    /// Finds the next paragraph after <paramref name="node"/> and adds a trimmed clone of its
    /// block-level ancestor to <paramref name="nodes"/>.
    /// </summary>
    private static void IncludeNextParagraph(Node node, List<Node> nodes)
    {
        Paragraph paragraph = (Paragraph) FindNextNode(NodeType.Paragraph, node.NextSibling);
        if (paragraph != null)
        {
            // Move to the first child to include paragraphs without content.
            Node markerNode = paragraph.HasChildNodes ? paragraph.FirstChild : paragraph;
            Node rootNode = GetAncestorInBody(paragraph);

            ProcessMarker(rootNode.Clone(true), nodes, markerNode, rootNode,
                markerNode == paragraph, false, true, true);
        }
    }

    /// <summary>
    /// Climbs the parents of <paramref name="startNode"/> until reaching the node whose parent is a body.
    /// </summary>
    private static Node GetAncestorInBody(Node startNode)
    {
        while (startNode.ParentNode.NodeType != NodeType.Body)
            startNode = startNode.ParentNode;
        return startNode;
    }

    /// <summary>
    /// Builds a new document from a list of extracted nodes, keeping the source formatting. Every
    /// Section node in the list starts a new section in the result (shallow section import, empty
    /// body, plus its headers and footers); every other node is appended to the body of the most
    /// recently started section.
    /// </summary>
    /// <param name="srcDoc">Document the nodes were extracted from; used as the import source.</param>
    /// <param name="nodes">Extracted nodes, with section nodes preceding their content.</param>
    /// <returns>The generated document.</returns>
    public static Document GenerateDocument(Document srcDoc, List<Node> nodes)
    {
        Document dstDoc = new Document();
        // Drop the section of the blank document; sections are created from the list instead.
        dstDoc.FirstSection.Remove();

        // Import each node from the list into the new document. Keep the original formatting of the node.
        NodeImporter importer = new NodeImporter(srcDoc, dstDoc, ImportFormatMode.KeepSourceFormatting);
        Section importedSection = null;
        foreach (Node node in nodes)
        {
            if (node.NodeType == NodeType.Section)
            {
                Section srcSection = (Section) node;
                // Shallow imports: take the section and its body without their children, so only
                // the explicitly listed nodes end up in the body.
                importedSection = (Section) importer.ImportNode(srcSection, false);
                importedSection.AppendChild(importer.ImportNode(srcSection.Body, false));
                foreach (HeaderFooter hf in srcSection.HeadersFooters)
                    importedSection.HeadersFooters.Add(importer.ImportNode(hf, true));

                dstDoc.AppendChild(importedSection);
            }
            else
            {
                // A list that does not start with a section node still needs somewhere to put its
                // content: create a plain section with an empty body on demand.
                if (importedSection == null)
                {
                    importedSection = new Section(dstDoc);
                    importedSection.AppendChild(new Body(dstDoc));
                    dstDoc.AppendChild(importedSection);
                }

                Node importNode = importer.ImportNode(node, true);
                importedSection.Body.AppendChild(importNode);
            }
        }

        return dstDoc;
    }
}

suicheng · March 28, 2025, 2:18am

感谢您提供的代码，它对我很有帮助，解决了我的问题，十分感谢！