/// <summary>
/// Copies the body-level blocks located between two nodes of <paramref name="doc"/> into a
/// new document, importing them with <see cref="ImportFormatMode.KeepSourceFormatting"/>.
/// </summary>
/// <remarks>
/// The markers are resolved to the direct child of a section body that contains them, and
/// whole blocks are imported. Walking blocks instead of every paragraph in the document keeps
/// tables intact: enumerating paragraphs recursively would append table-cell paragraphs
/// straight to the new body and drop the table structure.
/// The block containing <paramref name="startNode"/> is included; the block containing
/// <paramref name="endNode"/> is excluded. If the end block is never reached, extraction runs
/// to the end of the document. If the start block is never reached, nothing is extracted.
/// </remarks>
/// <param name="doc">Source document to read from.</param>
/// <param name="startNode">Node marking the first block to copy.</param>
/// <param name="endNode">Node marking the block at which copying stops (not copied).</param>
/// <returns>
/// A new document whose first section body holds the imported blocks, appended after the
/// content a freshly constructed document already contains.
/// </returns>
private static Aspose.Words.Document ExtractContentBetweenNodes(Aspose.Words.Document doc, Node startNode, Node endNode)
{
    Aspose.Words.Document newDoc = new Aspose.Words.Document();

    // A single importer is reused for every block; Aspose documents NodeImporter as the
    // preferred way to import many nodes from one document into another.
    NodeImporter importer = new NodeImporter(doc, newDoc, ImportFormatMode.KeepSourceFormatting);
    Body targetBody = newDoc.FirstSection.Body;

    Node startBlock = FindBodyLevelBlock(startNode);
    Node endBlock = FindBodyLevelBlock(endNode);
    bool isExtracting = false;

    foreach (Section section in doc.Sections)
    {
        if (section.Body == null)
        {
            continue;
        }

        foreach (Node block in section.Body.GetChildNodes(NodeType.Any, false))
        {
            if (block == startBlock)
            {
                isExtracting = true;
            }

            // Stop before importing the end block, so nothing has to be removed afterwards.
            if (block == endBlock)
            {
                return newDoc;
            }

            if (!isExtracting)
            {
                continue;
            }

            try
            {
                Node importedBlock = importer.ImportNode(block, true);
                targetBody.AppendChild(importedBlock);
            }
            catch (Exception ex)
            {
                // Stay best-effort (skip the block that failed), but do not hide the failure.
                System.Diagnostics.Trace.TraceWarning("ExtractContentBetweenNodes: failed to import a block: {0}", ex);
            }
        }
    }

    return newDoc;
}

/// <summary>
/// Returns <paramref name="node"/> itself or its ancestor whose parent is a body,
/// or null when the node is null or is not located inside a body.
/// </summary>
private static Node FindBodyLevelBlock(Node node)
{
    while (node != null && node.ParentNode != null && node.ParentNode.NodeType != NodeType.Body)
    {
        node = node.ParentNode;
    }

    // Reaching a node without a parent means no body was found on the way up.
    return (node != null && node.ParentNode != null) ? node : null;
}
我想使用 Aspose.Words 将两个大纲(outline)标题之间的内容复制到新文档中,并且保留原文档的格式。但是使用了 ImportNode(node, true, ImportFormatMode.KeepSourceFormatting) 方法之后,格式还是变了:
源文档中有表格,但是新文档中的表格没有了,并且格式也没了。请问这是什么原因造成的?我使用的是 23.8 版本。
@suicheng 例如,您可以更新以下方法来获取节(Section)节点,并在创建文档时使用它:
/// <summary>
/// Collects clones of the block-level nodes located between two marker nodes.
/// </summary>
/// <remarks>
/// The first item of the returned list is a deep clone of the section that contains
/// <paramref name="startNode"/>; the remaining items are clones of body-level nodes.
/// The first and last blocks are passed to ProcessMarker, which trims the clone when the
/// marker is nested inside the block rather than being the block itself.
/// </remarks>
/// <param name="startNode">Node where the extraction starts.</param>
/// <param name="endNode">Node where the extraction ends.</param>
/// <param name="isInclusive">Whether the marker nodes themselves are part of the result.</param>
/// <returns>The section clone followed by the cloned content nodes.</returns>
public static List<Node> ExtractContent(Node startNode, Node endNode, bool isInclusive)
{
    // First, check that the nodes passed to this method are valid for use.
    VerifyParameterNodes(startNode, endNode);

    // Create a list to store the extracted nodes.
    List<Node> nodes = new List<Node>();

    // Include the section of the start node so its headers and footers travel with the content.
    Section section = (Section)startNode.GetAncestor(NodeType.Section);
    nodes.Add(section.Clone(true));

    // If either marker is part of a comment, including the comment itself, we need to move the pointer
    // forward to the Comment Node found after the CommentRangeEnd node.
    if (endNode.NodeType == NodeType.CommentRangeEnd && isInclusive)
    {
        Node node = FindNextNode(NodeType.Comment, endNode.NextSibling);
        if (node != null)
            endNode = node;
    }

    // Keep a record of the original nodes passed to this method to split marker nodes if needed.
    Node originalStartNode = startNode;
    Node originalEndNode = endNode;

    // Extract content based on block-level nodes (paragraphs and tables). Traverse through parent nodes to find them.
    // We will split the first and last nodes' content, depending if the marker nodes are inline.
    startNode = GetAncestorInBody(startNode);
    endNode = GetAncestorInBody(endNode);

    bool isExtracting = true;
    bool isStartingNode = true;

    // The current node we are extracting from the document.
    Node currNode = startNode;

    // Begin extracting content. Process all block-level nodes and specifically split the first
    // and last nodes when needed, so paragraph formatting is retained.
    while (isExtracting)
    {
        // Clone the current node and its children to obtain a copy.
        Node cloneNode = currNode.Clone(true);
        bool isEndingNode = currNode.Equals(endNode);

        if (isStartingNode || isEndingNode)
        {
            // Each marker is handled separately. The end marker goes first: trimming is done
            // by child index, so the tail must be cut before the head shifts the indexes.
            if (isEndingNode)
            {
                // !isStartingNode: don't add the node twice if the markers are the same node.
                ProcessMarker(cloneNode, nodes, originalEndNode, currNode, isInclusive,
                    false, !isStartingNode, false);
                isExtracting = false;
            }

            // Separate conditional: the block-level start and end markers may be the same node.
            if (isStartingNode)
            {
                ProcessMarker(cloneNode, nodes, originalStartNode, currNode, isInclusive,
                    true, true, false);
                isStartingNode = false;
            }
        }
        else
        {
            // Node is not a start or end marker, simply add the copy to the list.
            nodes.Add(cloneNode);
        }

        if (currNode.NextSibling == null && isExtracting)
        {
            // The rest of the content lives in a following section. Skip sections that have no
            // body or an empty body instead of dereferencing a null block.
            Section nextSection = (Section)currNode.GetAncestor(NodeType.Section).NextSibling;
            while (nextSection != null && (nextSection.Body == null || nextSection.Body.FirstChild == null))
                nextSection = (Section)nextSection.NextSibling;

            if (nextSection == null)
                throw new System.InvalidOperationException("The end node was not reached before the end of the document");

            currNode = nextSection.Body.FirstChild;
        }
        else
        {
            // Move to the next node in the body.
            currNode = currNode.NextSibling;
        }
    }

    // For compatibility with mode with inline bookmarks, add the next paragraph (empty).
    if (isInclusive && originalEndNode == endNode && !originalEndNode.IsComposite)
        IncludeNextParagraph(endNode, nodes);

    // Return the nodes between the node markers.
    return nodes;
}
/// <summary>
/// Builds a new document from a list of extracted nodes.
/// </summary>
/// <remarks>
/// Section nodes in the list contribute only their headers and footers, which are added to
/// the first section of the new document. Every other node is imported and appended to that
/// section's body. All imports keep the source formatting.
/// </remarks>
/// <param name="srcDoc">Document the nodes originate from.</param>
/// <param name="nodes">Nodes to import, in the order they should appear.</param>
/// <returns>The newly created document.</returns>
public static Document GenerateDocument(Document srcDoc, List<Node> nodes)
{
    Document result = new Document();

    // Everything is placed into the section a new document already has.
    Section targetSection = result.FirstSection;
    NodeImporter nodeImporter = new NodeImporter(srcDoc, result, ImportFormatMode.KeepSourceFormatting);

    foreach (Node item in nodes)
    {
        if (item.NodeType != NodeType.Section)
        {
            // Regular content: import it and append it to the target body.
            Node importedContent = nodeImporter.ImportNode(item, true);
            targetSection.Body.AppendChild(importedContent);
            continue;
        }

        // A section entry only carries headers and footers over to the target section.
        Section sourceSection = (Section)item;
        foreach (HeaderFooter headerFooter in sourceSection.HeadersFooters)
        {
            targetSection.HeadersFooters.Add(nodeImporter.ImportNode(headerFooter, true));
        }
    }

    return result;
}
@suicheng 下面是改进后的代码,它可以收集并使用开始节点和结束节点之间的所有节(Section),而不仅仅是一个节:
/// <summary>
/// Extracts the content located between two nodes of a document and rebuilds it as a new
/// document, carrying over every section the content passes through.
/// </summary>
internal class ExtractContentHelper
{
    /// <summary>
    /// Collects clones of the block-level nodes located between two marker nodes.
    /// </summary>
    /// <remarks>
    /// Whenever the traversal enters a section, a deep clone of that section is added to the
    /// list before the content nodes taken from it. <see cref="GenerateDocument"/> relies on
    /// this ordering to recreate the sections.
    /// </remarks>
    /// <param name="startNode">Node where the extraction starts.</param>
    /// <param name="endNode">Node where the extraction ends.</param>
    /// <param name="isInclusive">Whether the marker nodes themselves are part of the result.</param>
    /// <returns>Section clones interleaved with the cloned content nodes.</returns>
    /// <exception cref="ArgumentException">The markers fail <see cref="VerifyParameterNodes"/>.</exception>
    /// <exception cref="InvalidOperationException">
    /// The document ends before the block containing the end node is reached.
    /// </exception>
    public static List<Node> ExtractContent(Node startNode, Node endNode, bool isInclusive)
    {
        // First, check that the nodes passed to this method are valid for use.
        VerifyParameterNodes(startNode, endNode);

        // Create a list to store the extracted nodes.
        List<Node> nodes = new List<Node>();

        // If either marker is part of a comment, including the comment itself, we need to move the pointer
        // forward to the Comment Node found after the CommentRangeEnd node.
        if (endNode.NodeType == NodeType.CommentRangeEnd && isInclusive)
        {
            Node node = FindNextNode(NodeType.Comment, endNode.NextSibling);
            if (node != null)
                endNode = node;
        }

        // Keep a record of the original nodes passed to this method to split marker nodes if needed.
        Node originalStartNode = startNode;
        Node originalEndNode = endNode;

        // Extract content based on block-level nodes (paragraphs and tables). Traverse through parent nodes to find them.
        // We will split the first and last nodes' content, depending if the marker nodes are inline.
        startNode = GetAncestorInBody(startNode);
        endNode = GetAncestorInBody(endNode);

        bool isExtracting = true;
        bool isStartingNode = true;

        // The current node we are extracting from the document.
        Node currNode = startNode;

        // Section whose clone was added most recently. Blocks are visited in document order,
        // so comparing by reference is enough to notice that a new section has been entered.
        // (Comparing section text would merge distinct sections that happen to have equal text.)
        Node lastSection = null;

        // Begin extracting content. Process all block-level nodes and specifically split the first
        // and last nodes when needed, so paragraph formatting is retained.
        while (isExtracting)
        {
            Node section = currNode.GetAncestor(NodeType.Section);
            if (section != lastSection)
            {
                nodes.Add(section.Clone(true));
                lastSection = section;
            }

            // Clone the current node and its children to obtain a copy.
            Node cloneNode = currNode.Clone(true);
            bool isEndingNode = currNode.Equals(endNode);

            if (isStartingNode || isEndingNode)
            {
                // Each marker is handled separately. The end marker goes first: trimming is done
                // by child index, so the tail must be cut before the head shifts the indexes.
                if (isEndingNode)
                {
                    // !isStartingNode: don't add the node twice if the markers are the same node.
                    ProcessMarker(cloneNode, nodes, originalEndNode, currNode, isInclusive,
                        false, !isStartingNode, false);
                    isExtracting = false;
                }

                // Separate conditional: the block-level start and end markers may be the same node.
                if (isStartingNode)
                {
                    ProcessMarker(cloneNode, nodes, originalStartNode, currNode, isInclusive,
                        true, true, false);
                    isStartingNode = false;
                }
            }
            else
            {
                // Node is not a start or end marker, simply add the copy to the list.
                nodes.Add(cloneNode);
            }

            if (currNode.NextSibling == null && isExtracting)
            {
                // The rest of the content lives in a following section. Skip sections that have
                // no body or an empty body instead of dereferencing a null block.
                Section nextSection = (Section)currNode.GetAncestor(NodeType.Section).NextSibling;
                while (nextSection != null && (nextSection.Body == null || nextSection.Body.FirstChild == null))
                    nextSection = (Section)nextSection.NextSibling;

                if (nextSection == null)
                    throw new InvalidOperationException("The end node was not reached before the end of the document");

                currNode = nextSection.Body.FirstChild;
            }
            else
            {
                // Move to the next node in the body.
                currNode = currNode.NextSibling;
            }
        }

        // For compatibility with mode with inline bookmarks, add the next paragraph (empty).
        if (isInclusive && originalEndNode == endNode && !originalEndNode.IsComposite)
            IncludeNextParagraph(endNode, nodes);

        // Return the nodes between the node markers.
        return nodes;
    }

    /// <summary>
    /// Validates the marker nodes: both must be non-null, belong to the same document, be
    /// located inside a body, and the end node must not precede the start node.
    /// </summary>
    /// <exception cref="ArgumentException">Any of the checks fails.</exception>
    private static void VerifyParameterNodes(Node startNode, Node endNode)
    {
        // The order in which these checks are done is important.
        if (startNode == null)
            throw new ArgumentException("Start node cannot be null");
        if (endNode == null)
            throw new ArgumentException("End node cannot be null");

        if (!startNode.Document.Equals(endNode.Document))
            throw new ArgumentException("Start node and end node must belong to the same document");

        if (startNode.GetAncestor(NodeType.Body) == null || endNode.GetAncestor(NodeType.Body) == null)
            throw new ArgumentException("Start node and end node must be a child or descendant of a body");

        // Check the end node is after the start node in the DOM tree.
        // First, check if they are in different sections, then if they're not,
        // check their position in the body of the same section.
        Section startSection = (Section)startNode.GetAncestor(NodeType.Section);
        Section endSection = (Section)endNode.GetAncestor(NodeType.Section);

        int startIndex = startSection.ParentNode.IndexOf(startSection);
        int endIndex = endSection.ParentNode.IndexOf(endSection);

        if (startIndex == endIndex)
        {
            if (startSection.Body.IndexOf(GetAncestorInBody(startNode)) >
                endSection.Body.IndexOf(GetAncestorInBody(endNode)))
                throw new ArgumentException("The end node must be after the start node in the body");
        }
        else if (startIndex > endIndex)
            throw new ArgumentException("The section of end node must be after the section start node");
    }

    /// <summary>
    /// Searches depth-first, starting at <paramref name="fromNode"/> and continuing through
    /// its children and following siblings, for the first node of the given type.
    /// </summary>
    /// <returns>The matching node, or null when there is none (or <paramref name="fromNode"/> is null).</returns>
    private static Node FindNextNode(NodeType nodeType, Node fromNode)
    {
        if (fromNode == null || fromNode.NodeType == nodeType)
            return fromNode;

        if (fromNode.IsComposite)
        {
            Node node = FindNextNode(nodeType, ((CompositeNode)fromNode).FirstChild);
            if (node != null)
                return node;
        }

        return FindNextNode(nodeType, fromNode.NextSibling);
    }

    /// <summary>
    /// Handles the block that contains a marker: trims the cloned block so that only the part
    /// inside the extracted range remains, and adds it to the list when appropriate.
    /// </summary>
    /// <param name="cloneNode">Deep clone of <paramref name="blockLevelAncestor"/>; modified in place.</param>
    /// <param name="nodes">List receiving the extracted nodes.</param>
    /// <param name="node">The marker node as located in the source document.</param>
    /// <param name="blockLevelAncestor">Body-level node that is, or contains, the marker.</param>
    /// <param name="isInclusive">Whether the marker itself belongs to the result.</param>
    /// <param name="isStartMarker">True for the start marker, false for the end marker.</param>
    /// <param name="canAdd">Whether the clone may be added to <paramref name="nodes"/> at all.</param>
    /// <param name="forceAdd">Add the clone even if trimming left it without children.</param>
    private static void ProcessMarker(Node cloneNode, List<Node> nodes, Node node, Node blockLevelAncestor,
        bool isInclusive, bool isStartMarker, bool canAdd, bool forceAdd)
    {
        // If we are dealing with a block-level node, see if it should be included and add it to the list.
        if (node == blockLevelAncestor)
        {
            if (canAdd && isInclusive)
                nodes.Add(cloneNode);
            return;
        }

        // cloneNode is a clone of blockLevelAncestor. Since node != blockLevelAncestor, the
        // block is an ancestor of the marker and therefore a composite node.
        System.Diagnostics.Debug.Assert(cloneNode.IsComposite);

        // If a marker is a FieldStart node check if it's to be included or not.
        // We assume for simplicity that the FieldStart and FieldEnd appear in the same paragraph.
        if (node.NodeType == NodeType.FieldStart)
        {
            // If the marker is a start node and is not included, skip to the end of the field.
            // If the marker is an end node and is to be included, then move to the end field so the field will not be removed.
            if (isStartMarker && !isInclusive || !isStartMarker && isInclusive)
            {
                while (node.NextSibling != null && node.NodeType != NodeType.FieldEnd)
                    node = node.NextSibling;
            }
        }

        // Support a case if the marker node is on the third level of the document body or lower.
        List<Node> nodeBranch = FillSelfAndParents(node, blockLevelAncestor);

        // Walk from the block down to the marker, locating each counterpart in the clone by
        // child index and trimming the siblings that fall outside the range at every level.
        Node currentCloneNode = cloneNode;
        for (int i = nodeBranch.Count - 1; i >= 0; i--)
        {
            Node currentNode = nodeBranch[i];
            int nodeIndex = currentNode.ParentNode.IndexOf(currentNode);
            currentCloneNode = ((CompositeNode)currentCloneNode).GetChildNodes(NodeType.Any, false)[nodeIndex];

            // Ancestors of the marker (i > 0) are always kept; only the marker itself obeys isInclusive.
            RemoveNodesOutsideOfRange(currentCloneNode, isInclusive || (i > 0), isStartMarker);
        }

        // After processing, the composite node may have become empty; add it only if it still
        // has content, unless the caller forces it.
        if (canAdd &&
            (forceAdd || ((CompositeNode)cloneNode).HasChildNodes))
            nodes.Add(cloneNode);
    }

    /// <summary>
    /// Removes the siblings of <paramref name="markerNode"/> that lie outside the range:
    /// those before it for a start marker, those after it for an end marker. The marker
    /// itself is removed as well unless <paramref name="isInclusive"/> is true.
    /// </summary>
    private static void RemoveNodesOutsideOfRange(Node markerNode, bool isInclusive, bool isStartMarker)
    {
        bool isProcessing = true;
        // For a start marker removal begins immediately; for an end marker it begins at the marker.
        bool isRemoving = isStartMarker;
        Node nextNode = markerNode.ParentNode.FirstChild;

        while (isProcessing && nextNode != null)
        {
            Node currentNode = nextNode;
            bool isSkip = false;

            if (currentNode.Equals(markerNode))
            {
                if (isStartMarker)
                {
                    // Nothing after the start marker is touched.
                    isProcessing = false;
                    if (isInclusive)
                        isRemoving = false;
                }
                else
                {
                    // Everything after the end marker is removed.
                    isRemoving = true;
                    if (isInclusive)
                        isSkip = true;
                }
            }

            // Advance before removing, so the traversal does not depend on the removed node.
            nextNode = nextNode.NextSibling;
            if (isRemoving && !isSkip)
                currentNode.Remove();
        }
    }

    /// <summary>
    /// Returns <paramref name="node"/> and its ancestors, ordered from the node upwards,
    /// stopping before <paramref name="tillNode"/> (which is not included).
    /// </summary>
    private static List<Node> FillSelfAndParents(Node node, Node tillNode)
    {
        List<Node> list = new List<Node>();
        Node currentNode = node;

        while (currentNode != tillNode)
        {
            list.Add(currentNode);
            currentNode = currentNode.ParentNode;
        }

        return list;
    }

    /// <summary>
    /// Finds the next paragraph after <paramref name="node"/> and, if there is one, adds a
    /// clone of its body-level block to the list, trimmed at that paragraph's first child
    /// (or at the paragraph itself when it has no children).
    /// </summary>
    private static void IncludeNextParagraph(Node node, List<Node> nodes)
    {
        Paragraph paragraph = (Paragraph)FindNextNode(NodeType.Paragraph, node.NextSibling);
        if (paragraph != null)
        {
            // Move to the first child to include paragraphs without content.
            Node markerNode = paragraph.HasChildNodes ? paragraph.FirstChild : paragraph;
            Node rootNode = GetAncestorInBody(paragraph);

            ProcessMarker(rootNode.Clone(true), nodes, markerNode, rootNode,
                markerNode == paragraph, false, true, true);
        }
    }

    /// <summary>
    /// Walks up from <paramref name="startNode"/> until reaching the node whose parent is a body.
    /// The node is expected to be located inside a body; VerifyParameterNodes checks this for
    /// the marker nodes before this method is used on them.
    /// </summary>
    private static Node GetAncestorInBody(Node startNode)
    {
        while (startNode.ParentNode.NodeType != NodeType.Body)
            startNode = startNode.ParentNode;
        return startNode;
    }

    /// <summary>
    /// Builds a new document from a node list produced by <see cref="ExtractContent"/>.
    /// </summary>
    /// <remarks>
    /// Each section node in the list starts a new section in the result (section node, an
    /// empty body, and the headers/footers). The nodes that follow it are imported into that
    /// section's body. If content nodes appear before any section node, an empty section is
    /// created to hold them. All imports keep the source formatting.
    /// </remarks>
    /// <param name="srcDoc">Document the nodes originate from.</param>
    /// <param name="nodes">Section and content nodes, in document order.</param>
    /// <returns>The newly created document.</returns>
    public static Document GenerateDocument(Document srcDoc, List<Node> nodes)
    {
        Document dstDoc = new Document();

        // Drop the default section; sections are recreated from the section nodes in the list.
        dstDoc.FirstSection.Remove();

        // Import each node from the list into the new document. Keep the original formatting of the node.
        NodeImporter importer = new NodeImporter(srcDoc, dstDoc, ImportFormatMode.KeepSourceFormatting);
        Section importedSection = null;

        foreach (Node node in nodes)
        {
            if (node.NodeType == NodeType.Section)
            {
                Section srcSection = (Section)node;

                // Shallow imports: the body content is supplied by the content nodes that
                // follow the section in the list, not by the section clone.
                importedSection = (Section)importer.ImportNode(srcSection, false);
                if (srcSection.Body != null)
                    importedSection.AppendChild(importer.ImportNode(srcSection.Body, false));
                else
                    importedSection.AppendChild(new Body(dstDoc));

                foreach (HeaderFooter hf in srcSection.HeadersFooters)
                    importedSection.HeadersFooters.Add(importer.ImportNode(hf, true));

                dstDoc.AppendChild(importedSection);
            }
            else
            {
                // The list did not start with a section node: create a section to hold the content.
                if (importedSection == null)
                {
                    importedSection = new Section(dstDoc);
                    importedSection.AppendChild(new Body(dstDoc));
                    dstDoc.AppendChild(importedSection);
                }

                Node importNode = importer.ImportNode(node, true);
                importedSection.Body.AppendChild(importNode);
            }
        }

        return dstDoc;
    }
}
感谢您提供的代码,它对我很有帮助,解决了我的问题,十分感谢!