Below are the methods we used:
/// <summary>
/// Builds a new document from the content located between the start of the
/// bookmark named by <c>bookMarks[ColumnID - 1]</c> and the start of the bookmark
/// named by <c>bookMarks[ColumnID]</c>, dropping a leading heading paragraph.
/// </summary>
/// <param name="doc">Source document that contains both bookmarks.</param>
/// <param name="ColumnID">
/// Index into the <c>bookMarks</c> member (declared outside this method):
/// entry <c>ColumnID - 1</c> names the starting bookmark and entry
/// <c>ColumnID</c> names the bookmark whose start ends the extraction.
/// </param>
/// <returns>
/// The extracted document when its plain-text rendering is non-empty after
/// trimming; otherwise a new blank <see cref="Document"/>.
/// </returns>
public Document ExtractContentFromBookmarks(Document doc, int ColumnID)
{
    // NOTE(review): the bookmark lookups are dereferenced without a guard, so a
    // name that is not present in the document surfaces as a NullReferenceException.
    BookmarkStart bookmarkStart = doc.Range.Bookmarks[bookMarks[ColumnID - 1].ToString()].BookmarkStart;
    BookmarkStart bookmarkEnd = doc.Range.Bookmarks[bookMarks[ColumnID].ToString()].BookmarkStart;

    // Extract the block-level content between the two markers (isInclusive = false)
    // and import it into a standalone document.
    ArrayList extractedNodes = ExtractContent(bookmarkStart, bookmarkEnd, false);
    Document doc2 = GenerateDocument(doc, extractedNodes);

    // Delete the section name: the first paragraph is removed when it is a heading,
    // or when its style name merely contains "Heading".
    var firstParagraph = doc2.FirstSection.Body.FirstParagraph;
    if (firstParagraph != null)
    {
        var format = firstParagraph.ParagraphFormat;
        bool looksLikeHeading = format.IsHeading;
        if (!looksLikeHeading)
        {
            string styleName = format.StyleName;
            looksLikeHeading = !string.IsNullOrEmpty(styleName) && styleName.Contains("Heading");
        }

        if (looksLikeHeading)
        {
            firstParagraph.Remove();
        }
    }

    // Only hand back the extracted document when it actually carries text;
    // the blank fallback is now created only when it is needed.
    string secContenttext = doc2.ToString(SaveFormat.Text).Trim();
    if (secContenttext != "")
    {
        return doc2;
    }

    return new Document();
}
/// <summary>
/// Removes a leading numbering prefix made of Roman-numeral letters from a
/// section name and returns the trimmed remainder.
/// </summary>
/// <param name="SectionName">Raw section title, possibly prefixed with numbering.</param>
/// <returns>
/// The trimmed name with the matched prefix removed. If an exception occurs
/// (for example when <paramref name="SectionName"/> is null) it is logged and the
/// value held at that point is returned unchanged.
/// </returns>
public string GetSectionNameFromString(string SectionName)
{
    try
    {
        SectionName = SectionName.Trim();

        // Each pattern matches, anchored at the start, up to nine runs of
        // Roman-numeral letters, each optionally followed by a dot and spaces.
        // NOTE(review): the character classes are plain letters, so a leading
        // ordinary word made only of those letters (e.g. "mild") is stripped too.
        string regexRomanLower = @"^([ivxlcdm]+\.?[ ]{0,})?([ivxlcdm]+\.?[ ]{0,})?([ivxlcdm]+\.?[ ]{0,})?([ivxlcdm]+\.?[ ]{0,})?([ivxlcdm]+\.?[ ]{0,})?([ivxlcdm]+\.?[ ]{0,})?([ivxlcdm]+\.?[ ]{0,})?([ivxlcdm]+\.?[ ]{0,})?([ivxlcdm]+\.?[ ]{0,})";
        string regexRomanUpper = @"^([IVXLCDM]+\.?[ ]{0,})?([IVXLCDM]+\.?[ ]{0,})?([IVXLCDM]+\.?[ ]{0,})?([IVXLCDM]+\.?[ ]{0,})?([IVXLCDM]+\.?[ ]{0,})?([IVXLCDM]+\.?[ ]{0,})?([IVXLCDM]+\.?[ ]{0,})?([IVXLCDM]+\.?[ ]{0,})?([IVXLCDM]+\.?[ ]{0,})";

        // The lowercase form is tried first; the uppercase form is only
        // considered when the lowercase one did not match.
        if (Regex.IsMatch(SectionName, regexRomanLower))
        {
            SectionName = Regex.Replace(SectionName, regexRomanLower, " ");
        }
        else if (Regex.IsMatch(SectionName, regexRomanUpper))
        {
            SectionName = Regex.Replace(SectionName, regexRomanUpper, " ");
        }

        // The prefix was replaced by a single space; trim it away.
        SectionName = SectionName.Trim();
    }
    catch (Exception ex)
    {
        // Best effort: record the problem and fall through to return what we have.
        utility.writelogfile("Problem occured in " + MethodBase.GetCurrentMethod().Name + " method : " + ex.Message);
    }

    return SectionName;
}
/// <summary>
/// Collects clones of the block-level nodes that span from <paramref name="startNode"/>
/// to <paramref name="endNode"/>, delegating the first and last blocks to
/// <c>ProcessMarker</c>.
/// </summary>
/// <param name="startNode">Marker node at which extraction begins.</param>
/// <param name="endNode">Marker node at which extraction stops.</param>
/// <param name="isInclusive">Passed through unchanged to <c>ProcessMarker</c>.</param>
/// <returns>An <see cref="ArrayList"/> holding the extracted node copies.</returns>
public ArrayList ExtractContent(Node startNode, Node endNode, bool isInclusive)
{
    // Let the shared validator reject unusable marker nodes before any work is done.
    VerifyParameterNodes(startNode, endNode);

    ArrayList collected = new ArrayList();

    // Walk up from each marker to its ancestor that sits directly inside a Body;
    // the original markers stay available in the parameters for ProcessMarker.
    Node firstBlock = startNode;
    while (firstBlock.ParentNode.NodeType != NodeType.Body)
    {
        firstBlock = firstBlock.ParentNode;
    }

    Node lastBlock = endNode;
    while (lastBlock.ParentNode.NodeType != NodeType.Body)
    {
        lastBlock = lastBlock.ParentNode;
    }

    bool onFirstBlock = true;
    Node cursor = firstBlock;

    while (true)
    {
        // Work on a deep copy so the source document is left untouched.
        CompositeNode copy = (CompositeNode)cursor.Clone(true);
        bool onLastBlock = cursor.Equals(lastBlock);
        bool wasFirstBlock = onFirstBlock;

        if (onFirstBlock)
        {
            ProcessMarker(copy, collected, startNode, isInclusive, true, onLastBlock);
            onFirstBlock = false;
        }

        // Checked separately because the first and last block may be the same node.
        if (onLastBlock)
        {
            ProcessMarker(copy, collected, endNode, isInclusive, false, true);
            break;
        }

        if (!wasFirstBlock)
        {
            // An ordinary block between the markers: keep the whole copy.
            collected.Add(copy);
        }

        // Advance; when the current body is exhausted, continue with the first
        // child of the following section's body.
        // NOTE(review): there is no guard for a missing following section.
        Node following = cursor.NextSibling;
        if (following == null)
        {
            Aspose.Words.Section nextSection = (Aspose.Words.Section)cursor.GetAncestor(NodeType.Section).NextSibling;
            following = nextSection.Body.FirstChild;
        }

        cursor = following;
    }

    return collected;
}
/// <summary>
/// Creates a new document whose first section body contains imported copies of
/// the supplied nodes, keeping their source formatting.
/// </summary>
/// <param name="srcDoc">Document the nodes originate from; used as the import source.</param>
/// <param name="nodes">List of <c>Node</c> objects to import, in order.</param>
/// <returns>The newly built document.</returns>
public Aspose.Words.Document GenerateDocument(Aspose.Words.Document srcDoc, ArrayList nodes)
{
    Aspose.Words.Document result = new Aspose.Words.Document();

    // Start from an empty body: clear whatever the blank document's body holds.
    var targetBody = result.FirstSection.Body;
    targetBody.RemoveAllChildren();

    // Nodes must be imported into the destination document before they can be
    // appended; the importer keeps the source formatting.
    NodeImporter nodeImporter = new NodeImporter(srcDoc, result, ImportFormatMode.KeepSourceFormatting);
    foreach (Node sourceNode in nodes)
    {
        targetBody.AppendChild(nodeImporter.ImportNode(sourceNode, true));
    }

    return result;
}