What’s the best way to extract the formatted text/HTML from a bookmark in a document?
I’ve been using the code below for a while, but it seems to have some issues: extra paragraphs and spaces are being pulled in.
How can this be done without the extra spaces?
Code:
// Look up the bookmark by name in the cloned document.
Bookmark mark = docClone.Range.Bookmarks[bookmarkName];

// Clear the headers and footers of every section in the clone.
foreach (Section section in docClone.Sections)
{
    section.HeadersFooters.Clear();
}

// Trim the clone down to the bookmarked range: first everything that
// precedes the bookmark start, then everything that follows the bookmark end.
RemoveContentBeforeNode(docClone, mark.BookmarkStart);
RemoveContentAfterNode(docClone, mark.BookmarkEnd);

// Finally remove the bookmark itself.
mark.Remove();
/// <summary>
/// Removes every node that precedes <paramref name="endNode"/> in document
/// order, keeping only the ancestors of <paramref name="endNode"/> so that it
/// stays attached to <paramref name="doc"/>.
/// </summary>
/// <param name="doc">Document used as the root (limit) of the traversal.</param>
/// <param name="endNode">Node before which all content is removed.</param>
private static void RemoveContentBeforeNode(Document doc, Node endNode)
{
    Node curNode = endNode.PreviousPreOrder(doc);
    while (curNode != null)
    {
        // Capture the predecessor first: once curNode is removed it can no
        // longer be used to continue the traversal.
        Node previousNode = curNode.PreviousPreOrder(doc);

        // Only ancestors of endNode must survive. Leaf nodes can never be
        // ancestors, so a single check covers both leaf and composite nodes.
        if (!IsAncestorOf(curNode, endNode))
        {
            curNode.Remove();
        }

        curNode = previousNode;
    }
}

/// <summary>
/// Returns true when <paramref name="candidate"/> is a (direct or indirect)
/// parent of <paramref name="node"/>. Walks the parent chain instead of
/// enumerating all descendants of the candidate.
/// </summary>
private static bool IsAncestorOf(Node candidate, Node node)
{
    for (Node parent = node.ParentNode; parent != null; parent = parent.ParentNode)
    {
        if (ReferenceEquals(parent, candidate))
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Removes every node that follows <paramref name="startNode"/> in document
/// order, then drops the last paragraph of the last section if its text is
/// empty after trimming.
/// </summary>
/// <param name="doc">Document used as the root (limit) of the traversal.</param>
/// <param name="startNode">Node after which all content is removed.</param>
private static void RemoveContentAfterNode(Document doc, Node startNode)
{
    Node curNode = startNode.NextPreOrder(doc);
    while (curNode != null)
    {
        // A node that follows startNode in pre-order can never be one of its
        // ancestors (ancestors always come first), so it is always safe to
        // remove the whole subtree. The successor must be taken from outside
        // that subtree and must climb to the parent's siblings when curNode is
        // the last child; using NextSibling alone stopped the loop early and
        // left later cells, rows, paragraphs and sections in the document.
        Node nextNode = NextNodeAfterSubtree(curNode, doc);
        curNode.Remove();
        curNode = nextNode;
    }

    // Drop a trailing paragraph that has no text left in it.
    Node lastParagraph = doc.LastSection.Body.LastParagraph;
    if (lastParagraph != null && string.IsNullOrEmpty(lastParagraph.GetText().Trim()))
    {
        lastParagraph.Remove();
    }
}

/// <summary>
/// Returns the first node that follows the whole subtree of
/// <paramref name="node"/> in document order, without leaving
/// <paramref name="root"/>; returns null when there is no such node.
/// </summary>
private static Node NextNodeAfterSubtree(Node node, Node root)
{
    for (Node current = node; current != null && !ReferenceEquals(current, root); current = current.ParentNode)
    {
        if (current.NextSibling != null)
        {
            return current.NextSibling;
        }
    }

    return null;
}
Hi Melissa,
Thanks for your inquiry. Yes, you can extract content (formatted text, paragraphs, tables, etc.) from inside Bookmark nodes. Please read the following article for more details:
Hope this helps you. If you face any issue, please attach your input Word document here. We will then provide you more information about your query along with code.