We are using .NET 4.5, C# development environment.
Does this product (or another product that you publish) have the ability to extract the Table of Contents (TOC) from Word documents in both the .doc and .docx formats?
I am just looking for a plain text representation of the TOC - I do not need all the hidden tags etc.
// Prints the plain-text form of every TOC entry found in the document.
// A hyperlink field is treated as a TOC entry when its sub-address (the
// bookmark it points to) starts with "_Toc".
Document doc = new Document("E:\\SampleDocs\\sample-input.docx");
foreach (FieldStart field in doc.GetChildNodes(NodeType.FieldStart, true))
{
    // Only hyperlink fields can be TOC entries; skip everything else.
    if (field.FieldType != FieldType.FieldHyperlink)
    {
        continue;
    }

    FieldHyperlink hyperlink = (FieldHyperlink)field.GetField();

    // "_Toc" is a fixed identifier prefix, not linguistic text, so compare ordinally
    // to keep the result independent of the current thread culture.
    string target = hyperlink.SubAddress;
    if (target == null || !target.StartsWith("_Toc", StringComparison.Ordinal))
    {
        continue;
    }

    Paragraph tocItem = (Paragraph)field.GetAncestor(NodeType.Paragraph);
    if (tocItem == null)
    {
        continue;
    }

    // Text representation of the whole TOC entry.
    Console.WriteLine(tocItem.ToString(SaveFormat.Text).Trim());

    // To get the page numbers only, iterate tocItem.Range.Fields instead and print
    // nestedField.DisplayResult for each field whose Type is FieldType.FieldPageRef
    // (optionally calling nestedField.Unlink() first).
}
Please take the sample document from my previous post and try running the following code:
// Load the sample document and search it for "Heading 2". The replacement string is
// empty; the ReplaceEvaluator callback is invoked for each match.
Document doc = new Document("E:\\SampleDocs\\sample-input.docx");
FindReplaceOptions replaceOptions = new FindReplaceOptions
{
    ReplacingCallback = new ReplaceEvaluator()
};
doc.Range.Replace("Heading 2", "", replaceOptions);
/// <summary>
/// Find/replace callback that prints the start page of every match to the console
/// and leaves the matched text in place.
/// </summary>
private class ReplaceEvaluator : IReplacingCallback
{
    /// <summary>
    /// Isolates the matched text into its own run(s), looks up the page on which the
    /// first of those runs starts, and writes it to the console.
    /// </summary>
    /// <param name="e">Details of the current match supplied by the find/replace engine.</param>
    /// <returns>Always <see cref="ReplaceAction.Skip"/>.</returns>
    ReplaceAction IReplacingCallback.Replacing(ReplacingArgs e)
    {
        // This is the node that contains either the beginning or the complete match.
        Node currentNode = e.MatchNode;

        // The first (and maybe the only) run can contain text before the match;
        // in that case split it so that currentNode begins exactly at the match.
        if (e.MatchOffset > 0)
        {
            currentNode = SplitRun((Run)currentNode, e.MatchOffset);
        }

        // Only the first node of the match is needed for the page lookup, so remember
        // just that one instead of collecting every node in a list.
        Node firstMatchNode = null;

        // Walk over all runs that lie completely inside the match.
        int remainingLength = e.Match.Value.Length;
        while (remainingLength > 0
            && currentNode != null
            && currentNode.GetText().Length <= remainingLength)
        {
            if (firstMatchNode == null)
            {
                firstMatchNode = currentNode;
            }

            remainingLength -= currentNode.GetText().Length;

            // Advance to the next Run, stepping over other siblings such as BookmarkStart.
            do
            {
                currentNode = currentNode.NextSibling;
            }
            while (currentNode != null && currentNode.NodeType != NodeType.Run);
        }

        // Split the last run if the match ends in the middle of it; after the split
        // currentNode holds only the matched part.
        if (currentNode != null && remainingLength > 0)
        {
            SplitRun((Run)currentNode, remainingLength);
            if (firstMatchNode == null)
            {
                firstMatchNode = currentNode;
            }
        }

        // Nothing was matched to a node (e.g. an empty match): there is no page to
        // report, so skip instead of failing on a missing first element.
        if (firstMatchNode == null)
        {
            return ReplaceAction.Skip;
        }

        LayoutCollector collector = new LayoutCollector((Document)e.MatchNode.Document);
        int startPage = collector.GetStartPageIndex(firstMatchNode);
        Console.WriteLine("Page number is {0}", startPage);

        return ReplaceAction.Skip;
    }

    /// <summary>
    /// Splits <paramref name="run"/> at <paramref name="position"/>: the original run keeps
    /// the text before the position and a cloned run holding the rest is inserted after it.
    /// </summary>
    /// <param name="run">The run to split; it is modified in place.</param>
    /// <param name="position">Character index within the run's text at which to split.</param>
    /// <returns>The newly inserted run containing the text from <paramref name="position"/> onwards.</returns>
    private static Run SplitRun(Run run, int position)
    {
        Run afterRun = (Run)run.Clone(true);
        afterRun.Text = run.Text.Substring(position);
        run.Text = run.Text.Substring(0, position);
        run.ParentNode.InsertAfter(afterRun, run);
        return afterRun;
    }
}
This might be because you are using a different account now than the one you used to create this forum thread. I am attaching the file again here for your reference: