// Load the document to analyze.
Document doc = new Document(@"C:\Temp\in.docx");

// Split all Run nodes in the document so that each holds at most one word
// (a word keeps the space that follows it).
// The node list is copied with ToList() before iterating because SplitRun inserts
// new Run nodes into the document while this loop is running.
List<Run> runs = doc.GetChildNodes(NodeType.Run, true).Cast<Run>().ToList();
foreach (Run r in runs)
{
    Run current = r;
    while (true)
    {
        // Position just after the first space; 0 means the text contains no space.
        int splitPosition = current.Text.IndexOf(' ') + 1;

        // Stop when there is nothing to split off: either no space at all, or the
        // space is the last character (splitting there would only add an empty Run).
        if (splitPosition == 0 || splitPosition >= current.Text.Length)
        {
            break;
        }

        // Continue with the remainder, which may still contain further words.
        current = SplitRun(current, splitPosition);
    }
}
// Wrap all runs in the document with temporary bookmarks to make it possible to work
// with LayoutCollector and LayoutEnumerator.
runs = doc.GetChildNodes(NodeType.Run, true).Cast<Run>().ToList();
List<string> tmpBookmakrs = new List<string>();

// Remember the names of bookmarks that already exist in the document, so the generated
// "r{n}" names never clash with them; a clash would make a later lookup by name ambiguous.
// Names are compared case-insensitively to stay on the safe side.
HashSet<string> usedBookmarkNames = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
foreach (Bookmark existing in doc.Range.Bookmarks)
{
    usedBookmarkNames.Add(existing.Name);
}

int bkIndex = 0;
foreach (Run r in runs)
{
    // LayoutCollector and LayoutEnumerator do not work with nodes in headers/footers or in textboxes.
    if (r.GetAncestor(NodeType.HeaderFooter) != null || r.GetAncestor(NodeType.Shape) != null)
    {
        continue;
    }

    // Advance the counter until an unused name is found; Add returns false for a taken name.
    string candidateName;
    do
    {
        candidateName = string.Format("r{0}", bkIndex);
        bkIndex++;
    }
    while (!usedBookmarkNames.Add(candidateName));

    // Surround the run with the bookmark: start directly before it, end directly after it.
    BookmarkStart start = new BookmarkStart(doc, candidateName);
    BookmarkEnd end = new BookmarkEnd(doc, start.Name);
    r.ParentNode.InsertBefore(start, r);
    r.ParentNode.InsertAfter(end, r);
    tmpBookmakrs.Add(start.Name);
}
// Now we can use the collector and enumerator to get the runs per layout line of the document.
LayoutCollector collector = new LayoutCollector(doc);
LayoutEnumerator enumerator = new LayoutEnumerator(doc);

// Layout entity of the line that was printed last; used to detect when a new line starts.
object currentLine = null;
foreach (string bkName in tmpBookmakrs)
{
    Bookmark bk = doc.Range.Bookmarks[bkName];
    if (bk == null)
    {
        // The bookmark cannot be found any more; nothing to report for it.
        continue;
    }

    // Position the enumerator on the layout entity of the bookmark start.
    object entity = collector.GetEntity(bk.BookmarkStart);
    if (entity == null)
    {
        // The node has no layout entity, so it does not belong to any line.
        continue;
    }

    enumerator.Current = entity;

    // Climb up the layout tree until the enclosing line is reached. MoveParent returns
    // false when there is no parent left; without this check the loop would never end
    // for an entity that is not inside a line.
    bool lineFound = true;
    while (enumerator.Type != LayoutEntityType.Line)
    {
        if (!enumerator.MoveParent())
        {
            lineFound = false;
            break;
        }
    }

    if (!lineFound)
    {
        continue;
    }

    // A different line entity than the previous run's means a new line has started.
    if (currentLine != enumerator.Current)
    {
        currentLine = enumerator.Current;
        Console.WriteLine();
        Console.WriteLine("-------=========Start Of Line=========-------");
    }

    // The wrapped run is the node that directly follows the bookmark start.
    Run run = bk.BookmarkStart.NextSibling as Run;
    if (run != null)
    {
        Console.Write(run.Text);
    }
}
/// <summary>
/// Splits <paramref name="run"/> into two sibling runs at <paramref name="position"/>.
/// </summary>
/// <param name="run">The run to split; it keeps the text before <paramref name="position"/>.</param>
/// <param name="position">Zero-based character index at which the text is divided.</param>
/// <returns>
/// The newly inserted run, a clone of <paramref name="run"/> placed directly after it,
/// holding the text from <paramref name="position"/> to the end.
/// </returns>
private static Run SplitRun(Run run, int position)
{
    // Create the second half as a clone and attach it right behind the original run.
    Run tail = (Run)run.Clone(true);
    run.ParentNode.InsertAfter(tail, run);

    // Distribute the original text across both runs.
    string fullText = run.Text;
    tail.Text = fullText.Substring(position);
    run.Text = fullText.Substring(0, position);

    return tail;
}