Hi Anil,
Thanks for your inquiry. Please check the following sample code snippet, which reads the TOC contents. It should help you accomplish the task.
''' <summary>
''' Demo entry point: opens the sample document, converts its first TOC (index 0)
''' into a DataTable and writes one line per TOC entry to the console.
''' </summary>
Public Sub ReadTOC()
    Dim sourceDocument As New Document("D:/Downloads/Aspose_updated (1).docx")
    Dim tocEntries As DataTable = TableOfContentsToDataTable(sourceDocument, 0)

    For Each entry As DataRow In tocEntries.Rows
        ' The "EntryStyle" column holds the paragraph style captured for the entry;
        ' its StyleIdentifier is what gets reported as the heading level.
        Dim headingStyle As Style = DirectCast(entry("EntryStyle"), Style)
        Dim summaryLine As String = String.Format("Entry name: {0}, Heading Level: {1}, Page number: {2}", entry("EntryName"), headingStyle.StyleIdentifier, entry("Page"))
        Console.WriteLine(summaryLine)
    Next
End Sub
''' <summary>
''' Walks the nodes of the TOC field selected by <paramref name="tocIndex"/> in
''' <paramref name="doc"/> (pre-order traversal) and returns one DataTable row per TOC entry.
''' </summary>
''' <param name="doc">Document whose node tree is traversed.</param>
''' <param name="tocIndex">Index passed to FindTocStartFromIndex to pick the TOC; also used in the table name.</param>
''' <returns>
''' A DataTable named "Toc " + tocIndex with the columns EntryRef, EntryName, ResultStartNode,
''' ResultRuns, EntryStyle, PageRef and Page. If the text "No table of contents entries found."
''' is encountered, the table is returned with its columns cleared.
''' </returns>
''' <remarks>
''' NOTE(review): only the paragraph-skipping loop at the end of each entry checks the result of
''' NextPreOrder for Nothing. The other skip loops dereference it unconditionally, so a document
''' that lacks the expected node would presumably fail with a NullReferenceException - confirm.
''' </remarks>
Public Function TableOfContentsToDataTable(ByVal doc As Document, ByVal tocIndex As Integer) As DataTable
Dim table As New DataTable()
table.TableName = "Toc " + tocIndex.ToString
' Column order here must match the order of the values passed to table.Rows.Add below.
' EntryRef: text collected before the entry's field separator (only filled when isHyperlinked).
table.Columns.Add("EntryRef")
' EntryName: concatenated, trimmed text of the nodes that make up the entry.
table.Columns.Add("EntryName")
' ResultStartNode: the node that precedes the entry's first run (see entryPositionNode).
table.Columns.Add("ResultStartNode", GetType(Node))
' ResultRuns: clones of the runs found in the entry.
table.Columns.Add("ResultRuns", GetType(List(Of Run)))
' EntryStyle: paragraph style taken from the entry's runs.
table.Columns.Add("EntryStyle", GetType(Style))
' PageRef / Page: text of the node after the page field's FieldStart / FieldSeparator.
table.Columns.Add("PageRef")
table.Columns.Add("Page")
' Get the FieldStart of the specified TOC.
Dim currentNode As Node = DirectCast(FindTocStartFromIndex(doc, tocIndex), Node)
' Skip forward to the first field separator (after the TOC field code).
While currentNode.NodeType <> NodeType.FieldSeparator
currentNode = currentNode.NextPreOrder(doc)
End While
' Step to the node that follows the TOC field separator.
currentNode = currentNode.NextPreOrder(doc)
Dim isCollecting As Boolean = True
' NOTE(review): countOfFieldItems is incremented and reset below but never read.
Dim countOfFieldItems As Integer = 0
Dim isAfterFirstTocEntry As Boolean = False
' Decided once, from the node right after the TOC separator, and reused for every entry:
' if that node is a FieldStart, each entry is treated as wrapped in its own field.
Dim isHyperlinked As Boolean = currentNode.NodeType = NodeType.FieldStart
' One iteration per TOC entry.
While isCollecting
Dim entryRefCode As New StringBuilder()
Dim entryText As New StringBuilder()
Dim pageRefCode As New StringBuilder()
Dim pageText As New StringBuilder()
' First iteration only: move to the first run so the first entry starts at the same
' kind of node as later entries (which are positioned on a run by the loop at the bottom).
If Not isAfterFirstTocEntry Then
' Skip nodes until a run is encountered.
While currentNode.NodeType <> NodeType.Run
currentNode = currentNode.NextPreOrder(doc)
End While
isAfterFirstTocEntry = True
End If
If isHyperlinked Then
' Collect the trimmed text of every node up to the field separator as the entry's field code.
While currentNode.NodeType <> NodeType.FieldSeparator
entryRefCode.Append(currentNode.Range.Text.Trim())
currentNode = currentNode.NextPreOrder(doc)
End While
' Skip past field separator
currentNode = currentNode.NextPreOrder(doc)
End If
' The TOC contains only the "no entries" placeholder text: clear the columns and return
' the table as-is.
If currentNode.Range.Text.Contains("No table of contents entries found.") Then
table.Columns.Clear()
Return table
End If
Dim entryPositionNode As Node = Nothing
Dim fieldResultRuns As New List(Of Run)()
Dim entryStyle As Style = Nothing
' Gather the entry text: consume nodes until the next FieldStart is reached.
While currentNode.NodeType <> NodeType.FieldStart
countOfFieldItems += 1
If currentNode.NodeType = NodeType.Run Then
' Remember the node just before the first run of the entry.
If entryPositionNode Is Nothing Then
entryPositionNode = currentNode.PreviousPreOrder(doc)
End If
' Store a clone of the run rather than the run that lives in the document.
fieldResultRuns.Add(DirectCast(currentNode.Clone(False), Run))
' Overwritten on every run, so the value kept is the paragraph style of the last run seen.
entryStyle = DirectCast(currentNode, Run).ParentParagraph.ParagraphFormat.Style
End If
' Text of every node (not only runs) is appended, trimmed, to the entry name.
entryText.Append(currentNode.Range.Text.Trim())
currentNode = currentNode.NextPreOrder(doc)
End While
countOfFieldItems = 0
' Skip nodes until FieldStart (of PAGEREF).
' NOTE(review): the loop above already stopped on a FieldStart, so this loop's condition
' is false on entry and its body does not run.
While currentNode.NodeType <> NodeType.FieldStart
currentNode = currentNode.NextPreOrder(doc)
End While
' The node right after the FieldStart supplies the page reference field code text.
currentNode = currentNode.NextPreOrder(doc)
pageRefCode.Append(currentNode.Range.Text)
' Skip nodes until FieldSeparator (of PAGEREF)
While currentNode.NodeType <> NodeType.FieldSeparator
currentNode = currentNode.NextPreOrder(doc)
End While
' Take the text of the single node after the separator; it is expected to be the page number.
currentNode = currentNode.NextPreOrder(doc)
pageText.Append(currentNode.Range.Text)
' Add the entry to the table; value order matches the column order defined at the top.
table.Rows.Add(New Object() {entryRefCode.ToString(), entryText.ToString(), entryPositionNode, fieldResultRuns, entryStyle, pageRefCode.ToString(), _
pageText.ToString()})
' NOTE(review): the loop below advances again before its first check, so the node reached
' by this step is never inspected - confirm that is intended.
currentNode = currentNode.NextPreOrder(doc)
' Skip to the first run of the next paragraph (should be the next entry). Check if the TOC field end is found at the same time.
Dim isNextPara As Boolean = False
Dim isChecking As Boolean = True
While isChecking
currentNode = currentNode.NextPreOrder(doc)
' No node found, break.
If currentNode Is Nothing Then
isCollecting = False
Exit While
End If
' Passed a new paragraph
If currentNode.NodeType = NodeType.Paragraph Then
isNextPara = True
End If
' Found first run of a new paragraph
If isNextPara AndAlso currentNode.NodeType = NodeType.Run Then
isChecking = False
End If
' Once we encounter a FieldEnd node of type FieldTOC then we know we are at the end
' of the current TOC and we can stop here.
If currentNode.NodeType = NodeType.FieldEnd Then
Dim fieldEnd As Aspose.Words.Fields.FieldEnd = DirectCast(currentNode, Aspose.Words.Fields.FieldEnd)
If fieldEnd.FieldType = Aspose.Words.Fields.FieldType.FieldTOC Then
isCollecting = False
Exit While
End If
End If
End While
End While
Return table
End Function
''' <summary>
''' Returns the FieldStart node of the TOC field at the given zero-based position,
''' counting only FieldStart nodes whose FieldType is FieldTOC, in the order returned by
''' doc.GetChildNodes(NodeType.FieldStart, True).
''' </summary>
''' <param name="doc">Document to search for TOC fields.</param>
''' <param name="tocIndex">Zero-based index of the TOC field to return.</param>
''' <returns>The FieldStart node that begins the requested TOC field.</returns>
''' <exception cref="ArgumentNullException"><paramref name="doc"/> is Nothing.</exception>
''' <exception cref="ArgumentOutOfRangeException">
''' <paramref name="tocIndex"/> is negative or not less than the number of TOC fields found.
''' </exception>
Public Function FindTocStartFromIndex(ByVal doc As Document, ByVal tocIndex As Integer) As FieldStart
    If doc Is Nothing Then
        Throw New ArgumentNullException("doc")
    End If

    ' Collect the FieldStart nodes that begin a TOC field.
    Dim tocStarts As New List(Of FieldStart)()
    For Each start As FieldStart In doc.GetChildNodes(NodeType.FieldStart, True)
        If start.FieldType = FieldType.FieldTOC Then
            tocStarts.Add(start)
        End If
    Next

    ' Ensure the TOC specified by the passed index exists. Negative values are rejected here
    ' too, so the caller gets the same descriptive error instead of a raw indexer failure.
    If tocIndex < 0 OrElse tocIndex >= tocStarts.Count Then
        ' The three-argument overload is used because the single-string overload treats
        ' its argument as the parameter name, not as the message.
        Throw New ArgumentOutOfRangeException("tocIndex", tocIndex, "TOC index is out of range")
    End If

    Return tocStarts(tocIndex)
End Function
Please feel free to contact us for any further assistance.
Best Regards,