I have hundreds of documents based on the Biosketch template, and I need to pull the data out of them. I have worked with Aspose.Words for many years, but always on the document-creation side. I've attached my code and would like to know whether I am on the right path. It seems to work, but I would like someone else's opinion.
''' <summary>
''' Loads one biosketch Word document and extracts the recognised sections
''' (eRA Commons user name, Personal Statement, Positions, Awards and the
''' bibliography links) into an in-memory table.
''' </summary>
''' <param name="filePath">Path of the Word document to open with Aspose.Words.</param>
''' <remarks>
''' NOTE(review): the extracted table is neither returned nor stored by this
''' procedure, so callers currently cannot see the data. Call
''' <c>ExtractBiosketchRows</c> directly when the rows are needed.
''' </remarks>
Private Sub Parse_Word_Doc_Par(filePath As String)
    Dim extracted As DataTable = ExtractBiosketchRows(filePath)

    ' Kept from the original code: a convenient value to inspect while debugging.
    Dim count As Integer = extracted.Rows.Count
End Sub

''' <summary>
''' Builds a seven-column string table with one row per extracted item.
''' Each row is filled as: file name, section label, item number, "1", text.
''' Column6 and Column7 are left empty.
''' </summary>
''' <param name="filePath">Path of the Word document to open with Aspose.Words.</param>
''' <returns>The populated table; it has no rows when no known section is found.</returns>
''' <exception cref="ArgumentException">Thrown when <paramref name="filePath"/> is Nothing or empty.</exception>
Private Function ExtractBiosketchRows(filePath As String) As DataTable
    If String.IsNullOrEmpty(filePath) Then
        Throw New ArgumentException("A document path is required.", "filePath")
    End If

    ' Create Column1..Column7 in a loop so every column really gets the String type
    ' (the hand-written version kept re-assigning the type of Column1 only).
    Dim table As New DataTable()
    For columnIndex As Integer = 1 To 7
        table.Columns.Add("Column" & columnIndex.ToString(), GetType(String))
    Next

    Dim doc As Aspose.Words.Document = New Aspose.Words.Document(filePath)

    ' GetFileNameWithoutExtension keeps names such as "John.Smith.docx" intact,
    ' whereas splitting on "." would cut the name at the first dot.
    Dim sourceName As String = Path.GetFileNameWithoutExtension(filePath)

    Dim paragraphs As NodeCollection = doc.GetChildNodes(NodeType.Paragraph, True)

    For Each paragraphNode As Aspose.Words.Node In paragraphs
        Dim paragraphText As String = paragraphNode.ToString(SaveFormat.Text).Trim()

        If paragraphText.Contains("eRA COMMONS USER NAME") Then
            Dim parts() As String = paragraphText.Split(":"c)
            ' Index 1 only exists when the paragraph actually contains a colon.
            If parts.Length > 1 Then
                table.Rows.Add(sourceName, "eraCommons", "0", "1", parts(1).Trim())
            End If

        ElseIf paragraphText.Contains("Personal Statement") Then
            Dim statementParts As System.Collections.Generic.List(Of String) =
                CollectSectionText(paragraphNode, paragraphText, "completed projects")
            ' Join with line breaks so the last word of one paragraph does not
            ' run into the first word of the next.
            Dim statement As String = String.Join(Environment.NewLine, statementParts.ToArray())
            table.Rows.Add(sourceName, "Personal Statement", "0", "1", statement)

        ElseIf paragraphText.Contains("Positions and Scientific Appointments") Then
            AddNumberedRows(table, sourceName, "Positions",
                            CollectSectionText(paragraphNode, paragraphText, "Awards and Honors"))

        ElseIf paragraphText.Contains("Awards and Honors") Then
            AddNumberedRows(table, sourceName, "Awards",
                            CollectSectionText(paragraphNode, paragraphText, "Contributions to Science"))

        ElseIf paragraphText.Contains("List of Published") Then
            table.Rows.Add(sourceName, "MyBibliography", 0, "1", paragraphText)

        ElseIf paragraphText.Contains("myncbi") Then
            table.Rows.Add(sourceName, "myncbi", 0, "1", paragraphText)
        End If
    Next

    Return table
End Function

''' <summary>
''' Collects the trimmed text of the nodes that follow a section heading, in
''' sibling order, stopping before the first node whose text contains
''' <paramref name="endMarker"/> or when there are no more siblings.
''' </summary>
''' <param name="heading">The node holding the section heading.</param>
''' <param name="headingText">The already extracted text of <paramref name="heading"/>.</param>
''' <param name="endMarker">Text that marks the start of the next section.</param>
''' <returns>The non-empty texts found; empty when the section has no content.</returns>
Private Function CollectSectionText(heading As Aspose.Words.Node,
                                    headingText As String,
                                    endMarker As String) As System.Collections.Generic.List(Of String)
    Dim entries As New System.Collections.Generic.List(Of String)()

    ' When the heading paragraph itself contains the end marker, nothing is collected.
    If headingText.Contains(endMarker) Then
        Return entries
    End If

    ' A separate cursor is used instead of re-assigning the For Each variable,
    ' and the Nothing check stops the walk at the last sibling instead of
    ' failing with a NullReferenceException when the end marker is missing.
    Dim current As Aspose.Words.Node = heading.NextSibling
    Do While current IsNot Nothing
        Dim currentText As String = current.ToString(SaveFormat.Text).Trim()
        If currentText.Contains(endMarker) Then
            Exit Do
        End If

        ' Blank spacer paragraphs carry no data, so they are skipped.
        If currentText.Length > 0 Then
            entries.Add(currentText)
        End If

        current = current.NextSibling
    Loop

    Return entries
End Function

''' <summary>
''' Adds one row per entry to <paramref name="table"/>, numbering the entries from 1.
''' </summary>
''' <param name="table">The table that receives the rows.</param>
''' <param name="sourceName">Value written to the first column (the document name).</param>
''' <param name="sectionLabel">Value written to the second column.</param>
''' <param name="entries">The texts to store, in order.</param>
Private Sub AddNumberedRows(table As DataTable,
                            sourceName As String,
                            sectionLabel As String,
                            entries As System.Collections.Generic.List(Of String))
    Dim entryNumber As Integer = 0
    For Each entry As String In entries
        entryNumber += 1
        table.Rows.Add(sourceName, sectionLabel, entryNumber, "1", entry)
    Next
End Sub