I am doing a very basic table extraction from a PDF with well-defined tables. No matter what I try, I never get any data back in cell.TextFragments for any cell. Is there anything I need to do differently to extract the actual text? I do get the correct number of tables, rows, and cells, but never any text.
/// <summary>
/// Loads the Aspose.PDF license, opens the PDF at a hard-coded path, and writes the
/// text of every table cell found on every page to the console.
/// </summary>
/// <remarks>
/// Output format: the line "Table" is printed once per absorbed table; within a row
/// each text fragment is printed followed by "|", and a newline ends the row.
/// All work is performed synchronously; the returned task is already completed.
/// </remarks>
/// <returns>A completed <see cref="Task"/>.</returns>
public Task ExtractTableData()
{
    Aspose.Pdf.License license = new Aspose.Pdf.License();
    license.SetLicense("C:\\Users\\awilson\\Desktop\\Aspose.PDF.NET.lic");

    var filePath = "C:\\Users\\awilson\\Desktop\\86853449-11db-4aa7-8c3e-c278b11d6bbc_51.pdf";

    // Dispose the document when done so the underlying PDF file is released.
    using (Aspose.Pdf.Document pdfDocument = new Aspose.Pdf.Document(filePath))
    {
        // One builder is reused for every fragment instead of allocating per fragment.
        var fragmentText = new StringBuilder();

        foreach (var page in pdfDocument.Pages)
        {
            // A fresh absorber per page keeps TableList limited to the current page.
            Aspose.Pdf.Text.TableAbsorber absorber = new Aspose.Pdf.Text.TableAbsorber();
            absorber.Visit(page);

            foreach (AbsorbedTable table in absorber.TableList)
            {
                Console.WriteLine("Table");

                foreach (AbsorbedRow row in table.RowList)
                {
                    foreach (AbsorbedCell cell in row.CellList)
                    {
                        foreach (TextFragment fragment in cell.TextFragments)
                        {
                            // A fragment's text is the concatenation of its segments.
                            fragmentText.Clear();
                            foreach (TextSegment seg in fragment.Segments)
                            {
                                fragmentText.Append(seg.Text);
                            }

                            Console.Write($"{fragmentText}|");
                        }
                    }

                    // End the current table row.
                    Console.WriteLine();
                }
            }
        }
    }

    // The original ended with `return true;`, which is invalid for a method returning
    // a non-generic Task. Nothing is awaited, so return an already-completed task.
    return Task.CompletedTask;
}