Hello,
We are trying to extract data from cells in pdf file.
Apparently, we can reach all the tables and the rows but we have always in textFragment the headers of the table .
We tried with the versions 17.3 and 17.10.
Here is the code we are using,
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Aspose.Pdf;
namespace ConsoleApplication1
{
class Program
{
static void Main(string[] args)
{
var doc = new Aspose.Pdf.Document("C:/temp/OVL_20170601_S70582032.pdf");
var absorber = new Aspose.Pdf.Text.TableAbsorber();
long cpt = 0;
Console.WriteLine("Begin");
Aspose.Pdf.License license = new Aspose.Pdf.License();
license.SetLicense("c:/temp/Aspose.Total.lic");
foreach (Aspose.Pdf.Page page in doc.Pages)
{
absorber.Visit(page);
//Tableaux
for (int idTable = 0; idTable < absorber.TableList.Count; idTable++)
{
Aspose.Pdf.Text.AbsorbedTable table = absorber.TableList[idTable];
//ligne
for (int idRow = 0; idRow < table.RowList.Count; idRow++)
//foreach (AbsorbedRow row in table.RowList)
{
Aspose.Pdf.Text.AbsorbedRow row = table.RowList[idRow];
//cellule
foreach (Aspose.Pdf.Text.AbsorbedCell cell in row.CellList)
{
foreach (Aspose.Pdf.Text.TextFragment text in cell.TextFragments)
{
cpt = cpt + 1;
Console.WriteLine(cpt.ToString() + " - " + text.Text);
}
}
}
}
}
Console.WriteLine("End");
}
}
}
And I also attach the pdf file to this topic.
OVL_20170601_S70582032.pdf (390.2 KB)
thank you