When I use your PDF table data extractor in C#, the whole table is processed as one cell, and I have no idea how to fix it. I am new to this.
// Walk every table the absorber reports for `page`. The first row seen while
// `currentHeaders` is empty becomes the header list; each later row is turned
// into a BsonDocument keyed by those headers and appended to `tableData`.
// `page`, `currentHeaders` and `tableData` are declared outside this snippet.
// NOTE(review): if the absorber hands back a whole page as a single cell, the
// problem is presumably in table detection rather than in this loop — confirm
// against the library's TableAbsorber documentation.
TableAbsorber absorber = new TableAbsorber();
absorber.Visit(page);

foreach (AbsorbedTable absorbedTable in absorber.TableList)
{
    foreach (AbsorbedRow absorbedRow in absorbedTable.RowList)
    {
        // Flatten each cell into one trimmed string; fragments are joined
        // with no separator between them.
        var cellValues = new List<string>();
        var buffer = new StringBuilder();
        foreach (AbsorbedCell absorbedCell in absorbedRow.CellList)
        {
            buffer.Clear();
            foreach (TextFragment piece in absorbedCell.TextFragments)
            {
                buffer.Append(piece.Text);
            }

            string trimmed = buffer.ToString().Trim();
            cellValues.Add(trimmed);
        }

        // Debug trace of the row exactly as it was extracted.
        string joined = string.Join(", ", cellValues);
        Console.WriteLine($"Row Data: {joined}");

        // No headers captured yet: this row supplies them and is not stored.
        if (currentHeaders.Count == 0)
        {
            currentHeaders = new List<string>(cellValues);
            continue;
        }

        // Pair values with headers positionally; surplus headers or surplus
        // cells are ignored, and blank values are left out of the document.
        var rowDocument = new BsonDocument();
        int columnCount = Math.Min(currentHeaders.Count, cellValues.Count);
        for (int column = 0; column < columnCount; column++)
        {
            string value = cellValues[column];
            if (string.IsNullOrWhiteSpace(value))
            {
                continue;
            }

            rowDocument[currentHeaders[column]] = value;
        }

        // Rows that produced no fields are dropped.
        if (rowDocument.ElementCount > 0)
        {
            tableData.Add(rowDocument);
        }
    }
}
The entire first page of the table is processed as one cell, and it is printed by the "Row Data" print statement only once the data reaches the end of the page.