We are trying to extract a table using the code below, but we are not able to extract all of the values from the table cells.
example_011.pdf (26.8 KB)
@Test
public void extractTableTest() throws Exception {
PdfTextExtract extact = null;
try {
File srcFile = getFileFromResource(“Alcon_FortWorth.pdf”);
FileInputStream fis = new FileInputStream(srcFile);
Document pdfDocument = new Document(fis);
TableAbsorber ta = new TableAbsorber();
PageCollection pages = pdfDocument.getPages();
System.out.println(pages.size() + " Pages.");
for (int i = 1; i <= pages.size(); i++) {
System.out.println(" Page " + i);
ta.visit(pdfDocument.getPages().get_Item(i));
IGenericList tableList = ta.getTableList();
for (AbsorbedTable at : tableList) {
System.out.println("Table: " + at.getRectangle().getLLY() + ", " + at.getRectangle().getURY());
IGenericList<AbsorbedRow> rowList = at.getRowList();
for (AbsorbedRow row : rowList) {
IGenericList<AbsorbedCell> cellList = row.getCellList();
for (AbsorbedCell cell : cellList) {
TextFragmentCollection textFragments = cell.getTextFragments();
for (TextFragment tf : textFragments) {
System.out.println(tf.getText() + " ");
}
}
System.out.println("\n");
}
System.out.println("\n");
}
}
} finally {
if (extact != null) {
extact.close();
}
}
}