Dear team,
We are extracting images from a DOCX file, but in the case below two images are being combined into a single image. Please refer to the attached source files and source code.
Source code :
// Fragment of a loop body (note the `continue` statements): processes a table that
// contains at least one SHAPE node at any depth. It first drops blank paragraphs that
// follow the table, then checks the node after the table (case 1) or the node before
// it (case 2) against `matches` and, when the checks pass, copies the table into its
// own document and passes it to AIE.interimwork.
// `matches`, SCHEME, saveAS, interimdoc, docSetup and AIE are defined outside this fragment.
if (table.getChildNodes(NodeType.SHAPE, true).getCount() > 0)
{
// if (table.getChildNodes(NodeType.SHAPE, true).getCount() > 0 && imageCount!=eqCount && mathType.contains(false)) {
try
{
// Remove the node right after the table while it has no text, contains no shapes,
// and the node after it exists and is not a table.
// NOTE(review): table.getNextSibling() is dereferenced here without a null check;
// a NullPointerException would not be handled by the catch below - confirm the caller guards this.
while (table.getNextSibling().toString(SaveFormat.TEXT).trim().length() == 0
&& (((Paragraph)table.getNextSibling()).getChildNodes(NodeType.SHAPE, true).getCount() == 0
&& table.getNextSibling().getNextSibling() != null
&& table.getNextSibling().getNextSibling().getNodeType() != NodeType.TABLE))
{
table.getNextSibling().remove();
}
}
// Raised by the (Paragraph) cast above when the next sibling is not a Paragraph;
// the clean-up loop is abandoned and processing continues.
catch (ClassCastException e)
{
logger.info("ClassCastException occur, {0}", e.getMessage());
}
// Case 1: the node after the table matches `matches`, and the table's own text
// contains neither "fig" (case-insensitive) nor SCHEME.
if (table.getNextSibling() != null && table.getNextSibling().getText().trim().matches(matches)
&& !table.toString(SaveFormat.TEXT).toLowerCase().contains("fig")
&& !table.toString(SaveFormat.TEXT).trim().contains(SCHEME))
{
// supplementary check
// When supplymentryCheck returns true, only a bookmark is inserted and the
// enclosing loop moves on to its next iteration.
if (AIE.supplymentryCheck(table.getNextSibling().toString(SaveFormat.TEXT).trim()))
{
AIE.insertBookmark(interimdoc, (Paragraph)table.getNextSibling(), AIE.fileName);
continue;
}
// supplementary check
// Import the table into the document returned by docSetup and save that document
// to AIE.pdfFolder + saveAS.
Document tableDoc = docSetup(interimdoc, table);
//tableDoc.save("D:\\AIE_Samples\\IssueFile\\July\\15.07.2022\\Sprint_005\\REDMINE_AIE\\#90202\\JEST_114574\\Document\\1\\a.docx");
NodeImporter importers = new NodeImporter(interimdoc, tableDoc,
ImportFormatMode.KEEP_SOURCE_FORMATTING);
tableDoc.getFirstSection().getBody().appendChild(importers.importNode(table, true));
// NOTE(review): within the visible code, `a` is referenced only by the commented-out check below.
boolean a = tableDoc.getChildNodes(NodeType.TABLE, true) != null;
//if(a==false) {
tableDoc.save(AIE.pdfFolder + saveAS);
//}
// Continue when the table has a preceding PARAGRAPH node and removeEquationShapes
// returns false for the copy, or when the table is the first child of its section body.
if ((table.getPreviousSibling() != null && !AIE.removeEquationShapes(tableDoc)
&& table.getPreviousSibling().getNodeType() == NodeType.PARAGRAPH)
|| ((Paragraph)table.getNextSibling()).getParentSection().getBody().indexOf(table) == 0)
{
try
{
// The text of the node after the table is stored as the image caption.
AIE.imgCaption = table.getNextSibling().toString(SaveFormat.TEXT).trim();
String saveName = AIE.formatImgcaption(AIE.imgCaption, AIE.fileName);
//create interim work it-15
// Arguments are packed positionally: flag, bookmark name, caption text,
// table document, paragraph after the table.
ArrayList list = new ArrayList();
boolean containsImage = true;
String bookmarkname = saveName;
Document dstDoc = tableDoc;
Paragraph figPara = (Paragraph)table.getNextSibling();
list.add(containsImage);
list.add(bookmarkname);
list.add(AIE.imgCaption);
list.add(dstDoc);
list.add(figPara);
AIE.interimwork(list, interimdoc);
//create interim work
}
// Any failure above is logged (message only) and processing continues.
catch (Exception e)
{
logger.info("Exception occurs, {0}", e.getMessage());
}
}
// Delete the file saved above at AIE.pdfFolder + saveAS, if it exists.
File delete = new File(AIE.pdfFolder + saveAS);
Files.deleteIfExists(delete.toPath());
}
// Case 2: the same text checks applied to the node before the table; in addition the
// node two positions before the table must not be a SHAPE.
// NOTE(review): table.getPreviousSibling().getPreviousSibling() is dereferenced without
// a null check - confirm the table can never be the second child here.
else if (table.getPreviousSibling() != null
&& table.getPreviousSibling().toString(SaveFormat.TEXT).trim().matches(matches)
&& !table.toString(SaveFormat.TEXT).toLowerCase().contains("fig")
&& !table.toString(SaveFormat.TEXT).trim().contains(SCHEME)
&& table.getPreviousSibling().getPreviousSibling().getNodeType() != NodeType.SHAPE)
{
// supplementary check
// When supplymentryCheck returns true, only a bookmark is inserted and the
// enclosing loop moves on to its next iteration.
if (AIE.supplymentryCheck(table.getPreviousSibling().toString(SaveFormat.TEXT).trim()))
{
AIE.insertBookmark(interimdoc, (Paragraph)table.getPreviousSibling(), AIE.fileName);
continue;
}
// supplementary check
// Import the table into the document returned by docSetup and save that document
// to AIE.pdfFolder + saveAS.
Document tableDoc = docSetup(interimdoc, table);
NodeImporter importers = new NodeImporter(interimdoc, tableDoc,
ImportFormatMode.KEEP_SOURCE_FORMATTING);
tableDoc.getFirstSection().getBody().appendChild(importers.importNode(table, true));
tableDoc.save(AIE.pdfFolder + saveAS);
// Continue only when the preceding node is a PARAGRAPH and removeEquationShapes
// returns false for the copy.
if (table.getPreviousSibling() != null && !AIE.removeEquationShapes(tableDoc)
&& table.getPreviousSibling().getNodeType() == NodeType.PARAGRAPH)
{
try
{
// The text of the node before the table is stored as the image caption.
AIE.imgCaption = table.getPreviousSibling().toString(SaveFormat.TEXT).trim();
String saveName = AIE.formatImgcaption(AIE.imgCaption, AIE.fileName);
//create interim work it-15
// Arguments are packed positionally: flag, bookmark name, caption text,
// table document, paragraph before the table.
ArrayList list = new ArrayList();
boolean containsImage = true;
String bookmarkname = saveName;
Document dstDoc = tableDoc;
Paragraph figPara = (Paragraph)table.getPreviousSibling();
list.add(containsImage);
list.add(bookmarkname);
list.add(AIE.imgCaption);
list.add(dstDoc);
list.add(figPara);
AIE.interimwork(list, interimdoc);
}
Source File : New Microsoft Word Document.zip (2.0 MB)
Output : pdf.zip (1.1 MB)