I want to extract all the images from this document. The sample input is attached here: doc3.zip (283.1 KB). The expected output is doc3 _Output.zip (282.8 KB).
Thanks in advance.
Thanks for your inquiry. Please use the following code example to get the desired output. You can get the code of the extractContent and generateDocument methods from the following article.
Extract Selected Content Between Nodes
// Load the source document. MyDir is defined outside this snippet.
Document doc = new Document(MyDir + "doc3.docx");
// Builder bound to the source document; used below to position the cursor and insert bookmarks.
DocumentBuilder builder = new DocumentBuilder(doc);
// Collects tables that contain shapes (populated by the caption loop below).
ArrayList tables = new ArrayList();
// Numeric suffix of the next bookmark name ("Bookmark" + bookmark).
int bookmark = 1;
// Numeric suffix of the next output file name ("output" + i + ".docx").
int i = 1;
// Paragraph nodes of the document; the second argument (true) presumably requests a
// deep, recursive search - confirm against the Aspose.Words API reference.
NodeCollection paragraphs = doc.getChildNodes(NodeType.PARAGRAPH, true);
for (Paragraph paragraph : (Iterable<Paragraph>) paragraphs)
{
if(paragraph.toString(SaveFormat.TEXT).trim().startsWith("Fig"))
{
Node PreviousPara = paragraph.getPreviousSibling();
while (PreviousPara != null &&
(PreviousPara.toString(SaveFormat.TEXT).trim().length() == 0
|| (PreviousPara.toString(SaveFormat.TEXT).trim().contains("(a)") ||
PreviousPara.toString(SaveFormat.TEXT).trim().contains("(b)") ||
PreviousPara.toString(SaveFormat.TEXT).trim().contains("(b)") ||
PreviousPara.toString(SaveFormat.TEXT).trim().contains("(d)")
)
)
&& ((Paragraph)PreviousPara).getChildNodes(NodeType.SHAPE, true).getCount() > 0
)
{
PreviousPara = PreviousPara.getPreviousSibling();
}
if(PreviousPara == null)
{
builder.moveToDocumentStart();
builder.insertParagraph();
builder.startBookmark("Bookmark" + bookmark);
builder.moveToParagraph(paragraphs.indexOf(paragraph), 0);
builder.endBookmark("Bookmark" + bookmark);
bookmark++;
}
else if(PreviousPara.getNodeType() == NodeType.PARAGRAPH)
{
Node node = ((Paragraph)PreviousPara).getParentNode().insertAfter(new Paragraph(doc), PreviousPara);
builder.moveTo(node);
builder.startBookmark("Bookmark" + bookmark);
builder.moveTo(paragraph);
builder.endBookmark("Bookmark" + bookmark);
bookmark++;
}
else if(PreviousPara.getNodeType() == NodeType.TABLE)
{
if(((Table)PreviousPara).getChildNodes(NodeType.SHAPE, true).getCount() > 0)
tables.add(((Table)PreviousPara));
}
}
}
// Save the content enclosed by each "Bookmark*" bookmark as its own document,
// named output<i>.docx, carrying over paper size and left/right margins from the source section.
for (Bookmark bm : doc.getRange().getBookmarks())
{
    if (!bm.getName().startsWith("Bookmark"))
        continue;

    ArrayList extracted = ExtractContents.extractContent(bm.getBookmarkStart(), bm.getBookmarkEnd(), true);
    Document figureDoc = ExtractContents.generateDocument(doc, extracted);

    // Page geometry is taken from the section that owns the bookmark start.
    Paragraph startParagraph = (Paragraph) bm.getBookmarkStart().getParentNode();
    PageSetup fromSetup = startParagraph.getParentSection().getPageSetup();
    PageSetup toSetup = figureDoc.getFirstSection().getPageSetup();
    toSetup.setPaperSize(fromSetup.getPaperSize());
    toSetup.setLeftMargin(fromSetup.getLeftMargin());
    toSetup.setRightMargin(fromSetup.getRightMargin());

    // Drop the trailing caption paragraph, if the extracted content ends with one.
    Paragraph trailing = figureDoc.getLastSection().getBody().getLastParagraph();
    if (trailing.toString(SaveFormat.TEXT).trim().startsWith("Fig"))
        trailing.remove();

    // Drop the leading paragraph when it holds no shape.
    Paragraph leading = figureDoc.getFirstSection().getBody().getFirstParagraph();
    if (leading.getChildNodes(NodeType.SHAPE, true).getCount() == 0)
        leading.remove();

    figureDoc.save(MyDir + "output" + i + ".docx");
    i++;
}