Free Support Forum - aspose.com

Extract images from this document

Hi @tahir.manzoor

I want to extract all the images from this document. A sample input is attached here: doc3.zip (283.1 KB). Expected output: doc3 _Output.zip (282.8 KB)
Thanks in advance.

@MikeLak

Thanks for your inquiry. Please use the following code example to get the desired output. You can get the code for the extractContent and generateDocument methods from the following article:
Extract Selected Content Between Nodes

// Load the source document. MyDir is defined outside this snippet.
Document doc = new Document(MyDir + "doc3.docx");

DocumentBuilder builder = new DocumentBuilder(doc);
// Tables that directly precede a caption and contain shapes.
// They are only collected here; nothing in the visible snippet consumes them.
ArrayList tables = new ArrayList();
// Numeric suffix of the next "Bookmark<n>" to create.
int bookmark = 1;
// Numeric suffix of the next output file; consumed by the code that follows this loop.
int i = 1;
NodeCollection paragraphs = doc.getChildNodes(NodeType.PARAGRAPH, true);
// NOTE(review): paragraphs are inserted into doc while this collection is being
// iterated — confirm the iterator tolerates that, or iterate over a snapshot.
for (Paragraph paragraph : (Iterable<Paragraph>) paragraphs)
{
    // Only caption paragraphs (text starting with "Fig") anchor a range.
    if (!paragraph.toString(SaveFormat.TEXT).trim().startsWith("Fig"))
    {
        continue;
    }

    // Walk backwards over the paragraphs that hold the figure's pictures:
    // each must contain at least one shape and be either empty of text or
    // carry a sub-figure label "(a)".."(d)".
    Node previousNode = paragraph.getPreviousSibling();
    while (previousNode != null)
    {
        // Check the node type before casting: a Table (or any other
        // non-paragraph sibling) ends the walk instead of throwing
        // ClassCastException.
        if (previousNode.getNodeType() != NodeType.PARAGRAPH)
        {
            break;
        }

        // Render the text once per node rather than once per comparison.
        String text = previousNode.toString(SaveFormat.TEXT).trim();
        boolean emptyOrLabelled = text.length() == 0
                || text.contains("(a)")
                || text.contains("(b)")
                || text.contains("(c)")   // was a duplicated "(b)" check
                || text.contains("(d)");
        boolean hasShape =
                ((Paragraph) previousNode).getChildNodes(NodeType.SHAPE, true).getCount() > 0;

        if (!(emptyOrLabelled && hasShape))
        {
            break;
        }
        previousNode = previousNode.getPreviousSibling();
    }

    if (previousNode == null)
    {
        // The walk ran past the first sibling: open the bookmark at the
        // document start and close it at the caption paragraph.
        builder.moveToDocumentStart();
        builder.insertParagraph();
        builder.startBookmark("Bookmark" + bookmark);
        builder.moveToParagraph(paragraphs.indexOf(paragraph), 0);
        builder.endBookmark("Bookmark" + bookmark);
        bookmark++;
    }
    else if (previousNode.getNodeType() == NodeType.PARAGRAPH)
    {
        // Insert a fresh paragraph right after the node that stopped the walk
        // and bracket everything from there to the caption with a bookmark.
        Node node = previousNode.getParentNode().insertAfter(new Paragraph(doc), previousNode);
        builder.moveTo(node);
        builder.startBookmark("Bookmark" + bookmark);
        builder.moveTo(paragraph);
        builder.endBookmark("Bookmark" + bookmark);
        bookmark++;
    }
    else if (previousNode.getNodeType() == NodeType.TABLE)
    {
        // The caption follows a table: remember it when it contains shapes.
        if (((Table) previousNode).getChildNodes(NodeType.SHAPE, true).getCount() > 0)
        {
            tables.add(((Table) previousNode));
        }
    }
}

// Export the content of every bookmark whose name starts with "Bookmark"
// into its own document, saved as MyDir + "output<i>.docx".
for (Bookmark bm : doc.getRange().getBookmarks())
{
    if (!bm.getName().startsWith("Bookmark"))
    {
        continue;
    }

    // Copy the nodes between the bookmark start and end (inclusive) into a new document.
    ArrayList nodes = ExtractContents.extractContent(bm.getBookmarkStart(), bm.getBookmarkEnd(), true);
    Document dstDoc = ExtractContents.generateDocument(doc, nodes);

    // Carry the paper size and horizontal margins of the source section over to the output.
    PageSetup sourcePageSetup = ((Paragraph) bm.getBookmarkStart().getParentNode()).getParentSection().getPageSetup();
    PageSetup targetPageSetup = dstDoc.getFirstSection().getPageSetup();
    targetPageSetup.setPaperSize(sourcePageSetup.getPaperSize());
    targetPageSetup.setLeftMargin(sourcePageSetup.getLeftMargin());
    targetPageSetup.setRightMargin(sourcePageSetup.getRightMargin());

    // Drop the trailing "Fig" caption paragraph. The null guard covers an
    // extracted body that contains no paragraph at all.
    Paragraph lastParagraph = dstDoc.getLastSection().getBody().getLastParagraph();
    if (lastParagraph != null && lastParagraph.toString(SaveFormat.TEXT).trim().startsWith("Fig"))
    {
        lastParagraph.remove();
    }

    // Drop the leading paragraph when it holds no shape. It is looked up only
    // after the removal above, because that removal may have emptied the body.
    Paragraph firstParagraph = dstDoc.getFirstSection().getBody().getFirstParagraph();
    if (firstParagraph != null && firstParagraph.getChildNodes(NodeType.SHAPE, true).getCount() == 0)
    {
        firstParagraph.remove();
    }

    dstDoc.save(MyDir + "output" + i + ".docx");
    i++;
}