Adding bookmark to this block of code

Saranya_Sekar · October 30, 2018, 4:34am

Hi Team,

I am looking to add a bookmark to this block of working code. The bookmark should be placed at the location from which the image is removed. The input document is Numbered_Image_Input.zip (1.0 MB)
and the expected interim document to be generated is Numbered_Image_interim.zip (15.1 KB)
The code is attached here.

/**
 * Walks every paragraph of {@code interimdoc}; for each paragraph whose trimmed text starts with
 * the "Fig" prefix it gathers the caption plus the immediately preceding sibling paragraphs that
 * contain at least one shape, copies them into a fresh document and saves that document under a
 * name derived from the caption. The source document is saved again after each copied paragraph.
 *
 * NOTE(review): the typographic quotes (“ ”) in the string literals below look like forum
 * formatting damage and are not valid Java string delimiters - confirm against the real source.
 * NOTE(review): {@code i}, {@code folderName}, {@code DOCX} and {@code interim} are not declared
 * anywhere in this method; presumably they are fields of the enclosing class - verify.
 *
 * @param interimdoc document that is scanned, modified and re-saved
 */
private static void numberedImagesExtraction(Document interimdoc) {
try {
ArrayList nodes = new ArrayList();
for (Paragraph paragraph : (Iterable) interimdoc.getChildNodes(NodeType.PARAGRAPH, true)) {
// Start with an empty group for every paragraph examined.
nodes = new ArrayList();
if (paragraph.toString(SaveFormat.TEXT).trim() .startsWith(“Fig”))
{
// The caption itself is the first member of the group.
nodes.add(paragraph);
// Walk backwards over the preceding siblings for as long as they are paragraphs holding shapes.
Node previousPara = paragraph.getPreviousSibling();
while (previousPara != null && previousPara.getNodeType() == NodeType.PARAGRAPH
&& ((Paragraph) previousPara).getChildNodes(NodeType.SHAPE, true).getCount() > 0) {
if (previousPara != null) nodes.add(previousPara);
previousPara = previousPara.getPreviousSibling();
}
// After the loop previousPara is either null or the first sibling that is NOT a
// shape-bearing paragraph; it is not part of the collected group.
// Remove text only paragraph
if (nodes.size() == 1 && ((Paragraph) nodes.get(0)).getChildNodes( NodeType.SHAPE, true).getCount() == 0)
nodes.clear();
if (nodes.size() > 0) {
// Reverse the node collection.
// (Nodes were collected caption-first while walking backwards, so this restores document order.)
Collections.reverse(nodes);
// Extract the consecutive shapes and export them into
// new document
Document dstDoc = new Document();
dstDoc.removeAllChildren();
dstDoc.ensureMinimum();
dstDoc.getChildNodes(NodeType.RUN, true).clear();
for (Paragraph para : (Iterable) nodes)
{
// A new importer is created for every paragraph that is copied.
NodeImporter importer = new NodeImporter(interimdoc, dstDoc,
ImportFormatMode.KEEP_SOURCE_FORMATTING);
// Drop the destination's first paragraph when its text starts with the "Figure" prefix.
if (dstDoc.getFirstSection().getBody().getFirstParagraph()
.toString(SaveFormat.TEXT).trim()
.startsWith(“Figure”)) {
dstDoc.getFirstSection().getBody().getFirstParagraph().remove();
}
//** OUTPUT FILENAME START //
// The name is always built from the caption paragraph, not from the paragraph being copied.
String Imgcaption = paragraph.toString(SaveFormat.TEXT);
// k..j delimits the first run of digits in the caption text.
int k = 0;
while (k < Imgcaption.length()
&& !Character.isDigit(Imgcaption.charAt(k)))
k++;
int j = k;
while (j < Imgcaption.length()
&& Character.isDigit(Imgcaption.charAt(j)))
j++;
// If the caption has no digits the substring is empty and parseInt throws
// NumberFormatException (which the empty catch at the bottom swallows).
int l = Integer.parseInt(Imgcaption.substring(k, j));
String strI = Integer.toString(l);
Pattern pattern = Pattern.compile(strI);
Matcher matcher = pattern.matcher(Imgcaption);
String name = null;
// Keeps the caption text up to the end of the LAST occurrence of the number.
while (matcher.find()) {
name = Imgcaption.substring(0, matcher.end());
name = name.replace(".", “");
}
// NOTE(review): the separator literal below appears truncated by forum formatting - confirm.
if (name.startsWith(“Fig”)) {
name = “Fig” + "” + l;
}
//* OUTPUT FILENAME END *//
Node newNode = importer.importNode(para, true);
dstDoc.getFirstSection().getBody().appendChild(newNode);
// The destination document is saved once per copied paragraph, always under the same name.
String filename = folderName + name +"_ToCheck"+ DOCX;
dstDoc.save(filename);
// NOTE(review): previousPara is the node the backward walk stopped at, not a collected
// node; it is null when the group starts at the first sibling, and after this remove()
// the same detached node is used again on the next pass of this loop - verify intent.
previousPara.remove();
interimdoc.save(interim);
}
i++;
nodes.clear();
}
}

         }
        
         // Every exception raised above is discarded here without logging or rethrowing.
         }catch(Exception e){
         }
         }

tahir.manzoor · October 30, 2018, 3:06pm

@Saranya_Sekar,

Thanks for your inquiry. Please use the following code example to get the desired output. We have attached the output documents and interim document with this post for your kind reference. Docs.zip (1.1 MB)

// Load the sample input document and attach a builder that is shared by both processing steps.
String inputPath = MyDir + "Numbered_Image_Input.doc";
Document doc = new Document(inputPath);
DocumentBuilder builder = new DocumentBuilder(doc);

// Step 1: bookmark the figures; step 2: process those bookmarks.
UseCase(doc, builder);
ExtractImages(doc, "uc", builder);

// Save the modified source document as the interim output.
String interimPath = MyDir + "interim_out.docx";
doc.save(interimPath);

/**
 * Surrounds every figure in {@code doc} with a bookmark whose name starts with "Bookmark".
 *
 * A figure is recognised by a caption paragraph whose trimmed text starts with "Fig". Starting at
 * the caption, the preceding sibling paragraphs are skipped for as long as they are empty or
 * contain at least one shape. The bookmark is then opened before that run and closed at the
 * caption paragraph.
 *
 * @param doc     document to scan; new paragraphs and bookmarks are inserted into it
 * @param builder builder attached to {@code doc}; its cursor is moved by this method
 * @throws Exception propagated from the document API calls
 */
public static void UseCase(Document doc, DocumentBuilder builder) throws Exception
{
    // Running counter that keeps the generated bookmark names unique.
    int bookmark = 1;
    NodeCollection paragraphs = doc.getChildNodes(NodeType.PARAGRAPH, true);
    for (Paragraph paragraph : (Iterable<Paragraph>) paragraphs)
    {
        // Only caption paragraphs are of interest.
        if (!paragraph.toString(SaveFormat.TEXT).trim().startsWith("Fig"))
        {
            continue;
        }
        System.out.println(paragraph.getText());

        // Walk backwards over the empty / shape-bearing paragraphs that precede the caption.
        Node previous = paragraph.getPreviousSibling();
        while (previous != null && previous.getNodeType() == NodeType.PARAGRAPH
                && (previous.toString(SaveFormat.TEXT).trim().length() == 0
                    || ((Paragraph) previous).getChildNodes(NodeType.SHAPE, true).getCount() > 0))
        {
            previous = previous.getPreviousSibling();
        }

        if (previous == null)
        {
            // Nothing precedes the figure among its siblings: open the bookmark after a
            // paragraph inserted at the start of the document.
            String name = "Bookmark" + bookmark;
            builder.moveToDocumentStart();
            builder.insertParagraph();
            builder.startBookmark(name);
            builder.moveTo(paragraph);
            builder.endBookmark(name);
            bookmark++;
        }
        else if (previous.getNodeType() == NodeType.PARAGRAPH)
        {
            // The walk stopped at a text paragraph: open the bookmark in a new empty paragraph
            // inserted directly before it.
            String name = "BookmarkUC1" + bookmark;
            Node node = previous.getParentNode().insertBefore(new Paragraph(doc), previous);
            builder.moveTo(node);
            builder.startBookmark(name);
            builder.moveTo(paragraph);
            builder.endBookmark(name);
            bookmark++;
        }
        // When the walk stopped at a non-paragraph node, no bookmark is created for this caption.
    }
}

/**
 * Saves each bookmarked figure of {@code doc} as a separate document and replaces the figure in
 * {@code doc} with a {@code <Fig>...</Fig>} placeholder.
 *
 * Steps, in order:
 * 1. For every bookmark whose name starts with "Bookmark", insert an additional bookmark end at
 *    the start of the paragraph that holds the existing end.
 * 2. For each such bookmark, create a "bm_extract" + n bookmark that starts at the first shape or
 *    group shape found from the bookmark start and ends at the bookmark end.
 * 3. Remove the "Bookmark..." bookmarks.
 * 4. For each "bm_extract" bookmark, extract its content into a new document, trim it, and save
 *    it as MyDir + first 7 characters of the caption paragraph text + "_out.docx".
 * 5. Replace the text of each "bm_extract" bookmark with the placeholder.
 *
 * @param doc     document holding the bookmarks; it is modified in place
 * @param uc      not used by this method
 * @param builder builder attached to {@code doc}; its cursor is moved by this method
 * @throws Exception propagated from the document API calls
 */
public  static void ExtractImages(Document doc, String uc, DocumentBuilder builder) throws Exception
{
    int i = 1;
    String bookmark = "bm_extract";

    // Step 1: add a bookmark end at the start of the paragraph containing the existing end.
    for (Bookmark bm : doc.getRange().getBookmarks()) {
        if (bm.getName().startsWith("Bookmark")) {
            bm.getBookmarkEnd().getParentNode().insertBefore(new BookmarkEnd(doc, bm.getName()), bm.getBookmarkEnd().getParentNode().getFirstChild());
        }
    }
    doc.updatePageLayout();

    // Step 2: create the extraction bookmarks, each starting at the first (group) shape.
    for (Bookmark bm : doc.getRange().getBookmarks()) {
        if (!bm.getName().startsWith("Bookmark")) {
            continue;
        }
        Node currentNode = bm.getBookmarkStart();
        // nextPreOrder yields null once the traversal is exhausted, so the null check keeps a
        // bookmark with no following shape from raising a NullPointerException.
        while (currentNode != null
                && currentNode.getNodeType() != NodeType.SHAPE
                && currentNode.getNodeType() != NodeType.GROUP_SHAPE) {
            currentNode = currentNode.nextPreOrder(doc);
        }
        if (currentNode == null) {
            // No shape found for this bookmark: nothing to extract.
            continue;
        }

        builder.moveTo(currentNode);
        builder.startBookmark(bookmark + i);
        builder.moveTo(bm.getBookmarkEnd());
        builder.endBookmark(bookmark + i);
        i++;
    }

    // Step 3: collect first, then remove, so no removal happens while the bookmark collection
    // is being iterated.
    ArrayList<Bookmark> obsolete = new ArrayList<Bookmark>();
    for (Bookmark bm : doc.getRange().getBookmarks()) {
        if (bm.getName().startsWith("Bookmark")) {
            obsolete.add(bm);
        }
    }
    for (Bookmark bm : obsolete) {
        bm.remove();
    }
    doc.updatePageLayout();

    // Step 4: export every extraction bookmark into its own document.
    for (Bookmark bm : doc.getRange().getBookmarks())
    {
        if (!bm.getName().startsWith("bm_extract"))
        {
            continue;
        }
        ArrayList nodes =  ExtractContents.extractContent(bm.getBookmarkStart(), bm.getBookmarkEnd(), true);
        Document dstDoc = ExtractContents.generateDocument(doc, nodes);

        // Carry paper size and horizontal margins over from the section the figure came from.
        PageSetup sourcePageSetup = ((Paragraph)bm.getBookmarkStart().getParentNode()).getParentSection().getPageSetup();
        dstDoc.getFirstSection().getPageSetup().setPaperSize(sourcePageSetup.getPaperSize());
        dstDoc.getFirstSection().getPageSetup().setLeftMargin(sourcePageSetup.getLeftMargin());
        dstDoc.getFirstSection().getPageSetup().setRightMargin(sourcePageSetup.getRightMargin());

        dstDoc.updatePageLayout();
        // Drop the caption paragraph if it ended up last in the extracted document.
        Paragraph lastParagraph = dstDoc.getLastSection().getBody().getLastParagraph();
        if (lastParagraph != null && lastParagraph.toString(SaveFormat.TEXT).trim().startsWith("Fig"))
        {
            lastParagraph.remove();
        }

        dstDoc.updatePageLayout();
        // Drop leading paragraphs that contain no shape.
        while (dstDoc.getFirstSection().getBody().getFirstParagraph() != null
                && dstDoc.getFirstSection().getBody().getFirstParagraph().getChildNodes(NodeType.SHAPE, true).getCount() == 0)
        {
            dstDoc.getFirstSection().getBody().getFirstParagraph().remove();
        }

        dstDoc.updatePageLayout();
        // The loop above may have removed every paragraph, so check for null before dereferencing.
        Paragraph firstParagraph = dstDoc.getFirstSection().getBody().getFirstParagraph();
        if (firstParagraph != null && firstParagraph.getChildNodes(NodeType.SHAPE, true).getCount() > 0)
        {
            String filename = bm.getBookmarkEnd().getParentNode().toString(SaveFormat.TEXT);
            if (filename.trim().length() > 0)
            {
                dstDoc.save(MyDir + firstChars(filename, 7) + "_out.docx");
            }
        }
    }

    // Step 5: replace each extracted figure with a placeholder naming it.
    for (Bookmark bm : doc.getRange().getBookmarks()) {
        if (bm.getName().startsWith("bm_extract")) {
            String figText = bm.getBookmarkEnd().getParentNode().toString(SaveFormat.TEXT);
            if (figText.trim().length() > 0)
            {
                bm.setText("<Fig>" + firstChars(figText.trim(), 7) + "</Fig>" + ControlChar.PARAGRAPH_BREAK);
            }
        }
    }
}

/** Returns the first {@code count} characters of {@code text}, or all of it when it is shorter. */
private static String firstChars(String text, int count)
{
    return text.substring(0, Math.min(count, text.length()));
}

Saranya_Sekar · November 1, 2018, 7:44am

@tahir.manzoor

I am unable to extract all the images in this document with this code. The sample input is Bernardo_et_al_RevisedPaper_shape.zip (3.8 MB), the output is Bernardo_et_al_RevisedPaper-shapes.zip (3.9 MB), and the intermediate (interim) document must be Bernardo_et_al_RevisedPaper_Interim.zip (44.4 KB). Kindly help.

tahir.manzoor · November 1, 2018, 4:13pm

@Saranya_Sekar,

Thanks for your inquiry. The code example does not export images that are inside a table. Please check my reply in the following thread.