Remove Empty Pages

Is there a way to programmatically search for and remove empty pages inside a document in Java? Problem.zip (205.4 KB)

Problem.zip (205.4 KB)

Hi there,
Thanks for your inquiry. Please use the following code example to remove empty pages from the document. Hope this helps you.

/**
 * Collects every paragraph of the document whose start page equals {@code page}.
 *
 * <p>A paragraph that begins on an earlier page and continues onto {@code page}
 * is not included, because only {@code getStartPageIndex} is compared.
 * A new {@code LayoutCollector} is created on every call.
 *
 * @param page     page index to match against {@code LayoutCollector.getStartPageIndex}
 * @param document document whose paragraphs (at any depth) are scanned
 * @return the matching paragraphs in document order; empty if none start on {@code page}
 * @throws Exception if the layout query fails
 */
private static ArrayList<Paragraph> GetParagraphsByPage(int page, Document document) throws Exception
{
    ArrayList<Paragraph> nodes = new ArrayList<Paragraph>();
    LayoutCollector lc = new LayoutCollector(document);
    for (Paragraph paragraph: (Iterable <Paragraph> ) document.getChildNodes(NodeType.PARAGRAPH, true))
    {
        if (lc.getStartPageIndex(paragraph) == page)
        {
            nodes.add(paragraph);
        }
    }
    return nodes;
}

/**
 * Loads {@code MyDir + "Problem.docx"}, strips pages that carry no text, and
 * saves the result to {@code MyDir + "Output.docx"}.
 *
 * <p>For each page, the paragraphs that start on it are inspected:
 * <ul>
 *   <li>If the first such paragraph has child nodes, the page is kept. When that
 *       paragraph's first run contains a page break, the paragraph is queued so
 *       the page break is stripped from it.</li>
 *   <li>Otherwise the trimmed text export of all paragraphs starting on the page
 *       is concatenated; if it is empty, every one of those paragraphs is queued
 *       for removal (and those containing a page break are queued for page-break
 *       stripping as well).</li>
 * </ul>
 * Finally, empty paragraphs at the end of the document are removed.
 *
 * @throws Exception if loading, layout, editing or saving the document fails
 */
public static void RemoveEmptyPages() throws Exception
{
    Document doc = new Document(MyDir + "Problem.docx");
    doc.updatePageLayout();

    int pages = doc.getPageCount();

    ArrayList <Node> removenodes = new ArrayList <Node> ();
    ArrayList <Node> pagebreaknodes = new ArrayList <Node> ();
    for (int i = 1; i <= pages; i++)
    {
        ArrayList <Paragraph> nodes = GetParagraphsByPage(i, doc);
        if (nodes.isEmpty())
        {
            continue;
        }

        // Only the first paragraph starting on the page decides whether the
        // page is considered non-empty.
        Paragraph first = nodes.get(0);
        if (first.hasChildNodes())
        {
            // Guard the get(0) lookup: a paragraph can have child nodes without
            // having any direct Run children.
            if (first.getText().contains(ControlChar.PAGE_BREAK)
                && first.getRuns().getCount() > 0
                && first.getRuns().get(0).getText().contains(ControlChar.PAGE_BREAK))
            {
                pagebreaknodes.add(first);
            }
            continue; // Page is not empty
        }

        boolean hasPageBreak = false;
        StringBuilder pageText = new StringBuilder();
        for (Paragraph paragraph: nodes)
        {
            if (paragraph.getText().contains(ControlChar.PAGE_BREAK))
            {
                hasPageBreak = true;
            }
            pageText.append(paragraph.toString(SaveFormat.TEXT).trim());
        }

        if (pageText.length() != 0)
        {
            continue; // Some paragraph on this page has visible text
        }

        // If page's text is empty and there is only page break
        // then remove the page break
        if (hasPageBreak)
        {
            for (Node node: nodes)
            {
                if (node.getText().contains(ControlChar.PAGE_BREAK))
                {
                    pagebreaknodes.add(node);
                }
            }
        }

        // Empty page: every paragraph starting on it is removed
        removenodes.addAll(nodes);
    }

    //Remove nodes from empty pages
    for (Node node: removenodes)
    {
        node.remove();
    }

    //Remove page breaks
    for (Node node: pagebreaknodes)
    {
        node.getRange().replace(ControlChar.PAGE_BREAK, "", new FindReplaceOptions());
    }

    //Remove empty pages from the end of document
    while (true)
    {
        Paragraph last = doc.getLastSection().getBody().getLastParagraph();

        // Stop when the body has no paragraph left, or the last one has content.
        if (last == null
            || last.hasChildNodes()
            || !last.toString(SaveFormat.TEXT).trim().equals(""))
        {
            break;
        }

        // Keep the trailing paragraph when the node before it is not a paragraph.
        // The original compared the last paragraph's own node type, which is
        // always PARAGRAPH, so this guard could never fire.
        // NOTE(review): presumably this protects e.g. a table that must be
        // followed by a paragraph - confirm against the document model rules.
        Node previous = last.getPreviousSibling();
        if (previous != null && previous.getNodeType() != NodeType.PARAGRAPH)
        {
            break;
        }

        last.remove();

        // If the current section becomes empty, we should remove it.
        if (!doc.getLastSection().getBody().hasChildNodes())
        {
            doc.getLastSection().remove();
        }

        // We should exit the loop if the document becomes empty.
        if (!doc.hasChildNodes())
        {
            break;
        }
    }

    doc.save(MyDir + "Output.docx");
}

Best Regards,
Tahir Manzoor
Aspose - File Format APIs
Keep in touch! We’re on Twitter and Facebook

Thank you
Regards
Priya