I’ve attached the document containing the code for your review.
The document contains two content controls. When I remove the last content control, an extra page appears after the page break in the first content control, because an additional line break follows that page break.
package com.sirionlabs.api;
import com.aspose.words.*;
import org.apache.commons.lang3.StringUtils;
import java.io.FileInputStream;
import java.io.InputStream;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
/**
 * Removes tagged rich-text content controls (structured document tags) from a
 * Word document, together with an empty paragraph that directly follows a
 * removed control.
 */
public class ClauseAssemblyRemoveLogic {

    /** Regex for a paragraph text that consists of a single ignorable control sequence. */
    public static final String IGNORE_TEXT_IDENTIFIERS = "(\\t|\\n|\\f|\\r|\\f\\r)";

    /**
     * Loads the sample document, removes one hard-coded clause and saves the result.
     *
     * @param args unused
     * @throws Exception if the licence cannot be applied, or the document cannot be
     *                   loaded, processed or saved
     */
    public static void main(String[] args) throws Exception {
        com.aspose.words.License license = new com.aspose.words.License();
        // Close the licence stream once the licence has been applied.
        try (InputStream licenseStream = new FileInputStream("/home/mohitkumar/IdeaProjects/contract-authoring/auto-tagging/target/test-classes/aspose-licence")) {
            license.setLicense(licenseStream);
        }

        Document document;
        try {
            document = new Document("/home/mohitkumar/Downloads/ClauseRemoveOrig2.docx");
        } catch (Exception ex) {
            // Keep the original failure as the cause instead of discarding it.
            throw new Exception("Failed to load document /home/mohitkumar/Downloads/ClauseRemoveOrig2.docx", ex);
        }

        Set<String> uniqueClausesToRemove = new HashSet<>();
        uniqueClausesToRemove.add("BASIC__1005__6835__138__138");

        // Failures are no longer swallowed: they propagate so a broken run is visible.
        updateDocumentForClauseDelete(document, uniqueClausesToRemove);
        document.save("/home/mohitkumar/Downloads/TestClauseRemove.docx");
    }

    /**
     * Removes every rich-text structured document tag whose tag is contained in
     * {@code uniqueClausesToRemove}. If the node immediately following a removed
     * tag is a paragraph whose text is empty, trims to empty, or matches
     * {@link #IGNORE_TEXT_IDENTIFIERS}, that paragraph is removed as well.
     *
     * @param document              document to modify in place; nothing happens when {@code null}
     * @param uniqueClausesToRemove tags of the content controls to remove; nothing happens
     *                              when {@code null} or empty
     */
    public static void updateDocumentForClauseDelete(Document document, Set<String> uniqueClausesToRemove) {
        if (document == null || uniqueClausesToRemove == null || uniqueClausesToRemove.isEmpty()) {
            return;
        }

        // Snapshot the collection first: removing nodes while iterating the
        // collection returned by getChildNodes can cause nodes to be skipped.
        Node[] tags = document.getChildNodes(NodeType.STRUCTURED_DOCUMENT_TAG, true).toArray();
        for (Node node : tags) {
            StructuredDocumentTag std = (StructuredDocumentTag) node;
            if (std.getParentNode() == null) {
                // Already detached; nothing left to remove.
                continue;
            }
            if (std.getSdtType() != SdtType.RICH_TEXT || !uniqueClausesToRemove.contains(std.getTag())) {
                continue;
            }

            // Capture the follower before detaching the tag; afterwards it is unreachable from std.
            Node nextSibling = std.getNextSibling();

            // Removing the tag detaches its whole subtree, so the contained
            // paragraphs do not need to be removed one by one.
            std.remove();

            if (isIgnorableParagraph(nextSibling)) {
                nextSibling.remove();
            }
        }
    }

    /**
     * Tells whether {@code node} is a paragraph whose text is empty, trims to
     * empty, or matches {@link #IGNORE_TEXT_IDENTIFIERS}.
     */
    private static boolean isIgnorableParagraph(Node node) {
        if (node == null || NodeType.PARAGRAPH != node.getNodeType()) {
            return false;
        }
        String text = node.getText();
        return StringUtils.isEmpty(text)
                || StringUtils.isEmpty(text.trim())
                || text.matches(IGNORE_TEXT_IDENTIFIERS);
    }
}
GenerateClauseRemoveDocument.docx (314.1 KB)
ClauseRemoveOrig.docx (391.4 KB)