actual.PNG (7.8 KB)
extracted.PNG (22.4 KB)
FYI: I used the source code below to extract the numbered text from a Word document.
private static Document extractContent(Node startNode, Node endNode) throws Exception {
// Check whether start and end nodes are children of boby
if (startNode.getParentNode().getNodeType() != NodeType.BODY
|| endNode.getParentNode().getNodeType() != NodeType.BODY)
throw new Exception(“Start and end nodes should be children of main story(body)”);
// Clone the original document,
// this is needed to preserve styles of the original document
Document srcDoc = (Document) startNode.getDocument();
Document dstDoc = srcDoc.deepClone();
dstDoc.removeAllChildren();
// Now we should copy parent nodes of the start node to the destination document
// these will Section, Body.
Node firstSect = dstDoc.importNode(startNode.getAncestor(NodeType.SECTION), true,
ImportFormatMode.USE_DESTINATION_STYLES);
dstDoc.appendChild(firstSect);
// Remove content from the section, except headers/footers
dstDoc.getLastSection().getBody().removeAllChildren();
Node currNode = startNode;
Node dstNode;
// Copy content
while (!currNode.equals(endNode)) {
// Import node
dstNode = dstDoc.importNode(currNode, true, ImportFormatMode.USE_DESTINATION_STYLES);
dstDoc.getLastSection().getBody().appendChild(dstNode);
// move to the next node
if (currNode.getNextSibling() != null)
currNode = currNode.getNextSibling();
// Move to the next section
else {
Node sect = currNode.getAncestor(NodeType.SECTION);
if (sect.getNextSibling() != null) {
dstNode = dstDoc.importNode(sect.getNextSibling(), true, ImportFormatMode.USE_DESTINATION_STYLES);
dstDoc.appendChild(dstNode);
dstDoc.getLastSection().getBody().removeAllChildren();
currNode = ((Section) sect.getNextSibling()).getBody().getFirstChild();
} else {
break;
}
}
}
return dstDoc;
}