@tahir.manzoor
All figures except the anchored ones are extracted. The input document containing the anchored image is
Article reviewed [13-05-2017]_test.zip (3.1 MB)
and the output derived is
Article reviewed [13-05-2017]_test_Interim.zip (22.2 KB)
and the interim document is Anchored_Fig_Interim.zip (7.2 KB). Kindly help, please.
The code I am using is
public static void anchored_extract_Images(Document doc, String uc) throws Exception
{
int i = 1;
for (Bookmark bm : doc.getRange().getBookmarks())
{
if(bm.getName().startsWith(“Bookmark”))
{
ArrayList nodes = extractContent(bm.getBookmarkStart(), bm.getBookmarkEnd(), true);
Document dstDoc = generateDocument(doc, nodes);
PageSetup sourcePageSetup = ((Paragraph)bm.getBookmarkStart().getParentNode()).getParentSection().getPageSetup();
dstDoc.getFirstSection().getPageSetup().setPaperSize(sourcePageSetup.getPaperSize());
dstDoc.getFirstSection().getPageSetup().setLeftMargin(sourcePageSetup.getLeftMargin());
dstDoc.getFirstSection().getPageSetup().setRightMargin(sourcePageSetup.getRightMargin());
dstDoc.updatePageLayout();
if(dstDoc.getLastSection().getBody().getLastParagraph().toString(SaveFormat.TEXT).trim().startsWith("Fig"))
dstDoc.getLastSection().getBody().getLastParagraph().remove();
dstDoc.updatePageLayout();
while(dstDoc.getFirstSection().getBody().getFirstParagraph()!= null && dstDoc.getFirstSection().getBody().getFirstParagraph().getChildNodes(NodeType.SHAPE, true).getCount() == 0)
dstDoc.getFirstSection().getBody().getFirstParagraph().remove();
dstDoc.updatePageLayout();
if(dstDoc.getFirstSection().getBody().getFirstParagraph().getChildNodes(NodeType.SHAPE, true).getCount() > 0)
{
dstDoc.save(folderName + "anchored_image_"+i+".docx");
i++;
}
}
}
for (Bookmark bm : doc.getRange().getBookmarks()) {
if (bm.getName().startsWith("Bookmark")) {
String figText = bm.getBookmarkEnd().getParentNode().toString(SaveFormat.TEXT);
if(figText.trim().length() > 0)
bm.setText("<Anchored-Fig>"+figText.trim().substring(0, 7)+"</Anchored-Fig>" + ControlChar.PARAGRAPH_BREAK);
}
}
}
/**
 * Wraps each figure caption, together with the blank / sub-caption paragraphs directly above it,
 * in a bookmark named {@code "Bookmark<n>"}.
 *
 * <p>A caption is a paragraph whose trimmed text starts with "Fig". Walking backwards from the
 * caption, paragraphs are skipped while they are empty, contain one of the sub-figure labels
 * "(a)", "(b)", "(c)", "(d)", or start with "(Fig". If nothing was skipped the caption is ignored.
 * Otherwise a bookmark is started in a new paragraph inserted before the first non-skipped
 * sibling (or at the document start when there is none) and ended at the caption paragraph.
 *
 * @param doc     the document to scan and modify
 * @param builder a builder used to insert the bookmark markers
 * @throws Exception if the underlying document operations fail
 */
public static void UseCaseAnchoreInterim(Document doc, DocumentBuilder builder) throws Exception
{
    int bookmark = 1;
    NodeCollection paragraphs = doc.getChildNodes(NodeType.PARAGRAPH, true);
    for (Paragraph paragraph : (Iterable<Paragraph>) paragraphs)
    {
        if (!paragraph.toString(SaveFormat.TEXT).trim().startsWith("Fig"))
            continue;

        boolean skippedAny = false;
        Node previous = paragraph.getPreviousSibling();
        while (previous != null)
        {
            // Render the sibling once per iteration instead of once per condition.
            String text = previous.toString(SaveFormat.TEXT).trim();
            boolean skippable = text.length() == 0
                    || text.contains("(a)")
                    || text.contains("(b)")
                    || text.contains("(c)") // the original tested "(b)" twice and never "(c)"
                    || text.contains("(d)")
                    || text.startsWith("(Fig");
            if (!skippable)
                break;
            previous = previous.getPreviousSibling();
            skippedAny = true;
        }

        // Nothing sits between the caption and the preceding content: no bookmark for this caption.
        if (!skippedAny)
            continue;

        if (previous == null)
        {
            // Every preceding sibling was skipped: start the bookmark at the document start.
            builder.moveToDocumentStart();
            builder.insertParagraph();
            builder.startBookmark("Bookmark" + bookmark);
            builder.moveTo(paragraph);
            builder.endBookmark("Bookmark" + bookmark);
            bookmark++;
        }
        else if (previous.getNodeType() == NodeType.PARAGRAPH)
        {
            // Start the bookmark in a fresh paragraph placed before the first non-skipped paragraph.
            Node node = previous.getParentNode().insertBefore(new Paragraph(doc), previous);
            builder.moveTo(node);
            builder.startBookmark("Bookmark" + bookmark);
            builder.moveTo(paragraph);
            builder.endBookmark("Bookmark" + bookmark);
            bookmark++;
        }
    }
}
/**
 * Bookmarks each figure caption together with the shape-bearing or empty paragraphs above it.
 *
 * <p>A caption is a paragraph whose trimmed text starts with "Fig". Preceding sibling paragraphs
 * are walked backwards while they are empty or contain at least one shape. When the walk runs off
 * the start of the sibling list the bookmark is named {@code "Bookmark<n>"} and begins at the
 * document start; when it stops on a paragraph the bookmark is named {@code "BookmarkUC1<n>"} and
 * begins in a new paragraph inserted before that paragraph. If the walk stops on a non-paragraph
 * node, no bookmark is created for that caption.
 *
 * @param doc     the document to scan and modify
 * @param builder a builder used to insert the bookmark markers
 * @throws Exception if the underlying document operations fail
 */
public static void UseCaseAnchored(Document doc, DocumentBuilder builder) throws Exception
{
    int counter = 1;
    NodeCollection allParagraphs = doc.getChildNodes(NodeType.PARAGRAPH, true);
    for (Paragraph caption : (Iterable<Paragraph>) allParagraphs)
    {
        String captionText = caption.toString(SaveFormat.TEXT).trim();
        if (!captionText.startsWith("Fig"))
        {
            continue;
        }

        // Walk backwards over empty paragraphs and paragraphs that carry shapes.
        Node cursor = caption.getPreviousSibling();
        while (cursor != null && cursor.getNodeType() == NodeType.PARAGRAPH)
        {
            boolean isBlank = cursor.toString(SaveFormat.TEXT).trim().length() == 0;
            if (!isBlank && ((Paragraph) cursor).getChildNodes(NodeType.SHAPE, true).getCount() <= 0)
            {
                break;
            }
            cursor = cursor.getPreviousSibling();
        }

        if (cursor == null)
        {
            // No earlier sibling remained: anchor the bookmark at the very start of the document.
            String name = "Bookmark" + counter;
            builder.moveToDocumentStart();
            builder.insertParagraph();
            builder.startBookmark(name);
            builder.moveTo(caption);
            builder.endBookmark(name);
            counter++;
            continue;
        }

        if (cursor.getNodeType() != NodeType.PARAGRAPH)
        {
            // Stopped on a non-paragraph node: leave this caption untouched.
            continue;
        }

        // Stopped on a text paragraph: open the bookmark in a new paragraph inserted before it.
        String name = "BookmarkUC1" + counter;
        Node anchor = ((Paragraph) cursor).getParentNode().insertBefore(new Paragraph(doc), cursor);
        builder.moveTo(anchor);
        builder.startBookmark(name);
        builder.moveTo(caption);
        builder.endBookmark(name);
        counter++;
    }
}
/**
 * Collects deep clones of the body-level nodes lying between two nodes of the same document.
 *
 * <p>Both nodes are first validated by {@code verifyParameterNodes}. The walk then proceeds over
 * body-level siblings from the block containing {@code startNode} to the block containing
 * {@code endNode}, continuing into following sections when a body is exhausted. The first and
 * last blocks are handed to {@code processMarker}, which decides what part of them is kept;
 * every block in between is added whole.
 *
 * @param startNode   node at which extraction starts
 * @param endNode     node at which extraction ends
 * @param isInclusive whether the marker nodes themselves are kept (passed to {@code processMarker})
 * @return the list of cloned nodes, in document order
 * @throws IllegalStateException if the end of the document is reached before {@code endNode}
 * @throws Exception if validation or a document operation fails
 */
public static ArrayList extractContent(Node startNode, Node endNode, boolean isInclusive) throws Exception {
    verifyParameterNodes(startNode, endNode);
    ArrayList nodes = new ArrayList();

    // Keep the original markers; the working references are lifted to body level below.
    Node originalStartNode = startNode;
    Node originalEndNode = endNode;
    while (startNode.getParentNode().getNodeType() != NodeType.BODY)
        startNode = startNode.getParentNode();
    while (endNode.getParentNode().getNodeType() != NodeType.BODY)
        endNode = endNode.getParentNode();

    boolean isStartingNode = true;
    Node currNode = startNode;
    while (true) {
        CompositeNode cloneNode = (CompositeNode) currNode.deepClone(true);
        boolean isEndingNode = currNode.equals(endNode);

        if (isStartingNode || isEndingNode) {
            if (isStartingNode) {
                processMarker(cloneNode, nodes, originalStartNode, isInclusive, isStartingNode, isEndingNode);
                isStartingNode = false;
            }
            // When start and end share a block, the same clone is processed a second time here.
            if (isEndingNode) {
                processMarker(cloneNode, nodes, originalEndNode, isInclusive, isStartingNode, isEndingNode);
                break;
            }
        } else {
            nodes.add(cloneNode);
        }

        // Advance to the next body-level node, moving into later sections when this body is
        // exhausted. Sections with an empty (or missing) body are skipped instead of causing a
        // NullPointerException, and running off the document yields a descriptive error.
        Node next = currNode.getNextSibling();
        Node section = (next == null) ? currNode.getAncestor(NodeType.SECTION) : null;
        while (next == null) {
            section = section.getNextSibling();
            if (section == null)
                throw new IllegalStateException("Reached the end of the document before reaching the end node");
            CompositeNode body = ((Section) section).getBody();
            next = (body == null) ? null : body.getFirstChild();
        }
        currNode = next;
    }
    return nodes;
}
/**
 * Validates the two boundary nodes passed to {@code extractContent}.
 *
 * <p>Both nodes must be non-null, belong to the same document, and have a body ancestor. The end
 * node must not precede the start node: its section must not come before the start node's
 * section, and within the same section its body-level block must not come before the start
 * node's body-level block.
 *
 * @param startNode node at which extraction starts
 * @param endNode   node at which extraction ends
 * @throws IllegalArgumentException if any of the conditions above is violated
 * @throws Exception if a document operation fails
 */
private static void verifyParameterNodes(Node startNode, Node endNode) throws Exception {
    if (startNode == null)
        throw new IllegalArgumentException("Start node cannot be null");
    if (endNode == null)
        throw new IllegalArgumentException("End node cannot be null");
    if (!startNode.getDocument().equals(endNode.getDocument()))
        throw new IllegalArgumentException("Start node and end node must belong to the same document");
    if (startNode.getAncestor(NodeType.BODY) == null || endNode.getAncestor(NodeType.BODY) == null)
        throw new IllegalArgumentException("Start node and end node must be a child or descendant of a body");

    Section startSection = (Section) startNode.getAncestor(NodeType.SECTION);
    Section endSection = (Section) endNode.getAncestor(NodeType.SECTION);
    int startIndex = startSection.getParentNode().indexOf(startSection);
    int endIndex = endSection.getParentNode().indexOf(endSection);

    if (startIndex == endIndex) {
        // indexOf only finds direct children of the body. For nested nodes (for example bookmark
        // markers inside a paragraph) it would return -1 for both sides and the comparison would
        // never fire, so compare the body-level ancestors instead. The body-ancestor check above
        // guarantees these loops terminate.
        Node startBlock = startNode;
        while (startBlock.getParentNode().getNodeType() != NodeType.BODY)
            startBlock = startBlock.getParentNode();
        Node endBlock = endNode;
        while (endBlock.getParentNode().getNodeType() != NodeType.BODY)
            endBlock = endBlock.getParentNode();

        if (startSection.getBody().indexOf(startBlock) > endSection.getBody().indexOf(endBlock))
            throw new IllegalArgumentException("The end node must be after the start node in the body");
    } else if (startIndex > endIndex) {
        throw new IllegalArgumentException("The section of end node must be after the section start node");
    }
}
/**
 * Tells whether a node sits inside a paragraph or table without being a paragraph or table itself.
 *
 * @param node the node to classify
 * @return {@code true} if the node has a paragraph or table ancestor and is neither of those types
 * @throws Exception if a document operation fails
 */
private static boolean isInline(Node node) throws Exception {
    int type = node.getNodeType();
    if (type == NodeType.PARAGRAPH || type == NodeType.TABLE) {
        return false;
    }
    if (node.getAncestor(NodeType.PARAGRAPH) != null) {
        return true;
    }
    return node.getAncestor(NodeType.TABLE) != null;
}
/**
 * Trims a cloned block around a start or end marker and appends it to the result list.
 *
 * <p>For a start marker, the clone's children that come before the marker are removed; for an
 * end marker, the children that come after it are removed. The marker's own counterpart in the
 * clone is kept only when {@code isInclusive} is true.
 *
 * @param cloneNode     clone of the block being trimmed; modified in place
 * @param nodes         result list the clone is appended to
 * @param node          the original marker node
 * @param isInclusive   whether the marker itself is kept
 * @param isStartMarker true when this call handles the start of the range
 * @param isEndMarker   true when the block being handled is also the last block of the range
 * @throws Exception if a document operation fails
 */
private static void processMarker(CompositeNode cloneNode, ArrayList nodes, Node node, boolean isInclusive, boolean isStartMarker, boolean isEndMarker) throws Exception {
// Block-level marker (not inline): nothing to trim. The whole clone is added when inclusive,
// except on a call where both flags are set.
if (!isInline(node)) {
if (!(isStartMarker && isEndMarker)) {
if (isInclusive)
nodes.add(cloneNode);
}
return;
}
// For a field start, move the marker forward to the matching FIELD_END sibling (or the last
// sibling) when this is an exclusive start or an inclusive end.
if (node.getNodeType() == NodeType.FIELD_START) {
if ((isStartMarker && !isInclusive) || (!isStartMarker && isInclusive)) {
while (node.getNextSibling() != null && node.getNodeType() != NodeType.FIELD_END)
node = node.getNextSibling();
}
}
// For a comment range end, move the marker forward to the following COMMENT sibling (or the
// last sibling).
if (node.getNodeType() == NodeType.COMMENT_RANGE_END) {
while (node.getNextSibling() != null && node.getNodeType() != NodeType.COMMENT)
node = node.getNextSibling();
}
// Map the marker to its counterpart in the clone by child index. indexDiff is non-zero when
// the clone has a different number of children than the marker's parent, e.g. after an earlier
// call already removed leading children from this same clone.
// NOTE(review): this assumes the marker's parent corresponds to cloneNode — confirm for
// markers nested deeper than one level.
int indexDiff = node.getParentNode().getChildNodes().getCount() - cloneNode.getChildNodes().getCount();
if (indexDiff == 0)
node = cloneNode.getChildNodes().get(node.getParentNode().indexOf(node));
else
node = cloneNode.getChildNodes().get(node.getParentNode().indexOf(node) - indexDiff);
boolean isSkip;
boolean isProcessing = true;
// A start marker removes from the first child onwards; an end marker starts removing only
// once the marker has been reached.
boolean isRemoving = isStartMarker;
Node nextNode = cloneNode.getFirstChild();
while (isProcessing && nextNode != null) {
Node currentNode = nextNode;
isSkip = false;
if (currentNode.equals(node)) {
if (isStartMarker) {
// Reached the start marker: this is the last iteration; keep the marker if inclusive.
isProcessing = false;
if (isInclusive)
isRemoving = false;
} else {
// Reached the end marker: remove everything after it; keep the marker if inclusive.
isRemoving = true;
if (isInclusive)
isSkip = true;
}
}
// Advance before removing so the traversal is not broken by the removal.
nextNode = nextNode.getNextSibling();
if (isRemoving && !isSkip)
currentNode.remove();
}
// After processing, the composite node may have become empty; if so, do not include it.
if (!(isStartMarker && isEndMarker)) {
if (cloneNode.hasChildNodes())
nodes.add(cloneNode);
}
}