Hi Team,
I am extracting images from a document by collecting the paragraphs that contain them.
How can I bookmark the extracted image paragraph nodes and then delete them from the document?
Here is the sample code:
// Scratch list reused for every caption: holds the shape-only paragraphs found
// directly above the caption until they are exported, then it is cleared.
ArrayList nodes = new ArrayList();
// Load the working document from "interim".
// NOTE(review): this variable is named interimdoc11, but the code below reads
// "interimdoc" — confirm which document is intended (possible typo).
Document interimdoc11 = new Document(interim);
// Remove empty paragraphs
for (Paragraph paragraph : (Iterable) interimdoc.getChildNodes(NodeType.PARAGRAPH, true)) {
if (paragraph.toString(SaveFormat.TEXT).trim().length() == 0
&& paragraph.getChildNodes(NodeType.SHAPE, true).getCount() == 0
&& paragraph.getText().contains(ControlChar.PAGE_BREAK) == false) {
paragraph.remove();
}
}
// For every caption paragraph (text starting with "Fig" or "Sch"), move the run
// of shape-only paragraphs directly above it into a new document and save that
// document under a name derived from the caption.
//
// The paragraph list is walked through a toArray() snapshot because the loop
// removes paragraphs from interimdoc, and getChildNodes(...) is a live
// collection that should not be modified while it is being enumerated.
for (Node captionNode : interimdoc.getChildNodes(NodeType.PARAGRAPH, true).toArray()) {
    Paragraph paragraph = (Paragraph) captionNode;
    // Convert the caption to text once; trimmed so leading whitespace cannot
    // defeat the prefix checks or leak into the output file name.
    String captionText = paragraph.toString(SaveFormat.TEXT).trim();
    if (!captionText.startsWith("Fig") && !captionText.startsWith("Sch")) {
        continue;
    }

    // Walk backwards over the immediately preceding siblings, collecting
    // paragraphs that have no text but contain at least one shape.
    Node previousPara = paragraph.getPreviousSibling();
    while (previousPara != null && previousPara.getNodeType() == NodeType.PARAGRAPH
            && previousPara.toString(SaveFormat.TEXT).trim().isEmpty()
            && ((Paragraph) previousPara).getChildNodes(NodeType.SHAPE, true).getCount() > 0) {
        nodes.add(previousPara);
        previousPara = previousPara.getPreviousSibling();
    }
    if (nodes.isEmpty()) {
        continue;
    }

    // The paragraphs were collected bottom-up; restore document order.
    Collections.reverse(nodes);

    // Export the collected paragraphs into a new document. A single importer
    // is shared by the whole batch instead of being re-created per paragraph.
    Document dstDoc = new Document();
    NodeImporter importer = new NodeImporter(interimdoc, dstDoc,
            ImportFormatMode.KEEP_SOURCE_FORMATTING);
    for (Object collected : nodes) {
        Paragraph para = (Paragraph) collected;
        Node newNode = importer.importNode(para, true);
        dstDoc.getFirstSection().getBody().appendChild(newNode);
        para.remove();
    }
    // Persist the source document once per figure, after all of its picture
    // paragraphs have been removed (previously it was saved per paragraph).
    interimdoc.save(interim);

    // Remove the first paragraph of the destination document when it has no text.
    if (dstDoc.getFirstSection().getBody().getFirstParagraph().toString(SaveFormat.TEXT).trim()
            .isEmpty()) {
        dstDoc.getFirstSection().getBody().getFirstParagraph().remove();
    }

    /** OUTPUT FILENAME START **/
    // Locate the first run of digits in the caption.
    int numberStart = 0;
    while (numberStart < captionText.length() && !Character.isDigit(captionText.charAt(numberStart))) {
        numberStart++;
    }
    int numberEnd = numberStart;
    while (numberEnd < captionText.length() && Character.isDigit(captionText.charAt(numberEnd))) {
        numberEnd++;
    }
    if (numberEnd > numberStart) {
        int figureNumber = Integer.parseInt(captionText.substring(numberStart, numberEnd));
        strI = Integer.toString(figureNumber);
        if (captionText.startsWith("Fig")) {
            name = "Fig" + "_" + figureNumber;
        } else {
            // Keep the label up to and including its first number. The earlier
            // regex loop kept the LAST occurrence of the number instead, which
            // pulled caption text into the file name.
            name = captionText.substring(0, numberEnd).replace(".", "_");
        }
    } else {
        // The caption has no number: parsing an empty string would throw
        // NumberFormatException, so fall back to the three-letter prefix.
        // strI is left unchanged here.
        // NOTE(review): confirm this fallback suits how name/strI are used later.
        name = captionText.substring(0, 3);
    }
    /** OUTPUT FILENAME END **/
    filename = folder_name + "_" + "Fig_a" + i + "_" + name + ".docx";
    dstDoc.save(filename);
    // RemoveEmptyPages(filename);
    i++;
    nodes.clear();
}
/** SECTION A END **/
Thanks & Regards,
pria