Single big table(docx) data into multiple pages

If I have a big table, how can I convert it into images?

Input: below attached file is input file
WTR SUSTAINABLE INDEX CONSTITUENTS.docx (46.4 KB)

output: below attached document is getting output
Out1.jpeg (223.5 KB)

Problem: not all of the table data is rendered to images; I only get a single page as an image.

Requirement: get all of the table data as images, even when the table is large.

@aelum This is expected behavior. Aspose.Words lays out the document into pages when saving a document to an image. In order to get the whole big table as a single image, you should enlarge the page height using PageSetup.PageHeight. But you should note that the maximum allowed height of a page in an MS Word document is 1584 points.
You can use LayoutCollector to determine page numbers where table starts and ends.

I just need to extract images from the table data.
My data fits in two images, so I just need to get two images from the single table.

input data tables:
WTR SUSTAINABLE INDEX CONSTITUENTS.docx (46.4 KB)

@aelum You can use this code to save all pages of the document as images:

Document doc = new Document(@"C:\Temp\in.docx");
ImageSaveOptions opt = new ImageSaveOptions(SaveFormat.Png);
for (int page = 0; page < doc.PageCount; page++)
{
    opt.PageSet = new PageSet(page);
    doc.Save(string.Format(@"C:\Temp\out_{0}.png", page), opt);
}

input file:
ClimateTech_Industry_101022_WTR.docx (336.1 KB)

my requirements: we are extracting all the data from the above documents i.e images, charts, tables , text

Images and charts working fine but issue with tables

when using tables :
Problem: not all of the table data is rendered to images; I only get a single-page image (e.g. when a table has data that needs two pages to be stored as images). The output is shown in the first reply.

my code for tables:

             {
            // Running index passed to renderNode, which uses it to number the output files.
            int i=1;
		// Collect the document's tables; the `true` flag requests a deep search, which
		// presumably also returns tables nested inside other tables — confirm.
		NodeCollection tables = doc.getChildNodes(NodeType.TABLE, true);
		for (Table table : (Iterable<Table>) tables) {
			// Each table is rendered with its own fresh JPEG save options.
			renderNode(table, new ImageSaveOptions(SaveFormat.JPEG), i);
			i++;
		}

	/**
	 * Renders a single document node to a JPEG file: a copy of the node is placed inside
	 * a temporary text-box shape in a clone of the document, the shape is saved through
	 * its shape renderer, and the saved image is then re-read, cropped to the rectangle
	 * reported by FindBoundingBoxAroundNode and written out again.
	 *
	 * @param node         the node to render; null is rejected with an Exception
	 * @param imageOptions save options for the shape renderer; JPEG options are created when null
	 * @param i            index used in the output file name ("Out" + i + ".jpeg")
	 * @throws Exception if node is null, or when cloning, rendering or image I/O fails
	 */
	public static void renderNode(Node node, ImageSaveOptions imageOptions, int i) throws Exception

	{

		if (node == null)

			throw new Exception("Node cannot be null");

		// If no image options are supplied, create default options.
		

		if (imageOptions == null)
			imageOptions = new ImageSaveOptions(SaveFormat.JPEG);

		// NOTE(review): JPEG has no alpha channel, so this transparent paper color
		// presumably does not survive in the saved .jpeg and the transparency-based
		// crop below may find no empty border to remove — confirm, and consider PNG.
		imageOptions.setPaperColor(new Color(0, 0, 0, 0));

		// There is a bug which affects the cache of a cloned node. To avoid this we
		// instead clone the entire document including all nodes,

		// find the matching node in the cloned document and render that instead.

		Document doc = ((Document) node.getDocument()).deepClone();

		// Locate the clone's counterpart of the node by its position in the flat list
		// of all nodes of the original document.
		node = doc.getChild(NodeType.ANY, node.getDocument().getChildNodes(NodeType.ANY, true).indexOf(node), true);

		// Create a temporary shape to store the target node in. This shape will be
		// rendered to retrieve

		// the rendered content of the node.

		Shape shape = new Shape(doc, ShapeType.TEXT_BOX);

		Section parentSection = (Section) node.getAncestor(NodeType.SECTION);

		// Assume that the node cannot be larger than the page in size.
		// NOTE(review): a table taller than one page presumably gets clipped to this
		// height, which would explain output that shows only the first page — confirm.

		shape.setWidth(parentSection.getPageSetup().getPageWidth());

		shape.setHeight(parentSection.getPageSetup().getPageHeight());

		shape.setFillColor(new Color(0,0,0,0)); // We must make the shape and paper color transparent.

		// Don't draw a surrounding line on the shape.

		shape.setStroked(false);

		Node currentNode = node;

		// If the node contains block level nodes then just add a copy of these nodes to
		// the shape.

		if (currentNode instanceof InlineStory || currentNode instanceof Story)

		{

			CompositeNode composite = (CompositeNode) currentNode;

			for (Node childNode : (Iterable<Node>) composite.getChildNodes())

			{

				shape.appendChild(childNode.deepClone(true));

			}

		}

		else

		{

			// Move up through the DOM until we find node which is suitable to insert into a
			// Shape (a node with a parent can contain paragraph, tables the same as a
			// shape).

			// Each parent node is cloned on the way up so even a descendant node passed to
			// this method can be rendered.

			// Since we are working with the actual nodes of the document we need to clone
			// the target node into the temporary shape.

			// The loop stops as soon as the current node's parent is a story or a shape,
			// or the current node itself is a paragraph.
			while (!(currentNode.getParentNode() instanceof InlineStory || currentNode.getParentNode() instanceof Story
					|| currentNode.getParentNode() instanceof ShapeBase
					|| currentNode.getNodeType() == NodeType.PARAGRAPH))

			{

				// Shallow copy of the parent: it will wrap the copy built so far.
				CompositeNode parent = (CompositeNode) currentNode.getParentNode().deepClone(false);

				currentNode = currentNode.getParentNode();

				parent.appendChild(node.deepClone(true));

				node = parent; // Store this new node to be inserted into the shape.

			}

			// Add the node to the shape.

			shape.appendChild(node.deepClone(true));

		}

		// We must add the shape to the document tree to have it rendered.

		parentSection.getBody().getFirstParagraph().appendChild(shape);		
		
		// First pass: save the uncropped rendering of the shape.
		shape.getShapeRenderer().save("C:\\Users\\DHANANJAY\\Desktop\\jovus-up\\wordReader file\\Question_ask\\file made\\one\\" + "Out"+i+""+".jpeg", imageOptions);

		// Read the uncropped rendering back so it can be cropped with plain Java imaging.
		BufferedImage renderedImage = ImageIO.read(new File("C:\\Users\\DHANANJAY\\Desktop\\jovus-up\\wordReader file\\Question_ask\\file made\\one\\" + "Out"+i+""+".jpeg"));

		// Extract the actual content of the image by cropping transparent space around

		// the rendered shape.

		Rectangle cropRectangle = FindBoundingBoxAroundNode(renderedImage);

		BufferedImage out = renderedImage.getSubimage(cropRectangle.x, cropRectangle.y, cropRectangle.width,
				cropRectangle.height);

		// NOTE(review): this directory ("file made one\\") differs from the one used for
		// the uncropped file above ("file made\\one\\") — confirm which one is intended.
		File outputfile = new File("C:\\Users\\DHANANJAY\\Desktop\\jovus-up\\wordReader file\\Question_ask\\file made one\\" + "Out"+i+""+".jpeg");

		ImageIO.write(out, "jpeg", outputfile);

	}

	/**
	 * Finds the smallest rectangle that encloses every pixel whose ARGB value is not
	 * fully transparent black ({@code 0x00000000}).
	 *
	 * @param originalBitmap the image to scan; must not be {@code null}
	 * @return the bounding box of the non-transparent content, or the full image bounds
	 *         when no such pixel exists, so that cropping with the result is a no-op
	 *         instead of failing on an overflowed rectangle
	 */
	public static Rectangle FindBoundingBoxAroundNode(BufferedImage originalBitmap)
	{
		// Equals new Color(0, 0, 0, 0).getRGB(); hoisted so no Color is allocated per pixel.
		final int transparentArgb = 0;

		final int width = originalBitmap.getWidth();
		final int height = originalBitmap.getHeight();

		int minX = Integer.MAX_VALUE;
		int minY = Integer.MAX_VALUE;
		int maxX = Integer.MIN_VALUE;
		int maxY = Integer.MIN_VALUE;

		for (int x = 0; x < width; ++x)
		{
			for (int y = 0; y < height; ++y)
			{
				if (originalBitmap.getRGB(x, y) != transparentArgb)
				{
					minX = Math.min(x, minX);
					minY = Math.min(y, minY);
					maxX = Math.max(x, maxX);
					maxY = Math.max(y, maxY);
				}
			}
		}

		// No content pixel found: the sentinels are untouched and (max - min) + 1 would
		// overflow, so report the whole image instead.
		if (maxX < minX || maxY < minY)
		{
			return new Rectangle(0, 0, width, height);
		}

		return new Rectangle(minX, minY, (maxX - minX) + 1, (maxY - minY) + 1);
	}

@aelum Unfortunately, it is not quite clear what the expected output is. Could you please describe your requirements in more detail and provide the expected output you would like to get?
If your requirement is to get whole table as a single image, then, as I mentioned earlier, to get the whole big table as a single image, you should enlarge page height using PageSetup.PageHeight to make the table fit one page.

suppose i have a docx ,which has table containing 250 records(rows)
my requirements is to get 40 records per images from table ,
for 250 records it should be generating 7 images.
Then how can i do this. ?

@aelum In your case you should copy the table that should be converted to image into a separate document and use code I have suggested earlier to render this document to images:

// Load the document and write each of its pages to a separate PNG file.
Document source = new Document(@"C:\Temp\in.docx");
ImageSaveOptions pngOptions = new ImageSaveOptions(SaveFormat.Png);
int pageIndex = 0;
while (pageIndex < source.PageCount)
{
    // Limit this save call to the current page only.
    pngOptions.PageSet = new PageSet(pageIndex);
    string target = string.Format(@"C:\Temp\out_{0}.png", pageIndex);
    source.Save(target, pngOptions);
    pageIndex++;
}

please provide me code for copying tables from existing document to new document.

@aelum You can use code like the following:

// Render the first table found in the body of the document's first section.
Document doc = new Document(@"C:\Temp\in.docx");
RenderTable(doc.FirstSection.Body.Tables[0]);

/// <summary>
/// Copies <paramref name="table"/> into a temporary document that contains nothing
/// else and saves every page of that document as a separate PNG file.
/// </summary>
private static void RenderTable(Table table)
{
    // Start from a childless clone of the source document and of the table's section.
    var scratchDoc = (Document)table.Document.Clone(false);
    var emptySection = table.GetAncestor(NodeType.Section).Clone(false);
    scratchDoc.AppendChild(scratchDoc.ImportNode(emptySection, false));
    scratchDoc.EnsureMinimum();

    // Put the imported table at the very beginning of the body.
    var importedTable = scratchDoc.ImportNode(table, true);
    scratchDoc.FirstSection.Body.PrependChild(importedTable);

    var pngOptions = new ImageSaveOptions(SaveFormat.Png);
    int pageIndex = 0;
    while (pageIndex < scratchDoc.PageCount)
    {
        pngOptions.PageSet = new PageSet(pageIndex);
        scratchDoc.Save(string.Format(@"C:\Temp\out_{0}.png", pageIndex), pngOptions);
        pageIndex++;
    }
}

This code is unable to render some of the tables from my docx.
mydocx file attached: MLKN_IOC_062722_WTR (1).docx (4.8 MB)

code i have used:

Document doc = new Document("C:\\Users\\DHANANJAY\\Desktop\\jovus-up\\wordReader file\\MLKN_IOC_062722_WTR.docx");
// Render every table of the first section's body; the table position numbers the output files.
for (int tableIndex = 0; tableIndex < doc.getFirstSection().getBody().getTables().getCount(); tableIndex++) {
    RenderTable(doc.getFirstSection().getBody().getTables().get(tableIndex), tableIndex);
}

/**
 * Copies the table into a temporary document that contains nothing else and saves
 * every page of that document as "out_<j>.<page>.png".
 *
 * @param table the table to render
 * @param j     table index used in the output file name
 * @throws Exception propagated from the document and save calls
 */
private static void RenderTable(Table table, int j) throws Exception
{
	// Childless clone of the source document plus a childless copy of the table's section.
	Document scratchDoc = (Document)table.getDocument().deepClone(false);
	scratchDoc.appendChild(scratchDoc.importNode(table.getAncestor(NodeType.SECTION).deepClone(false), false));
	scratchDoc.ensureMinimum();

	// The imported table becomes the first node of the body.
	scratchDoc.getFirstSection().getBody().prependChild(scratchDoc.importNode(table, true));

	ImageSaveOptions pngOptions = new ImageSaveOptions(SaveFormat.PNG);
	int pageIndex = 0;
	while (pageIndex < scratchDoc.getPageCount())
	{
		pngOptions.setPageSet(new PageSet(pageIndex));
		scratchDoc.save("C:\\Temp\\mlkn\\out_"+j+"."+pageIndex+".png", pngOptions);
		pageIndex++;
	}
}

output image attached:mlkn.zip (163.9 KB)

the below code working fine but how can i ignore nested tables

the code i used:

// Running index used to number the output files of the rendered tables.
int i = 0;
// Deep search for tables in the whole document (the missing ';' is restored here).
NodeCollection tables = doc.getChildNodes(NodeType.TABLE, true);
for (Table table : (Iterable<Table>) tables) {
	RenderTable(table, i);
	i++;
}
/**
 * Copies the table into a temporary document that contains nothing else and saves
 * every page of that document as "out_<j>.<page>.png".
 *
 * @param table the table to render
 * @param j     table index used in the output file name
 * @throws Exception propagated from the document and save calls
 */
private static void RenderTable(Table table, int j) throws Exception
{
	// Childless clone of the source document plus a childless copy of the table's section.
	Document scratchDoc = (Document)table.getDocument().deepClone(false);
	scratchDoc.appendChild(scratchDoc.importNode(table.getAncestor(NodeType.SECTION).deepClone(false), false));
	scratchDoc.ensureMinimum();

	// The imported table becomes the first node of the body.
	scratchDoc.getFirstSection().getBody().prependChild(scratchDoc.importNode(table, true));

	ImageSaveOptions pngOptions = new ImageSaveOptions(SaveFormat.PNG);
	int pageIndex = 0;
	while (pageIndex < scratchDoc.getPageCount())
	{
		pngOptions.setPageSet(new PageSet(pageIndex));
		scratchDoc.save("C:\\Temp\\mlknother\\out_"+j+"."+pageIndex+".png", pngOptions);
		pageIndex++;
	}
}

output image attached: mlknother.zip (465.1 KB)

How can i ignore nested tables in second code

@aelum You can use the following code to skip nested tables:

// Deep search for tables in the whole document (the missing ';' is restored here).
// `i` is the running output index declared by the surrounding code.
NodeCollection tables = doc.getChildNodes(NodeType.TABLE, true);
for (Table table : (Iterable<Table>) tables) {
    // A table that has a TABLE ancestor is nested inside another table: skip it.
    if(table.getAncestor(NodeType.TABLE)==null) {
        RenderTable(table, i);
        i++;
    }
}

All the code is working fine. just need to remove all the white margins from images

all the images attached below:
cli.zip (480.1 KB)

please provide code for the same.

@aelum White margins are page margins, to remove them you can reset them in the temporary document. Try modifying your code like the following:

/**
 * Copies the table into a temporary document, shrinks the page to its former content
 * area with all four margins set to zero, and saves every page as
 * "out_<j>.<page>.png".
 *
 * @param table the table to render
 * @param j     table index used in the output file name
 * @throws Exception propagated from the document and save calls
 */
private static void RenderTable(Table table, int j) throws Exception
{
    // Childless clone of the source document plus a childless copy of the table's section.
    Document scratchDoc = (Document)table.getDocument().deepClone(false);
    scratchDoc.appendChild(scratchDoc.importNode(table.getAncestor(NodeType.SECTION).deepClone(false), false));
    scratchDoc.ensureMinimum();

    // The imported table becomes the first node of the body.
    scratchDoc.getFirstSection().getBody().prependChild(scratchDoc.importNode(table, true));

    PageSetup pageSetup = scratchDoc.getFirstSection().getPageSetup();

    // Horizontal: subtract the margins from the page width, then drop the margins.
    double contentWidth = pageSetup.getPageWidth()-pageSetup.getLeftMargin()-pageSetup.getRightMargin();
    pageSetup.setPageWidth(contentWidth);
    pageSetup.setLeftMargin(0);
    pageSetup.setRightMargin(0);

    // Vertical: same treatment for the page height and the top/bottom margins.
    double contentHeight = pageSetup.getPageHeight()-pageSetup.getTopMargin()-pageSetup.getBottomMargin();
    pageSetup.setPageHeight(contentHeight);
    pageSetup.setTopMargin(0);
    pageSetup.setBottomMargin(0);

    ImageSaveOptions pngOptions = new ImageSaveOptions(SaveFormat.PNG);
    int pageIndex = 0;
    while (pageIndex < scratchDoc.getPageCount())
    {
        pngOptions.setPageSet(new PageSet(pageIndex));
        scratchDoc.save("C:\\Temp\\mlknother\\out_"+j+"."+pageIndex+".png", pngOptions);
        pageIndex++;
    }
}

i need cropped image with only data part.

your code gives this images:
table_1.1.0.png (11.5 KB)

My requirement is to get cropped images, i.e. only the data part should be there, without any white area.

@aelum In this case you need to determine the actual size of the table and adjust page size accordingly. You can use LayoutCollector and LayoutEnumerator to get actual bounds of a table. For example see the following that demonstrates the technique:

// Demonstrates how to read the layout rectangle of a table's first and last rows.
Document doc = new Document("C:\\Temp\\in.docx");
// The collector maps a document node to its layout entity; the enumerator is then
// positioned on that entity and can walk up the layout tree.
LayoutCollector collector = new LayoutCollector(doc);
LayoutEnumerator enumerator = new LayoutEnumerator(doc);

Table table = doc.getFirstSection().getBody().getTables().get(0);

// Calculate table size.
// For demonstration purposes, the example assumes the whole table is on the same page.
enumerator.setCurrent(collector.getEntity(table.getFirstRow().getFirstCell().getFirstParagraph()));
// Move enumerator to a row.
while (enumerator.getType()!= LayoutEntityType.ROW)
    enumerator.moveParent();

// Top-left corner is taken from the first row's rectangle.
double top = enumerator.getRectangle().y;
double left = enumerator.getRectangle().x;

// Move enumerator to the last row.
enumerator.setCurrent(collector.getEntity(table.getLastRow().getFirstCell().getFirstParagraph()));
// Move enumerator to a row.
while (enumerator.getType()!= LayoutEntityType.ROW)
    enumerator.moveParent();

// Bottom-right corner is taken from the last row's rectangle.
double bottom = enumerator.getRectangle().y + enumerator.getRectangle().height;
double right = enumerator.getRectangle().x + enumerator.getRectangle().width;

System.out.println("X=" + left + "; Y="+top+"; X1="+right+"; Y1="+bottom);

How can I integrate the above code (LayoutCollector, LayoutEnumerator) into my current code? Currently I am using the code below.

/**
 * Renders every table found directly in the body of each section of the document.
 * Sections are numbered from 1 and tables from 0 within their section; both numbers
 * are passed on to RenderTable. Any exception is printed and not rethrown.
 *
 * @param doc the document whose tables are rendered
 */
public static void generateTableAsPng(Document doc) {
    try {
        int sectionNumber = 1;
        for (Section section : doc.getSections()) {
            int tableNumber = 0;
            for (Table table : section.getBody().getTables()) {
                RenderTable(table, tableNumber, sectionNumber);
                tableNumber++;
            }
            sectionNumber++;
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}

private static void RenderTable(Table table, int j, int k) throws Exception {
    Document tempDoc = (Document) table.getDocument().deepClone(false);
    tempDoc.appendChild(tempDoc.importNode(table.getAncestor(NodeType.SECTION).deepClone(false), false));
    tempDoc.ensureMinimum();

    tempDoc.getFirstSection().getBody().prependChild(tempDoc.importNode(table, true));

    ImageSaveOptions opt = new ImageSaveOptions(SaveFormat.PNG);
    for (int i = 0; i < tempDoc.getPageCount(); i++) {
        opt.setPageSet(new PageSet(i));
        tempDoc.save("C:\Users\DeLL\Downloads\output1\temp\tables\table_" + k + "." + j + "." + i + ".png",
                opt);
    }
}

@aelum Once you calculated size of the table using the suggested code, you can set height and width of page accordingly in temporary document page setup:

// Reset page margins.
PageSetup ps = tempDoc.getFirstSection().getPageSetup();
ps.setPageWidth(ps.getPageWidth()-ps.getLeftMargin()-ps.getRightMargin());
ps.setLeftMargin(0);
ps.setRightMargin(0);
ps.setPageHeight(ps.getPageHeight()-ps.getTopMargin()-ps.getBottomMargin());
ps.setTopMargin(0);
ps.setBottomMargin(0);

// Set the calculated width and height as the page size.
// calculatedWidth and calculatedHeight are placeholders for the table size measured
// with LayoutCollector/LayoutEnumerator (right - left and bottom - top).
ps.setPageWidth(calculatedWidth);
ps.setPageHeight(calculatedHeight);

Some extra images are generated when using the code below.
Please provide an accurate solution.

/**
 * Measures every table found directly in the body of each section with the layout
 * API and passes the measured width and height to RenderTable.
 * Sections are numbered from 1 and tables from 0 within their section.
 *
 * NOTE(review): exceptions raised inside the loop are printed and swallowed even
 * though the method declares {@code throws Exception}.
 *
 * @param doc the document whose tables are rendered
 * @throws Exception from creating the layout collector or enumerator
 */
public static void generateTableAsPng(Document doc) throws Exception {

    LayoutCollector collector = new LayoutCollector(doc);
    LayoutEnumerator enumerator = new LayoutEnumerator(doc);

    int sectionNumber = 1;
    try {
        for (Section section : doc.getSections()) {
            int tableNumber = 0;
            for (Table table : section.getBody().getTables()) {

                // Calculate table size.
                // For demonstration purposes, the example assumes the whole table is on
                // the same page.
                // NOTE(review): if a table continues on another page, the first-row and
                // last-row rectangles presumably belong to different pages, so
                // bottom - top would not be the table height — confirm.
                enumerator.setCurrent(collector.getEntity(table.getFirstRow().getFirstCell().getFirstParagraph()));
                // Move enumerator to a row.
                while (enumerator.getType() != LayoutEntityType.ROW)
                    enumerator.moveParent();

                // Top-left corner is taken from the first row's rectangle.
                double top = enumerator.getRectangle().y;
                double left = enumerator.getRectangle().x;

                // Move enumerator to the last row.
                enumerator.setCurrent(collector.getEntity(table.getLastRow().getFirstCell().getFirstParagraph()));
                // Move enumerator to a row.
                while (enumerator.getType() != LayoutEntityType.ROW)
                    enumerator.moveParent();

                // Bottom-right corner is taken from the last row's rectangle.
                double bottom = enumerator.getRectangle().y + enumerator.getRectangle().height;
                double right = enumerator.getRectangle().x + enumerator.getRectangle().width;

                RenderTable(table, tableNumber++, sectionNumber, right - left, bottom - top);
            }
            sectionNumber++;
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}

/**
 * Copies the table into a temporary document, zeroes the page margins, sets the page
 * size to the supplied width and height and saves every resulting page as
 * "table_<sectionNumber>.<tableNumber>.<page>.png".
 *
 * NOTE(review): the temporary document still holds the paragraph left after the
 * table; with a page exactly as tall as the table it presumably flows onto an extra
 * page, which would produce an extra image — confirm.
 *
 * @param table         the table to render
 * @param tableNumber   table number within its section, used in the file name
 * @param sectionNumber section number, used in the file name
 * @param width         page width to apply
 * @param height        page height to apply
 * @throws Exception propagated from the document and save calls
 */
private static void RenderTable(Table table, int tableNumber, int sectionNumber, double width, double height)
        throws Exception {
    // Childless clone of the source document plus a childless copy of the table's section.
    Document scratchDoc = (Document) table.getDocument().deepClone(false);
    scratchDoc.appendChild(scratchDoc.importNode(table.getAncestor(NodeType.SECTION).deepClone(false), false));
    scratchDoc.ensureMinimum();

    // The imported table becomes the first node of the body.
    scratchDoc.getFirstSection().getBody().prependChild(scratchDoc.importNode(table, true));

    PageSetup pageSetup = scratchDoc.getFirstSection().getPageSetup();

    // Shrink the page to its former content area and remove all margins.
    double contentWidth = pageSetup.getPageWidth() - pageSetup.getLeftMargin() - pageSetup.getRightMargin();
    pageSetup.setPageWidth(contentWidth);
    pageSetup.setLeftMargin(0);
    pageSetup.setRightMargin(0);
    double contentHeight = pageSetup.getPageHeight() - pageSetup.getTopMargin() - pageSetup.getBottomMargin();
    pageSetup.setPageHeight(contentHeight);
    pageSetup.setTopMargin(0);
    pageSetup.setBottomMargin(0);

    // Apply the measured table size as the final page size.
    pageSetup.setPageWidth(width);
    pageSetup.setPageHeight(height);

    ImageSaveOptions pngOptions = new ImageSaveOptions(SaveFormat.PNG);
    int pageIndex = 0;
    while (pageIndex < scratchDoc.getPageCount()) {
        pngOptions.setPageSet(new PageSet(pageIndex));
        scratchDoc.save("C:\\Users\\DeLL\\Downloads\\output1\\temp\\tables\\table_" + sectionNumber + "." + tableNumber
                + "." + pageIndex + ".png", pngOptions);
        pageIndex++;
    }
}

without cropping done image attached: cli.zip (480.1 KB)

output image attached: tables1.zip (435.5 KB)

please check the output image attached and without cropped output image

@aelum Please try using code like this:

// Render every table that is not nested inside another table, numbering them from 0.
Document doc = new Document("C:\\Temp\\in.docx");
Iterable<Table> tables = doc.getChildNodes(NodeType.TABLE, true);
int tableIndex = 0;
for (Table candidate : tables)
{
    // A TABLE ancestor means this table sits inside another table: skip it.
    if (candidate.getAncestor(NodeType.TABLE) != null)
    {
        continue;
    }
    RenderTable(candidate, tableIndex);
    tableIndex++;
}
/**
 * Copies the table into a temporary document and hands it to RenderTablePart one
 * page at a time. A single-page document is passed as is; otherwise each page is
 * extracted into its own document first.
 *
 * @param table      the table to render
 * @param tableIndex index of the table, forwarded to RenderTablePart
 * @throws Exception propagated from the document calls and from RenderTablePart
 */
private static void RenderTable(Table table, int tableIndex) throws Exception
{
    // Childless clone of the source document plus a childless copy of the table's section.
    Document scratchDoc = (Document)table.getDocument().deepClone(false);
    scratchDoc.appendChild(scratchDoc.importNode(table.getAncestor(NodeType.SECTION).deepClone(false), false));
    scratchDoc.ensureMinimum();

    // The imported table becomes the first node of the body.
    scratchDoc.getFirstSection().getBody().prependChild(scratchDoc.importNode(table, true));

    int pageCount = scratchDoc.getPageCount();
    if (pageCount == 1) {
        RenderTablePart(scratchDoc, tableIndex, 0);
        return;
    }

    for (int pageIndex = 0; pageIndex < pageCount; pageIndex++)
    {
        Document singlePageDoc = scratchDoc.extractPages(pageIndex, 1);
        RenderTablePart(singlePageDoc, tableIndex, pageIndex);
    }
}

/**
 * Renders the first table of a one-page document to "out_<tableIndex>_<partIndex>.png",
 * with the page cropped to the table's measured bounds.
 * The bounds span from the top-left of the first row to the bottom-right of the last row.
 * A document without a table in the first section's body is skipped.
 *
 * @param oneTableDoc document holding (a part of) one table on a single page
 * @param tableIndex  index of the table, used in the file name
 * @param partIndex   index of the table part (page), used in the file name
 * @throws Exception propagated from the layout and save calls, or when a row layout
 *                   entity cannot be located
 */
private static void RenderTablePart(Document oneTableDoc, int tableIndex, int partIndex) throws Exception
{
    Table table = oneTableDoc.getFirstSection().getBody().getTables().get(0);
    if (table == null)
    {
        // An extracted page can hold only the paragraph that follows the table;
        // there is nothing to measure or render in that case.
        return;
    }

    LayoutCollector collector = new LayoutCollector(oneTableDoc);
    LayoutEnumerator enumerator = new LayoutEnumerator(oneTableDoc);

    // Calculate table size.
    // The whole table part is expected to be on the same page.
    enumerator.setCurrent(collector.getEntity(table.getFirstRow().getFirstCell().getFirstParagraph()));
    moveToEnclosingRow(enumerator);

    double top = enumerator.getRectangle().y;
    double left = enumerator.getRectangle().x;

    // Move enumerator to the last row.
    enumerator.setCurrent(collector.getEntity(table.getLastRow().getFirstCell().getFirstParagraph()));
    moveToEnclosingRow(enumerator);

    double bottom = enumerator.getRectangle().y + enumerator.getRectangle().height;
    double right = enumerator.getRectangle().x + enumerator.getRectangle().width;

    // Reset margins
    PageSetup ps = oneTableDoc.getFirstSection().getPageSetup();
    ps.setPageWidth(ps.getPageWidth()-ps.getLeftMargin()-ps.getRightMargin());
    ps.setLeftMargin(0);
    ps.setRightMargin(0);
    ps.setPageHeight(ps.getPageHeight()-ps.getTopMargin()-ps.getBottomMargin());
    ps.setTopMargin(0);
    ps.setBottomMargin(0);

    // Set calculated width and height
    ps.setPageWidth(right - left);
    ps.setPageHeight(bottom - top);

    oneTableDoc.updatePageLayout();

    // Save table part.
    oneTableDoc.save("C:\\Temp\\out_" + tableIndex + "_" + partIndex + ".png");
}

/** Moves the enumerator up the layout tree until it is positioned on a ROW entity. */
private static void moveToEnclosingRow(LayoutEnumerator enumerator) throws Exception
{
    while (enumerator.getType() != LayoutEntityType.ROW)
    {
        // Stop instead of looping forever when the top of the layout tree is reached.
        if (!enumerator.moveParent())
        {
            throw new IllegalStateException("No enclosing ROW layout entity found");
        }
    }
}