Split tables running over pages

Nachti · January 25, 2023, 1:36pm

Hi,

I want to edit an existing document and split tables that run over pages.
In detail, a table starts on page 1 and runs till middle of page 2.

I want to split the table at the first row on page 2.

Can you help me with code ?

Kind regards,
@Nachti

eduardo.canal · January 25, 2023, 1:38pm

@Nachti, can you provide the base file that you want to edit?

Nachti · January 25, 2023, 2:44pm

Sure RunningOverPages.docx (16.3 KB)

eduardo.canal · January 25, 2023, 3:39pm

@Nachti thanks for the additional information, for your case you can use the following code:

// Splits every table that flows over a page boundary into one table per page.
// All edits are made on a clone of the loaded document; the clone is what gets saved.
Document doc = new Document(@"C:\Temp\input.docx");

Document tempDoc = (Document)doc.Clone(true);

NodeCollection tables = tempDoc.GetChildNodes(NodeType.Table, true);
for (int i = 0; i < tables.Count; i++)
{
    Table table = (Table)tables[i];

    // Row indices at which a new table has to start; the last entry is always rowCount.
    ArrayList splitIndices = new ArrayList();
    int rowCount = table.Rows.Count;

    LayoutCollector collector = new LayoutCollector(tempDoc);
    int startPage = collector.GetStartPageIndex(table.FirstRow.FirstCell.FirstParagraph);
    int endPage = collector.GetEndPageIndex(table.LastRow.LastCell.LastParagraph);

    if (endPage > startPage)
    {
        // Page on which the rows inspected so far ended.
        int currentPage = startPage;
        for (int x = 0; x < rowCount; x++)
        {
            Row row = table.Rows[x];
            int rowEndPage = collector.GetEndPageIndex(row.LastCell.LastParagraph);

            if (rowEndPage > currentPage)
            {
                // Never split in front of row 0: that would strip every row from the
                // original table and hand SplitTable a start index of 0.
                if (x > 0)
                {
                    splitIndices.Add(x);
                }

                currentPage = rowEndPage;
            }
        }

        splitIndices.Add(rowCount);

        // Create the pieces back to front so each one is inserted right after the original table.
        for (int x = splitIndices.Count - 1; x > 0; x--)
        {
            SplitTable(table, (int)splitIndices[x - 1], (int)splitIndices[x]);
        }

        // Trim the original table down to the rows that precede the first split.
        for (int x = (int)splitIndices[0]; x < rowCount; x++)
        {
            table.LastRow.Remove();
        }
    }
}
tempDoc.Save(@"C:\Temp\output.docx");
/// <summary>
/// Inserts, after <paramref name="table"/>, a copy that keeps only the rows in
/// [<paramref name="startIndex"/>, <paramref name="endIndex"/>), with an empty
/// paragraph placed between the two tables. The source table itself is not trimmed here.
/// </summary>
/// <param name="table">Table to copy rows from.</param>
/// <param name="startIndex">Index of the first row kept in the copy.</param>
/// <param name="endIndex">Index one past the last row kept in the copy.</param>
/// <returns>The newly inserted table.</returns>
static Table SplitTable(Table table, int startIndex, int endIndex)
{
    int sourceRowCount = table.Rows.Count;

    Table newTable = (Table)table.Clone(true);
    table.ParentNode.InsertAfter(newTable, table);

    // Drop the rows in front of the requested range.
    for (int i = 0; i < startIndex; i++)
    {
        newTable.FirstRow.Remove();
    }

    // Drop the rows behind the requested range.
    for (int i = endIndex; i < sourceRowCount; i++)
    {
        newTable.LastRow.Remove();
    }

    // Repeat the heading row on the copy. With startIndex == 0 the copy still starts
    // with the source's first row, so cloning it again would duplicate the heading.
    if (startIndex > 0 && table.FirstRow.RowFormat.HeadingFormat)
    {
        Row headingRow = (Row)table.FirstRow.Clone(true);
        newTable.InsertBefore(headingRow, newTable.FirstRow);
    }

    // Empty paragraph between the source table and the copy.
    Paragraph separator = new Paragraph(table.Document);
    table.ParentNode.InsertAfter(separator, table);

    return newTable;
}

output.docx (12.1 KB)

Nachti · January 25, 2023, 4:40pm

Thank you very much. That is very helpful.

RunningOverColumns.docx (20.1 KB)

What about tables running over columns?

eduardo.canal · January 25, 2023, 4:49pm

Can you please be more specific about what you want to achieve? The second file that you posted has a completely different format for the table section (in this case the document has two columns).

Nachti · January 25, 2023, 5:04pm

Sure,

I want to split the table on the first row on column2.

eduardo.canal · January 25, 2023, 8:23pm

@Nachti this case is far more complex than the first one. I created a piece of code based on the previous solution:

// Splits tables that live in multi-column sections so that each text column (and each
// following page) gets its own table. Row positions are measured on a single-column
// copy of the document and the resulting indices are applied to the multi-column one.
Document doc = new Document(@"C:\Temp\input.docx");

Document tempDoc = (Document)doc.Clone(true);
Document tempDocNoCol = (Document)tempDoc.Clone(true);

// Clear multiple columns to use as reference
foreach (Section section in tempDocNoCol.Sections)
{
    section.PageSetup.TextColumns.SetCount(1);
}

int sectionIndex = 0;
foreach (Section section in tempDoc.Sections)
{
    // Snapshot the tables: splitting inserts new tables into this section, and a live
    // collection would then no longer line up index-by-index with tablesNoCol.
    Node[] tables = section.GetChildNodes(NodeType.Table, true).ToArray();
    NodeCollection tablesNoCol = tempDocNoCol.Sections[sectionIndex++].GetChildNodes(NodeType.Table, true);

    // Amount of columns in the page
    var textColCount = section.PageSetup.TextColumns.Count;
    for (int i = 0; i < tables.Length; i++)
    {
        Table table = (Table)tables[i];
        ArrayList splitIndices = new ArrayList();
        if (tablesNoCol != null && tablesNoCol.Count > i)
        {
            Table tableNoCol = (Table)tablesNoCol[i];
            int rowCount = tableNoCol.Rows.Count;

            // Evaluate if the current section contains more than 1 text column
            if (textColCount > 1)
            {
                LayoutCollector collector = new LayoutCollector(tempDocNoCol);
                int startPage = collector.GetStartPageIndex(tableNoCol.FirstRow.FirstCell.FirstParagraph);
                int endPage = collector.GetEndPageIndex(tableNoCol.LastRow.LastCell.LastParagraph);

                if (endPage > startPage)
                {
                    // Check how many rows fit in the first page of the table
                    int rowsInFirstPage = 0;
                    for (int x = 0; rowsInFirstPage == 0 && x < rowCount; x++)
                    {
                        Row row = tableNoCol.Rows[x];
                        int endRow = collector.GetEndPageIndex(row.LastCell.LastParagraph);

                        if (endRow > startPage)
                        {
                            rowsInFirstPage = x;
                        }
                    }

                    if (rowsInFirstPage == 0)
                    {
                        rowsInFirstPage = rowCount;
                    }
                    splitIndices.Add(rowsInFirstPage < rowCount ? rowsInFirstPage : rowCount);

                    // Set indices to split the table in the first page
                    for (int x = 2; x <= textColCount; x++)
                    {
                        var rowIndex = rowsInFirstPage * x;
                        // Removing 2 to compensate the extra space generated for the line break in between the tables
                        splitIndices.Add(rowIndex < (rowCount - 2) ? rowIndex - 2 : rowCount);
                    }

                    // Check if the table ends in the first page, if not that implies that all the rest of the table should be placed in a new page
                    var totalRowsInFirstPage = rowsInFirstPage * textColCount - 2 * textColCount;
                    if (totalRowsInFirstPage < rowCount)
                    {
                        // Need to check how many rows fit in an empty page
                        Document temp = (Document)tempDocNoCol.Clone(true);
                        temp.FirstSection.Body.RemoveAllChildren();
                        temp.FirstSection.Body.AppendChild(temp.ImportNode(tableNoCol, true));
                        LayoutCollector tempCollector = new LayoutCollector(temp);
                        Table tempTable = temp.FirstSection.Body.Tables[0];
                        startPage = tempCollector.GetStartPageIndex(tempTable.FirstRow.FirstCell.FirstParagraph);
                        endPage = tempCollector.GetEndPageIndex(tempTable.LastRow.LastCell.LastParagraph);

                        if (endPage > startPage)
                        {
                            // Check how many rows enter in a single page
                            int rowsPerPage = 0;
                            for (int x = 0; rowsPerPage == 0 && x < rowCount; x++)
                            {
                                Row row = tempTable.Rows[x];
                                int endRow = tempCollector.GetEndPageIndex(row.LastCell.LastParagraph);

                                if (endRow > startPage)
                                {
                                    rowsPerPage = x;
                                }
                            }

                            // Insert the rest of the indices
                            if (rowsPerPage == 0)
                            {
                                rowsPerPage = rowCount;
                            }

                            // Rows advanced per following page (same "- 2" compensation as above).
                            // Kept at 1 or more so the loop below always terminates.
                            int pageStep = rowsPerPage - 2;
                            if (pageStep < 1)
                            {
                                pageStep = 1;
                            }

                            // rowIndex is an absolute row index, so it is clamped against
                            // rowCount; clamping against the number of remaining rows would
                            // cut the tail of the table off.
                            var rowIndex = totalRowsInFirstPage + pageStep;
                            splitIndices.Add(rowIndex > rowCount ? rowCount : rowIndex);
                            while (rowIndex < rowCount)
                            {
                                rowIndex += pageStep;
                                splitIndices.Add(rowIndex > rowCount ? rowCount : rowIndex);
                            }

                        }
                        else // This means that the table fits in a single page
                        {
                            splitIndices.Add(rowCount);
                        }
                    }

                    // Keep only strictly increasing indices within (0, rowCount] and make
                    // sure the list ends at rowCount, so every piece holds at least one
                    // row and no row is lost.
                    ArrayList orderedIndices = new ArrayList();
                    int lastIndex = 0;
                    foreach (int candidate in splitIndices)
                    {
                        if (candidate > lastIndex && candidate <= rowCount)
                        {
                            orderedIndices.Add(candidate);
                            lastIndex = candidate;
                        }
                    }
                    if (lastIndex < rowCount)
                    {
                        orderedIndices.Add(rowCount);
                    }

                    // Create the pieces back to front so each one lands right after the original table.
                    for (int x = orderedIndices.Count - 1; x > 0; x--)
                    {
                        SplitTable(table, (int)orderedIndices[x - 1], (int)orderedIndices[x]);
                    }

                    // Trim the original table down to the rows in front of the first split.
                    for (int x = (int)orderedIndices[0]; x < rowCount; x++)
                    {
                        table.LastRow.Remove();
                    }
                }
            }
        }
    }
}
tempDoc.Save(@"C:\Temp\output.docx");
/// <summary>
/// Clones <paramref name="table"/>, inserts the clone right after it, and reduces the
/// clone to the rows in [<paramref name="startIndex"/>, <paramref name="endIndex"/>).
/// An empty paragraph is inserted between the source table and the clone.
/// The source table is left for the caller to trim.
/// </summary>
/// <param name="table">Table whose rows are copied.</param>
/// <param name="startIndex">Index of the first row the clone keeps.</param>
/// <param name="endIndex">Index one past the last row the clone keeps.</param>
/// <returns>The inserted clone.</returns>
static Table SplitTable(Table table, int startIndex, int endIndex)
{
    int sourceRowCount = table.Rows.Count;

    Table newTable = (Table)table.Clone(true);
    table.ParentNode.InsertAfter(newTable, table);

    // Remove the leading rows that belong to earlier pieces.
    for (int i = 0; i < startIndex; i++)
    {
        newTable.FirstRow.Remove();
    }

    // Remove the trailing rows that belong to later pieces.
    for (int i = endIndex; i < sourceRowCount; i++)
    {
        newTable.LastRow.Remove();
    }

    // Carry the heading row over to the clone. Skipped when startIndex == 0 because
    // the clone then already begins with that very row.
    if (startIndex > 0 && table.FirstRow.RowFormat.HeadingFormat)
    {
        Row headingRow = (Row)table.FirstRow.Clone(true);
        newTable.InsertBefore(headingRow, newTable.FirstRow);
    }

    // Empty paragraph between the source table and the clone.
    Paragraph separator = new Paragraph(table.Document);
    table.ParentNode.InsertAfter(separator, table);

    return newTable;
}

Nachti · January 26, 2023, 2:47pm

Thank you very much.

Could it be that the table collection get corrupted after splitting a table
and I have to restart from scratch?

Greetings

eduardo.canal · January 26, 2023, 2:56pm

@Nachti that’s not happening to me. For the document that you posted, I’m getting the following result:
output.docx (15.1 KB)
Can you please post a print of the error?

Nachti · January 26, 2023, 6:14pm

Hi @Eduardo_Canal,

here is the document. I get a null pointer exception.
Annual Financial Report_V2.docx (133.0 KB)

alexey.noskov · January 27, 2023, 2:23pm

@Nachti Please try using the following code. It uses LayoutCollector and LayoutEnumerator to determine where the table needs to be split. One condition is a change of the page index; the other is a change of the row's X coordinate (column break):

// Splits every top-level body table wherever its rows move on to another page or
// another text column, then saves the result.
Document doc = new Document(@"C:\Temp\in.docx");
LayoutCollector collector = new LayoutCollector(doc);
LayoutEnumerator enumerator = new LayoutEnumerator(doc);

// Snapshot the tables first: splitting inserts new tables and paragraphs into the
// document, and the loop must not enumerate a collection that changes underneath it.
// The continuation tables are handled by the inner while loop below.
Node[] tables = doc.GetChildNodes(NodeType.Table, true).ToArray();
foreach (Table t in tables)
{
    // Process only top level table in the main document's body.
    if (t.ParentNode.NodeType != NodeType.Body)
    {
        continue;
    }

    // Keep splitting the continuation until it no longer needs a split.
    Table table = t;
    while (table != null)
    {
        table = SplitTalbeTextColumns(table, collector, enumerator);
        if (table != null)
        {
            // A split changed the document, so the recorded layout is refreshed
            // before the continuation is inspected. No split means no refresh.
            collector.Clear();
            doc.UpdatePageLayout();
        }
    }
}

doc.Save(@"C:\Temp\out.docx");

/// <summary>
/// Splits <paramref name="table"/> at the first row (index 1 or later) that either ends
/// on a later page than the table starts on, or whose left edge differs from that of
/// the first row (column break).
/// </summary>
/// <remarks>
/// A paragraph holding a column break and the text "Continuation of the table" is
/// inserted after the table, followed by a clone that keeps the leading heading rows
/// (rows with <c>RowFormat.HeadingFormat</c> set) plus the rows from the break onwards.
/// </remarks>
/// <param name="table">Table to inspect and, if required, split.</param>
/// <param name="collector">Layout collector attached to the table's document.</param>
/// <param name="enumerator">Layout enumerator for the same document.</param>
/// <returns>The continuation table, or <c>null</c> when no split was needed.</returns>
private static Table SplitTalbeTextColumns(Table table, LayoutCollector collector, LayoutEnumerator enumerator)
{
    int startPageIndex = collector.GetStartPageIndex(table.FirstRow);
    double startRowX = GetRowLeft(table.FirstRow, collector, enumerator);

    // Number of leading heading rows; they are repeated on the continuation table.
    int headingRowCount = 0;
    while (headingRowCount < table.Rows.Count && table.Rows[headingRowCount].RowFormat.HeadingFormat)
    {
        headingRowCount++;
    }

    // Find the first row that leaves the starting page or the starting column.
    // Row 0 is skipped: a table cannot be split in front of its first row.
    int breakIndex = -1;
    for (int i = 1; i < table.Rows.Count; i++)
    {
        Row r = table.Rows[i];

        bool onLaterPage = collector.GetEndPageIndex(r) > startPageIndex;
        if (onLaterPage || GetRowLeft(r, collector, enumerator) != startRowX)
        {
            breakIndex = i;
            break;
        }
    }

    if (breakIndex <= 0)
    {
        return null;
    }

    Table clone = (Table)table.Clone(true);

    // Insert a cloned table after the main table.
    Paragraph para = new Paragraph(table.Document);
    para.AppendChild(new Run(table.Document, ControlChar.ColumnBreak + "Continuation of the table"));

    table.ParentNode.InsertAfter(para, table);
    para.ParentNode.InsertAfter(clone, para);

    // Remove content after the breaking row from the main table.
    while (table.Rows.Count > breakIndex)
    {
        table.LastRow.Remove();
    }

    // Remove the data rows in front of the breaking row from the cloned table.
    // The count is relative to the heading rows, so tables with zero or several
    // heading rows neither lose nor duplicate a data row.
    int rowsToDrop = breakIndex - headingRowCount;
    for (int i = 0; i < rowsToDrop; i++)
    {
        clone.Rows.RemoveAt(headingRowCount);
    }

    return clone;
}

// Returns the left edge of the layout row that contains the first paragraph of the row's first cell.
private static double GetRowLeft(Row row, LayoutCollector collector, LayoutEnumerator enumerator)
{
    enumerator.Current = collector.GetEntity(row.FirstCell.FirstParagraph);
    while (enumerator.Type != LayoutEntityType.Row)
    {
        // Stop climbing when there is no parent left instead of looping forever.
        if (!enumerator.MoveParent())
        {
            break;
        }
    }

    return enumerator.Rectangle.Left;
}

FYI: @Eduardo_Canal