Removing whitespace between content controls in child documents

I am inserting child documents into a master document, or technically, merging a bunch of different files together. I am placing content in content controls, and removing those content controls if there is no content to be put in them. The issue is that removing these content controls is not deleting the space that they occupied and is leaving empty paragraphs. Can I remove all of the leftover whitespace from the removed content control?

/// <summary>
/// Fills every content control (structured document tag) in <paramref name="doc"/> whose
/// <c>Tag</c> matches the <c>Name</c> of an item in <paramref name="children"/>, and removes
/// the content controls that have no matching item. After each control is handled, the node
/// that contained it is passed to <see cref="CleanParagraphs"/>.
/// </summary>
/// <param name="doc">Document whose content controls are processed in place.</param>
/// <param name="children">Items supplying the text; matched to a control by <c>Name</c> == <c>Tag</c>.</param>
/// <param name="processedContent">Receives the <c>Content</c> of every item written into a control.</param>
/// <remarks>
/// A failure while handling one control is logged and does not stop the remaining controls
/// from being processed.
/// </remarks>
private void PopulateContentControls(Document doc, List<TemplateItem> children, HashSet<string> processedContent)
{
    // Take a snapshot before looping: the loop removes nodes from the document, and
    // enumerating the node collection while it is being modified is not safe.
    List<StructuredDocumentTag> controls = doc.GetChildNodes(NodeType.StructuredDocumentTag, true)
        .Cast<StructuredDocumentTag>()
        .ToList();
    _logger.LogDebug(AppGlobals.DebugMessages.StartProcessingContentControls, controls.Count);

    foreach (StructuredDocumentTag sdt in controls)
    {
        try
        {
            _logger.LogDebug(AppGlobals.DebugMessages.ProcessingSDT, sdt.Tag);

            // Capture the parent up front: once the control is removed from the tree its
            // ParentNode is null, and the clean-up below would silently do nothing.
            Node parent = sdt.ParentNode;

            var correspondingChild = children.FirstOrDefault(c => c.Name == sdt.Tag);
            if (correspondingChild != null)
            {
                _logger.LogInformation(AppGlobals.DebugMessages.FoundMatchingChild, sdt.Tag, correspondingChild.Name);
                if (sdt.GetChildNodes(NodeType.Run, true).FirstOrDefault() is Run run)
                {
                    // Reuse the first existing run so the new text keeps its formatting.
                    _logger.LogDebug(AppGlobals.DebugMessages.RunNodePresent);
                    run.Text = correspondingChild.Content;
                }
                else
                {
                    // No run to reuse: replace whatever the control holds with a fresh run.
                    // NOTE(review): appending a Run directly presumably only works for
                    // inline-level controls - confirm behavior for block-level ones.
                    _logger.LogWarning(AppGlobals.DebugMessages.NoRunNodePresent);
                    sdt.RemoveAllChildren();
                    sdt.AppendChild(new Run(doc, correspondingChild.Content));
                }

                processedContent.Add(correspondingChild.Content);
            }
            else
            {
                _logger.LogInformation(AppGlobals.DebugMessages.NoMatchingChild, sdt.Tag);
                // Removing the control takes its whole subtree with it.
                sdt.Remove();
            }

            CleanParagraphs(parent);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, AppGlobals.ErrorMessages.ProcessSDTError, sdt.Tag);
        }
    }
}

/// <summary>
/// Removes whitespace-only runs from a paragraph and deletes the paragraph itself when
/// nothing is left in it. Does nothing when <paramref name="node"/> is null or is not a
/// <c>Paragraph</c>.
/// </summary>
/// <param name="node">Node to clean; typically the former parent of a content control.</param>
private void CleanParagraphs(Node node)
{
    if (node is not Paragraph paragraph)
    {
        return;
    }

    // Iterate over a copy because runs are removed from the paragraph inside the loop.
    // NOTE(review): IsNullOrWhiteSpace also matches runs that hold only control characters
    // such as a form feed - confirm that dropping those is intended.
    foreach (Run run in paragraph.Runs.Cast<Run>().ToList())
    {
        if (string.IsNullOrWhiteSpace(run.Text))
        {
            run.Remove();
        }
    }

    // Delete the paragraph only when it is truly empty. Checking just for remaining runs
    // would also delete paragraphs that still hold non-run content (another content
    // control, a shape, a field, ...).
    if (!paragraph.HasChildNodes)
    {
        paragraph.Remove();
    }
}

@amattice Could you please attach the problematic document in which the SDT cannot be removed entirely? We will check the issue on our side and provide you with more information.

@alexey.noskov Here is the file I am inserting. For example, some of those content controls may be removed because there is no result content for them. The problem is not specific to this document, though.
dehum.docx (55.6 KB)

@amattice You should remove empty paragraphs between SDTs too. For example see the following code:

Document doc = new Document(@"C:\Temp\in.docx");

// True when the paragraph exports to nothing but whitespace as plain text.
bool IsBlank(Paragraph paragraph) => paragraph.ToString(SaveFormat.Text).Trim().Length == 0;

// Materialize the tags into a list before the loop, since the loop removes nodes from the document.
var tagsToRemove = new List<StructuredDocumentTag>(
    doc.GetChildNodes(NodeType.StructuredDocumentTag, true).Cast<StructuredDocumentTag>());

foreach (StructuredDocumentTag tag in tagsToRemove)
{
    switch (tag.Level)
    {
        case MarkupLevel.Block:
        {
            // Drop the blank paragraph that directly follows the tag, then the tag itself.
            if (tag.NextSibling is Paragraph following && IsBlank(following))
            {
                following.Remove();
            }

            tag.Remove();
            break;
        }

        case MarkupLevel.Inline:
        {
            // Look up the hosting paragraph before the tag is detached from it.
            Paragraph host = (Paragraph)tag.GetAncestor(NodeType.Paragraph);
            tag.Remove();

            // Drop the blank paragraph that directly follows the host paragraph.
            if (host.NextSibling is Paragraph following && IsBlank(following))
            {
                following.Remove();
            }

            // Drop the host paragraph too if removing the tag left it blank.
            if (IsBlank(host))
            {
                host.Remove();
            }

            break;
        }
    }
}

doc.Save(@"C:\Temp\out.docx");

Much thanks that works perfectly!

1 Like