How to create a content control (SDT) when the search text spans two bullets

Hi Team,

Let’s take an example in which a document consists of the text below:

  1. Hello
  2. World

How can I write a regex pattern that matches this text and creates a content control? I need to match both Hello and World, and the whole match should be wrapped in a single content control.

// NOTE(review): this pattern expects the list labels ("1", "2") and plain whitespace between
// the two items. In Aspose.Words list labels are not part of the searchable content and a
// paragraph break is matched with the `&p` meta-character, so this pattern is not expected to
// match two separate list items as written — confirm against the find-and-replace docs.
string pattern = @"1\s*Hello\s*2\s*World";

// Every match is handed to the callback, which is responsible for inserting the content control.
var options = new FindReplaceOptions {
    ReplacingCallback = new ReplaceWithContentControlHandler(doc, "MyContentControl"),
    MatchCase = false,
    FindWholeWordsOnly = false,
    SmartParagraphBreakReplacement = true
};

// Build the regex once and pass that single instance to Replace.
var regex = new Regex(pattern, RegexOptions.IgnoreCase);

doc.Range.Replace(regex, "", options);

/// <summary>
/// Find-and-replace callback that wraps each match in an inline rich-text content control
/// (structured document tag) whose title is the value supplied at construction.
/// </summary>
/// <remarks>
/// The callback edits the document itself and always returns <c>ReplaceAction.Skip</c>, so the
/// replacement string passed to <c>Range.Replace</c> is not applied.
/// NOTE(review): an inline content control lives inside one paragraph; a match that crosses a
/// paragraph boundary is only processed within the paragraph where it starts — confirm how
/// such matches should be handled before relying on this handler for them.
/// </remarks>
class ReplaceWithContentControlHandler : IReplacingCallback {
    private readonly Document _doc;
    private readonly string _title;

    /// <summary>Creates the handler.</summary>
    /// <param name="doc">Document that owns the nodes created by this handler.</param>
    /// <param name="title">Title assigned to every content control created.</param>
    public ReplaceWithContentControlHandler(Document doc, string title) {
        _doc = doc;
        _title = title;
    }

    /// <summary>
    /// Replaces the matched text with a content control containing that text.
    /// </summary>
    /// <param name="e">Match information supplied by the find-and-replace engine.</param>
    /// <returns>Always <c>ReplaceAction.Skip</c>; the document is modified directly.</returns>
    public ReplaceAction Replacing(ReplacingArgs e) {
        Node matchNode = e.MatchNode;
        if (matchNode == null) {
            return ReplaceAction.Skip;
        }

        string matchText = e.Match.Value;

        if (matchNode.NodeType == NodeType.Paragraph) {
            WrapInParagraph((Paragraph)matchNode, e.MatchOffset, matchText);
        }
        else if (matchNode.NodeType == NodeType.Run) {
            WrapInRun((Run)matchNode, e.MatchOffset, matchText);
        }

        return ReplaceAction.Skip;
    }

    // Builds an unlocked inline rich-text content control holding exactly one run with the given text.
    private StructuredDocumentTag CreateContentControl(string text) {
        StructuredDocumentTag sdt = new StructuredDocumentTag(_doc, SdtType.RichText, MarkupLevel.Inline) {
            LockContentControl = false,
            LockContents = false,
            Title = _title,
            IsShowingPlaceholderText = false
        };
        // Drop whatever default content the new tag was created with before adding the match text.
        sdt.RemoveAllChildren();
        sdt.AppendChild(new Run(_doc, text));
        return sdt;
    }

    // Rebuilds the paragraph as: text before the match, the content control, text after the match.
    private void WrapInParagraph(Paragraph paragraph, int matchOffset, string matchText) {
        string paragraphText = paragraph.GetText();

        // Clamp both ends so an offset/length outside the paragraph text cannot throw.
        int start = matchOffset < paragraphText.Length ? matchOffset : paragraphText.Length;
        int end = start + matchText.Length;
        if (end > paragraphText.Length) {
            end = paragraphText.Length;
        }

        string textBeforeMatch = paragraphText.Substring(0, start);
        // NOTE(review): GetText() is assumed to end with the paragraph/section/cell terminator
        // character; it is trimmed so it is not re-inserted as literal text — confirm.
        string textAfterMatch = paragraphText.Substring(end).TrimEnd('\r', '\f', '\a');

        // The paragraph is flattened to plain runs; per-run formatting of the old children is not kept.
        paragraph.RemoveAllChildren();

        if (!string.IsNullOrEmpty(textBeforeMatch)) {
            paragraph.AppendChild(new Run(_doc, textBeforeMatch));
        }

        paragraph.AppendChild(CreateContentControl(matchText));

        if (!string.IsNullOrEmpty(textAfterMatch)) {
            paragraph.AppendChild(new Run(_doc, textAfterMatch));
        }
    }

    // Splits the run around the match, puts the content control at the split point, and removes
    // the matched characters from this run and, when the match continues, from following runs.
    private void WrapInRun(Run matchRun, int matchOffset, string matchText) {
        CompositeNode parent = matchRun.ParentNode;
        if (parent == null) {
            return;
        }

        string runText = matchRun.Text;
        int start = matchOffset < runText.Length ? matchOffset : runText.Length;
        int end = start + matchText.Length;
        if (end > runText.Length) {
            end = runText.Length;
        }

        // Matched characters that lie beyond this run (the match spans several runs).
        int remaining = matchText.Length - (end - start);

        if (start > 0) {
            // Clone so the leading text keeps the original run's formatting.
            Run beforeRun = (Run)matchRun.Clone(true);
            beforeRun.Text = runText.Substring(0, start);
            parent.InsertBefore(beforeRun, matchRun);
        }

        parent.InsertBefore(CreateContentControl(matchText), matchRun);

        if (end < runText.Length) {
            Run afterRun = (Run)matchRun.Clone(true);
            afterRun.Text = runText.Substring(end);
            parent.InsertAfter(afterRun, matchRun);
        }

        // Consume the rest of the match from the sibling runs that follow; other node types are left alone.
        Node next = matchRun.NextSibling;
        while (remaining > 0 && next != null) {
            Node following = next.NextSibling;
            if (next.NodeType == NodeType.Run) {
                Run nextRun = (Run)next;
                if (nextRun.Text.Length <= remaining) {
                    remaining -= nextRun.Text.Length;
                    parent.RemoveChild(nextRun);
                }
                else {
                    nextRun.Text = nextRun.Text.Substring(remaining);
                    remaining = 0;
                }
            }
            next = following;
        }

        parent.RemoveChild(matchRun);
    }
}

Is it possible to do this? I have the code above, but it is unable to match text that spans the two bullets.

@imsuhas Could you please attach your input and expected output documents here for testing? We will check your document and provide you more information.

@alexey.noskov
Input_Doc.docx (15.0 KB)

Output_Doc.docx (21.8 KB)

Please find the Input_Doc and Output_Doc attached.

@imsuhas You can use the following approach to achieve this:

// This is text we would like to wrap into SDT.
string content = "Hello\r\nWorld";

Document doc = new Document(@"C:\Temp\in.docx");
DocumentBuilder builder = new DocumentBuilder(doc);

// Paragraph break in the document can be matched with `&p` meta-character.
// The following string will be used as a marker.
string placeholder = "TMP_PLACEHOLDER";
doc.Range.Replace(content.Replace("\r\n", "&p"), placeholder);

// Find placeholder, insert SDT and insert the content into SDT.
// Iterate over a snapshot (ToArray) because the loop body moves paragraphs and inserts
// new runs, which would otherwise modify the live collection while it is being enumerated.
foreach (Run r in doc.GetChildNodes(NodeType.Run, true).ToArray())
{
    if (r.Text == placeholder)
    {
        Paragraph para = r.ParentParagraph;

        // Wrap paragraph into SDT.
        StructuredDocumentTag sdt = new StructuredDocumentTag(doc, SdtType.RichText, MarkupLevel.Block);
        // Remove the tag's default content so it contains only the moved paragraph.
        sdt.RemoveAllChildren();
        para.ParentNode.InsertBefore(sdt, para);
        sdt.AppendChild(para);
        // Clear the marker text; the real content is written by the builder below.
        r.Text = "";

        builder.MoveTo(sdt);
        builder.Write(content);
    }
}

doc.Save(@"C:\Temp\out.docx");

@alexey.noskov It doesn’t work. The content control is being created only for Hello.

@imsuhas Here is output produced on my side: out.docx (16.1 KB)

@alexey.noskov I use Aspose.Words (24.10.0) and this is the output I’m getting:
output2.docx (16.1 KB)

@imsuhas Please try using the latest 25.4 version of Aspose.Words.

@alexey.noskov Now it’s working. But I need to match using a regex pattern. Why is it not working with regex?

@imsuhas The same approach will work with regex. But you should note that list labels are not part of content and paragraph break should be matched with &p meta character as shown in the example above.

1 Like