Find Tags in a Word Document & Return an ArrayList of the Paragraphs Containing Those Tags (VB.NET)

I have a document that has text tags (eg #Start#).

I need to get either:

  • an ArrayList of only those Paragraphs that contain the tag, OR
  • an object containing only those Nodes that contain the tag

We are using Aspose.Words 21.4

@caseflow,

You can meet this requirement by using the find-and-replace functionality of the Aspose.Words for .NET API. Please try the following C# code:

// Load the document that contains the text tags.
Document doc = new Document("C:\\Temp\\Word.docx");

// The callback is invoked once per match and collects the owning paragraphs.
MyReplaceEvaluator evaluator = new MyReplaceEvaluator();
FindReplaceOptions findReplaceOptions = new FindReplaceOptions
{
    ReplacingCallback = evaluator
};

// Search for the tag; a Regex pattern can be passed instead of a plain string.
doc.Range.Replace("#Start#", "", findReplaceOptions);

// The evaluator's list now holds the paragraphs in which the tag was found.
ArrayList list = evaluator.list;

/// <summary>
/// Find-and-replace callback that records the paragraph of every match and
/// always returns <see cref="ReplaceAction.Skip"/>.
/// </summary>
/// <remarks>
/// Runs that only partially overlap a match are split via <see cref="SplitRun"/>,
/// so new Run nodes may be inserted into the document while the search is running.
/// </remarks>
private class MyReplaceEvaluator : IReplacingCallback
{
    /// <summary>
    /// Paragraphs in which at least one match was found; each paragraph is added only once.
    /// </summary>
    public ArrayList list = new ArrayList();

    /// <summary>
    /// Called for each match; walks the runs covered by the match and stores the
    /// parent paragraph of the last one in <see cref="list"/>.
    /// </summary>
    /// <param name="e">Match details supplied by the find-and-replace operation.</param>
    /// <returns>Always <see cref="ReplaceAction.Skip"/>.</returns>
    ReplaceAction IReplacingCallback.Replacing(ReplacingArgs e)
    {
        // This is a Run node that contains either the beginning or the complete match.
        Node currentNode = e.MatchNode;

        // The first (and maybe the only) run can contain text before the match;
        // in that case split the run so that currentNode starts exactly at the match.
        if (e.MatchOffset > 0)
        {
            currentNode = SplitRun((Run)currentNode, e.MatchOffset);
        }

        // Only the last node of the match is needed afterwards, so track just that one
        // instead of collecting every node in a list.
        Node lastMatchNode = null;

        // Consume whole runs while they fit entirely inside the remaining match text.
        int remainingLength = e.Match.Value.Length;
        while (
            (remainingLength > 0) &&
            (currentNode != null) &&
            (currentNode.GetText().Length <= remainingLength))
        {
            lastMatchNode = currentNode;
            remainingLength -= currentNode.GetText().Length;

            // Select the next Run node.
            // Have to loop because there could be other nodes such as BookmarkStart etc.
            do
            {
                currentNode = currentNode.NextSibling;
            }
            while ((currentNode != null) && (currentNode.NodeType != NodeType.Run));
        }

        // The match ends inside this run: split off the trailing text so the run
        // holds only matched text.
        if ((currentNode != null) && (remainingLength > 0))
        {
            SplitRun((Run)currentNode, remainingLength);
            lastMatchNode = currentNode;
        }

        // Nothing was collected (e.g. a zero-length match or no match node);
        // the original indexed an empty list here and threw ArgumentOutOfRangeException.
        if (lastMatchNode == null)
        {
            return ReplaceAction.Skip;
        }

        Run targetRun = (Run)lastMatchNode;
        Paragraph parentParagraph = targetRun.ParentParagraph;

        // Never store null, and keep each paragraph only once.
        if ((parentParagraph != null) && !list.Contains(parentParagraph))
        {
            list.Add(parentParagraph);
        }

        return ReplaceAction.Skip;
    }

    /// <summary>
    /// Splits text of the specified run into two runs.
    /// Inserts the new run just after the specified run.
    /// </summary>
    /// <param name="run">Run to split; it keeps the text before <paramref name="position"/>.</param>
    /// <param name="position">Zero-based character index at which the text is divided.</param>
    /// <returns>The newly inserted run holding the text from <paramref name="position"/> onwards.</returns>
    private static Run SplitRun(Run run, int position)
    {
        // Clone first so the new run is a copy of the original before its text is trimmed.
        Run afterRun = (Run)run.Clone(true);
        afterRun.Text = run.Text.Substring(position);
        run.Text = run.Text.Substring(0, position);
        run.ParentNode.InsertAfter(afterRun, run);
        return afterRun;
    }
}

Thanks for the prompt response.

It worked.

@caseflow,

It is great that you were able to resolve this issue on your end. If you have further inquiries or need any help in the future, please let us know by posting a new thread in the Aspose.Words forum.