Highlighting and adding a comment on hyperlink text adds the comment twice

Jaibir · February 7, 2024, 7:08am

Hi Team,
While using Aspose.Words to highlight matched text and add a comment to it, the comment is added twice when the matched text is inside a hyperlink.
Test-Document-With-Link.docx (14.3 KB)

I’m using the following code.

/// <summary>
/// Find/replace callback that highlights each match in yellow and, for matches that are
/// not reported as being in a header or footer, inserts a superscript "**" marker run
/// with a review comment anchored on it.
/// </summary>
/// <remarks>
/// Hyperlinks are fields, so their text occurs both in the field code and in the field
/// value; unless the caller sets <c>FindReplaceOptions.IgnoreFieldCodes</c>, this callback
/// is invoked for both occurrences and the comment is added twice.
/// </remarks>
/// <param name="e">Describes the current match: its first node, its offset in that node and the regex match.</param>
/// <returns>Always <c>ReplaceAction.Skip</c>; the document is modified directly, so the engine must not replace anything.</returns>
ReplaceAction IReplacingCallback.Replacing(ReplacingArgs e)
{
    // This is a Run node that contains either the beginning or the complete match.
    AsposeWord.Node currentNode = e.MatchNode;

    // Do not process text that sits inside a comment written by "Aspose Tool".
    // NOTE(review): the comments created below use "Document Review Tool" as author and
    // initial, so this guard never matches them - confirm which name is intended.
    AsposeWord.Node parent = e.MatchNode.ParentNode;
    if (parent != null && parent.ParentNode != null && AsposeWord.NodeType.Comment == parent.ParentNode.NodeType)
    {
        AsposeWord.Comment enclosingComment = (AsposeWord.Comment)parent.ParentNode;
        if (enclosingComment.Author == "Aspose Tool" && enclosingComment.Initial == "Aspose Tool")
        {
            return ReplaceAction.Skip;
        }
    }

    // Resolve the page of the match; fall back to the parent node when the run itself reports 0.
    string pageNo = AsposeDocHighlightHelper.Layout.GetStartPageIndex(e.MatchNode).ToString();
    if (pageNo == "0" && e.MatchNode.ParentNode != null)
        pageNo = AsposeDocHighlightHelper.Layout.GetStartPageIndex(e.MatchNode.ParentNode).ToString();

    // A match that still has no page and lives under a HeaderFooter node gets no comment.
    bool inHeaderOrFooter = pageNo == "0"
        && e.MatchNode.GetAncestor(AsposeWord.NodeType.HeaderFooter) != null;

    string comments = "This is comment added";

    // The first (and maybe the only) run can contain text before the match;
    // in this case it is necessary to split the run.
    if (e.MatchOffset > 0)
        currentNode = SplitRun((AsposeWord.Run)currentNode, e.Match, e.MatchOffset);

    // This array is used to store all nodes of the match for further highlighting.
    ArrayList runs = new ArrayList();

    // Find all runs that are completely covered by the match string.
    int remainingLength = e.Match.Value.Length;
    while (
        (remainingLength > 0) &&
        (currentNode != null) &&
        (currentNode.GetText().Length <= remainingLength) &&
        (currentNode.GetText() != e.Match.Value))
    {
        runs.Add(currentNode);
        remainingLength = remainingLength - currentNode.GetText().Length;

        // Select the next Run node.
        // Have to loop because there could be other nodes such as BookmarkStart etc.
        do
        {
            currentNode = currentNode.NextSibling;
        }
        while ((currentNode != null) && (currentNode.NodeType != AsposeWord.NodeType.Run));
    }

    // Split the last run that contains the match if there is any text left.
    // The fallback split index is the number of match characters still to be covered in
    // this run, not the offset of the match in the first run.
    if ((currentNode != null) && (remainingLength > 0))
    {
        SplitNotMatchRun((AsposeWord.Run)currentNode, e.Match, remainingLength);
        runs.Add(currentNode);
    }

    // Now highlight all runs in the sequence.
    foreach (AsposeWord.Run run in runs)
        run.Font.HighlightColor = System.Drawing.Color.Yellow;

    if (!string.IsNullOrWhiteSpace(comments) && !inHeaderOrFooter)
    {
        // Marker run that carries the comment; cloned from the match node so it keeps its formatting.
        AsposeWord.Run commentRun = (AsposeWord.Run)e.MatchNode.Clone(true);
        commentRun.Text = "**";
        commentRun.Font.Superscript = true;
        if (currentNode != null)
            e.MatchNode.ParentNode.InsertAfter(commentRun, currentNode);
        else
            e.MatchNode.ParentNode.InsertAfter(commentRun, e.MatchNode);

        AsposeWord.Comment comment = new AsposeWord.Comment(e.MatchNode.Document, "Document Review Tool", "Document Review Tool", DateTime.Today);
        comment.Paragraphs.Add(new AsposeWord.Paragraph(e.MatchNode.Document));
        comment.FirstParagraph.Runs.Add(new AsposeWord.Run(e.MatchNode.Document, comments));

        // Wrap the marker run in the comment range and place the comment node right after it.
        AsposeWord.CommentRangeStart commentRangeStart = new AsposeWord.CommentRangeStart(e.MatchNode.Document, comment.Id);
        AsposeWord.CommentRangeEnd commentRangeEnd = new AsposeWord.CommentRangeEnd(e.MatchNode.Document, comment.Id);
        commentRun.ParentNode.InsertBefore(commentRangeStart, commentRun);
        commentRun.ParentNode.InsertAfter(commentRangeEnd, commentRun);
        commentRangeEnd.ParentNode.InsertAfter(comment, commentRangeEnd);
    }

    // Signal to the replace engine to do nothing because we have already done all what we wanted.
    return ReplaceAction.Skip;
}

/// <summary>
/// Splits a run in two so that the matched text starts a run of its own, and inserts
/// the new run just after the run that was split.
/// </summary>
/// <remarks>
/// The split point is the first occurrence of the matched text inside <paramref name="run"/>.
/// If the run does not contain the whole matched text, the first run of the parent paragraph
/// that does contain it is split instead. If no run contains it, <paramref name="run"/> is
/// split at <paramref name="position"/>.
/// </remarks>
/// <param name="run">The run that holds the beginning of the match.</param>
/// <param name="match">The regex match whose value is searched for literally.</param>
/// <param name="position">Fallback split index, used only when no run contains the whole matched text.</param>
/// <returns>The new run that starts at the split point.</returns>
private static AsposeWord.Run SplitRun(AsposeWord.Run run, Match match, int position)
{
    AsposeWord.Run afterRun = (AsposeWord.Run)run.Clone(true);

    // Escape the matched value so it is searched for as literal text.
    string matchText = Regex.Escape(match.Value);
    Match match2 = Regex.Match(run.Text, matchText);
    if (match2.Success)
    {
        position = match2.Index;
        afterRun.Text = run.Text.Substring(position);
        run.Text = run.Text.Substring(0, position);
        run.ParentNode.InsertAfter(afterRun, run);
    }
    else
    {
        // Only Run children are considered: other paragraph children can also carry
        // matching text, and casting those to Run would throw.
        AsposeWord.Run nextSibling = run.ParentParagraph
            .OfType<AsposeWord.Run>()
            .FirstOrDefault(j => Regex.Match(j.GetText(), matchText).Success);
        if (nextSibling != null)
        {
            afterRun = (AsposeWord.Run)nextSibling.Clone(true);
            string text1 = nextSibling.GetText();
            match2 = Regex.Match(text1, matchText);
            if (match2.Success)
            {
                position = match2.Index;
                afterRun.Text = text1.Substring(position);
                nextSibling.Text = text1.Substring(0, position);
                nextSibling.ParentNode.InsertAfter(afterRun, nextSibling);
            }
        }
        else
        {
            // No run holds the whole matched text: split at the caller-supplied offset.
            afterRun.Text = run.Text.Substring(position);
            run.Text = run.Text.Substring(0, position);
            run.ParentNode.InsertAfter(afterRun, run);
        }
    }
    return afterRun;
}

/// <summary>
/// Trims the given run down to the part that should be highlighted and moves the
/// rest of its text into a new run inserted directly after it.
/// </summary>
/// <param name="run">The last run of a match; it keeps the leading part of its text.</param>
/// <param name="match">The regex match whose value is searched for literally inside the run.</param>
/// <param name="position">Split index used when the run does not contain the whole matched text.</param>
private static void SplitNotMatchRun(AsposeWord.Run run, Match match, int position)
{
    AsposeWord.Run remainder = (AsposeWord.Run)run.Clone(true);

    // When the run contains the whole matched text, keep as many characters as the
    // match is long; otherwise keep the first `position` characters.
    Match found = Regex.Match(run.Text, Regex.Escape(match.Value));
    int splitAt = found.Success ? found.Length : position;

    string fullText = run.Text;
    remainder.Text = fullText.Substring(splitAt);
    run.Text = fullText.Substring(0, splitAt);
    run.ParentNode.InsertAfter(remainder, run);
}

alexey.noskov · February 7, 2024, 7:47am

@Jaibir The hyperlink in your document is a field, so the hyperlink text appears twice in the document model: once in the field code and once in the field value. You can set the FindReplaceOptions.IgnoreFieldCodes property to ignore field codes when performing a find/replace operation.

Jaibir · February 7, 2024, 12:52pm

Hi Alex,
Can you please explain the purpose of FindReplaceOptions.Direction = FindReplaceDirection.Forward or FindReplaceDirection.Backward ?

Thanks!

alexey.noskov · February 7, 2024, 2:28pm

@Jaibir When FindReplaceDirection.Forward is used, Aspose.Words searches for occurrences from the beginning to the end of the document. If FindReplaceDirection.Backward is used, it searches from the end to the beginning.