Convert plain text URL to Hyperlink

Jiyeon_Shin · March 25, 2022, 9:01pm

I’m currently evaluating if “Aspose.Words” can identify all plain text URLs included in a Word doc and convert them into hyperlinks.

I think the following is a way to do that. But, I wonder if there’s a quicker/easier way to do the conversion.

/// <summary>
/// Loads "example.docx", replaces every run whose text contains a plain-text URL
/// with a hyperlink field, and saves the result as "example.rtf".
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    Document doc = new Document(FOLDER_PATH + "example.docx");
    DocumentBuilder builder = new DocumentBuilder(doc);

    // Loop through all paragraphs
    foreach (Paragraph par in doc.GetChildNodes(NodeType.Paragraph, true))
    {
        // Take a snapshot of the paragraph's runs. The loop below inserts and removes
        // nodes, and a NodeCollection reflects such changes immediately, so iterating
        // it by index while mutating the paragraph is fragile.
        Node[] runs = par.GetChildNodes(NodeType.Run, true).ToArray();
        for (int i = 0; i < runs.Length; i++)
        {
            Run curElement = (Run)runs[i];
            string curText = curElement.GetText();

            // Only runs whose text contains "http" and that are not themselves
            // a HYPERLINK field code are candidates for conversion.
            if (string.IsNullOrEmpty(curText) || !curText.Contains("http") || curText.Contains("HYPERLINK"))
            {
                continue;
            }

            // If the preceding run is a HYPERLINK field code, this run is the display
            // text of an existing hyperlink and must be left alone.
            if (i > 0 && runs[i - 1].GetText().Contains("HYPERLINK"))
            {
                continue;
            }

            // Insert the hyperlink at the position of the run, then drop the plain-text run.
            builder.MoveTo(curElement);
            builder.Font.StyleIdentifier = StyleIdentifier.Hyperlink;
            // Keep the display text as-is, but trim the address so that surrounding
            // whitespace in the run does not become part of the link target.
            builder.InsertHyperlink(curText, curText.Trim(), false);
            curElement.Remove();
        }
    }

    doc.Save(FOLDER_PATH + "example.rtf");
}

alexey.noskov · March 26, 2022, 5:26am

@Jiyeon_Shin You can achieve this using IReplacingCallback. In the example below, I have used the same technique as in the Find and Highlight example on our GitHub.

// Matches http/https/ftp URLs: scheme, host, optional numeric port, optional path/query.
// The port group uses a digit class ([0-9]+), and the path/query class contains a literal
// '&' and '=' so that query strings such as "?a=1&b=2" are matched in full.
Regex urlRegex = new Regex(@"(ht|f)tp(s?)\:\/\/[0-9a-zA-Z]([-.\w]*[0-9a-zA-Z])*(:[0-9]+)?(\/?)([a-zA-Z0-9\-\.\?\,\'\/\\\+&=%\$#_]*)?");

Document doc = new Document(@"C:\Temp\in.docx");

// Ignore fields and field codes so that text which is already part of a field
// (for example an existing hyperlink) is not matched again.
FindReplaceOptions options = new FindReplaceOptions();
options.IgnoreFieldCodes = true;
options.IgnoreFields = true;
// The callback performs the actual work: it inserts a hyperlink for every match.
options.ReplacingCallback = new ReplaceEvaluatorInsertHyperlink();

// The replacement string is a placeholder; the callback handles each match itself.
doc.Range.Replace(urlRegex, "", options);

doc.Save(@"C:\Temp\out.docx");

/// <summary>
/// Find-and-replace callback that turns every matched piece of text into a hyperlink.
/// </summary>
private class ReplaceEvaluatorInsertHyperlink : IReplacingCallback
{
    /// <summary>
    /// This method is called by the Aspose.Words find and replace engine for each match.
    /// It replaces the matched text with a hyperlink whose display text and address
    /// are both the matched string.
    /// </summary>
    /// <param name="e">Describes the current match: the node where it starts, the offset within that node, and the regex match.</param>
    /// <returns>
    /// Always <see cref="ReplaceAction.Skip"/>, because the document has already been
    /// modified here and the engine must not perform its own replacement.
    /// </returns>
    ReplaceAction IReplacingCallback.Replacing(ReplacingArgs e)
    {
        // This is a Run node that contains either the beginning or the complete match.
        Node currentNode = e.MatchNode;

        // The first (and maybe the only) run can contain text before the match;
        // in this case it is necessary to split the run so the match starts on a run boundary.
        if (e.MatchOffset > 0)
        {
            currentNode = SplitRun((Run)currentNode, e.MatchOffset);
        }

        // Collects all runs that make up the match so they can be removed afterwards.
        List<Run> runs = new List<Run>();

        // Find all runs that are completely covered by the match string.
        int remainingLength = e.Match.Value.Length;
        while (
            remainingLength > 0 &&
            currentNode != null &&
            currentNode.GetText().Length <= remainingLength)
        {
            runs.Add((Run)currentNode);
            remainingLength -= currentNode.GetText().Length;

            // Select the next Run node.
            // Have to loop because there could be other nodes such as BookmarkStart etc.
            do
            {
                currentNode = currentNode.NextSibling;
            } while (currentNode != null && currentNode.NodeType != NodeType.Run);
        }

        // Split the last run that contains the match if there is any text left,
        // so that only the matched part of it is collected.
        if (currentNode != null && remainingLength > 0)
        {
            SplitRun((Run)currentNode, remainingLength);
            runs.Add((Run)currentNode);
        }

        // Nothing was collected (for example an empty match): leave the document untouched
        // instead of failing on the index access below.
        if (runs.Count == 0)
        {
            return ReplaceAction.Skip;
        }

        // Now insert a hyperlink at the position of the last matched run.
        DocumentBuilder builder = new DocumentBuilder((Document)e.MatchNode.Document);
        builder.MoveTo(runs[runs.Count - 1]);
        builder.Font.StyleIdentifier = StyleIdentifier.Hyperlink;
        builder.InsertHyperlink(e.Match.Value, e.Match.Value, false);

        // Remove the plain-text runs; the hyperlink inserted above takes their place.
        foreach (Run run in runs)
        {
            run.Remove();
        }

        // Signal to the replace engine to do nothing because we have already done all that we wanted.
        return ReplaceAction.Skip;
    }
}

/// <summary>
/// Splits <paramref name="run"/> into two runs at the given character position.
/// The original run keeps the text before <paramref name="position"/>; a clone holding
/// the remaining text is inserted directly after it.
/// </summary>
/// <param name="run">The run to split; it is shortened in place.</param>
/// <param name="position">Zero-based index in the run's text at which the second run starts.</param>
/// <returns>The newly inserted run that contains the text from <paramref name="position"/> onwards.</returns>
private static Run SplitRun(Run run, int position)
{
    string fullText = run.Text;

    // Clone the run (presumably so the tail keeps the original's formatting),
    // then give each half its share of the text.
    Run tail = (Run)run.Clone(true);
    tail.Text = fullText.Substring(position);
    run.Text = fullText.Substring(0, position);

    // Place the tail right behind the shortened original.
    run.ParentNode.InsertAfter(tail, run);
    return tail;
}