I'm currently evaluating whether Aspose.Words can identify all plain-text URLs in a Word document and convert them into hyperlinks.
I think the following is one way to do that, but I wonder whether there is a quicker or easier way to do the conversion.
/// <summary>
/// Loads "example.docx", replaces every run whose text contains a plain-text URL
/// (and is not already part of a HYPERLINK field code) with a hyperlink field,
/// and saves the result as "example.rtf".
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    Document doc = new Document(FOLDER_PATH + "example.docx");
    DocumentBuilder builder = new DocumentBuilder(doc);

    // GetChildNodes returns a live collection. Take a snapshot so that the nodes
    // inserted/removed below cannot shift the items we are iterating over.
    Node[] paragraphs = doc.GetChildNodes(NodeType.Paragraph, true).ToArray();

    foreach (Node paragraph in paragraphs)
    {
        Paragraph par = (Paragraph)paragraph;

        // Snapshot of all runs of this paragraph, taken before any modification.
        Node[] runs = par.GetChildNodes(NodeType.Run, true).ToArray();

        for (int i = 0; i < runs.Length; i++)
        {
            Run curElement = (Run)runs[i];
            string curText = curElement.GetText();

            // Only runs that contain "http" and are not themselves a HYPERLINK field code qualify.
            if (string.IsNullOrEmpty(curText) || !curText.Contains("http") || curText.Contains("HYPERLINK"))
            {
                continue;
            }

            // If the preceding run carries a HYPERLINK field code, this run is treated as
            // the visible text of an existing hyperlink and is left untouched.
            Run prevElement = i > 0 ? (Run)runs[i - 1] : null;
            if (prevElement != null && prevElement.GetText().Contains("HYPERLINK"))
            {
                continue;
            }

            // Insert the hyperlink at the run's position, then drop the plain-text run.
            builder.MoveTo(curElement);
            builder.Font.StyleIdentifier = StyleIdentifier.Hyperlink;
            builder.InsertHyperlink(curText, curText, false);
            curElement.Remove();
        }
    }

    doc.Save(FOLDER_PATH + "example.rtf");
}
@Jiyeon_Shin You can achieve this using IReplacingCallback. In the example below, I have used the same technique as in the Find and Highlight example on our GitHub.
// Matches http/https/ftp URLs: scheme, host, optional numeric port, optional path/query.
// The port group uses the character class [0-9] (the literal group "(0-9)" never matched digits),
// and the path class includes '=' and '~' so query strings such as "?a=1" are not cut short.
Regex urlRegex = new Regex(@"(ht|f)tp(s?)\:\/\/[0-9a-zA-Z]([-.\w]*[0-9a-zA-Z])*(:[0-9]+)?(\/?)([a-zA-Z0-9\-\.\?\,\'\/\\\+&%\$#_=~]*)?");

Document doc = new Document(@"C:\Temp\in.docx");

// Ignore fields, so that text already inside fields (presumably including existing
// HYPERLINK fields) is not matched again.
FindReplaceOptions options = new FindReplaceOptions();
options.IgnoreFieldCodes = true;
options.IgnoreFields = true;

// The callback performs the actual work: it inserts a hyperlink for every match.
options.ReplacingCallback = new ReplaceEvaluatorInsertHyperlink();

// The replacement string is not used, because the callback returns ReplaceAction.Skip.
doc.Range.Replace(urlRegex, "", options);

doc.Save(@"C:\Temp\out.docx");
/// <summary>
/// Find-and-replace callback that turns each matched piece of text into a hyperlink
/// whose display text and address are both the matched string.
/// </summary>
private class ReplaceEvaluatorInsertHyperlink : IReplacingCallback
{
    /// <summary>
    /// This method is called by the Aspose.Words find and replace engine for each match.
    /// It isolates the runs that make up the match, inserts a hyperlink at their position
    /// and removes the original runs.
    /// </summary>
    /// <param name="e">Match information: the node where the match starts, the offset of the
    /// match inside that node, and the regex match itself.</param>
    /// <returns>Always <see cref="ReplaceAction.Skip"/>, because the document has already been
    /// modified here and the engine must not perform its own replacement.</returns>
    ReplaceAction IReplacingCallback.Replacing(ReplacingArgs e)
    {
        // This is a Run node that contains either the beginning or the complete match.
        Node currentNode = e.MatchNode;

        // The first (and maybe the only) run can contain text before the match;
        // in this case it is necessary to split the run so the match starts a run of its own.
        if (e.MatchOffset > 0)
        {
            currentNode = SplitRun((Run)currentNode, e.MatchOffset);
        }

        // Collects every run that belongs to the match so they can be removed afterwards.
        List<Run> runs = new List<Run>();

        // Find all runs that are entirely covered by the match string.
        int remainingLength = e.Match.Value.Length;
        while (
            remainingLength > 0 &&
            currentNode != null &&
            currentNode.GetText().Length <= remainingLength)
        {
            runs.Add((Run)currentNode);
            remainingLength -= currentNode.GetText().Length;

            // Select the next Run node.
            // Have to loop because there could be other nodes such as BookmarkStart etc.
            do
            {
                currentNode = currentNode.NextSibling;
            } while (currentNode != null && currentNode.NodeType != NodeType.Run);
        }

        // Split the last run that contains the match if there is any text left,
        // keeping only its leading part (the tail of the match) in the list.
        if (currentNode != null && remainingLength > 0)
        {
            SplitRun((Run)currentNode, remainingLength);
            runs.Add((Run)currentNode);
        }

        // Nothing was collected (e.g. a zero-length match): there is no position to
        // insert at, so leave the document untouched instead of indexing an empty list.
        if (runs.Count == 0)
        {
            return ReplaceAction.Skip;
        }

        // Now insert a hyperlink at the position of the last matched run.
        DocumentBuilder builder = new DocumentBuilder((Document)e.MatchNode.Document);
        builder.MoveTo(runs[runs.Count - 1]);
        builder.Font.StyleIdentifier = StyleIdentifier.Hyperlink;
        builder.InsertHyperlink(e.Match.Value, e.Match.Value, false);

        // Remove the plain-text runs that the hyperlink replaces.
        foreach (Run run in runs)
        {
            run.Remove();
        }

        // Signal to the replace engine to do nothing because we have already done everything we wanted.
        return ReplaceAction.Skip;
    }
}
/// <summary>
/// Splits the text of <paramref name="run"/> at <paramref name="position"/>.
/// The original run keeps the characters before the position; a clone of the run,
/// holding the characters from the position onward, is inserted right after it.
/// </summary>
/// <param name="run">The run to split; its text is shortened in place.</param>
/// <param name="position">Zero-based character index at which the text is divided.</param>
/// <returns>The newly inserted run that holds the text from <paramref name="position"/> onward.</returns>
private static Run SplitRun(Run run, int position)
{
    string fullText = run.Text;

    // The clone receives the tail of the text.
    Run tail = (Run)run.Clone(true);
    tail.Text = fullText.Substring(position);

    // The original run keeps the head.
    run.Text = fullText.Substring(0, position);

    run.ParentNode.InsertAfter(tail, run);
    return tail;
}