I need to find all matches of a specific word within a document and add hyperlinks to those matched words. It is crucial that the matches are based solely on visible text, excluding any URLs associated with existing hyperlinks.
@naveenVdas You can use IReplacingCallback to achieve this. You can ignore existing hyperlinks in the document by setting the FindReplaceOptions.IgnoreFields property. Please see the following code:
// Load the input document.
Document document = new Document(@"C:\Temp\in.docx");

// Search backward, ignore fields, and hand every match to the hyperlink-wrapping callback.
FindReplaceOptions options = new FindReplaceOptions(FindReplaceDirection.Backward)
{
    IgnoreFields = true,
    ReplacingCallback = new ReplacingCallbackWrapWithHyperlink()
};

// The "replacement" argument is passed to the callback, which uses it as the hyperlink target.
document.Range.Replace("aspose", "https://www.aspose.com", options);

// Write the result.
document.Save(@"C:\Temp\out.docx");
/// <summary>
/// Find/replace callback that wraps each match in a hyperlink whose display text is
/// the matched text and whose target is the replacement string passed to Replace.
/// </summary>
public class ReplacingCallbackWrapWithHyperlink : IReplacingCallback
{
    /// <summary>
    /// Called for each match. Inserts a hyperlink at the position of the match,
    /// removes the runs that held the matched text, and returns
    /// <see cref="ReplaceAction.Skip"/>.
    /// </summary>
    /// <param name="args">Match details: start node, offset, matched text and replacement.</param>
    /// <returns>Always <see cref="ReplaceAction.Skip"/>; the document is edited here directly.</returns>
    public ReplaceAction Replacing(ReplacingArgs args)
    {
        // A zero-length match (possible with some regular expressions) has no text to wrap.
        // Bail out before any run is split so the document is left untouched.
        if (args.Match.Value.Length == 0)
        {
            return ReplaceAction.Skip;
        }

        List<Run> matchedRuns = GetMatchedRuns(args);

        // Defensive: matchedRuns[0] below would throw on an empty list.
        if (matchedRuns.Count == 0)
        {
            return ReplaceAction.Skip;
        }

        Document doc = (Document)args.MatchNode.Document;
        DocumentBuilder builder = new DocumentBuilder(doc);

        // Move DocumentBuilder to the matched text and insert the hyperlink.
        builder.MoveTo(matchedRuns[0]);
        builder.Font.StyleIdentifier = StyleIdentifier.Hyperlink;
        builder.InsertHyperlink(args.Match.Value, args.Replacement, false);

        // Delete the matched runs; the hyperlink now carries their text.
        foreach (Run run in matchedRuns)
        {
            run.Remove();
        }

        // Signal to the replace engine to do nothing because everything has already been done here.
        return ReplaceAction.Skip;
    }

    /// <summary>
    /// Collects the runs that together contain exactly the matched text, splitting the
    /// first and last run when the match starts or ends in the middle of a run.
    /// </summary>
    /// <param name="args">Match details supplied by the replace engine.</param>
    /// <returns>The runs covering the match, in document order.</returns>
    private static List<Run> GetMatchedRuns(ReplacingArgs args)
    {
        // This is a Run node that contains either the beginning or the complete match.
        Node currentNode = args.MatchNode;

        // The first (and maybe the only) run can contain text before the match;
        // in this case the run is split and processing continues from the second part.
        if (args.MatchOffset > 0)
        {
            currentNode = SplitRun((Run)currentNode, args.MatchOffset);
        }

        // All runs of the match, kept for later removal.
        List<Run> runs = new List<Run>();

        // Take whole runs while they fit entirely into what is left of the match.
        int remainingLength = args.Match.Value.Length;
        while (remainingLength > 0 &&
               currentNode != null &&
               currentNode.GetText().Length <= remainingLength)
        {
            runs.Add((Run)currentNode);
            remainingLength -= currentNode.GetText().Length;

            // Select the next Run node.
            // Have to loop because there could be other nodes such as BookmarkStart etc.
            do
            {
                currentNode = currentNode.NextSibling;
            }
            while (currentNode != null && currentNode.NodeType != NodeType.Run);
        }

        // The match ends inside this run: split it and keep only the leading part.
        if (currentNode != null && remainingLength > 0)
        {
            SplitRun((Run)currentNode, remainingLength);
            runs.Add((Run)currentNode);
        }

        return runs;
    }

    /// <summary>
    /// Splits <paramref name="run"/> at <paramref name="position"/>: the original run keeps
    /// the first <paramref name="position"/> characters and a clone inserted right after it
    /// receives the rest.
    /// </summary>
    /// <param name="run">Run to split.</param>
    /// <param name="position">Character index at which the text is divided.</param>
    /// <returns>The newly inserted run holding the text from <paramref name="position"/> onward.</returns>
    private static Run SplitRun(Run run, int position)
    {
        Run afterRun = (Run)run.Clone(true);
        run.ParentNode.InsertAfter(afterRun, run);

        // Assign the tail first: it is read from run.Text before that text is truncated.
        afterRun.Text = run.Text.Substring(position);
        run.Text = run.Text.Substring(0, position);
        return afterRun;
    }
}
Hi @alexey.noskov,
Thank you for your prompt response.
I have one additional scenario to address. When searching for specific text within the document, if there is a visible hyperlink such as “https://www.aspose.words.com”, it should be considered a match and replaced with the desired hyperlink.
However, if the hyperlink is formatted as “Aspose”, it should not be considered a match, as the URL “https://www.aspose.words.com” is not visible to the user.
@naveenVdas Instead of simple text, you can use a regular expression to configure more precisely what text should be matched. You can also use FindReplaceOptions to configure the find/replace process.