Find and Replace Highlighted Text or Numbers in Word Document & Insert Footnotes (C# .NET)

Hello All,

I am using Aspose.Words for .NET and I would like to achieve the following:
I have a Word file whose footnote text sits at the end of the document, always after a separator line. Each of those paragraphs begins with a number, which serves as its footnote number. I want to search the text before the separator line for the numbers highlighted in yellow and, at each one, insert the paragraph with the matching number from the end of the file as a footnote in that same place.

Please check both the source and destination files to see the expected result; they should make it clearer than my explanation :stuck_out_tongue:

Thank you in advance. Destination.docx (17.8 KB)
source.docx (14.8 KB)

@lssupport,

You can build logic on the following C# code of Aspose.Words for .NET API to get the desired results:

Document doc = new Document("C:\\Temp\\source.docx");

// Maps each footnote number (digits only) to the footnote text that follows it.
Dictionary<string, string> keyValuePairs = new Dictionary<string, string>();
foreach (Section section in doc.Sections)
{
    foreach (Paragraph para in section.Body.GetChildNodes(NodeType.Paragraph, true))
    {
        string paraText = para.ToString(SaveFormat.Text).Trim();

        // Everything before the first letter is the candidate numbering prefix, e.g. "1. " or "(12) ".
        string strUntilFirstLetter = new string(paraText.TakeWhile(c => !Char.IsLetter(c)).ToArray());
        string number = new string(strUntilFirstLetter.Where(c => Char.IsDigit(c)).ToArray());

        // A prefix without digits (leading quote, dash, bullet, ...) is not a footnote number.
        // Registering it would add an empty key, and a second such paragraph would make
        // Dictionary.Add throw on the duplicate.
        if (number.Length == 0)
        {
            continue;
        }

        // Keep the first paragraph seen for a number instead of throwing on a repeated number.
        if (!keyValuePairs.ContainsKey(number))
        {
            keyValuePairs.Add(number, paraText.Substring(strUntilFirstLetter.Length));
        }
    }
}

FindReplaceOptions findReplaceOptions = new FindReplaceOptions();
findReplaceOptions.ReplacingCallback = new MyReplaceEvaluator();

foreach (KeyValuePair<string, string> kvp in keyValuePairs)
{
    // Match the number only when it is not part of a longer number, so that "1" is not
    // found inside "1500s", "1960s" or "12". This replaces the earlier hack that deleted
    // the literal texts "1500s" and "1960s" from the document before searching.
    System.Text.RegularExpressions.Regex standaloneNumber = new System.Text.RegularExpressions.Regex(
        @"(?<!\d)" + System.Text.RegularExpressions.Regex.Escape(kvp.Key) + @"(?!\d)");

    // The replacement string is not written by Aspose directly; the callback receives it
    // as ReplacingArgs.Replacement and uses it as the footnote text.
    doc.Range.Replace(standaloneNumber, kvp.Value.Trim(), findReplaceOptions);
}

doc.Save("C:\\temp\\21.7.docx");

/// <summary>
/// Find-and-replace callback that turns a matched number into a footnote:
/// the runs holding the match are removed and a footnote whose text is the
/// replacement string is inserted in their place.
/// </summary>
private class MyReplaceEvaluator : IReplacingCallback
{
    /// <summary>
    /// Called for every match found by the replace operation.
    /// </summary>
    /// <param name="e">Match details: the node and offset where the match starts, the matched
    /// text, and the replacement string (used here as the footnote text).</param>
    /// <returns><see cref="ReplaceAction.Stop"/> once a footnote was inserted, so only one
    /// match per replace call is processed; <see cref="ReplaceAction.Skip"/> if no run
    /// could be collected for the match.</returns>
    ReplaceAction IReplacingCallback.Replacing(ReplacingArgs e)
    {
        // This is a Run node that contains either the beginning or the complete match.
        Node currentNode = e.MatchNode;

        // The first (and maybe the only) run can contain text before the match;
        // in that case the run is split so the match starts at a run boundary.
        if (e.MatchOffset > 0)
        {
            currentNode = SplitRun((Run)currentNode, e.MatchOffset);
        }

        // All runs that make up the match, in document order.
        List<Run> runs = new List<Run>();

        // Collect whole runs while they fit entirely inside the remaining match length.
        int remainingLength = e.Match.Value.Length;
        while (
            (remainingLength > 0) &&
            (currentNode != null) &&
            (currentNode.GetText().Length <= remainingLength))
        {
            runs.Add((Run)currentNode);
            remainingLength = remainingLength - currentNode.GetText().Length;

            // Select the next Run node.
            // Have to loop because there could be other nodes such as BookmarkStart etc.
            do
            {
                currentNode = currentNode.NextSibling;
            }
            while ((currentNode != null) && (currentNode.NodeType != NodeType.Run));
        }

        // The match ends inside this run: split it and keep only the leading part.
        if ((currentNode != null) && (remainingLength > 0))
        {
            SplitRun((Run)currentNode, remainingLength);
            runs.Add((Run)currentNode);
        }

        // Nothing collected means there is no position to insert at; leave the match untouched
        // instead of failing on runs[runs.Count - 1].
        if (runs.Count == 0)
        {
            return ReplaceAction.Skip;
        }

        DocumentBuilder builder = new DocumentBuilder((Document)e.MatchNode.Document);
        builder.MoveTo(runs[runs.Count - 1]);

        // The matched text itself is reused as the footnote reference mark.
        builder.Font.Color = Color.Red;
        builder.InsertFootnote(FootnoteType.Footnote, e.Replacement, e.Match.Value);

        // The footnote now stands in for the matched text, so drop the original runs.
        foreach (Run run in runs)
        {
            run.Remove();
        }

        return ReplaceAction.Stop;
    }

    /// <summary>
    /// Splits text of the specified run into two runs.
    /// Inserts the new run just after the specified run.
    /// </summary>
    /// <param name="run">Run to split; it keeps the text before <paramref name="position"/>.</param>
    /// <param name="position">Character index inside the run's text at which to split.</param>
    /// <returns>The newly inserted run holding the text from <paramref name="position"/> onwards.</returns>
    private static Run SplitRun(Run run, int position)
    {
        // Clone first so the new run carries the same formatting as the original.
        Run afterRun = (Run)run.Clone(true);
        afterRun.Text = run.Text.Substring(position);
        run.Text = run.Text.Substring(0, position);
        run.ParentNode.InsertAfter(afterRun, run);
        return afterRun;
    }
}

@awais.hafeez Thank you for your reply. I tested the code and it works great, but it is not exactly what I need.
I am looking for a way to add to the dictionary only the numbers that are at the end of the document. (If you check the source file, you will see that near the end of the document there is a line of three underlined tabs, and after it comes the text of the footnotes that I want to add.) So I don’t want to search the full text: the collection of footnote text should always start after these three underlined tabs, and the footnotes should be added only in the text before them.
It would also be great if we could remove the text after the underlined tabs before saving the document.

Thanks again and I would appreciate any help in this matter.

@lssupport,

Please check if the following code produces desired output on your end?

Document doc = new Document("C:\\Temp\\source.docx");

// Becomes true once the separator paragraph (three consecutive tabs) has been seen;
// every paragraph from that point on is treated as footnote source text.
bool flag = false;

// Maps each footnote number (digits only) to the footnote text that follows it.
Dictionary<string, string> keyValuePairs = new Dictionary<string, string>();
foreach (Section section in doc.Sections)
{
    // GetChildNodes returns a live collection, so iterate over a snapshot:
    // paragraphs are removed from the document inside this loop.
    foreach (Paragraph para in section.Body.GetChildNodes(NodeType.Paragraph, true).ToArray())
    {
        string paraText = para.ToString(SaveFormat.Text);

        // Removal is decided here and performed once at the end, so a separator paragraph
        // that also carries a numbering prefix is not removed twice.
        bool removePara = false;

        if (paraText.Contains(ControlChar.Tab + ControlChar.Tab + ControlChar.Tab))
        {
            // The separator paragraph itself is not wanted in the output.
            removePara = true;
            flag = true;
        }

        if (flag)
        {
            // Collect footnotes data only after the Paragraph containing three Tabs
            paraText = paraText.Trim();

            // Everything before the first letter is the candidate numbering prefix, e.g. "1. ".
            string strUntilFirstLetter = new string(paraText.TakeWhile(c => !Char.IsLetter(c)).ToArray());

            if (!string.IsNullOrEmpty(strUntilFirstLetter))
            {
                string number = new string(strUntilFirstLetter.Where(c => Char.IsDigit(c)).ToArray());

                // Skip prefixes without digits (no empty key) and keep the first paragraph
                // seen for a number instead of throwing on a repeated number.
                if (number.Length > 0 && !keyValuePairs.ContainsKey(number))
                {
                    keyValuePairs.Add(number, paraText.Substring(strUntilFirstLetter.Length));
                }

                // remove this Paragraph as it is no longer needed
                removePara = true;
            }
        }

        if (removePara)
        {
            para.Remove();
        }
    }
}

FindReplaceOptions findReplaceOptions = new FindReplaceOptions();
findReplaceOptions.ReplacingCallback = new MyReplaceEvaluator();

foreach (KeyValuePair<string, string> kvp in keyValuePairs)
{
    // Match the number only when it is not part of a longer number, so that "1" is not
    // found inside "1500s", "1960s" or "12". This replaces the earlier hack that deleted
    // the literal texts "1500s" and "1960s" from the document before searching.
    System.Text.RegularExpressions.Regex standaloneNumber = new System.Text.RegularExpressions.Regex(
        @"(?<!\d)" + System.Text.RegularExpressions.Regex.Escape(kvp.Key) + @"(?!\d)");

    // The replacement string is not written by Aspose directly; the callback receives it
    // as ReplacingArgs.Replacement and uses it as the footnote text.
    doc.Range.Replace(standaloneNumber, kvp.Value.Trim(), findReplaceOptions);
}

doc.Save("C:\\temp\\21.7.docx");

@awais.hafeez Thanks a million, the code works perfectly.
But now I am struggling with another issue: when we add the footnote for a number, the code attaches it to the first occurrence of that number (kvp.Key) in the text. What I am aiming for is to replace only the match that is highlighted in yellow (Font.HighlightColor) and to leave all other matches as they are.

Thank you again

Hello Again @awais.hafeez

I tried the following logic in MyReplaceEvaluator

private class MyReplaceEvaluator : IReplacingCallback
        {
            /// <summary>
            /// Called for every match found by the replace operation. Only a match that starts
            /// in a yellow-highlighted run is turned into a footnote; every other match is skipped
            /// without modifying the document.
            /// </summary>
            /// <param name="e">Match details: the node and offset where the match starts, the matched
            /// text, and the replacement string (used here as the footnote text).</param>
            /// <returns><see cref="ReplaceAction.Skip"/> for non-highlighted matches;
            /// <see cref="ReplaceAction.Stop"/> after a footnote has been inserted.</returns>
            ReplaceAction IReplacingCallback.Replacing(ReplacingArgs e)
            {
                // Opaque yellow as ARGB; the same value the earlier Name == "ffffff00" check encoded,
                // but compared numerically so it does not depend on how the Color was created.
                const int yellowArgb = unchecked((int)0xFFFFFF00);

                // This is a Run node that contains either the beginning or the complete match.
                Run matchRun = (Run)e.MatchNode;

                // Decide BEFORE touching the document. Previously the runs were split first and the
                // match was skipped afterwards, so every skipped match still changed run boundaries
                // while the search was in progress. NOTE(review): that is the likely cause of the
                // "startIndex cannot be larger than length of string" error raised by Substring
                // in SplitRun - confirm against the actual document.
                if (matchRun.Font.HighlightColor.ToArgb() != yellowArgb)
                {
                    return ReplaceAction.Skip;
                }

                Node currentNode = matchRun;

                // The first (and maybe the only) run can contain text before the match;
                // in that case the run is split so the match starts at a run boundary.
                if (e.MatchOffset > 0)
                {
                    currentNode = SplitRun((Run)currentNode, e.MatchOffset);
                }

                // This array is used to store all nodes of the match for further processing.
                ArrayList runs = new ArrayList();

                // Collect whole runs while they fit entirely inside the remaining match length.
                int remainingLength = e.Match.Value.Length;
                while (
                    (remainingLength > 0) &&
                    (currentNode != null) &&
                    (currentNode.GetText().Length <= remainingLength))
                {
                    runs.Add(currentNode);
                    remainingLength = remainingLength - currentNode.GetText().Length;

                    // Select the next Run node.
                    // Have to loop because there could be other nodes such as BookmarkStart etc.
                    do
                    {
                        currentNode = currentNode.NextSibling;
                    }
                    while ((currentNode != null) && (currentNode.NodeType != NodeType.Run));
                }

                // The match ends inside this run: split it and keep only the leading part.
                if ((currentNode != null) && (remainingLength > 0))
                {
                    SplitRun((Run)currentNode, remainingLength);
                    runs.Add(currentNode);
                }

                // Nothing collected means there is no position to insert at.
                if (runs.Count == 0)
                {
                    return ReplaceAction.Skip;
                }

                DocumentBuilder builder = new DocumentBuilder((Document)e.MatchNode.Document);
                builder.MoveTo((Run)runs[runs.Count - 1]);

                // The matched text itself is reused as the footnote reference mark.
                builder.InsertFootnote(FootnoteType.Footnote, e.Replacement, e.Match.Value);

                // Remove every run of the match, not just the first highlighted one,
                // so no fragment of the original number is left next to the footnote.
                foreach (Run run in runs)
                {
                    run.Remove();
                }

                return ReplaceAction.Stop;
            }

But I am getting the error "startIndex cannot be larger than length of string".

I would appreciate any help.

@lssupport,

Alternatively, you can skip the code of MyReplaceEvaluator class and try running the following code:

Document doc = new Document("C:\\Temp\\source.docx");

// Becomes true once the separator paragraph (three consecutive tabs) has been seen;
// every paragraph from that point on is treated as footnote source text.
bool flag = false;

// Maps each footnote number (digits only) to the footnote text that follows it.
Dictionary<string, string> keyValuePairs = new Dictionary<string, string>();
foreach (Section section in doc.Sections)
{
    // GetChildNodes returns a live collection, so iterate over a snapshot:
    // paragraphs are removed from the document inside this loop.
    foreach (Paragraph para in section.Body.GetChildNodes(NodeType.Paragraph, true).ToArray())
    {
        string paraText = para.ToString(SaveFormat.Text);

        // Removal is decided here and performed once at the end, so a separator paragraph
        // that also carries a numbering prefix is not removed twice.
        bool removePara = false;

        if (paraText.Contains(ControlChar.Tab + ControlChar.Tab + ControlChar.Tab))
        {
            // The separator paragraph itself is not wanted in the output.
            removePara = true;
            flag = true;
        }

        if (flag)
        {
            // Collect footnotes data only after the Paragraph containing three Tabs
            paraText = paraText.Trim();

            // Everything before the first letter is the candidate numbering prefix, e.g. "1. ".
            string strUntilFirstLetter = new string(paraText.TakeWhile(c => !Char.IsLetter(c)).ToArray());

            if (!string.IsNullOrEmpty(strUntilFirstLetter))
            {
                string number = new string(strUntilFirstLetter.Where(c => Char.IsDigit(c)).ToArray());

                // Skip prefixes without digits (no empty key) and keep the first paragraph
                // seen for a number instead of throwing on a repeated number.
                if (number.Length > 0 && !keyValuePairs.ContainsKey(number))
                {
                    keyValuePairs.Add(number, paraText.Substring(strUntilFirstLetter.Length));
                }

                // remove this Paragraph as it is no longer needed
                removePara = true;
            }
        }

        if (removePara)
        {
            para.Remove();
        }
    }
}

// Merge adjacent runs with identical formatting so that a highlighted number
// which was stored in several runs can be compared as one piece of text.
doc.JoinRunsWithSameFormatting();

DocumentBuilder builder = new DocumentBuilder(doc);

// Snapshot the runs: inserting a footnote adds new nodes to the document, and the
// collection returned by GetChildNodes is live.
foreach (Run run in doc.GetChildNodes(NodeType.Run, true).ToArray())
{
    // Only highlighted runs are footnote markers; plain occurrences of the same number stay as they are.
    if (run.Font.HighlightColor == Color.Empty)
    {
        continue;
    }

    // The whole run text must be exactly a collected footnote number.
    string footnoteNumber = run.Text;
    string footnoteText;
    if (!keyValuePairs.TryGetValue(footnoteNumber, out footnoteText))
    {
        continue;
    }

    builder.MoveTo(run);

    builder.Font.Color = Color.Red;
    builder.InsertFootnote(FootnoteType.Footnote, footnoteText.Trim(), footnoteNumber);

    // The footnote reference mark now shows the number, so blank the original run.
    run.Text = "";
}

doc.Save("C:\\temp\\21.7.docx");