Replacing line break with paragraph break

Asfak · April 19, 2011, 4:03am

Hi !!

I need to change all the line breaks to paragraph breaks. I will not save the changes; I only need these changes for some further processing.

Is there any solution in Java?

Regards.

AndreyN · April 19, 2011, 7:27am

Thanks for your request. Please try using the following code:

// Locations of the document to read and of the converted copy to write
String sourcePath = "C:\\Temp\\in.docx";
String targetPath = "C:\\Temp\\out.docx";

// Open document
Document doc = new Document(sourcePath);

// Hand every line-break match in the document to LineBreakReplacer.
// NOTE(review): asposewobfuscated.rn looks like an obfuscated, library-internal
// pattern type wrapped around the line-break character - confirm which public
// regex type your Aspose.Words for Java version expects here.
// NOTE(review): the trailing `false` is presumably the search-direction flag - confirm.
doc.getRange().replace(new asposewobfuscated.rn(ControlChar.LineBreak), new LineBreakReplacer(), false);

// Write the modified document to a separate file
doc.save(targetPath);

public static class LineBreakReplacer implements IReplacingCallback
{
    /**
     * Handles one match: isolates the matched text into its own run(s), writes a
     * paragraph break at that position and removes the run(s) holding the match.
     *
     * @param e match details; the match node is cast to Run and is the run in
     *          which the matched text starts
     * @return ReplaceAction.SKIP in every case
     */
    public int replacing(ReplacingArgs e)
    {
        // Run where the match begins; the match may continue into following runs,
        // in which case this run holds only the first part of it.
        Run cursor = (Run) e.getMatchNode();
        // Runs that together contain exactly the matched text (removed at the end).
        ArrayList matchedRuns = new ArrayList();
        String matched = e.getMatch().getValue();

        // Locate the match inside the first run. If the run holds only a leading
        // part of the match, retry with progressively shorter prefixes of it.
        int start = cursor.getText().lastIndexOf(matched);
        int prefixLength = matched.length();
        while (start < 0)
        {
            start = cursor.getText().lastIndexOf(matched.substring(0, prefixLength));
            prefixLength--;
        }

        // Text in front of the match is moved into a cloned run (the clone keeps
        // the formatting) so that the current run starts with the match.
        if (start > 0)
        {
            Run leading = (Run) cursor.deepClone(true);
            String textBeforeSplit = cursor.getText();
            leading.setText(textBeforeSplit.substring(0, start));
            cursor.setText(textBeforeSplit.substring(start));
            cursor.getParentNode().insertBefore(leading, cursor);
        }

        // Collect each run that lies entirely inside the match, advancing to the
        // next sibling run until the rest of the match fits in the current run.
        int remaining = matched.length();
        while (remaining > 0 && cursor.getText().length() <= remaining)
        {
            matchedRuns.add(cursor);
            remaining -= cursor.getText().length();

            Node sibling = cursor.getNextSibling();
            if (sibling == null)
            {
                // Nothing follows this run, so there is nothing more to collect.
                break;
            }
            // Step over non-run siblings (BookmarkStart or BookmarkEnd for instance).
            for (; sibling != null; sibling = sibling.getNextSibling())
            {
                if (sibling.getNodeType() == NodeType.RUN)
                {
                    cursor = (Run) sibling;
                    break;
                }
            }
        }

        // Deal with the last run of the sequence: take it whole when its text is
        // exactly the remaining part of the match, otherwise split that part off.
        if (cursor != null)
        {
            if (cursor.getText().length() != remaining)
            {
                Run trailing = (Run) cursor.deepClone(true);
                String textBeforeTailSplit = cursor.getText();
                trailing.setText(textBeforeTailSplit.substring(0, remaining));
                cursor.setText(textBeforeTailSplit.substring(remaining));
                cursor.getParentNode().insertBefore(trailing, cursor);
                matchedRuns.add(trailing);
            }
            else
            {
                matchedRuns.add(cursor);
            }
        }

        // Move a DocumentBuilder cursor to the first collected run and insert a
        // paragraph break there.
        Document owner = (Document) e.getMatchNode().getDocument();
        DocumentBuilder builder = new DocumentBuilder(owner);
        builder.moveTo((Node) matchedRuns.get(0));
        builder.writeln();

        // Remove the run(s) that held the line break.
        for (Object collected : matchedRuns)
        {
            ((Run) collected).remove();
        }
        return ReplaceAction.SKIP;
    }
}

Best regards,

smartcrm · December 12, 2011, 1:00am

Do you have a C# / .NET example?
Where does asposewobfuscated come from?

awais.hafeez · December 12, 2011, 2:11am

Hi Boas,

Thanks for your request. I have converted the above Java code to C#:

// Open document.
// Verbatim string literals (@"...") are used for the paths: in a regular string
// literal the backslashes in "C:\Temp\in.docx" form unrecognised escape sequences
// (\T, \i) and the code does not compile.
Document doc = new Document(@"C:\Temp\in.docx");

// Hand every line-break match in the document to LineBreakReplacer.
// NOTE(review): the trailing `false` is presumably the search-direction flag - confirm.
doc.Range.Replace(new Regex(ControlChar.LineBreak), new LineBreakReplacer(), false);

// Write the modified document to a separate file.
doc.Save(@"C:\Temp\out.docx");

/// <summary>
/// Replacing callback that turns each matched line break into a paragraph break.
/// </summary>
public class LineBreakReplacer : IReplacingCallback
{
    /// <summary>
    /// Handles one match: isolates the matched text into its own run(s), writes a
    /// paragraph break at that position and removes the run(s) holding the match.
    /// </summary>
    /// <param name="e">
    /// Match details. <c>MatchNode</c> is cast to <c>Run</c> and is the run in which
    /// the matched text starts.
    /// </param>
    /// <returns><c>ReplaceAction.Skip</c> in every case.</returns>
    ReplaceAction IReplacingCallback.Replacing(ReplacingArgs e)
    {
        // MatchNode is the Run node where the matched word starts.
        // The word can consist of more than one run,
        // in this case MatchNode will represent only part of the word.
        Run matchRun = (Run)e.MatchNode;

        // We will store all parts of the word in the list.
        ArrayList mainTextNodes = new ArrayList();

        // Get match word.
        string word = e.Match.Value;

        // The run can contain the whole match word or only part of it.
        // When the run contains only part of the word, LastIndexOf returns a negative
        // value, so retry with progressively shorter prefixes of the word.
        int index = matchRun.Text.LastIndexOf(word);
        int wordLength = word.Length;
        while (index < 0)
        {
            index = matchRun.Text.LastIndexOf(word.Substring(0, wordLength--));
        }

        // Match run can contain text before the match word, so we should split the run.
        if (index > 0)
        {
            // Clone match run to preserve formatting.
            Run beforeRun = (Run)matchRun.Clone(true);

            // Split text between the two runs.
            beforeRun.Text = matchRun.Text.Substring(0, index);
            matchRun.Text = matchRun.Text.Substring(index);
            matchRun.ParentNode.InsertBefore(beforeRun, matchRun);
        }

        // If the word spans several Run nodes, we should find all runs that represent the whole word.
        int currentIdx = word.Length;
        while (currentIdx > 0)
        {
            if (matchRun.Text.Length > currentIdx)
            {
                // The rest of the word ends inside this run; it is split below.
                break;
            }

            mainTextNodes.Add(matchRun);
            currentIdx = currentIdx - matchRun.Text.Length;

            // If there are no next nodes, we should stop the while loop.
            Node currentNode = matchRun.NextSibling;
            if (currentNode == null)
            {
                break;
            }

            // The next node might not be a Run node (BookmarkStart or BookmarkEnd for instance),
            // in this case we should move to the next node.
            while (currentNode != null)
            {
                if (currentNode.NodeType == NodeType.Run)
                {
                    matchRun = (Run)currentNode;
                    break;
                }
                currentNode = currentNode.NextSibling;
            }
        }

        // Now we should split the latest run in the sequence that represents the whole word,
        // if there is text after the match word.
        if (matchRun != null)
        {
            if (matchRun.Text.Length == currentIdx)
            {
                mainTextNodes.Add(matchRun);
            }
            else
            {
                Run lastRun = (Run)matchRun.Clone(true);
                lastRun.Text = matchRun.Text.Substring(0, currentIdx);
                matchRun.Text = matchRun.Text.Substring(currentIdx);
                matchRun.ParentNode.InsertBefore(lastRun, matchRun);
                mainTextNodes.Add(lastRun);
            }
        }

        // Move the DocumentBuilder cursor to the run with the line break and insert a paragraph break.
        DocumentBuilder builder = new DocumentBuilder((Document)e.MatchNode.Document);
        builder.MoveTo((Node)mainTextNodes[0]);
        builder.Writeln();

        // Remove run(s) with the line break.
        for (int i = 0; i < mainTextNodes.Count; i++)
        {
            Run currentRun = (Run)mainTextNodes[i];
            currentRun.Remove();
        }
        return ReplaceAction.Skip;
    }
}

I hope this helps.

Best Regards,