正規表現を用いて、Word文書を分割するスクリプトを書いてみたのですが、
下記のエラーが発生します。
System.ArgumentException: ‘Start node and end node must be a child or descendant of a body’
デバッグの方針をご教示いただけたら幸いです。
書いたスクリプトは下記の通りです。
また、Commonに属するメソッドは下記のリンクのファイルから呼び出しております。
using System;
using System.Collections;
using System.IO;
using System.Text.RegularExpressions;
using Aspose.Words;
using Aspose.Words.Examples.CSharp.Programming_Documents.Working_With_Document;
using Aspose.Words.Replacing;
namespace Split_document02
{
class Program
{
static void Main(string[] args)
{
var fileInfo = new FileInfo(@"C:\Users\user\...\sample2.docx");
//Load in the document
Document doc = new Document(fileInfo.FullName);
//insert bookmark at the start of document.
DocumentBuilder builder = new DocumentBuilder(doc);
builder.MoveToDocumentStart();
builder.StartBookmark("BM_0");
builder.EndBookmark("BM_0");
//Find matches using regular expression and insert bookmark
//This time tried to use \d as deviding regular expression
Regex rx = new Regex(@"\d", RegexOptions.Compiled);
FindReplaceOptions options = new FindReplaceOptions();
options.ReplacingCallback = new FindAndInsertBookmark();
doc.Range.Replace(rx, "", options);
ArrayList bookmarks = new ArrayList();
for (int i = 0; i < doc.Range.Bookmarks.Count; i++)
{
if (doc.Range.Bookmarks[i].Name.StartsWith("BM_"))
bookmarks.Add(doc.Range.Bookmarks[i]);
}
builder.MoveToDocumentEnd();
builder.StartBookmark("BM_" + bookmarks.Count);
builder.EndBookmark("BM_" + bookmarks.Count);
for (int i = 0; i < bookmarks.Count - 1; i++)
{
BookmarkStart bStart = ((Bookmark)bookmarks[i]).BookmarkStart;
BookmarkEnd bEnd = ((Bookmark)bookmarks[i + 1]).BookmarkEnd;
ArrayList nodes = Common.ExtractContent(bStart, bEnd, true);
Document newdoc = Common.GenerateDocument(doc, nodes);
newdoc.Save("Out_" + i + ".docx");
}
Console.WriteLine("Process Finished!");
}
public class FindAndInsertBookmark : IReplacingCallback
{
int i = 1;
ReplaceAction IReplacingCallback.Replacing(ReplacingArgs e)
{
// This is a Run node that contains either the beginning or the complete match.
Node currentNode = e.MatchNode;
// The first (and may be the only) run can contain text before the match,
// in this case it is necessary to split the run.
if (e.MatchOffset > 0)
currentNode = SplitRun((Run)currentNode, e.MatchOffset);
// This array is used to store all nodes of the match for further highlighting.
ArrayList runs = new ArrayList();
// Find all runs that contain parts of the match string.
int remainingLength = e.Match.Value.Length;
while (
(remainingLength > 0) &&
(currentNode != null) &&
(currentNode.GetText().Length <= remainingLength))
{
runs.Add(currentNode);
remainingLength = remainingLength - currentNode.GetText().Length;
// Select the next Run node.
// Have to loop because there could be other nodes such as BookmarkStart etc.
do
{
currentNode = currentNode.NextSibling;
}
while ((currentNode != null) && (currentNode.NodeType != NodeType.Run));
}
// Split the last run that contains the match if there is any text left.
if ((currentNode != null) && (remainingLength > 0))
{
SplitRun((Run)currentNode, remainingLength);
runs.Add(currentNode);
}
DocumentBuilder builder = new DocumentBuilder((Document)currentNode.Document);
builder.MoveTo((Run)runs[0]);
builder.InsertParagraph();
builder.StartBookmark("BM_" + i);
builder.EndBookmark("BM_" + i);
builder.InsertParagraph();
i++;
// Signal to the replace engine to do nothing because we have already done all what we wanted.
return ReplaceAction.Skip;
}
}
/// <summary>
/// Splits text of the specified run into two runs.
/// Inserts the new run just after the specified run.
/// </summary>
private static Run SplitRun(Run run, int position)
{
Run afterRun = (Run)run.Clone(true);
afterRun.Text = run.Text.Substring(position);
run.Text = run.Text.Substring(0, position);
run.ParentNode.InsertAfter(afterRun, run);
return afterRun;
}
}
}
どうぞよろしくお願い申し上げます。