C# .NET: A few Word bookmarks are not importing correctly

Hi,
The data for a few bookmarks is not being imported properly. I am attaching a Word file to replicate the issue:
import.docx (93.5 KB)

My C# Code:

// Load the source document and append an empty "DocumentEnd" bookmark at its end,
// so that the last bookmark in the file also has a following bookmark to pair with.
Document doc = new Document(fname);
DocumentBuilder builder = new DocumentBuilder(doc);
builder.MoveToDocumentEnd();
builder.StartBookmark("DocumentEnd");
builder.EndBookmark("DocumentEnd");

BookmarkCollection bmCollection = doc.Range.Bookmarks;
string bookmarkName = string.Empty;
string bookmarkHtml = string.Empty;

// Pair every bookmark with the one that follows it in the collection and
// convert the content lying between the two to HTML.
int index = 0;
while (index < bmCollection.Count - 1)
{
    // The extracted range runs from the END of the current bookmark
    // to the START of the next bookmark.
    BookmarkEnd contentStart = doc.Range.Bookmarks[index].BookmarkEnd;
    BookmarkStart contentEnd = doc.Range.Bookmarks[index + 1].BookmarkStart;

    // NOTE(review): the third argument is presumably an "inclusive" flag of the
    // ExtractContent helper -- confirm against the helper's definition.
    ArrayList nodes = ExtractContent(contentStart, contentEnd, true);
    Document htmlDoc = GenerateDocument(doc, nodes);
    //htmlDoc.FirstSection.Body.FirstParagraph.Remove();

    string data = htmlDoc.ToString(SaveFormat.Html);
    bookmarkName = bmCollection[index].Name;
    // Problem: at this point "data" does not contain the full Word content
    // for some of the bookmarks.

    index++;
}

Names of the bookmarks that are not imported correctly:
PR_RatingDrivers
PR_Complexity

@pravinghadge,

The source Word DOCX document contains the following two hidden bookmarks:

  • _GoBack
  • _Hlk73452288

You can simply remove them to get the desired output:

Document doc = new Document("C:\\Temp\\import.docx");
DocumentBuilder builder = new DocumentBuilder(doc);

// Remove hidden bookmarks (names starting with "_", e.g. _GoBack) before pairing
// bookmarks; otherwise they end up between the visible bookmarks in the collection
// and cut the extracted ranges short.
// The collection is walked backwards by index instead of with foreach, so that
// removing an entry never disturbs the entries that still have to be examined.
BookmarkCollection allBookmarks = doc.Range.Bookmarks;
for (int i = allBookmarks.Count - 1; i >= 0; i--)
{
    Bookmark bookmark = allBookmarks[i];
    if (bookmark.Name.StartsWith("_", StringComparison.Ordinal))
    {
        bookmark.Remove();
    }
}

// Append an empty "DocumentEnd" bookmark so that the last real bookmark
// also has a following bookmark to pair with.
builder.MoveToDocumentEnd();
builder.StartBookmark("DocumentEnd");
builder.EndBookmark("DocumentEnd");

BookmarkCollection bmCollection = doc.Range.Bookmarks;
string bookmarkName = string.Empty;
string bookmarkHtml = string.Empty;

// Pair every bookmark with the one that follows it and convert the content
// lying between the two to HTML.
for (int i = 0; i < bmCollection.Count - 1; i++)
{
    // The extracted range runs from the END of bookmark i to the START of bookmark i + 1.
    BookmarkEnd rangeStart = bmCollection[i].BookmarkEnd;
    BookmarkStart rangeEnd = bmCollection[i + 1].BookmarkStart;

    ArrayList nodes = Common.ExtractContent(rangeStart, rangeEnd, true);
    Document htmlDoc = Common.GenerateDocument(doc, nodes);
    //htmlDoc.FirstSection.Body.FirstParagraph.Remove();

    bookmarkName = bmCollection[i].Name;
    // HTML for the content that follows the bookmark named bookmarkName.
    bookmarkHtml = htmlDoc.ToString(SaveFormat.Html);
}