Free Support Forum - aspose.com

Blocks in html

I use the code below to extract the document contents and return them as an HTML string. When I did that for the attached document, there were 2 issues:

1 - it returned the string with irregular characters at the end ("\0\0\0\0\0\0\0\0").

2 - it didn't keep the numbering for the numbered list. Instead, it treated each list item as a separate list ("

  1. Elevated blood pressure with a diagnosis of hypertension, rule out white coat syndrome.
  1. Bilateral ankle lipomas.
")

Please advise.

Thank you

Code:

// Extracts the content enclosed by the bookmark named bookmarkName and returns it
// as a string produced by saving that content in ThisSaveFormat.
// Works on a clone of doc: the nodes between the bookmark start and end are trimmed
// and imported one by one into a new, separate document, which is then saved to memory.
private static string GetHtmlFromBookmark(string bookmarkName, Document doc, SaveFormat ThisSaveFormat)

{

// All trimming below is done on the clone, not on the document passed in.
Document docClone = doc.Clone();

// Destination document that receives the imported nodes.
Document doc1 = new Document();

Bookmark mark = docClone.Range.Bookmarks[bookmarkName];

// Start at the parent of the bookmark start (presumably its paragraph).
Node node = mark.BookmarkStart.ParentNode;

// Stop marker: the sibling that follows the parent of the bookmark end.
Node endNode = mark.BookmarkEnd.ParentNode.NextSibling;

while (!node.Equals(endNode))

{

// If this node directly holds the bookmark start, delete its children from the
// first child up to and including the bookmark start.
if ((node as CompositeNode).ChildNodes.Contains(mark.BookmarkStart))

{

Node child = (node as CompositeNode).FirstChild;

// NOTE(review): endChild is null when the bookmark start is the last child; the
// loop below would then step past the last child - confirm this cannot happen.
Node endChild = mark.BookmarkStart.NextSibling;

while (!child.Equals(endChild))

{

// Advance first, then remove the node just left behind.
child = child.NextSibling;

child.PreviousSibling.Remove();

}

}

// If this node directly holds the bookmark end, delete the bookmark end and every
// sibling that follows it.
if ((node as CompositeNode).ChildNodes.Contains(mark.BookmarkEnd))

{

Node child = mark.BookmarkEnd;

while (!child.Equals(child.ParentNode.LastChild))

{

child = child.NextSibling;

child.PreviousSibling.Remove();

}

// Remove the last remaining child as well.
child.Remove();

}

// Each node is imported into the destination document individually. The reply in
// this thread attributes the restarted list numbering to this per-node import.
doc1.FirstSection.Body.AppendChild(doc1.ImportNode(node, true, ImportFormatMode.KeepSourceFormatting));

node = node.NextSibling;

// Ran out of siblings before reaching endNode.
if (node == null)

break;

}

MemoryStream stream = new MemoryStream();

doc1.Save(stream, ThisSaveFormat);

// GetBuffer() returns the stream's whole internal buffer, including unused bytes past
// stream.Length; decoding all of it yields the trailing "\0" characters reported above.
string html = Encoding.UTF8.GetString(stream.GetBuffer());

return html;

}

Hi

<?xml:namespace prefix = o ns = "urn:schemas-microsoft-com:office:office" />

Thanks for your inquiry.

1. Please try using the following code to convert document to HTML string:

/// <summary>
/// Saves the document in HTML format to an in-memory stream and returns the output as a string.
/// </summary>
/// <param name="doc">The document to convert.</param>
/// <returns>The saved HTML, decoded as UTF-8.</returns>
private string ConvertDocumentToHtml(Document doc)
{
    using (MemoryStream buffer = new MemoryStream())
    {
        // Render the document as HTML into memory.
        doc.Save(buffer, SaveFormat.Html);

        // Decode only the bytes that were actually written: the array returned by
        // GetBuffer() can be larger than the stream's Length.
        byte[] raw = buffer.GetBuffer();
        int writtenBytes = (int)buffer.Length;
        return Encoding.UTF8.GetString(raw, 0, writtenBytes);
    }
}

2. The problem with lists occurs because you import each paragraph between the bookmark start and end into a separate document one at a time. During the import the list information is lost, so each list item in the destination document becomes an item of its own separate list. As a workaround, you can remove everything except the bookmarked content from the document and then convert it to HTML.

// Load the sample document and print the HTML of the content inside the "Body" bookmark.
Document sourceDocument = new Document(@"Test127\Doc1.doc");

Console.WriteLine(GetHtmlFromBookmark("Body", sourceDocument));

============================================================================

/// <summary>
/// Returns the HTML for the content enclosed by the named bookmark.
/// The document is cloned first; everything before the bookmark start and after the
/// bookmark end is removed from the clone, and the clone is then converted to HTML.
/// </summary>
/// <param name="bookmarkName">Name of the bookmark whose content is extracted.</param>
/// <param name="doc">Source document. Only its clone is trimmed.</param>
/// <returns>The HTML produced from the trimmed clone.</returns>
/// <exception cref="ArgumentNullException"><paramref name="doc"/> is null.</exception>
/// <exception cref="ArgumentException">The document has no bookmark with the given name.</exception>
private string GetHtmlFromBookmark(string bookmarkName, Document doc)
{
    if (doc == null)
    {
        throw new ArgumentNullException("doc");
    }

    // Clone the original document so the trimming below does not touch it.
    Document docClone = doc.Clone();

    // Look up the bookmark in the clone. The lookup yields null for an unknown name,
    // so fail with a descriptive error instead of a NullReferenceException further down.
    Bookmark mark = docClone.Range.Bookmarks[bookmarkName];
    if (mark == null)
    {
        throw new ArgumentException("Bookmark '" + bookmarkName + "' was not found in the document.", "bookmarkName");
    }

    // Remove content before the bookmark.
    RemoveContentBeforeNode(docClone, mark.BookmarkStart);

    // Remove content after the bookmark.
    RemoveContentAfterNode(docClone, mark.BookmarkEnd);

    // Convert the trimmed document to HTML.
    string html = ConvertDocumentToHtml(docClone);

    // Kept from the original sample: also writes the trimmed clone to disk
    // (presumably so the result can be inspected - remove if not needed).
    docClone.Save(@"Test127\out.doc");

    return html;
}

/// <summary>
/// Removes all content before the specified node.
/// Walks backwards from <paramref name="endNode"/> in pre-order and removes every node
/// met on the way, except composite nodes that contain <paramref name="endNode"/>.
/// </summary>
/// <param name="doc">Document that owns the node; used as the traversal limit.</param>
/// <param name="endNode">Node before which all content is removed. The node itself is kept.</param>
private void RemoveContentBeforeNode(Document doc, Node endNode)
{
    Node curNode = endNode.PreviousPreOrder(doc);

    while (curNode != null)
    {
        // Work out where to go next BEFORE removing anything: once a node is detached,
        // its position in the tree is lost.
        Node nextNode = curNode.PreviousPreOrder(doc);

        if (curNode.IsComposite)
        {
            CompositeNode composite = (CompositeNode)curNode;

            // Ancestors of endNode must stay; any other composite node goes as a whole.
            if (!composite.GetChildNodes(NodeType.Any, true).Contains(endNode))
            {
                // Shortcut: jump straight to the previous sibling when there is one, so its
                // subtree is removed in one step. When there is none, keep the pre-order
                // predecessor computed above (the parent) so the walk continues upwards.
                // Stopping here would leave earlier content (for example preceding
                // sections or the rest of a table) in the document.
                Node previousSibling = curNode.PreviousSibling;
                if (previousSibling != null)
                {
                    nextNode = previousSibling;
                }

                curNode.Remove();
            }
        }
        else
        {
            curNode.Remove();
        }

        curNode = nextNode;
    }
}

/// <summary>
/// Removes all content after the specified node.
/// Walks forwards from <paramref name="startNode"/> in pre-order and removes every node
/// met on the way, except composite nodes that contain <paramref name="startNode"/>.
/// </summary>
/// <param name="doc">Document that owns the node; used as the traversal limit.</param>
/// <param name="startNode">Node after which all content is removed. The node itself is kept.</param>
private void RemoveContentAfterNode(Document doc, Node startNode)
{
    Node curNode = startNode.NextPreOrder(doc);

    while (curNode != null)
    {
        // Work out where to go next BEFORE removing anything: once a node is detached,
        // its position in the tree is lost.
        Node nextNode = curNode.NextPreOrder(doc);

        if (curNode.IsComposite)
        {
            CompositeNode composite = (CompositeNode)curNode;

            // Keep a composite node that contains startNode; remove any other as a whole.
            if (!composite.GetChildNodes(NodeType.Any, true).Contains(startNode))
            {
                // The whole subtree is about to go, so continue with the first node that
                // follows it: the next sibling, or else the next sibling of the nearest
                // ancestor that has one. Stopping when there is no direct next sibling
                // would leave later content (for example following sections) in place.
                Node following = curNode.NextSibling;
                Node ancestor = curNode.ParentNode;
                while (following == null && ancestor != null && !ReferenceEquals(ancestor, doc))
                {
                    following = ancestor.NextSibling;
                    ancestor = ancestor.ParentNode;
                }

                nextNode = following;

                curNode.Remove();
            }
        }
        else
        {
            curNode.Remove();
        }

        curNode = nextNode;
    }
}

Best regards.

The issues you have found earlier (filed as WORDSNET-5430) have been fixed in this .NET update and this Java update.


This message was posted using Notification2Forum from Downloads module by aspose.notifier.
(5)