Hi team,
I am converting a paragraph to HTML with Aspose.Words, but the output gives me a top margin of 4pt where it should be zero. I am sharing the code I use and the document I am converting.
Please note that I am converting only a basic section with a <!save_...
tag.
''' <summary>
''' Reads the saved copy of <paramref name="word_document"/> with Aspose.Words and collects the
''' content of every content control whose tag starts with "&lt;!SAVE" (case-insensitive).
''' </summary>
''' <param name="word_document">Word document whose <c>FullName</c> is the file opened for reading.</param>
''' <returns>
''' A table with the string columns "keyword_code" (upper-cased key taken from the tag) and
''' "keyword_value" (HTML for "&lt;!SAVE_HTML" tags, de-entitized text otherwise), or Nothing when
''' any exception is raised (the exception is passed to the logger).
''' </returns>
Private Function GetContenControlHtml(word_document As Word.Document) As DataTable
    ' NOTE(review): this label does not match the function name. It is left untouched because it
    ' is emitted through the logger - confirm whether downstream log analysis depends on it.
    Const FUNCTION_NAME As String = "Find_All_Document_Saved_Keywords_For_Office_2010_And_Above_Via_Automation"
    'This is the logging information for statistical analysis.
    Dim logging_information As New ANCLogger("&|$S<<WordContentControlCapture.vb>>&|$", FUNCTION_NAME, "&|$L<<325>>&|$")
    Dim result As DataTable = Nothing
    Try
        Dim found_keywords As New DataTable
        found_keywords.Columns.Add("keyword_code", GetType(String))
        found_keywords.Columns.Add("keyword_value", GetType(String))

        Dim pathOfActiveDocument As String = word_document.FullName
        ' FileShare.ReadWrite lets the file be opened while another handle still has it open for writing.
        Using fs As New FileStream(pathOfActiveDocument, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)
            Dim license = New Aspose.Words.License()
            Using ms = New MemoryStream(My.Resources.AsposeWordLicense)
                license.SetLicense(ms)
            End Using

            Dim doc As Aspose.Words.Document = New Aspose.Words.Document(fs)
            For Each sdt As Aspose.Words.Markup.StructuredDocumentTag In doc.GetChildNodes(Aspose.Words.NodeType.StructuredDocumentTag, True)
                Dim tagText As String = sdt.Tag
                ' Untagged controls and controls with unrelated tags are skipped (a Nothing tag no
                ' longer aborts the whole scan).
                If String.IsNullOrEmpty(tagText) OrElse
                   Not tagText.StartsWith("<!SAVE", StringComparison.OrdinalIgnoreCase) Then
                    Continue For
                End If

                ' HTML keywords are recognised by the "<!SAVE_HTML" prefix, case-insensitively like
                ' the prefix test above and the regex in ExtractHighlightKey. A plain key that merely
                ' contains "_HTML" is no longer misclassified (which produced an empty keyword_code).
                Dim isHtmlContent As Boolean = tagText.StartsWith("<!SAVE_HTML", StringComparison.OrdinalIgnoreCase)
                Dim KeyWord As String = ExtractHighlightKey(tagText, isHtmlContent)

                Dim htmlString As String = ExtractInnerHtml(sdt)
                Dim htmlDoc As New HtmlAgilityPack.HtmlDocument()
                htmlDoc.OptionFixNestedTags = True
                htmlDoc.LoadHtml(htmlString)

                If isHtmlContent Then
                    ' ExtractInnerHtml returns a fragment without a <body> element (body content,
                    ' encoded plain text, or ""), so fall back to the document node when no
                    ' <body> is found instead of dereferencing Nothing.
                    Dim body As HtmlAgilityPack.HtmlNode = htmlDoc.DocumentNode.SelectSingleNode("//body")
                    Dim container As HtmlAgilityPack.HtmlNode = If(body, htmlDoc.DocumentNode)
                    htmlString = container.InnerHtml.Trim()
                Else
                    htmlString = HtmlAgilityPack.HtmlEntity.DeEntitize(htmlDoc.DocumentNode.InnerText)
                End If

                Dim new_row As DataRow = found_keywords.NewRow
                new_row.Item("keyword_code") = KeyWord.ToUpper()
                new_row.Item("keyword_value") = htmlString
                found_keywords.Rows.Add(new_row)
            Next
        End Using

        result = found_keywords
    Catch ex As Exception
        logging_information.AddException("&|$L<<426>>&|$", ex)
        result = Nothing
    End Try
    logging_information.Done("&|$L<<430>>&|$")
    Return result
End Function

''' <summary>
''' Extracts the key from a tag of the form "&lt;!SAVE=key&gt;" or, when <paramref name="IsHtml"/>
''' is True, "&lt;!SAVE_HTML=key&gt;". Matching is case-insensitive; whitespace around the marker,
''' the "=" and the key is ignored, and an optional "/" before "&gt;" is accepted.
''' </summary>
''' <param name="input">Tag text to inspect; may be Nothing or empty.</param>
''' <param name="IsHtml">Selects the SAVE_HTML form of the pattern instead of the SAVE form.</param>
''' <returns>The trimmed key, or an empty string when the input is empty or does not match.</returns>
Friend Function ExtractHighlightKey(input As String, ByVal IsHtml As Boolean) As String
    If String.IsNullOrEmpty(input) Then Return String.Empty

    ' Only the pattern that applies is evaluated.
    Dim pattern As String = If(IsHtml,
                               "<!\s*SAVE_HTML\s*=\s*([^>/]+?)\s*/?>",
                               "<!\s*SAVE\s*=\s*([^>/]+?)\s*/?>")
    Dim m As Match = Regex.Match(input, pattern, RegexOptions.IgnoreCase Or RegexOptions.Singleline)

    Return If(m.Success, m.Groups(1).Value.Trim(), String.Empty)
End Function

''' <summary>
''' Produces HTML for the content of a content control.
''' Plain-text, combo-box, drop-down and date controls return their HTML-encoded range text.
''' For any other control the tables and paragraphs enumerated from the control are imported into
''' a temporary document, which is saved as HTML and reduced to the content of its body element.
''' </summary>
''' <param name="sdt">The content control to convert.</param>
''' <returns>The HTML fragment, or "" when an exception is raised (the exception is logged).</returns>
Friend Function ExtractInnerHtml(ByVal sdt As Aspose.Words.Markup.StructuredDocumentTag) As String
    Const FUNCTION_NAME As String = "ExtractInnerHtml"
    Dim logging_information As New ANCLogger("&|$S<<WordContentControlCapture.vb>>&|$", FUNCTION_NAME, "&|$L<<325>>&|$")
    Try
        Select Case sdt.SdtType
            Case Aspose.Words.Markup.SdtType.PlainText,
                 Aspose.Words.Markup.SdtType.ComboBox,
                 Aspose.Words.Markup.SdtType.DropDownList,
                 Aspose.Words.Markup.SdtType.Date
                Return Global.System.Web.HttpUtility.HtmlEncode(sdt.Range.Text)
        End Select

        Dim srcDoc As Aspose.Words.Document = sdt.Document

        ' Build tmpDoc WITHOUT EnsureMinimum (prevents default empty paragraph)
        Dim tmpDoc As New Aspose.Words.Document()
        tmpDoc.RemoveAllChildren()
        Dim sec As New Aspose.Words.Section(tmpDoc)
        tmpDoc.AppendChild(sec)
        Dim body As New Aspose.Words.Body(tmpDoc)
        sec.AppendChild(body)

        Dim importer As New Aspose.Words.NodeImporter(srcDoc, tmpDoc, Aspose.Words.ImportFormatMode.KeepSourceFormatting)
        Dim isInline As Boolean = (sdt.Level = Aspose.Words.Markup.MarkupLevel.Inline)
        ' Single paragraph that receives all inline content; created on first use so that no
        ' empty paragraph is added when nothing is imported.
        Dim hostPara As Aspose.Words.Paragraph = Nothing

        ' NOTE(review): enumeration over sdt.Range is kept exactly as in the original - confirm
        ' which nodes this yields for the Aspose.Words version in use.
        For Each node As Aspose.Words.Node In sdt.Range
            Select Case node.NodeType
                Case Aspose.Words.NodeType.Table
                    Dim importedTable As Aspose.Words.Tables.Table =
                        CType(importer.ImportNode(node, True), Aspose.Words.Tables.Table)
                    body.AppendChild(importedTable)

                Case Aspose.Words.NodeType.Paragraph
                    Dim importedPara As Aspose.Words.Paragraph =
                        CType(importer.ImportNode(node, True), Aspose.Words.Paragraph)
                    If isInline Then
                        If hostPara Is Nothing Then
                            hostPara = New Aspose.Words.Paragraph(tmpDoc)
                            body.AppendChild(hostPara)
                        End If
                        ' Move children of the imported paragraph into the single host paragraph.
                        Dim childNode As Aspose.Words.Node = importedPara.FirstChild
                        Do While childNode IsNot Nothing
                            Dim nextNode As Aspose.Words.Node = childNode.NextSibling ' cache before move
                            hostPara.AppendChild(childNode) ' re-parents the node; no clone needed
                            childNode = nextNode
                        Loop
                    Else
                        body.AppendChild(importedPara)
                    End If
            End Select
        Next

        ' Safety net: remove any truly empty paragraphs (prevents blank <p>)
        RemoveEmptyParagraphs_NoChildNodes(tmpDoc)

        Dim saveOpts As New Aspose.Words.Saving.HtmlSaveOptions(Aspose.Words.SaveFormat.Html) With {
            .ExportRoundtripInformation = True,
            .PrettyFormat = True,
            .TableWidthOutputMode = Aspose.Words.Saving.HtmlElementSizeOutputMode.All,
            .ExportHeadersFootersMode = Aspose.Words.Saving.ExportHeadersFootersMode.None,
            .ExportImagesAsBase64 = True
        }

        Using ms As New Global.System.IO.MemoryStream()
            tmpDoc.Save(ms, saveOpts)
            ms.Position = 0
            Using reader As New Global.System.IO.StreamReader(ms)
                Return ExtractBodyInnerHtml(reader.ReadToEnd())
            End Using
        End Using
    Catch ex As Global.System.Exception
        logging_information.AddException("&|$L<<426>>&|$", ex)
    End Try
    Return ""
End Function

''' <summary>
''' Removes from <paramref name="doc"/> every paragraph that
''' <see cref="ParagraphIsEmpty_NoChildNodes"/> reports as empty.
''' </summary>
''' <param name="doc">Document to clean up in place.</param>
Private Sub RemoveEmptyParagraphs_NoChildNodes(ByVal doc As Aspose.Words.Document)
    ' Work on a snapshot so removals do not interact with the collection being walked.
    Dim paras As Aspose.Words.Node() = doc.GetChildNodes(Aspose.Words.NodeType.Paragraph, True).ToArray()
    ' Last to first, the same order the paragraphs were visited in before.
    For i As Integer = paras.Length - 1 To 0 Step -1
        Dim p As Aspose.Words.Paragraph = CType(paras(i), Aspose.Words.Paragraph)
        If ParagraphIsEmpty_NoChildNodes(p) Then p.Remove()
    Next
End Sub

''' <summary>
''' Reports whether a paragraph has neither visible text nor any shape.
''' </summary>
''' <param name="p">Paragraph to inspect.</param>
''' <returns>
''' False when the range text still has characters after paragraph, line and section break
''' characters are removed and the rest is trimmed, or when a Shape or GroupShape node exists
''' anywhere below the paragraph; True otherwise.
''' </returns>
Private Function ParagraphIsEmpty_NoChildNodes(ByVal p As Aspose.Words.Paragraph) As Boolean
    ' No visible text?
    Dim t As String = p.Range.Text
    t = t.Replace(Aspose.Words.ControlChar.ParagraphBreak, "") _
         .Replace(Aspose.Words.ControlChar.LineBreak, "") _
         .Replace(Aspose.Words.ControlChar.SectionBreak, "") _
         .Replace(ChrW(&HA0), " ").Trim() ' NBSP -> space
    If t.Length > 0 Then Return False

    ' Any shapes/drawing present? Searched at any depth so that a shape nested inside another
    ' inline container also keeps the paragraph.
    Dim hasShapes As Boolean =
        p.GetChildNodes(Aspose.Words.NodeType.Shape, True).Count > 0 OrElse
        p.GetChildNodes(Aspose.Words.NodeType.GroupShape, True).Count > 0
    Return Not hasShapes
End Function

''' <summary>
''' Returns the trimmed text between the end of the opening &lt;body ...&gt; tag and the
''' closing &lt;/body&gt; tag of <paramref name="fullHtml"/> (tag names matched case-insensitively).
''' </summary>
''' <param name="fullHtml">HTML text to search; may be Nothing or empty.</param>
''' <returns>
''' The body content; <paramref name="fullHtml"/> unchanged when no complete body element is
''' found; "" when the input is Nothing or empty or an exception is raised (logged).
''' </returns>
Friend Function ExtractBodyInnerHtml(ByVal fullHtml As String) As String
    Const FUNCTION_NAME As String = "ExtractBodyInnerHtml"
    Dim logging_information As New ANCLogger("&|$S<<WordContentControlCapture.vb>>&|$", FUNCTION_NAME, "&|$L<<325>>&|$")
    Try
        If String.IsNullOrEmpty(fullHtml) Then Return ""

        Dim bodyOpen As Integer = fullHtml.IndexOf("<body", StringComparison.OrdinalIgnoreCase)
        If bodyOpen < 0 Then Return fullHtml

        ' End of the opening tag; the tag may carry attributes.
        Dim contentStart As Integer = fullHtml.IndexOf(">"c, bodyOpen)
        If contentStart < 0 Then Return fullHtml
        contentStart += 1

        Dim contentEnd As Integer = fullHtml.IndexOf("</body>", contentStart, StringComparison.OrdinalIgnoreCase)
        ' The previous test compared bodyStart with itself, so the body was never extracted.
        If contentEnd < contentStart Then Return fullHtml

        Return fullHtml.Substring(contentStart, contentEnd - contentStart).Trim()
    Catch ex As Exception
        logging_information.AddException("&|$L<<426>>&|$", ex)
    End Try
    Return ""
End Function
Wal-Mart Stores Inc_22-Sep-2025.docx (333.9 KB)