Split Word Document into HTML by Heading

Hello,

I split the document by heading using the following script:


// Resolve the directory of the entry assembly once and build every path with
// Path.Combine, so no directory separators are hard-coded and the lookup is not repeated.
var baseDirectory = System.IO.Path.GetDirectoryName(System.Reflection.Assembly.GetEntryAssembly().Location);
var filesDirectory = System.IO.Path.Combine(baseDirectory, "Files");
var proposalPath = System.IO.Path.Combine(filesDirectory, "Template.docx");
var htmlOutputPath = System.IO.Path.Combine(filesDirectory, "Result", "Output.html");
var doc = new Document(proposalPath);

// HTML5 output with fonts, images and CSS embedded in the generated file(s).
// The document is split at heading paragraphs, using heading level 6 as the split level.
HtmlSaveOptions options = new HtmlSaveOptions
{
    HtmlVersion = Aspose.Words.Saving.HtmlVersion.Html5,
    ExportFontsAsBase64 = true,
    CssStyleSheetType = CssStyleSheetType.Embedded,
    ExportImagesAsBase64 = true,
    PrettyFormat = true,
    ExportHeadersFootersMode = ExportHeadersFootersMode.PerSection,
    DocumentSplitCriteria = DocumentSplitCriteria.HeadingParagraph,
    DocumentSplitHeadingLevel = 6,
};
doc.Save(htmlOutputPath, options);

My question is: how can I determine the parent heading of each heading section?

@bhavikahirr Unfortunately, there is no built-in way to get the parent-child relationships between the generated files. However, Aspose.Words generates the files sequentially, so you can collect the heading paragraphs from your document and build the parent-child relationships yourself. For example, you can use a DocumentVisitor to iterate through the paragraphs and get the headings:

// Load the document and walk it with the visitor, which writes the headings to the console.
Document doc = new Document(@"C:\Temp\in.docx");
var printer = new HeadingsPrinter();
doc.Accept(printer);
/// <summary>
/// Document visitor that writes every Heading 1 - Heading 6 paragraph to the console,
/// indented with one tab per heading level below the first.
/// </summary>
private class HeadingsPrinter : DocumentVisitor
{
    /// <summary>
    /// Prints the paragraph text when the paragraph uses one of the Heading 1 - Heading 6
    /// style identifiers; all other paragraphs are ignored.
    /// </summary>
    /// <param name="paragraph">The paragraph being visited.</param>
    /// <returns>Always <see cref="VisitorAction.Continue"/>, so traversal never stops early.</returns>
    public override VisitorAction VisitParagraphStart(Paragraph paragraph)
    {
        int depth = GetHeadingDepth(paragraph.ParagraphFormat.StyleIdentifier);

        // Not a heading: skip it without converting the paragraph to text.
        if (depth < 0)
        {
            return VisitorAction.Continue;
        }

        string headingText = paragraph.ToString(SaveFormat.Text).Trim();
        Console.WriteLine(new string('\t', depth) + headingText);

        return VisitorAction.Continue;
    }

    /// <summary>
    /// Maps a heading style identifier to its indentation depth
    /// (0 for Heading 1 up to 5 for Heading 6), or -1 for any other style.
    /// </summary>
    private static int GetHeadingDepth(StyleIdentifier styleIdentifier)
    {
        switch (styleIdentifier)
        {
            case StyleIdentifier.Heading1: return 0;
            case StyleIdentifier.Heading2: return 1;
            case StyleIdentifier.Heading3: return 2;
            case StyleIdentifier.Heading4: return 3;
            case StyleIdentifier.Heading5: return 4;
            case StyleIdentifier.Heading6: return 5;
            default: return -1;
        }
    }
}