急急急，docxtohtml 出现样式问题

DiZheng · November 7, 2022, 7:13am

你好,
我们的项目需要将 doc 转换成 html 并对文档进行分割。我们参考官方文档，使用了提取文档内容的方法，但提取后的样式有问题：p 和 table 标签的 margin-bottom 都被改成了 0pt。
参考文档：Extract Content Between Document Nodes|Aspose.Words for .NET

/// <summary>
/// Converts the sample DOCX to HTML, then extracts the content between the fifth and sixth
/// Heading 2 paragraphs into a separate document and saves that document as HTML as well.
/// </summary>
/// <remarks>
/// Exceptions are not propagated to the caller (the original contract); they are reported through
/// <c>System.Diagnostics.Trace</c> instead of being silently discarded.
/// </remarks>
public void Get()
{
    // Zero-based positions of the Heading 2 paragraphs that delimit the extracted range.
    const int startHeadingIndex = 4;
    const int endHeadingIndex = 5;

    try
    {
        HtmlSaveOptions options = new HtmlSaveOptions();
        options.ExportRoundtripInformation = true;
        options.ExportImagesAsBase64 = true;
        options.CssStyleSheetType = CssStyleSheetType.External;

        LoadOptions loadOptions = new LoadOptions
        {
            WarningCallback = new DocumentLoadingWarningCallback(_logger)
        };

        // A .docx file is binary, so open it as a raw stream rather than through a text reader.
        // The using block releases the file handle even when loading fails; once the Document
        // is constructed the stream is no longer needed because the document is in memory.
        Document doc;
        using (Stream stream = File.OpenRead("C://wordtohtml//demo.docx"))
        {
            doc = new Document(stream, loadOptions);
        }

        doc.Save("C://wordtohtml//demo.html", options);

        // Collect every Heading 2 paragraph in document order.
        List<Paragraph> headings = doc
            .GetChildNodes(NodeType.Paragraph, true)
            .Cast<Paragraph>()
            .Where(p => p.ParagraphFormat.StyleIdentifier == StyleIdentifier.Heading2)
            .ToList();

        // Without enough headings there is nothing to extract; report it instead of
        // failing with an index-out-of-range error.
        if (headings.Count <= endHeadingIndex)
        {
            System.Diagnostics.Trace.TraceError(
                "Expected at least " + (endHeadingIndex + 1) +
                " Heading 2 paragraphs but found " + headings.Count + "; nothing was extracted.");
            return;
        }

        // Extract the nodes between the two headings (markers excluded) and save them as HTML.
        List<Node> pprList = _asposeService.ExtractContent(
            headings[startHeadingIndex], headings[endHeadingIndex], false);
        Document pprDoc = _asposeService.GenerateDocument(doc, pprList);
        pprDoc.Save("C://wordtohtml//ppr.html", options);
    }
    catch (Exception e)
    {
        // Keep the best-effort behavior (no rethrow) but make the failure visible.
        System.Diagnostics.Trace.TraceError("DOCX to HTML conversion failed: " + e);
    }
}
/// <summary>
/// Collects copies of the block-level nodes that lie between two marker nodes of a document.
/// </summary>
/// <param name="startNode">Marker node at which extraction starts.</param>
/// <param name="endNode">Marker node at which extraction ends.</param>
/// <param name="isInclusive">Passed through to the marker processing; also controls whether
/// a comment following a <c>CommentRangeEnd</c> end marker and the paragraph after the end
/// marker are taken into account.</param>
/// <returns>The cloned nodes found between the markers, in document order.</returns>
/// <exception cref="InvalidOperationException">
/// The end of the document was reached before the end marker's block was found.
/// </exception>
public List<Node> ExtractContent(Node startNode, Node endNode, bool isInclusive)
{
    // First, check that the nodes passed to this method are valid for use.
    VerifyParameterNodes(startNode, endNode);

    // Create a list to store the extracted nodes.
    List<Node> nodes = new List<Node>();

    // If the end marker is a CommentRangeEnd and extraction is inclusive, move the end marker
    // forward to the Comment node found after it so the comment itself is included.
    if (endNode.NodeType == NodeType.CommentRangeEnd && isInclusive)
    {
        Node node = FindNextNode(NodeType.Comment, endNode.NextSibling);
        if (node != null)
        {
            endNode = node;
        }
    }

    // Keep a record of the original nodes passed to this method to split marker nodes if needed.
    Node originalStartNode = startNode;
    Node originalEndNode = endNode;

    // Extract content based on block-level nodes (paragraphs and tables). Traverse through parent
    // nodes to find them. The first and last nodes' content may be split if the markers are inline.
    startNode = GetAncestorInBody(startNode);
    endNode = GetAncestorInBody(endNode);

    bool isExtracting = true;
    bool isStartingNode = true;
    // The current node we are extracting from the document.
    Node currNode = startNode;

    while (isExtracting)
    {
        // Clone the current node and its children to obtain a copy.
        Node cloneNode = currNode.Clone(true);
        bool isEndingNode = currNode.Equals(endNode);

        if (isStartingNode || isEndingNode)
        {
            // Each marker is processed separately by ProcessMarker.
            // The end marker is processed first to keep node indexes.
            if (isEndingNode)
            {
                // !isStartingNode: don't add the node twice if the markers are the same node.
                ProcessMarker(cloneNode, nodes, originalEndNode, currNode, isInclusive,
                    false, !isStartingNode, false);
                isExtracting = false;
            }

            // This conditional is separate because the block-level start and end markers
            // may be the same node.
            if (isStartingNode)
            {
                ProcessMarker(cloneNode, nodes, originalStartNode, currNode, isInclusive,
                    true, true, false);
                isStartingNode = false;
            }
        }
        else
        {
            // Node is not a start or end marker, simply add the copy to the list.
            nodes.Add(cloneNode);
        }

        if (currNode.NextSibling == null && isExtracting)
        {
            // The rest of the content is in a later section. Skip sections that have no body
            // or an empty body instead of dereferencing null.
            Node nextBlock = null;
            Node currentSection = currNode.GetAncestor(NodeType.Section);
            Node sibling = currentSection == null ? null : currentSection.NextSibling;
            while (sibling != null && nextBlock == null)
            {
                Section candidate = sibling as Section;
                if (candidate != null && candidate.Body != null)
                {
                    nextBlock = candidate.Body.FirstChild;
                }

                sibling = sibling.NextSibling;
            }

            if (nextBlock == null)
            {
                // Previously this surfaced as an opaque NullReferenceException.
                throw new InvalidOperationException(
                    "Reached the end of the document before the end node was found.");
            }

            currNode = nextBlock;
        }
        else
        {
            // Move to the next node in the body.
            currNode = currNode.NextSibling;
        }
    }

    // For compatibility with the inline-bookmark mode, add the next (empty) paragraph when the
    // end marker is itself a block-level, non-composite node and extraction is inclusive.
    if (isInclusive && originalEndNode == endNode && !originalEndNode.IsComposite)
    {
        IncludeNextParagraph(endNode, nodes);
    }

    // Return the nodes between the node markers.
    return nodes;
}
/// <summary>
/// Builds a new document whose first section body contains imported copies of the given nodes.
/// </summary>
/// <param name="srcDoc">Document the nodes originate from; used as the import source.</param>
/// <param name="nodes">Nodes to import, appended in list order.</param>
/// <returns>The newly created document holding the imported nodes.</returns>
public Document GenerateDocument(Document srcDoc, List<Node> nodes)
{
    Document dstDoc = new Document();

    // Start from an empty body: drop whatever the new document's first section body contains.
    var targetBody = dstDoc.FirstSection.Body;
    targetBody.RemoveAllChildren();

    // The importer brings nodes across documents while keeping the source formatting.
    NodeImporter importer = new NodeImporter(srcDoc, dstDoc, ImportFormatMode.KeepSourceFormatting);

    foreach (Node sourceNode in nodes)
    {
        // Deep-import the node (children included) and append it to the destination body.
        targetBody.AppendChild(importer.ImportNode(sourceNode, true));
    }

    return dstDoc;
}

alexey.noskov · November 7, 2022, 8:20am

@DiZheng 这个问题已经在另一个你的帖子中得到了回答：
https://forum.aspose.com/t/paid-users-docxtohtml-style-problem-after-extract-content-between-nodes-in-a-document/254528

DiZheng · November 7, 2022, 9:51am

@alexey.noskov 非常感谢，已经解决