_Iraq National Data Center_BUG2024022193632 word转换html时 内容错乱

_Iraq National Data Center_BUG2024022193632.zip (242.7 KB)

将 Word 文档转换为 HTML 时,内容显示错乱

@xiangma 请尝试使用以下代码并让我们了解结果

// Open the source document and prepare HTML save options that export
// language information alongside the content.
Document wordDoc = new Document("Document.docx");
HtmlSaveOptions htmlOptions = new HtmlSaveOptions();
htmlOptions.setExportLanguageInformation(true);

// Visit every paragraph in the document (nested ones included) and mark the
// font of the paragraph's style as right-to-left with bidi locale 2049
// (0x0801, the Windows LCID for Arabic - Iraq).
NodeCollection allParagraphs = wordDoc.getChildNodes(NodeType.PARAGRAPH, true);
for (Paragraph para : (Iterable<Paragraph>) allParagraphs) {
    para.getParagraphFormat().getStyle().getFont().setLocaleIdBi(2049);
    para.getParagraphFormat().getStyle().getFont().setBidi(true);
}

// Write the result as HTML.
wordDoc.save("Result.html", htmlOptions);

1a960ba4fa7d7d9e435f303b8e67f55e.zip (29.9 KB)

使用上述代码后,仍有部分内容显示不正确。以下是我们实际使用的转换代码:

/**
 * Converts a Word document either to a set of per-page JPEG images (plus a
 * generated wrapper page) or to a fixed-layout HTML file, depending on
 * {@code OfficeTransUtil.wordOutputFormat}.
 *
 * <p>Before saving, the document title is blanked, font-substitution warnings
 * are recorded, all comments are removed from the in-memory document (the
 * source file is not modified), and, when enabled, tracked revisions are
 * accepted and tables are auto-fitted to their contents.
 *
 * @param sourceFileName path of the Word document to load
 * @param targetFileName path of the HTML file to produce; in image mode it is
 *                       also used to derive the directory holding the page images
 * @throws Exception if loading, layout, or saving fails, or if the image
 *                   output directory cannot be created
 */
private void document2Html(String sourceFileName, String targetFileName) throws Exception {
    Document document = new Document(sourceFileName);

    // The document's Title property is emitted as the <title> tag of the HTML
    // output (visible in the file's Properties -> Details dialog). Blank it to
    // avoid confusion. It must be an empty string: setting null raises an error.
    document.getBuiltInDocumentProperties().setTitle("");
    document.setWarningCallback(new com.aspose.words.IWarningCallback() {
        @Override
        public void warning(WarningInfo info) {
            if (info.getWarningType() == com.aspose.words.WarningType.FONT_SUBSTITUTION) {
                recordMissingFont(info.getDescription());
            }
        }
    });

    // Drop comments from the converted output; the source file keeps its comments.
    // clear() removes every node of the collection from the document in one call,
    // which avoids mutating the live collection while iterating over it.
    NodeCollection nodeCollection = document.getChildNodes(NodeType.COMMENT, true);
    if (nodeCollection != null) {
        nodeCollection.clear();
    }

    // Mitigates tables rendering incorrectly in documents that contain tracked
    // changes. This improves the result but does not fully solve the problem.
    // aspose-words 23.4 lacks HarfBuzzTextShaperFactory.getInstance(); per the
    // Aspose support forum, the HarfBuzz dependency was removed from the POM in
    // 22.09 at other customers' request, so the artifact with
    // <classifier>shaping-harfbuzz-plugin</classifier> is pulled in separately for
    // now; Aspose planned to add the dependency back to the POM in 23.5.
    // Throwable is caught on purpose so a missing plugin class does not abort
    // the conversion.
    try {
        document.getLayoutOptions().setTextShaperFactory(com.aspose.words.shaping.harfbuzz.HarfBuzzTextShaperFactory.getInstance());
    } catch (Throwable e) {
        LOG.warn("HarfBuzzTextShaperFactory fail", e);
    }

    if (OfficeTransUtil.isWordAutoFitColumns) {
        RevisionCollection rc = document.getRevisions();
        if (rc != null && rc.getCount() > 0) {
            document.acceptAllRevisions();
            autoFitAllTables(document);
        }
    }

    // A null output format falls through to the HTML branch instead of throwing.
    String outputFormat = OfficeTransUtil.wordOutputFormat;
    if (outputFormat != null && "image".equals(outputFormat.trim())) {
        String subPath = subDirectoryPath(targetFileName);
        File path = new File(subPath);
        // Fail early with a clear message rather than on the first page save.
        if (!path.mkdirs() && !path.isDirectory()) {
            throw new java.io.IOException("Cannot create image output directory: " + subPath);
        }

        ImageSaveOptions options = new ImageSaveOptions(SaveFormat.JPEG);
        options.setUseAntiAliasing(true);
        // Brightness: the effect of this parameter has not been fully studied yet;
        // the default of 0.5 is used and it is not exposed in the config file for now.
        options.setImageBrightness(OfficeTransUtil.wordImageBrightnessAspose);
        // Contrast: same situation as brightness above.
        options.setImageContrast(OfficeTransUtil.wordImageContrastAspose);
        // Only takes effect for JPEG output.
        options.setJpegQuality(OfficeTransUtil.wordJpegQualityAspose);

        // Render each page to its own file named by zero-based page index.
        int pageCount = document.getPageCount();
        for (int i = 0; i < pageCount; i++) {
            options.setPageSet(new PageSet(i));
            document.save(subPath + File.separator + i + ".jpg", options);
        }

        buildSlideMobileHtml(false, targetFileName, path, pageCount);
    } else {
        HtmlFixedSaveOptions options = new HtmlFixedSaveOptions();
        options.setUseTargetMachineFonts(false);
        options.setUseHighQualityRendering(true);
        options.setEncoding(java.nio.charset.StandardCharsets.UTF_8);
        options.setExportEmbeddedFonts(true);
        options.setShowPageBorder(OfficeTransUtil.wordShowPageBorder);
        document.acceptAllRevisions();
        document.save(targetFileName, options);
    }
}

/**
 * Extracts the font name from a font-substitution warning and adds it to the
 * missing-font set registered for the current file, if there is one.
 *
 * <p>The name is the text between the first {@code "Font"} and the next
 * {@code " has "} that follows it, with single quotes removed and surrounding
 * whitespace trimmed. Descriptions that do not match this shape are ignored.
 *
 * @param description warning text reported by the converter; may be null
 */
private void recordMissingFont(String description) {
    String key = currentFile.get();
    if (key == null || description == null) {
        return;
    }
    int start = description.indexOf("Font");
    if (start < 0) {
        return;
    }
    // Search for the end marker only after the start marker, so an earlier
    // " has " can never produce an inverted substring range.
    int nameStart = start + 4;
    int end = description.indexOf(" has ", nameStart);
    if (end < 0) {
        return;
    }
    Set<String> set = missingFonts.get(key);
    if (set != null) {
        set.add(description.substring(nameStart, end).replace("'", "").trim());
    }
}

/**
 * Auto-fits every table found directly in the body of each section to its contents.
 *
 * @param document the document whose tables are resized
 * @throws Exception if the library fails while auto-fitting a table
 */
private void autoFitAllTables(Document document) throws Exception {
    com.aspose.words.SectionCollection sections = document.getSections();
    if (sections == null) {
        return;
    }
    for (com.aspose.words.Section section : sections) {
        if (section == null) {
            continue;
        }
        com.aspose.words.TableCollection tables = section.getBody().getTables();
        if (tables == null) {
            continue;
        }
        for (com.aspose.words.Table table : tables) {
            if (table != null) {
                table.autoFit(com.aspose.words.AutoFitBehavior.AUTO_FIT_TO_CONTENTS);
            }
        }
    }
}

@xiangma 感谢您报告此问题。我们已在内部问题跟踪系统中创建了以下工单,并将按照免费支持政策中的条款提供修复:

Issue ID(s): WORDSNET-26635

如果您需要优先支持以及直接联系我们的付费支持管理团队,您可以获得 付费支持服务

@xiangma 我们已完成分析并得出结论,在 WPS 中显示文本存在问题。Aspose.Words 产生的结果与 MS Word 相同。遗憾的是,我们对此无能为力。

The issues you have found earlier (filed as WORDSNET-26635) have been fixed in this Aspose.Words for Java 24.9 update.