Hi there
I used Aspose.Words to convert Word files to HTML format.
I found that some of the text is in the wrong place in the result.
(See the tables on the 2nd and 3rd pages.)
Please check my code and the attachment.
// Splits a Word document into pages and saves each page as a standalone HTML file
// named "<page>.html" inside a freshly generated, UUID-named output directory.
try
{
    Document doc = new Document("20141008 Pusheen表格格.docx");

    // HTML export settings shared by every page.
    HtmlSaveOptions saveOp = new HtmlSaveOptions();
    saveOp.setExportImagesAsBase64(true);
    saveOp.setExportTextInputFormFieldAsText(false);
    saveOp.setExportTocPageNumbers(true);
    saveOp.setExportPageSetup(true);
    saveOp.setExportDocumentProperties(true);
    saveOp.setExportRelativeFontSize(false);
    saveOp.setUpdateFields(true);

    // The collector is attached before the layout is rebuilt, as in the original order.
    LayoutCollector layoutCollector = new LayoutCollector(doc);
    doc.updatePageLayout();
    // NOTE(review): DocumentPageSplitter is defined outside this snippet; it is
    // presumably the page-splitting helper from the Aspose examples - confirm.
    DocumentPageSplitter splitter = new DocumentPageSplitter(layoutCollector);

    String outputPath = "";
    String blockId = UUID.randomUUID().toString();
    File outputDir = new File(outputPath + "/" + blockId + "/");
    // mkdirs() also creates missing parents; fail loudly instead of letting every
    // later file write fail with a less obvious error.
    if (!outputDir.exists() && !outputDir.mkdirs())
    {
        throw new IllegalStateException("Could not create output directory: " + outputDir);
    }

    // Reused in-memory buffer: a page is rendered completely before anything is
    // written to disk, so a failed save never leaves a partial HTML file behind.
    ByteArrayOutputStream output = new ByteArrayOutputStream();

    // Evaluate the page count once rather than on every loop iteration.
    int pageCount = doc.getPageCount();
    for (int page = 1; page <= pageCount; page++)
    {
        System.out.println("page:" + page);
        Document pageDoc = splitter.getDocumentOfPage(page);

        output.reset();
        pageDoc.save(output, saveOp);
        byte[] outputContent = output.toByteArray();

        // try-with-resources closes the file handle even when the write throws;
        // the original leaked one FileOutputStream per page.
        try (FileOutputStream fileOut = new FileOutputStream(new File(outputDir, page + ".html")))
        {
            IOUtils.write(outputContent, fileOut);
        }
    }
}
catch (Exception e)
{
    e.printStackTrace();
}