代码如下:
// Converts the PDF referenced by `download` (declared outside this snippet) to Markdown:
// Aspose.PDF renders it to DOCX in memory, then Aspose.Words re-saves that DOCX as Markdown.
// Returns the Markdown text after GPTStringUtils.removeBase64, or null if any step throws.
try (ByteArrayOutputStream docxStream = new ByteArrayOutputStream()) {
    // Step 1: open the PDF with the Document class of Aspose.PDF.
    try (Document pdfDoc = new Document(download)) {
        // Step 2: convert PDF to DOCX with Aspose.PDF's save method, writing into memory
        // instead of a file.
        pdfDoc.save(docxStream, SaveFormat.DocX);

        // Wrap the DOCX bytes in an input stream so Aspose.Words can read them.
        try (InputStream docxInputStream = new ByteArrayInputStream(docxStream.toByteArray());
             ByteArrayOutputStream markdownStream = new ByteArrayOutputStream()) {
            // Step 3: load the DOCX with the Document class of Aspose.Words.
            // The stream holds DOCX bytes, not a PDF, so it is loaded with the default
            // format auto-detection. PdfLoadOptions (and its skip-PDF-images flag) is meant
            // for PDF input and is therefore not used here.
            com.aspose.words.Document wordDoc = new com.aspose.words.Document(docxInputStream);

            MarkdownSaveOptions markdownSaveOptions = new MarkdownSaveOptions();
            // Images are inlined as base64 so the output is a single self-contained string;
            // removeBase64 below presumably strips that inlined data again (helper is not
            // visible here - verify).
            markdownSaveOptions.setExportImagesAsBase64(true);
            markdownSaveOptions.setUpdateFields(true);
            markdownSaveOptions.setSaveFormat(com.aspose.words.SaveFormat.MARKDOWN);

            // Step 4: save the document in Markdown format into memory.
            wordDoc.save(markdownStream, markdownSaveOptions);
            return GPTStringUtils.removeBase64(markdownStream.toString("UTF-8"));
        }
    }
} catch (Exception e) {
    // Any failure in the pipeline is reported on stderr and signalled to the caller as null.
    e.printStackTrace();
    return null;
}