Hello
Test PDFs.zip (1.8 MB)
We have been using Aspose PDF v20.5, and for some PDF documents that contain images and/or URLs the output HTML seems broken. I have also tried the latest version of the DLL and it has the same issue. I have attached the PDFs I am using and pasted the code below. Please let me know what we can do to fix this issue.
/// <summary>
/// Loads the PDF at <paramref name="filePath"/>, saves it as HTML into an
/// in-memory stream using the options from <see cref="GetHtmlOptions"/>,
/// and returns the stream contents decoded as UTF-8.
/// </summary>
/// <param name="filePath">Path of the PDF file to convert.</param>
/// <returns>The bytes written by <c>Document.Save</c>, decoded as a UTF-8 string.</returns>
private string PDFToHTML(string filePath)
{
    Aspose.Pdf.License license = new Aspose.Pdf.License();
    // Apply a license using the embedded resource name.
    // license.SetLicense("Aspose.Pdf.lic");

    // Document is IDisposable; dispose it deterministically so the loaded PDF's
    // resources are released even when Save throws.
    using (Document doc = new Document(filePath))
    using (var output = new MemoryStream())
    {
        HtmlSaveOptions htmlOptions = this.GetHtmlOptions();
        doc.Save(output, htmlOptions);

        // GetBuffer() exposes the stream's backing array without copying; only the
        // first output.Length bytes hold written data, so decode exactly that range.
        return Encoding.UTF8.GetString(output.GetBuffer(), 0, (int)output.Length);
    }
}
/// <summary>
/// Creates the <c>HtmlSaveOptions</c> instance used when saving a document as HTML.
/// </summary>
/// <returns>A new options object with the property values assigned below.</returns>
private HtmlSaveOptions GetHtmlOptions()
{
    var options = new HtmlSaveOptions();

    // Layout and graphics switches.
    options.FixedLayout = true;
    options.CompressSvgGraphicsIfAny = false;

    // Text handling.
    options.SaveTransparentTexts = true;
    options.SaveShadowedTextsAsTransparentTexts = true;
    options.FontSavingMode = HtmlSaveOptions.FontSavingModes.DontSave;
    // Optional override, currently disabled: options.DefaultFontName = "Comic Sans MS";
    options.UseZOrder = true;
    options.LettersPositioningMethod =
        HtmlSaveOptions.LettersPositioningMethods.UseEmUnitsAndCompensationOfRoundingErrorsInCss;

    // Embedding, raster image mode and page splitting.
    options.PartsEmbeddingMode = HtmlSaveOptions.PartsEmbeddingModes.EmbedAllIntoHtml;
    options.RasterImagesSavingMode =
        HtmlSaveOptions.RasterImagesSavingModes.AsEmbeddedPartsOfPngPageBackground;
    options.SplitIntoPages = false;

    return options;
}
Thanks
Vijay