Hi Team,
I am facing a conversion issue with the attached PDF file:
Micka ortila.pdf (2.0 MB)
My requirement is to convert the PDF file to an HTML string so that it can be previewed.
To do this, I first convert the PDF to Word, and then convert the Word document to an HTML string using the Aspose.Words library.
Issue: The preview does not show the content properly, as can be seen in the attached PNG file PreviewIssue.png (39.2 KB)
The file is not converted properly even when I only convert the PDF to Word and download the result.
I also tried converting the document with the online converter “Convert Files Online - Word, PDF, HTML, JPG And Many More”, and the same issue occurred.
We are using Aspose.Words version “13.3.0.0”
Source code for converting the PDF to a Word file:
/// <summary>
/// Converts the bytes of a PDF file into the bytes of a DOCX file.
/// </summary>
/// <remarks>
/// The PDF is first handed to SautinSoft PdfFocus, whose <c>ToWord()</c> result
/// (a string; presumably RTF markup, judging by the original variable name) is
/// UTF-8 encoded and loaded into an Aspose.Words <c>Document</c>. Runs with
/// identical formatting are joined, the document is passed through
/// <c>ProcessLetterSpacing</c>, and the result is saved as pretty-printed DOCX.
/// NOTE(review): PdfFocus is never explicitly closed here; if the installed
/// version exposes a close/dispose method, consider calling it - confirm
/// against the SautinSoft documentation for that version.
/// </remarks>
/// <param name="input">Raw bytes of the source PDF. Must not be null.</param>
/// <returns>The DOCX file content as a byte array.</returns>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="input"/> is null.
/// </exception>
public static byte[] ConvertPdfToDoc(byte[] input)
{
    if (input == null)
    {
        throw new ArgumentNullException("input");
    }

    SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();
    f.Serial = System.Configuration.ConfigurationManager.AppSettings["PDFFocus.License"];
    f.OpenPdf(input);
    string rtf = f.ToWord();

    byte[] wordBytes = System.Text.Encoding.UTF8.GetBytes(rtf);

    // Both streams are only needed for the duration of the conversion, so
    // scope them with using-statements instead of leaving them undisposed.
    using (MemoryStream docXStream = new MemoryStream(wordBytes))
    using (MemoryStream docXOutStream = new MemoryStream())
    {
        Document docX = new Document(docXStream);

        OoxmlSaveOptions docSaveOpt = new OoxmlSaveOptions(SaveFormat.Docx);
        docSaveOpt.PrettyFormat = true;

        docX.JoinRunsWithSameFormatting();

        // ProcessLetterSpacing may return null; in that case the document
        // loaded above is saved instead.
        Document nDocX = ProcessLetterSpacing(docX);
        (nDocX ?? docX).Save(docXOutStream, docSaveOpt);

        // ToArray() copies exactly Length bytes, which is what the manual
        // GetBuffer() + Buffer.BlockCopy sequence did before.
        return docXOutStream.ToArray();
    }
}
Source code for converting the Word bytes to an HTML string:
/// <summary>
/// Renders the wrapped document (<c>_document</c>) as an HTML string.
/// </summary>
/// <remarks>
/// The document is saved as non-pretty-printed HTML with images exported as
/// Base64 and image saving routed through <c>HandleImageSaving</c>. If that
/// save throws <see cref="System.ArithmeticException"/>, the document is
/// saved as plain text instead and lightly converted to HTML: CRLF becomes
/// <c>&lt;br/&gt;</c>, tabs are removed and double quotes become single quotes.
/// NOTE(review): why only ArithmeticException triggers the fallback is not
/// visible from this method - confirm the intent with the original author.
/// </remarks>
/// <returns>
/// The HTML markup, or the plain-text fallback with <c>&lt;br/&gt;</c> line breaks.
/// </returns>
public string ConvertToHtml()
{
    HtmlSaveOptions saveOpt = new HtmlSaveOptions(SaveFormat.Html);
    saveOpt.PrettyFormat = false;
    saveOpt.ImageSavingCallback = new HandleImageSaving(this);
    saveOpt.ExportImagesAsBase64 = true;
    this.ValidateHeaders();

    try
    {
        using (MemoryStream output = new MemoryStream())
        {
            this._document.Save(output, saveOpt);
            return Encoding.UTF8.GetString(output.ToArray());
        }
    }
    catch (System.ArithmeticException)
    {
        // Use a fresh stream for the fallback: the HTML attempt may already
        // have written partial output before it failed, and reusing that
        // stream would prepend the broken HTML to the plain text.
        using (MemoryStream textOutput = new MemoryStream())
        {
            this._document.Save(textOutput, SaveFormat.Text);
            string text = Encoding.UTF8.GetString(textOutput.ToArray());

            // string.Replace returns a new string; the result must be used,
            // otherwise the replacements are silently lost.
            return text.Replace("\r\n", "<br/>").Replace("\t", "").Replace("\"", "'");
        }
    }
}
Please let me know what I should do to resolve this issue.