Read a PDF from a stream and then convert it to HTML as a byte array (PDF to HTML) using streams

(1) Read the PDF into a stream and then convert it to HTML.

(2) Output the result as byte[] resultHtmlAsBytes.


See the attached file for the detailed code.

Hi Abhishek,


Thanks for contacting support.

As per my understanding, you need to convert a PDF file to HTML format and save the output in a byte array. If that is the case, then please try using the following code snippet (very similar to the one you shared earlier). In case I have not properly understood your requirement, please feel free to contact us.

[C#]

// Converts a PDF file to HTML through a custom saving strategy, then saves the
// document into memory and reports how many bytes were produced.

// Load the source PDF (plain ASCII quotes are required for a valid string literal).
Document doc = new Document("c:/pdftest/record_275889686.pdf");

// tune conversion params
HtmlSaveOptions newOptions = new HtmlSaveOptions();
newOptions.RasterImagesSavingMode = HtmlSaveOptions.RasterImagesSavingModes.AsEmbeddedPartsOfPngPageBackground;
newOptions.FontSavingMode = HtmlSaveOptions.FontSavingModes.SaveInAllFormats;
newOptions.PartsEmbeddingMode = HtmlSaveOptions.PartsEmbeddingModes.EmbedAllIntoHtml;
newOptions.LettersPositioningMethod = HtmlSaveOptions.LettersPositioningMethods.UseEmUnitsAndCompensationOfRoundingErrorsInCss;
newOptions.SplitIntoPages = false; // force write HTMLs of all pages into one output document

// Route the generated HTML markup to our own callback instead of a file on disk.
newOptions.CustomHtmlSavingStrategy = new HtmlSaveOptions.HtmlPageMarkupSavingStrategy(SavingToStream);

// We can use some non-existing path as the result file name - all real saving will be done
// in our custom method SavingToStream() (it follows this snippet).
string outHtmlFile = @"Z:\SomeNonExistingFolder\SomeUnexistingFile.html";
doc.Save(outHtmlFile, newOptions);

// Save the document into memory and extract the bytes.
// NOTE(review): Save(stream) is called without save options, so it presumably writes
// the document in its native (PDF) format rather than HTML - confirm. The HTML markup
// itself is delivered to SavingToStream() by the Save call above.
using (MemoryStream stream = new MemoryStream())
{
    doc.Save(stream);

    // ToArray() returns the full content regardless of the current Position, so no
    // rewind or manual copy loop is needed (reading from the stream right after Save
    // would start at the end and yield zero bytes).
    byte[] documentBytes = stream.ToArray();

    // Report the size of the saved data, not the size of a scratch buffer.
    Console.WriteLine(documentBytes.Length);
}

/// <summary>
/// Custom HTML saving callback: copies the markup exposed by
/// <c>htmlSavingInfo.ContentStream</c> into a byte array and writes it to
/// "stream_out.html" in the current working directory.
/// </summary>
/// <param name="htmlSavingInfo">Saving info whose <c>ContentStream</c> holds the HTML markup to persist.</param>
private static void SavingToStream(HtmlSaveOptions.HtmlPageMarkupSavingInfo htmlSavingInfo)
{
    Stream content = htmlSavingInfo.ContentStream;
    byte[] resultHtmlAsBytes = new byte[content.Length];

    // Stream.Read may return fewer bytes than requested, so keep reading until the
    // buffer is full or the stream is exhausted.
    int bytesRead = 0;
    while (bytesRead < resultHtmlAsBytes.Length)
    {
        int chunk = content.Read(resultHtmlAsBytes, bytesRead, resultHtmlAsBytes.Length - bytesRead);
        if (chunk == 0)
        {
            break;
        }

        bytesRead += chunk;
    }

    // here You can use any writable stream, file stream is taken just as example
    string fileName = "stream_out.html";

    // FileMode.Create truncates an existing file (File.OpenWrite would leave stale
    // trailing bytes behind), and the using block flushes and closes the handle.
    using (Stream outStream = new FileStream(fileName, FileMode.Create, FileAccess.Write))
    {
        // Write only the bytes that were actually read from the content stream.
        outStream.Write(resultHtmlAsBytes, 0, bytesRead);
    }
}