Read a PDF from a stream and convert it to HTML as a byte array (PDF to HTML, using streams)

(1) Read the PDF from a stream and then convert it to HTML.

(2) Output the result as byte[] resultHtmlAsBytes.


See the attached file for the detailed code.

Hi Abhishek,

Thanks for contacting support.

As I understand it, you need to convert a PDF file to HTML format and save the output in a byte array. If that is the case, please try the following code snippet (it is very similar to the one you shared earlier). If I have misunderstood your requirement, please feel free to contact us again.

[C#]

// Load the source PDF document from disk.
Document doc = new Document("c:/pdftest/record_275889686.pdf");

// tune conversion params
HtmlSaveOptions newOptions = new HtmlSaveOptions();
newOptions.RasterImagesSavingMode = HtmlSaveOptions.RasterImagesSavingModes.AsEmbeddedPartsOfPngPageBackground;
newOptions.FontSavingMode = HtmlSaveOptions.FontSavingModes.SaveInAllFormats;
newOptions.PartsEmbeddingMode = HtmlSaveOptions.PartsEmbeddingModes.EmbedAllIntoHtml;
newOptions.LettersPositioningMethod = HtmlSaveOptions.LettersPositioningMethods.UseEmUnitsAndCompensationOfRoundingErrorsInCss;
newOptions.SplitIntoPages = false; // force write HTMLs of all pages into one output document

// Route the generated HTML markup to our own callback instead of the target file.
newOptions.CustomHtmlSavingStrategy = new HtmlSaveOptions.HtmlPageMarkupSavingStrategy(SavingToStream);

// we can use some non-existing path as result file name - all real saving will be done
// in our custom method SavingToStream() (it follows this one)
string outHtmlFile = @"Z:\SomeNonExistingFolder\SomeUnexistingFile.html";
doc.Save(outHtmlFile, newOptions);

// NOTE(review): this Save overload is called without newOptions, so the stream
// presumably receives the document in its default format rather than HTML - confirm.
// The HTML markup itself is delivered to SavingToStream() by the call above.
byte[] savedBytes;
using (MemoryStream stream = new MemoryStream())
{
    doc.Save(stream);

    // ToArray() returns the whole written content regardless of the stream position.
    // The previous Read() loop started from wherever Save() left the position (after
    // the written data), so it copied nothing, and its ToArray() result was discarded.
    savedBytes = stream.ToArray();
}

// Report the number of bytes actually produced, not the size of a scratch buffer.
Console.WriteLine(savedBytes.Length);

/// <summary>
/// Custom HTML saving callback: reads the generated markup from
/// <c>htmlSavingInfo.ContentStream</c> into a byte array and writes it to
/// "stream_out.html" in the current working directory.
/// </summary>
/// <param name="htmlSavingInfo">Saving info whose <c>ContentStream</c> holds the HTML markup to persist.</param>
private static void SavingToStream(HtmlSaveOptions.HtmlPageMarkupSavingInfo htmlSavingInfo)
{
    byte[] resultHtmlAsBytes = new byte[htmlSavingInfo.ContentStream.Length];

    // A single Read() call may return fewer bytes than requested, so keep reading
    // until the buffer is full or the stream reports end of data.
    int totalRead = 0;
    while (totalRead < resultHtmlAsBytes.Length)
    {
        int read = htmlSavingInfo.ContentStream.Read(resultHtmlAsBytes, totalRead, resultHtmlAsBytes.Length - totalRead);
        if (read == 0)
        {
            break;
        }
        totalRead += read;
    }

    // here You can use any writable stream, file stream is taken just as example
    string fileName = "stream_out.html";

    // File.Create truncates an existing file (File.OpenWrite would leave stale trailing
    // bytes from a longer previous output); the using block flushes and closes the handle.
    using (Stream outStream = File.Create(fileName))
    {
        outStream.Write(resultHtmlAsBytes, 0, totalRead);
    }
}