Html conversion stripping out stuff

If I take the field and save it to an HTML file, it looks different from the PDF that is generated from it (font, borders, etc.).

Below is the code. I have an HTML file stored in a database field; strOrderText
contains:
strOrderText="
ORDER 1234567-89

The testing department
Headers Testing
1234 main street
springfield, va 20190


FINAL ORDER: 12-345-678 03 Oct 1999

etc...."

/// <summary>
/// Builds the order HTML stream, converts it to PDF via <c>Report</c> and
/// writes the PDF bytes to the HTTP response as a file attachment.
/// </summary>
/// <param name="sender">Event source (not used by this handler).</param>
/// <param name="e">Event data (not used by this handler).</param>
protected void Page_Load(object sender, System.EventArgs e)
{
    // Progress trail; it is echoed in the error output so the failing step can be located.
    string strTemp;

    Aspose.Pdf.License license = new Aspose.Pdf.License();
    license.SetLicense(AppDomain.CurrentDomain.BaseDirectory + @"\Aspose.Total.lic");
    MemoryStream objHTMLStream = CreateHTMLStream();
    strTemp = "File created | ";
    try
    {
        byte[] theData = Report(objHTMLStream);
        strTemp += "Report Complete | ";
        Response.ContentType = "application/pdf";
        Response.Charset = "UTF-8";
        strTemp += "1 | ";
        string newfilename = m_strFileName;
        if (Request.Browser.Browser.ToString() == "IE")
        {
            // IE receives the file name unquoted.
            Response.AddHeader("content-disposition", "attachment; filename=" + newfilename + ".PDF");
        }
        else
        {
            // The extension must sit inside the quoted value; previously the header
            // was emitted as filename="name".PDF, leaving ".PDF" outside the quotes.
            Response.AddHeader("content-disposition", "attachment; filename=\"" + newfilename + ".PDF\"");
        }

        strTemp += "2 | ";
        Response.AddHeader("content-length", theData.Length.ToString());

        Response.BinaryWrite(theData);
        strTemp += "3 | ";
    }
    catch (Exception ex)
    {
        // Report the progress trail together with the exception message, then end the response.
        Response.Write("Page_Load(): " + strTemp + ex.Message);
        Response.End();
    }
}

/// <summary>
/// Binds the HTML in <paramref name="objStream"/> to a new <c>Pdf</c> object and
/// returns the resulting PDF buffer.
/// </summary>
/// <param name="objStream">Stream handed to <c>Pdf.BindHTML</c>.</param>
/// <returns>
/// The result of <c>Pdf.GetBuffer()</c>, or <c>null</c> when no <c>Pdf</c> object was created.
/// </returns>
private byte[] Report(Stream objStream)
{
    Pdf document = null;

    try
    {
        document = new Pdf();

        // Text settings: Courier New, with the initial font size reduced by 4.
        document.TextInfo.FontName = "Courier New";
        document.TextInfo.FontSize -= 4;

        // HTML layout: A4 page with left and right margins of 25.
        document.HtmlInfo.PageHeight = PageSize.A4Height;
        document.HtmlInfo.PageWidth = PageSize.A4Width;
        document.HtmlInfo.Margin.Left = 25;
        document.HtmlInfo.Margin.Right = 25;

        document.BindHTML(objStream);
    }
    catch (Exception ex)
    {
        // Write the failure to the response and end it.
        Response.Write("Report(): " + ex.Message);
        Response.End();
    }

    if (document == null)
    {
        return null;
    }

    return document.GetBuffer();
}

/// <summary>
/// Loads the order HTML via <c>GetOrderText</c>, applies a series of string
/// rewrites to it and returns the result as an in-memory stream.
/// </summary>
/// <returns>
/// A <see cref="MemoryStream"/> over the encoded HTML, or <c>null</c> when an
/// exception occurred before the stream was created (the error message is
/// written to the response and the response is ended).
/// </returns>
private MemoryStream CreateHTMLStream()
{
    string strOrderText = string.Empty;
    MemoryStream objStream = null;

    try
    {
        string orderid = Request.QueryString["id"];
        string conusaorder = Request.QueryString["conusaorder"];
        string strParentDirectory = Request.ApplicationPath.ToString().ToLower();
        string strImagePath = "/images/sigs/";
        // Application path + image folder: the prefix that is replaced in the HTML below.
        string strDataImagePath = strParentDirectory + strImagePath;
        string strPath = Request.Url.AbsoluteUri.ToString().ToLower();
        // Request URL up to and including the application path.
        strParentDirectory = strPath.Substring(0, strPath.IndexOf(strParentDirectory, 0)) + strParentDirectory;
        strImagePath = strParentDirectory + strImagePath;
        strOrderText = GetOrderText(conusaorder, int.Parse(orderid));

        // Markup fragments that are removed from the HTML before conversion.
        string[] strChunksArray = new string[]
        {
            "<td class=\"showBorder\"></td>",
            "<td> </td>",
            "<td class=\"noBorder\" colSpan=5></td>",
            // This literal was malformed ("<td "noBorder\"...") and did not compile.
            "<td class=\"noBorder\"></td>",
            "<div align=\"center\">"
        };
        foreach (string strItem in strChunksArray)
        {
            strOrderText = strOrderText.Replace(strItem, "");
        }

        // Replace the HTML 4.0 Transitional doctype identifier (including its closing
        // quote) with the XHTML 1.0 Transitional identifier and DTD URL.
        string sdoctype = " XHTML 1.0 Transitional//EN ";
        sdoctype += "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd";
        string sdoctypeOld = "DTD HTML 4.0 Transitional" + "//EN";
        sdoctypeOld += '"';
        strOrderText = strOrderText.Replace(sdoctypeOld, sdoctype);

        // Give the first <div> a border attribute. IndexOf returns -1 when the
        // marker is absent; skip the rewrite instead of letting Remove throw.
        string sTemp = "<div>";
        int iStart = strOrderText.IndexOf(sTemp);
        if (iStart >= 0)
        {
            strOrderText = strOrderText.Remove(iStart, sTemp.Length);
            strOrderText = strOrderText.Insert(iStart, "<div border=\"1\">");
        }

        // Point image references at the absolute image URL.
        strOrderText = strOrderText.Replace(strDataImagePath, strImagePath);

        // Remove the align="center" attribute from the first matching <td> tag:
        // keep "<td" (3 characters) and the closing ">" (1 character).
        sTemp = "<td align=\"center\">";
        iStart = strOrderText.IndexOf(sTemp);
        if (iStart >= 0)
        {
            strOrderText = strOrderText.Remove(iStart + 3, sTemp.Length - 3 - 1);
        }

        // ASCIIEncoding.Default resolves to the inherited static Encoding.Default,
        // not to ASCII.
        objStream = new MemoryStream(ASCIIEncoding.Default.GetBytes(strOrderText));
    }
    catch (Exception ex)
    {
        Response.Write("CreateHTMLStream(): " + ex.Message);
        Response.End();
    }

    return objStream;
}

Hello Joe,

Thanks for considering Aspose.

After extracting the HTML contents from the database, can you please save them to a file on your hard drive and share it so that we can test the conversion at our end?

FYI, NOTE: This is Beta version of Html2Pdf. Only simple HTML is supported.

works if i manually set each

to " "

However, I have an image that does not appear in the PDF file.





John Smith.
Division 1
Manager

 

Hello Joe,

Thanks for considering Aspose.

Please visit HTML to PDF using MemoryStream for information on how to display image while converting HTML file into PDF.

In case it does not resolve your problem or you have any further query, please feel free to share.

That did not resolve it: the signature image does not come through, not even as a red X as if the file were missing.


Kevin N. Ingalls
LTC, OD
chief
Manager

 

/// <summary>
/// Binds the HTML in <paramref name="objStream"/> to a new <c>Pdf</c> object and
/// returns the resulting PDF buffer.
/// </summary>
/// <param name="objStream">Stream containing the HTML, handed to <c>Pdf.BindHTML</c>.</param>
/// <returns>
/// The result of <c>Pdf.GetBuffer()</c>, or <c>null</c> when no <c>Pdf</c> object was created.
/// </returns>
private byte[] Report(Stream objStream)
{
    Pdf pdf1 = null;

    try
    {
        pdf1 = new Pdf();

        // NOTE(review): ServicePointManager.SecurityProtocol is a static setting, so this
        // applies beyond this call; SSL 3.0 is also deprecated - confirm it is still required.
        System.Net.ServicePointManager.SecurityProtocol = System.Net.SecurityProtocolType.Ssl3;

        // HTML layout: A4 page with left and right margins of 20.
        pdf1.HtmlInfo.PageHeight = PageSize.A4Height;
        pdf1.HtmlInfo.PageWidth = PageSize.A4Width;
        pdf1.HtmlInfo.Margin.Left = 20;
        pdf1.HtmlInfo.Margin.Right = 20;

        // Base location for images referenced by the HTML. No separate Image object is
        // built here: objStream holds HTML, not image data, and is bound directly below.
        // NOTE(review): this is a site-relative path; if images are missing from the PDF,
        // a physical directory may be needed - confirm against the HtmlInfo.ImgUrl docs.
        pdf1.HtmlInfo.ImgUrl = "/images/sigs/";

        pdf1.IsAutoFontAdjusted = true;
        pdf1.DefaultFontName = "Times New Roman";

        pdf1.BindHTML(objStream);
    }
    catch (Exception e)
    {
        // Write the failure to the response and end it.
        Response.Write("Report(): " + e.Message);
        Response.End();
    }

    if (pdf1 != null)
    {
        return pdf1.GetBuffer();
    }

    return null;
}

/// <summary>
/// Loads the order HTML via <c>GetOrderText</c>, applies a series of string
/// replacements to it and returns the result as a UTF-8 encoded in-memory stream.
/// </summary>
/// <returns>
/// A <c>MemoryStream</c> over the rewritten HTML. If an exception is caught, its
/// message is written to the response, the response is ended, and the returned
/// value is whatever <c>objStream</c> holds at that point (<c>null</c> if the
/// stream had not been created yet).
/// </returns>
/// <remarks>
/// NOTE(review): several string literals in this listing contain only line breaks.
/// Their original contents are not visible here (presumably HTML fragments that were
/// lost when the code was posted); they must be restored before this can compile.
/// </remarks>
private MemoryStream CreateHTMLStream()

{

string strOrderText = string.Empty;

MemoryStream objStream = null;

try

{

// NOTE(review): theURL is read but not used anywhere in this method.
string theURL = Request.QueryString["url"];

string orderid = Request.QueryString["id"];

string conusaorder = Request.QueryString["conusaorder"];

string strParentDirectory = Request.ApplicationPath.ToString().ToLower();

string strImagePath = "/images/sigs/";

// Application path + image folder.
string strDataImagePath = strParentDirectory + strImagePath;

string strPath = Request.Url.AbsoluteUri.ToString().ToLower();

// Request URL up to and including the application path.
strParentDirectory = strPath.Substring(0, strPath.IndexOf(strParentDirectory, 0)) + strParentDirectory;

strImagePath = strParentDirectory + strImagePath;

// int.Parse throws if the "id" query-string value is missing or not numeric;
// the catch block below reports that failure.
strOrderText = GetOrderText(conusaorder, int.Parse(orderid));

// Fragments listed in strChunksArray are removed from the HTML (each is replaced with "").

string[] strChunksArray = new string[] {

"

",

"

 ",

"

",

"

"

};

foreach (string strItem in strChunksArray)

strOrderText = strOrderText.Replace(strItem, "");

// Replace the HTML 4.0 Transitional doctype identifier (including its closing
// quote) with the XHTML 1.0 Transitional identifier and DTD URL.
string sdoctype ="";

string sdoctypeOld="";

sdoctype= " XHTML 1.0 Transitional//EN ";

sdoctype +="http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd";

sdoctypeOld ="DTD HTML 4.0 Transitional"+"//EN";

sdoctypeOld +='"';

strOrderText = strOrderText.Replace(sdoctypeOld,sdoctype);

// Two further search/replace passes; the search and replacement texts are not
// visible in this listing (see the note in the method header).
string sTemp = "

";

strOrderText = strOrderText.Replace(sTemp, "

"); //size=\"-2\"

sTemp = "

";

strOrderText = strOrderText.Replace(sTemp, " ");

sTemp = "
";

// NOTE(review): this iStart value is overwritten below before it is ever used.
int iStart = strOrderText.IndexOf(sTemp.ToString());

strOrderText = strOrderText.Replace("/conusa_test", "/conusa");

sTemp = "

";

iStart = strOrderText.IndexOf(sTemp);

// Removes everything in the matched text except its first 3 characters and its
// last character. IndexOf returns -1 when sTemp is not found, which is not
// checked here.
strOrderText = strOrderText.Remove(iStart + 3, sTemp.Length - 3 - 1);

// ASCIIEncoding.UTF8 resolves to the inherited static Encoding.UTF8.
objStream = new MemoryStream(ASCIIEncoding.UTF8.GetBytes(strOrderText));

}

catch (Exception ex)

{

Response.Write("CreateHTMLStream(): " + ex.Message);

Response.End();

}

return objStream;

}

/// <summary>
/// Builds the order HTML stream, converts it to PDF via <c>Report</c>, writes the
/// PDF bytes to the HTTP response as a file attachment and then calls
/// <c>DeleteHTMLFile</c>.
/// </summary>
/// <param name="sender">Event source (not used by this handler).</param>
/// <param name="e">Event data (not used by this handler).</param>
protected void Page_Load(object sender, System.EventArgs e)
{
    // Progress trail; it is echoed in the error output so the failing step can be located.
    string strTemp;

    Aspose.Pdf.License license = new Aspose.Pdf.License();
    license.SetLicense(AppDomain.CurrentDomain.BaseDirectory + @"\Aspose.Total.lic");

    MemoryStream objHTMLStream = CreateHTMLStream();
    strTemp = "File created | ";

    try
    {
        byte[] theData = Report(objHTMLStream);
        strTemp += "Report Complete | ";

        Response.ContentType = "application/pdf";
        Response.Charset = "UTF-8";
        strTemp += "1 | ";

        string newfilename = m_strFileName;
        if (Request.Browser.Browser.ToString() == "IE")
        {
            // IE receives the file name unquoted.
            Response.AddHeader("content-disposition", "attachment; filename=" + newfilename + ".PDF");
        }
        else
        {
            // The extension must sit inside the quoted value; previously the header
            // was emitted as filename="name".PDF, leaving ".PDF" outside the quotes.
            Response.AddHeader("content-disposition", "attachment; filename=\"" + newfilename + ".PDF\"");
        }

        strTemp += "2 | ";
        Response.AddHeader("content-length", theData.Length.ToString());

        Response.BinaryWrite(theData);
        strTemp += "3 | ";
    }
    catch (Exception ex)
    {
        // Report the progress trail together with the exception message, then end the response.
        Response.Write("Page_Load(): " + strTemp + ex.Message);
        Response.End();
    }

    DeleteHTMLFile();
}

Hi,

As far as I can understand, you have HTML contents saved in database and you are retrieving the contents through database using MemoryStream object & the contents contain a reference to an image file. Now that you are trying to convert them into PDF, the image is not being displayed.

I’ve tested the scenario and I am unable to reproduce the problem. I’ve tried the HTML-to-PDF conversion using the MemoryStream approach. In your HTML contents the image source is specified as src="/images/sigs/signature.JPG", so in code you need to set the image base location to a path where the image can be found. I’ve used the following code snippet to test the scenario.

<?xml:namespace prefix = o ns = "urn:schemas-microsoft-com:office:office" />

[C#]

// Create the Pdf object that will receive the HTML
Pdf document = new Pdf();
// Set the path for the image files used by the HTML file
document.HtmlInfo.ImgUrl = "D:/pdftest";

// Read the HTML file as text and encode its contents as UTF-8 bytes
string htmlText = File.ReadAllText("D:/pdftest/simplehtml.html");
byte[] htmlBytes = System.Text.Encoding.UTF8.GetBytes(htmlText);
// Wrap the bytes in a MemoryStream and bind it to the Pdf object
MemoryStream htmlStream = new MemoryStream(htmlBytes);
document.BindHTML(htmlStream);

// Save the PDF file
document.Save(@"D:\pdftest\simplehtml.pdf");

In my HTML contents, the image source is specified as src="Aspose_Pdf-Product-Box.jpg", and in code I’ve provided the base URL for the image as "D:/pdftest". So the complete path at which the image is located is "D:/pdftest/Aspose_Pdf-Product-Box.jpg".

The contents of the HTML file are shown below, and I’ve also attached the resultant PDF file. Please take a look. In case it does not resolve your problem or you have any further query, please feel free to contact us.

[HTML]



<Table>

<font face="TimesNewRoman" size="-1">
<td align="right">
<table cellSpacing="0" cellPadding="0" border="0">
<font face="TimesNewRoman" size="-1">
<td align="left"><img id="ctl00_ctl00_imgSig" src="Aspose_Pdf-Product-Box.jpg" style="border-width:0px;" />

<span id="ctl00_ctl00_sig" class="order_header">Kevin N. Ingalls
LTC, OD
chief
Manager
<td width="150"> 






I have resolved the problem.

<td align="left"><img id="ctl00_ctl00_imgSig" src="images/sigs/Aspose_Pdf-Product-Box.jpg"

If there is a path in the src attribute, the image does not seem to load, so I did the following two steps to make it work:

strip the path from the src attribute (as above) and set ImgUrl.

<td align="left"><img id="ctl00_ctl00_imgSig" src="Aspose_Pdf-Product-Box.jpg"

strOrderText = strOrderText.Replace("/app_209/system/images/sig//images/sigs/", ""); .

..........

pdf1.HtmlInfo.ImgUrl = "/Inetpub/wwwroot/app_209/system/images/sigs/";

now it works.