I am trying to convert HTML to PDF and want to set the page margins. Here is the code I am using.
The margins are not applied to the generated PDF. What is the correct approach to set them?
Thanks
/// <summary>
/// Converts a UTF-8 encoded HTML document to PDF, laid out on an A4 page
/// (595 x 842 points) with 72-point margins on every side.
/// The HTML is passed through <c>SanitizeHtml</c> before it is loaded.
/// </summary>
/// <param name="htmlBytes">The HTML document as UTF-8 bytes.</param>
/// <returns>The bytes of the generated PDF document.</returns>
/// <exception cref="ArgumentNullException"><paramref name="htmlBytes"/> is null.</exception>
/// <exception cref="TimeoutException">The conversion did not finish within two minutes.</exception>
/// <exception cref="AggregateException">
/// The conversion failed; the original exception is available as the inner exception
/// because the work runs inside a task that is waited on synchronously.
/// </exception>
public byte[] ConvertHtmlToPdf(byte[] htmlBytes)
{
    // Fail fast on the calling thread instead of surfacing a wrapped error from the worker task.
    if (htmlBytes == null)
    {
        throw new ArgumentNullException(nameof(htmlBytes));
    }

    // Add timeout capability
    const int timeoutMs = 120000; // 2 minute timeout

    var task = Task.Run(() =>
    {
        try
        {
            // Convert HTML bytes to string, sanitize, and re-encode for the loader
            string htmlContent = Encoding.UTF8.GetString(htmlBytes);
            string sanitizedHtml = SanitizeHtml(htmlContent);
            byte[] sanitizedBytes = Encoding.UTF8.GetBytes(sanitizedHtml);
            Console.WriteLine($"HTML size: {htmlBytes.Length} bytes");
            Console.WriteLine($"Sanitized HTML size: {sanitizedBytes.Length} bytes");

            // Create HTML load options with appropriate settings
            var htmlOptions = new HtmlLoadOptions
            {
                IsEmbedFonts = false,
                // NOTE(review): per the Aspose.PDF API reference, while this flag is true a CSS
                // @page rule in the document takes priority over PageInfo, so the margins below
                // can be ignored. Turning it off lets PageInfo apply. Confirm the property exists
                // in the Aspose.PDF version in use.
                IsPriorityCssPageRule = false,
                PageInfo = new PageInfo
                {
                    Width = 595, // A4 width in points
                    Height = 842, // A4 height in points
                    Margin = new MarginInfo
                    {
                        Top = 72,
                        Left = 72,
                        Right = 72,
                        Bottom = 72
                    }
                }
            };

            using (var htmlStream = new MemoryStream(sanitizedBytes))
            using (var pdfDocument = new Document(htmlStream, htmlOptions))
            using (var outputStream = new MemoryStream())
            {
                // Save options
                var saveOptions = new Aspose.Pdf.PdfSaveOptions();
                pdfDocument.Save(outputStream, saveOptions);
                return outputStream.ToArray();
            }
        }
        catch (Exception ex)
        {
            // Log and rethrow so the failure still reaches the caller through the task.
            Console.WriteLine("Error converting HTML to PDF: " + ex.Message);
            if (ex.InnerException != null)
            {
                Console.WriteLine("Inner exception: " + ex.InnerException.Message);
            }
            throw;
        }
    });

    // Wait for the task to complete with timeout.
    // The conversion itself is not cancelled on timeout; it keeps running in the background.
    if (task.Wait(timeoutMs))
    {
        return task.Result;
    }

    throw new TimeoutException($"HTML to PDF conversion timed out after {timeoutMs / 1000} seconds");
}
/// <summary>
/// Strips active or externally-loading content from an HTML string using regular expressions:
/// script elements, inline event-handler attributes, <c>javascript:</c> links, base64
/// <c>data:</c> URIs, iframe elements and meta-refresh tags.
/// This is a best-effort textual filter, not an HTML parser: the patterns are applied to the
/// whole string, so matching text outside of tags is removed as well.
/// </summary>
/// <param name="html">The HTML markup to clean; may be null or empty.</param>
/// <returns>The cleaned markup, or <paramref name="html"/> unchanged when it is null or empty.</returns>
private string SanitizeHtml(string html)
{
    if (string.IsNullOrEmpty(html)) return html;

    // Remove script elements together with their content
    html = Regex.Replace(html, @"<script\b[^<]*(?:(?!</script>)<[^<]*)*</script>", "", RegexOptions.IgnoreCase);

    // Remove event handlers (onclick, onload, etc.) whether the value is
    // double-quoted, single-quoted or unquoted
    html = Regex.Replace(html, @"\s+on\w+\s*=\s*(?:""[^""]*""|'[^']*'|[^\s""'>]+)", "", RegexOptions.IgnoreCase);

    // Remove javascript: protocol in href attributes
    html = Regex.Replace(html, @"href\s*=\s*[""']javascript:[^""']*[""']", @"href=""#""", RegexOptions.IgnoreCase);

    // Remove data: URIs which could contain JavaScript.
    // Every base64 data URI is replaced with "#", so embedded base64 images are dropped too.
    html = Regex.Replace(html, @"data:[^,]*base64,[^""']*", @"#", RegexOptions.IgnoreCase);

    // Remove iframe tags
    html = Regex.Replace(html, @"<iframe\b[^<]*(?:(?!</iframe>)<[^<]*)*</iframe>", "", RegexOptions.IgnoreCase);

    // Remove meta refresh tags
    html = Regex.Replace(html, @"<meta\s+http-equiv\s*=\s*[""']refresh[""'][^>]*>", "", RegexOptions.IgnoreCase);

    return html;
}