I am trying to convert HTML to PDF, but the HTML content does not wrap to a new line and runs off the PDF page. Here is the code snippet:
// Resolve the content root once; the license and font paths below are built from it.
var contentRootPath = _webHostEnvironment.ContentRootPath;

// Apply the Aspose license so Aspose.Words can be used.
var license = new License();
license.SetLicense(Path.Combine(contentRootPath, "Licenses", "Aspose.Total.NET.lic"));

var wordHtmlTemplate = LoadHtmlTemplate();

// Set up font sources, fallback rules and substitution for the document.
var fontsFolder = Path.Combine(contentRootPath, "Fonts");
FontSettings fontSettings = new FontSettings();
fontSettings.SetFontsFolder(fontsFolder, true);
fontSettings.FallbackSettings.Load(Path.Combine(fontsFolder, "NotoFallbackSettings.xml"));
fontSettings.SubstitutionSettings.FontInfoSubstitution.Enabled = false;
fontSettings.SubstitutionSettings.DefaultFontSubstitution.DefaultFontName = "Noto Sans";

// Create an empty document, attach the HarfBuzz text shaper and the font settings,
// then insert the HTML template through a DocumentBuilder.
Document doc = new Document();
doc.LayoutOptions.TextShaperFactory = Aspose.Words.Shaping.HarfBuzz.HarfBuzzTextShaperFactory.Instance;
doc.FontSettings = fontSettings;
DocumentBuilder documentBuilder = new DocumentBuilder(doc);
documentBuilder.InsertHtml(wordHtmlTemplate, true);

// Save the document to an in-memory stream; the using declaration disposes the
// stream when the enclosing scope ends.
using var outStream = new MemoryStream();
doc.Save(outStream, SaveFormat.Pdf);

// Convert the document to byte form and write it to disk.
byte[] docBytes = outStream.ToArray();
File.WriteAllBytes("HtmlToPdf.pdf", docBytes);
@kd2023
We have opened the following new ticket(s) in our internal issue tracking system and will deliver their fixes according to the terms mentioned in Free Support Policies.
Issue ID(s): WORDSNET-25987
You can obtain Paid Support Services if you need support on a priority basis, along with the direct access to our Paid Support management team.
PS: The problem is not reproducible if the HTML is loaded as the input document:
// Load the HTML file directly as the input document instead of inserting it
// into an empty document through DocumentBuilder.
var htmlDocument = new Document(@"C:\Temp\in.html");

// Use the HarfBuzz text shaper for layout.
var layout = htmlDocument.LayoutOptions;
layout.TextShaperFactory = Aspose.Words.Shaping.HarfBuzz.HarfBuzzTextShaperFactory.Instance;

// Write the result next to the input as a PDF.
htmlDocument.Save(@"C:\Temp\out.pdf");
@alexey.noskov - The solution below works if we load the HTML string as a stream when initializing the Document object and specify the load options along with the encoding:
var wordHtmlTemplate = LoadHtmlTemplate();

// Set up font sources, fallback rules and substitution for the document.
var fontsFolder = Path.Combine(_webHostEnvironment.ContentRootPath, "Fonts");
FontSettings fontSettings = new FontSettings();
fontSettings.SetFontsFolder(fontsFolder, true);
fontSettings.FallbackSettings.Load(Path.Combine(fontsFolder, "NotoFallbackSettings.xml"));
fontSettings.SubstitutionSettings.FontInfoSubstitution.Enabled = false;
fontSettings.SubstitutionSettings.DefaultFontSubstitution.DefaultFontName = "Noto Sans";

// Tell Aspose.Words explicitly that the input is UTF-8 encoded HTML.
LoadOptions loadOptions = new LoadOptions()
{
    Encoding = Encoding.UTF8,
    LoadFormat = LoadFormat.Html
};

// Wrap the HTML string in a stream and load it as the document itself.
// The using declarations dispose both streams when the enclosing scope ends.
using var htmlStream = new MemoryStream(Encoding.UTF8.GetBytes(wordHtmlTemplate));
Document doc = new Document(htmlStream, loadOptions);
doc.LayoutOptions.TextShaperFactory = Aspose.Words.Shaping.HarfBuzz.HarfBuzzTextShaperFactory.Instance;
doc.FontSettings = fontSettings;

// Save the document to an in-memory stream.
using var outStream = new MemoryStream();
doc.Save(outStream, SaveFormat.Pdf);

// Convert the document to byte form and write it to disk.
byte[] docBytes = outStream.ToArray();
File.WriteAllBytes("HtmlToPdf.pdf", docBytes);
@kd2023 We have completed analyzing the issue and decided to close it as Not a Bug. The default compatibility options of Document prevent the text from being wrapped correctly. To fix the issue, set the corresponding compatibility option before inserting the HTML:
// Start from a blank document and enable the breaking-rules compatibility option
// before any HTML is inserted.
var document = new Document();
document.CompatibilityOptions.ApplyBreakingRules = true;

// Attach a builder to the document, then read the HTML file and insert it.
var htmlBuilder = new DocumentBuilder(document);
string html = File.ReadAllText("in.html");
htmlBuilder.InsertHtml(html, true);

document.Save("out.docx");