Special characters disappear after html file insertion

Hi,

We use Aspose.Words.dll 13.3.0.0 to create a document. The process consists of the following steps:

  • create new document from *.dotx template
  • insert html files at bookmarks
  • save created document as pdf file

If the html files include special characters like the € symbol, these characters are missing in the created document (see Antrag_Ergebnis.pdf).

Best regards, Martin Pfeifer

=======================================

Test platform:
Aspose.Words.dll 13.3.0.0
.net 2.0 runtime
Windows 7 64 Bit
internal bugid#4954

Test code:

using System;
using System.Text;
using System.Collections.Generic;
using System.IO;
using Microsoft.VisualStudio.TestTools.UnitTesting;

using de.egov.Word;
using Aspose.Words;

namespace de.egov.Word
{
    /// <summary>
    /// Build test that creates a document from a template, inserts HTML files at
    /// bookmarks and saves the result as a PDF file, using Aspose.Words.
    /// </summary>
    // Verbatim strings are required here: in a regular string literal "\A" is an
    // invalid escape sequence and "\0" would silently become a NUL character.
    [DeploymentItem(@"res\Antrag.dotx")]
    [DeploymentItem(@"res\Aspose.Total.lic")]
    [DeploymentItem(@"res\01.htm")]
    [DeploymentItem(@"res\02.htm")]
    [DeploymentItem(@"res\03.htm")]
    [DeploymentItem(@"res\04.htm")]
    [TestClass]
    public class Antraege_BuildTest_Aspose
    {
        public Antraege_BuildTest_Aspose()
        {
        }

        #region Test
        /// <summary>
        /// Loads the template, inserts the four deployed HTML files at their bookmarks
        /// in a clone of the template and saves the clone as "Antrag_Ergebnis.pdf"
        /// in the test deployment directory.
        /// </summary>
        [TestMethod]
        public void Aspose_InsertHtmlTest()
        {
            string strLicFile = Path.Combine(SourceDir, "Aspose.Total.lic");
            string strTemplateFile = Path.Combine(SourceDir, "Antrag.dotx");
            string strPdfFile = Path.Combine(SourceDir, "Antrag_Ergebnis.pdf");

            License lic = new License();
            lic.SetLicense(strLicFile);

            Document objTemplateDoc = CreateTemplateWithGlossary(strTemplateFile);
            // CreateTemplateWithGlossary returns null on failure; fail with a clear
            // message instead of a NullReferenceException on Clone below.
            Assert.IsNotNull(objTemplateDoc, "Dokumentenvorlage " + strTemplateFile + " kann nicht geladen werden");

            Document newDoc = (Document)objTemplateDoc.Clone(true);

            // Pairs of { bookmark name, HTML file to insert at that bookmark }.
            string[][] insertions = new string[][]
            {
                new string[] { "ANBV", "01.htm" },
                new string[] { "ANSV", "02.htm" },
                new string[] { "ANFA", "03.htm" },
                new string[] { "ANAN", "04.htm" }
            };

            foreach (string[] insertion in insertions)
            {
                string strBookmark = insertion[0];
                string strInsertFile = Path.Combine(SourceDir, insertion[1]);
                Assert.IsTrue(InsertFile(newDoc, strBookmark, strInsertFile), "Dokument " + strInsertFile + " kann nicht an Textmarke " + strBookmark + " eingefügt werden");
            }

            Aspose.Words.Saving.PdfSaveOptions saveOpt = new Aspose.Words.Saving.PdfSaveOptions();
            saveOpt.Compliance = Aspose.Words.Saving.PdfCompliance.Pdf15;

            newDoc.Save(strPdfFile, saveOpt);
        }
        #endregion // Test

        /// <summary>
        /// Loads a document template. A template that was not loaded as *.dotx or *.dotm
        /// is converted to *.dotx in memory first (per the original author: the older
        /// *.dot format has no glossary and therefore cannot manage AutoTexts).
        /// </summary>
        /// <param name="strTemplateFileName">File name of the document template.</param>
        /// <returns>
        /// The loaded (and, if necessary, converted) template; the unconverted template
        /// if the conversion fails; null if the file does not exist or cannot be loaded.
        /// </returns>
        private Document CreateTemplateWithGlossary(string strTemplateFileName)
        {
            try
            {
                if (!File.Exists(strTemplateFileName))
                {
                    return null;
                }

                Document objTemplateDoc = new Document(strTemplateFileName);
                if (objTemplateDoc.OriginalLoadFormat == LoadFormat.Dotx || objTemplateDoc.OriginalLoadFormat == LoadFormat.Dotm)
                {
                    // Template format already is *.dotx / *.dotm; use it as loaded.
                    return objTemplateDoc;
                }

                // Any other format: round-trip through an in-memory *.dotx.
                try
                {
                    // using guarantees the stream is released even if Save or the
                    // reload throws.
                    using (MemoryStream outputStream = new MemoryStream())
                    {
                        objTemplateDoc.Save(outputStream, SaveFormat.Dotx);
                        outputStream.Position = 0;
                        return new Document(outputStream);
                    }
                }
                catch (Exception e)
                {
                    System.Console.Error.WriteLine(e.ToString());
                }

                // Conversion failed: fall back to the template as originally loaded.
                // (The reference is intentionally not cleared before the reload, so
                // this fallback is never null.)
                return objTemplateDoc;
            }
            catch (Exception e)
            {
                System.Console.Error.WriteLine(e.ToString());
            }
            return null;
        }

        /// <summary>
        /// Inserts a file at a bookmark of the given document.
        /// </summary>
        /// <param name="objDoc">Document that contains the bookmark.</param>
        /// <param name="strInsertBookmark">Name of the bookmark.</param>
        /// <param name="strFile4Insertion">File to insert.</param>
        /// <returns>
        /// True if the file was inserted; false if the bookmark or the file does not
        /// exist or if any exception occurred (the exception is written to stderr).
        /// </returns>
        private bool InsertFile(Document objDoc, string strInsertBookmark, string strFile4Insertion)
        {
            try
            {
                Bookmark destBookmark = objDoc.Range.Bookmarks[strInsertBookmark];
                if (destBookmark == null || !File.Exists(strFile4Insertion))
                {
                    return false;
                }

                FileFormatInfo fileFormatInfo = Aspose.Words.FileFormatUtil.DetectFileFormat(strFile4Insertion);
                // NOTE: no encoding is set on the load options here, so the encoding
                // of the inserted file is left to Aspose.Words to determine.
                LoadOptions loadOptions = new LoadOptions(fileFormatInfo.LoadFormat, null, null);
                loadOptions.WarningCallback = new WarningInfoCollection();
                Document docInsert = new Document(strFile4Insertion, loadOptions);

                return InsertFile(destBookmark.BookmarkStart.ParentNode, docInsert);
            }
            catch (Exception e)
            {
                // Also reached when the node-level InsertFile rejects the bookmark position.
                System.Console.Error.WriteLine(e.ToString());
            }
            return false;
        }

        /// <summary>
        /// Imports all block-level nodes of a source document into the destination
        /// document, placing them after the given node.
        /// </summary>
        /// <param name="insertAfterNode">
        /// Paragraph or table (the parent node of the bookmark start) after which the
        /// content is inserted.
        /// </param>
        /// <param name="srcDoc">Document whose content is inserted.</param>
        /// <returns>True if the insertion succeeded; false if an exception occurred during import.</returns>
        /// <exception cref="ArgumentException">
        /// <paramref name="insertAfterNode"/> is neither a paragraph nor a table.
        /// </exception>
        private bool InsertFile(Node insertAfterNode, Document srcDoc)
        {
            // Make sure that the node is either a paragraph or table.
            if (insertAfterNode.NodeType != NodeType.Paragraph && insertAfterNode.NodeType != NodeType.Table)
            {
                throw new ArgumentException("Datei in Dokument einfügen: Die Textmarke zum Einfügen muss innerhalb eines Absatzes oder einer Tabelle stehen.");
            }

            try
            {
                // We will be inserting into the parent of the destination paragraph.
                CompositeNode dstStory = insertAfterNode.ParentNode;

                // This object will be translating styles and lists during the import.
                NodeImporter importer = new NodeImporter(srcDoc, insertAfterNode.Document, ImportFormatMode.KeepSourceFormatting);

                // Loop through all sections in the source document.
                foreach (Section srcSection in srcDoc.Sections)
                {
                    // Loop through all block level nodes (paragraphs and tables) in the body of the section.
                    foreach (Node srcNode in srcSection.Body)
                    {
                        // Skip the node if it is the last, empty paragraph of a section.
                        if (srcNode.NodeType == NodeType.Paragraph)
                        {
                            Paragraph para = (Paragraph)srcNode;
                            if (para.IsEndOfSection && !para.HasChildNodes)
                            {
                                continue;
                            }
                        }

                        // This creates a clone of the node, suitable for insertion into the destination document.
                        Node newNode = importer.ImportNode(srcNode, true);

                        // Insert the new node after the reference node and advance the
                        // reference so that the source order is preserved.
                        dstStory.InsertAfter(newNode, insertAfterNode);
                        insertAfterNode = newNode;
                    }
                }

                // NOTE: the bookmark is not removed after the insertion. The original
                // author left a disabled "destBookmark.Remove()" here with the remark
                // that the bookmark should be removed after inserting the file,
                // otherwise there are problems with AutoTexts.
                return true;
            }
            catch (Exception e)
            {
                System.Console.Error.WriteLine(e.ToString());
            }
            return false;
        }

        /// <summary>
        /// Directory the deployment items were copied to for the current test run.
        /// </summary>
        private string SourceDir
        {
            get { return TestContext.TestDeploymentDir; }
        }

        #region Test Helper
        private TestContext testContextInstance;

        /// <summary>
        /// Gets or sets the test context which provides
        /// information about and functionality for the current test run.
        /// </summary>
        public TestContext TestContext
        {
            get { return testContextInstance; }
            set { testContextInstance = value; }
        }
        #endregion // Testhelper 
    }
}

Hi Martin,

Thanks for your inquiry.

In your case, you need to specify the encoding that will be used to load the HTML document. If the encoding is not specified in the HTML itself and the LoadOptions.Encoding property is null, the system will try to detect the encoding automatically. Please see the following change in your code:

/// <summary>
/// Inserts a file at a bookmark of the given document, loading the file with an
/// explicitly specified encoding.
/// </summary>
/// <param name="objDoc">Document that contains the bookmark.</param>
/// <param name="strInsertBookmark">Name of the bookmark.</param>
/// <param name="strFile4Insertion">File to insert.</param>
/// <returns>
/// True if the file was inserted; false if the bookmark or the file does not exist
/// or if an exception occurred (the exception is written to stderr).
/// </returns>
private bool InsertFile(Document objDoc, string strInsertBookmark, string strFile4Insertion)
{
    try
    {
        Bookmark target = objDoc.Range.Bookmarks[strInsertBookmark];
        if (target == null || !File.Exists(strFile4Insertion))
        {
            return false;
        }

        FileFormatInfo detected = Aspose.Words.FileFormatUtil.DetectFileFormat(strFile4Insertion);
        LoadOptions loadOptions = new LoadOptions(detected.LoadFormat, null, null);

        // The change: state the encoding explicitly instead of leaving it to
        // automatic detection. NOTE(review): Encoding.Default depends on the
        // machine's settings - confirm it matches the encoding of the HTML files.
        loadOptions.Encoding = Encoding.Default;
        loadOptions.WarningCallback = new WarningInfoCollection();

        Document source = new Document(strFile4Insertion, loadOptions);
        return InsertFile(target.BookmarkStart.ParentNode, source);
    }
    catch (Exception e)
    {
        System.Console.Error.WriteLine(e.ToString());
        return false;
    }
}

I hope this helps.

Best regards,