We're sorry, Aspose doesn't work properly without JavaScript enabled.

Free Support Forum - aspose.com

Html to docx Images download during opening Html

The following code causes the remote image to be opened (as seen in the Visual Studio console), but not embedded.
I added code to move to each field and insert the image. However, watching my network traffic when I call builder.InsertHtml(html), each image in the document is downloaded… so when I un-comment my line of code tasks.Add(InsertImageIntoBuilder(builder, includepicture…); the image is downloaded AGAIN and then inserted.

This causes the time to render the document to double.
Is there a way to automatically insert the images at the time builder.InsertHtml is called, or to prevent the images from having to be downloaded twice?

Code:
// Inserts an HTML string into a new document, then walks the document's fields
// looking for hyperlink fields whose address ends in a known image extension.
// NOTE(review): `html`, `imageExtentions` and `InsertImageIntoBuilder` are
// defined outside this snippet.
Document doc = new Document();
// NOTE(review): `options` is configured below but is never passed to the
// Document constructor or to InsertHtml anywhere in this snippet, so these
// settings presumably have no effect here - confirm.
HtmlLoadOptions options = new HtmlLoadOptions();
options.PreserveIncludePictureField = true;

        options.WebRequestTimeout = 1000;
        DocumentBuilder builder = new DocumentBuilder(doc);
        
        // According to the console output reported for this call, the remote
        // image is requested while this statement runs.
        builder.InsertHtml(html);
        // Collects the fields matched in the loop below; nothing in this
        // snippet consumes the list afterwards.
        ArrayList removefields = new ArrayList();
        // Pending image-insert tasks; stays empty while the tasks.Add call
        // inside the loop is commented out.
        List<Task> tasks = new List<Task>();
        
        foreach (Aspose.Words.Fields.Field field in doc.Range.Fields)
        {
        
            // Only hyperlink fields are inspected; every other field type is ignored.
            if (field.Type.Equals(Aspose.Words.Fields.FieldType.FieldHyperlink))
            {
                // If any pictures in the document are linked
                // get the path to the file and embed the photo
                try
                {
                    // Despite its name, this local holds a FieldHyperlink.
                    FieldHyperlink includePicture = (FieldHyperlink)field;
                    // Last three characters of the lower-cased address, treated as the file extension.
                    string extention = includePicture.Address.ToLower().Substring(includePicture.Address.Length - 3);
                    if (imageExtentions.Contains(extention))
                    {
                        //Console.WriteLine(includePicture.SourceFullName);
                        builder.MoveToField(includePicture, false);
                        
                        // Replace https with http to avoid issues with the server rejecting
                        // the clients connection.
                        //builder.InsertImage(includePicture.Address.ToLower().Replace("https:", "http:"));
                        //tasks.Add(InsertImageIntoBuilder(builder, includePicture.Address.ToLower().Replace("https:", "http:")));
                        removefields.Add(field);
                    }
                }
                // Any exception thrown in the try block is silently discarded.
                catch { }
            }
            // Forces a garbage collection on every loop iteration.
            GC.Collect();
        }
        // Blocks until every task in the list has completed.
        Task.WaitAll(tasks.ToArray());

Console output line: - during builder.InsertHtml(html);

Application Insights Telemetry (unconfigured): {"name":"Microsoft.ApplicationInsights.Dev.RemoteDependency","time":"2018-04-19T12:59:15.8068230Z","tags":{"ai.cloud.roleInstance":"DESKTOP-5TQIF02","ai.internal.nodeName":"DEV-5TQIF02","ai.internal.sdkVersion":"rddf:2.2.0-738"},"data":{"baseType":"RemoteDependencyData","baseData":{"ver":2,"name":"/uploads/cfp2/attachments/YNLJGLNO/YNLJGLNO--300677-6-JPG.jpg","id":"RPV6ct5lxaI=","data":"https://www.website.com/uploads/cfp2/attachments/YNLJGLNO/YNLJGLNO--300677-6-JPG.jpg","duration":"00:00:00.6060000","success":false,"type":"Http","target":"www.website.com","properties":{"DeveloperMode":"true"}}}}
The thread 0x5bdc has exited with code 0 (0x0).

@rileyja,

Thanks for your inquiry. We suggest that you implement the IResourceLoadingCallback interface if you want to control how Aspose.Words loads external resources when importing a document from HTML or MHTML. Please check the following code snippet.

If you still face the problem, please ZIP and attach your input Word and HTML documents along with a simplified code example that reproduces the issue you are facing. We will then provide you with more information about your query.

/// <summary>
/// Resource-loading callback that tells Aspose.Words to skip image resources
/// and to handle every other resource type in its default way.
/// </summary>
/// <remarks>
/// Assign an instance to <c>HtmlLoadOptions.ResourceLoadingCallback</c> and pass
/// those options to the <c>Document</c> constructor so the callback is invoked.
/// </remarks>
public class HandleResouces : IResourceLoadingCallback
{
    /// <summary>
    /// Decides how a single external resource should be loaded.
    /// </summary>
    /// <param name="args">Describes the resource that is about to be loaded.</param>
    /// <returns>
    /// <c>ResourceLoadingAction.Skip</c> when the resource is an image;
    /// otherwise <c>ResourceLoadingAction.Default</c>.
    /// </returns>
    public ResourceLoadingAction ResourceLoading(ResourceLoadingArgs args)
    {
        // Images are the only resource type suppressed here, so that they are
        // not fetched while the HTML is being imported.
        if (args.ResourceType == ResourceType.Image)
        {
            return ResourceLoadingAction.Skip;
        }

        return ResourceLoadingAction.Default;
    }
}

Here is my actual code. This does not work…

Please advise.

  public class OfficeExportApi : IResourceLoadingCallback
  {
     /// <summary>
     /// Inserts the given HTML into a new document, processes its hyperlink
     /// fields, and saves the result as DOCX into a memory stream.
     /// </summary>
     /// <param name="html">HTML markup passed to <c>DocumentBuilder.InsertHtml</c>.</param>
     /// <returns>A <c>MemoryStream</c> holding the DOCX output, positioned at 0.</returns>
     /// <remarks>
     /// NOTE(review): as posted, this method does not compile - the argument on the
     /// InsertImageIntoBuilder line is truncated and the braces around the try
     /// block are unbalanced. <c>SetWordsLicense</c> and
     /// <c>InsertImageIntoBuilder</c> are defined outside this snippet.
     /// </remarks>
     public static MemoryStream RenderToWord(string html)
    {
        SetWordsLicense();
        // NOTE(review): no HtmlLoadOptions is created and no
        // ResourceLoadingCallback is assigned anywhere in this method.
        Document doc = new Document();
        DocumentBuilder builder = new DocumentBuilder(doc);
        builder.InsertHtml(html);
        // Fields queued here are removed after the loop and the task wait.
        ArrayList removefields = new ArrayList();
        List<Task> tasks = new List<Task>();

        foreach (Aspose.Words.Fields.Field field in doc.Range.Fields)
        {

            // Only hyperlink fields are processed; unlike a file-extension
            // filter, every hyperlink field is treated as an image link here.
            if (field.Type.Equals(Aspose.Words.Fields.FieldType.FieldHyperlink))
            {
                try
                {
                    FieldHyperlink includePicture = (FieldHyperlink)field;
                    // NOTE(review): this opens a bare block; its closing brace
                    // below leaves the try block itself unclosed.
                    {
                        builder.MoveToField(includePicture, false);
                        // Inserts the image from the lower-cased address with "https:" replaced by "http:".
                        builder.InsertImage(includePicture.Address.ToLower().Replace("https:", "http:"));
                        // NOTE(review): truncated expression ("Address.") - syntax error as posted.
                        // Presumably this inserts the same image a second time - confirm.
                        tasks.Add(InsertImageIntoBuilder(builder, includePicture.Address.);
                        removefields.Add(field);
                }
                // Any exception thrown in the try block is silently discarded.
                catch { }
            }
            // Forces a garbage collection on every loop iteration.
            GC.Collect();
        }
        // Blocks until every queued task has completed.
        Task.WaitAll(tasks.ToArray());
        GC.Collect();
        foreach (Aspose.Words.Fields.Field field in removefields)
        {
            // Remove any linked fields from the collection built above.
            field.Remove();
        }

        MemoryStream stream = new MemoryStream();
        doc.Save(stream, Aspose.Words.SaveFormat.Docx);
        // Rewind so the caller can read the stream from the beginning.
        stream.Position = 0;

        return stream;
    }
 /// <summary>
 /// Decides how a single external resource should be loaded.
 /// </summary>
 /// <param name="args">Describes the resource that is about to be loaded.</param>
 /// <returns>
 /// <c>ResourceLoadingAction.Skip</c> when the resource is an image;
 /// otherwise <c>ResourceLoadingAction.Default</c>.
 /// </returns>
 public ResourceLoadingAction ResourceLoading(ResourceLoadingArgs args)
    {
        // Images are the only resource type suppressed here; everything else
        // is left to the default loading behaviour.
        if (args.ResourceType == ResourceType.Image)
        {
            return ResourceLoadingAction.Skip;
        }

        return ResourceLoadingAction.Default;
    }

@rileyja,

Thanks for your inquiry. You are creating the document from scratch. In this case, IResourceLoadingCallback.ResourceLoading will not be called. For this method to be called, you need to pass HtmlLoadOptions to the Document constructor. Please check the Document constructors.

In your case, we suggest that you create an empty Word document and use it as shown below.

// Attach the resource-loading callback to the load options, then open the
// existing document with those options.
Aspose.Words.HtmlLoadOptions loadOptions = new Aspose.Words.HtmlLoadOptions
{
    ResourceLoadingCallback = new HandleResouces()
};
Document doc = new Document(MyDir + "in.docx", loadOptions);