Here is the full code; I verified in LINQPad that this should work. If I missed anything, let me know.
/// <summary>
/// Converts an uploaded Excel, Word, PowerPoint or PDF document to HTML using the
/// Aspose converter selected by the file extension of <paramref name="filename"/>.
/// </summary>
/// <param name="uploadStream">
/// Stream holding the uploaded document. It is copied into memory and is always
/// closed by this method, whether or not the conversion succeeds.
/// </param>
/// <param name="filename">Original file name; its extension selects the converter.</param>
/// <param name="uniqueId">Identifier passed to the <see cref="ConvertedDocumentContainer"/>.</param>
/// <returns>
/// The container that received the converter output. The caller owns it and is
/// responsible for disposing it.
/// </returns>
/// <exception cref="InvalidOperationException">The storage connection string could not be parsed.</exception>
/// <exception cref="NotSupportedException">The extension matches none of the known converters.</exception>
public async Task<ConvertedDocumentContainer> ConvertToHtml(Stream uploadStream, string filename, string uniqueId)
{
    try
    {
        // NOTE(review): the switch below compares the extension case-sensitively;
        // confirm the *FileType constants match what uploads actually carry.
        var ext = Path.GetExtension(filename);

        // Set up the cloud account, client and container.
        if (!CloudStorageAccount.TryParse(_project.AzureWebJobsStorage, out var storageAccount))
        {
            throw new InvalidOperationException("Could not connect to Azure Cloud Storage. Check credentials.");
        }

        var cloudClient = storageAccount.CreateCloudBlobClient();
        var cloudContainer = cloudClient.GetContainerReference(_project.StorageContainerName);
        var convertedDocContainer = new ConvertedDocumentContainer(cloudContainer, uniqueId, ext);

        try
        {
            using (var newStream = new MemoryStream())
            {
                await uploadStream.CopyToAsync(newStream);

                // CopyToAsync leaves the position at the end of the buffer; rewind so
                // every converter below reads the document from its first byte.
                newStream.Position = 0;

                switch (ext)
                {
                    case ExcelFileType:
                        new Aspose.Cells.License().SetLicense(AsposeLicense);
                        using (var xlsxStream = new MemoryStream())
                        {
                            // Round-trip through XLSX first, then export the normalised workbook.
                            using (var p = new Workbook(newStream))
                            {
                                p.Save(xlsxStream, Aspose.Cells.SaveFormat.Xlsx);
                            }

                            // Save leaves xlsxStream positioned at its end; rewind before reloading.
                            xlsxStream.Position = 0;
                            using (var workbook = new Workbook(xlsxStream))
                            {
                                ExportWorkbook(workbook, convertedDocContainer);
                            }
                        }
                        break;

                    case ExcelFileTypeXml:
                        new Aspose.Cells.License().SetLicense(AsposeLicense);
                        using (var workbook = new Workbook(newStream))
                        {
                            ExportWorkbook(workbook, convertedDocContainer);
                        }
                        break;

                    case WordFileType:
                    case WordFileTypeXml:
                        new Aspose.Words.License().SetLicense(AsposeLicense);
                        var doc = new Document(newStream);
                        var htmlSaveOptions = new Aspose.Words.Saving.HtmlSaveOptions(Aspose.Words.SaveFormat.Html)
                        {
                            ImagesFolderAlias = "files",
                            // The container receives each image through IImageSavingCallback.
                            ImageSavingCallback = convertedDocContainer
                        };
                        doc.Save(convertedDocContainer.RootDocument, htmlSaveOptions);
                        break;

                    case PowerpointFileType:
                    case PowerpointFileTypeXml:
                        new Aspose.Slides.License().SetLicense(AsposeLicense);
                        ISaveOptions opts = new HtmlOptions(convertedDocContainer)
                        {
                            NotesCommentsLayouting = { CommentsPosition = CommentsPositions.None },
                            SlideImageFormat = SlideImageFormat.Bitmap(1, ImageFormat.Jpeg),
                            JpegQuality = 100,
                            ShowHiddenSlides = false,
                            HtmlFormatter = HtmlFormatter.CreateDocumentFormatter(string.Empty, false)
                        };
                        // Per the original author's note the PPT library disposes its input
                        // stream; newStream is already a private copy of the upload, so the
                        // caller's stream is not affected by that.
                        using (var ppt = new Presentation(newStream))
                        {
                            ppt.Save(convertedDocContainer.RootDocument, SaveFormat.Html, opts);
                        }
                        break;

                    case PdfFileType:
                        new Aspose.Pdf.License().SetLicense(AsposeLicense);
                        var saveOptions = new Aspose.Pdf.HtmlSaveOptions(HtmlDocumentType.Html5)
                        {
                            SplitIntoPages = false,
                            FixedLayout = true,
                            RasterImagesSavingMode = Aspose.Pdf.HtmlSaveOptions.RasterImagesSavingModes.AsEmbeddedPartsOfPngPageBackground,
                            ProcessingBatchSize = 1000,
                            // CSS and other resources are routed into the container instead of the file system.
                            CustomCssSavingStrategy = convertedDocContainer.PdfCssSaving,
                            CustomStrategyOfCssUrlCreation = convertedDocContainer.PdfCssUrlCreation,
                            CustomResourceSavingStrategy = convertedDocContainer.PdfResourceSaving
                        };
                        using (var pdf = new Aspose.Pdf.Document(newStream))
                        {
                            pdf.OptimizeSize = true;
                            _logger.LogInformation($"Optimize Begin: [{uniqueId}, {filename}]");
                            pdf.OptimizeResources();
                            _logger.LogInformation($"Optimize End: [{uniqueId}, {filename}]");
                            pdf.Save(convertedDocContainer.RootDocument, saveOptions);
                        }
                        break;

                    default:
                        throw new NotSupportedException($"Could not match {ext} to any Aspose converter.");
                }
            }
        }
        catch
        {
            // The container owns in-memory streams; release them when the conversion
            // fails, because the caller never receives the instance to dispose it.
            convertedDocContainer.Dispose();
            throw;
        }

        return convertedDocContainer;
    }
    finally
    {
        // This method takes ownership of the upload stream on every path.
        uploadStream?.Close();
    }
}
/// <summary>
/// Collects the output of a document-to-HTML conversion in memory: the root HTML
/// document plus every auxiliary asset (images, CSS, ...) the converters emit.
/// Each asset is also mapped to a SAS-signed blob URL under
/// <c>{UniqueId}/files/</c> in <see cref="CloudBlobContainer"/>.
/// </summary>
/// <remarks>
/// The class implements the callback interfaces that the converters are handed
/// (<c>IStreamProvider</c>, <c>IImageSavingCallback</c>, <c>ILinkEmbedController</c>)
/// so that resources are written into this container.
/// </remarks>
public class ConvertedDocumentContainer : IDisposable, IStreamProvider, IImageSavingCallback, ILinkEmbedController
{
    /// <summary>Blob container used to build asset URLs.</summary>
    public CloudBlobContainer CloudBlobContainer { get; }

    // Asset streams keyed by the file name/path exactly as supplied by the converter.
    private readonly ConcurrentDictionary<string, Stream> _streams;

    // Same keys as _streams; values are the SAS-signed URLs generated for the assets.
    private readonly ConcurrentDictionary<string, string> _sasTokenMappings;

    /// <summary>Creates an empty container with a fresh in-memory root document.</summary>
    /// <param name="cloudBlobContainer">Blob container used to build asset URLs.</param>
    /// <param name="uniqueId">Identifier used as the blob path prefix for assets.</param>
    /// <param name="extension">Extension of the source document.</param>
    public ConvertedDocumentContainer(CloudBlobContainer cloudBlobContainer, string uniqueId, string extension)
    {
        CloudBlobContainer = cloudBlobContainer;
        RootDocument = new MemoryStream();
        _streams = new ConcurrentDictionary<string, Stream>();
        _sasTokenMappings = new ConcurrentDictionary<string, string>();
        UniqueId = uniqueId;
        Extension = extension;
    }

    /// <summary>Identifier used as the blob path prefix for assets.</summary>
    public string UniqueId { get; set; }

    /// <summary>Stream the converters write the main HTML document to.</summary>
    public Stream RootDocument { get; set; }

    /// <summary>Extension of the source document.</summary>
    public string Extension { get; set; }

    /// <summary>Asset streams keyed by the name supplied when the asset was registered.</summary>
    public IDictionary<string, Stream> Assets => _streams;

    /// <summary>SAS-signed URL for each registered asset, keyed like <see cref="Assets"/>.</summary>
    public IDictionary<string, string> SasTokenMappings => _sasTokenMappings;

    /// <summary>
    /// PDF CSS-saving callback: copies the CSS content into an asset stream and
    /// replaces the supposed URL with the asset's signed URL.
    /// </summary>
    public void PdfCssSaving(Aspose.Pdf.HtmlSaveOptions.CssSavingInfo info)
    {
        var assetInfo = AddAsset(info.SupposedURL);
        info.ContentStream.CopyTo(assetInfo.Stream);
        info.SupposedURL = assetInfo.CustomPath;
    }

    /// <summary>PDF CSS-URL callback: always answers with the fixed relative path <c>files/page.css</c>.</summary>
    public string PdfCssUrlCreation(Aspose.Pdf.HtmlSaveOptions.CssUrlRequestInfo info)
    {
        return "files/page.css";
    }

    /// <summary>
    /// PDF resource-saving callback: copies the resource content into an asset
    /// stream and returns the asset's signed URL.
    /// </summary>
    public string PdfResourceSaving(Aspose.Pdf.SaveOptions.ResourceSavingInfo info)
    {
        var assetInfo = AddAsset(info.SupposedFileName);
        info.ContentStream.CopyTo(assetInfo.Stream);
        return assetInfo.CustomPath;
    }

    /// <summary>
    /// Stream-provider callback: supplies an in-memory asset stream and its signed
    /// URL for the resource identified by <c>options.DefaultPath</c>.
    /// </summary>
    public void InitStream(StreamProviderOptions options)
    {
        var asset = AddAsset(options.DefaultPath);
        options.Stream = asset.Stream;
        options.CustomPath = asset.CustomPath;
        options.ResourceLoadingType = ResourceLoadingType.UserProvided;
    }

    /// <summary>Stream-provider callback: intentionally empty; streams stay open until <see cref="Dispose"/>.</summary>
    public void CloseStream(StreamProviderOptions options)
    {
    }

    /// <summary>
    /// Image-saving callback: redirects the image into an asset stream and rewrites
    /// the image file name to the last URL segment plus the SAS query string.
    /// </summary>
    void IImageSavingCallback.ImageSaving(ImageSavingArgs args)
    {
        var asset = AddAsset(args.ImageFileName);
        args.ImageStream = asset.Stream;
        var uri = new Uri(asset.CustomPath);
        args.ImageFileName = uri.Segments.Last() + uri.Query;
        // The stream is owned by this container, so the writer must not close it.
        args.KeepImageStreamOpen = true;
    }

    /// <summary>Registers every file found directly in <paramref name="directory"/> as an asset.</summary>
    /// <remarks>
    /// NOTE(review): only the names are registered; the file contents are not read
    /// into the asset streams here. Confirm that callers fill the streams themselves.
    /// </remarks>
    public void AddAssets(string directory)
    {
        foreach (var file in Directory.GetFiles(directory))
        {
            AddAsset(file);
        }
    }

    // Returns the existing stream for the name (or creates an empty one) and
    // (re)generates the signed URL that is recorded for it.
    private AssetInfo AddAsset(string filename)
    {
        var stream = _streams.GetOrAdd(filename, s => new MemoryStream());
        var customPath = GetUrlForAsset(filename);
        _sasTokenMappings.AddOrUpdate(filename, customPath, (k, v) => customPath);
        return new AssetInfo(stream, customPath);
    }

    // Pairs an asset stream with the signed URL under which it is to be published.
    private class AssetInfo
    {
        public Stream Stream { get; }
        public string CustomPath { get; }

        public AssetInfo(Stream stream, string customPath)
        {
            Stream = stream;
            CustomPath = customPath;
        }
    }

    // Builds "<blob uri><sas token>" for "{UniqueId}/files/{file name}". Only the
    // file-name part of sourceFile is used; any directory portion is dropped.
    private string GetUrlForAsset(string sourceFile)
    {
        var originalFileName = Path.GetFileName(sourceFile);
        var cloudBlockBlob = CloudBlobContainer.GetBlockBlobReference($"{UniqueId}/files/{originalFileName}");
        var sasConstraints = new SharedAccessBlobPolicy
        {
            // Start one minute in the past (presumably to tolerate clock skew).
            SharedAccessStartTime = DateTimeOffset.UtcNow.AddMinutes(-1),
            // NOTE(review): read access is granted for 99 years; confirm this is intended.
            SharedAccessExpiryTime = DateTimeOffset.UtcNow.AddYears(99),
            Permissions = SharedAccessBlobPermissions.Read
        };
        var sasBlobToken = cloudBlockBlob.GetSharedAccessSignature(sasConstraints);
        return cloudBlockBlob.Uri.AbsoluteUri + sasBlobToken;
    }

    /// <summary>Rewinds <see cref="RootDocument"/> to its beginning; asset streams are not touched.</summary>
    public void Reset()
    {
        // RootDocument has a public setter, so tolerate null just like Dispose does.
        RootDocument?.Seek(0, SeekOrigin.Begin);
    }

    /// <summary>Disposes the root document and every asset stream.</summary>
    public void Dispose()
    {
        RootDocument?.Dispose();
        foreach (var kvp in _streams)
        {
            kvp.Value?.Dispose();
        }
    }

    // Content types that are written as separate linked files, with the file-name
    // template ({0} = object id) used for each.
    private static readonly Dictionary<string, string> PowerpointLinkTemplates = new Dictionary<string, string>
    {
        { "image/jpeg", "image-{0}.jpg" },
        { "image/png", "image-{0}.png" }
    };

    // Object id -> file-name template, for objects that were chosen to be linked.
    private readonly Dictionary<int, string> _externalPowerpointImages = new Dictionary<int, string>();

    /// <summary>
    /// Decides whether an object is embedded in the HTML or stored as a linked file.
    /// JPEG and PNG content is linked; everything else (including a missing
    /// content type) is embedded.
    /// </summary>
    LinkEmbedDecision ILinkEmbedController.GetObjectStoringLocation(int id, byte[] entityData, string semanticName, string contentType,
        string recomendedExtension)
    {
        // Dictionary lookups throw on a null key, so treat a missing content type as "embed".
        if (contentType == null || !PowerpointLinkTemplates.TryGetValue(contentType, out var template))
        {
            return LinkEmbedDecision.Embed;
        }

        // Indexer assignment instead of Add: being asked about the same id twice must not throw.
        _externalPowerpointImages[id] = template;
        return LinkEmbedDecision.Link;
    }

    /// <summary>Returns the signed URL for a linked object, or null when the id was not registered as linked.</summary>
    string ILinkEmbedController.GetUrl(int id, int referrer)
    {
        if (!_externalPowerpointImages.TryGetValue(id, out var template))
        {
            return null;
        }

        return GetUrlForAsset(string.Format(template, id));
    }

    /// <summary>Writes the bytes of a linked object into its asset stream and rewinds that stream.</summary>
    void ILinkEmbedController.SaveExternal(int id, byte[] entityData)
    {
        if (!_externalPowerpointImages.TryGetValue(id, out var template))
        {
            return;
        }

        var filename = string.Format(template, id);
        var asset = AddAsset(filename);
        asset.Stream.Write(entityData, 0, entityData.Length);
        asset.Stream.Flush();
        asset.Stream.Seek(0, SeekOrigin.Begin);
    }
}