Hello,
During my HTML-to-PDF conversion tests I could not get the local links in my HTML file ‘bcl_1948506000_corrected.htm’ (in the zip file ‘input.zip’) to work. The links work fine in the HTML file itself, but not in the generated PDF. Each link is an <a> tag whose href attribute
points to the id attribute of an element in my data.
Could you please investigate this?
First, define the following properties in your class:
/// <summary>
/// Document currently being converted; assigned from the result of forceImageSize and saved by ToPdf.
/// </summary>
public Document DocxFile { get; set; }
/// <summary>
/// Stream that ToPdf saves <see cref="DocxFile"/> into, using PDF save options.
/// </summary>
public MemoryStream StreamOut { get; set; }
/// <summary>
/// Stream that ToPdf saves the Aspose.Pdf document into; written only when embedded files were extracted.
/// </summary>
public MemoryStream StreamOutPdf { get; set; }
Then, below is the code used to convert the HTML to PDF:
// HTML entries of the archive, keyed by their cleaned entry name.
Dictionary<String, MemoryStream> htmlFiles = new Dictionary<String, MemoryStream>();
// Unzipping
// The archive is disposed as soon as the HTML entries have been copied into memory.
using (ZipFile zip = new ZipFile(""))
{
    foreach (ZipEntry zipEntry in zip)
    {
        // Only file entries can hold HTML content; skip directories before touching any stream.
        if (zipEntry.IsDirectory)
        {
            continue;
        }

        String fileName = zipEntry.Name;
        // Case-insensitive ordinal comparison avoids culture-dependent upper-casing.
        bool isHtml = fileName.EndsWith(".HTML", StringComparison.OrdinalIgnoreCase)
            || fileName.EndsWith(".HTM", StringComparison.OrdinalIgnoreCase);
        if (!isHtml)
        {
            continue;
        }

        MemoryStream memStream = new MemoryStream();
        using (Stream entryStream = zip.GetInputStream(zipEntry))
        {
            // Stream.Read may return fewer bytes than requested, so keep reading
            // until the end of the entry instead of trusting a single call.
            byte[] buffer = new byte[4096];
            int bytesRead;
            while ((bytesRead = entryStream.Read(buffer, 0, buffer.Length)) > 0)
            {
                memStream.Write(buffer, 0, bytesRead);
            }
        }
        // Rewind so that whoever consumes the stream starts at the first byte.
        memStream.Position = 0;
        htmlFiles.Add(ZipEntry.CleanName(zipEntry.Name), memStream);
    }
}

// Conversion of HTML file
// NOTE(review): StreamOut / StreamOutPdf are replaced on every iteration, so only the
// output of the last HTML file remains available after the loop - confirm this is intended.
foreach (KeyValuePair<String, MemoryStream> htmlFile in htmlFiles)
{
    using (MemoryStream newHtmlDoc = htmlFile.Value)
    {
        Aspose.Words.LoadOptions opt = new Aspose.Words.LoadOptions();
        opt.LoadFormat = Aspose.Words.LoadFormat.Html;
        opt.Encoding = Encoding.UTF8;
        Aspose.Words.Document doc = new Aspose.Words.Document(newHtmlDoc, opt);

        Aspose.Words.DocumentBuilder builder = new Aspose.Words.DocumentBuilder();
        builder.Document = doc;
        // NOTE(review): builder.PageSetup presumably targets only the builder's current
        // section, unlike the margin loop below which covers every section - confirm.
        builder.PageSetup.Orientation = Orientation.Landscape;
        builder.PageSetup.PaperSize = PaperSize.A4;
        builder.PageSetup.LeftMargin = 0;
        builder.PageSetup.RightMargin = 0;
        builder.Document.Range.Replace(ControlChar.NonBreakingSpace, " ", false, false);

        // Margin reducing
        foreach (Section sec in doc.Sections)
        {
            sec.PageSetup.TopMargin = 8;
            sec.PageSetup.BottomMargin = 8;
        }

        this.StreamOut = new MemoryStream();
        this.StreamOutPdf = new MemoryStream();
        this.DocxFile = forceImageSize(doc);
        ToPdf();
    }
}
Below is the forceImageSize(Document aDoc) method:
/// <summary>
/// Resizes every shape that carries an image to the point dimensions reported by its image data.
/// </summary>
/// <param name="aDoc">Document whose shapes (searched recursively) are adjusted in place.</param>
/// <returns>The same <paramref name="aDoc"/> instance, after resizing.</returns>
protected Document forceImageSize(Document aDoc)
{
    foreach (Aspose.Words.Drawing.Shape candidate in aDoc.GetChildNodes(NodeType.Shape, true))
    {
        // Shapes without an image keep their current dimensions.
        if (!candidate.HasImage)
        {
            continue;
        }

        candidate.Width = candidate.ImageData.ImageSize.WidthPoints;
        candidate.Height = candidate.ImageData.ImageSize.HeightPoints;
    }

    return aDoc;
}
Below is the ToPdf() method:
/// <summary>
/// Saves <c>DocxFile</c> as PDF into <c>StreamOut</c>. When the document contains embedded
/// OLE objects, the PDF is re-opened with Aspose.Pdf, the extracted files are attached via
/// <c>ImportFilesToPdf</c>, and the result is saved into <c>StreamOutPdf</c>.
/// </summary>
/// <exception cref="Exception">
/// Any failure is logged and rethrown unchanged, with its original stack trace preserved.
/// </exception>
private void ToPdf()
{
    try
    {
        PdfSaveOptions options = new PdfSaveOptions();
        options.ImageCompression = PdfImageCompression.Jpeg;
        options.JpegQuality = 100;
        options.FontEmbeddingMode = PdfFontEmbeddingMode.EmbedNone;

        // Retrieve embedded files from the document. This must run before the save:
        // ExtractEmbeddedFiles also sets each OLE shape's HRef to the extracted file name.
        Dictionary<String, MemoryStream> embeddedFiles = ExtractEmbeddedFiles();

        try
        {
            this.DocxFile.Save(this.StreamOut, options);
        }
        catch (Exception ex)
        {
            logger.Error("Error on ASPOSE PDF transformation.", ex);
            // "throw;" (not "throw ex;") keeps the original stack trace.
            throw;
        }

        // Embedded file saving in PDF if needed
        if (embeddedFiles.Count > 0)
        {
            // Rewind so the freshly written PDF is read from its first byte,
            // independent of how the loader treats the current position.
            this.StreamOut.Position = 0;
            Aspose.Pdf.Document pdfDocument = new Aspose.Pdf.Document(this.StreamOut);
            ImportFilesToPdf(pdfDocument, embeddedFiles);
            pdfDocument.Save(this.StreamOutPdf);
        }
    }
    catch (Exception globalEx)
    {
        logger.Error("Error in HTML conversion method.", globalEx);
        throw;
    }
}
Below is the ExtractEmbeddedFiles() method:
/// <summary>
/// Saves every OLE object found in <c>DocxFile</c> into its own memory stream and sets the
/// owning shape's <c>HRef</c> to the file name under which the stream is stored.
/// </summary>
/// <returns>
/// The extracted streams keyed by file name; empty when the document has no OLE shapes.
/// </returns>
private Dictionary<String, MemoryStream> ExtractEmbeddedFiles()
{
    Dictionary<String, MemoryStream> oleStreamDictionary = new Dictionary<String, MemoryStream>();
    Aspose.Words.NodeCollection shapes = this.DocxFile.GetChildNodes(NodeType.Shape, true);
    int i = 0;
    foreach (Aspose.Words.Drawing.Shape shape in shapes)
    {
        if (shape.OleFormat == null)
        {
            continue;
        }

        // A null extension would make EndsWith throw; treat it as "no extension".
        String extension = shape.OleFormat.SuggestedExtension ?? "";
        String filename;
        if (String.IsNullOrEmpty(shape.OleFormat.SuggestedFileName))
        {
            filename = "OLE_OBJECT_NO_" + (i + 1) + extension;
        }
        else
        {
            filename = shape.OleFormat.SuggestedFileName;
            // Ordinal, case-insensitive: "report.DOC" already carries the ".doc" extension.
            if (!filename.EndsWith(extension, StringComparison.OrdinalIgnoreCase))
            {
                filename += extension;
            }
        }

        // Two OLE objects may suggest the same file name; Dictionary.Add would throw on
        // the second one, so prefix the object's ordinal until the key is unique.
        while (oleStreamDictionary.ContainsKey(filename))
        {
            filename = "OLE_OBJECT_NO_" + (i + 1) + "_" + filename;
        }

        MemoryStream ms = new MemoryStream();
        shape.OleFormat.Save(ms);
        oleStreamDictionary.Add(filename, ms);
        // The link target is later matched against this key when attaching files to the PDF.
        shape.HRef = filename;
        i++;
    }
    return oleStreamDictionary;
}
Below is the ImportFilesToPdf(Aspose.Pdf.Document pdfFile, Dictionary<string, MemoryStream> embeddedFiles) method:
/// <summary>
/// For every link annotation whose URI names one of <paramref name="embeddedFiles"/>, adds an
/// invisible, read-only file-attachment annotation covering the same rectangle on that page.
/// </summary>
/// <param name="pdfFile">PDF document whose pages are scanned and extended in place.</param>
/// <param name="embeddedFiles">Streams to attach, keyed by the file name used as link target.</param>
private void ImportFilesToPdf(Aspose.Pdf.Document pdfFile, Dictionary<string, MemoryStream> embeddedFiles)
{
    foreach (Aspose.Pdf.Page lPdfPage in pdfFile.Pages)
    {
        // New annotations are collected first and added after the scan, so the
        // annotation collection is never modified while it is being enumerated.
        List<Aspose.Pdf.Annotations.FileAttachmentAnnotation> attachments =
            new List<Aspose.Pdf.Annotations.FileAttachmentAnnotation>();

        foreach (Aspose.Pdf.Annotations.Annotation annotation in lPdfPage.Annotations)
        {
            // Pages may hold annotations other than links, and links may carry actions
            // other than URI actions (e.g. internal go-to links); skip those instead of
            // failing with an InvalidCastException.
            Aspose.Pdf.Annotations.LinkAnnotation la = annotation as Aspose.Pdf.Annotations.LinkAnnotation;
            if (la == null)
            {
                continue;
            }
            Aspose.Pdf.Annotations.GoToURIAction goToAction = la.Action as Aspose.Pdf.Annotations.GoToURIAction;
            if (goToAction == null || goToAction.URI == null)
            {
                continue;
            }

            String fileName = goToAction.URI.Replace(@"%20", @" ");
            MemoryStream ms;
            if (!embeddedFiles.TryGetValue(fileName, out ms))
            {
                continue;
            }

            // Each attachment gets its own copy of the data: several links may point at the
            // same file, and the shared stream must stay usable for all of them.
            MemoryStream attachmentData = new MemoryStream(ms.ToArray());
            Aspose.Pdf.FileSpecification fs = new Aspose.Pdf.FileSpecification(attachmentData, fileName);
            Aspose.Pdf.Annotations.FileAttachmentAnnotation faa =
                new Aspose.Pdf.Annotations.FileAttachmentAnnotation(lPdfPage, la.Rect, fs);
            faa.Icon = Aspose.Pdf.Annotations.FileIcon.Graph;
            faa.Opacity = 0.0;
            faa.Flags = Aspose.Pdf.Annotations.AnnotationFlags.ReadOnly;
            attachments.Add(faa);
        }

        foreach (Aspose.Pdf.Annotations.FileAttachmentAnnotation attachment in attachments)
        {
            lPdfPage.Annotations.Add(attachment);
        }
    }
}
input.zip (732.4 KB)