Convert Hyperlinks to images in output PDF using .NET

@whong4

In your case, we suggest that you call the Field.Unlink method for non-email hyperlinks and then export the document to PDF.

In this case, hyperlinks are not exported to the output PDF. Acrobat Reader creates hyperlinks by itself from the text. This behavior is controlled by the “Edit->Preferences->General->Create links from URLs” checkbox. Please untick this checkbox to get the desired output.

@tahir.manzoor

Thank you for your insight. Although this idea works, the preferred approach would be to provide a workaround for the cropped image.

Is there an Aspose API that you can refer me to that could adjust the styling of the cropped image? I would like the format of the cropped text to look identical to the font styling of the rest of the document.

If this approach seems infeasible, please let me know.

Thanks,
Woon Gi

@whong4

In your case, we suggest the following solution.

  1. Please get the text of hyperlink.
  2. Insert textbox into the document.
  3. Insert the hyperlink text into textbox.
  4. Convert the shape (textbox) into image using Shape.GetShapeRenderer().Save method.
  5. Move the cursor to the hyperlink and insert the shape.
  6. Save the document to PDF

Hope this helps you.

Moreover, you can protect your output PDF using Aspose.Words. Following code example demonstrates how to set permissions on a PDF document generated by Aspose.Words.

// Load the document that will be rendered to PDF.
Document sourceDoc = new Document(MyDir + "Rendering.docx");

// Encryption settings: two passwords plus the RC4 128-bit algorithm.
// NOTE(review): the original comment calls "password" the owner password; confirm the
// argument order against the PdfEncryptionDetails constructor documentation.
PdfEncryptionDetails encryption =
    new PdfEncryptionDetails("password", string.Empty, PdfEncryptionAlgorithm.RC4_128);

// Reset to "nothing allowed" first, then assign exactly the two permissions that are wanted.
encryption.Permissions = PdfPermissions.DisallowAll;
encryption.Permissions = PdfPermissions.ModifyAnnotations | PdfPermissions.DocumentAssembly;

// Attach the encryption settings to the PDF save options.
PdfSaveOptions pdfOptions = new PdfSaveOptions();
pdfOptions.EncryptionDetails = encryption;

// Render the document to PDF with the permissions configured above.
sourceDoc.Save(ArtifactsDir + "Rendering.EncryptionPermissions.pdf", pdfOptions);

@tahir.manzoor

Thank you for providing a potential solution. Would you be able to provide a working example for me to reference?

Thanks,
Woon Gi

@whong4

We are writing the code example for your case and will get back to you soon.

@whong4

Please use the following code example to convert hyperlinks to images in a Word document. You can skip the email addresses as you are doing in your code. Hope this helps you.

// Replaces every HYPERLINK field in the document with an EMF picture of its text and
// saves the result as a Word document.
Document doc = new Document(MyDir + "sample_input.docx");
DocumentBuilder builder = new DocumentBuilder(doc);

// Step 1: wrap each hyperlink field in a uniquely named bookmark, then unlink the field so
// only its text remains inside the bookmark.
int i = 1;
foreach (Field field in doc.Range.Fields)
{
    if (field.Type != FieldType.FieldHyperlink)
    {
        continue;
    }

    string bookmarkName = "bookmark" + i;
    builder.MoveToField(field, false);
    builder.StartBookmark(bookmarkName);
    builder.MoveToField(field, true);
    builder.EndBookmark(bookmarkName);
    field.Unlink();
    i++;
}

// Step 2: for each bookmark created above, let DrawText insert a text box holding a copy of
// the bookmarked text, then clear the original text.
LayoutCollector collector = new LayoutCollector(doc);
LayoutEnumerator enumerator = new LayoutEnumerator(doc);
ArrayList nodes = new ArrayList();
foreach (Bookmark bookmark in doc.Range.Bookmarks)
{
    // Skip bookmarks that were already in the document; only the "bookmarkN" ones created
    // in step 1 mark hyperlink text.
    if (!bookmark.Name.StartsWith("bookmark", System.StringComparison.Ordinal))
    {
        continue;
    }

    Shape shape = DrawText(doc, bookmark, collector, enumerator);
    shape.Stroke.On = false;
    bookmark.Text = "";
    nodes.Add(shape);
}

// Step 3: render each text box to EMF, insert the picture at the text box position and
// remove the text box itself.
doc.UpdatePageLayout();
foreach (Shape newshape in nodes)
{
    // The stream is only a temporary buffer for the rendered image, so release it per shape.
    using (MemoryStream stream = new MemoryStream())
    {
        newshape.GetShapeRenderer().Save(stream, new ImageSaveOptions(SaveFormat.Emf));
        builder.MoveTo(newshape);
        Shape image = builder.InsertImage(stream);
        image.Stroked = false;
        image.StrokeWeight = 0;
    }

    newshape.Remove();
}
doc.Save(MyDir + "output.docx");


/// <summary>
/// Creates a text box with zero internal margins that holds a copy of the nodes between the
/// bookmark's start and end, sizes it from the laid-out rectangles of those two nodes, and
/// inserts it immediately before the bookmark start.
/// </summary>
/// <param name="doc">Document in which the text box is created.</param>
/// <param name="bookmark">Bookmark whose content is copied into the text box.</param>
/// <param name="collector">Layout collector used to map the bookmark nodes to layout entities.</param>
/// <param name="enumerator">Layout enumerator used to read the rectangles of those entities.</param>
/// <returns>The text box shape that was inserted into the document.</returns>
public static Shape DrawText(Document doc, Bookmark bookmark, LayoutCollector collector, LayoutEnumerator enumerator)
{
    BookmarkStart bookmarkStart = bookmark.BookmarkStart;
    BookmarkEnd bookmarkEnd = bookmark.BookmarkEnd;

    enumerator.Current = collector.GetEntity(bookmarkStart);
    Console.WriteLine(" --> Left : " + enumerator.Rectangle.Left + " Top : " + enumerator.Rectangle.Top);
    double start = enumerator.Rectangle.Left;

    Shape shape = new Shape(doc, ShapeType.TextBox);
    shape.AppendChild(new Paragraph(doc));

    // Copy only the nodes strictly between the bookmark start and end. Stopping on null
    // avoids a NullReferenceException when the bookmark end is not a sibling of the start,
    // and excluding the end node keeps a duplicate BookmarkEnd out of the text box.
    for (Node node = bookmarkStart.NextSibling;
         node != null && node != bookmarkEnd;
         node = node.NextSibling)
    {
        shape.FirstParagraph.AppendChild(node.Clone(false));
    }

    shape.TextBox.InternalMarginTop = 0;
    shape.TextBox.InternalMarginBottom = 0;
    shape.TextBox.InternalMarginLeft = 0;
    shape.TextBox.InternalMarginRight = 0;

    // Width is the horizontal distance between the bookmark start and end plus 5 units of
    // slack; height is the height of the rectangle at the bookmark end.
    enumerator.Current = collector.GetEntity(bookmarkEnd);
    double end = enumerator.Rectangle.Left;
    shape.Width = (end - start) + 5;
    shape.Height = enumerator.Rectangle.Bottom - enumerator.Rectangle.Top;

    bookmarkStart.ParentNode.InsertBefore(shape, bookmarkStart);
    return shape;
}

@tahir.manzoor

I integrated the following logic to my source code and it looks like I am receiving an underline below the image file.

There is a shape.textpath.underline property I could use to validate whether an underline exists or not, but I am looking for a property that is similar to underline.none from the Run API. I believe the changes must be made in the DrawText() method.

The following provides an example of how I implemented your logic and the changes I am trying to make for DrawText().

Thanks,
Woon Gi

    /// <summary>
    /// Creates a text box with zero internal margins that holds a copy of the nodes between the
    /// bookmark's start and end, sizes it from the laid-out rectangles of those two nodes, and
    /// inserts it immediately before the bookmark start.
    /// </summary>
    /// <param name="doc">Document in which the text box is created.</param>
    /// <param name="bookmark">Bookmark whose content is copied into the text box.</param>
    /// <param name="collector">Layout collector used to map the bookmark nodes to layout entities.</param>
    /// <param name="enumerator">Layout enumerator used to read the rectangles of those entities.</param>
    /// <returns>The text box shape that was inserted into the document.</returns>
    public static Shape DrawText(Aspose.Words.Document doc, Bookmark bookmark, LayoutCollector collector, LayoutEnumerator enumerator)
    {
        BookmarkStart bookmarkStart = bookmark.BookmarkStart;
        BookmarkEnd bookmarkEnd = bookmark.BookmarkEnd;

        enumerator.Current = collector.GetEntity(bookmarkStart);
        Console.WriteLine(" --> Left : " + enumerator.Rectangle.Left + " Top : " + enumerator.Rectangle.Top);
        double start = enumerator.Rectangle.Left;

        Shape shape = new Shape(doc, ShapeType.TextBox);
        shape.AppendChild(new Paragraph(doc));

        // Copy only the nodes strictly between the bookmark start and end. Stopping on null
        // avoids a NullReferenceException when the bookmark end is not a sibling of the start,
        // and excluding the end node keeps a duplicate BookmarkEnd out of the text box.
        for (Node node = bookmarkStart.NextSibling;
             node != null && node != bookmarkEnd;
             node = node.NextSibling)
        {
            shape.FirstParagraph.AppendChild(node.Clone(false));
        }

        shape.TextBox.InternalMarginTop = 0;
        shape.TextBox.InternalMarginBottom = 0;
        shape.TextBox.InternalMarginLeft = 0;
        shape.TextBox.InternalMarginRight = 0;

        // Width is the horizontal distance between the bookmark start and end plus 20 units of
        // slack; height is the height of the rectangle at the bookmark end plus 5 units.
        enumerator.Current = collector.GetEntity(bookmarkEnd);
        double end = enumerator.Rectangle.Left;
        shape.Width = (end - start) + 20;
        shape.Height = enumerator.Rectangle.Bottom - enumerator.Rectangle.Top + 5;

        // NOTE(review): this sets the TextPath underline flag on the shape; the underline seen
        // in the output may instead come from the font formatting of the copied runs - confirm.
        shape.TextPath.Underline = false;

        bookmarkStart.ParentNode.InsertBefore(shape, bookmarkStart);
        return shape;
    }

    /// <summary>
    /// Walks the HYPERLINK fields of <paramref name="doc"/>; for every one whose address is not
    /// an email address it wraps the field in a "bookmarkN" bookmark, restyles the runs of the
    /// containing paragraph, and unlinks the field. Email hyperlinks are left untouched.
    /// </summary>
    /// <remarks>
    /// This excerpt ends before the method's closing brace, so any later logic is not shown.
    /// <paramref name="keepFormatting"/> is not used in the visible part.
    /// </remarks>
    /// <param name="doc">Document whose hyperlink fields are processed.</param>
    /// <param name="keepFormatting">Not referenced in the visible excerpt.</param>
    private void FormatUploadDocument(Aspose.Words.Document doc, bool keepFormatting)
    {
        DocumentBuilder builder = new DocumentBuilder(doc);
        // Counter used to give every created bookmark a unique name ("bookmark1", "bookmark2", ...).
        int i = 1;
        // Set to true once any HYPERLINK field is seen; not read in the visible excerpt.
        bool isHyperlink = false;
        foreach (Field field in doc.Range.Fields)
        {
            if (field.Type == FieldType.FieldHyperlink)
            {
                isHyperlink = true;
                FieldHyperlink hyperlink = (FieldHyperlink)field;

                // Do nothing if the hyperlink address is an email address.
                // Otherwise the link is removed below so the hyperlink becomes plain text.
                if (Literals.RegEx.IsEmailAddress.IsMatch(hyperlink.Address))
                {
                    continue;
                }

                // Place a bookmark start before the field and a bookmark end after it.
                builder.MoveToField(field, false);
                builder.StartBookmark("bookmark" + i);
                builder.MoveToField(field, true);
                builder.EndBookmark("bookmark" + i);

                // Removes underline and color from non-email hyperlinks.
                // Note: this restyles every run of the paragraph containing the field start,
                // not only the runs that belong to the hyperlink.
                Paragraph links = (Paragraph)field.Start.GetAncestor(NodeType.Paragraph);
                if (links != null)
                {
                    foreach (Run run in links.Runs)
                    {
                        run.Font.Color = Color.Black;
                        run.Font.Underline = Underline.None;
                    }

                }
                // Replace the field with its current result text.
                field.Unlink();
                i++;

            }
        }

@whong4

The table’s cell has font formatting as underline. So, the inserted shape has this font formatting. You can remove it before inserting the Shape node as shown below.

// Replace each temporary text box with an EMF rendering of itself, then save the document.
foreach (Shape newshape in nodes)
{
    // The stream is only a temporary buffer for the rendered image, so release it per shape.
    using (MemoryStream stream = new MemoryStream())
    {
        newshape.GetShapeRenderer().Save(stream, new ImageSaveOptions(SaveFormat.Emf));
        builder.MoveTo(newshape);

        // Clear underline on the builder before inserting, so the inserted image does not
        // pick up underline font formatting from its surroundings (e.g. the table cell).
        builder.Font.Underline = Underline.None;

        Shape shape = builder.InsertImage(stream);
        shape.Stroked = false;
        shape.StrokeWeight = 0;
    }

    newshape.Remove();
}
doc.Save(MyDir + "output.docx");

@tahir.manzoor

My guess is that my word generation file unlinks() all non-email hyperlink fields. In order to get a generated PDF, it has to first consider changes that take place in the word generation file.

In this case, only the emails are hyperlink fields and the non emails are just plain text.

@whong4

Please do not call Field.Unlink method for hyperlinks. If you unlink the email address, you can unlink them.

@tahir.manzoor

Would there be a way for me to unlink non-email hyperlinks without using Field.Unlink()? The flaw lies in how my Word Generation file is handling non-email hyperlinks. By unlinking the fields, the hyperlink text is no longer a field.

Since the Word file has to be generated first, the PDF file will not view the non email hyperlink text as a field.

Thanks,
Woon Gi

@whong4

A field in a Word document is a complex structure consisting of multiple nodes that include field start, field code, field separator, field result and field end. Fields can be nested, contain rich content and span multiple paragraphs or sections in a document. The Field class is a “facade” object that provides properties and methods that allow to work with a field as a single object.

Yes, it is true. The Field.Unlink method unlinks the field and replaces the field with its most recent result.

If you want email address as field in output PDF, please do not use field.Unlink(); in your code.

If you still face any issue, please describe in complete detail exactly what you want to achieve using Aspose.Words. We will then guide you accordingly.

@tahir.manzoor

The email address is remaining as a field in the word document. The field.Unlink() process is only being implemented for non email hyperlinks. In the previous code I sent you, I included an if condition to allow email hyperlinks to remain as a field.

I am currently having an issue implementing the bookmark, layout, and shape logic. The bookmark is looking for a field in its parameter, but there is no field to reference for the regular hyperlink text.

I would need to find a way to bookmark the hyperlink text (not a field). After the location of the hyperlink text has been identified, the shape to image conversion logic should work.

I have provided an excerpt of where the Field.Unlink() logic is being invoked in the Word Generation file.

        // Becomes true as soon as one HYPERLINK field is found; not read within this excerpt.
        bool isHyperlink = false;
        foreach (Field field in doc.Range.Fields)
        {
            // Only HYPERLINK fields are processed.
            if (field.Type != FieldType.FieldHyperlink)
            {
                continue;
            }

            isHyperlink = true;
            FieldHyperlink hyperlink = (FieldHyperlink)field;

            // Email hyperlinks stay as fields; everything else is turned into plain text below.
            bool isEmail = Literals.RegEx.IsEmailAddress.IsMatch(hyperlink.Address);
            if (isEmail)
            {
                continue;
            }

            // Restyle the runs of the paragraph that contains the field start:
            // no underline, black text.
            Paragraph hostParagraph = (Paragraph)field.Start.GetAncestor(NodeType.Paragraph);
            if (hostParagraph != null)
            {
                foreach (Run textRun in hostParagraph.Runs)
                {
                    textRun.Font.Underline = Underline.None;
                    textRun.Font.Color = Color.Black;
                }
            }

            // Replace the field with its current result text.
            field.Unlink();
        }

@tahir.manzoor

Just to provide clarification, I would like to figure out how to identify the location of hyperlink text after the embedded hyperlink has been removed (using Field.Unlink()).

Once it has been identified, I would like to know how to insert bookmarks by text location rather than fields.

Thanks,
Woon Gi Hong

@whong4

Unfortunately, your new requirements are not clear. As per our understanding, you want to convert hyperlink fields to images and insert them at the same place. You can achieve this by using the code example shared in my earlier post in this thread.

You have email addresses and hyperlinks in your document. You can check whether each hyperlink field is an email address or not and perform the desired task.

Please note that you do not need to call Field.Unlink method for hyperlink that you want to convert to image.

  1. Do you want to find the location after converting hyperlink to image?
  2. Please let us know why you want to identify the location of hyperlink.
  3. Please elaborate some detail what do you mean by location.
  4. Please manually create your expected output Word document using MS Word and attach it here for our reference. We will investigate how you want your final output document. (You can save the final document to PDF. The word document will help us to get your requirement).

@tahir.manzoor

Sorry for the confusion and inconvenience. The logic that you have provided works well when both emails and hyperlinks remain as fields.

However, the requirement for file output should be as follows.

  1. Word document outputs should have regular hyperlinks (not emails) unlinked.
  2. Identify unlinked fields and apply shape to image conversion logic.

There are two output documents that should be generated:

First would be the Word file, which consists of regular hyperlinks and email addresses. Field.Unlink() is only being used to unlink all regular hyperlinks. By unlinking regular hyperlinks, the document should just be reading them as plain text.

The same output should be taking place for PDF generation as well. However, Adobe Acrobat is embedding all hyperlink text with hyperlinks by default. Converting the hyperlink text to an image file is the appropriate work around.

After playing around with the shapes to image conversion logic, I believe this would be the best approach to follow.

To answer your questions:

  1. I wouldn’t need to find the location after the hyperlink text has been converted to an image.

  2. After the initial Word file generation, the regular hyperlinks should be unlinked. In this case, I would like to find a way to search for the hyperlink text in the document. Once the hyperlink text has been identified, place bookmarks on the hyperlink text.

  3. By location I am referring to where the hyperlink text is being searched in the document. As of right now, the Aspose.Words.Fields.Field is being invoked to identify all hyperlink fields, including email addresses. However, by unlinking regular hyperlinks during Word file generation, the hyperlink field has been removed. The “if” condition I have provided is only searching for fields not the hyperlink plaintext.

Please let me know if you need further clarification.

Thanks,
Woon Gi

Test Documents.zip (81.8 KB)

@whong4

You can achieve your requirement using the same code example shared in my previous post here:

To get the Word output document, please save the document after the first foreach loop as shown below.

// Wraps every non-email HYPERLINK field in a "bookmarkN" bookmark, unlinks it so only its
// text remains, and saves the result as the Word output document.
Document doc = new Document(MyDir + "sample_input.docx");
DocumentBuilder builder = new DocumentBuilder(doc);
int i = 1;
foreach (Field field in doc.Range.Fields)
{
    if (field.Type != FieldType.FieldHyperlink)
    {
        continue;
    }

    // A hyperlink counts as an email link when its result text parses as a mail address.
    // Only the failures the MailAddress constructor documents (bad format, null or empty
    // input) mean "not an email"; anything else is no longer silently swallowed.
    bool isEmail;
    try
    {
        new System.Net.Mail.MailAddress(field.Result);
        isEmail = true;
    }
    catch (FormatException)
    {
        isEmail = false;
    }
    catch (ArgumentException)
    {
        isEmail = false;
    }

    // Email links are left as live fields.
    if (isEmail)
    {
        continue;
    }

    // Bookmark the field's extent, then replace the field with its result text.
    string bookmarkName = "bookmark" + i;
    builder.MoveToField(field, false);
    builder.StartBookmark(bookmarkName);
    builder.MoveToField(field, true);
    builder.EndBookmark(bookmarkName);
    field.Unlink();
    i++;
}

doc.Save(MyDir + "output.docx"); 

To get the PDF document, please use the remaining code and save the document to PDF.

Please note that the only changes from my previous code are saving the document after the first foreach loop and the email validation check.

@tahir.manzoor

Thank you for all your help, thus far. I am getting extremely close to the final solution, but there are some slight changes that might need to be made.

Based on the shape and image render logic that you have provided, it appears that the hyperlinks are showing up as images in the Word Document, prior to saving it in a PDF file. However, I would like to find a way to convert the hyperlink text to an image upon run time.

The DrawText() method that I have provided initially, converts hyperlink text to an image file on runtime. As of right now, if I save the generated Word Document to a PDF file the hyperlink texts are being shown as an image file. However, when I try to generate a PDF file upon runtime, the hyperlink text are still embedded with a hyperlink.

The bookmark logic is able to identify the bookmarks that have been created in the Word Generation C# file. The saved bookmarks are also being read in the PDF Generation C# file as well. However, I would like to find a way to manipulate the current shape to image logic to work upon run time.

If you need further clarification on this requirement, please let me know.

Thanks,
Woon Gi

@whong4

Please ZIP and attach the problematic PDF along with screenshot that shows the hyperlink.

You can remove the bookmark before saving the document to PDF using Document.Range.Bookmarks.Clear() method.

@whong4

We have tested the scenario using the latest version of Aspose.Words for .NET 20.7 with following code example and have not found the shared issue. So, please use Aspose.Words for .NET 20.7 and following code example. We have attached the output PDF with this post for your kind reference. output 20.7.pdf (53.5 KB)

If you still face problem, please share the PDF viewer that you are using. Please also share the screenshot that shows hyperlink in PDF.

// Produces two outputs from one input document:
//   1. a Word file in which non-email hyperlinks are unlinked (plain text inside bookmarks);
//   2. a PDF in which that bookmarked text has been replaced by EMF pictures of itself.
Document doc = new Document(MyDir + "sample_input.docx");
DocumentBuilder builder = new DocumentBuilder(doc);
int i = 1;
foreach (Field field in doc.Range.Fields)
{
    if (field.Type != FieldType.FieldHyperlink)
    {
        continue;
    }

    // A hyperlink counts as an email link when its result text parses as a mail address.
    // Only the failures the MailAddress constructor documents (bad format, null or empty
    // input) mean "not an email"; anything else is no longer silently swallowed.
    bool isEmail;
    try
    {
        new System.Net.Mail.MailAddress(field.Result);
        isEmail = true;
    }
    catch (FormatException)
    {
        isEmail = false;
    }
    catch (ArgumentException)
    {
        isEmail = false;
    }

    // Email links are left as live fields.
    if (isEmail)
    {
        continue;
    }

    // Bookmark the field's extent, then replace the field with its result text.
    string bookmarkName = "bookmark" + i;
    builder.MoveToField(field, false);
    builder.StartBookmark(bookmarkName);
    builder.MoveToField(field, true);
    builder.EndBookmark(bookmarkName);
    field.Unlink();
    i++;
}
doc.Save(MyDir + "output.docx");

// Convert document to PDF: first insert a text box copy of each bookmarked hyperlink text
// (via DrawText) and clear the original text.
LayoutCollector collector = new LayoutCollector(doc);
LayoutEnumerator enumerator = new LayoutEnumerator(doc);
ArrayList nodes = new ArrayList();
foreach (Bookmark bookmark in doc.Range.Bookmarks)
{
    // Only the "bookmarkN" bookmarks created above mark hyperlink text. The comparison is
    // ordinal because the name is an identifier, not linguistic text.
    if (!bookmark.Name.StartsWith("bookmark", StringComparison.Ordinal))
    {
        continue;
    }

    Shape shape = DrawText(doc, bookmark, collector, enumerator);
    shape.Stroked = false;
    shape.StrokeWeight = 0;
    shape.Stroke.On = false;
    bookmark.Text = "";
    nodes.Add(shape);
}

// Render each text box to EMF, insert the picture at the text box position and remove the
// text box itself.
doc.UpdatePageLayout();
foreach (Shape newshape in nodes)
{
    // The stream is only a temporary buffer for the rendered image, so release it per shape.
    using (MemoryStream stream = new MemoryStream())
    {
        newshape.GetShapeRenderer().Save(stream, new ImageSaveOptions(SaveFormat.Emf));
        builder.MoveTo(newshape);
        builder.InsertImage(stream);
    }

    newshape.Remove();
}

doc.UpdatePageLayout();
doc.Save(MyDir + "output 20.7.pdf");

/// <summary>
/// Creates a text box with zero internal margins that holds a copy of the nodes between the
/// bookmark's start and end, sizes it from the laid-out rectangles of those two nodes, and
/// inserts it immediately before the bookmark start.
/// </summary>
/// <param name="doc">Document in which the text box is created.</param>
/// <param name="bookmark">Bookmark whose content is copied into the text box.</param>
/// <param name="collector">Layout collector used to map the bookmark nodes to layout entities.</param>
/// <param name="enumerator">Layout enumerator used to read the rectangles of those entities.</param>
/// <returns>The text box shape that was inserted into the document.</returns>
public static Shape DrawText(Document doc, Bookmark bookmark, LayoutCollector collector, LayoutEnumerator enumerator)
{
    BookmarkStart bookmarkStart = bookmark.BookmarkStart;
    BookmarkEnd bookmarkEnd = bookmark.BookmarkEnd;

    enumerator.Current = collector.GetEntity(bookmarkStart);
    Console.WriteLine(" --> Left : " + enumerator.Rectangle.Left + " Top : " + enumerator.Rectangle.Top);
    double start = enumerator.Rectangle.Left;

    Shape shape = new Shape(doc, ShapeType.TextBox);
    shape.AppendChild(new Paragraph(doc));

    // Copy only the nodes strictly between the bookmark start and end. Stopping on null
    // avoids a NullReferenceException when the bookmark end is not a sibling of the start,
    // and excluding the end node keeps a duplicate BookmarkEnd out of the text box.
    // Optional: to drop underline from the copied text, set Font.Underline = Underline.None
    // on Run nodes here (left disabled, as in the original snippet).
    for (Node node = bookmarkStart.NextSibling;
         node != null && node != bookmarkEnd;
         node = node.NextSibling)
    {
        shape.FirstParagraph.AppendChild(node.Clone(false));
    }

    shape.TextBox.InternalMarginTop = 0;
    shape.TextBox.InternalMarginBottom = 0;
    shape.TextBox.InternalMarginLeft = 0;
    shape.TextBox.InternalMarginRight = 0;

    // Width is the horizontal distance between the bookmark start and end plus 5 units of
    // slack; height is the height of the rectangle at the bookmark end.
    enumerator.Current = collector.GetEntity(bookmarkEnd);
    double end = enumerator.Rectangle.Left;
    shape.Width = (end - start) + 5;
    shape.Height = enumerator.Rectangle.Bottom - enumerator.Rectangle.Top;

    bookmarkStart.ParentNode.InsertBefore(shape, bookmarkStart);
    return shape;
}