Urgent: Issue trying to update hyperlinks

The issue is that I am trying to update the hyperlinks in a PDF that I download from blob storage. I use a memory stream to download the PDF. Once I have downloaded it, I scan through the PDF page by page to find link annotations that contain a certain key phrase. Once I have found a link containing the key phrase, I want to update the URL, save the document, and then upload the updated PDF back to my blob storage using a memory stream.

The below code is how I am trying to achieve this:

        // Downloads the blob into an in-memory stream. For PDFs, walks every page's link
        // annotations looking for URIs that contain findText; on a match (and when
        // replaceText is non-empty) rewrites the URI, saves the document and uploads it.
        using (var memoryStream = new MemoryStream())
        {
            await blob.DownloadToStreamAsync(memoryStream);
            // NOTE(review): no Position/Seek reset is visible between the download above,
            // the Document constructor, doc.Save and the upload below — confirm the stream
            // position is what each of those calls expects.

            if (fileExtention == "pdf")
            {
                // Create the license object via its parameterless constructor
                License pdfLicense = new License();

                //Specify the license file path
                pdfLicense.SetLicense("C:\\VSO\\ParlyFindAndReplace\\ParlyFindAndReplace\\Aspose.Pdf.lic");

                pdfLicense.Embedded = true;
                try
                {
                    // Open the downloaded bytes as a PDF document
                    Document doc = new Document(memoryStream);

                    foreach (Page page in doc.Pages)
                    {

                        // Get the link annotations from this particular page
                        AnnotationSelector selector = new AnnotationSelector(new LinkAnnotation(page, Rectangle.Trivial));

                        page.Accept(selector);

                        // Create list holding all the links
                        System.Collections.IList list = (System.Collections.IList)selector.Selected;

                        // Iterate through each individual item inside the list
                        foreach (LinkAnnotation a in list)
                        {

                            // Links whose action is not a GoToURIAction are treated as having an empty URL
                            string url = "";
                            if (a.Action as GoToURIAction == null)
                            {
                                url = "";
                            }
                            else
                            {
                                url = (a.Action as GoToURIAction).URI;

                            }

                            if (url.Contains(findText))
                            {
                                PrintMessage(fileExtention + "**********" + findText + " found" + url + "***********" + "PageNum:" + page.Number);

                                if (replaceText != "")
                                {

                                    url = url.Replace(findText, replaceText);

                                    // Point the link at the rewritten URL
                                    a.Action = new GoToURIAction(url);
                                    
                                    doc.ProcessParagraphs();

                                    // Writes the document into the same stream it was loaded from
                                    doc.Save(memoryStream);

                                    await blob.UploadFromStreamAsync(memoryStream);
                                    PrintMessage(fileExtention + "**********" + url + " replaced ***********");
                                }

                                // NOTE(review): this returns on the first matching link, so any
                                // later links or pages are never visited — confirm that is intended.
                                return true;

                            }

                        }

                    }
                }
                catch (Exception ex)
                {
                    // Failures are only reported through PrintMessage; nothing is rethrown
                    PrintMessage("Issue trying to process pdf: " + ex.Message + " Blob.uri: " + blob.Uri);
                }
            }

}

I can find the links correctly, and it looks like I am setting the new URL correctly; however, when I save and then upload the PDF, it just re-uploads the original PDF without any changes to the links. I am using the latest version of Aspose.PDF.

Can you please advise where I am going wrong, and why the uploaded PDF does not include the changes to the hyperlinks? Just to be clear, the PDF does get uploaded (I can see the timestamp change); it is the content of the PDF that does not change.

This is really urgent

@cbparl

The problem may be related to using the same memory stream multiple times. Please try creating a new memory stream instance and using it to save the new/updated PDF document. If the issue still persists, please share your sample PDF document for our reference. We will test the scenario in our environment and address it accordingly.

Created a new instance of the memory stream and used it to save the updated pdf but the issue still persists.

As you can tell, I'm trying to update multiple PDFs en masse (around 100 each time I run my application).

here is an example of one of the pdfs i am trying to update:

23-01.pdf (2.7 MB)

I'm trying to replace the text “sp-bpr-en-test-cdnep.azureedge.net” with “www.bbc.co.uk” in every URL that contains it.

@cbparl

It looks like you are saving the document after replacing only the first occurrence of the matching hyperlink. Please try saving it after the hyperlinks have been replaced on all pages. We tried the code snippet below and did not notice any issues in our environment:

// Load the PDF, redirect every URI link that points at the old host, then save once.
Document pdfDocument = new Document(dataDir + "23-01.pdf");
PageCollection pages = pdfDocument.Pages;

foreach (Page p in pages)
{
    foreach (Annotation annot in p.Annotations)
    {
        // Only link annotations are of interest here.
        if (annot.AnnotationType != AnnotationType.Link)
        {
            continue;
        }

        LinkAnnotation a = (LinkAnnotation)annot;

        // Anything that is not a GoToURIAction (including a null action) is skipped.
        GoToURIAction uriAction = a.Action as GoToURIAction;
        if (uriAction == null)
        {
            continue;
        }

        if (uriAction.URI.Contains("sp-bpr-en-test-cdnep.azureedge.net"))
        {
            a.Action = new GoToURIAction("www.bbc.co.uk");
        }
    }
}

// Save a single time, after every page has been processed.
pdfDocument.Save(dataDir + "Hyperliks Replaced.pdf");

Hyperliks Replaced.pdf (2.7 MB)

Yeah, I noticed I was saving too many times, so I fixed that before you posted, but thank you for confirming this is the issue.

For some reason the PDF viewer I'm using doesn't like links whose visible text is one URL while the underlying URI is a different one. One last question: could you show me, using the above code, how I can edit the text of the hyperlink while I am updating the URI?

Thanks in advance

@cbparl

We are checking it and will get back to you shortly.

@cbparl

Please try to change the content of the annotation while replacing/removing the URI in order to change the link text like in the below code snippet:

// Load the PDF, redirect every URI link that points at the old host and update the
// annotation's Contents, then save once.
Document pdfDocument = new Document(dataDir + "23-01.pdf");
PageCollection pages = pdfDocument.Pages;

foreach (Page p in pages)
{
    foreach (Annotation annot in p.Annotations)
    {
        // Only link annotations are of interest here.
        if (annot.AnnotationType != AnnotationType.Link)
        {
            continue;
        }

        LinkAnnotation a = (LinkAnnotation)annot;

        // Anything that is not a GoToURIAction (including a null action) is skipped.
        GoToURIAction uriAction = a.Action as GoToURIAction;
        if (uriAction == null)
        {
            continue;
        }

        if (uriAction.URI.Contains("sp-bpr-en-test-cdnep.azureedge.net"))
        {
            // Set the new URI through the action itself, including the scheme.
            a.Action = new GoToURIAction("http://www.bbc.co.uk");

            // NOTE(review): Contents is a property of the annotation; the text drawn on the
            // page is separate page content and may need to be replaced on its own — confirm.
            a.Contents = "New Hyperlink Text";
        }
    }
}

// Save a single time, after every page has been processed.
pdfDocument.Save(dataDir + "Hyperliks Replaced.pdf");

Hi,
In the code below, I am unable to update the text of the hyperlink (I want to update both the hyperlink and the hyperlink text). Could you please suggest an alternative way?

// Walk every link annotation and, for URI links that point at the old host, set the new
// action and the annotation's Contents, then save the document.
Document pdfDocument = new Document(dataDir + "23-01.pdf");
PageCollection pages = pdfDocument.Pages;

foreach (Page p in pages)
{
    foreach (Annotation annot in p.Annotations)
    {
        if (annot.AnnotationType == AnnotationType.Link)
        {
            LinkAnnotation a = (LinkAnnotation)annot;
            string URL = string.Empty;
            IAppointment dest = a.Destination;

            if (a.Action != null || dest != null)
            {
                if (a.Action != null)
                {
                    if (a.Action is GoToURIAction)
                    {
                        if (((GoToURIAction)a.Action).URI.Contains("sp-bpr-en-test-cdnep.azureedge.net"))
                        {
                            a.Action = new GoToURIAction("www.bbc.co.uk");
                            a.ActionURI = "http://www.bbc.co.uk"; // Set the new URI

                            // Edit the text of the hyperlink
                            a.Rect = new Aspose.Pdf.Rectangle(a.Rect.LLX, a.Rect.LLY, a.Rect.URX, a.Rect.URY);
                            a.Contents = "New Hyperlink Text";
                        }
                    }
                }
            }
        }
    }
}

pdfDocument.Save(dataDir + "Hyperliks Replaced.pdf");

@Naresh_Booreddy

Would you please share your sample PDF document for our reference so that we can test the scenario in our environment and address it accordingly.

Hi Asad Ali,

Please find attached the sample file and the output file. Below is my code:
// Walk every link annotation and, for URI links that point at www.britannica.com, set the
// new action and the annotation's Contents, then save the document under a new name.
Aspose.Pdf.Document document = new Aspose.Pdf.Document("D://EditHyperLinksInWordDoc//Cars-1Page.pdf");

PageCollection pages = document.Pages;
foreach (Aspose.Pdf.Page p in pages)
{
    foreach (Aspose.Pdf.Annotations.Annotation annot in p.Annotations)
    {
        if (annot.AnnotationType == AnnotationType.Link)
        {
            LinkAnnotation a = (LinkAnnotation)annot;
            string URL = string.Empty;
            IAppointment dest = a.Destination;

            if (a.Action != null || dest != null)
            {
                if (a.Action != null)
                {
                    if (a.Action is GoToURIAction)
                    {
                        if (((GoToURIAction)a.Action).URI.Contains("www.britannica.com"))
                        {
                            a.Action = new GoToURIAction("my link");
                            //a.ActionURI = "http://www.bbc.co.uk"; // Set the new URI--> this is not working

                            // Edit the text of the hyperlink
                            a.Rect = new Aspose.Pdf.Rectangle(a.Rect.LLX, a.Rect.LLY, a.Rect.URX, a.Rect.URY);
                            a.Contents = "New Hyperlink Text";
                        }
                    }
                }
            }
        }
    }
}

document.Save("D://EditHyperLinksInWordDoc//output-TextChange.pdf");

Cars - 1 Page.pdf (58.1 KB)
output-TextChange.pdf (138.1 KB)


Thanks
Naresh Booreddy

@Naresh_Booreddy

Please check below code snippet to replace text for the hyperlinks:

/// <summary>
/// Opens "Cars - 1 Page.pdf", and for every link annotation whose URI action contains
/// "www.britannica.com" sets a new URI action, sets the annotation's Contents and replaces
/// the page text inside the annotation's rectangle, then saves the result as
/// "output-TextChange.pdf".
/// </summary>
/// <param name="dataDir">
/// Directory prefix. It is concatenated directly with the file names, so it must already
/// end with a path separator.
/// </param>
private static void FindAndReplaceHyperlink(string dataDir)
{
    const string newLinkText = "New Hyperlink Text";

    Aspose.Pdf.Document document = new Aspose.Pdf.Document(dataDir + "Cars - 1 Page.pdf");

    foreach (Aspose.Pdf.Page p in document.Pages)
    {
        foreach (Aspose.Pdf.Annotations.Annotation annot in p.Annotations)
        {
            // Only link annotations are of interest here.
            if (annot.AnnotationType != AnnotationType.Link)
            {
                continue;
            }

            LinkAnnotation a = (LinkAnnotation)annot;

            // Anything that is not a GoToURIAction (including a null action) is skipped.
            GoToURIAction uriAction = a.Action as GoToURIAction;
            if (uriAction == null || !uriAction.URI.Contains("www.britannica.com"))
            {
                continue;
            }

            a.Action = new GoToURIAction("my link");
            a.Contents = newLinkText;

            // The text shown on the page is replaced separately, limited to the link's rectangle.
            replaceText(p, a.Rect, newLinkText);
        }
    }

    // Save a single time, after every page has been processed.
    document.Save(dataDir + "output-TextChange.pdf");
}

/// <summary>
/// Overwrites every non-blank text fragment found inside <paramref name="rect"/> on
/// <paramref name="page"/> with <paramref name="text"/>.
/// </summary>
/// <param name="page">Page that is searched for text fragments.</param>
/// <param name="rect">Rectangle the text search is restricted to.</param>
/// <param name="text">Replacement text assigned to each matching fragment.</param>
private static void replaceText(Page page, Rectangle rect, string text)
{
    TextFragmentAbsorber absorber = new TextFragmentAbsorber();

    // Restrict the search to the supplied rectangle, bounded by the page.
    absorber.TextSearchOptions.LimitToPageBounds = true;
    absorber.TextSearchOptions.Rectangle = rect;

    // Collect the fragments from the given page.
    page.Accept(absorber);

    foreach (TextFragment fragment in absorber.TextFragments)
    {
        // Fragments that contain only whitespace are left untouched.
        if (fragment.Text.Trim().Length > 0)
        {
            fragment.Text = text;
        }
    }
}

output-TextChange.pdf (76.6 KB)