@Sudharsann01
We modified the code snippet in order to add bookmarks in the PDF obtained from the part of the code that you had shared. The complete code snippet that was used is as below:
// Builds top-level outline bookmarks from the link annotations found on the first pages
// of the PDF (typically its table of contents) and saves the result as "output.pdf".
// For each link the text under the annotation rectangle is extracted; trailing digits are
// treated as the printed page number and the text before them as the bookmark title.
Document pdfDocument = new Document(dataDir + "CBRE Employee_Handbook_For_EHA.pdf");
List<Facades.Bookmark> bookmarks = new List<Facades.Bookmark>();

// Trailing digits (optionally followed by whitespace) at the end of the extracted text.
// Built once here instead of once per annotation.
System.Text.RegularExpressions.Regex trailingPageNumber = new System.Text.RegularExpressions.Regex(@"(\d+)\s*$");

// Removes line breaks and dots (e.g. dot leaders) from the raw entry text and trims it.
string CleanTitle(string raw)
{
    return raw.Replace(Environment.NewLine, string.Empty).Replace(".", string.Empty).Trim();
}

// Creates a level-1 bookmark with an empty child collection pointing at the given page.
Aspose.Pdf.Facades.Bookmark CreateBookmark(string title, int pageNumber)
{
    return new Aspose.Pdf.Facades.Bookmark
    {
        Title = title,
        Level = 1,
        ChildItems = new Facades.Bookmarks(),
        PageNumber = pageNumber
    };
}

// Scan at most the first 10 pages, but never index past the end of a shorter document.
int pagesToScan = Math.Min(10, pdfDocument.Pages.Count);
for (int index = 0; index < pagesToScan; index++)
{
    // The Pages collection is indexed from 1 here, hence index + 1.
    var page = pdfDocument.Pages[index + 1];
    var annotations = page.Annotations.Where(x => x.AnnotationType == AnnotationType.Link).ToList();
    foreach (var anno in annotations)
    {
        var linkAnnotation = anno as Aspose.Pdf.Annotations.LinkAnnotation;
        if (linkAnnotation == null)
        {
            // Defensive: skip anything that is not actually a LinkAnnotation instead of dereferencing null.
            continue;
        }

        var goToAction = linkAnnotation.Action as Aspose.Pdf.Annotations.GoToAction;
        if (linkAnnotation.Action != null && goToAction == null)
        {
            // Links carrying any other kind of action produce no bookmark.
            continue;
        }

        // Extract only the text that lies inside the annotation rectangle on this page.
        TextAbsorber absorber = new TextAbsorber();
        absorber.TextSearchOptions.LimitToPageBounds = true;
        absorber.TextSearchOptions.Rectangle = anno.Rect;
        page.Accept(absorber);
        string extractedText = absorber.Text ?? string.Empty;

        var match = trailingPageNumber.Match(extractedText);
        int pageNumber = 0;
        // TryParse instead of Convert.ToInt32: \d also matches non-ASCII digits and the digit run
        // may overflow an int; either case is treated as "no page number found" rather than a crash.
        bool hasPageNumber = match.Success
            && int.TryParse(match.Groups[1].Value,
                            System.Globalization.NumberStyles.None,
                            System.Globalization.CultureInfo.InvariantCulture,
                            out pageNumber);

        // Title taken from the text preceding the page number. The character directly before the
        // digits is dropped (assumed to be a separator). The capture's own index is used so that
        // zero-padded numbers are located correctly. Stays null when nothing precedes the number.
        string titleFromText = null;
        if (hasPageNumber && match.Groups[1].Index > 0)
        {
            titleFromText = CleanTitle(extractedText.Substring(0, match.Groups[1].Index - 1));
        }

        if (goToAction == null)
        {
            // No action: the destination, if any, is stored directly on the annotation.
            if (linkAnnotation.Destination is ExplicitDestination linkDestination)
            {
                // The explicit destination decides the target page; the title may be empty.
                bookmarks.Add(CreateBookmark(titleFromText ?? string.Empty, linkDestination.PageNumber));
            }
            else if (titleFromText != null)
            {
                // No explicit destination: fall back to the page number printed in the entry.
                bookmarks.Add(CreateBookmark(titleFromText, pageNumber));
            }
        }
        else if (hasPageNumber)
        {
            // GoTo action with a printed page number: the printed number is used as the target.
            if (titleFromText != null)
            {
                bookmarks.Add(CreateBookmark(titleFromText, pageNumber));
            }
        }
        else if (goToAction.Destination is ExplicitDestination actionDestination)
        {
            // GoTo action without a printed page number: the whole text becomes the title.
            bookmarks.Add(CreateBookmark(CleanTitle(extractedText), actionDestination.PageNumber));
        }
    }
}

// Write the collected bookmarks into the document and save it.
Facades.PdfBookmarkEditor editor = new Facades.PdfBookmarkEditor();
editor.BindPdf(pdfDocument);
foreach (Facades.Bookmark bookmark in bookmarks)
{
    editor.CreateBookmarks(bookmark);
}
pdfDocument.Save(dataDir + "output.pdf");
The shared output was generated by this code, and you can see that it contains the bookmarks as well. Please check it and share your feedback with us if you notice any issue with it.
We did not notice any error during text extraction on our side, unlike what you mentioned. Would you please explain the issue in a bit more detail so that we can assist you further?