Thanks for your inquiry. Please check the following code snippet, which gets the text and its coordinates from the PDF document. Hopefully it will help you accomplish the task.
// Finds every run of non-whitespace characters in the PDF and prints each
// fragment's text together with its position values.
// NOTE: `myDir` is not defined in this snippet; it must be supplied by the
// surrounding code and is concatenated directly with the file name.

// Open the document. The using statement releases the document's resources
// once the extraction is finished.
using (Document pdfDocument = new Document(myDir + "Table+of+content.pdf"))
{
    // Create a TextFragmentAbsorber that finds all the phrases matching the
    // regular expression.
    TextFragmentAbsorber textFragmentAbsorber = new TextFragmentAbsorber(@"[\S]+");

    // Set the text search option to specify regular expression usage.
    TextSearchOptions textSearchOptions = new TextSearchOptions(true);
    textFragmentAbsorber.TextSearchOptions = textSearchOptions;

    // Accept the absorber for all the pages.
    pdfDocument.Pages.Accept(textFragmentAbsorber);

    // Get the extracted text fragments.
    TextFragmentCollection textFragmentCollection = textFragmentAbsorber.TextFragments;

    // Loop through the fragments and print the text and coordinates of each one.
    foreach (TextFragment textFragment in textFragmentCollection)
    {
        Console.WriteLine("Text : {0} ", textFragment.Text);
        Console.WriteLine("Position : {0} ", textFragment.Position);

        // The values labelled LLX/LLY are the fragment position's X and Y indents.
        Console.WriteLine("LLX : {0} ", textFragment.Position.XIndent);
        Console.WriteLine("LLY : {0} ", textFragment.Position.YIndent);

        // The values labelled URX/URY are the position indents offset by the
        // width and height of the fragment's rectangle.
        Console.WriteLine("URX : {0} ",
            textFragment.Position.XIndent + textFragment.Rectangle.Width);
        Console.WriteLine("URY : {0} ",
            textFragment.Position.YIndent + textFragment.Rectangle.Height);
    }
}
Please feel free to contact us for any further assistance.
Best Regards,