I want to extract all the text, line by line, from a PDF file. I can get all the text from a PDF file that contains only text, but I am unable to extract the text from a PDF file that contains both images and text. Please find the attached PDF.
Test.pdf (83.0 KB)
I am using the code below to extract the text:
// Opens Test.pdf and writes the text of every text fragment on every page to the console.
// The document is wrapped in a using statement so it is disposed once extraction finishes.
using (Aspose.Pdf.Document doc = new Aspose.Pdf.Document(@"Test.pdf"))
{
    foreach (Aspose.Pdf.Page pdfPage in doc.Pages)
    {
        // A new absorber is created for each page and the page is asked to fill it.
        Aspose.Pdf.Text.TextFragmentAbsorber absorber = new Aspose.Pdf.Text.TextFragmentAbsorber();
        pdfPage.Accept(absorber);

        foreach (Aspose.Pdf.Text.TextFragment oneTextFragment in absorber.TextFragments)
        {
            // NOTE: a fragment is not a row; one visual line may be split across several fragments.
            string text = oneTextFragment.Text;
            Console.WriteLine("Extracted Text = '{0}'", text);
        }
    }
}