Hi Team,
I have a scanned PDF that I am trying to convert to Excel. I am not sure how to use Aspose.OCR, as I am pretty new to it. I tried to find documentation, but nothing I found was clear.
I tried the following code. Is it the right code to do this?
If this is right, then the problem is that the OCR engine in the following code is not getting recognized.
/// <summary>
/// Renders every page of a scanned PDF to a JPEG image held in memory and
/// runs OCR on each image, writing the recognized text to the console.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // Plain ASCII quotes are required here; typographic quotes (as pasted from
    // a web page) do not form a valid C# string literal.
    Aspose.OCR.License license = new Aspose.OCR.License();
    license.SetLicense(@"C:\Licenses\Aspose20.lic");

    // The render settings are identical for every page, so build the device once.
    // The Resolution must be handed to the JpegDevice; otherwise the device
    // falls back to its default resolution and the 300 DPI setting is ignored.
    Resolution resolution = new Resolution(300);
    JpegDevice jpegDevice = new JpegDevice(resolution);

    // Create an instance of OcrEngine for recognition.
    // NOTE(review): OcrEngine appears to belong to the legacy Aspose.OCR API.
    // If the compiler does not recognize this type, confirm which Aspose.OCR
    // version is referenced; newer releases presumably expose a different
    // entry class. TODO confirm against the installed package.
    Aspose.OCR.OcrEngine ocrEngine = new Aspose.OCR.OcrEngine();

    // Load the PDF and release its resources once all pages are processed.
    using (Document pdfDocument = new Document(@"C:\PDFs\ScannedForAspose.pdf"))
    {
        // The loop runs from 1 to Pages.Count inclusive, matching the 1-based page indexer.
        for (int pageNumber = 1; pageNumber <= pdfDocument.Pages.Count; pageNumber++)
        {
            // Temporary in-memory buffer for the rendered page image.
            using (MemoryStream imageStream = new MemoryStream())
            {
                // Render the current page as a JPEG into the stream.
                jpegDevice.Process(pdfDocument.Pages[pageNumber], imageStream);

                // Rewind so the OCR engine reads the image from the beginning.
                imageStream.Position = 0;
                ocrEngine.Image = ImageStream.FromStream(imageStream, ImageStreamFormat.Jpg);

                // Perform OCR on one page at a time; print the text only on success.
                if (ocrEngine.Process())
                {
                    Console.WriteLine(ocrEngine.Text);
                }
            }
        }
    }
}