Hello, I am trying to OCR a PDF with multiple languages and always get a System.Exception error.
I tried the logging feature with no luck. I have also downloaded all of the models, as you can see in the screenshot below, but that did not work either.
Clipboard Image.jpg (44.6 KB)
I am stuck here, and I need to process a large number of files. Please help.
This is my code:
using Aspose.OCR;
using Aspose.OCR.Models.PreprocessingFilters;
using System;
using System.Collections.Generic;
using System.Diagnostics.Metrics;
using System.IO;
using System.Linq;
using System.Text;
namespace OCRtoText
{
class Program
{
/// <summary>
/// Entry point. With one file argument the recognized text is printed to the console;
/// with several, each file's text is saved next to it as a .txt file.
/// The process exit code is set to 1 when no arguments are given or when an error escapes processing.
/// </summary>
/// <param name="args">Paths of the files to recognize.</param>
static void Main(string[] args)
{
    // Validate arguments before anything OCR-related is touched, so there is nothing to clean up.
    if (args.Length == 0)
    {
        Console.WriteLine("Error: No file arguments provided.");
        Console.WriteLine("Usage: AsposeOCRApp.exe <file1> [file2] [file3] ...");
        Environment.ExitCode = 1;
        return;
    }

    try
    {
        // Initialize and set metered license
        if (!SetLicense())
        {
            throw new InvalidOperationException("Error setting license.");
        }

        // Use the local "ocr" folder for resources and disable automatic downloads
        Aspose.OCR.Resources.SetLocalPath("ocr");
        Aspose.OCR.Resources.AllowAutomaticDownloads(false);

        // Configure ONNX Runtime session options
        Aspose.OCR.OnnxRuntimeSessionOptions.GraphOptimizationLevel = GraphOptimizationLevelOnnx.ORT_ENABLE_ALL;
        Aspose.OCR.OnnxRuntimeSessionOptions.ExecutionMode = ExecutionModeOnnx.ORT_PARALLEL;
        Aspose.OCR.OnnxRuntimeSessionOptions.IntraOpNumThreads = Environment.ProcessorCount;
        Aspose.OCR.OnnxRuntimeSessionOptions.InterOpNumThreads = Environment.ProcessorCount;

        // Create the OCR engine after the session options above have been set
        var api = new AsposeOcr();

        // Process files
        if (args.Length == 1)
        {
            // Single file - output to console
            ProcessSingleFile(api, args[0]);
        }
        else
        {
            // Multiple files - save as .txt files
            ProcessMultipleFiles(api, args);
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine($"Error: {ex.Message}");

        // Surface the whole cause chain; the outer message alone is often too generic to diagnose.
        for (var inner = ex.InnerException; inner != null; inner = inner.InnerException)
        {
            Console.WriteLine($"  Caused by {inner.GetType().FullName}: {inner.Message}");
        }

        // Set the exit code instead of calling Environment.Exit: Exit terminates the process
        // immediately and would skip the finally block below.
        Environment.ExitCode = 1;
    }
    finally
    {
        // Ensure resources are cleaned up
        Aspose.OCR.Resources.ReleaseMemory();
    }
}
/// <summary>
/// Applies the metered license keys to Aspose.OCR.
/// </summary>
/// <returns>
/// <c>true</c> when the keys were applied without an exception; <c>false</c> otherwise.
/// The failure reason is written to standard error.
/// </returns>
private static bool SetLicense()
{
    try
    {
        // Constructed inside the try so that a failure here is reported the same way as a key failure.
        Aspose.OCR.Metered ocrLicense = new();

        // NOTE(review): Aspose.License is presumably a project-local holder of the key pair - confirm.
        Aspose.License lic = new();

        // Set metered public and private keys
        ocrLicense.SetMeteredKey(lic.PublicKey, lic.PrivateKey);
        return true;
    }
    catch (Exception ex)
    {
        // Report the cause instead of swallowing it; the caller only sees a boolean.
        Console.Error.WriteLine($"Failed to set metered license ({ex.GetType().FullName}): {ex.Message}");
        return false;
    }
}
/// <summary>
/// Recognizes a single file and writes the recognized text to the console.
/// </summary>
/// <param name="api">OCR engine passed through to <c>PerformOCR</c>.</param>
/// <param name="filePath">Path of the file to recognize.</param>
/// <exception cref="FileNotFoundException">Thrown when <paramref name="filePath"/> does not exist.</exception>
static void ProcessSingleFile(AsposeOcr api, string filePath)
{
    bool inputExists = File.Exists(filePath);
    if (!inputExists)
    {
        throw new FileNotFoundException($"File not found: {filePath}");
    }

    Console.WriteLine($"Processing file: {filePath}");

    // Multilingual recognition settings
    RecognitionSettings ocrSettings = new()
    {
        Language = Language.Multilanguage,
        DetectAreasMode = DetectAreasMode.UNIVERSAL,
        LanguageDetectionLevel = LanguageDetectionLevel.ByParagraph,
        RecognizeSingleLine = false,
        UpscaleSmallFont = true
    };

    // Run recognition, then print a header followed by the text
    string recognizedText = PerformOCR(api, filePath, ocrSettings);

    Console.WriteLine("\n--- OCR Result ---");
    Console.WriteLine(recognizedText);
}
/// <summary>
/// Recognizes each file in turn and saves its text to a .txt file via <c>SaveResultToFile</c>.
/// A missing file or a failure on one file is reported and does not stop the remaining files.
/// If any file was skipped or failed, <see cref="Environment.ExitCode"/> is set to 1 so that
/// calling scripts can detect a partially failed batch.
/// </summary>
/// <param name="api">OCR engine passed through to <c>PerformOCR</c>.</param>
/// <param name="filePaths">Paths of the files to recognize.</param>
static void ProcessMultipleFiles(AsposeOcr api, string[] filePaths)
{
    // Number of inputs that produced no output file (missing or failed).
    int unprocessedCount = 0;

    foreach (string filePath in filePaths)
    {
        if (!File.Exists(filePath))
        {
            Console.WriteLine($"Warning: File not found - {filePath}. Skipping...");
            unprocessedCount++;
            continue;
        }

        try
        {
            Console.WriteLine($"Processing file: {filePath}");

            // Configure OCR settings for multilingual support
            var recognitionSettings = new RecognitionSettings
            {
                Language = Language.Multilanguage,
                DetectAreasMode = DetectAreasMode.UNIVERSAL,
                LanguageDetectionLevel = LanguageDetectionLevel.ByParagraph,
                RecognizeSingleLine = false,
                UpscaleSmallFont = true
            };

            // Perform OCR
            string result = PerformOCR(api, filePath, recognitionSettings);

            // Save result to text file
            string outputPath = SaveResultToFile(filePath, result);
            Console.WriteLine($"Result saved to: {outputPath}");
        }
        catch (Exception ex)
        {
            // Keep going: one bad file must not abort the rest of the batch.
            Console.WriteLine($"Error processing {filePath}: {ex.Message}");
            unprocessedCount++;
        }
    }

    if (unprocessedCount > 0)
    {
        Environment.ExitCode = 1;
    }
}
/// <summary>
/// Adds <paramref name="filePath"/> to a PDF-typed OCR input, runs recognition and returns the
/// recognized texts joined by a blank line.
/// </summary>
/// <param name="api">OCR engine used for recognition.</param>
/// <param name="filePath">Path of the file to recognize; it is always submitted as <c>InputType.PDF</c>.</param>
/// <param name="settings">Recognition settings passed to the engine.</param>
/// <returns>
/// The joined recognition text, or the literal "No text detected in the image." when the engine
/// returns null or an empty list.
/// </returns>
/// <exception cref="InvalidOperationException">
/// Wraps any exception raised while building the input or recognizing; the original is kept as the inner exception.
/// </exception>
static string PerformOCR(AsposeOcr api, string filePath, RecognitionSettings settings)
{
    try
    {
        // Build the input container and register the file
        OcrInput pdfInput = new(InputType.PDF);
        pdfInput.Add(filePath);

        // Run recognition
        List<RecognitionResult> recognized = api.Recognize(pdfInput, settings);

        bool nothingRecognized = recognized == null || recognized.Count == 0;
        if (nothingRecognized)
        {
            return "No text detected in the image.";
        }

        // Collect the text of every result, then join with a blank line in between
        var texts = new List<string>(recognized.Count);
        foreach (RecognitionResult item in recognized)
        {
            texts.Add(item.RecognitionText);
        }

        return string.Join("\n\n", texts);
    }
    catch (Exception failure)
    {
        throw new InvalidOperationException($"OCR processing failed: {failure.Message}", failure);
    }
}
/// <summary>
/// Writes OCR text to a UTF-8 .txt file placed in the same directory as the original file.
/// The file is named "&lt;name&gt;_ocr.txt"; if that exists, "&lt;name&gt;_ocr_1.txt", "&lt;name&gt;_ocr_2.txt", ...
/// are tried until an unused name is found.
/// </summary>
/// <param name="originalFilePath">Path of the recognized file; supplies the directory and base name.</param>
/// <param name="ocrResult">Text to write.</param>
/// <returns>The path of the file that was written.</returns>
static string SaveResultToFile(string originalFilePath, string ocrResult)
{
    // Split the source path into its folder and its extension-less name
    string targetFolder = Path.GetDirectoryName(originalFilePath);
    string baseName = Path.GetFileNameWithoutExtension(originalFilePath);

    // Start with the un-numbered name and move to numbered ones while the name is taken
    string candidate = Path.Combine(targetFolder, $"{baseName}_ocr.txt");
    for (int suffix = 1; File.Exists(candidate); suffix++)
    {
        candidate = Path.Combine(targetFolder, $"{baseName}_ocr_{suffix}.txt");
    }

    // UTF-8 keeps multilingual characters intact
    File.WriteAllText(candidate, ocrResult, Encoding.UTF8);
    return candidate;
}
}
}
I am attaching a sample PDF for your reference.
Multilanguage.pdf (1.6 MB)
Could you please check my code? Am I doing something wrong?
Thanks in advance.
Cesar