Issues when using Multilanguage

Hello, I am trying to OCR a PDF with multiple languages and always get a System.Exception error.
I tried to use the Logging feature with no luck. I have also downloaded all models, as you can see in the screenshot below, but that did not work either.
Clipboard Image.jpg (44.6 KB)
I am stuck here, and I need to process a large number of files. Please help.

This is my code:

using Aspose.OCR;
using Aspose.OCR.Models.PreprocessingFilters;
using System;
using System.Collections.Generic;
using System.Diagnostics.Metrics;
using System.IO;
using System.Linq;
using System.Text;

namespace OCRtoText
{
    /// <summary>
    /// Console entry point that runs Aspose.OCR over the PDF files given on the command line.
    /// One argument prints the recognized text to the console; several arguments write one
    /// <c>*_ocr.txt</c> file next to each input file.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Validates the arguments, configures licensing and the OCR engine, and dispatches to
        /// single-file or multi-file processing. The process exit code is 1 on any failure.
        /// </summary>
        /// <param name="args">Paths of the files to recognize; at least one is required.</param>
        static void Main(string[] args)
        {
            // Validate arguments before touching the OCR library, so a usage error
            // neither initializes nor releases any OCR resources.
            if (args.Length == 0)
            {
                Console.WriteLine("Error: No file arguments provided.");
                Console.WriteLine("Usage: AsposeOCRApp.exe <file1> [file2] [file3] ...");
                Environment.ExitCode = 1;
                return;
            }

            try
            {
                // Initialize and set metered license
                if (!SetLicense())
                {
                    throw new InvalidOperationException("Error setting license.");
                }

                // Load recognition resources from the local "ocr" directory only.
                Aspose.OCR.Resources.SetLocalPath("ocr");
                Aspose.OCR.Resources.AllowAutomaticDownloads(false);

                // Configure ONNX Runtime session options
                Aspose.OCR.OnnxRuntimeSessionOptions.GraphOptimizationLevel = GraphOptimizationLevelOnnx.ORT_ENABLE_ALL;
                Aspose.OCR.OnnxRuntimeSessionOptions.ExecutionMode = ExecutionModeOnnx.ORT_PARALLEL;
                Aspose.OCR.OnnxRuntimeSessionOptions.IntraOpNumThreads = Environment.ProcessorCount;
                Aspose.OCR.OnnxRuntimeSessionOptions.InterOpNumThreads = Environment.ProcessorCount;

                // Created after the static ONNX options above have been assigned.
                var api = new AsposeOcr();

                if (args.Length == 1)
                {
                    // Single file - output to console
                    ProcessSingleFile(api, args[0]);
                }
                else
                {
                    // Multiple files - save as .txt files
                    ProcessMultipleFiles(api, args);
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine($"Error: {ex.Message}");
                // The message alone hides the exception type, stack trace and inner
                // exceptions; write the full details to stderr for diagnosis.
                Console.Error.WriteLine(ex);

                // Do not call Environment.Exit here: it terminates the process without
                // running the finally block below. Setting ExitCode and letting Main
                // return yields the same exit code and still performs the cleanup.
                Environment.ExitCode = 1;
            }
            finally
            {
                // Ensure resources are cleaned up
                Aspose.OCR.Resources.ReleaseMemory();
            }
        }

        /// <summary>
        /// Applies the metered Aspose.OCR license using the keys exposed by <c>Aspose.License</c>.
        /// </summary>
        /// <returns>
        /// <c>true</c> when the keys were applied without an exception; otherwise <c>false</c>,
        /// after the cause has been written to stderr.
        /// </returns>
        private static bool SetLicense()
        {
            try
            {
                Aspose.OCR.Metered ocrLicense = new();
                Aspose.License lic = new();

                // Set metered public and private keys
                ocrLicense.SetMeteredKey(lic.PublicKey, lic.PrivateKey);

                return true;
            }
            catch (Exception ex)
            {
                // Report the cause instead of silently discarding it; the caller only
                // sees a boolean and would otherwise have no way to diagnose the failure.
                Console.Error.WriteLine($"License initialization failed: {ex}");
                return false;
            }
        }

        /// <summary>
        /// Builds the recognition settings shared by single-file and multi-file processing.
        /// </summary>
        /// <returns>A new settings instance; callers get their own copy on every call.</returns>
        private static RecognitionSettings CreateRecognitionSettings()
        {
            // NOTE(review): the AutoSkew / AutoDenoising / AutoContrast options were commented
            // out in the original code and remain disabled here.
            return new RecognitionSettings
            {
                Language = Language.Multilanguage,
                DetectAreasMode = DetectAreasMode.UNIVERSAL,
                LanguageDetectionLevel = LanguageDetectionLevel.ByParagraph,
                RecognizeSingleLine = false,
                UpscaleSmallFont = true
            };
        }

        /// <summary>
        /// Recognizes one file and prints the recognized text to the console.
        /// </summary>
        /// <param name="api">OCR engine instance to use.</param>
        /// <param name="filePath">Path of the file to recognize.</param>
        /// <exception cref="FileNotFoundException">The file does not exist.</exception>
        static void ProcessSingleFile(AsposeOcr api, string filePath)
        {
            if (!File.Exists(filePath))
            {
                throw new FileNotFoundException($"File not found: {filePath}");
            }

            Console.WriteLine($"Processing file: {filePath}");

            // Perform OCR
            string result = PerformOCR(api, filePath, CreateRecognitionSettings());

            // Output to console
            Console.WriteLine("\n--- OCR Result ---");
            Console.WriteLine(result);
        }

        /// <summary>
        /// Recognizes each file in turn and saves the text next to the input file.
        /// Missing files are skipped with a warning, and a failure on one file is
        /// reported without stopping the remaining files.
        /// </summary>
        /// <param name="api">OCR engine instance to use.</param>
        /// <param name="filePaths">Paths of the files to recognize.</param>
        static void ProcessMultipleFiles(AsposeOcr api, string[] filePaths)
        {
            foreach (string filePath in filePaths)
            {
                if (!File.Exists(filePath))
                {
                    Console.WriteLine($"Warning: File not found - {filePath}. Skipping...");
                    continue;
                }

                try
                {
                    Console.WriteLine($"Processing file: {filePath}");

                    // Perform OCR with a fresh settings instance per file
                    string result = PerformOCR(api, filePath, CreateRecognitionSettings());

                    // Save result to text file
                    string outputPath = SaveResultToFile(filePath, result);
                    Console.WriteLine($"Result saved to: {outputPath}");
                }
                catch (Exception ex)
                {
                    // Keep going: one bad file must not abort the whole batch.
                    Console.WriteLine($"Error processing {filePath}: {ex.Message}");
                }
            }
        }

        /// <summary>
        /// Runs recognition on a single file, which is always submitted as PDF input.
        /// </summary>
        /// <param name="api">OCR engine instance to use.</param>
        /// <param name="filePath">Path of the file to recognize.</param>
        /// <param name="settings">Recognition settings passed to the engine.</param>
        /// <returns>
        /// The recognized text of all results joined by blank lines, or a fixed
        /// placeholder message when the engine returns no results.
        /// </returns>
        /// <exception cref="InvalidOperationException">
        /// Recognition failed; the original exception is preserved as the inner exception.
        /// </exception>
        static string PerformOCR(AsposeOcr api, string filePath, RecognitionSettings settings)
        {
            try
            {
                // Create OCR input
                var input = new OcrInput(InputType.PDF);
                input.Add(filePath);

                // NOTE(review): the preprocessing filters (auto-dewarping, contrast correction,
                // denoising) were commented out in the original code and remain disabled here.

                // Recognize text
                List<RecognitionResult> results = api.Recognize(input, settings);

                if (results == null || results.Count == 0)
                {
                    return "No text detected in the image.";
                }

                // Combine all results
                return string.Join("\n\n", results.Select(r => r.RecognitionText));
            }
            catch (Exception ex)
            {
                throw new InvalidOperationException($"OCR processing failed: {ex.Message}", ex);
            }
        }

        /// <summary>
        /// Writes the OCR text to <c>&lt;name&gt;_ocr.txt</c> in the directory of the input file.
        /// If that name is taken, <c>&lt;name&gt;_ocr_1.txt</c>, <c>&lt;name&gt;_ocr_2.txt</c>, ...
        /// are tried until an unused name is found. An existing file is never overwritten.
        /// </summary>
        /// <param name="originalFilePath">Path of the file the text was recognized from.</param>
        /// <param name="ocrResult">Recognized text to store.</param>
        /// <returns>The path of the file that was written.</returns>
        static string SaveResultToFile(string originalFilePath, string ocrResult)
        {
            // GetDirectoryName can return null (for example for a root path); fall back to
            // an empty string so Path.Combine yields a path relative to the current directory.
            string directory = Path.GetDirectoryName(originalFilePath) ?? string.Empty;
            string fileNameWithoutExt = Path.GetFileNameWithoutExtension(originalFilePath);

            for (int counter = 0; ; counter++)
            {
                string outputFileName = counter == 0
                    ? $"{fileNameWithoutExt}_ocr.txt"
                    : $"{fileNameWithoutExt}_ocr_{counter}.txt";
                string outputPath = Path.Combine(directory, outputFileName);

                FileStream stream;
                try
                {
                    // CreateNew fails if the file already exists, which closes the gap between
                    // a separate existence check and the write.
                    stream = new FileStream(outputPath, FileMode.CreateNew, FileAccess.Write, FileShare.None);
                }
                catch (IOException) when (File.Exists(outputPath))
                {
                    // Name already taken - try the next numbered candidate.
                    continue;
                }

                // Write with UTF-8 encoding to preserve multilingual characters. Write errors
                // are deliberately not caught by the retry logic above.
                using (stream)
                using (var writer = new StreamWriter(stream, System.Text.Encoding.UTF8))
                {
                    writer.Write(ocrResult);
                }

                return outputPath;
            }
        }
    }
}

I am attaching a sample PDF for your reference.
Multilanguage.pdf (1.6 MB)

Could you please check my code? Am I doing something wrong?

Thanks in advance.

Cesar

@CQcesar

We have opened the following new ticket(s) in our internal issue tracking system and will deliver their fixes according to the terms mentioned in Free Support Policies.

Issue ID(s): OCRNET-1057

You can obtain Paid Support Services if you need support on a priority basis, along with direct access to our Paid Support management team.

1 Like