Hi there!
TextAbsorber.Visit raises an IndexOutOfRangeException on PDFs created with Tesseract (v5).
This was not an issue with Aspose.PDF 24.1; the regression was introduced in Aspose.PDF 24.2 and is still present in 24.6.
A sample PDF that triggers the exception is attached: dino.pdf (313.6 KB).
Code to reproduce the issue:
// Minimal reproduction: load the sample PDF fully into memory, open it as an
// Aspose.PDF document, and run text extraction over the whole document.
var path = @"dino.pdf";
// Both the stream and the document are disposable; release them even when
// Visit throws.
using (var stream = OpenPdfIntoMemoryStream(path))
using (var pdfDocument = new Document(stream))
{
    var textAbsorber = new TextAbsorber();
    // Reported to throw IndexOutOfRangeException here since Aspose.PDF 24.2.
    textAbsorber.Visit(pdfDocument);
}
/// <summary>
/// Reads the entire file at <paramref name="path"/> into memory and wraps the
/// bytes in a <see cref="MemoryStream"/>.
/// </summary>
/// <param name="path">Path of the file to load.</param>
/// <returns>A new <see cref="MemoryStream"/> over the file's bytes; the caller owns it.</returns>
public static MemoryStream OpenPdfIntoMemoryStream(string path)
    => new MemoryStream(File.ReadAllBytes(path));
StackTrace:
at #=zhEIYYHuILF1luFGofpXGc9MiH8d924Gs9ytxYsoXqSze.#=zKsaoZoC0ZU2X3I6IdA==.#=zReIYfv$Wxqvg(UInt32 #=zlvauQME=)
at #=zhEIYYHuILF1luFGofpXGc9MiH8d924Gs9ytxYsoXqSze.#=zKsaoZoC0ZU2X3I6IdA==.#=zbk51oZkBE0RI(Int32 #=zd3nt_Es=, Double& #=zmeFD0J6Oj74f, Double& #=zsiFc0nP3J_m8, UInt32 #=zlvauQME=)
at #=zhEIYYHuILF1luFGofpXGc9MiH8d924Gs9ytxYsoXqSze.#=zKsaoZoC0ZU2X3I6IdA==.#=z$KU4Zf8tGu1q(Int32 #=zd3nt_Es=, Double #=zmeFD0J6Oj74f, Double #=zsiFc0nP3J_m8, UInt32 #=zlvauQME=)
at #=zhEIYYHuILF1luFGofpXGc9MiH8d924Gs9ytxYsoXqSze.#=zKsaoZoC0ZU2X3I6IdA==.#=zOz9wrYN62VTi(UInt32 #=zW27KsGo=, UInt32 #=zlvauQME=)
at #=z9_gpQjtgZCuXEQkvbmH5dJ4CTkjBfTZIxONE7K8Z5koQ.#=ziV1fO5EvcH5j$a8WsQ==(String #=zHrATqB4=, Int32 #=z36iBL$TzaxV6, Int32 #=zO0ujcUlZB4Ze, #=zOz9wrYN62VTi #=zhKMLqx2pVSOv)
at #=z9_gpQjtgZCuXEQkvbmH5dJ4CTkjBfTZIxONE7K8Z5koQ.#=zUOrP_tJp4$_dJduy9A==(String #=zHrATqB4=, Int32 #=z36iBL$TzaxV6, Int32 #=zO0ujcUlZB4Ze, #=zOz9wrYN62VTi #=zhKMLqx2pVSOv)
at #=zhEIYYHuILF1luFGofpXGc9MiH8d924Gs9ytxYsoXqSze.#=z_n0wjKSpIY_u(#=zbjL3340VkKzVXe58G3aWq611PLLuj3E4kg== #=zuInsAlM=, Int32 #=z36iBL$TzaxV6, Int32 #=zO0ujcUlZB4Ze, Double #=zaxGifJY=, Double& #=zAm0dJPY=, Double& #=z_Ies3jk=, #=zg68jMcTt2di1iw3UlLu8zYh_RLPU$Worq7yChuA=[] #=zHp9_YWpyZUey, Boolean& #=zVDoJIuw=, Boolean #=z3PNvIWNcmpFZSH9_s95xfsM=, Boolean #=zSPEbT56voN3_)
at #=zhEIYYHuILF1luFGofpXGc9MiH8d924Gs9ytxYsoXqSze.#=zVFUxPSg=(#=zbjL3340VkKzVXe58G3aWq611PLLuj3E4kg== #=zuInsAlM=, Int32 #=z36iBL$TzaxV6, Int32 #=zO0ujcUlZB4Ze, Double #=zaxGifJY=, Boolean #=z2cm1wRy0mv1AFoGqYg==, Double& #=zAm0dJPY=, Double& #=z_Ies3jk=, #=zg68jMcTt2di1iw3UlLu8zYh_RLPU$Worq7yChuA=[]& #=zNytLPyvJcOhG, Boolean& #=zVDoJIuw=, Boolean #=z3PNvIWNcmpFZSH9_s95xfsM=)
at #=zhEIYYHuILF1luFGofpXGc9MiH8d924Gs9ytxYsoXqSze.#=zVFUxPSg=(#=zbjL3340VkKzVXe58G3aWq611PLLuj3E4kg== #=zuInsAlM=, Int32 #=z36iBL$TzaxV6, Int32 #=zO0ujcUlZB4Ze, Double #=zaxGifJY=, #=zg68jMcTt2di1iw3UlLu8zYh_RLPU$Worq7yChuA=[]& #=zNytLPyvJcOhG)
at #=ztoMTCPimbvBhsGnpq8cZWtXQJq4AR9h_v1LH0xs=.#=zVFUxPSg=(#=zbjL3340VkKzVXe58G3aWq611PLLuj3E4kg== #=zuInsAlM=, Int32 #=z36iBL$TzaxV6, Int32 #=zO0ujcUlZB4Ze, Double #=zaxGifJY=, #=zg68jMcTt2di1iw3UlLu8zYh_RLPU$Worq7yChuA=[]& #=zNytLPyvJcOhG)
at #=zOKAZzRBtq2PVQSws378jH0xIdYrsaLsCUuf2AY5X3mbpXc5RSFkO0Vg=.#=zVFUxPSg=(#=zbjL3340VkKzVXe58G3aWq611PLLuj3E4kg== #=zuInsAlM=, Int32& #=zM1he0YWGVBFe, Int32 #=zd8EmnQs=, Int32 #=zw7rdG0Y=, Boolean #=z5zpji27egCjohGMAkVE84zE=)
at #=z$0lF7CWGoJdk_kl3ZPxZD6_NpEGh8BhvcLLVPOoQwXRvWj51hA==.#=zqgZbDallFaF4(Int32 #=zd8EmnQs=, Int32 #=zw7rdG0Y=, Boolean #=z5zpji27egCjohGMAkVE84zE=, Int32& #=zWtJkeHQ=, Int32& #=zye8llTEt5FpRRM0S4g==)
at #=z$0lF7CWGoJdk_kl3ZPxZD6_NpEGh8BhvcLLVPOoQwXRvWj51hA==.#=zktXsKaUDZ$32(Int32 #=zd8EmnQs=, Int32 #=zw7rdG0Y=, Boolean #=z5zpji27egCjohGMAkVE84zE=)
at #=zOSyyzGPjQG3sOkmfju0pLbGM84KEYKfHVwzFWcmQNMG6.#=zyqnhhh4=(#=z3QVXnWIibZS1rH0cbRL1UkrfdIsYG0Zg6s6uvOJrcXs$QFOibg== #=zkP9gIC4=)
at #=zOSyyzGPjQG3sOkmfju0pLbGM84KEYKfHVwzFWcmQNMG6.#=zZQMvvgSD9qCZ()
at #=zOSyyzGPjQG3sOkmfju0pLbGM84KEYKfHVwzFWcmQNMG6.#=z4kYO1ohF1dzd()
at #=zB_a$wzSvpTzSGcwxkT_sv2KNJ00jeXBAH3daBbOJJH9AYi6PVHKMyyw=..ctor(List`1 #=zYNwBSXMJ3miy, Rectangle #=zvyK7r8Y=, TextExtractionOptions #=z6nTZAe8=)
at #=zdw7W9bgej7FuBVYXMJ5W5Nuq$UfLhUGEVIjyKuP8rdwhOFr9UdYhycGqEzFI.#=z1X11NIb$DPUV(TextExtractionOptions #=z6nTZAe8=)
at Aspose.Pdf.Text.TextAbsorber.#=zuh81ryn_4pW0(#=zdw7W9bgej7FuBVYXMJ5W5Nuq$UfLhUGEVIjyKuP8rdwhOFr9UdYhycGqEzFI #=zX4MKxfPv8_zPzo25Eg==, TextFormattingMode #=zQcxDzxs=)
at Aspose.Pdf.Text.TextAbsorber.#=zK24tNtx2Wn5o(#=zdw7W9bgej7FuBVYXMJ5W5Nuq$UfLhUGEVIjyKuP8rdwhOFr9UdYhycGqEzFI #=zX4MKxfPv8_zPzo25Eg==, TextFormattingMode #=zQcxDzxs=)
at Aspose.Pdf.Text.TextAbsorber.Visit(Page page)
at Aspose.Pdf.Text.TextAbsorber.Visit(Document pdf)
at ReproduceAsposePdfIndexOutOfRangeException.Program.Main(String[] args) in source\repos\ReproduceAsposePdfIndexOutOfRangeException\ReproduceAsposePdfIndexOutOfRangeException\Program.cs:line 23