We are using Aspose.PDF 23.12.0
Our application extracts text from whole PDF pages and also from specific regions of a PDF page.
When multiple threads/calls perform any of these operations concurrently, we start seeing several different exceptions. We were able to reproduce them, but they occur only intermittently.
Please use the code below to reproduce the problem. Because the failures are intermittent, you may need to run the test multiple times before one occurs.
/// <summary>
/// Reproduction test: runs four text extractions in parallel, each on its own
/// stream created from the same PDF bytes, and waits for all of them to finish.
/// </summary>
/// <remarks>
/// There is no explicit assertion. The test fails when any extraction task
/// throws, because awaiting <see cref="Task.WhenAll(Task[])"/> rethrows the
/// exception of a faulted task.
/// </remarks>
[TestMethod]
public async Task FailureTest()
{
    byte[] document1 = File.ReadAllBytes("any.pdf");
    using Stream documentStream1 = new MemoryStream(document1);

    // CloneStream is defined outside this snippet; presumably it returns an
    // independent copy of the stream for each task -- TODO confirm.
    // All clones are created before any task starts, so the source stream is
    // never touched concurrently.
    using var d1 = documentStream1.CloneStream();
    using var d2 = documentStream1.CloneStream();
    using var d3 = documentStream1.CloneStream();
    using var d4 = documentStream1.CloneStream();

    var t1 = Task.Run(() => GetTextFromAllPages(d1));
    var t2 = Task.Run(() => GetTextFromAllPages(d2));
    var t3 = Task.Run(() => GetTextFromAllPages(d3));
    var t4 = Task.Run(() => GetTextFromAllPages(d4));

    // The using declarations above dispose the streams only after this await
    // completes, i.e. after every task has finished (successfully or not).
    await Task.WhenAll(t1, t2, t3, t4).ConfigureAwait(false);
}
/// <summary>
/// Opens the PDF contained in <paramref name="stream"/> and extracts the text
/// of every page, in page order.
/// </summary>
/// <param name="stream">Stream containing the PDF document to read.</param>
/// <returns>
/// A fully materialized list with one string per page (index 0 holds page 1).
/// </returns>
IEnumerable<string> GetTextFromAllPages(Stream stream)
{
    // Document is IDisposable; release it as soon as all pages have been read.
    // The result below is a plain list of strings, so it stays valid afterwards.
    // NOTE(review): confirm that disposing the Document does not also close a
    // caller-owned stream that a caller might want to reuse.
    using Aspose.Pdf.Document pdfDocument = new Aspose.Pdf.Document(stream);

    int pagesCount = pdfDocument.Pages.Count;
    List<string> pdfTextPages = new List<string>(pagesCount);

    // The loop addresses pages by 1-based number: 1..Count inclusive.
    for (int pageNo = 1; pageNo <= pagesCount; pageNo++)
    {
        string pageContent = GetTextFromPage(pdfDocument, pageNo);
        pdfTextPages.Add(pageContent);
    }

    return pdfTextPages;
}
/// <summary>
/// Extracts the text of a single page by running a <c>TextDevice</c> over it
/// into an in-memory buffer and decoding that buffer as UTF-16
/// (<see cref="Encoding.Unicode"/>).
/// </summary>
/// <param name="pdfDocument">The already-opened document to read from.</param>
/// <param name="pageNumber">
/// Page number used to index <c>pdfDocument.Pages</c>; the caller in this file
/// passes values from 1 to <c>Pages.Count</c>.
/// </param>
/// <returns>The decoded text produced by the device for that page.</returns>
string GetTextFromPage(Aspose.Pdf.Document pdfDocument, int pageNumber)
{
    var extractor = new TextDevice();

    using (var buffer = new MemoryStream())
    {
        var page = pdfDocument.Pages[pageNumber];
        extractor.Process(page, buffer);

        // Decode the device output with the same encoding the original used.
        byte[] rawBytes = buffer.ToArray();
        return Encoding.Unicode.GetString(rawBytes);
    }
}
These are the exceptions that we see with TextDevice; we see similar issues when using TextAbsorber as well:
Message
The given key 'F182' was not present in the dictionary.
Stack
System.Collections.Generic.KeyNotFoundException:
at System.ThrowHelper.ThrowKeyNotFoundException (System.Private.CoreLib, Version=6.0.0.0, Culture=neutral, PublicKeyToken=7cec85d7bea7798e)
at #=zJdMgRdgt7hHT99I6mofkJ4Ka0BKS.#=zC96MGVX_Nm4j (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at #=zJdMgRdgt7hHT99I6mofkJ4Ka0BKS.#=zC96MGVX_Nm4j (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at #=z3AXh6KtNIg7GyS66nJhaeAtBVrFyLOstuZb$15TlByUnbvLiIWwczUs=.#=zRDfXBFM= (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at #=zxClYwO_U2PJlynuaEcNw5652YA8c0Rf2O$$aXrjpp3xUMyC9rg==.#=znmJrq5_VTFgO (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at #=zxClYwO_U2PJlynuaEcNw5652YA8c0Rf2O$$aXrjpp3xUMyC9rg==..ctor (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at #=z3AXh6KtNIg7GyS66nJhaeAtBVrFyLOstuZb$15TlByUnbvLiIWwczUs=.#=zaPSsevQ9HaUG (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at #=zKgNmlTRvtujzeIt4Ydrv4yDm6YtRp0$N_bi1hQnb$IjGdumeHPw7O$4=.#=ze$9nGDdz$bKx (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at #=zKgNmlTRvtujzeIt4Ydrv4yDm6YtRp0$N_bi1hQnb$IjGdumeHPw7O$4=.#=zA26DcZQ= (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at #=zKgNmlTRvtujzeIt4Ydrv4yDm6YtRp0$N_bi1hQnb$IjGdumeHPw7O$4=.#=zgLgkm1Q= (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at #=zivHMpuGKu7N7gWkY4coAwuGuV0Ego6tGbh_LVgpJLNpUCdh9cqaHsCUWhBCF.#=zbq4mz$RiuRix (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at #=zivHMpuGKu7N7gWkY4coAwuGuV0Ego6tGbh_LVgpJLNpUCdh9cqaHsCUWhBCF.#=zbq4mz$RiuRix (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at #=zivHMpuGKu7N7gWkY4coAwuGuV0Ego6tGbh_LVgpJLNpUCdh9cqaHsCUWhBCF.#=z2C5DQ9o= (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at #=zivHMpuGKu7N7gWkY4coAwuGuV0Ego6tGbh_LVgpJLNpUCdh9cqaHsCUWhBCF..ctor (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at Aspose.Pdf.Text.TextAbsorber.Visit (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at Aspose.Pdf.Devices.TextDevice.Process (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at PdfExtractor.AsposePDF.Document.Extraction.PDFAsposeToTextExtraction.GetTextFromPage
Message
Unable to cast object of type '#=zrvz0M4saau7$S$szxPz4ZBwtvaz$lQoDFw==' to type '#=zKDi6weuPF0KFdCxX7iW747kgoqM9uYCliA=='.
Stack
System.InvalidCastException:
at System.Runtime.CompilerServices.CastHelpers.ChkCast_Helper (System.Private.CoreLib, Version=6.0.0.0, Culture=neutral, PublicKeyToken=7cec85d7bea7798e)
at #=z4FTteMyuNTbEeQOt3Yq8dXzAtmW8Rwcxuy6dGsb$K9nNG7OUzw==.#=z$FnbquzaX7Mt (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at Aspose.Pdf.Operators.SelectFont.#=zktsDd9U= (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at Aspose.Pdf.Operator..ctor (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at Aspose.Pdf.Operators.TextOperator..ctor (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at Aspose.Pdf.Operators.TextStateOperator..ctor (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at Aspose.Pdf.Operators.SelectFont..ctor (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at #=z4FTteMyuNTbEeQOt3Yq8dXzAtmW8Rwcxuy6dGsb$K9nNG7OUzw==.#=zoUnE0zs= (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at Aspose.Pdf.OperatorCollection.#=zBjqwizIAYq8o (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at Aspose.Pdf.OperatorCollection.get_Count (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at #=zKgNmlTRvtujzeIt4Ydrv4yDm6YtRp0$N_bi1hQnb$IjGdumeHPw7O$4=.#=zgLgkm1Q= (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at #=zivHMpuGKu7N7gWkY4coAwuGuV0Ego6tGbh_LVgpJLNpUCdh9cqaHsCUWhBCF.#=zbq4mz$RiuRix (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at #=zivHMpuGKu7N7gWkY4coAwuGuV0Ego6tGbh_LVgpJLNpUCdh9cqaHsCUWhBCF.#=zbq4mz$RiuRix (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at #=zivHMpuGKu7N7gWkY4coAwuGuV0Ego6tGbh_LVgpJLNpUCdh9cqaHsCUWhBCF.#=z2C5DQ9o= (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at #=zivHMpuGKu7N7gWkY4coAwuGuV0Ego6tGbh_LVgpJLNpUCdh9cqaHsCUWhBCF..ctor (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at Aspose.Pdf.Text.TextAbsorber.Visit (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at Aspose.Pdf.Devices.TextDevice.Process (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at PdfExtractor.AsposePDF.Document.Extraction.PDFAsposeToTextExtraction.GetTextFromPage
Message
Index was out of range. Must be non-negative and less than the size of the collection. (Parameter 'index')
Stack
System.ArgumentOutOfRangeException:
at System.ThrowHelper.ThrowArgumentOutOfRange_IndexException (System.Private.CoreLib, Version=6.0.0.0, Culture=neutral, PublicKeyToken=7cec85d7bea7798e)
at #=z4FTteMyuNTbEeQOt3Yq8dXzAtmW8Rwcxuy6dGsb$K9nNG7OUzw==.#=z$FnbquzaX7Mt (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at Aspose.Pdf.Operators.SelectFont.#=zktsDd9U= (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at Aspose.Pdf.Operator..ctor (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at Aspose.Pdf.Operators.TextOperator..ctor (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at Aspose.Pdf.Operators.TextStateOperator..ctor (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at Aspose.Pdf.Operators.SelectFont..ctor (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at #=z4FTteMyuNTbEeQOt3Yq8dXzAtmW8Rwcxuy6dGsb$K9nNG7OUzw==.#=zoUnE0zs= (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at Aspose.Pdf.OperatorCollection.#=zBjqwizIAYq8o (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at Aspose.Pdf.OperatorCollection.get_Count (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at #=zKgNmlTRvtujzeIt4Ydrv4yDm6YtRp0$N_bi1hQnb$IjGdumeHPw7O$4=.#=zgLgkm1Q= (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at #=zivHMpuGKu7N7gWkY4coAwuGuV0Ego6tGbh_LVgpJLNpUCdh9cqaHsCUWhBCF.#=zbq4mz$RiuRix (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at #=zivHMpuGKu7N7gWkY4coAwuGuV0Ego6tGbh_LVgpJLNpUCdh9cqaHsCUWhBCF.#=zbq4mz$RiuRix (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at #=zivHMpuGKu7N7gWkY4coAwuGuV0Ego6tGbh_LVgpJLNpUCdh9cqaHsCUWhBCF.#=z2C5DQ9o= (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at #=zivHMpuGKu7N7gWkY4coAwuGuV0Ego6tGbh_LVgpJLNpUCdh9cqaHsCUWhBCF..ctor (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at Aspose.Pdf.Text.TextAbsorber.Visit (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at Aspose.Pdf.Devices.TextDevice.Process (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at PdfExtractor.AsposePDF.Document.Extraction.PDFAsposeToTextExtraction.GetTextFromPage
Message
An item with the same key has already been added. Key: F171
Stack
System.ArgumentException:
at System.ThrowHelper.ThrowAddingDuplicateWithKeyArgumentException (System.Private.CoreLib, Version=6.0.0.0, Culture=neutral, PublicKeyToken=7cec85d7bea7798e)
at System.Collections.Generic.Dictionary`2.TryInsert (System.Private.CoreLib, Version=6.0.0.0, Culture=neutral, PublicKeyToken=7cec85d7bea7798e)
at #=zJdMgRdgt7hHT99I6mofkJ4Ka0BKS.#=zC96MGVX_Nm4j (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at #=zJdMgRdgt7hHT99I6mofkJ4Ka0BKS.#=zC96MGVX_Nm4j (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at #=z3AXh6KtNIg7GyS66nJhaeAtBVrFyLOstuZb$15TlByUnbvLiIWwczUs=.#=zRDfXBFM= (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at #=zxClYwO_U2PJlynuaEcNw5652YA8c0Rf2O$$aXrjpp3xUMyC9rg==.#=znmJrq5_VTFgO (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at #=zxClYwO_U2PJlynuaEcNw5652YA8c0Rf2O$$aXrjpp3xUMyC9rg==..ctor (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at #=z3AXh6KtNIg7GyS66nJhaeAtBVrFyLOstuZb$15TlByUnbvLiIWwczUs=.#=zaPSsevQ9HaUG (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at #=zKgNmlTRvtujzeIt4Ydrv4yDm6YtRp0$N_bi1hQnb$IjGdumeHPw7O$4=.#=ze$9nGDdz$bKx (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at #=zKgNmlTRvtujzeIt4Ydrv4yDm6YtRp0$N_bi1hQnb$IjGdumeHPw7O$4=.#=zA26DcZQ= (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at #=zKgNmlTRvtujzeIt4Ydrv4yDm6YtRp0$N_bi1hQnb$IjGdumeHPw7O$4=.#=zgLgkm1Q= (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at #=zivHMpuGKu7N7gWkY4coAwuGuV0Ego6tGbh_LVgpJLNpUCdh9cqaHsCUWhBCF.#=zbq4mz$RiuRix (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at #=zivHMpuGKu7N7gWkY4coAwuGuV0Ego6tGbh_LVgpJLNpUCdh9cqaHsCUWhBCF.#=zbq4mz$RiuRix (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at #=zivHMpuGKu7N7gWkY4coAwuGuV0Ego6tGbh_LVgpJLNpUCdh9cqaHsCUWhBCF.#=z2C5DQ9o= (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at #=zivHMpuGKu7N7gWkY4coAwuGuV0Ego6tGbh_LVgpJLNpUCdh9cqaHsCUWhBCF..ctor (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at Aspose.Pdf.Text.TextAbsorber.Visit (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at Aspose.Pdf.Devices.TextDevice.Process (Aspose.PDF, Version=23.12.0.0, Culture=neutral, PublicKeyToken=716fcc553a201e56)
at PdfExtractor.AsposePDF.Document.Extraction.PDFAsposeToTextExtraction.GetTextFromPage