Hi,
I’m trying to remove text from some pdf files.
Neither of the following two methods works.
Tested on Windows 7 and Windows 10.
.NET target platform is .net 6.0
Using SDK: Aspose.PDF for .Net 24.12
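Test code 1:
The Text property of TextShowOperator is not correct: every character is preceded by a NUL ('\0') character, so comparing it with the plain string never matches.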
// Deletes every text-showing operator whose text equals "std.miit.gov.cn" from each page
// of the PDF at `filename`, then saves the result as `filename + ".removeText2.pdf"`.
// The raw text of every selected text-showing operator is also echoed to the console.
void testRemoveText2(string filename)
{
    // Text that should be removed from the page content streams.
    const string targetText = "std.miit.gov.cn";

    // Document is IDisposable; the using declaration releases it after the save below.
    using Document pdfDocument = new Document(filename);

    // Used text showing operators
    Operator[] operators = new Operator[]
    {
        new Aspose.Pdf.Operators.ShowText(),
    };

    foreach (Page page in pdfDocument.Pages)
    {
        // Operators collected for deletion on this page.
        List<Aspose.Pdf.Operator> list = new List<Aspose.Pdf.Operator>();
        OperatorCollection pageOperators = page.Contents;

        foreach (Operator op in operators)
        {
            OperatorSelector operatorSelector = new OperatorSelector(op);
            pageOperators.Accept(operatorSelector);

            foreach (object obj in operatorSelector.Selected)
            {
                if (obj is TextShowOperator oop)
                {
                    // For the reported document oop.Text comes back as "\0s\0t\0d\0.\0m..."
                    // (a NUL before each character; presumably the raw two-byte codes of a
                    // UCS-2 encoded font - TODO confirm), so a direct comparison with the
                    // plain string never matches. Strip the NULs before comparing.
                    string normalizedText = (oop.Text ?? string.Empty)
                        .Replace("\0", string.Empty, StringComparison.Ordinal);

                    if (normalizedText == targetText)
                    {
                        list.Add(oop);
                    }

                    // Echo the raw operator text so the encoding can still be inspected.
                    Console.WriteLine(oop.Text);
                }
            }
        }

        // Remove the matched operators from this page's content stream.
        pageOperators.Delete(list);
    }

    pdfDocument.Save(filename + ".removeText2.pdf");
}
Test code 2:
This throws an exception whose inner exception is IndexOutOfRangeException: Index was outside the bounds of the array.
// Searches the whole PDF at `filename` for the text "std.miit.gov.cn", blanks every
// matching fragment, then saves the result as `filename + ".removeText.pdf"`.
void testRemoveText(string filename)
{
    // Document is IDisposable; the using declaration releases it after the save below
    // (and also when an exception escapes this method).
    using Document pdfDocument = new Document(filename);

    TextFragmentAbsorber textFragmentAbsorber = new TextFragmentAbsorber("std.miit.gov.cn");

    // Search options: constructor argument `false`, and font resource errors ignored.
    TextSearchOptions textSearchOptions = new TextSearchOptions(false)
    {
        IgnoreResourceFontErrors = true,
    };
    textFragmentAbsorber.TextSearchOptions = textSearchOptions;

    // Run the search over all pages.
    pdfDocument.Pages.Accept(textFragmentAbsorber);

    foreach (TextFragment textFragment in textFragmentAbsorber.TextFragments)
    {
        // NOTE(review): with the attached test file this assignment is where the reported
        // "Incorrect input string passed for font STSong-Light-UniGB-UCS2-H" exception is
        // thrown (stack trace points at TextFragment.set_Text).
        textFragment.Text = "";
    }

    pdfDocument.Save(filename + ".removeText.pdf");
}
Failure message for test code 2:
#=z_RuW3K9tWa63QXf48eYBtzCRPI_E7vmvDTfwWAw=
HResult=0x80131500
Message=Incorrect input string passed for font STSong-Light-UniGB-UCS2-H
Source=Aspose.PDF
StackTrace:
在 #=zlmDFQIODhk83SLt2yia66G$kYGEGsqPmm8HApAHd37bC3SDSTtYSgIA=.#=zvKQ6sJ85O$30(#=zsXBb6A$qdhnPFMmsOHmpsmwu$qXZWh4rwQ== #=zhVacbMnAtPlt, Boolean #=zBzk_iRfcN4HbC71MGQ==)
在 #=zTJcTyGbFessSSxovaP7p88$N$dX6LvEbSA==.#=zw37KZag=(Boolean #=z_dvUE_wn9m1URUsDXg==)
在 #=zTJcTyGbFessSSxovaP7p88$N$dX6LvEbSA==.#=z4nNdBI0x96Fp()
在 #=ztoMTCPimbvBhsGnpq8cZWmjOYzAfhRYMoBb7QK2DACU_.#=zTXP_9ToO7chgJwOpwQ==.#=zcBhtVliq24Ta(UInt32 #=z8D0x0PY=)
在 #=ztoMTCPimbvBhsGnpq8cZWmjOYzAfhRYMoBb7QK2DACU_.#=zTXP_9ToO7chgJwOpwQ==.#=zH1eIqhjHRr7I(Int32 #=zAq4C2TE=, Double& #=zMBLnmW07ieak, Double& #=z9xgoQ$3Vijrc, UInt32 #=z8D0x0PY=)
在 #=ztoMTCPimbvBhsGnpq8cZWmjOYzAfhRYMoBb7QK2DACU_.#=zTXP_9ToO7chgJwOpwQ==.#=zHOF6GzsCZKZw(Int32 #=zAq4C2TE=, Double #=zMBLnmW07ieak, Double #=z9xgoQ$3Vijrc, UInt32 #=z8D0x0PY=)
在 #=ztoMTCPimbvBhsGnpq8cZWmjOYzAfhRYMoBb7QK2DACU_.#=zTXP_9ToO7chgJwOpwQ==.#=z37hh8oPweujM(UInt32 #=zMknrHN0=, UInt32 #=z8D0x0PY=)
在 #=zbzrJu4QH6hrcYB_TryAccRSmCwDQFaYP9U5TjVTEeM7A.#=zSyZS8dRzQHbK_BiCSw==(String #=zQvx$ozs=, Int32 #=zUtafNAGj_jzg, Int32 #=zM1A8o$rfDN5o, #=z37hh8oPweujM #=zvVpK1SNV4Ygf)
在 #=ztoMTCPimbvBhsGnpq8cZWmjOYzAfhRYMoBb7QK2DACU_.#=zHti0rC8LI3V6(#=zsXBb6A$qdhnPFMmsOHmpsmwu$qXZWh4rwQ== #=zJwsmw1M=, Int32 #=zUtafNAGj_jzg, Int32 #=zM1A8o$rfDN5o, Double #=zaOE95l8=, Double& #=zRmurGuA=, Double& #=z9Z9rQOM=, #=zNfPIeFip4D6$w0FN9NuAre4__CdInhzEuC6VXlA=[] #=zpAYoPg0_irRY, Boolean& #=zcZSK5a4=, Boolean #=znisvosHKvFy0VaLpOs_YyJc=, Boolean #=zIqckIs_Pf4Kv)
在 #=ztoMTCPimbvBhsGnpq8cZWmjOYzAfhRYMoBb7QK2DACU_.#=zJ12NhSg=(#=zsXBb6A$qdhnPFMmsOHmpsmwu$qXZWh4rwQ== #=zJwsmw1M=, Int32 #=zUtafNAGj_jzg, Int32 #=zM1A8o$rfDN5o, Double #=zaOE95l8=, Boolean #=z9aRwFcDhgocRVeWeSA==, Double& #=zRmurGuA=, Double& #=z9Z9rQOM=, #=zNfPIeFip4D6$w0FN9NuAre4__CdInhzEuC6VXlA=[]& #=ztnRBOCkAWliH, Boolean& #=zcZSK5a4=, Boolean #=znisvosHKvFy0VaLpOs_YyJc=)
在 #=ztoMTCPimbvBhsGnpq8cZWmjOYzAfhRYMoBb7QK2DACU_.#=zJ12NhSg=(#=zsXBb6A$qdhnPFMmsOHmpsmwu$qXZWh4rwQ== #=zJwsmw1M=, Int32 #=zUtafNAGj_jzg, Int32 #=zM1A8o$rfDN5o, Double #=zaOE95l8=, #=zNfPIeFip4D6$w0FN9NuAre4__CdInhzEuC6VXlA=[]& #=ztnRBOCkAWliH)
在 #=zJqzEB5YWJrqCJbvaLmFIc2Rm5uixTrfeT3$mMag=.#=zJ12NhSg=(#=zsXBb6A$qdhnPFMmsOHmpsmwu$qXZWh4rwQ== #=zJwsmw1M=, Int32 #=zUtafNAGj_jzg, Int32 #=zM1A8o$rfDN5o, Double #=zaOE95l8=, #=zNfPIeFip4D6$w0FN9NuAre4__CdInhzEuC6VXlA=[]& #=ztnRBOCkAWliH)
在 #=zvGNz9Y9a1DDl$CnSwWo_QZ1htCc1PjRMVD6Kbyz4QIrQg2pKofkFrRU=.#=zJ12NhSg=(#=zsXBb6A$qdhnPFMmsOHmpsmwu$qXZWh4rwQ== #=zJwsmw1M=, Int32& #=zlxfdigcHZ3Jp, Int32 #=z1tg$9VQ=, Int32 #=z3mviV4A=, Boolean #=zMNIrXIwCvaOP4gtdGYV7DHg=)
在 #=zuvYg7p43lde7IvbqM3Puq3j1hkyetKi4klqrglwNpsOKSr3QTw==.#=z6pOzSbtyyuSu(Int32 #=z1tg$9VQ=, Int32 #=z3mviV4A=, Boolean #=zMNIrXIwCvaOP4gtdGYV7DHg=, Int32& #=zxDKKCMY=, Int32& #=zbtFev$cn1OwRi2aKUA==)
在 #=zuvYg7p43lde7IvbqM3Puq3j1hkyetKi4klqrglwNpsOKSr3QTw==.#=zy9YbJiEUijY4(Int32 #=z1tg$9VQ=, Int32 #=z3mviV4A=, Boolean #=zMNIrXIwCvaOP4gtdGYV7DHg=)
在 Aspose.Pdf.Text.TextFragment.#=ztU41M6UiSkyS(Double #=zZW5ufbK_xOak)
在 Aspose.Pdf.Text.TextFragment.set_Text(String value)
在 Program.<<Main>$>g__testRemoveText|0_12(String filename) 在 C:\.NET\ConsoleApp.PDF\ConsoleApp1\Program.cs 中: 第 274 行
在 Program.<Main>$(String[] args) 在 C:\.NET\ConsoleApp.PDF\ConsoleApp1\Program.cs 中: 第 284 行
此异常最初是在此调用堆栈中引发的:
#=zbzrJu4QH6hrcYB_TryAccRSmCwDQFaYP9U5TjVTEeM7A.#=zSyZS8dRzQHbK_BiCSw==(string, #=zbzrJu4QH6hrcYB_TryAccRSmCwDQFaYP9U5TjVTEeM7A.#=z37hh8oPweujM)
#=zlmDFQIODhk83SLt2yia66G$kYGEGsqPmm8HApAHd37bC3SDSTtYSgIA=.#=zvKQ6sJ85O$30(#=zsXBb6A$qdhnPFMmsOHmpsmwu$qXZWh4rwQ==, bool)
内部异常 1:
IndexOutOfRangeException: Index was outside the bounds of the array.
Test file:
JBT13734-2019 工业有机废气蓄热热力燃烧装置-2.7z (1.9 KB)