The issue was tested in a Java 8 environment with Aspose.PDF 19.3 (currently in production) and 21.5 (the latest available version).
The result is the same in both versions: an OOM (OutOfMemoryError) is thrown.
Sample code and a sample document are provided below.
Please provide a workaround and/or register this behavior as a bug.
import com.aspose.pdf.Document;
import com.aspose.pdf.TextFragmentAbsorber;
import com.aspose.pdf.TextFragmentCollection;
import com.aspose.pdf.facades.PdfExtractor;
import org.apache.commons.lang.StringUtils;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
/**
 * Command-line utility that counts how many times the {@link #ANCHOR} text occurs in a PDF file.
 *
 * <p>Usage: {@code FindAnchorOnPdf <path-to-pdf> [check-flag]}.
 */
public class FindAnchorOnPdf {
    /** Anchor text that is searched for in the document. */
    private static final String ANCHOR = "МЕСТО_ДЛЯ_ПЕЧАТИ";

    /**
     * Opens the PDF given by the first argument, searches all of its pages for {@link #ANCHOR}
     * and prints the number of text fragments found.
     *
     * @param args
     *     0 - path to the file in which the anchor is searched;
     *     1 - flag that enables the "does the document contain any text" pre-check
     *         (the pre-check runs when exactly two arguments are passed; the value itself is not read)
     * @throws IOException if the source file cannot be opened or a stream fails to close
     */
    public static void main(String[] args) throws IOException {
        if (0 == args.length || 0 == args[0].length()) {
            System.out.println("Need set one argument - path to file");
            return;
        }
        File sourceFile = new File(args[0]);
        if (!sourceFile.exists()) {
            System.out.println("File do not exist: " + sourceFile.getAbsolutePath());
            return;
        }
        try (InputStream pdfStream = new FileInputStream(sourceFile)) {
            Document document = new Document(pdfStream);
            try {
                TextFragmentAbsorber textFragmentAbsorber = new TextFragmentAbsorber(ANCHOR);
                // If the document has no text it is a scan, so further checks make no sense: leave.
                if (2 == args.length && !hasExtractableText(document)) {
                    return;
                }
                document.getPages().accept(textFragmentAbsorber);
                TextFragmentCollection textFragmentCollection = textFragmentAbsorber.getTextFragments();
                System.out.println(String.format("Find %1$d anchors", textFragmentCollection.size()));
            } finally {
                // Release the document on every exit path (including the early return above),
                // and before the underlying input stream is closed by try-with-resources.
                document.close();
            }
        }
    }

    /**
     * Extracts the text of the document and reports whether anything is left after the
     * clean-up regex has been applied.
     *
     * @param document an already opened document; it is not closed by this method
     * @return {@code true} if the cleaned-up extracted text is non-empty, {@code false} otherwise
     * @throws IOException if closing the in-memory buffer fails
     */
    private static boolean hasExtractableText(Document document) throws IOException {
        try (ByteArrayOutputStream docStream = new ByteArrayOutputStream()) {
            // NOTE(review): the extractor is intentionally not closed here - closing the facade
            // may also dispose the bound document, which the caller still needs; confirm against
            // the Aspose.PDF API documentation.
            PdfExtractor pdfExtractor = new PdfExtractor();
            pdfExtractor.bindPdf(document);
            pdfExtractor.extractText();
            pdfExtractor.getText(docStream);
            // The regex removes every NUL character together with an optional CR and/or LF
            // directly in front of it; whatever remains counts as real text.
            String extracted = docStream.toString().replaceAll("\\r?\\n?\0", "");
            return !StringUtils.isEmpty(extracted);
        }
    }
}
Link to the test PDF: https://we.tl/t-ImAUYsEv4e