@athota
Thanks for your feedback.
We would like to share some more details about our investigation and the ticket resolution. The time grows with every setText/setFont/setFontSize (and similar) call because each such change updates the data in the document's operators collection, which is then serialized and de-serialized. For the cases where you just want to change the text of every found text segment, we have implemented methods that avoid serializing and de-serializing on every iteration; the changes are processed only once the document is saved.
On our side, the time previously taken to set the text and the font for all segments in the first fragment was 649-740 ms. After switching to the SuppressedUpdate
methods, the time dropped to 312-339 ms, which is more than twice as fast.
Please replace the methods as follows:
segment.setText("");
-> segment.setTextSuppressedUpdate("");
segment.getTextState().setFont(arial);
-> segment.getTextState().setFontSuppressedUpdate(arial);
segment.getTextState().setFontSize((segment.getTextState().getFontSize() - 1));
-> segment.getTextState().setFontSizeSuppressedUpdate((segment.getTextState().getFontSize() - 1));
Complete code:
// Open the input document and collect every text fragment that matches the search pattern.
Document pdfDocument = new Document(dataDir + "input-cjk.pdf");
TextSearchOptions searchOptions = new TextSearchOptions(true);
TextFragmentAbsorber visitor = new TextFragmentAbsorber("(?s)\\Q**\\E.*?!", searchOptions);
visitor.getTextReplaceOptions().setReplaceAdjustmentAction(TextReplaceOptions.ReplaceAdjustment.None);
pdfDocument.getPages().accept(visitor);

// Report what was found before changing anything.
TextFragmentCollection textFragments = visitor.getTextFragments();
System.out.println("Number of fragments : " + textFragments.size() + "\n");
for (TextFragment found : textFragments) {
    System.out.println("Fragment : " + found.getText());
}

Font arial = FontRepository.findFont("Arial");
Instant start = Instant.now();

for (TextFragment fragment : textFragments) {
    TextSegmentCollection segments = fragment.getSegments();
    System.out.println("Number of segments on fragment " + fragment.getText() + ": " + segments.size());

    // First pass: blank out the text of each segment using the SuppressedUpdate
    // variant instead of segment.setText(""), and time each call.
    Iterator<TextSegment> textPass = segments.iterator();
    while (textPass.hasNext()) {
        TextSegment segment = textPass.next();
        Instant segmentStart = Instant.now();
        System.out.println("BEFORE : Segment : " + segment.getText());
        segment.setTextSuppressedUpdate("");
        System.out.println("AFTER : Segment : " + segment.getText());
        long elapsedMs = Duration.between(segmentStart, Instant.now()).toMillis();
        System.out.println("Time Taken to set text on segment : " + elapsedMs + " ms");
    }

    // Second pass: switch each segment to Arial and reduce its font size by one,
    // using the SuppressedUpdate variants instead of setFont(...) / setFontSize(...).
    Iterator<TextSegment> fontPass = segments.iterator();
    while (fontPass.hasNext()) {
        TextSegment segment = fontPass.next();
        Instant segmentStart = Instant.now();
        System.out.println("BEFORE : Segment : " + segment.getText());
        segment.getTextState().setFontSuppressedUpdate(arial);
        segment.getTextState().setFontSizeSuppressedUpdate((segment.getTextState().getFontSize() - 1));
        System.out.println("AFTER : Segment : " + segment.getText());
        long elapsedMs = Duration.between(segmentStart, Instant.now()).toMillis();
        System.out.println("Time Taken to set font and font size on segment : " + elapsedMs + " ms");
    }
}

// Total time for the whole loop above, then write the modified document to disk.
long totalMs = Duration.between(start, Instant.now()).toMillis();
System.out.println("Time Taken to set text and set font on all segments in the first fragment : " + totalMs + " ms");
pdfDocument.save(dataDir + "input-cjk_version19.6_SuppressedUpdate.pdf");
Also, you can perform the font change as a mass operation, which will improve the overall performance further. If the last loop in the above code is changed in the following way, the run takes 220-262 ms:
// Open the input document and collect every text fragment that matches the search pattern.
Document pdfDocument = new Document(dataDir + "input-cjk.pdf");
TextSearchOptions searchOptions = new TextSearchOptions(true);
TextFragmentAbsorber visitor = new TextFragmentAbsorber("(?s)\\Q**\\E.*?!", searchOptions);
visitor.getTextReplaceOptions().setReplaceAdjustmentAction(TextReplaceOptions.ReplaceAdjustment.None);
pdfDocument.getPages().accept(visitor);

// Report what was found before changing anything.
TextFragmentCollection textFragments = visitor.getTextFragments();
System.out.println("Number of fragments : " + textFragments.size() + "\n");
for (TextFragment found : textFragments) {
    System.out.println("Fragment : " + found.getText());
}

Font arial = FontRepository.findFont("Arial");
Instant start = Instant.now();

for (TextFragment fragment : textFragments) {
    TextSegmentCollection segments = fragment.getSegments();
    System.out.println("Number of segments on fragment " + fragment.getText() + ": " + segments.size());

    // First pass: blank out the text of each segment using the SuppressedUpdate
    // variant instead of segment.setText(""), and time each call.
    Iterator<TextSegment> textPass = segments.iterator();
    while (textPass.hasNext()) {
        TextSegment segment = textPass.next();
        Instant segmentStart = Instant.now();
        System.out.println("BEFORE : Segment : " + segment.getText());
        segment.setTextSuppressedUpdate("");
        System.out.println("AFTER : Segment : " + segment.getText());
        long elapsedMs = Duration.between(segmentStart, Instant.now()).toMillis();
        System.out.println("Time Taken to set text on segment : " + elapsedMs + " ms");
    }

    // Mass operation: hand the font to the absorber in a single call instead of
    // calling setFontSuppressedUpdate(arial) on every segment.
    // NOTE(review): this call is inside the per-fragment loop, so it runs once per
    // fragment - confirm whether a single call outside the loop would be sufficient.
    visitor.applyForAllFragments(arial);

    // Second pass: only the font size is still adjusted per segment (reduced by one).
    Iterator<TextSegment> fontSizePass = segments.iterator();
    while (fontSizePass.hasNext()) {
        TextSegment segment = fontSizePass.next();
        Instant segmentStart = Instant.now();
        System.out.println("BEFORE : Segment : " + segment.getText());
        segment.getTextState().setFontSizeSuppressedUpdate((segment.getTextState().getFontSize() - 1));
        System.out.println("AFTER : Segment : " + segment.getText());
        long elapsedMs = Duration.between(segmentStart, Instant.now()).toMillis();
        System.out.println("Time Taken to set font and font size on segment : " + elapsedMs + " ms");
    }
}

// Total time for the whole loop above, then write the modified document to disk.
long totalMs = Duration.between(start, Instant.now()).toMillis();
System.out.println("Time Taken to set text and set font on all segments in the first fragment : " + totalMs + " ms");
pdfDocument.save(dataDir + "input-cjk_version19.6_SuppressedUpdate.pdf");
Furthermore, we have recorded your feedback and concerns and will let you know in case we have further updates and feedback to share with you.