Hi,
Aspose PDF to HTML conversion and text replacement are taking too much time; in many cases they take more than 1 minute.
Please see attached code and file.
/**
 * Opens the PDF at the given path and overwrites its document-information entries.
 *
 * <p>Author is set from {@code clientName}; subject and title are both set to
 * {@code name}; keywords are cleared; creation and modification dates are set to
 * the current time.
 *
 * <p>NOTE(review): {@code clientName} is not declared in this method. Presumably it
 * is a field of the enclosing class; confirm it was not meant to be {@code name}.
 *
 * @param documentPath path of the PDF file to open
 * @param name         value written to the subject and title entries
 * @return the opened document with its information entries updated
 */
public Document loadDocument(String documentPath, String name) {
    Document pdf = new Document(documentPath);
    DocumentInfo metadata = pdf.getInfo();

    metadata.setAuthor(clientName);
    metadata.setCreationDate(new java.util.Date());
    // Disabled attempts to set the Producer / Creator entries:
    // metadata.addItem("Producer", name);
    // metadata.addItem("Creator", name);
    metadata.setKeywords("");
    metadata.setModDate(new java.util.Date());
    metadata.setSubject(name);
    metadata.setTitle(name);

    return pdf;
}
public void doReplacement(){
String fileName = “5.pdf”;
Document documentObj = loadDocument(fileName,“Srikanth”)
ArrayList wordList = new ArrayList();
wordList.add(“India”);
wordList.add(“other”);
wordList.add(“name”);
wordList.add(“years”);
for (String word : wordList) {
this.replaceDocumentByRegex(documentObj, word, “");
}
this.replaceDocumentByRegex(documentObj,"[A-Z0-9._%±]+@[A-Z0-9.-]+\.[A-Z]{2,4}", "”);
this.replaceDocumentByRegex(documentObj, “^\+(?:[0-9]\ ?){6,14}[0-9]$”, “****”);
convertToHtml( documentObj, “output.html”)
}
/**
 * Replaces every text fragment in the document that matches a regular expression.
 *
 * @param documentObj document whose pages are searched and modified in place
 * @param Regex       regular expression to search for
 * @param Replacement text written over each matching fragment
 */
public void replaceDocumentByRegex(Document documentObj, String Regex, String Replacement) {
    TextFragmentAbsorber textFragmentAbsorber = new TextFragmentAbsorber(Regex);

    // Tell the absorber to interpret the search phrase as a regular expression.
    TextSearchOptions textSearchOptions = new TextSearchOptions(true);
    textFragmentAbsorber.setTextSearchOptions(textSearchOptions);

    // Run the absorber over the document's whole page collection (not just page one).
    documentObj.getPages().accept(textFragmentAbsorber);

    // Overwrite the text of every fragment the absorber collected.
    TextFragmentCollection textFragmentCollection = textFragmentAbsorber.getTextFragments();
    for (TextFragment textFragment : (Iterable<TextFragment>) textFragmentCollection) {
        textFragment.setText(Replacement);
    }
}
/**
 * Saves the document as HTML using a fixed set of {@code HtmlSaveOptions}.
 *
 * @param documentObj document to convert
 * @param outHtmlFile path of the HTML file to write
 */
public void convertToHtml(Document documentObj, String outHtmlFile) {
    HtmlSaveOptions newOptions = new HtmlSaveOptions();

    // Embed all resources inside the HTML.
    newOptions.PartsEmbeddingMode = HtmlSaveOptions.PartsEmbeddingModes.EmbedAllIntoHtml;

    // This is just an optimization for IE and can be omitted.
    newOptions.LettersPositioningMethod =
            LettersPositioningMethods.UseEmUnitsAndCompensationOfRoundingErrorsInCss;

    // Set once; the snippet previously assigned this same value twice.
    newOptions.RasterImagesSavingMode =
            HtmlSaveOptions.RasterImagesSavingModes.AsEmbeddedPartsOfPngPageBackground;

    // NOTE(review): SaveInAllFormats presumably emits every supported font format;
    // worth checking whether a narrower mode is enough if conversion time matters.
    newOptions.FontSavingMode = HtmlSaveOptions.FontSavingModes.SaveInAllFormats;

    documentObj.save(outHtmlFile, newOptions);
}
// Invoke the methods above like this:
doReplacement();
I am attaching the PDF file. We are also unable to change PDF properties such as Producer, Creator, etc.
Also, my email replacement is not working. I have used the same regex with Aspose.Words and it works as expected. Can you please suggest why my email regex is not working here?