package pdftranslator;
import java.awt.GraphicsEnvironment;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import com.aspose.pdf.Document;
import com.aspose.pdf.Field;
import com.aspose.pdf.Font;
import com.aspose.pdf.FontRepository;
import com.aspose.pdf.Form;
import com.aspose.pdf.Paragraphs;
import com.aspose.pdf.TextFragment;
import com.aspose.pdf.TextFragmentAbsorber;
import com.aspose.pdf.TextFragmentCollection;
import com.aspose.pdf.TextReplaceOptions;
import com.aspose.pdf.TextSearchOptions;
import com.aspose.pdf.facades.PdfFileInfo;
import pdftranslator.Base;
public class ReplaceTextAspose {
/**
 * Command-line entry point: runs one text replacement and prints the resulting
 * JSON object to standard output.
 *
 * <p>Expected arguments, in order: filePath, searchString, replacement,
 * fontExtractionPath, defaultFont, defaultFontSize. If fewer than six are
 * supplied a usage message is printed instead and nothing is processed.
 *
 * @param args command-line arguments (six required, extras are ignored)
 *
 * @throws Exception if {@link #replacePDFText} propagates one
 */
public static void main(String[] args) throws Exception {
    // All six positions are read below, so anything shorter has to be rejected
    // here rather than failing later with ArrayIndexOutOfBoundsException.
    if (args.length >= 6) {
        JSONObject response = replacePDFText(args[0], args[1], args[2], args[3], args[4], args[5]);
        System.out.print(response);
    } else {
        System.out.print("ReplaceTextAspose requires filePath, searchString, replacement, fontExtractionPath, defaultFont and defaultFontSize as arguments");
    }
}
/**
 * Replaces occurrences of a search string in a PDF and saves the document back
 * to the path it was read from.
 *
 * <p>Flow, as implemented below:
 * <ol>
 * <li>{@code Base.checkFileContent(filePath)} must return true, otherwise an
 *     "empty file content" error object is returned.</li>
 * <li>Documents for which {@link #checkContainForm(Document)} returns true are
 *     rejected with an error object.</li>
 * <li>If {@code searchString} matches the URL pattern, it is searched as plain
 *     text and every hit is set to {@code replacement}.</li>
 * <li>Otherwise each variant from {@link #getUnicodeSearchStrings} is turned
 *     into a regex by {@link #getEscapeString} and searched. Every hit gets its
 *     own font, size and style re-applied (or {@code defaultFont} /
 *     {@code defaultFontSize} if that throws) before its text is replaced.</li>
 * <li>The document is written with {@link #saveDocument}.</li>
 * </ol>
 *
 * @param filePath path of the PDF to read and overwrite
 * @param searchString text to look for
 * @param replacement text written into every matching fragment
 * @param fontExtractionPath forwarded unchanged to {@link #getUnicodeSearchStrings}
 * @param defaultFont font name used when re-applying a fragment's own font fails
 * @param defaultFontSize font size (parsed with {@code Float.valueOf}) used together with {@code defaultFont}
 *
 * @return on success a JSON object with {@code status}, {@code file_path},
 *         {@code remediated_by_default_font} and {@code original_font_name};
 *         on a handled failure a JSON object with {@code status} "error" and
 *         {@code error_message}; for caught exceptions, whatever
 *         {@code Base.handleException} returns
 * @throws Exception declared for callers; exceptions raised inside the try block
 *         are converted through {@code Base.handleException}
 */
@SuppressWarnings("unchecked")
public static JSONObject replacePDFText(String filePath, String searchString, String replacement, String fontExtractionPath, String defaultFont, String defaultFontSize) throws Exception {
    JSONObject successObject = new JSONObject();
    Base baseObject = new Base();
    boolean remediatedByDefaultFont = false;
    String fontName = null;
    try {
        if (!baseObject.checkFileContent(filePath)) {
            successObject.put("status", "error");
            String errorMessage = "empty file content "+filePath;
            successObject.put("error_message", errorMessage);
            return successObject;
        }

        // Initialize and apply the Aspose license bundled on the classpath
        com.aspose.pdf.License license = new com.aspose.pdf.License();
        license.setLicense(ReplaceTextAspose.class.getResourceAsStream("/Aspose.Total.Java.lic"));

        // try-with-resources: the stream is released on every exit path,
        // including the early returns and any exception below.
        try (FileInputStream fis = new FileInputStream(filePath)) {
            Document pdfDocument = new Document(fis);
            PdfFileInfo info = new PdfFileInfo(filePath);
            float pageWidth = info.getPageWidth(1);

            if (checkContainForm(pdfDocument)) {
                successObject.put("status", "error");
                successObject.put("file_path", filePath);
                successObject.put("error_message", "pdf file contain form itself");
                return successObject;
            }

            String urlPattern = "^((https?|ftp|smtp):\\/\\/)?(www.)?[a-zA-z0-9]+(\\.[a-zA-Z]{2,}){1,3}(#?\\/?[a-zA-Z0-9#]+)*\\/?(\\?[a-zA-Z0-9-_]+=[a-zA-Z0-9-%]+&?)?";
            if (Pattern.matches(urlPattern, searchString)) {
                // URL-like search strings are looked up verbatim, not as a regex
                TextFragmentAbsorber urlAbsorber = new TextFragmentAbsorber(searchString);
                pdfDocument.getPages().accept(urlAbsorber);
                TextFragmentCollection urlFragments = urlAbsorber.getTextFragments();
                for (TextFragment textFragment : (Iterable<TextFragment>) urlFragments) {
                    textFragment.setText(replacement);
                }
            } else {
                // Loop-invariant: a longer replacement on a page narrower than 300
                // (value of PdfFileInfo.getPageWidth(1)) gets hyphenation, everything
                // else shifts the rest of the line.
                boolean hyphenate = replacement.length() > searchString.length() && pageWidth < 300;

                List<String> unicodeList = getUnicodeSearchStrings(searchString, fontExtractionPath);
                for (String currentSearchString : unicodeList) {
                    String pattern = getEscapeString(currentSearchString);
                    TextFragmentAbsorber textFragmentAbsorber = new TextFragmentAbsorber(pattern, new TextSearchOptions(true));

                    TextReplaceOptions textReplaceOptions = textFragmentAbsorber.getTextReplaceOptions();
                    if (textReplaceOptions != null) {
                        if (hyphenate) {
                            textReplaceOptions.setReplaceAdjustmentAction(TextReplaceOptions.ReplaceAdjustment.WholeWordsHyphenation);
                        } else {
                            textReplaceOptions.setReplaceAdjustmentAction(TextReplaceOptions.ReplaceAdjustment.ShiftRestOfLine);
                        }
                    }

                    // Search the whole document; for two known failure messages fall
                    // back to a page-by-page search that skips failing pages.
                    try {
                        pdfDocument.getPages().accept(textFragmentAbsorber);
                    } catch (Exception e) {
                        String message = e.getMessage();
                        // equalsIgnoreCase is null-safe, so an exception without a
                        // message no longer triggers a NullPointerException here.
                        boolean recoverable = "key already present in dictionary".equalsIgnoreCase(message)
                                || "invalid font name".equalsIgnoreCase(message);
                        if (recoverable) {
                            pdfDocument = acceptAllPages(pdfDocument, textFragmentAbsorber);
                        } else {
                            return baseObject.handleException(message != null ? message : e.toString());
                        }
                    }

                    TextFragmentCollection textFragmentCollection = textFragmentAbsorber.getTextFragments();
                    for (TextFragment textFragment : (Iterable<TextFragment>) textFragmentCollection) {
                        // Capture the fragment's current look before touching it
                        Font font = textFragment.getTextState().getFont();
                        float fontSize = textFragment.getTextState().getFontSize();
                        int fontStyle = textFragment.getTextState().getFontStyle();
                        fontName = font.getFontName();
                        try {
                            textFragment.getTextState().setFont(FontRepository.findFont(fontName,1));
                            textFragment.getTextState().setFontSize(fontSize);
                            textFragment.getTextState().setFontStyle(fontStyle);
                        } catch (Exception e) {
                            // Re-applying the original font failed: use the caller's defaults
                            remediatedByDefaultFont = true;
                            textFragment.getTextState().setFont(FontRepository.findFont(defaultFont,1));
                            textFragment.getTextState().setFontSize(Float.valueOf(defaultFontSize));
                            textFragment.getTextState().setFontStyle(fontStyle);
                        }
                        textFragment.setText(replacement);
                    }
                }
            }

            String saveStatus = saveDocument(pdfDocument, filePath);
            // Compare by value; reference comparison only works by accident of interning
            if ("completed".equals(saveStatus)) {
                successObject.put("status", "success");
                successObject.put("file_path", filePath);
                successObject.put("remediated_by_default_font", remediatedByDefaultFont);
                successObject.put("original_font_name", fontName);
            } else {
                saveStatus = (saveStatus == null) ? "Null Exception" : saveStatus;
                successObject.put("status", "error");
                successObject.put("file_path", filePath);
                successObject.put("error_message", saveStatus);
            }
        }
    } catch (Exception e) {
        return baseObject.handleException(e.getMessage());
    }
    return successObject;
}
/**
 * Writes the document to the given path and reports the outcome as a string.
 *
 * @param pdfDocument document to save
 * @param filePath destination path handed to {@code Document.save}
 *
 * @return the literal {@code "completed"} when saving succeeds; otherwise the
 *         message of the exception that was thrown (which may be {@code null})
 * @throws Exception declared in the signature; exceptions raised by the save
 *         call itself are caught and reported through the return value
 */
public static String saveDocument(Document pdfDocument, String filePath) throws Exception {
    String outcome;
    try {
        pdfDocument.save(filePath);
        outcome = "completed";
    } catch (Exception saveFailure) {
        // Failures are reported to the caller as text instead of being rethrown
        outcome = saveFailure.getMessage();
    }
    return outcome;
}
/**
 * Returns a copy of the input in which every character from a fixed set of
 * regex metacharacters is prefixed with a backslash.
 *
 * <p>The dot is not part of that set, so it is passed through unescaped.
 *
 * @param str text to escape; must not be {@code null}
 *
 * @return the escaped text (an empty string for empty input)
 */
public static String escapeRE(String str) {
    // Characters that receive a leading backslash: < ( [ { \ ^ - = $ ! | ] } ) ? * + >
    final String regExSpecialChars = "<([{\\^-=$!|]})?*+>";
    // Single pass over the input; avoids building and compiling two regular
    // expressions on every call just to insert backslashes.
    final StringBuilder escaped = new StringBuilder(str.length() + 8);
    for (int i = 0; i < str.length(); i++) {
        final char current = str.charAt(i);
        if (regExSpecialChars.indexOf(current) >= 0) {
            escaped.append('\\');
        }
        escaped.append(current);
    }
    return escaped.toString();
}
/**
 * Builds the regular expression used to search for a phrase in the document.
 *
 * <p>The phrase is escaped with {@link #escapeRE(String)}, split on single
 * spaces, and the pieces are joined with the whitespace class
 * {@code [\n\r\s\t]+} so that the words may be separated by any run of
 * whitespace. The result is wrapped in a negative lookbehind for {@code /} and
 * a negative lookahead {@code (?!\b.(com|co)\b)}; note that the dot in that
 * lookahead is unescaped and therefore matches any character.
 *
 * @param searchString phrase to search for; must not be {@code null}
 *
 * @return regex pattern for the phrase
 */
public static String getEscapeString(String searchString) {
    String escapedSearch = escapeRE(searchString);
    String[] words = escapedSearch.split("\\ ");
    // String.join is static and takes the delimiter FIRST. Calling it on the
    // regex literal with " " as first argument joined the words with a plain
    // space and silently discarded the whitespace pattern.
    String searchPattern = String.join("[\\n\\r\\s\\t]+", words);
    return "(?<![\\/])" + searchPattern + "(?!\\b.(com|co)\\b)";
}
/**
 * Builds the list of search-string variants to try against the document.
 *
 * <p>The list always starts with {@code searchString} itself. The classpath
 * resource {@code /unicode_list.txt} is then parsed as a JSON array of
 * objects; for every key/value pair, each occurrence of the value inside
 * {@code searchString} is replaced by the key, and the result is added to the
 * list if it is not already present.
 *
 * @param searchString original search string
 * @param fontExtractionPath not used by this method; kept for interface compatibility
 *
 * @return the distinct variants in first-seen order, {@code searchString} first
 * @throws IOException if the resource is missing or cannot be read
 * @throws JSONException if the resource content is not the expected JSON
 */
public static ArrayList<String> getUnicodeSearchStrings(String searchString, String fontExtractionPath) throws IOException, JSONException {
    ArrayList<String> unicodeList = new ArrayList<String>();
    unicodeList.add(searchString);

    java.io.InputStream resource = TextParser.class.getResourceAsStream("/unicode_list.txt");
    if (resource == null) {
        // Fail with a descriptive error instead of a bare NullPointerException
        throw new IOException("classpath resource /unicode_list.txt not found");
    }

    // NOTE(review): the reader uses the platform default charset, as before.
    // If the resource is UTF-8 encoded an explicit charset should be passed - confirm.
    StringBuilder rawJson = new StringBuilder();
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(resource))) {
        String line;
        while ((line = reader.readLine()) != null) {
            rawJson.append(line);
        }
    }

    JSONArray mappings = new JSONArray(rawJson.toString());
    for (int i = 0; i < mappings.length(); i++) {
        JSONObject mapping = mappings.getJSONObject(i);
        Iterator<String> keys = mapping.keys();
        while (keys.hasNext()) {
            String substitute = keys.next();
            String original = mapping.getString(substitute);
            String variant = searchString.replace(original, substitute);
            // A mapping whose value does not occur in searchString yields the
            // unchanged string; skip duplicates so callers do not repeat the
            // same search once per mapping.
            if (!unicodeList.contains(variant)) {
                unicodeList.add(variant);
            }
        }
    }
    return unicodeList;
}
/**
 * Reports whether the document's form contains a button field.
 *
 * <p>A field counts as a button field when its {@code toString()} value
 * contains the text {@code "ButtonField"}.
 *
 * @param pdfDocument document whose form fields are inspected
 *
 * @return {@code true} as soon as one such field is found, {@code false}
 *         otherwise (including when no form or no fields are available)
 */
public static boolean checkContainForm(Document pdfDocument) {
    Form form = pdfDocument.getForm();
    if (form == null) {
        return false;
    }
    Field[] formFields = form.getFields();
    if (formFields == null) {
        return false;
    }
    for (Field formField : formFields) {
        if (formField.toString().contains("ButtonField")) {
            return true;
        }
    }
    return false;
}
/**
 * Applies the absorber to every page of the document individually, from page 1
 * up to the page count, skipping any page on which it throws.
 *
 * @param pdfDocument document whose pages are visited
 * @param textFragmentAbsorber absorber passed to each page's {@code accept}
 *
 * @return the same {@code pdfDocument} instance that was passed in
 */
public static Document acceptAllPages(Document pdfDocument, TextFragmentAbsorber textFragmentAbsorber) {
    int pageNumber = 1;
    while (pageNumber <= pdfDocument.getPages().size()) {
        try {
            pdfDocument.getPages().get_Item(pageNumber).accept(textFragmentAbsorber);
        } catch (Exception ignored) {
            // Best effort: a page that cannot be processed is skipped so the
            // remaining pages are still visited.
        }
        pageNumber++;
    }
    return pdfDocument;
}
}