Dear Asad,
PDF_2.zip (2.0 MB)
I have attached the HTML file I got after converting the PDF.
Here is the conversion code:
// Apply the Aspose.PDF license. The stream is closed as soon as the license
// has been set so the file handle is not leaked.
FileInputStream fstream = new FileInputStream("C:\\Users\\admin\\Documents\\Aspose.Pdf.lic");
License license = new License();
try {
    license.setLicense(fstream);
} finally {
    fstream.close();
}

Document doc = new Document("C:\\Users\\admin\\Desktop\\drillbit_Plagiarism\\verylarge\\1.pdf");

// tune conversion parameters
HtmlSaveOptions newOptions = new HtmlSaveOptions();
newOptions.RasterImagesSavingMode = HtmlSaveOptions.RasterImagesSavingModes.AsEmbeddedPartsOfPngPageBackground;
newOptions.FontSavingMode = HtmlSaveOptions.FontSavingModes.SaveInAllFormats;
newOptions.PartsEmbeddingMode = HtmlSaveOptions.PartsEmbeddingModes.EmbedAllIntoHtml;
newOptions.LettersPositioningMethod = LettersPositioningMethods.UseEmUnitsAndCompensationOfRoundingErrorsInCss;
newOptions.setSplitIntoPages(false); // force write HTMLs of all pages into one output document

// Custom saving strategy: copies the generated HTML markup to a file of our
// own choosing instead of the path that is handed to doc.save(...).
newOptions.CustomHtmlSavingStrategy = new HtmlSaveOptions.HtmlPageMarkupSavingStrategy() {
    public void invoke(HtmlSaveOptions.HtmlPageMarkupSavingInfo htmlSavingInfo) {
        try {
            // here You can use any writable stream, file stream is taken just as example
            FileOutputStream fos = new FileOutputStream("C:\\Users\\admin\\Desktop\\drillbit_Plagiarism\\verylarge\\1.html");
            try {
                // Copy in chunks until the stream is exhausted. available() is
                // only an estimate and a single read() call may return fewer
                // bytes than requested, so sizing one buffer from available()
                // and reading once can silently truncate the HTML.
                byte[] buffer = new byte[8192];
                int bytesRead;
                while ((bytesRead = htmlSavingInfo.ContentStream.read(buffer, 0, buffer.length)) > 0) {
                    fos.write(buffer, 0, bytesRead);
                }
            } finally {
                // Always release the file handle, even if the copy fails.
                fos.close();
            }
        } catch (IOException ex) {
            // FileNotFoundException is an IOException, so open, read and write
            // failures are all reported through the same logger.
            Logger.getLogger(Aspose.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
};

// we can use some non-existing file name; all real saving is done in CustomHtmlSavingStrategy
String outHtmlFile = "C:\\Users\\admin\\Desktop\\drillbit_Plagiarism\\jntu1.html";
doc.save(outHtmlFile, newOptions);
Here is the code that sets the background color:
// Phrase to highlight in the converted HTML.
String mat = "ANALYTICAL METHOD";

// Read the whole HTML file into memory.
// NOTE(review): FileReader/FileWriter use the platform default charset; if the
// HTML is UTF-8 and the platform default differs, non-ASCII text may be
// corrupted - confirm the encoding of the generated file.
StringBuilder contentBuilder = new StringBuilder();
BufferedReader in = new BufferedReader(new FileReader("C:\\Users\\admin\\Desktop\\PDF_2.html"));
try {
    String str;
    while ((str = in.readLine()) != null) {
        // readLine() strips the line terminator; put one back so that text on
        // adjacent lines is not fused together in the output.
        contentBuilder.append(str).append('\n');
    }
} finally {
    // Close the reader even when reading fails. A read error is allowed to
    // propagate instead of being swallowed, so a failed read no longer
    // produces an empty output file without any indication of the cause.
    in.close();
}

// Wrap every occurrence of the phrase in the highlighting markup. replace()
// matches the phrase literally, whereas replaceAll() would interpret it as a
// regular expression and the replacement as a regex replacement pattern.
String content = contentBuilder.toString();
content = content.replace(mat, "<sup style='background-color:red'>1</sup><font style='background-color: rgba(51,51,51,1.0) !important; all:unset;'>" + mat + "</font>");

// Write the modified markup, releasing the file handle even if the write fails.
BufferedWriter bw = new BufferedWriter(new FileWriter("yati.html"));
try {
    bw.write(content);
} finally {
    bw.close();
}