Hi,
I am trying to parse a PDF document and validate the types of the images it contains (JPEG, PNG, TIFF, BMP, etc.). I was able to extract the images, but I could not identify the correct image type. Please help.
Please find the code below:
/**
 * Loads a PDF from the given bytes and prints, for every page, the number of
 * image resources and the name of each image.
 *
 * <p>NOTE(review): despite its name, this method currently only lists image
 * names; it does not yet inspect or validate the image format.
 *
 * @param fileContent raw bytes of the PDF document to scan
 */
private static void parseAndValidateImage(final byte[] fileContent) {
    // Aspose pdf reader
    final Document asposeDocument = new Document(new ByteArrayInputStream(fileContent));
    try {
        final PageCollection pagecollection = asposeDocument.getPages();
        final int pages = pagecollection.size();
        System.out.println("No of pages: " + pages);
        // Pages are addressed from 1 through size() via get_Item.
        for (int i = 1; i <= pages; i++) {
            System.out.println("Processing page: " + i);
            final Page page = pagecollection.get_Item(i);
            final Resources resources = page.getResources();
            if (resources == null) {
                continue;
            }
            // Fetch the image collection once instead of re-querying it for each check.
            final XImageCollection imageCollection = resources.getImages();
            if (imageCollection == null || imageCollection.size() <= 0) {
                continue;
            }
            final int noOfImages = imageCollection.size();
            System.out.println("Page: " + i + " no of Images: " + noOfImages);
            for (String name : imageCollection.getNames()) {
                System.out.println("Page number: " + i + " Image name: " + name);
            }
        }
    } finally {
        // Release the resources held by the document once scanning is finished,
        // even if processing a page throws.
        asposeDocument.close();
    }
}
public static void main(String[] args) throws IOException {
System.out.println(“START_____”);
final byte[] fileContent =
org.apache.commons.io.FileUtils.readFileToByteArray(
new File("//Users//mcn//Documents//img_cmyk_icc_tiff.pdf"));
parseAndValidateImage(fileContent);
}