Images can be extracted from a PDF in a couple of ways with the PDFBox library. In this post we will look at the ways we can extract images from a PDF using Apache PDFBox.
Extract Image using PDFRenderer
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
package com.kscodes.examples.pdfbox;

import java.awt.image.BufferedImage;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;

import javax.imageio.ImageIO;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.PDFRenderer;

/**
 * Renders a page of a PDF with {@link PDFRenderer} and saves the result as a
 * JPEG file.
 */
public class PdfImageReaderExample {

    /**
     * Loads the sample PDF, renders the page at index 0 to a
     * {@link BufferedImage} and writes it to disk as {@code image.jpg}.
     *
     * <p>Any {@link IOException} raised while loading, rendering or writing is
     * reported on standard output.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) {
        // try-with-resources closes the document and the underlying stream even
        // when rendering or writing fails part-way through.
        try (FileInputStream pdfStream =
                        new FileInputStream("K:\\Kscodes\\pdf\\imageSample.pdf");
                PDDocument pdDocument = PDDocument.load(pdfStream)) {

            PDFRenderer renderer = new PDFRenderer(pdDocument);
            BufferedImage image = renderer.renderImage(0);

            File target = new File("K:\\Kscodes\\pdf\\_images\\image.jpg");
            // ImageIO.write returns false (instead of throwing) when no writer
            // for the requested format is available, so check it explicitly.
            if (!ImageIO.write(image, "jpg", target)) {
                System.out.println("No image writer found for format jpg");
            }
        } catch (IOException ioe) {
            System.out.println("Error while reading pdf" + ioe.getMessage());
        }
    }
}
Extract Image using PDResources
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 |
package com.kscodes.examples.pdfbox;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;

import javax.imageio.ImageIO;

import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.graphics.PDXObject;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;

/**
 * Extracts the image XObjects referenced by a PDF page's resources and saves
 * each one as a PNG file.
 */
public class PdfImageReaderExample {

    /**
     * Loads the sample PDF, walks the XObjects in the resources of the page at
     * index 0 and writes every {@link PDImageXObject} found to
     * {@code image_<n>.png}, numbering from 1.
     *
     * <p>Any {@link IOException} raised while loading or writing is reported on
     * standard output.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) {
        // try-with-resources closes the document and the underlying stream even
        // when extraction fails part-way through.
        try (FileInputStream pdfStream =
                        new FileInputStream("K:\\Kscodes\\pdf\\imageSample.pdf");
                PDDocument pdDocument = PDDocument.load(pdfStream)) {

            PDPage pdPage = pdDocument.getPage(0);
            PDResources pdResources = pdPage.getResources();

            // Guard against a page that carries no resources dictionary, in
            // which case there is nothing to extract.
            if (pdResources != null) {
                int imageCounter = 1;
                for (COSName name : pdResources.getXObjectNames()) {
                    PDXObject xObject = pdResources.getXObject(name);
                    // Only image XObjects are of interest; other kinds are skipped.
                    if (xObject instanceof PDImageXObject) {
                        PDImageXObject image = (PDImageXObject) xObject;
                        String filename = "K:\\Kscodes\\pdf\\images\\image_"
                                + imageCounter + ".png";
                        // ImageIO.write returns false (instead of throwing) when
                        // no suitable writer is available, so check it explicitly.
                        if (!ImageIO.write(image.getImage(), "png", new File(filename))) {
                            System.out.println("No image writer found for format png");
                        }
                        imageCounter++;
                    }
                }
            }
        } catch (IOException ioe) {
            System.out.println("Error while reading pdf" + ioe.getMessage());
        }
    }
}
Output