Monday, April 17, 2006

PDF Graphic Conversion

As promised, here is some Java code that pulls a graphic out of a PDF and converts it to an array of bytes. You can then do whatever you like with the bytes. The code is not final, but it is getting close.

/**
 * Reads the entire contents of a file into a byte array.
 *
 * @param file the file to read
 * @return the bytes of the file, or {@code null} if the file is larger than
 *         {@code Integer.MAX_VALUE} bytes and so cannot fit in a single array
 * @throws IOException if the file cannot be opened or read, or if fewer bytes
 *         could be read than {@code file.length()} reported
 */
public static byte[] getBytesFromFile (File file) throws IOException {
    // Check the size before opening the stream, so that the "too large"
    // early return does not leave an open file handle behind.
    long length = file.length ();
    if (length > Integer.MAX_VALUE) {
        // File is too large to hold in one byte array
        return null;
    }

    // Create the byte array to hold the data
    byte[] bytes = new byte[(int) length];

    InputStream is = new FileInputStream (file);
    try {
        // read() may return fewer bytes than requested, so keep reading
        // until the buffer is full or end-of-stream (-1) is reached.
        int offset = 0;
        while (offset < bytes.length) {
            int numRead = is.read (bytes, offset, bytes.length - offset);
            if (numRead < 0) {
                break;
            }
            offset += numRead;
        }

        // Ensure all the bytes have been read in
        if (offset < bytes.length) {
            throw new IOException ("Could not completely read file "+file.getName ());
        }

        return bytes;
    } finally {
        // Always release the file handle, including on the error paths above.
        is.close ();
    }
}

/**
 * Loads a PDF from the given stream and returns the bytes of one image found
 * in the resources of one page.
 *
 * Both pageNumber and imageNumber are 1-based. Images are counted in the
 * iteration order of the key set of the map returned by
 * PDResources.getImages().
 *
 * @param inp stream holding the PDF data; if null, null is returned
 * @param pageNumber 1-based index of the page to inspect
 * @param imageNumber 1-based index of the image on that page
 * @return the image data as written by PDXObjectImage.write2OutputStream, or
 *         null if inp is null, the page has no images, or imageNumber does
 *         not correspond to an image on the page
 * @throws Exception wrapping (with cause) any failure while loading the
 *         document, locating the page, or writing out the image
 */
private byte[] fetchImageFromPDF (ByteArrayInputStream inp, int pageNumber, int imageNumber) throws Exception {
    if (inp == null) {
        return null;
    }

    PDDocument document = null;
    try {
        document = PDDocument.load (inp);

        List pages = document.getDocumentCatalog ().getAllPages ();
        PDPage page = (PDPage) pages.get (pageNumber-1);
        PDResources resources = page.getResources ();
        // NOTE(review): assumes getResources() can return null for a page
        // without its own resource dictionary; treated as "no images".
        Map images = (resources == null) ? null : resources.getImages ();
        if (images == null) {
            return null;
        }

        // Advance the iterator on every pass. The earlier version only called
        // next() once the counter matched, so it always returned the first
        // image (and spun for a very long time when imageNumber < 1).
        Iterator imageIter = images.keySet ().iterator ();
        for (int i = 1; imageIter.hasNext (); i++) {
            String key = (String) imageIter.next ();
            if (i == imageNumber) {
                PDXObjectImage image = (PDXObjectImage) images.get (key);
                ByteArrayOutputStream baOut = new ByteArrayOutputStream ();
                image.write2OutputStream (baOut);
                return baOut.toByteArray ();
            }
        }

        // imageNumber is out of range for this page
        return null;
    } catch (Exception e) {
        // Keep the original exception as the cause so the stack trace survives.
        throw new Exception ("Exception fetching image from byteArray PDF: " + e.getMessage (), e);
    } finally {
        // Release the resources held by the loaded document.
        if (document != null) {
            try {
                document.close ();
            } catch (IOException ignored) {
                // Best effort: a failure to close must not mask the result
                // or the exception already in flight.
            }
        }
    }
}

/**
 * Convenience wrapper around the ByteArrayInputStream variant of
 * fetchImageFromPDF: reads the whole file into memory with
 * getBytesFromFile() and delegates to that variant.
 *
 * @param f the PDF file; if null, null is returned
 * @param pageNumber 1-based index of the page to inspect
 * @param imageNumber 1-based index of the image on that page
 * @return whatever the stream variant returns for the file's contents, or
 *         null if f is null
 * @throws Exception wrapping (with cause) any failure while reading the file
 *         or extracting the image, including a file too large to load
 */
private byte[] fetchImageFromPDF (File f, int pageNumber, int imageNumber) throws Exception {
    if (f == null) {
        return null;
    }

    try {
        byte[] bytes = getBytesFromFile (f);
        if (bytes == null) {
            // getBytesFromFile() returns null for files over Integer.MAX_VALUE
            // bytes; report that clearly instead of hitting a
            // NullPointerException in the ByteArrayInputStream constructor.
            throw new IOException ("File is too large to load into memory: " + f.getName ());
        }

        ByteArrayInputStream inp = new ByteArrayInputStream (bytes);
        return this.fetchImageFromPDF (inp, pageNumber, imageNumber);
    } catch (Exception e) {
        // Keep the original exception as the cause so the stack trace survives.
        throw new Exception ("Exception fetching image from file PDF: " + e.getMessage (), e);
    }
}

0 Comments:

Post a Comment

<< Home