Detecting file type stored in Blob

Recently I had to create a download servlet that allowed users to download PDF, Excel, or PowerPoint files stored in a DB2 database as Blobs. Whoever designed the database did not include a MIME type column, so I had to figure out the file type myself. After looking at the files in a binary editor, I was able to figure out the magic markers, and then I used the Blob.position() method from java.sql.Blob. Below is the code I used to detect file types from a Blob:


/**
 * Returns file type (.pdf, .doc, .xls, or .ppt) from a Blob
 * <p>
 * Despite the method name, the value returned is a file extension
 * (including the leading dot), not a MIME type.
 * <p>
 * Detection is based on magic markers:
 * <ul>
 *   <li>A legacy Microsoft Office file must start with the OLE2 compound
 *       file signature; the application is then identified by marker text
 *       searched in the whole content.</li>
 *   <li>A PDF file must contain the "%PDF-" header within its first 1024
 *       bytes.</li>
 * </ul>
 * This method returns empty string if the Blob is null or empty, if an
 * exception occurs while reading it, or if the file type cannot be
 * determined.
 *
 * @param  attachment  SQL Blob, may be null
 * @return the file type stored in Blob, or empty string if unknown
 * @see    java.sql.Blob
 */
private String findMimeType(Blob attachment) {

    if (attachment == null) {
        return "";
    }

    // Container signatures. These identify the file format itself and are
    // only meaningful near the beginning of the content.
    byte[] officeMagic = hexStringToByteArray("D0CF11E0A1B11AE1");
    // "%PDF-"
    byte[] pdfMagic = hexStringToByteArray("255044462D");

    // Marker text found inside Office files, used to tell the applications apart.
    // "Microsoft Word Document"
    byte[] wordDocMagic = hexStringToByteArray("4D6963726F736F667420576F726420446F63756D656E74");
    // "Microsoft Excel" - this also matches the longer "Microsoft Excel Worksheet"
    // marker, so a separate search for the longer text is not needed.
    byte[] excelMagic = hexStringToByteArray("4D6963726F736F667420457863656C");
    // "PowerPoint Document" with a zero byte between the characters (two-byte encoding)
    byte[] pptMagic =
        hexStringToByteArray("50006F0077006500720050006F0069006E007400200044006F00630075006D0065006E0074");
    // "Microsoft PowerPoint"
    byte[] pptMagic2 = hexStringToByteArray("4D6963726F736F667420506F776572506F696E74");

    // Number of leading bytes inspected for the container signatures.
    // PDF readers such as Acrobat accept a header located anywhere in the
    // first 1024 bytes (PDF Reference 1.7, Appendix H), so the same
    // tolerance is applied here.
    final long headerWindow = 1024L;

    try {
        long blobLength = attachment.length();
        if (blobLength <= 0) {
            return "";
        }

        // Blob positions are 1-based. Reading the head once avoids scanning
        // the whole Blob for every signature.
        byte[] head = attachment.getBytes(1, (int) Math.min(blobLength, headerWindow));

        // Office container: the signature must be the very first bytes.
        // Checked before PDF so that an Office file that merely embeds a
        // PDF is not reported as ".pdf".
        if (indexOfBytes(head, officeMagic) == 0) {
            if (attachment.position(wordDocMagic, 1) != -1) {
                return ".doc";
            }
            if (attachment.position(excelMagic, 1) != -1) {
                return ".xls";
            }
            if (attachment.position(pptMagic, 1) != -1
                    || attachment.position(pptMagic2, 1) != -1) {
                return ".ppt";
            }
            // Office container of a kind that is not recognized
            return "";
        }

        if (indexOfBytes(head, pdfMagic) != -1) {
            return ".pdf";
        }

    } catch (Exception e) {
        // Best effort by contract: report the problem and fall through to
        // the "unknown" result instead of failing the caller.
        java.util.logging.Logger.getLogger(getClass().getName()).log(
                java.util.logging.Level.WARNING,
                "Could not determine file type of Blob", e);
    }

    return "";
}

/**
 * Finds the first occurrence of a byte pattern inside a byte array.
 *
 * @param data    bytes to search in
 * @param pattern bytes to search for
 * @return zero-based index of the first match, or -1 if there is none
 */
private static int indexOfBytes(byte[] data, byte[] pattern) {
    if (pattern.length == 0) {
        return 0;
    }
    for (int start = 0; start + pattern.length <= data.length; start++) {
        int matched = 0;
        while (matched < pattern.length && data[start + matched] == pattern[matched]) {
            matched++;
        }
        if (matched == pattern.length) {
            return start;
        }
    }
    return -1;
}

	/**
	 * This method converts a HEX string to byte array.
	 * <p>
	 * Every two characters of the string form one byte, the first character
	 * being the high-order four bits. Upper and lower case digits are both
	 * accepted. An empty string yields an empty array.
	 *
	 * @param s Hex string with an even number of hexadecimal digits
	 * @return byte array with one byte per pair of digits
	 * @throws NullPointerException if s is null
	 * @throws IllegalArgumentException if s has an odd length or contains
	 *         a character that is not a hexadecimal digit
	 */
	private static byte[] hexStringToByteArray(String s) {
	    if (s == null) {
	        throw new NullPointerException("Hex string must not be null");
	    }
	    int len = s.length();
	    if (len % 2 != 0) {
	        throw new IllegalArgumentException(
	                "Hex string must have an even number of characters, got " + len);
	    }
	    byte[] data = new byte[len / 2];
	    for (int i = 0; i < len; i += 2) {
	        int high = Character.digit(s.charAt(i), 16);
	        int low = Character.digit(s.charAt(i + 1), 16);
	        // Character.digit returns -1 for a character that is not a digit
	        // in the given radix; reject it instead of folding -1 into the byte.
	        if (high < 0 || low < 0) {
	            throw new IllegalArgumentException(
	                    "Invalid hexadecimal digit near index " + i + " in \"" + s + "\"");
	        }
	        data[i / 2] = (byte) ((high << 4) | low);
	    }
	    return data;
	}

Leave a Reply

Your email address will not be published. Required fields are marked *

*

You may use these HTML tags and attributes: <a href="" title=""> <abbr title=""> <acronym title=""> <b> <blockquote cite=""> <cite> <code> <del datetime=""> <em> <i> <q cite=""> <strike> <strong>