Index: main/classes/core/src/com/ibm/icu/impl/ICUBinary.java |
=================================================================== |
--- main/classes/core/src/com/ibm/icu/impl/ICUBinary.java (revision 36076) |
+++ main/classes/core/src/com/ibm/icu/impl/ICUBinary.java (revision 36102) |
@@ -7,17 +7,262 @@ |
package com.ibm.icu.impl; |
+import java.io.DataOutputStream; |
+import java.io.File; |
+import java.io.FileInputStream; |
+import java.io.FileNotFoundException; |
import java.io.IOException; |
import java.io.InputStream; |
import java.nio.ByteBuffer; |
import java.nio.ByteOrder; |
+import java.nio.channels.FileChannel; |
+import java.util.ArrayList; |
+import java.util.List; |
+import java.util.MissingResourceException; |
+import com.ibm.icu.util.ICUUncheckedIOException; |
import com.ibm.icu.util.VersionInfo; |
-public final class ICUBinary |
-{ |
+public final class ICUBinary { |
+ /** |
+ * Reads the ICU .dat package file format. |
+ * Most methods do not modify the ByteBuffer in any way, |
+ * not even its position or other state. |
+ */ |
+ private static final class DatPackageReader { |
+ /** |
+ * .dat package data format ID "CmnD". |
+ */ |
+ private static final int DATA_FORMAT = 0x436d6e44; |
+ |
+ private static final class IsAcceptable implements Authenticate { |
+ // @Override when we switch to Java 6 |
+ public boolean isDataVersionAcceptable(byte version[]) { |
+ return version[0] == 1; |
+ } |
+ } |
+ private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable(); |
+ |
+ /** |
+ * Checks that the ByteBuffer contains a valid, usable ICU .dat package. |
+ * Moves the buffer position from 0 to after the data header. |
+ */ |
+ private static boolean validate(ByteBuffer bytes) { |
+ try { |
+ readHeader(bytes, DATA_FORMAT, IS_ACCEPTABLE); |
+ } catch (IOException ignored) { |
+ return false; |
+ } |
+ int count = bytes.getInt(bytes.position()); // Do not move the position. |
+ if (count <= 0) { |
+ return false; |
+ } |
+ // For each item, there is one ToC entry (8 bytes) and a name string |
+ // and a data item of at least 16 bytes. |
+ // (We assume no data item duplicate elimination for now.) |
+ if (bytes.position() + 4 + count * (8 + 16) > bytes.capacity()) { |
+ return false; |
+ } |
+ if (!startsWithPackageName(bytes, getNameOffset(bytes, 0)) || |
+ !startsWithPackageName(bytes, getNameOffset(bytes, count - 1))) { |
+ return false; |
+ } |
+ return true; |
+ } |
+ |
+ private static boolean startsWithPackageName(ByteBuffer bytes, int start) { |
+ // Compare all but the trailing 'b' or 'l' which depends on the platform. |
+ int length = ICUData.PACKAGE_NAME.length() - 1; |
+ for (int i = 0; i < length; ++i) { |
+ if (bytes.get(start + i) != ICUData.PACKAGE_NAME.charAt(i)) { |
+ return false; |
+ } |
+ } |
+ // Check for 'b' or 'l' followed by '/'. |
+ byte c = bytes.get(start + length++); |
+ if ((c != 'b' && c != 'l') || bytes.get(start + length) != '/') { |
+ return false; |
+ } |
+ return true; |
+ } |
+ |
+ private static ByteBuffer getData(ByteBuffer bytes, CharSequence key) { |
+ int base = bytes.position(); |
+ int count = bytes.getInt(base); |
+ |
+ // Do a binary search for the key. |
+ int start = 0; |
+ int limit = count; |
+ while (start < limit) { |
+ int mid = (start + limit) >>> 1; |
+ int nameOffset = getNameOffset(bytes, mid); |
+ // Skip "icudt54b/". |
+ nameOffset += ICUData.PACKAGE_NAME.length() + 1; |
+ int result = compareKeys(key, bytes, nameOffset); |
+ if (result < 0) { |
+ limit = mid; |
+ } else if (result > 0) { |
+ start = mid + 1; |
+ } else { |
+ // We found it! |
+ ByteBuffer data = bytes.duplicate(); |
+ data.position(getDataOffset(bytes, mid)); |
+ data.limit(getDataOffset(bytes, mid + 1)); |
+ return ICUBinary.sliceWithOrder(data); |
+ } |
+ } |
+ return null; // Not found or table is empty. |
+ } |
+ |
+ private static int getNameOffset(ByteBuffer bytes, int index) { |
+ int base = bytes.position(); |
+ assert 0 <= index && index < bytes.getInt(base); // count |
+ // The count integer (4 bytes) |
+ // is followed by count (nameOffset, dataOffset) integer pairs (8 bytes per pair). |
+ return base + bytes.getInt(base + 4 + index * 8); |
+ } |
+ |
+ private static int getDataOffset(ByteBuffer bytes, int index) { |
+ int base = bytes.position(); |
+ int count = bytes.getInt(base); |
+ if (index == count) { |
+ // Return the limit of the last data item. |
+ return bytes.capacity(); |
+ } |
+ assert 0 <= index && index < count; |
+ // The count integer (4 bytes) |
+ // is followed by count (nameOffset, dataOffset) integer pairs (8 bytes per pair). |
+ // The dataOffset follows the nameOffset (skip another 4 bytes). |
+ return base + bytes.getInt(base + 4 + 4 + index * 8); |
+ } |
+ } |
+ |
+ private static final class DataFile { |
+ public final String itemPath; |
+ /** |
+ * null if a .dat package. |
+ */ |
+ public final File path; |
+ /** |
+ * .dat package bytes, or null if not a .dat package. |
+ * position() is after the header. |
+ * Do not modify the position or other state, for thread safety. |
+ */ |
+ public final ByteBuffer pkgBytes; |
+ |
+ public DataFile(String item, File path) { |
+ itemPath = item; |
+ this.path = path; |
+ pkgBytes = null; |
+ } |
+ public DataFile(String item, ByteBuffer bytes) { |
+ itemPath = item; |
+ path = null; |
+ pkgBytes = bytes; |
+ } |
+ public String toString() { |
+ return path.toString(); |
+ } |
+ } |
+ private static final List<DataFile> icuDataFiles = new ArrayList<DataFile>(); |
+ |
+ static { |
+ // Normally com.ibm.icu.impl.ICUBinary.dataPath. |
+ String dataPath = ICUConfig.get(ICUBinary.class.getName() + ".dataPath"); |
+ if (dataPath != null) { |
+ addDataFilesFromPath(dataPath, icuDataFiles); |
+ } |
+ } |
+ |
+ private static void addDataFilesFromPath(String dataPath, List<DataFile> files) { |
+ // Split the path and find files in each location. |
+ // This splitting code avoids the regex pattern compilation in String.split() |
+ // and its array allocation. |
+ // (There is no simple by-character split() |
+ // and the StringTokenizer "is discouraged in new code".) |
+ int pathStart = 0; |
+ while (pathStart < dataPath.length()) { |
+ int sepIndex = dataPath.indexOf(File.pathSeparatorChar, pathStart); |
+ int pathLimit; |
+ if (sepIndex >= 0) { |
+ pathLimit = sepIndex; |
+ } else { |
+ pathLimit = dataPath.length(); |
+ } |
+ String path = dataPath.substring(pathStart, pathLimit).trim(); |
+ if (path.endsWith(File.separator)) { |
+ path = path.substring(0, path.length() - 1); |
+ } |
+ if (path.length() != 0) { |
+ addDataFilesFromFolder(new File(path), new StringBuilder(), icuDataFiles); |
+ } |
+ if (sepIndex < 0) { |
+ break; |
+ } |
+ pathStart = sepIndex + 1; |
+ } |
+ } |
+ |
+ private static void addDataFilesFromFolder(File folder, StringBuilder itemPath, |
+ List<DataFile> dataFiles) { |
+ File[] files = folder.listFiles(); |
+ if (files == null || files.length == 0) { |
+ return; |
+ } |
+ int folderPathLength = itemPath.length(); |
+ if (folderPathLength > 0) { |
+ // The item path must use the ICU file separator character, |
+ // not the platform-dependent File.separatorChar, |
+ // so that the enumerated item paths match the paths requested by ICU code. |
+ itemPath.append('/'); |
+ ++folderPathLength; |
+ } |
+ for (File file : files) { |
+ String fileName = file.getName(); |
+ if (fileName.endsWith(".txt")) { |
+ continue; |
+ } |
+ itemPath.append(fileName); |
+ if (file.isDirectory()) { |
+ // TODO: Within a folder, put all single files before all .dat packages? |
+ addDataFilesFromFolder(file, itemPath, dataFiles); |
+ } else if (fileName.endsWith(".dat")) { |
+ ByteBuffer pkgBytes = mapFile(file); |
+ if (pkgBytes != null && DatPackageReader.validate(pkgBytes)) { |
+ dataFiles.add(new DataFile(itemPath.toString(), pkgBytes)); |
+ } |
+ } else { |
+ dataFiles.add(new DataFile(itemPath.toString(), file)); |
+ } |
+ itemPath.setLength(folderPathLength); |
+ } |
+ } |
+ |
+ /** |
+ * Compares the length-specified input key with the |
+ * NUL-terminated table key. (ASCII) |
+ */ |
+ static int compareKeys(CharSequence key, ByteBuffer bytes, int offset) { |
+ for (int i = 0;; ++i, ++offset) { |
+ int c2 = bytes.get(offset); |
+ if (c2 == 0) { |
+ if (i == key.length()) { |
+ return 0; |
+ } else { |
+ return 1; // key > table key because key is longer. |
+ } |
+ } else if (i == key.length()) { |
+ return -1; // key < table key because key is shorter. |
+ } |
+ int diff = (int)key.charAt(i) - c2; |
+ if (diff != 0) { |
+ return diff; |
+ } |
+ } |
+ } |
+ |
// public inner interface ------------------------------------------------ |
- |
+ |
/** |
* Special interface for data authentication |
*/ |
@@ -35,9 +280,130 @@ |
// public methods -------------------------------------------------------- |
/** |
+ * Loads an ICU binary data file and returns it as a ByteBuffer. |
+ * The buffer contents is normally read-only, but its position etc. can be modified. |
+ * |
+ * @param itemPath Relative ICU data item path, for example "root.res" or "coll/ucadata.icu". |
+ * @return The data as a read-only ByteBuffer, |
+ * or null if the resource could not be found. |
+ */ |
+ public static ByteBuffer getData(String itemPath) { |
+ return getData(null, null, itemPath, false); |
+ } |
+ |
+ /** |
+ * Loads an ICU binary data file and returns it as a ByteBuffer. |
+ * The buffer contents is normally read-only, but its position etc. can be modified. |
+ * |
+ * @param loader Used for loader.getResourceAsStream() unless the data is found elsewhere. |
+ * @param resourceName Resource name for use with the loader. |
+ * @param itemPath Relative ICU data item path, for example "root.res" or "coll/ucadata.icu". |
+ * @return The data as a read-only ByteBuffer, |
+ * or null if the resource could not be found. |
+ */ |
+ public static ByteBuffer getData(ClassLoader loader, String resourceName, String itemPath) { |
+ return getData(loader, resourceName, itemPath, false); |
+ } |
+ |
+ /** |
+ * Loads an ICU binary data file and returns it as a ByteBuffer. |
+ * The buffer contents is normally read-only, but its position etc. can be modified. |
+ * |
+ * @param itemPath Relative ICU data item path, for example "root.res" or "coll/ucadata.icu". |
+ * @return The data as a read-only ByteBuffer. |
+ * @throws MissingResourceException if required==true and the resource could not be found |
+ */ |
+ public static ByteBuffer getRequiredData(String itemPath) { |
+ return getData(null, null, itemPath, true); |
+ } |
+ |
+ /** |
+ * Loads an ICU binary data file and returns it as a ByteBuffer. |
+ * The buffer contents is normally read-only, but its position etc. can be modified. |
+ * |
+ * @param loader Used for loader.getResourceAsStream() unless the data is found elsewhere. |
+ * @param resourceName Resource name for use with the loader. |
+ * @param itemPath Relative ICU data item path, for example "root.res" or "coll/ucadata.icu". |
+ * @return The data as a read-only ByteBuffer. |
+ * @throws MissingResourceException if required==true and the resource could not be found |
+ */ |
+// public static ByteBuffer getRequiredData(ClassLoader loader, String resourceName, |
+// String itemPath) { |
+// return getData(loader, resourceName, itemPath, true); |
+// } |
+ |
+ /** |
+ * Loads an ICU binary data file and returns it as a ByteBuffer. |
+ * The buffer contents is normally read-only, but its position etc. can be modified. |
+ * |
+ * @param loader Used for loader.getResourceAsStream() unless the data is found elsewhere. |
+ * @param resourceName Resource name for use with the loader. |
+ * @param itemPath Relative ICU data item path, for example "root.res" or "coll/ucadata.icu". |
+ * @param required If the resource cannot be found, |
+ * this method returns null (!required) or throws an exception (required). |
+ * @return The data as a read-only ByteBuffer, |
+ * or null if required==false and the resource could not be found. |
+ * @throws MissingResourceException if required==true and the resource could not be found |
+ */ |
+ private static ByteBuffer getData(ClassLoader loader, String resourceName, |
+ String itemPath, boolean required) { |
+ ByteBuffer bytes = getDataFromFile(itemPath); |
+ if (bytes != null) { |
+ return bytes; |
+ } |
+ if (loader == null) { |
+ loader = ICUData.class.getClassLoader(); |
+ } |
+ if (resourceName == null) { |
+ resourceName = ICUData.ICU_BASE_NAME + '/' + itemPath; |
+ } |
+ InputStream is = ICUData.getStream(loader, resourceName, required); |
+ if (is == null) { |
+ return null; |
+ } |
+ try { |
+ return getByteBufferFromInputStream(is); |
+ } catch (IOException e) { |
+ throw new ICUUncheckedIOException(e); |
+ } |
+ } |
+ |
+ private static ByteBuffer getDataFromFile(String itemPath) { |
+ for (DataFile dataFile : icuDataFiles) { |
+ if (dataFile.pkgBytes != null) { |
+ ByteBuffer data = DatPackageReader.getData(dataFile.pkgBytes, itemPath); |
+ if (data != null) { |
+ return data; |
+ } |
+ } else if (itemPath.equals(dataFile.itemPath)) { |
+ return mapFile(dataFile.path); |
+ } |
+ } |
+ return null; |
+ } |
+ |
+ private static ByteBuffer mapFile(File path) { |
+ FileInputStream file; |
+ try { |
+ file = new FileInputStream(path); |
+ FileChannel channel = file.getChannel(); |
+ ByteBuffer bytes = channel.map(FileChannel.MapMode.READ_ONLY, 0, channel.size()); |
+ // Close the file and its channel; this seems to keep the ByteBuffer valid. |
+ // If not, then we will need to return the pair of (file, bytes). |
+ file.close(); |
+ return bytes; |
+ } catch(FileNotFoundException ignored) { |
+ System.err.println(ignored); |
+ } catch (IOException ignored) { |
+ System.err.println(ignored); |
+ } |
+ return null; |
+ } |
+ |
+ /** |
* Same as readHeader(), but returns a VersionInfo rather than a compact int. |
*/ |
- public static final VersionInfo readHeaderAndDataVersion(ByteBuffer bytes, |
+ public static VersionInfo readHeaderAndDataVersion(ByteBuffer bytes, |
int dataFormat, |
Authenticate authenticate) |
throws IOException { |
@@ -56,7 +422,7 @@ |
* @return dataVersion |
* @throws IOException if this is not a valid ICU data item of the expected dataFormat |
*/ |
- public static final int readHeader(ByteBuffer bytes, int dataFormat, Authenticate authenticate) |
+ public static int readHeader(ByteBuffer bytes, int dataFormat, Authenticate authenticate) |
throws IOException { |
assert bytes.position() == 0; |
byte magic1 = bytes.get(2); |
@@ -89,7 +455,11 @@ |
bytes.get(14) != (byte)(dataFormat >> 8) || |
bytes.get(15) != (byte)dataFormat || |
(authenticate != null && !authenticate.isDataVersionAcceptable(formatVersion))) { |
- throw new IOException(HEADER_AUTHENTICATION_FAILED_); |
+ throw new IOException(HEADER_AUTHENTICATION_FAILED_ + |
+ String.format("; data format %02x%02x%02x%02x, format version %d.%d.%d.%d", |
+ bytes.get(12), bytes.get(13), bytes.get(14), bytes.get(15), |
+ formatVersion[0] & 0xff, formatVersion[1] & 0xff, |
+ formatVersion[2] & 0xff, formatVersion[3] & 0xff)); |
} |
bytes.position(headerSize); |
@@ -100,17 +470,54 @@ |
(bytes.get(23) & 0xff); |
} |
- public static final void skipBytes(ByteBuffer bytes, int skipLength) { |
+ /** |
+ * Writes an ICU data header. |
+ * Does not write a copyright string. |
+ * |
+ * @return The length of the header (number of bytes written). |
+ * @throws IOException from the DataOutputStream |
+ */ |
+ public static int writeHeader(int dataFormat, int formatVersion, int dataVersion, |
+ DataOutputStream dos) throws IOException { |
+ // ucmndata.h MappedData |
+ dos.writeChar(32); // headerSize |
+ dos.writeByte(MAGIC1); |
+ dos.writeByte(MAGIC2); |
+ // unicode/udata.h UDataInfo |
+ dos.writeChar(20); // sizeof(UDataInfo) |
+ dos.writeChar(0); // reservedWord |
+ dos.writeByte(1); // isBigEndian |
+ dos.writeByte(CHAR_SET_); // charsetFamily |
+ dos.writeByte(CHAR_SIZE_); // sizeofUChar |
+ dos.writeByte(0); // reservedByte |
+ dos.writeInt(dataFormat); |
+ dos.writeInt(formatVersion); |
+ dos.writeInt(dataVersion); |
+ // 8 bytes padding for 32 bytes headerSize (multiple of 16). |
+ dos.writeLong(0); |
+ assert dos.size() == 32; |
+ return 32; |
+ } |
+ |
+ public static void skipBytes(ByteBuffer bytes, int skipLength) { |
if (skipLength > 0) { |
bytes.position(bytes.position() + skipLength); |
} |
} |
/** |
+ * Same as ByteBuffer.slice() plus preserving the byte order. |
+ */ |
+ public static ByteBuffer sliceWithOrder(ByteBuffer bytes) { |
+ ByteBuffer b = bytes.slice(); |
+ return b.order(bytes.order()); |
+ } |
+ |
+ /** |
* Reads the entire contents from the stream into a byte array |
* and wraps it into a ByteBuffer. Closes the InputStream at the end. |
*/ |
- public static final ByteBuffer getByteBufferFromInputStream(InputStream is) throws IOException { |
+ public static ByteBuffer getByteBufferFromInputStream(InputStream is) throws IOException { |
try { |
int avail = is.available(); |
byte[] bytes = new byte[avail]; |
@@ -128,7 +535,7 @@ |
} |
} |
- private static final void readFully(InputStream is, byte[] bytes, int offset, int avail) |
+ private static void readFully(InputStream is, byte[] bytes, int offset, int avail) |
throws IOException { |
while (avail > 0) { |
int numRead = is.read(bytes, offset, avail); |