diff --git a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java index 2661d5d48..34edfa0da 100644 --- a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java +++ b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java @@ -163,13 +163,15 @@ public final class FormatSpec { static final int NOT_A_VERSION_NUMBER = -1; // These MUST have the same values as the relevant constants in format_utils.h. - // From version 4 on, we use version * 100 + revision as a version number. That allows + // From version 2.01 on, we use version * 100 + revision as a version number. That allows // us to change the format during development while having testing devices remove // older files with each upgrade, while still having a readable versioning scheme. // When we bump up the dictionary format version, we should update // ExpandableDictionary.needsToMigrateDictionary() and // ExpandableDictionary.matchesExpectedBinaryDictFormatVersionForThisType(). public static final int VERSION2 = 2; + public static final int VERSION201 = 201; + public static final int MINIMUM_SUPPORTED_VERSION_OF_CODE_POINT_TABLE = VERSION201; // Dictionary version used for testing. public static final int VERSION4_ONLY_FOR_TESTING = 399; public static final int VERSION401 = 401; diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java index f8b68e0ce..a96b3fd0a 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java +++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java @@ -312,7 +312,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { final DictBuffer dictBuffer = new ByteArrayDictBuffer(buffer); for (final String word : sWords) { Arrays.fill(buffer, (byte) 0); - CharEncoding.writeString(buffer, 0, word); + CharEncoding.writeString(buffer, 0, word, null); dictBuffer.position(0); final String str = CharEncoding.readString(dictBuffer); assertEquals(word, str); diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java index 96604a197..1f3ee19af 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java +++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java @@ -17,11 +17,11 @@ package com.android.inputmethod.latin.makedict; import com.android.inputmethod.annotations.UsedForTesting; - import java.io.File; import java.io.IOException; import java.io.OutputStream; import java.nio.ByteBuffer; +import java.util.HashMap; /** * Decodes binary files for a FusionDictionary. @@ -109,15 +109,19 @@ public final class BinaryDictDecoderUtils { * A class grouping utility function for our specific character encoding. */ static final class CharEncoding { - private static final int MINIMAL_ONE_BYTE_CHARACTER_VALUE = 0x20; - private static final int MAXIMAL_ONE_BYTE_CHARACTER_VALUE = 0xFF; /** * Helper method to find out whether this code fits on one byte */ - private static boolean fitsOnOneByte(final int character) { - return character >= MINIMAL_ONE_BYTE_CHARACTER_VALUE - && character <= MAXIMAL_ONE_BYTE_CHARACTER_VALUE; + private static boolean fitsOnOneByte(int character, + final HashMap codePointToOneByteCodeMap) { + if (codePointToOneByteCodeMap != null) { + if (codePointToOneByteCodeMap.containsKey(character)) { + character = codePointToOneByteCodeMap.get(character); + } + } + return character >= FormatSpec.MINIMAL_ONE_BYTE_CHARACTER_VALUE + && character <= FormatSpec.MAXIMAL_ONE_BYTE_CHARACTER_VALUE; } /** @@ -137,9 +141,10 @@ public final class BinaryDictDecoderUtils { * @param character the character code. * @return the size in binary encoded-form, either 1 or 3 bytes. */ - static int getCharSize(final int character) { + static int getCharSize(final int character, + final HashMap codePointToOneByteCodeMap) { // See char encoding in FusionDictionary.java - if (fitsOnOneByte(character)) return 1; + if (fitsOnOneByte(character, codePointToOneByteCodeMap)) return 1; if (FormatSpec.INVALID_CHARACTER == character) return 1; return 3; } @@ -147,9 +152,10 @@ public final class BinaryDictDecoderUtils { /** * Compute the byte size of a character array. */ - static int getCharArraySize(final int[] chars) { + static int getCharArraySize(final int[] chars, + final HashMap codePointToOneByteCodeMap) { int size = 0; - for (int character : chars) size += getCharSize(character); + for (int character : chars) size += getCharSize(character, codePointToOneByteCodeMap); return size; } @@ -159,11 +165,19 @@ public final class BinaryDictDecoderUtils { * @param codePoints the code point array to write. * @param buffer the byte buffer to write to. * @param index the index in buffer to write the character array to. + * @param codePointToOneByteCodeMap the map to convert the code point. * @return the index after the last character. */ - static int writeCharArray(final int[] codePoints, final byte[] buffer, int index) { + static int writeCharArray(final int[] codePoints, final byte[] buffer, int index, + final HashMap codePointToOneByteCodeMap) { for (int codePoint : codePoints) { - if (1 == getCharSize(codePoint)) { + if (codePointToOneByteCodeMap != null) { + if (codePointToOneByteCodeMap.containsKey(codePoint)) { + // Convert code points + codePoint = codePointToOneByteCodeMap.get(codePoint); + } + } + if (1 == getCharSize(codePoint, codePointToOneByteCodeMap)) { buffer[index++] = (byte)codePoint; } else { buffer[index++] = (byte)(0xFF & (codePoint >> 16)); @@ -184,12 +198,19 @@ public final class BinaryDictDecoderUtils { * @param word the string to write. * @return the size written, in bytes. */ - static int writeString(final byte[] buffer, final int origin, final String word) { + static int writeString(final byte[] buffer, final int origin, final String word, + final HashMap codePointToOneByteCodeMap) { final int length = word.length(); int index = origin; for (int i = 0; i < length; i = word.offsetByCodePoints(i, 1)) { - final int codePoint = word.codePointAt(i); - if (1 == getCharSize(codePoint)) { + int codePoint = word.codePointAt(i); + if (codePointToOneByteCodeMap != null) { + if (codePointToOneByteCodeMap.containsKey(codePoint)) { + // Convert code points + codePoint = codePointToOneByteCodeMap.get(codePoint); + } + } + if (1 == getCharSize(codePoint, codePointToOneByteCodeMap)) { buffer[index++] = (byte)codePoint; } else { buffer[index++] = (byte)(0xFF & (codePoint >> 16)); @@ -210,12 +231,13 @@ public final class BinaryDictDecoderUtils { * @param word the string to write. * @return the size written, in bytes. */ - static int writeString(final OutputStream stream, final String word) throws IOException { + static int writeString(final OutputStream stream, final String word, + final HashMap codePointToOneByteCodeMap) throws IOException { final int length = word.length(); int written = 0; for (int i = 0; i < length; i = word.offsetByCodePoints(i, 1)) { final int codePoint = word.codePointAt(i); - final int charSize = getCharSize(codePoint); + final int charSize = getCharSize(codePoint, codePointToOneByteCodeMap); if (1 == charSize) { stream.write((byte) codePoint); } else { @@ -253,7 +275,7 @@ public final class BinaryDictDecoderUtils { */ static int readChar(final DictBuffer dictBuffer) { int character = dictBuffer.readUnsignedByte(); - if (!fitsOnOneByte(character)) { + if (!fitsOnOneByte(character, null)) { if (FormatSpec.PTNODE_CHARACTERS_TERMINATOR == character) { return FormatSpec.INVALID_CHARACTER; } diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java index 12290e6aa..2d536d822 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java +++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java @@ -61,8 +61,9 @@ public class BinaryDictEncoderUtils { * @param characters the character array * @return the size of the char array, including the terminator if any */ - static int getPtNodeCharactersSize(final int[] characters) { - int size = CharEncoding.getCharArraySize(characters); + static int getPtNodeCharactersSize(final int[] characters, + final HashMap codePointToOneByteCodeMap) { + int size = CharEncoding.getCharArraySize(characters, codePointToOneByteCodeMap); if (characters.length > 1) size += FormatSpec.PTNODE_TERMINATOR_SIZE; return size; } @@ -76,8 +77,9 @@ public class BinaryDictEncoderUtils { * @param ptNode the PtNode * @return the size of the char array, including the terminator if any */ - private static int getPtNodeCharactersSize(final PtNode ptNode) { - return getPtNodeCharactersSize(ptNode.mChars); + private static int getPtNodeCharactersSize(final PtNode ptNode, + final HashMap codePointToOneByteCodeMap) { + return getPtNodeCharactersSize(ptNode.mChars, codePointToOneByteCodeMap); } /** @@ -92,13 +94,14 @@ public class BinaryDictEncoderUtils { /** * Compute the size of a shortcut in bytes. */ - private static int getShortcutSize(final WeightedString shortcut) { + private static int getShortcutSize(final WeightedString shortcut, + final HashMap codePointToOneByteCodeMap) { int size = FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE; final String word = shortcut.mWord; final int length = word.length(); for (int i = 0; i < length; i = word.offsetByCodePoints(i, 1)) { final int codePoint = word.codePointAt(i); - size += CharEncoding.getCharSize(codePoint); + size += CharEncoding.getCharSize(codePoint, codePointToOneByteCodeMap); } size += FormatSpec.PTNODE_TERMINATOR_SIZE; return size; @@ -110,11 +113,12 @@ public class BinaryDictEncoderUtils { * This is known in advance and does not change according to position in the file * like address lists do. */ - static int getShortcutListSize(final ArrayList shortcutList) { + static int getShortcutListSize(final ArrayList shortcutList, + final HashMap codePointToOneByteCodeMap) { if (null == shortcutList || shortcutList.isEmpty()) return 0; int size = FormatSpec.PTNODE_SHORTCUT_LIST_SIZE_SIZE; for (final WeightedString shortcut : shortcutList) { - size += getShortcutSize(shortcut); + size += getShortcutSize(shortcut, codePointToOneByteCodeMap); } return size; } @@ -125,14 +129,16 @@ public class BinaryDictEncoderUtils { * @param ptNode the PtNode to compute the size of. * @return the maximum size of the PtNode. */ - private static int getPtNodeMaximumSize(final PtNode ptNode) { - int size = getNodeHeaderSize(ptNode); + private static int getPtNodeMaximumSize(final PtNode ptNode, + final HashMap codePointToOneByteCodeMap) { + int size = getNodeHeaderSize(ptNode, codePointToOneByteCodeMap); if (ptNode.isTerminal()) { // If terminal, one byte for the frequency. size += FormatSpec.PTNODE_FREQUENCY_SIZE; } size += FormatSpec.PTNODE_MAX_ADDRESS_SIZE; // For children address - size += getShortcutListSize(ptNode.mShortcutTargets); + // TODO: Use codePointToOneByteCodeMap for shortcuts. + size += getShortcutListSize(ptNode.mShortcutTargets, null /* codePointToOneByteCodeMap */); if (null != ptNode.mBigrams) { size += (FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE + FormatSpec.PTNODE_ATTRIBUTE_MAX_ADDRESS_SIZE) @@ -148,10 +154,11 @@ public class BinaryDictEncoderUtils { * * @param ptNodeArray the node array to compute the maximum size of. */ - private static void calculatePtNodeArrayMaximumSize(final PtNodeArray ptNodeArray) { + private static void calculatePtNodeArrayMaximumSize(final PtNodeArray ptNodeArray, + final HashMap codePointToOneByteCodeMap) { int size = getPtNodeCountSize(ptNodeArray); for (PtNode node : ptNodeArray.mData) { - final int nodeSize = getPtNodeMaximumSize(node); + final int nodeSize = getPtNodeMaximumSize(node, codePointToOneByteCodeMap); node.mCachedSize = nodeSize; size += nodeSize; } @@ -163,8 +170,10 @@ public class BinaryDictEncoderUtils { * * @param ptNode the PtNode of which to compute the size of the header */ - private static int getNodeHeaderSize(final PtNode ptNode) { - return FormatSpec.PTNODE_FLAGS_SIZE + getPtNodeCharactersSize(ptNode); + private static int getNodeHeaderSize(final PtNode ptNode, + final HashMap codePointToOneByteCodeMap) { + return FormatSpec.PTNODE_FLAGS_SIZE + getPtNodeCharactersSize(ptNode, + codePointToOneByteCodeMap); } /** @@ -367,7 +376,8 @@ public class BinaryDictEncoderUtils { * @return false if none of the cached addresses inside the node array changed, true otherwise. */ private static boolean computeActualPtNodeArraySize(final PtNodeArray ptNodeArray, - final FusionDictionary dict) { + final FusionDictionary dict, + final HashMap codePointToOneByteCodeMap) { boolean changed = false; int size = getPtNodeCountSize(ptNodeArray); for (PtNode ptNode : ptNodeArray.mData) { @@ -375,7 +385,7 @@ public class BinaryDictEncoderUtils { if (ptNode.mCachedAddressAfterUpdate != ptNode.mCachedAddressBeforeUpdate) { changed = true; } - int nodeSize = getNodeHeaderSize(ptNode); + int nodeSize = getNodeHeaderSize(ptNode, codePointToOneByteCodeMap); if (ptNode.isTerminal()) { nodeSize += FormatSpec.PTNODE_FREQUENCY_SIZE; } @@ -383,7 +393,9 @@ public class BinaryDictEncoderUtils { nodeSize += getByteSize(getOffsetToTargetNodeArrayDuringUpdate(ptNodeArray, nodeSize + size, ptNode.mChildren)); } - nodeSize += getShortcutListSize(ptNode.mShortcutTargets); + // TODO: Use codePointToOneByteCodeMap for shortcuts. + nodeSize += getShortcutListSize(ptNode.mShortcutTargets, + null /* codePointToOneByteCodeMap */); if (null != ptNode.mBigrams) { for (WeightedString bigram : ptNode.mBigrams) { final int offset = getOffsetToTargetPtNodeDuringUpdate(ptNodeArray, @@ -454,10 +466,11 @@ public class BinaryDictEncoderUtils { * @return the same array it was passed. The nodes have been updated for address and size. */ /* package */ static ArrayList computeAddresses(final FusionDictionary dict, - final ArrayList flatNodes) { + final ArrayList flatNodes, + final HashMap codePointToOneByteCodeMap) { // First get the worst possible sizes and offsets for (final PtNodeArray n : flatNodes) { - calculatePtNodeArrayMaximumSize(n); + calculatePtNodeArrayMaximumSize(n, codePointToOneByteCodeMap); } final int offset = initializePtNodeArraysCachedAddresses(flatNodes); @@ -472,7 +485,8 @@ public class BinaryDictEncoderUtils { for (final PtNodeArray ptNodeArray : flatNodes) { ptNodeArray.mCachedAddressAfterUpdate = ptNodeArrayStartOffset; final int oldNodeArraySize = ptNodeArray.mCachedSize; - final boolean changed = computeActualPtNodeArraySize(ptNodeArray, dict); + final boolean changed = computeActualPtNodeArraySize(ptNodeArray, dict, + codePointToOneByteCodeMap); final int newNodeArraySize = ptNodeArray.mCachedSize; if (oldNodeArraySize < newNodeArraySize) { throw new RuntimeException("Increased size ?!"); @@ -686,9 +700,10 @@ public class BinaryDictEncoderUtils { + (frequency & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY); } - /* package */ static final int getChildrenPosition(final PtNode ptNode) { + /* package */ static final int getChildrenPosition(final PtNode ptNode, + final HashMap codePointToOneByteCodeMap) { int positionOfChildrenPosField = ptNode.mCachedAddressAfterUpdate - + getNodeHeaderSize(ptNode); + + getNodeHeaderSize(ptNode, codePointToOneByteCodeMap); if (ptNode.isTerminal()) { // A terminal node has the frequency. // If positionOfChildrenPosField is incorrect, we may crash when jumping to the children @@ -705,10 +720,12 @@ public class BinaryDictEncoderUtils { * @param dict the dictionary the node array is a part of (for relative offsets). * @param dictEncoder the dictionary encoder. * @param ptNodeArray the node array to write. + * @param codePointToOneByteCodeMap the map to convert the code points. */ @SuppressWarnings("unused") /* package */ static void writePlacedPtNodeArray(final FusionDictionary dict, - final DictEncoder dictEncoder, final PtNodeArray ptNodeArray) { + final DictEncoder dictEncoder, final PtNodeArray ptNodeArray, + final HashMap codePointToOneByteCodeMap) { // TODO: Make the code in common with BinaryDictIOUtils#writePtNode dictEncoder.setPosition(ptNodeArray.mCachedAddressAfterUpdate); @@ -727,7 +744,7 @@ public class BinaryDictEncoderUtils { + FormatSpec.MAX_TERMINAL_FREQUENCY + " : " + ptNode.mProbabilityInfo.toString()); } - dictEncoder.writePtNode(ptNode, dict); + dictEncoder.writePtNode(ptNode, dict, codePointToOneByteCodeMap); } if (dictEncoder.getPosition() != ptNodeArray.mCachedAddressAfterUpdate + ptNodeArray.mCachedSize) { @@ -834,12 +851,16 @@ public class BinaryDictEncoderUtils { // Write out the options. for (final String key : dict.mOptions.mAttributes.keySet()) { final String value = dict.mOptions.mAttributes.get(key); - CharEncoding.writeString(headerBuffer, key); - CharEncoding.writeString(headerBuffer, value); + CharEncoding.writeString(headerBuffer, key, null); + CharEncoding.writeString(headerBuffer, value, null); + } + // Write out the codePointTable if there is codePointOccurrenceArray. + if (codePointOccurrenceArray != null) { + final String codePointTableString = + encodeCodePointTable(codePointOccurrenceArray); + CharEncoding.writeString(headerBuffer, DictionaryHeader.CODE_POINT_TABLE_KEY, null); + CharEncoding.writeString(headerBuffer, codePointTableString, null); } - - // TODO: Write out the code point table. - final int size = headerBuffer.size(); final byte[] bytes = headerBuffer.toByteArray(); // Write out the header size. @@ -857,10 +878,30 @@ public class BinaryDictEncoderUtils { final HashMap mCodePointToOneByteCodeMap; final ArrayList> mCodePointOccurrenceArray; + // Let code point table empty for version 200 dictionary which used in test + CodePointTable() { + mCodePointToOneByteCodeMap = null; + mCodePointOccurrenceArray = null; + } + CodePointTable(final HashMap codePointToOneByteCodeMap, final ArrayList> codePointOccurrenceArray) { mCodePointToOneByteCodeMap = codePointToOneByteCodeMap; mCodePointOccurrenceArray = codePointOccurrenceArray; } } + + private static String encodeCodePointTable( + final ArrayList> codePointOccurrenceArray) { + final StringBuilder codePointTableString = new StringBuilder(); + int currentCodePointTableIndex = FormatSpec.MINIMAL_ONE_BYTE_CHARACTER_VALUE; + for (final Entry entry : codePointOccurrenceArray) { + // Native reads the table as a string + codePointTableString.appendCodePoint(entry.getKey()); + if (FormatSpec.MAXIMAL_ONE_BYTE_CHARACTER_VALUE < ++currentCodePointTableIndex) { + break; + } + } + return codePointTableString.toString(); + } } diff --git a/tests/src/com/android/inputmethod/latin/makedict/DictEncoder.java b/tests/src/com/android/inputmethod/latin/makedict/DictEncoder.java index 645fd5c02..10dd00325 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/DictEncoder.java +++ b/tests/src/com/android/inputmethod/latin/makedict/DictEncoder.java @@ -21,6 +21,7 @@ import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode; import java.io.IOException; +import java.util.HashMap; /** * An interface of binary dictionary encoder. @@ -33,5 +34,6 @@ public interface DictEncoder { public void setPosition(final int position); public int getPosition(); public void writePtNodeCount(final int ptNodeCount); - public void writePtNode(final PtNode ptNode, final FusionDictionary dict); + public void writePtNode(final PtNode ptNode, final FusionDictionary dict, + final HashMap codePointToOneByteCodeMap); } diff --git a/tests/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java b/tests/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java index 18f4bcf5f..6227f13e1 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java +++ b/tests/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java @@ -177,7 +177,8 @@ public class Ver2DictDecoder extends AbstractDictDecoder { if (header == null) { throw new IOException("Cannot read the dictionary header."); } - if (header.mFormatOptions.mVersion != FormatSpec.VERSION2) { + if (header.mFormatOptions.mVersion != FormatSpec.VERSION2 && + header.mFormatOptions.mVersion != FormatSpec.VERSION201) { throw new UnsupportedFormatException("File header has a wrong version : " + header.mFormatOptions.mVersion); } @@ -200,19 +201,19 @@ public class Ver2DictDecoder extends AbstractDictDecoder { if (0 != (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS)) { int index = 0; int character = CharEncoding.readChar(mDictBuffer); - addressPointer += CharEncoding.getCharSize(character); + addressPointer += CharEncoding.getCharSize(character, null); while (FormatSpec.INVALID_CHARACTER != character) { // FusionDictionary is making sure that the length of the word is smaller than // MAX_WORD_LENGTH. // So we'll never write past the end of mCharacterBuffer. mCharacterBuffer[index++] = character; character = CharEncoding.readChar(mDictBuffer); - addressPointer += CharEncoding.getCharSize(character); + addressPointer += CharEncoding.getCharSize(character, null); } characters = Arrays.copyOfRange(mCharacterBuffer, 0, index); } else { final int character = CharEncoding.readChar(mDictBuffer); - addressPointer += CharEncoding.getCharSize(character); + addressPointer += CharEncoding.getCharSize(character, null); characters = new int[] { character }; } final ProbabilityInfo probabilityInfo; diff --git a/tests/src/com/android/inputmethod/latin/makedict/Ver2DictEncoder.java b/tests/src/com/android/inputmethod/latin/makedict/Ver2DictEncoder.java index c47190190..eabde4620 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/Ver2DictEncoder.java +++ b/tests/src/com/android/inputmethod/latin/makedict/Ver2DictEncoder.java @@ -124,7 +124,7 @@ public class Ver2DictEncoder implements DictEncoder { @Override public void writeDictionary(final FusionDictionary dict, final FormatOptions formatOptions) throws IOException, UnsupportedFormatException { - if (formatOptions.mVersion > FormatSpec.VERSION2) { + if (formatOptions.mVersion > FormatSpec.VERSION201) { throw new UnsupportedFormatException( "The given format options has wrong version number : " + formatOptions.mVersion); @@ -135,7 +135,13 @@ public class Ver2DictEncoder implements DictEncoder { } // Make code point conversion table ordered by occurrence of code points - final CodePointTable codePointTable = makeCodePointTable(dict); + // Version 201 or later have codePointTable + final CodePointTable codePointTable; + if (formatOptions.mVersion >= FormatSpec.MINIMUM_SUPPORTED_VERSION_OF_CODE_POINT_TABLE) { + codePointTable = makeCodePointTable(dict); + } else { + codePointTable = new CodePointTable(); + } BinaryDictEncoderUtils.writeDictionaryHeader(mOutStream, dict, formatOptions, codePointTable.mCodePointOccurrenceArray); @@ -152,7 +158,8 @@ public class Ver2DictEncoder implements DictEncoder { ArrayList flatNodes = BinaryDictEncoderUtils.flattenTree(dict.mRootNodeArray); MakedictLog.i("Computing addresses..."); - BinaryDictEncoderUtils.computeAddresses(dict, flatNodes); + BinaryDictEncoderUtils.computeAddresses(dict, flatNodes, + codePointTable.mCodePointToOneByteCodeMap); MakedictLog.i("Checking PtNode array..."); if (MakedictLog.DBG) BinaryDictEncoderUtils.checkFlatPtNodeArrayList(flatNodes); @@ -164,7 +171,8 @@ public class Ver2DictEncoder implements DictEncoder { MakedictLog.i("Writing file..."); for (PtNodeArray nodeArray : flatNodes) { - BinaryDictEncoderUtils.writePlacedPtNodeArray(dict, this, nodeArray); + BinaryDictEncoderUtils.writePlacedPtNodeArray(dict, this, nodeArray, + codePointTable.mCodePointToOneByteCodeMap); } if (MakedictLog.DBG) BinaryDictEncoderUtils.showStatistics(flatNodes); mOutStream.write(mBuffer, 0, mPosition); @@ -196,15 +204,19 @@ public class Ver2DictEncoder implements DictEncoder { countSize); } - private void writePtNodeFlags(final PtNode ptNode) { - final int childrenPos = BinaryDictEncoderUtils.getChildrenPosition(ptNode); + private void writePtNodeFlags(final PtNode ptNode, + final HashMap codePointToOneByteCodeMap) { + final int childrenPos = BinaryDictEncoderUtils.getChildrenPosition(ptNode, + codePointToOneByteCodeMap); mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition, BinaryDictEncoderUtils.makePtNodeFlags(ptNode, childrenPos), FormatSpec.PTNODE_FLAGS_SIZE); } - private void writeCharacters(final int[] codePoints, final boolean hasSeveralChars) { - mPosition = CharEncoding.writeCharArray(codePoints, mBuffer, mPosition); + private void writeCharacters(final int[] codePoints, final boolean hasSeveralChars, + final HashMap codePointToOneByteCodeMap) { + mPosition = CharEncoding.writeCharArray(codePoints, mBuffer, mPosition, + codePointToOneByteCodeMap); if (hasSeveralChars) { mBuffer[mPosition++] = FormatSpec.PTNODE_CHARACTERS_TERMINATOR; } @@ -217,8 +229,10 @@ public class Ver2DictEncoder implements DictEncoder { } } - private void writeChildrenPosition(final PtNode ptNode) { - final int childrenPos = BinaryDictEncoderUtils.getChildrenPosition(ptNode); + private void writeChildrenPosition(final PtNode ptNode, + final HashMap codePointToOneByteCodeMap) { + final int childrenPos = BinaryDictEncoderUtils.getChildrenPosition(ptNode, + codePointToOneByteCodeMap); mPosition += BinaryDictEncoderUtils.writeChildrenPosition(mBuffer, mPosition, childrenPos); } @@ -228,7 +242,8 @@ public class Ver2DictEncoder implements DictEncoder { * * @param shortcuts the shortcut attributes list. */ - private void writeShortcuts(final ArrayList shortcuts) { + private void writeShortcuts(final ArrayList shortcuts, + final HashMap codePointToOneByteCodeMap) { if (null == shortcuts || shortcuts.isEmpty()) return; final int indexOfShortcutByteSize = mPosition; @@ -241,7 +256,8 @@ public class Ver2DictEncoder implements DictEncoder { target.getProbability()); mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition, shortcutFlags, FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE); - final int shortcutShift = CharEncoding.writeString(mBuffer, mPosition, target.mWord); + final int shortcutShift = CharEncoding.writeString(mBuffer, mPosition, target.mWord, + codePointToOneByteCodeMap); mPosition += shortcutShift; } final int shortcutByteSize = mPosition - indexOfShortcutByteSize; @@ -281,12 +297,14 @@ public class Ver2DictEncoder implements DictEncoder { } @Override - public void writePtNode(final PtNode ptNode, final FusionDictionary dict) { - writePtNodeFlags(ptNode); - writeCharacters(ptNode.mChars, ptNode.hasSeveralChars()); + public void writePtNode(final PtNode ptNode, final FusionDictionary dict, + final HashMap codePointToOneByteCodeMap) { + writePtNodeFlags(ptNode, codePointToOneByteCodeMap); + writeCharacters(ptNode.mChars, ptNode.hasSeveralChars(), codePointToOneByteCodeMap); writeFrequency(ptNode.getProbability()); - writeChildrenPosition(ptNode); - writeShortcuts(ptNode.mShortcutTargets); + writeChildrenPosition(ptNode, codePointToOneByteCodeMap); + // TODO: Use codePointToOneByteCodeMap for shortcuts. + writeShortcuts(ptNode.mShortcutTargets, null /* codePointToOneByteCodeMap */); writeBigrams(ptNode.mBigrams, dict); } } diff --git a/tests/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java b/tests/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java index 401ffde6d..3262a1623 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java +++ b/tests/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java @@ -27,6 +27,7 @@ import com.android.inputmethod.latin.utils.LocaleUtils; import java.io.File; import java.io.IOException; +import java.util.HashMap; /** * An implementation of DictEncoder for version 4 binary dictionary. @@ -142,6 +143,7 @@ public class Ver4DictEncoder implements DictEncoder { } @Override - public void writePtNode(PtNode ptNode, FusionDictionary dict) { + public void writePtNode(PtNode ptNode, FusionDictionary dict, + HashMap codePointToOneByteCodeMap) { } } diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java index 44f9695cf..5dfb7bf11 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java @@ -158,7 +158,7 @@ public class DictionaryMaker { String outputBinary = null; String outputXml = null; String outputCombined = null; - int outputBinaryFormatVersion = 2; // the default version is 2. + int outputBinaryFormatVersion = FormatSpec.VERSION201; // the default version is 201. // Don't use code point table by default. int codePointTableMode = Ver2DictEncoder.CODE_POINT_TABLE_OFF;