From 14087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4 Mon Sep 17 00:00:00 2001 From: Yuichiro Hanada Date: Tue, 20 Aug 2013 21:05:05 +0900 Subject: [PATCH] Add Ver4DictDecoder. Bug: 9618601 Change-Id: I43c5840505c6a847aaf4893a400392ccd45903c0 --- .../makedict/BinaryDictDecoderUtils.java | 4 +- .../makedict/BinaryDictEncoderUtils.java | 4 +- .../latin/makedict/BinaryDictIOUtils.java | 4 +- .../latin/makedict/DictDecoder.java | 206 ++++++++++++-- .../latin/makedict/FormatSpec.java | 18 +- .../latin/makedict/Ver3DictDecoder.java | 178 +----------- .../latin/makedict/Ver3DictEncoder.java | 2 +- .../latin/makedict/Ver4DictDecoder.java | 259 ++++++++++++++++++ .../BinaryDictDecoderEncoderTests.java | 104 +++++-- 9 files changed, 569 insertions(+), 210 deletions(-) create mode 100644 java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java index 5b319ad90..665c7a27c 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java @@ -498,7 +498,7 @@ public final class BinaryDictDecoderUtils { // reach the end of the array. if (options.mSupportsDynamicUpdate) { - final boolean hasValidForwardLink = dictDecoder.readForwardLinkAndAdvancePosition(); + final boolean hasValidForwardLink = dictDecoder.readAndFollowForwardLink(); if (!hasValidForwardLink) break; } } while (options.mSupportsDynamicUpdate && dictDecoder.hasNextPtNodeArray()); @@ -550,7 +550,7 @@ public final class BinaryDictDecoderUtils { * @return the created (or merged) dictionary. */ @UsedForTesting - /* package */ static FusionDictionary readDictionaryBinary(final Ver3DictDecoder dictDecoder, + /* package */ static FusionDictionary readDictionaryBinary(final DictDecoder dictDecoder, final FusionDictionary dict) throws IOException, UnsupportedFormatException { // Read header final FileHeader fileHeader = dictDecoder.readHeader(); diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java index 70931f885..4dba8e5cf 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java @@ -368,9 +368,9 @@ public class BinaryDictEncoderUtils { if (null != ptNode.mBigrams) { for (WeightedString bigram : ptNode.mBigrams) { final int offset = getOffsetToTargetPtNodeDuringUpdate(ptNodeArray, - nodeSize + size + FormatSpec.PTNODE_FLAGS_SIZE, + nodeSize + size + FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE, FusionDictionary.findWordInTree(dict.mRootNodeArray, bigram.mWord)); - nodeSize += getByteSize(offset) + FormatSpec.PTNODE_FLAGS_SIZE; + nodeSize += getByteSize(offset) + FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE; } } ptNode.mCachedSize = nodeSize; diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java index 2c5e93e5c..a282f595c 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java @@ -114,7 +114,7 @@ public final class BinaryDictIOUtils { if (p.mPosition == p.mNumOfPtNode) { if (formatOptions.mSupportsDynamicUpdate) { final boolean hasValidForwardLinkAddress = - dictDecoder.readForwardLinkAndAdvancePosition(); + dictDecoder.readAndFollowForwardLink(); if (hasValidForwardLinkAddress && dictDecoder.hasNextPtNodeArray()) { // The node array has a forward link. p.mNumOfPtNode = Position.NOT_READ_PTNODE_COUNT; @@ -233,7 +233,7 @@ public final class BinaryDictIOUtils { } final boolean hasValidForwardLinkAddress = - dictDecoder.readForwardLinkAndAdvancePosition(); + dictDecoder.readAndFollowForwardLink(); if (!hasValidForwardLinkAddress || !dictDecoder.hasNextPtNodeArray()) { return FormatSpec.NOT_VALID_WORD; } diff --git a/java/src/com/android/inputmethod/latin/makedict/DictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/DictDecoder.java index 40e852423..3796a466c 100644 --- a/java/src/com/android/inputmethod/latin/makedict/DictDecoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/DictDecoder.java @@ -17,9 +17,11 @@ package com.android.inputmethod.latin.makedict; import com.android.inputmethod.annotations.UsedForTesting; +import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; +import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import com.android.inputmethod.latin.utils.ByteArrayDictBuffer; import java.io.File; @@ -30,13 +32,50 @@ import java.io.RandomAccessFile; import java.nio.ByteBuffer; import java.nio.channels.FileChannel; import java.util.ArrayList; +import java.util.HashMap; import java.util.TreeMap; /** - * An interface of binary dictionary decoder. + * The base class of binary dictionary decoders. */ -public interface DictDecoder { - public FileHeader readHeader() throws IOException, UnsupportedFormatException; +public abstract class DictDecoder { + + protected FileHeader readHeader(final DictBuffer dictBuffer) + throws IOException, UnsupportedFormatException { + if (dictBuffer == null) { + openDictBuffer(); + } + + final int version = HeaderReader.readVersion(dictBuffer); + if (version < FormatSpec.MINIMUM_SUPPORTED_VERSION + || version > FormatSpec.MAXIMUM_SUPPORTED_VERSION) { + throw new UnsupportedFormatException("Unsupported version : " + version); + } + // TODO: Remove this field. + final int optionsFlags = HeaderReader.readOptionFlags(dictBuffer); + + final int headerSize = HeaderReader.readHeaderSize(dictBuffer); + + if (headerSize < 0) { + throw new UnsupportedFormatException("header size can't be negative."); + } + + final HashMap attributes = HeaderReader.readAttributes(dictBuffer, + headerSize); + + final FileHeader header = new FileHeader(headerSize, + new FusionDictionary.DictionaryOptions(attributes, + 0 != (optionsFlags & FormatSpec.GERMAN_UMLAUT_PROCESSING_FLAG), + 0 != (optionsFlags & FormatSpec.FRENCH_LIGATURE_PROCESSING_FLAG)), + new FormatOptions(version, + 0 != (optionsFlags & FormatSpec.SUPPORTS_DYNAMIC_UPDATE))); + return header; + } + + /** + * Reads and returns the file header. + */ + public abstract FileHeader readHeader() throws IOException, UnsupportedFormatException; /** * Reads PtNode from nodeAddress. @@ -44,7 +83,7 @@ public interface DictDecoder { * @param formatOptions the format options. * @return PtNodeInfo. */ - public PtNodeInfo readPtNode(final int ptNodePos, final FormatOptions formatOptions); + public abstract PtNodeInfo readPtNode(final int ptNodePos, final FormatOptions formatOptions); /** * Reads a buffer and returns the memory representation of the dictionary. @@ -59,9 +98,9 @@ public interface DictDecoder { * @return the created (or merged) dictionary. */ @UsedForTesting - public FusionDictionary readDictionaryBinary(final FusionDictionary dict, + public abstract FusionDictionary readDictionaryBinary(final FusionDictionary dict, final boolean deleteDictIfBroken) - throws FileNotFoundException, IOException, UnsupportedFormatException; + throws FileNotFoundException, IOException, UnsupportedFormatException; /** * Gets the address of the last PtNode of the exact matching word in the dictionary. @@ -74,7 +113,12 @@ public interface DictDecoder { */ @UsedForTesting public int getTerminalPosition(final String word) - throws IOException, UnsupportedFormatException; + throws IOException, UnsupportedFormatException { + if (!isDictBufferOpen()) { + openDictBuffer(); + } + return BinaryDictIOUtils.getTerminalPosition(this, word); + } /** * Reads unigrams and bigrams from the binary file. @@ -86,50 +130,56 @@ public interface DictDecoder { * @throws IOException if the file can't be read. * @throws UnsupportedFormatException if the format of the file is not recognized. */ + @UsedForTesting public void readUnigramsAndBigramsBinary(final TreeMap words, final TreeMap frequencies, final TreeMap> bigrams) - throws IOException, UnsupportedFormatException; + throws IOException, UnsupportedFormatException { + if (!isDictBufferOpen()) { + openDictBuffer(); + } + BinaryDictIOUtils.readUnigramsAndBigramsBinary(this, words, frequencies, bigrams); + } /** * Sets the position of the buffer to the given value. * * @param newPos the new position */ - public void setPosition(final int newPos); + public abstract void setPosition(final int newPos); /** * Gets the position of the buffer. * * @return the position */ - public int getPosition(); + public abstract int getPosition(); /** * Reads and returns the PtNode count out of a buffer and forwards the pointer. */ - public int readPtNodeCount(); + public abstract int readPtNodeCount(); /** * Reads the forward link and advances the position. * - * @return if this method advances the position then true else false. + * @return true if this method moves the file pointer, false otherwise. */ - public boolean readForwardLinkAndAdvancePosition(); - public boolean hasNextPtNodeArray(); + public abstract boolean readAndFollowForwardLink(); + public abstract boolean hasNextPtNodeArray(); /** * Opens the dictionary file and makes DictBuffer. */ @UsedForTesting - public void openDictBuffer() throws FileNotFoundException, IOException; + public abstract void openDictBuffer() throws FileNotFoundException, IOException; @UsedForTesting - public boolean isOpenedDictBuffer(); + public abstract boolean isDictBufferOpen(); - // Flags for DictionaryBufferFactory. + // Constants for DictionaryBufferFactory. public static final int USE_READONLY_BYTEBUFFER = 0x01000000; public static final int USE_BYTEARRAY = 0x02000000; - public static final int USE_WRITABLE_BYTEBUFFER = 0x04000000; + public static final int USE_WRITABLE_BYTEBUFFER = 0x03000000; public static final int MASK_DICTBUFFER = 0x0F000000; public interface DictionaryBufferFactory { @@ -221,4 +271,124 @@ public interface DictDecoder { return null; } } + + /** + * A utility class for reading a file header. + */ + protected static class HeaderReader { + protected static int readVersion(final DictBuffer dictBuffer) + throws IOException, UnsupportedFormatException { + return BinaryDictDecoderUtils.checkFormatVersion(dictBuffer); + } + + protected static int readOptionFlags(final DictBuffer dictBuffer) { + return dictBuffer.readUnsignedShort(); + } + + protected static int readHeaderSize(final DictBuffer dictBuffer) { + return dictBuffer.readInt(); + } + + protected static HashMap readAttributes(final DictBuffer dictBuffer, + final int headerSize) { + final HashMap attributes = new HashMap(); + while (dictBuffer.position() < headerSize) { + // We can avoid an infinite loop here since dictBuffer.position() is always + // increased by calling CharEncoding.readString. + final String key = CharEncoding.readString(dictBuffer); + final String value = CharEncoding.readString(dictBuffer); + attributes.put(key, value); + } + dictBuffer.position(headerSize); + return attributes; + } + } + + /** + * A utility class for reading a PtNode. + */ + protected static class PtNodeReader { + protected static int readPtNodeOptionFlags(final DictBuffer dictBuffer) { + return dictBuffer.readUnsignedByte(); + } + + protected static int readParentAddress(final DictBuffer dictBuffer, + final FormatOptions formatOptions) { + if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) { + return BinaryDictDecoderUtils.readSInt24(dictBuffer); + } else { + return FormatSpec.NO_PARENT_ADDRESS; + } + } + + protected static int readChildrenAddress(final DictBuffer dictBuffer, final int optionFlags, + final FormatOptions formatOptions) { + if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) { + final int address = BinaryDictDecoderUtils.readSInt24(dictBuffer); + if (address == 0) return FormatSpec.NO_CHILDREN_ADDRESS; + return address; + } else { + switch (optionFlags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) { + case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_ONEBYTE: + return dictBuffer.readUnsignedByte(); + case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_TWOBYTES: + return dictBuffer.readUnsignedShort(); + case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_THREEBYTES: + return dictBuffer.readUnsignedInt24(); + case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_NOADDRESS: + default: + return FormatSpec.NO_CHILDREN_ADDRESS; + } + } + } + + // Reads shortcuts and returns the read length. + protected static int readShortcut(final DictBuffer dictBuffer, + final ArrayList shortcutTargets) { + final int pointerBefore = dictBuffer.position(); + dictBuffer.readUnsignedShort(); // skip the size + while (true) { + final int targetFlags = dictBuffer.readUnsignedByte(); + final String word = CharEncoding.readString(dictBuffer); + shortcutTargets.add(new WeightedString(word, + targetFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY)); + if (0 == (targetFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break; + } + return dictBuffer.position() - pointerBefore; + } + + protected static int readBigramAddresses(final DictBuffer dictBuffer, + final ArrayList bigrams, final int baseAddress) { + int readLength = 0; + int bigramCount = 0; + while (bigramCount++ < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) { + final int bigramFlags = dictBuffer.readUnsignedByte(); + ++readLength; + final int sign = 0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_ATTR_OFFSET_NEGATIVE) + ? 1 : -1; + int bigramAddress = baseAddress + readLength; + switch (bigramFlags & FormatSpec.MASK_BIGRAM_ATTR_ADDRESS_TYPE) { + case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_ONEBYTE: + bigramAddress += sign * dictBuffer.readUnsignedByte(); + readLength += 1; + break; + case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_TWOBYTES: + bigramAddress += sign * dictBuffer.readUnsignedShort(); + readLength += 2; + break; + case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_THREEBYTES: + bigramAddress += sign * dictBuffer.readUnsignedInt24(); + readLength += 3; + break; + default: + throw new RuntimeException("Has bigrams with no address"); + } + bigrams.add(new PendingAttribute( + bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY, + bigramAddress)); + if (0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break; + } + return readLength; + } + } } diff --git a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java index 96ccd8e49..51b89a02a 100644 --- a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java +++ b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java @@ -360,18 +360,26 @@ public final class FormatSpec { * Returns new dictionary decoder. * * @param dictFile the dictionary file. - * @param bufferType the flag indicating buffer type which is used by the dictionary decoder. + * @param bufferType The type of buffer, as one of USE_* in DictDecoder. * @return new dictionary decoder if the dictionary file exists, otherwise null. */ public static DictDecoder getDictDecoder(final File dictFile, final int bufferType) { - if (!dictFile.isFile()) return null; - return new Ver3DictDecoder(dictFile, bufferType); + if (dictFile.isDirectory()) { + return new Ver4DictDecoder(dictFile, bufferType); + } else if (dictFile.isFile()) { + return new Ver3DictDecoder(dictFile, bufferType); + } + return null; } public static DictDecoder getDictDecoder(final File dictFile, final DictionaryBufferFactory factory) { - if (!dictFile.isFile()) return null; - return new Ver3DictDecoder(dictFile, factory); + if (dictFile.isDirectory()) { + return new Ver4DictDecoder(dictFile, factory); + } else if (dictFile.isFile()) { + return new Ver3DictDecoder(dictFile, factory); + } + return null; } public static DictDecoder getDictDecoder(final File dictFile) { diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver3DictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver3DictDecoder.java index 1a90a4b98..848277cd4 100644 --- a/java/src/com/android/inputmethod/latin/makedict/Ver3DictDecoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/Ver3DictDecoder.java @@ -32,14 +32,12 @@ import java.io.FileNotFoundException; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; -import java.util.HashMap; -import java.util.TreeMap; /** * An implementation of DictDecoder for version 3 binary dictionary. */ @UsedForTesting -public class Ver3DictDecoder implements DictDecoder { +public class Ver3DictDecoder extends DictDecoder { private static final String TAG = Ver3DictDecoder.class.getSimpleName(); static { @@ -49,124 +47,10 @@ public class Ver3DictDecoder implements DictDecoder { // TODO: implement something sensical instead of just a phony method private static native int doNothing(); - private final static class HeaderReader { - protected static int readVersion(final DictBuffer dictBuffer) - throws IOException, UnsupportedFormatException { - return BinaryDictDecoderUtils.checkFormatVersion(dictBuffer); - } - - protected static int readOptionFlags(final DictBuffer dictBuffer) { - return dictBuffer.readUnsignedShort(); - } - - protected static int readHeaderSize(final DictBuffer dictBuffer) { - return dictBuffer.readInt(); - } - - protected static HashMap readAttributes(final DictBuffer dictBuffer, - final int headerSize) { - final HashMap attributes = new HashMap(); - while (dictBuffer.position() < headerSize) { - // We can avoid an infinite loop here since dictBuffer.position() is always - // increased by calling CharEncoding.readString. - final String key = CharEncoding.readString(dictBuffer); - final String value = CharEncoding.readString(dictBuffer); - attributes.put(key, value); - } - dictBuffer.position(headerSize); - return attributes; - } - } - - private final static class PtNodeReader { - protected static int readPtNodeOptionFlags(final DictBuffer dictBuffer) { + protected static class PtNodeReader extends DictDecoder.PtNodeReader { + private static int readFrequency(final DictBuffer dictBuffer) { return dictBuffer.readUnsignedByte(); } - - protected static int readParentAddress(final DictBuffer dictBuffer, - final FormatOptions formatOptions) { - if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) { - return BinaryDictDecoderUtils.readSInt24(dictBuffer); - } else { - return FormatSpec.NO_PARENT_ADDRESS; - } - } - - protected static int readFrequency(final DictBuffer dictBuffer) { - return dictBuffer.readUnsignedByte(); - } - - protected static int readChildrenAddress(final DictBuffer dictBuffer, final int optionFlags, - final FormatOptions formatOptions) { - if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) { - final int address = BinaryDictDecoderUtils.readSInt24(dictBuffer); - if (address == 0) return FormatSpec.NO_CHILDREN_ADDRESS; - return address; - } else { - switch (optionFlags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) { - case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_ONEBYTE: - return dictBuffer.readUnsignedByte(); - case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_TWOBYTES: - return dictBuffer.readUnsignedShort(); - case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_THREEBYTES: - return dictBuffer.readUnsignedInt24(); - case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_NOADDRESS: - default: - return FormatSpec.NO_CHILDREN_ADDRESS; - } - } - } - - // Reads shortcuts and returns the read length. - protected static int readShortcut(final DictBuffer dictBuffer, - final ArrayList shortcutTargets) { - final int pointerBefore = dictBuffer.position(); - dictBuffer.readUnsignedShort(); // skip the size - while (true) { - final int targetFlags = dictBuffer.readUnsignedByte(); - final String word = CharEncoding.readString(dictBuffer); - shortcutTargets.add(new WeightedString(word, - targetFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY)); - if (0 == (targetFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break; - } - return dictBuffer.position() - pointerBefore; - } - - protected static int readBigrams(final DictBuffer dictBuffer, - final ArrayList bigrams, final int baseAddress) { - int readLength = 0; - int bigramCount = 0; - while (bigramCount++ < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) { - final int bigramFlags = dictBuffer.readUnsignedByte(); - ++readLength; - final int sign = 0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_ATTR_OFFSET_NEGATIVE) - ? 1 : -1; - int bigramAddress = baseAddress + readLength; - switch (bigramFlags & FormatSpec.MASK_BIGRAM_ATTR_ADDRESS_TYPE) { - case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_ONEBYTE: - bigramAddress += sign * dictBuffer.readUnsignedByte(); - readLength += 1; - break; - case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_TWOBYTES: - bigramAddress += sign * dictBuffer.readUnsignedShort(); - readLength += 2; - break; - case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_THREEBYTES: - final int offset = (dictBuffer.readUnsignedByte() << 16) - + dictBuffer.readUnsignedShort(); - bigramAddress += sign * offset; - readLength += 3; - break; - default: - throw new RuntimeException("Has bigrams with no address"); - } - bigrams.add(new PendingAttribute( - bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY, - bigramAddress)); - if (0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break; - } - return readLength; - } } private final File mDictionaryBinaryFile; @@ -199,7 +83,7 @@ public class Ver3DictDecoder implements DictDecoder { } @Override - public boolean isOpenedDictBuffer() { + public boolean isDictBufferOpen() { return mDictBuffer != null; } @@ -218,25 +102,11 @@ public class Ver3DictDecoder implements DictDecoder { if (mDictBuffer == null) { openDictBuffer(); } - - final int version = HeaderReader.readVersion(mDictBuffer); - final int optionsFlags = HeaderReader.readOptionFlags(mDictBuffer); - - final int headerSize = HeaderReader.readHeaderSize(mDictBuffer); - - if (headerSize < 0) { - throw new UnsupportedFormatException("header size can't be negative."); + final FileHeader header = super.readHeader(mDictBuffer); + final int version = header.mFormatOptions.mVersion; + if (!(version >= 2 && version <= 3)) { + throw new UnsupportedFormatException("File header has a wrong version : " + version); } - - final HashMap attributes = HeaderReader.readAttributes(mDictBuffer, - headerSize); - - final FileHeader header = new FileHeader(headerSize, - new FusionDictionary.DictionaryOptions(attributes, - 0 != (optionsFlags & FormatSpec.GERMAN_UMLAUT_PROCESSING_FLAG), - 0 != (optionsFlags & FormatSpec.FRENCH_LIGATURE_PROCESSING_FLAG)), - new FormatOptions(version, - 0 != (optionsFlags & FormatSpec.SUPPORTS_DYNAMIC_UPDATE))); return header; } @@ -246,11 +116,11 @@ public class Ver3DictDecoder implements DictDecoder { public PtNodeInfo readPtNode(final int ptNodePos, final FormatOptions options) { int addressPointer = ptNodePos; final int flags = PtNodeReader.readPtNodeOptionFlags(mDictBuffer); - ++addressPointer; + addressPointer += FormatSpec.PTNODE_FLAGS_SIZE; final int parentAddress = PtNodeReader.readParentAddress(mDictBuffer, options); if (BinaryDictIOUtils.supportsDynamicUpdate(options)) { - addressPointer += 3; + addressPointer += FormatSpec.PARENT_ADDRESS_SIZE; } final int characters[]; @@ -258,7 +128,7 @@ public class Ver3DictDecoder implements DictDecoder { int index = 0; int character = CharEncoding.readChar(mDictBuffer); addressPointer += CharEncoding.getCharSize(character); - while (-1 != character) { + while (FormatSpec.INVALID_CHARACTER != character) { // FusionDictionary is making sure that the length of the word is smaller than // MAX_WORD_LENGTH. // So we'll never write past the end of mCharacterBuffer. @@ -274,8 +144,8 @@ public class Ver3DictDecoder implements DictDecoder { } final int frequency; if (0 != (FormatSpec.FLAG_IS_TERMINAL & flags)) { - ++addressPointer; frequency = PtNodeReader.readFrequency(mDictBuffer); + addressPointer += FormatSpec.PTNODE_FREQUENCY_SIZE; } else { frequency = PtNode.NOT_A_TERMINAL; } @@ -296,7 +166,8 @@ public class Ver3DictDecoder implements DictDecoder { final ArrayList bigrams; if (0 != (flags & FormatSpec.FLAG_HAS_BIGRAMS)) { bigrams = new ArrayList(); - addressPointer += PtNodeReader.readBigrams(mDictBuffer, bigrams, addressPointer); + addressPointer += PtNodeReader.readBigramAddresses(mDictBuffer, bigrams, + addressPointer); if (bigrams.size() >= FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) { MakedictLog.d("too many bigrams in a PtNode."); } @@ -331,25 +202,6 @@ public class Ver3DictDecoder implements DictDecoder { } } - @Override - public int getTerminalPosition(String word) throws IOException, UnsupportedFormatException { - if (mDictBuffer == null) { - openDictBuffer(); - } - return BinaryDictIOUtils.getTerminalPosition(this, word); - } - - @Override - public void readUnigramsAndBigramsBinary(final TreeMap words, - final TreeMap frequencies, - final TreeMap> bigrams) - throws IOException, UnsupportedFormatException { - if (mDictBuffer == null) { - openDictBuffer(); - } - BinaryDictIOUtils.readUnigramsAndBigramsBinary(this, words, frequencies, bigrams); - } - @Override public void setPosition(int newPos) { mDictBuffer.position(newPos); @@ -366,7 +218,7 @@ public class Ver3DictDecoder implements DictDecoder { } @Override - public boolean readForwardLinkAndAdvancePosition() { + public boolean readAndFollowForwardLink() { final int nextAddress = mDictBuffer.readUnsignedInt24(); if (nextAddress >= 0 && nextAddress < mDictBuffer.limit()) { mDictBuffer.position(nextAddress); diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver3DictEncoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver3DictEncoder.java index 222a0f474..76f0f4052 100644 --- a/java/src/com/android/inputmethod/latin/makedict/Ver3DictEncoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/Ver3DictEncoder.java @@ -167,7 +167,7 @@ public class Ver3DictEncoder implements DictEncoder { } } - public void writeChildrenPosition(final PtNode ptNode, final FormatOptions formatOptions) { + private void writeChildrenPosition(final PtNode ptNode, final FormatOptions formatOptions) { final int childrenPos = BinaryDictEncoderUtils.getChildrenPosition(ptNode, formatOptions); if (formatOptions.mSupportsDynamicUpdate) { mPosition += BinaryDictEncoderUtils.writeSignedChildrenPosition(mBuffer, mPosition, diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java new file mode 100644 index 000000000..36c5a2720 --- /dev/null +++ b/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java @@ -0,0 +1,259 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.android.inputmethod.latin.makedict; + +import com.android.inputmethod.annotations.UsedForTesting; +import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding; +import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; +import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; +import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; +import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode; +import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; + +import android.util.Log; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; + +/** + * An implementation of binary dictionary decoder for version 4 binary dictionary. + */ +@UsedForTesting +public class Ver4DictDecoder extends DictDecoder { + private static final String TAG = Ver4DictDecoder.class.getSimpleName(); + + private static final int FILETYPE_TRIE = 1; + private static final int FILETYPE_FREQUENCY = 2; + + private final File mDictDirectory; + private final DictionaryBufferFactory mBufferFactory; + private DictBuffer mDictBuffer; + private DictBuffer mFrequencyBuffer; + + @UsedForTesting + /* package */ Ver4DictDecoder(final File dictDirectory, final int factoryFlag) { + mDictDirectory = dictDirectory; + mDictBuffer = mFrequencyBuffer = null; + + if ((factoryFlag & MASK_DICTBUFFER) == USE_READONLY_BYTEBUFFER) { + mBufferFactory = new DictionaryBufferFromReadOnlyByteBufferFactory(); + } else if ((factoryFlag & MASK_DICTBUFFER) == USE_BYTEARRAY) { + mBufferFactory = new DictionaryBufferFromByteArrayFactory(); + } else if ((factoryFlag & MASK_DICTBUFFER) == USE_WRITABLE_BYTEBUFFER) { + mBufferFactory = new DictionaryBufferFromWritableByteBufferFactory(); + } else { + mBufferFactory = new DictionaryBufferFromReadOnlyByteBufferFactory(); + } + } + + @UsedForTesting + /* package */ Ver4DictDecoder(final File dictDirectory, final DictionaryBufferFactory factory) { + mDictDirectory = dictDirectory; + mBufferFactory = factory; + mDictBuffer = mFrequencyBuffer = null; + } + + private File getFile(final int fileType) { + if (fileType == FILETYPE_TRIE) { + return new File(mDictDirectory, + mDictDirectory.getName() + FormatSpec.TRIE_FILE_EXTENSION); + } else if (fileType == FILETYPE_FREQUENCY) { + return new File(mDictDirectory, + mDictDirectory.getName() + FormatSpec.FREQ_FILE_EXTENSION); + } else { + throw new RuntimeException("Unsupported kind of file : " + fileType); + } + } + + @Override + public void openDictBuffer() throws FileNotFoundException, IOException { + final String filename = mDictDirectory.getName(); + mDictBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_TRIE)); + mFrequencyBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_FREQUENCY)); + } + + @Override + public boolean isDictBufferOpen() { + return mDictBuffer != null; + } + + /* package */ DictBuffer getDictBuffer() { + return mDictBuffer; + } + + @Override + public FileHeader readHeader() throws IOException, UnsupportedFormatException { + if (mDictBuffer == null) { + openDictBuffer(); + } + final FileHeader header = super.readHeader(mDictBuffer); + final int version = header.mFormatOptions.mVersion; + if (version != 4) { + throw new UnsupportedFormatException("File header has a wrong version : " + version); + } + return header; + } + + protected static class PtNodeReader extends DictDecoder.PtNodeReader { + protected static int readFrequency(final DictBuffer frequencyBuffer, final int terminalId) { + frequencyBuffer.position(terminalId * FormatSpec.FREQUENCY_AND_FLAGS_SIZE + 1); + return frequencyBuffer.readUnsignedByte(); + } + + protected static int readTerminalId(final DictBuffer dictBuffer) { + return dictBuffer.readInt(); + } + } + + // TODO: Make this buffer thread safe. + // TODO: Support words longer than FormatSpec.MAX_WORD_LENGTH. + private final int[] mCharacterBuffer = new int[FormatSpec.MAX_WORD_LENGTH]; + @Override + public PtNodeInfo readPtNode(int ptNodePos, FormatOptions options) { + int addressPointer = ptNodePos; + final int flags = PtNodeReader.readPtNodeOptionFlags(mDictBuffer); + addressPointer += FormatSpec.PTNODE_FLAGS_SIZE; + + final int parentAddress = PtNodeReader.readParentAddress(mDictBuffer, options); + if (BinaryDictIOUtils.supportsDynamicUpdate(options)) { + addressPointer += FormatSpec.PARENT_ADDRESS_SIZE; + } + + final int characters[]; + if (0 != (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS)) { + int index = 0; + int character = CharEncoding.readChar(mDictBuffer); + addressPointer += CharEncoding.getCharSize(character); + while (FormatSpec.INVALID_CHARACTER != character + && index < FormatSpec.MAX_WORD_LENGTH) { + mCharacterBuffer[index++] = character; + character = CharEncoding.readChar(mDictBuffer); + addressPointer += CharEncoding.getCharSize(character); + } + characters = Arrays.copyOfRange(mCharacterBuffer, 0, index); + } else { + final int character = CharEncoding.readChar(mDictBuffer); + addressPointer += CharEncoding.getCharSize(character); + characters = new int[] { character }; + } + final int terminalId; + if (0 != (FormatSpec.FLAG_IS_TERMINAL & flags)) { + terminalId = PtNodeReader.readTerminalId(mDictBuffer); + addressPointer += FormatSpec.PTNODE_TERMINAL_ID_SIZE; + } else { + terminalId = PtNode.NOT_A_TERMINAL; + } + + final int frequency; + if (0 != (FormatSpec.FLAG_IS_TERMINAL & flags)) { + frequency = PtNodeReader.readFrequency(mFrequencyBuffer, terminalId); + } else { + frequency = PtNode.NOT_A_TERMINAL; + } + int childrenAddress = PtNodeReader.readChildrenAddress(mDictBuffer, flags, options); + if (childrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) { + childrenAddress += addressPointer; + } + addressPointer += BinaryDictIOUtils.getChildrenAddressSize(flags, options); + final ArrayList shortcutTargets; + if (0 != (flags & FormatSpec.FLAG_HAS_SHORTCUT_TARGETS)) { + // readShortcut will add shortcuts to shortcutTargets. + shortcutTargets = new ArrayList(); + addressPointer += PtNodeReader.readShortcut(mDictBuffer, shortcutTargets); + } else { + shortcutTargets = null; + } + + final ArrayList bigrams; + if (0 != (flags & FormatSpec.FLAG_HAS_BIGRAMS)) { + bigrams = new ArrayList(); + addressPointer += PtNodeReader.readBigramAddresses(mDictBuffer, bigrams, + addressPointer); + if (bigrams.size() >= FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) { + MakedictLog.d("too many bigrams in a node."); + } + } else { + bigrams = null; + } + return new PtNodeInfo(ptNodePos, addressPointer, flags, characters, frequency, + parentAddress, childrenAddress, shortcutTargets, bigrams); + } + + private void deleteDictFiles() { + final File[] files = mDictDirectory.listFiles(); + for (int i = 0; i < files.length; ++i) { + files[i].delete(); + } + } + + @Override + public FusionDictionary readDictionaryBinary(final FusionDictionary dict, + final boolean deleteDictIfBroken) + throws FileNotFoundException, IOException, UnsupportedFormatException { + if (mDictBuffer == null) { + openDictBuffer(); + } + try { + return BinaryDictDecoderUtils.readDictionaryBinary(this, dict); + } catch (IOException e) { + Log.e(TAG, "The dictionary " + mDictDirectory.getName() + " is broken.", e); + if (deleteDictIfBroken) { + deleteDictFiles(); + } + throw e; + } catch (UnsupportedFormatException e) { + Log.e(TAG, "The dictionary " + mDictDirectory.getName() + " is broken.", e); + if (deleteDictIfBroken) { + deleteDictFiles(); + } + throw e; + } + } + + @Override + public void setPosition(int newPos) { + mDictBuffer.position(newPos); + } + + @Override + public int getPosition() { + return mDictBuffer.position(); + } + + @Override + public int readPtNodeCount() { + return BinaryDictDecoderUtils.readPtNodeCount(mDictBuffer); + } + + @Override + public boolean readAndFollowForwardLink() { + final int nextAddress = mDictBuffer.readUnsignedInt24(); + if (nextAddress >= 0 && nextAddress < mDictBuffer.limit()) { + mDictBuffer.position(nextAddress); + return true; + } + return false; + } + + @Override + public boolean hasNextPtNodeArray() { + return mDictBuffer.position() != FormatSpec.NO_FORWARD_LINK_ADDRESS; + } +} diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java index 8bc0095a5..cedd0df88 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java +++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java @@ -25,6 +25,7 @@ import android.util.SparseArray; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; +import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; @@ -75,6 +76,10 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { new FormatSpec.FormatOptions(3, false /* supportsDynamicUpdate */); private static final FormatSpec.FormatOptions VERSION3_WITH_DYNAMIC_UPDATE = new FormatSpec.FormatOptions(3, true /* supportsDynamicUpdate */); + private static final FormatSpec.FormatOptions VERSION4_WITHOUT_DYNAMIC_UPDATE = + new FormatSpec.FormatOptions(4, false /* supportsDynamicUpdate */); + private static final FormatSpec.FormatOptions VERSION4_WITH_DYNAMIC_UPDATE = + new FormatSpec.FormatOptions(4, true /* supportsDynamicUpdate */); private static final String TEST_DICT_FILE_EXTENSION = ".testDict"; @@ -114,6 +119,17 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { } } + private DictEncoder getDictEncoder(final File file, final FormatOptions formatOptions) { + if (formatOptions.mVersion == FormatSpec.VERSION4) { + return new Ver4DictEncoder(getContext().getCacheDir()); + } else if (formatOptions.mVersion == 3 || formatOptions.mVersion == 2) { + return new Ver3DictEncoder(file); + } else { + throw new RuntimeException("The format option has a wrong version : " + + formatOptions.mVersion); + } + } + private void generateWords(final int number, final Random random, final int[] codePointSet) { final Set wordSet = CollectionUtils.newHashSet(); while (wordSet.size() < number) { @@ -165,7 +181,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { long now = -1, diff = -1; try { - final DictEncoder dictEncoder = new Ver3DictEncoder(file); + final DictEncoder dictEncoder = getDictEncoder(file, formatOptions); now = System.currentTimeMillis(); // If you need to dump the dict to a textual file, uncomment the line below and the @@ -246,16 +262,28 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { return file; } + private DictDecoder getDictDecoder(final File file, final int bufferType, + final FormatOptions formatOptions, final DictionaryOptions dictOptions) { + if (formatOptions.mVersion == FormatSpec.VERSION4) { + final FileHeader header = new FileHeader(0, dictOptions, formatOptions); + return FormatSpec.getDictDecoder(new File(getContext().getCacheDir(), + header.getId() + "." + header.getVersion()), bufferType); + } else { + return FormatSpec.getDictDecoder(file, bufferType); + } + } // Tests for readDictionaryBinary and writeDictionaryBinary private long timeReadingAndCheckDict(final File file, final List words, final SparseArray> bigrams, - final HashMap> shortcutMap, final int bufferType) { + final HashMap> shortcutMap, final int bufferType, + final FormatOptions formatOptions, final DictionaryOptions dictOptions) { long now, diff = -1; FusionDictionary dict = null; try { - final DictDecoder dictDecoder = FormatSpec.getDictDecoder(file, bufferType); + final DictDecoder dictDecoder = getDictDecoder(file, bufferType, formatOptions, + dictOptions); now = System.currentTimeMillis(); dict = dictDecoder.readDictionaryBinary(null, false /* deleteDictIfBroken */); diff = System.currentTimeMillis() - now; @@ -286,7 +314,8 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { checkDictionary(dict, words, bigrams, shortcuts); final long write = timeWritingDictToFile(file, dict, formatOptions); - final long read = timeReadingAndCheckDict(file, words, bigrams, shortcuts, bufferType); + final long read = timeReadingAndCheckDict(file, words, bigrams, shortcuts, bufferType, + formatOptions, dict.mOptions); return "PROF: read=" + read + "ms, write=" + write + "ms :" + message + " : " + outputOptions(bufferType, formatOptions); @@ -330,6 +359,8 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { runReadAndWriteTests(results, USE_BYTE_BUFFER, VERSION2); runReadAndWriteTests(results, USE_BYTE_BUFFER, VERSION3_WITHOUT_DYNAMIC_UPDATE); runReadAndWriteTests(results, USE_BYTE_BUFFER, VERSION3_WITH_DYNAMIC_UPDATE); + runReadAndWriteTests(results, USE_BYTE_BUFFER, VERSION4_WITHOUT_DYNAMIC_UPDATE); + runReadAndWriteTests(results, USE_BYTE_BUFFER, VERSION4_WITH_DYNAMIC_UPDATE); for (final String result : results) { Log.d(TAG, result); @@ -342,6 +373,8 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { runReadAndWriteTests(results, USE_BYTE_ARRAY, VERSION2); runReadAndWriteTests(results, USE_BYTE_ARRAY, VERSION3_WITHOUT_DYNAMIC_UPDATE); runReadAndWriteTests(results, USE_BYTE_ARRAY, VERSION3_WITH_DYNAMIC_UPDATE); + runReadAndWriteTests(results, USE_BYTE_ARRAY, VERSION4_WITHOUT_DYNAMIC_UPDATE); + runReadAndWriteTests(results, USE_BYTE_ARRAY, VERSION4_WITH_DYNAMIC_UPDATE); for (final String result : results) { Log.d(TAG, result); @@ -397,7 +430,8 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { } private long timeAndCheckReadUnigramsAndBigramsBinary(final File file, final List words, - final SparseArray> bigrams, final int bufferType) { + final SparseArray> bigrams, final int bufferType, + final FormatOptions formatOptions, final DictionaryOptions dictOptions) { FileInputStream inStream = null; final TreeMap resultWords = CollectionUtils.newTreeMap(); @@ -407,7 +441,8 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { long now = -1, diff = -1; try { - final DictDecoder dictDecoder = FormatSpec.getDictDecoder(file, bufferType); + final DictDecoder dictDecoder = getDictDecoder(file, bufferType, formatOptions, + dictOptions); now = System.currentTimeMillis(); dictDecoder.readUnigramsAndBigramsBinary(resultWords, resultFreqs, resultBigrams); diff = System.currentTimeMillis() - now; @@ -444,9 +479,10 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { timeWritingDictToFile(file, dict, formatOptions); - long wordMap = timeAndCheckReadUnigramsAndBigramsBinary(file, words, bigrams, bufferType); + long wordMap = timeAndCheckReadUnigramsAndBigramsBinary(file, words, bigrams, bufferType, + formatOptions, dict.mOptions); long fullReading = timeReadingAndCheckDict(file, words, bigrams, null /* shortcutMap */, - bufferType); + bufferType, formatOptions, dict.mOptions); return "readDictionaryBinary=" + fullReading + ", readUnigramsAndBigramsBinary=" + wordMap + " : " + message + " : " + outputOptions(bufferType, formatOptions); @@ -468,6 +504,8 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { runReadUnigramsAndBigramsTests(results, USE_BYTE_BUFFER, VERSION2); runReadUnigramsAndBigramsTests(results, USE_BYTE_BUFFER, VERSION3_WITHOUT_DYNAMIC_UPDATE); runReadUnigramsAndBigramsTests(results, USE_BYTE_BUFFER, VERSION3_WITH_DYNAMIC_UPDATE); + runReadUnigramsAndBigramsTests(results, USE_BYTE_BUFFER, VERSION4_WITHOUT_DYNAMIC_UPDATE); + runReadUnigramsAndBigramsTests(results, USE_BYTE_BUFFER, VERSION4_WITH_DYNAMIC_UPDATE); for (final String result : results) { Log.d(TAG, result); @@ -480,6 +518,8 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { runReadUnigramsAndBigramsTests(results, USE_BYTE_ARRAY, VERSION2); runReadUnigramsAndBigramsTests(results, USE_BYTE_ARRAY, VERSION3_WITHOUT_DYNAMIC_UPDATE); runReadUnigramsAndBigramsTests(results, USE_BYTE_ARRAY, VERSION3_WITH_DYNAMIC_UPDATE); + runReadUnigramsAndBigramsTests(results, USE_BYTE_ARRAY, VERSION4_WITHOUT_DYNAMIC_UPDATE); + runReadUnigramsAndBigramsTests(results, USE_BYTE_ARRAY, VERSION4_WITH_DYNAMIC_UPDATE); for (final String result : results) { Log.d(TAG, result); @@ -503,7 +543,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { address, fileHeader.mFormatOptions).mWord; } - private long runGetTerminalPosition(final DictDecoder dictDecoder, final String word, + private long checkGetTerminalPosition(final DictDecoder dictDecoder, final String word, int index, boolean contained) { final int expectedFrequency = (UNIGRAM_FREQ + index) % 255; long diff = -1; @@ -523,7 +563,9 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { return diff; } - public void testGetTerminalPosition() { + private void runGetTerminalPosition(final ArrayList words, + final SparseArray> bigrams, final int bufferType, + final FormatOptions formatOptions, final String message) { final String dictName = "testGetTerminalPosition"; final String dictVersion = Long.toString(System.currentTimeMillis()); final File file = setUpDictionaryFile(dictName, dictVersion); @@ -531,16 +573,18 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), getDictionaryOptions(dictName, dictVersion)); addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */); - timeWritingDictToFile(file, dict, VERSION3_WITH_DYNAMIC_UPDATE); + addBigrams(dict, words, bigrams); + timeWritingDictToFile(file, dict, formatOptions); - final DictDecoder dictDecoder = FormatSpec.getDictDecoder(file, DictDecoder.USE_BYTEARRAY); + final DictDecoder dictDecoder = getDictDecoder(file, DictDecoder.USE_BYTEARRAY, + formatOptions, dict.mOptions); try { dictDecoder.openDictBuffer(); } catch (IOException e) { // ignore Log.e(TAG, "IOException while opening the buffer", e); } - assertTrue("Can't get the buffer", dictDecoder.isOpenedDictBuffer()); + assertTrue("Can't get the buffer", dictDecoder.isDictBufferOpen()); try { // too long word @@ -559,10 +603,11 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { // Test a word that is contained within the dictionary. long sum = 0; for (int i = 0; i < sWords.size(); ++i) { - final long time = runGetTerminalPosition(dictDecoder, sWords.get(i), i, true); + final long time = checkGetTerminalPosition(dictDecoder, sWords.get(i), i, true); sum += time == -1 ? 0 : time; } - Log.d(TAG, "per a search : " + (((double)sum) / sWords.size() / 1000000)); + Log.d(TAG, "per search : " + (((double)sum) / sWords.size() / 1000000) + " : " + message + + " : " + outputOptions(bufferType, formatOptions)); // Test a word that isn't contained within the dictionary. final Random random = new Random((int)System.currentTimeMillis()); @@ -571,7 +616,32 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { for (int i = 0; i < 1000; ++i) { final String word = CodePointUtils.generateWord(random, codePointSet); if (sWords.indexOf(word) != -1) continue; - runGetTerminalPosition(dictDecoder, word, i, false); + checkGetTerminalPosition(dictDecoder, word, i, false); + } + } + + private void runGetTerminalPositionTests(final ArrayList results, final int bufferType, + final FormatOptions formatOptions) { + runGetTerminalPosition(sWords, sEmptyBigrams, bufferType, formatOptions, "unigram"); + } + + public void testGetTerminalPosition() { + final ArrayList results = CollectionUtils.newArrayList(); + + runGetTerminalPositionTests(results, USE_BYTE_ARRAY, VERSION2); + runGetTerminalPositionTests(results, USE_BYTE_ARRAY, VERSION3_WITHOUT_DYNAMIC_UPDATE); + runGetTerminalPositionTests(results, USE_BYTE_ARRAY, VERSION3_WITH_DYNAMIC_UPDATE); + runGetTerminalPositionTests(results, USE_BYTE_ARRAY, VERSION4_WITHOUT_DYNAMIC_UPDATE); + runGetTerminalPositionTests(results, USE_BYTE_ARRAY, VERSION4_WITH_DYNAMIC_UPDATE); + + runGetTerminalPositionTests(results, USE_BYTE_BUFFER, VERSION2); + runGetTerminalPositionTests(results, USE_BYTE_BUFFER, VERSION3_WITHOUT_DYNAMIC_UPDATE); + runGetTerminalPositionTests(results, USE_BYTE_BUFFER, VERSION3_WITH_DYNAMIC_UPDATE); + runGetTerminalPositionTests(results, USE_BYTE_BUFFER, VERSION4_WITHOUT_DYNAMIC_UPDATE); + runGetTerminalPositionTests(results, USE_BYTE_BUFFER, VERSION4_WITH_DYNAMIC_UPDATE); + + for (final String result : results) { + Log.d(TAG, result); } } @@ -593,7 +663,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { // ignore Log.e(TAG, "IOException while opening the buffer", e); } - assertTrue("Can't get the buffer", dictDecoder.isOpenedDictBuffer()); + assertTrue("Can't get the buffer", dictDecoder.isDictBufferOpen()); try { MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD,