From 9b3e59d6444d54c6835369e939794c1c1e49b00d Mon Sep 17 00:00:00 2001 From: Keisuke Kuroyanagi Date: Tue, 5 Nov 2013 16:00:26 +0900 Subject: [PATCH] Implement ver4 bigram reading method. Bug: 11073222 Change-Id: I7b3408938f304da361201892e0a1342fdf92e62e --- .../makedict/BinaryDictEncoderUtils.java | 11 +++- .../bigram/ver4_bigram_list_policy.h | 65 +++++++++++++++++++ .../v4/content/bigram_dict_content.h | 62 ++++++++++++++++++ .../v4/content/sparse_table_dict_content.h | 9 +++ .../content/terminal_position_lookup_table.h | 10 ++- .../structure/v4/ver4_dict_buffers.h | 23 ++++--- .../structure/v4/ver4_dict_constants.cpp | 3 + .../structure/v4/ver4_dict_constants.h | 3 + .../v4/ver4_patricia_trie_policy.cpp | 3 +- .../structure/v4/ver4_patricia_trie_policy.h | 6 +- .../dictionary/utils/sparse_table.cpp | 5 +- .../latin/Ver4BinaryDictionaryTests.java | 54 ++++++++++++++- 12 files changed, 234 insertions(+), 20 deletions(-) create mode 100644 native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h create mode 100644 native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java index bc1a2579e..c0dad3db2 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java @@ -711,6 +711,13 @@ public class BinaryDictEncoderUtils { + word + " is " + unigramFrequency); bigramFrequency = unigramFrequency; } + bigramFlags += getBigramFrequencyDiff(unigramFrequency, bigramFrequency) + & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY; + return bigramFlags; + } + + public static int getBigramFrequencyDiff(final int unigramFrequency, + final int bigramFrequency) { // We compute the difference between 255 (which means probability = 1) and the // unigram score. We split this into a number of discrete steps. // Now, the steps are numbered 0~15; 0 represents an increase of 1 step while 15 @@ -744,9 +751,7 @@ public class BinaryDictEncoderUtils { // include this bigram in the dictionary. For now, register as 0, and live with the // small over-estimation that we get in this case. TODO: actually remove this bigram // if discretizedFrequency < 0. - final int finalBigramFrequency = discretizedFrequency > 0 ? discretizedFrequency : 0; - bigramFlags += finalBigramFrequency & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY; - return bigramFlags; + return discretizedFrequency > 0 ? discretizedFrequency : 0; } /** diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h new file mode 100644 index 000000000..875a0ff9b --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h @@ -0,0 +1,65 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_VER4_BIGRAM_LIST_POLICY_H +#define LATINIME_VER4_BIGRAM_LIST_POLICY_H + +#include "defines.h" +#include "suggest/core/policy/dictionary_bigrams_structure_policy.h" +#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h" +#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h" +#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h" + +namespace latinime { + +class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy { + public: + Ver4BigramListPolicy(const BigramDictContent *const bigramDictContent, + const TerminalPositionLookupTable *const terminalPositionLookupTable) + : mBigramDictContent(bigramDictContent), + mTerminalPositionLookupTable(terminalPositionLookupTable) {} + + void getNextBigram(int *const outBigramPos, int *const outProbability, + bool *const outHasNext, int *const bigramEntryPos) const { + int bigramFlags = 0; + int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; + mBigramDictContent->getBigramEntryAndAdvancePosition(&bigramFlags, &targetTerminalId, + bigramEntryPos); + if (outProbability) { + *outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags); + } + if (outHasNext) { + *outHasNext = BigramListReadWriteUtils::hasNext(bigramFlags); + } + if (outBigramPos) { + // Lookup target PtNode position. + *outBigramPos = + mTerminalPositionLookupTable->getTerminalPtNodePosition(targetTerminalId); + } + } + + void skipAllBigrams(int *const pos) const { + // Do nothing because we don't need to skip bigram lists in ver4 dictionaries. + } + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4BigramListPolicy); + + const BigramDictContent *const mBigramDictContent; + const TerminalPositionLookupTable *const mTerminalPositionLookupTable; +}; +} // namespace latinime +#endif /* LATINIME_VER4_BIGRAM_LIST_POLICY_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h new file mode 100644 index 000000000..634c1f08e --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h @@ -0,0 +1,62 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_BIGRAM_DICT_CONTENT_H +#define LATINIME_BIGRAM_DICT_CONTENT_H + +#include "defines.h" +#include "suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h" +#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" + +namespace latinime { + +class BigramDictContent : public SparseTableDictContent { + public: + BigramDictContent(const char *const dictDirPath, const bool isUpdatable) + : SparseTableDictContent(dictDirPath, + Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION, + Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION, + Ver4DictConstants::BIGRAM_FILE_EXTENSION, isUpdatable, + Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, + Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE) {} + + void getBigramEntryAndAdvancePosition(int *const outBigramFlags, + int *const outTargetTerminalId, int *const bigramEntryPos) const { + const BufferWithExtendableBuffer *const bigramListBuffer = getContentBuffer(); + if (outBigramFlags) { + *outBigramFlags = bigramListBuffer->readUintAndAdvancePosition( + Ver4DictConstants::BIGRAM_FRAGS_FIELD_SIZE, bigramEntryPos); + } + if (outTargetTerminalId) { + *outTargetTerminalId = bigramListBuffer->readUintAndAdvancePosition( + Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, bigramEntryPos); + } + } + + // Returns head position of bigram list for a PtNode specified by terminalId. + int getBigramListHeadPos(const int terminalId) const { + const SparseTable *const addressLookupTable = getAddressLookupTable(); + if (!addressLookupTable->contains(terminalId)) { + return NOT_A_DICT_POS; + } + return addressLookupTable->get(terminalId); + } + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(BigramDictContent); +}; +} // namespace latinime +#endif /* LATINIME_BIGRAM_DICT_CONTENT_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h index 4836d8688..71868e9ca 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h @@ -58,6 +58,15 @@ class SparseTableDictContent : public DictContent { && mContentBuffer.get() != 0; } + protected: + const SparseTable *getAddressLookupTable() const { + return &mAddressLookupTable; + } + + const BufferWithExtendableBuffer *getContentBuffer() const { + return &mExpandableContentBuffer; + } + private: DISALLOW_IMPLICIT_CONSTRUCTORS(SparseTableDictContent); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h index b12ab58a7..173d0da05 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h @@ -29,11 +29,14 @@ namespace latinime { class TerminalPositionLookupTable : public SingleDictContent { public: - TerminalPositionLookupTable(const char *const dictDirPath, const bool isUpdatable) + // TODO: Quit using headerRegionSize. + TerminalPositionLookupTable(const char *const dictDirPath, const bool isUpdatable, + const int headerRegionSize) : SingleDictContent(dictDirPath, Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION, isUpdatable), mSize(getBuffer()->getTailPosition() - / Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE) {} + / Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE), + mHeaderRegionSize(headerRegionSize) {} int getTerminalPtNodePosition(const int terminalId) const { if (terminalId < 0 || terminalId >= mSize) { @@ -41,13 +44,14 @@ class TerminalPositionLookupTable : public SingleDictContent { } const int readingPos = terminalId * Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE; return getBuffer()->readUint(Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, - readingPos); + readingPos) - mHeaderRegionSize; } private: DISALLOW_IMPLICIT_CONSTRUCTORS(TerminalPositionLookupTable); const int mSize; + const int mHeaderRegionSize; }; } // namespace latinime #endif // LATINIME_TERMINAL_POSITION_LOOKUP_TABLE_H diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h index 7c0c83c3e..4e10403f3 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h @@ -18,6 +18,8 @@ #define LATINIME_VER4_DICT_BUFFER_H #include "defines.h" +#include "suggest/policyimpl/dictionary/header/header_read_write_utils.h" +#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h" #include "suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h" #include "suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h" #include "suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h" @@ -52,24 +54,29 @@ class Ver4DictBuffers { return mDictBuffer.get()->getBufferSize(); } + AK_FORCE_INLINE const TerminalPositionLookupTable *getTerminalPositionLookupTable() const { + return &mTerminalPositionLookupTable; + } + AK_FORCE_INLINE const ProbabilityDictContent *getProbabilityDictContent() const { return &mProbabilityDictContent; } + AK_FORCE_INLINE const BigramDictContent *getBigramDictContent() const { + return &mBigramDictContent; + } + private: DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictBuffers); AK_FORCE_INLINE Ver4DictBuffers(const char *const dictDirPath, const MmappedBuffer::MmappedBufferPtr &dictBuffer, const bool isUpdatable) : mDictBuffer(dictBuffer), - mTerminalPositionLookupTable(dictDirPath, isUpdatable), + // TODO: Quit using getHeaderSize. + mTerminalPositionLookupTable(dictDirPath, isUpdatable, + HeaderReadWriteUtils::getHeaderSize(mDictBuffer.get()->getBuffer())), mProbabilityDictContent(dictDirPath, isUpdatable), - mBigramDictContent(dictDirPath, - Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION, - Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION, - Ver4DictConstants::BIGRAM_FILE_EXTENSION, isUpdatable, - Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, - Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE), + mBigramDictContent(dictDirPath, isUpdatable), mShortcutDictContent(dictDirPath, Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION, Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION, @@ -80,7 +87,7 @@ class Ver4DictBuffers { const MmappedBuffer::MmappedBufferPtr mDictBuffer; TerminalPositionLookupTable mTerminalPositionLookupTable; ProbabilityDictContent mProbabilityDictContent; - SparseTableDictContent mBigramDictContent; + BigramDictContent mBigramDictContent; SparseTableDictContent mShortcutDictContent; }; } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp index 6b3a749b2..fb29c0c4a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp @@ -40,4 +40,7 @@ const int Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE = 4; const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE = 16; const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE = 4; +const int Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE = 3; +const int Ver4DictConstants::BIGRAM_FRAGS_FIELD_SIZE = 1; + } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h index 3801f594d..a0bebb75f 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h @@ -43,6 +43,9 @@ class Ver4DictConstants { static const int BIGRAM_ADDRESS_TABLE_DATA_SIZE; static const int SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE; static const int SHORTCUT_ADDRESS_TABLE_DATA_SIZE; + + static const int BIGRAM_FRAGS_FIELD_SIZE; + static const int BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE; private: DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictConstants); }; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp index 33f738413..43ad301db 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp @@ -119,7 +119,8 @@ int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) cons if (ptNodeParams.isDeleted()) { return NOT_A_DICT_POS; } - return ptNodeParams.getTerminalId(); + return mBuffers.get()->getBigramDictContent()->getBigramListHeadPos( + ptNodeParams.getTerminalId()); } bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int length, diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h index 2f577f741..d0be77d0b 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h @@ -19,6 +19,7 @@ #include "defines.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" +#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h" #include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h" @@ -38,6 +39,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { mDictBuffer(mBuffers.get()->getRawDictBuffer() + mHeaderPolicy.getSize(), mBuffers.get()->getRawDictBufferSize() - mHeaderPolicy.getSize(), BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE), + mBigramPolicy(mBuffers.get()->getBigramDictContent(), + mBuffers.get()->getTerminalPositionLookupTable()), mNodeReader(&mDictBuffer, mBuffers.get()->getProbabilityDictContent()) {}; AK_FORCE_INLINE int getRootPosition() const { @@ -67,7 +70,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { } const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const { - return 0; + return &mBigramPolicy; } const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const { @@ -97,6 +100,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { const Ver4DictBuffers::Ver4DictBuffersPtr mBuffers; const HeaderPolicy mHeaderPolicy; BufferWithExtendableBuffer mDictBuffer; + const Ver4BigramListPolicy mBigramPolicy; Ver4PatriciaTrieNodeReader mNodeReader; }; } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.cpp index 93ec70c1e..2678b8c7b 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.cpp @@ -21,11 +21,10 @@ namespace latinime { const int SparseTable::NOT_EXIST = -1; bool SparseTable::contains(const int id) const { - if (id < 0 || mIndexTableBuffer->getTailPosition() <= id * mDataSize) { + const int readingPos = id / mBlockSize * mDataSize; + if (id < 0 || mIndexTableBuffer->getTailPosition() <= readingPos) { return false; } - const int indexTableIndex = id / mBlockSize; - const int readingPos = indexTableIndex * mDataSize; const int index = mIndexTableBuffer->readUint(mDataSize, readingPos); return index != NOT_EXIST; } diff --git a/tests/src/com/android/inputmethod/latin/Ver4BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/Ver4BinaryDictionaryTests.java index f48a4296a..e43335265 100644 --- a/tests/src/com/android/inputmethod/latin/Ver4BinaryDictionaryTests.java +++ b/tests/src/com/android/inputmethod/latin/Ver4BinaryDictionaryTests.java @@ -20,6 +20,7 @@ import android.test.AndroidTestCase; import android.test.suitebuilder.annotation.LargeTest; import android.util.Log; +import com.android.inputmethod.latin.makedict.BinaryDictEncoderUtils; import com.android.inputmethod.latin.makedict.DictEncoder; import com.android.inputmethod.latin.makedict.FormatSpec; import com.android.inputmethod.latin.makedict.FusionDictionary; @@ -62,7 +63,7 @@ public class Ver4BinaryDictionaryTests extends AndroidTestCase { // TODO: remove after native code support dictionary creation. private File getTrieFile(final String id, final String version) { - return new File(getContext().getCacheDir() + "/" + id + "." + version, + return new File(getContext().getCacheDir() + "/" + id + "." + version, TEST_LOCALE + "." + version + FormatSpec.TRIE_FILE_EXTENSION); } @@ -120,4 +121,55 @@ public class Ver4BinaryDictionaryTests extends AndroidTestCase { assertEquals(frequency, binaryDictionary.getFrequency("aaa")); assertEquals(frequency, binaryDictionary.getFrequency("ab")); } + + public static int getCalculatedBigramProbabiliy(BinaryDictionary binaryDictionary, + final int unigramFrequency, final int bigramFrequency) { + final int bigramFrequencyDiff = BinaryDictEncoderUtils.getBigramFrequencyDiff( + unigramFrequency, bigramFrequency); + return binaryDictionary.calculateProbability(unigramFrequency, bigramFrequencyDiff); + } + + // TODO: Add large tests. + public void testReadBigrams() { + final String dictVersion = Long.toString(System.currentTimeMillis()); + final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), + getDictionaryOptions(TEST_LOCALE, dictVersion)); + + final int unigramFrequency = 1; + final int bigramFrequency0 = 150; + final int bigramFrequency1 = 1; + final int bigramFrequency2 = 255; + dict.add("a", unigramFrequency, null, false /* isNotAWord */); + dict.add("aaa", unigramFrequency, null, false /* isNotAWord */); + dict.add("ab", unigramFrequency, null, false /* isNotAWord */); + dict.setBigram("a", "aaa", bigramFrequency0); + dict.setBigram("a", "ab", bigramFrequency1); + dict.setBigram("aaa", "ab", bigramFrequency2); + + DictEncoder encoder = new Ver4DictEncoder(getContext().getCacheDir()); + try { + encoder.writeDictionary(dict, FORMAT_OPTIONS); + } catch (IOException e) { + Log.e(TAG, "IOException while writing dictionary", e); + } catch (UnsupportedFormatException e) { + Log.e(TAG, "Unsupported format", e); + } + File trieFile = getTrieFile(TEST_LOCALE, dictVersion); + BinaryDictionary binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(), + 0 /* offset */, trieFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + + assertTrue(binaryDictionary.isValidDictionary()); + + assertEquals(getCalculatedBigramProbabiliy(binaryDictionary, unigramFrequency, + bigramFrequency0), binaryDictionary.getBigramProbability("a", "aaa")); + assertEquals(getCalculatedBigramProbabiliy(binaryDictionary, unigramFrequency, + bigramFrequency1), binaryDictionary.getBigramProbability("a", "ab")); + assertEquals(getCalculatedBigramProbabiliy(binaryDictionary, unigramFrequency, + bigramFrequency2), binaryDictionary.getBigramProbability("aaa", "ab")); + + assertFalse(binaryDictionary.isValidBigram("aaa", "a")); + assertFalse(binaryDictionary.isValidBigram("ab", "a")); + assertFalse(binaryDictionary.isValidBigram("ab", "aaa")); + } }