From ff4b1d1fd06ccc804313d4c649fe00b43c76895e Mon Sep 17 00:00:00 2001 From: Keisuke Kuroyanagi Date: Wed, 13 Nov 2013 16:40:01 +0900 Subject: [PATCH] Implement ver4 dictionary bigram removing methods. Bug: 11073222 Change-Id: I0c42e283d1ff983dc2c51e91b167bc39cdfd3da8 --- .../bigram/ver4_bigram_list_policy.cpp | 27 ++++++++++ .../bigram/ver4_bigram_list_policy.h | 2 + .../v4/content/bigram_dict_content.cpp | 12 +++-- .../structure/v4/ver4_dict_constants.cpp | 4 ++ .../structure/v4/ver4_dict_constants.h | 1 + .../v4/ver4_patricia_trie_node_writer.cpp | 4 +- .../v4/ver4_patricia_trie_policy.cpp | 27 +++++++++- .../latin/Ver4BinaryDictionaryTests.java | 52 +++++++++++++++++++ 8 files changed, 122 insertions(+), 7 deletions(-) diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp index 028078111..94d7f1061 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp @@ -91,6 +91,33 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget return mBigramDictContent->copyBigramList(bigramListPos, writingPos); } +bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTerminalId) { + const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId); + if (bigramListPos == NOT_A_DICT_POS) { + // Bigram list does't exist. + return false; + } + const int entryPosToUpdate = getEntryPosToUpdate(targetTerminalId, bigramListPos); + if (entryPosToUpdate == NOT_A_DICT_POS) { + // Bigram entry doesn't exist. + return false; + } + int readingPos = entryPosToUpdate; + bool hasNext = false; + int probability = NOT_A_PROBABILITY; + int originalTargetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; + mBigramDictContent->getBigramEntryAndAdvancePosition(&probability, &hasNext, + &originalTargetTerminalId, &readingPos); + if (targetTerminalId != originalTargetTerminalId) { + // Bigram entry doesn't exist. + return false; + } + int writingPos = entryPosToUpdate; + // Remove bigram entry by overwriting target terminal Id. + return mBigramDictContent->writeBigramEntryAndAdvancePosition(probability, hasNext, + Ver4DictConstants::NOT_A_TERMINAL_ID /* targetTerminalId */, &writingPos); +} + int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind, const int bigramListPos) const { bool hasNext = true; diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h index e599a2c8d..b3fe13d7d 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h @@ -42,6 +42,8 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy { bool addNewEntry(const int terminalId, const int newTargetTerminalId, const int newProbability, bool *const outAddedNewEntry); + bool removeEntry(const int terminalId, const int targetTerminalId); + private: DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4BigramListPolicy); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp index f02f14582..999460086 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp @@ -31,9 +31,12 @@ void BigramDictContent::getBigramEntryAndAdvancePosition(int *const outProbabili if (outHasNext) { *outHasNext = (bigramFlags & Ver4DictConstants::BIGRAM_HAS_NEXT_MASK) != 0; } + const int targetTerminalId = bigramListBuffer->readUintAndAdvancePosition( + Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, bigramEntryPos); if (outTargetTerminalId) { - *outTargetTerminalId = bigramListBuffer->readUintAndAdvancePosition( - Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, bigramEntryPos); + *outTargetTerminalId = + (targetTerminalId == Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID) ? + Ver4DictConstants::NOT_A_TERMINAL_ID : targetTerminalId; } } @@ -45,7 +48,10 @@ bool BigramDictContent::writeBigramEntryAndAdvancePosition(const int probability Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, entryWritingPos)) { return false; } - return bigramListBuffer->writeUintAndAdvancePosition(targetTerminalId, + const int targetTerminalIdToWrite = + (targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID) ? + Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID : targetTerminalId; + return bigramListBuffer->writeUintAndAdvancePosition(targetTerminalIdToWrite, Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, entryWritingPos); } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp index 2b113e38b..ced76d25d 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp @@ -42,6 +42,10 @@ const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE = 16; const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE = 4; const int Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE = 3; +// Unsigned int max value of BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE-byte is used for representing +// invalid terminal ID in bigram lists. +const int Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID = + (1 << (BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE * 8)) - 1; const int Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE = 1; const int Ver4DictConstants::BIGRAM_PROBABILITY_MASK = 0x0F; const int Ver4DictConstants::BIGRAM_HAS_NEXT_MASK = 0x80; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h index a1830dd0c..b00821373 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h @@ -47,6 +47,7 @@ class Ver4DictConstants { static const int BIGRAM_FLAGS_FIELD_SIZE; static const int BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE; + static const int INVALID_BIGRAM_TARGET_TERMINAL_ID; static const int BIGRAM_PROBABILITY_MASK; static const int BIGRAM_HAS_NEXT_MASK; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp index 7c66a6691..b572ee87f 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp @@ -192,8 +192,8 @@ bool Ver4PatriciaTrieNodeWriter::addNewBigramEntry( bool Ver4PatriciaTrieNodeWriter::removeBigramEntry( const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam) { - // TODO: Implement. - return false; + return mBigramPolicy->removeEntry(sourcePtNodeParams->getTerminalId(), + targetPtNodeParam->getTerminalId()); } } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp index 57201e4a8..40cc3d00b 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp @@ -188,8 +188,31 @@ bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int le bool Ver4PatriciaTriePolicy::removeBigramWords(const int *const word0, const int length0, const int *const word1, const int length1) { - // TODO: Implement. - return false; + if (!mBuffers.get()->isUpdatable()) { + AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary."); + return false; + } + if (mDictBuffer.getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) { + AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d", + mDictBuffer.getTailPosition()); + return false; + } + const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0, + false /* forceLowerCaseSearch */); + if (word0Pos == NOT_A_DICT_POS) { + return false; + } + const int word1Pos = getTerminalPtNodePositionOfWord(word1, length1, + false /* forceLowerCaseSearch */); + if (word1Pos == NOT_A_DICT_POS) { + return false; + } + if (mUpdatingHelper.removeBigramWords(word0Pos, word1Pos)) { + mBigramCount--; + return true; + } else { + return false; + } } void Ver4PatriciaTriePolicy::flush(const char *const filePath) { diff --git a/tests/src/com/android/inputmethod/latin/Ver4BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/Ver4BinaryDictionaryTests.java index b7cef738b..ad57e4c9f 100644 --- a/tests/src/com/android/inputmethod/latin/Ver4BinaryDictionaryTests.java +++ b/tests/src/com/android/inputmethod/latin/Ver4BinaryDictionaryTests.java @@ -246,4 +246,56 @@ public class Ver4BinaryDictionaryTests extends AndroidTestCase { assertEquals(probability, binaryDictionary.getBigramProbability("abb", "aaa")); assertEquals(probability, binaryDictionary.getBigramProbability("abb", "bcc")); } + + public void testRemoveBigramWords() { + final String dictVersion = Long.toString(System.currentTimeMillis()); + final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), + getDictionaryOptions(TEST_LOCALE, dictVersion)); + final DictEncoder encoder = new Ver4DictEncoder(getContext().getCacheDir()); + try { + encoder.writeDictionary(dict, FORMAT_OPTIONS); + } catch (IOException e) { + Log.e(TAG, "IOException while writing dictionary", e); + } catch (UnsupportedFormatException e) { + Log.e(TAG, "Unsupported format", e); + } + final File trieFile = getTrieFile(TEST_LOCALE, dictVersion); + final BinaryDictionary binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(), + 0 /* offset */, trieFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + assertTrue(binaryDictionary.isValidDictionary()); + + final int unigramProbability = 100; + final int bigramProbability = 10; + binaryDictionary.addUnigramWord("aaa", unigramProbability); + binaryDictionary.addUnigramWord("abb", unigramProbability); + binaryDictionary.addUnigramWord("bcc", unigramProbability); + binaryDictionary.addBigramWords("aaa", "abb", bigramProbability); + binaryDictionary.addBigramWords("aaa", "bcc", bigramProbability); + binaryDictionary.addBigramWords("abb", "aaa", bigramProbability); + binaryDictionary.addBigramWords("abb", "bcc", bigramProbability); + + assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb")); + assertEquals(true, binaryDictionary.isValidBigram("aaa", "bcc")); + assertEquals(true, binaryDictionary.isValidBigram("abb", "aaa")); + assertEquals(true, binaryDictionary.isValidBigram("abb", "bcc")); + + binaryDictionary.removeBigramWords("aaa", "abb"); + assertEquals(false, binaryDictionary.isValidBigram("aaa", "abb")); + binaryDictionary.addBigramWords("aaa", "abb", bigramProbability); + assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb")); + + binaryDictionary.removeBigramWords("aaa", "bcc"); + assertEquals(false, binaryDictionary.isValidBigram("aaa", "bcc")); + binaryDictionary.removeBigramWords("abb", "aaa"); + assertEquals(false, binaryDictionary.isValidBigram("abb", "aaa")); + binaryDictionary.removeBigramWords("abb", "bcc"); + assertEquals(false, binaryDictionary.isValidBigram("abb", "bcc")); + + binaryDictionary.removeBigramWords("aaa", "abb"); + // Test remove non-existing bigram operation. + binaryDictionary.removeBigramWords("aaa", "abb"); + binaryDictionary.removeBigramWords("bcc", "aaa"); + } + }