From d9f450ef00f09a9eccfc677968b46e072267a5f2 Mon Sep 17 00:00:00 2001 From: Keisuke Kuroyanagi Date: Tue, 17 Sep 2013 17:49:22 +0900 Subject: [PATCH] Implement removeBigramWords() for DynamicPatriciaTrie. Bug: 6669677 Change-Id: I99cb517511b4c968b8bf937baab5d7f42b0f534e --- .../bigram/dynamic_bigram_list_policy.cpp | 5 +- .../bigram/dynamic_bigram_list_policy.h | 5 +- .../dynamic_patricia_trie_writing_helper.cpp | 32 +++++++----- .../latin/BinaryDictionaryTests.java | 51 ++++++++++++++++++- 4 files changed, 77 insertions(+), 16 deletions(-) diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp index b8a5f27e9..4c44d22fd 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp @@ -54,11 +54,13 @@ void DynamicBigramListPolicy::skipAllBigrams(int *const pos) const { } } -bool DynamicBigramListPolicy::copyAllBigrams(int *const fromPos, int *const toPos) { +bool DynamicBigramListPolicy::copyAllBigrams(int *const fromPos, int *const toPos, + int *outBigramsCount) { const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*fromPos); if (usesAdditionalBuffer) { *fromPos -= mBuffer->getOriginalBufferSize(); } + *outBigramsCount = 0; BigramListReadWriteUtils::BigramFlags flags; do { // The buffer address can be changed after calling buffer writing methods. 
@@ -91,6 +93,7 @@ bool DynamicBigramListPolicy::copyAllBigrams(int *const fromPos, int *const toPo toPos)) { return false; } + (*outBigramsCount)++; } while(BigramListReadWriteUtils::hasNext(flags)); if (usesAdditionalBuffer) { *fromPos += mBuffer->getOriginalBufferSize(); diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h index c45e26acc..dafb62d80 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h @@ -45,8 +45,9 @@ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy { void skipAllBigrams(int *const pos) const; // Copy bigrams from the bigram list that starts at fromPos to toPos and advance these - // positions after bigram lists. This method skips invalid bigram entries. - bool copyAllBigrams(int *const fromPos, int *const toPos); + // positions after bigram lists. This method skips invalid bigram entries and writes the valid + // bigram entry count to outBigramsCount. 
+ bool copyAllBigrams(int *const fromPos, int *const toPos, int *outBigramsCount); bool addNewBigramEntryToBigramList(const int bigramPos, const int probability, int *const pos); diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp index b80b9b33f..311d31e5d 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp @@ -125,11 +125,10 @@ bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, const int word1Pos) { DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy); nodeReader.fetchNodeInfoFromBuffer(word0Pos); - if (nodeReader.isDeleted() || nodeReader.getBigramsPos() == NOT_A_DICT_POS) { + if (nodeReader.getBigramsPos() == NOT_A_DICT_POS) { return false; } - // TODO: Implement. - return false; + return mBigramPolicy->removeBigram(nodeReader.getBigramsPos(), word1Pos); } bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition( @@ -193,13 +192,9 @@ bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer(const boo const int originalBigramListPos, const int originalShortcutListPos, int *const writingPos) { const int nodePos = *writingPos; - // Create node flags and write them. - const PatriciaTrieReadingUtils::NodeFlags nodeFlags = - PatriciaTrieReadingUtils::createAndGetFlags(isBlacklisted, isNotAWord, - probability != NOT_A_PROBABILITY, originalShortcutListPos != NOT_A_DICT_POS, - originalBigramListPos != NOT_A_DICT_POS, codePointCount > 1, - 3 /* childrenPositionFieldSize */); - if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, nodeFlags, + // Write dummy flags. 
The Node flags are updated with appropriate flags at the last step of the + // PtNode writing. + if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, 0 /* nodeFlags */, writingPos)) { return false; } @@ -212,7 +207,7 @@ bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer(const boo // Write code points if (!DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(mBuffer, codePoints, codePointCount, writingPos)) { - return false;; + return false; } // Write probability when the probability is a valid probability, which means this node is // terminal. @@ -235,12 +230,25 @@ bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer(const boo } } // Copy bigram list when the originalBigramListPos is valid dictionary position. + int bigramCount = 0; if (originalBigramListPos != NOT_A_DICT_POS) { int fromPos = originalBigramListPos; - if (!mBigramPolicy->copyAllBigrams(&fromPos, writingPos)) { + if (!mBigramPolicy->copyAllBigrams(&fromPos, writingPos, &bigramCount)) { return false; } } + // Create node flags and write them. 
+ PatriciaTrieReadingUtils::NodeFlags nodeFlags = + PatriciaTrieReadingUtils::createAndGetFlags(isBlacklisted, isNotAWord, + probability != NOT_A_PROBABILITY /* isTerminal */, + originalShortcutListPos != NOT_A_DICT_POS /* hasShortcutTargets */, + bigramCount > 0 /* hasBigrams */, codePointCount > 1 /* hasMultipleChars */, + CHILDREN_POSITION_FIELD_SIZE); + int flagsFieldPos = nodePos; + if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, nodeFlags, + &flagsFieldPos)) { + return false; + } return true; } diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java index f9dd35a34..4d231cde7 100644 --- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java +++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java @@ -206,6 +206,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { final int bigramCount = 1000; final int codePointSetSize = 50; final int seed = 11111; + File dictFile = null; try { dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); @@ -217,7 +218,6 @@ public class BinaryDictionaryTests extends AndroidTestCase { BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); - final ArrayList words = new ArrayList(); // Test a word that isn't contained within the dictionary. 
final Random random = new Random(seed); @@ -250,4 +250,53 @@ public class BinaryDictionaryTests extends AndroidTestCase { dictFile.delete(); } + + public void testRemoveBigramWords() { + File dictFile = null; + try { + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); + } catch (IOException e) { + fail("IOException while writing an initial dictionary : " + e); + } catch (UnsupportedFormatException e) { + fail("UnsupportedFormatException while writing an initial dictionary : " + e); + } + BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), + 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + + final int unigramProbability = 100; + final int bigramProbability = 10; + binaryDictionary.addUnigramWord("aaa", unigramProbability); + binaryDictionary.addUnigramWord("abb", unigramProbability); + binaryDictionary.addUnigramWord("bcc", unigramProbability); + binaryDictionary.addBigramWords("aaa", "abb", bigramProbability); + binaryDictionary.addBigramWords("aaa", "bcc", bigramProbability); + binaryDictionary.addBigramWords("abb", "aaa", bigramProbability); + binaryDictionary.addBigramWords("abb", "bcc", bigramProbability); + + assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb")); + assertEquals(true, binaryDictionary.isValidBigram("aaa", "bcc")); + assertEquals(true, binaryDictionary.isValidBigram("abb", "aaa")); + assertEquals(true, binaryDictionary.isValidBigram("abb", "bcc")); + + binaryDictionary.removeBigramWords("aaa", "abb"); + assertEquals(false, binaryDictionary.isValidBigram("aaa", "abb")); + binaryDictionary.addBigramWords("aaa", "abb", bigramProbability); + assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb")); + + + binaryDictionary.removeBigramWords("aaa", "bcc"); + assertEquals(false, binaryDictionary.isValidBigram("aaa", "bcc")); + binaryDictionary.removeBigramWords("abb", "aaa"); + assertEquals(false, 
binaryDictionary.isValidBigram("abb", "aaa")); + binaryDictionary.removeBigramWords("abb", "bcc"); + assertEquals(false, binaryDictionary.isValidBigram("abb", "bcc")); + + binaryDictionary.removeBigramWords("aaa", "abb"); + // Test removing non-existing bigrams. + binaryDictionary.removeBigramWords("aaa", "abb"); + binaryDictionary.removeBigramWords("bcc", "aaa"); + + dictFile.delete(); + } }