diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h index 9a930747c..884bcd7a9 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h @@ -62,6 +62,11 @@ public: return flags | FLAG_ATTRIBUTE_HAS_NEXT; } + static AK_FORCE_INLINE BigramFlags setProbabilityInFlags(const BigramFlags flags, + const int probability) { + return (flags & (~MASK_ATTRIBUTE_PROBABILITY)) | (probability & MASK_ATTRIBUTE_PROBABILITY); + } + // Returns true if the bigram entry is valid and put entry values into out*. static AK_FORCE_INLINE bool createBigramEntryAndGetFlagsAndOffsetAndOffsetFieldSize( const int entryPos, const int targetPos, const int probability, const bool hasNext, diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp index ca3b64da1..6a8164d40 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp @@ -98,8 +98,8 @@ bool DynamicBigramListPolicy::copyAllBigrams(int *const fromPos, int *const toPo return true; } -bool DynamicBigramListPolicy::addBigramEntry(const int bigramPos, const int probability, - int *const pos) { +bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramPos, + const int probability, int *const pos) { const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos); if (usesAdditionalBuffer) { *pos -= mBuffer->getOriginalBufferSize(); @@ -113,7 +113,17 @@ bool DynamicBigramListPolicy::addBigramEntry(const int bigramPos, const int prob // The buffer address can be changed after calling buffer writing methods. const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer); flags = BigramListReadWriteUtils::getFlagsAndForwardPointer(buffer, pos); - BigramListReadWriteUtils::getBigramAddressAndForwardPointer(buffer, flags, pos); + int originalBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer( + buffer, flags, pos); + if (usesAdditionalBuffer && originalBigramPos != NOT_A_VALID_WORD_POS) { + originalBigramPos += mBuffer->getOriginalBufferSize(); + } + if (followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos) == bigramPos) { + // Update this bigram entry. + const BigramListReadWriteUtils::BigramFlags updatedFlags = + BigramListReadWriteUtils::setProbabilityInFlags(flags, probability); + return mBuffer->writeUintAndAdvancePosition(updatedFlags, 1 /* size */, &entryPos); + } if (BigramListReadWriteUtils::hasNext(flags)) { continue; } @@ -124,33 +134,35 @@ bool DynamicBigramListPolicy::addBigramEntry(const int bigramPos, const int prob if (!mBuffer->writeUintAndAdvancePosition(updatedFlags, 1 /* size */, &entryPos)) { return false; } - // Then, add a new entry after the last entry. - BigramListReadWriteUtils::BigramFlags newBigramFlags; - uint32_t newBigramOffset; - int newBigramOffsetFieldSize; - if(!BigramListReadWriteUtils::createBigramEntryAndGetFlagsAndOffsetAndOffsetFieldSize( - *pos, bigramPos, BigramListReadWriteUtils::getProbabilityFromFlags(flags), - BigramListReadWriteUtils::hasNext(flags), &newBigramFlags, &newBigramOffset, - &newBigramOffsetFieldSize)) { - continue; - } - int newEntryPos = *pos; if (usesAdditionalBuffer) { - newEntryPos += mBuffer->getOriginalBufferSize(); - } - // Write bigram flags. - if (!mBuffer->writeUintAndAdvancePosition(newBigramFlags, 1 /* size */, - &newEntryPos)) { - return false; - } - // Write bigram positon offset. - if (!mBuffer->writeUintAndAdvancePosition(newBigramOffset, newBigramOffsetFieldSize, - &newEntryPos)) { - return false; + *pos += mBuffer->getOriginalBufferSize(); } + // Then, add a new entry after the last entry. + return writeNewBigramEntry(bigramPos, probability, pos); } while(BigramListReadWriteUtils::hasNext(flags)); - if (usesAdditionalBuffer) { - *pos += mBuffer->getOriginalBufferSize(); + // We return directly from the while loop. + ASSERT(false); + return false; +} + +bool DynamicBigramListPolicy::writeNewBigramEntry(const int bigramPos, const int probability, + int *const writingPos) { + BigramListReadWriteUtils::BigramFlags newBigramFlags; + uint32_t newBigramOffset; + int newBigramOffsetFieldSize; + if(!BigramListReadWriteUtils::createBigramEntryAndGetFlagsAndOffsetAndOffsetFieldSize( + *writingPos, bigramPos, probability, false /* hasNext */, &newBigramFlags, + &newBigramOffset, &newBigramOffsetFieldSize)) { + return false; + } + // Write bigram flags. + if (!mBuffer->writeUintAndAdvancePosition(newBigramFlags, 1 /* size */, writingPos)) { + return false; + } + // Write bigram positon offset. + if (!mBuffer->writeUintAndAdvancePosition(newBigramOffset, newBigramOffsetFieldSize, + writingPos)) { + return false; } return true; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h index e451e313d..c45e26acc 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h @@ -48,7 +48,10 @@ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy { // positions after bigram lists. This method skips invalid bigram entries. bool copyAllBigrams(int *const fromPos, int *const toPos); - bool addBigramEntry(const int bigramPos, const int probability, int *const pos); + bool addNewBigramEntryToBigramList(const int bigramPos, const int probability, int *const pos); + + bool writeNewBigramEntry(const int bigramPos, const int probability, + int *const writingPos); // Return if targetBigramPos is found or not. bool removeBigram(const int bigramListPos, const int targetBigramPos); diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp index e455080d7..737098423 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp @@ -69,10 +69,12 @@ void DynamicPatriciaTrieNodeReader::fetchNodeInfoFromBufferAndProcessMovedNode(c if (usesAdditionalBuffer && mChildrenPos != NOT_A_DICT_POS) { mChildrenPos += mBuffer->getOriginalBufferSize(); } - if (mSiblingPos == NOT_A_DICT_POS && DynamicPatriciaTrieReadingUtils::isMoved(mFlags)) { - mBigramLinkedNodePos = mChildrenPos; - } else { - mBigramLinkedNodePos = NOT_A_DICT_POS; + if (mSiblingPos == NOT_A_DICT_POS) { + if (DynamicPatriciaTrieReadingUtils::isMoved(mFlags)) { + mBigramLinkedNodePos = mChildrenPos; + } else { + mBigramLinkedNodePos = NOT_A_DICT_POS; + } } if (usesAdditionalBuffer) { pos += mBuffer->getOriginalBufferSize(); diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp index dbc80f66a..b80b9b33f 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp @@ -26,6 +26,8 @@ namespace latinime { +const int DynamicPatriciaTrieWritingHelper::CHILDREN_POSITION_FIELD_SIZE = 3; + bool DynamicPatriciaTrieWritingHelper::addUnigramWord( DynamicPatriciaTrieReadingHelper *const readingHelper, const int *const wordCodePoints, const int codePointCount, const int probability) { @@ -79,13 +81,44 @@ bool DynamicPatriciaTrieWritingHelper::addUnigramWord( bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const int word1Pos, const int probability) { + int mMergedNodeCodePoints[MAX_WORD_LENGTH]; DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy); - nodeReader.fetchNodeInfoFromBuffer(word0Pos); - if (nodeReader.isDeleted()) { + nodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(word0Pos, MAX_WORD_LENGTH, + mMergedNodeCodePoints); + // Move node to add bigram entry. + const int newNodePos = mBuffer->getTailPosition(); + if (!markNodeAsMovedAndSetPosition(&nodeReader, newNodePos, newNodePos)) { return false; } - // TODO: Implement. - return false; + int writingPos = newNodePos; + // Write a new PtNode using original PtNode's info to the tail of the dictionary. + if (!writePtNodeToBufferByCopyingPtNodeInfo(&nodeReader, nodeReader.getParentPos(), + mMergedNodeCodePoints, nodeReader.getCodePointCount(), nodeReader.getProbability(), + &writingPos)) { + return false; + } + nodeReader.fetchNodeInfoFromBuffer(newNodePos); + if (nodeReader.getBigramsPos() != NOT_A_DICT_POS) { + // Insert a new bigram entry into the existing bigram list. + int bigramListPos = nodeReader.getBigramsPos(); + return mBigramPolicy->addNewBigramEntryToBigramList(word1Pos, probability, &bigramListPos); + } else { + // The PtNode doesn't have a bigram list. + // First, Write a bigram entry at the tail position of the PtNode. + if (!mBigramPolicy->writeNewBigramEntry(word1Pos, probability, &writingPos)) { + return false; + } + // Then, Mark as the PtNode having bigram list in the flags. + const PatriciaTrieReadingUtils::NodeFlags updatedFlags = + PatriciaTrieReadingUtils::createAndGetFlags(nodeReader.isBlacklisted(), + nodeReader.isNotAWord(), nodeReader.getProbability() != NOT_A_PROBABILITY, + nodeReader.getShortcutPos() != NOT_A_DICT_POS, true /* hasBigrams */, + nodeReader.getCodePointCount() > 1, CHILDREN_POSITION_FIELD_SIZE); + writingPos = newNodePos; + // Write updated flags into the moved PtNode's flags field. + return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags, + &writingPos); + } } // Remove a bigram relation from word0Pos to word1Pos. diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h index e1b9d2e75..20e35abcf 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h @@ -49,6 +49,8 @@ class DynamicPatriciaTrieWritingHelper { private: DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper); + static const int CHILDREN_POSITION_FIELD_SIZE; + BufferWithExtendableBuffer *const mBuffer; DynamicBigramListPolicy *const mBigramPolicy; DynamicShortcutListPolicy *const mShortcutPolicy; diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java index bf4954b34..6f05d428c 100644 --- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java +++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java @@ -122,4 +122,41 @@ public class BinaryDictionaryTests extends AndroidTestCase { assertEquals(probability, binaryDictionary.getFrequency("a")); assertEquals(updatedProbability, binaryDictionary.getFrequency("aaa")); } + + public void testAddBigramWords() { + // TODO: Add a test to check the frequency of the bigram score which uses current value + // calculated in the native code + File dictFile = null; + try { + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); + } catch (IOException e) { + fail("IOException while writing an initial dictionary : " + e); + } catch (UnsupportedFormatException e) { + fail("UnsupportedFormatException while writing an initial dictionary : " + e); + } + BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), + 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + + final int unigramProbability = 100; + final int bigramProbability = 10; + binaryDictionary.addUnigramWord("aaa", unigramProbability); + binaryDictionary.addUnigramWord("abb", unigramProbability); + binaryDictionary.addUnigramWord("bcc", unigramProbability); + binaryDictionary.addBigramWords("aaa", "abb", bigramProbability); + binaryDictionary.addBigramWords("aaa", "bcc", bigramProbability); + binaryDictionary.addBigramWords("abb", "aaa", bigramProbability); + binaryDictionary.addBigramWords("abb", "bcc", bigramProbability); + + assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb")); + assertEquals(true, binaryDictionary.isValidBigram("aaa", "bcc")); + assertEquals(true, binaryDictionary.isValidBigram("abb", "aaa")); + assertEquals(true, binaryDictionary.isValidBigram("abb", "bcc")); + + assertEquals(false, binaryDictionary.isValidBigram("bcc", "aaa")); + assertEquals(false, binaryDictionary.isValidBigram("bcc", "bbc")); + assertEquals(false, binaryDictionary.isValidBigram("aaa", "aaa")); + + dictFile.delete(); + } }