From 851e0458fe460526b1f953e39a1e406a21ab4647 Mon Sep 17 00:00:00 2001 From: Keisuke Kuroyanagi Date: Tue, 5 Aug 2014 14:13:07 +0900 Subject: [PATCH] Remove ProbabilityDictContent and use LanguageModelDictContent Bug: 14425059 Change-Id: I1bb9e78ecb24139b87c99be6722e37eec0a2285d --- native/jni/NativeFileList.mk | 1 - .../content/language_model_dict_content.cpp | 4 +- .../v4/content/language_model_dict_content.h | 13 +- .../v4/content/probability_dict_content.cpp | 159 ------------------ .../v4/content/probability_dict_content.h | 66 -------- .../structure/v4/ver4_dict_buffers.cpp | 10 -- .../structure/v4/ver4_dict_buffers.h | 12 +- .../structure/v4/ver4_dict_constants.cpp | 9 +- .../structure/v4/ver4_dict_constants.h | 1 - .../v4/ver4_patricia_trie_node_reader.cpp | 5 +- .../v4/ver4_patricia_trie_node_reader.h | 10 +- .../v4/ver4_patricia_trie_node_writer.cpp | 12 +- .../v4/ver4_patricia_trie_policy.cpp | 2 +- .../structure/v4/ver4_patricia_trie_policy.h | 2 +- .../v4/ver4_patricia_trie_writing_helper.cpp | 10 +- .../language_model_dict_content_test.cpp | 9 +- 16 files changed, 43 insertions(+), 282 deletions(-) delete mode 100644 native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.cpp delete mode 100644 native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h diff --git a/native/jni/NativeFileList.mk b/native/jni/NativeFileList.mk index 64704d94a..7a732a588 100644 --- a/native/jni/NativeFileList.mk +++ b/native/jni/NativeFileList.mk @@ -73,7 +73,6 @@ LATIN_IME_CORE_SRC_FILES := \ $(addprefix suggest/policyimpl/dictionary/structure/v4/content/, \ bigram_dict_content.cpp \ language_model_dict_content.cpp \ - probability_dict_content.cpp \ shortcut_dict_content.cpp \ sparse_table_dict_content.cpp \ terminal_position_lookup_table.cpp) \ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp index ae5847568..07e1051bc 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp @@ -30,7 +30,7 @@ bool LanguageModelDictContent::runGC( 0 /* nextLevelBitmapEntryIndex */, outNgramCount); } -ProbabilityEntry LanguageModelDictContent::getProbabilityEntry( +ProbabilityEntry LanguageModelDictContent::getNgramProbabilityEntry( const WordIdArrayView prevWordIds, const int wordId) const { if (!prevWordIds.empty()) { // TODO: Read n-gram entry. @@ -44,7 +44,7 @@ ProbabilityEntry LanguageModelDictContent::getProbabilityEntry( return ProbabilityEntry::decode(result.mValue, mHasHistoricalInfo); } -bool LanguageModelDictContent::setProbabilityEntry(const WordIdArrayView prevWordIds, +bool LanguageModelDictContent::setNgramProbabilityEntry(const WordIdArrayView prevWordIds, const int terminalId, const ProbabilityEntry *const probabilityEntry) { if (!prevWordIds.empty()) { // TODO: Add n-gram entry. diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h index 7f184f1d7..f181dfeee 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h @@ -53,9 +53,18 @@ class LanguageModelDictContent { const LanguageModelDictContent *const originalContent, int *const outNgramCount); - ProbabilityEntry getProbabilityEntry(const WordIdArrayView prevWordIds, const int wordId) const; + ProbabilityEntry getProbabilityEntry(const int wordId) const { + return getNgramProbabilityEntry(WordIdArrayView(), wordId); + } - bool setProbabilityEntry(const WordIdArrayView prevWordIds, const int wordId, + bool setProbabilityEntry(const int wordId, const ProbabilityEntry *const probabilityEntry) { + return setNgramProbabilityEntry(WordIdArrayView(), wordId, probabilityEntry); + } + + ProbabilityEntry getNgramProbabilityEntry(const WordIdArrayView prevWordIds, + const int wordId) const; + + bool setNgramProbabilityEntry(const WordIdArrayView prevWordIds, const int wordId, const ProbabilityEntry *const probabilityEntry); private: diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.cpp deleted file mode 100644 index 2425b3b2f..000000000 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.cpp +++ /dev/null @@ -1,159 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h" - -#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h" -#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h" -#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" -#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" - -namespace latinime { - -const ProbabilityEntry ProbabilityDictContent::getProbabilityEntry(const int terminalId) const { - if (terminalId < 0 || terminalId >= mSize) { - // This method can be called with invalid terminal id during GC. - return ProbabilityEntry(0 /* flags */, NOT_A_PROBABILITY); - } - const BufferWithExtendableBuffer *const buffer = getBuffer(); - int entryPos = getEntryPos(terminalId); - const int flags = buffer->readUintAndAdvancePosition( - Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE, &entryPos); - const int probability = buffer->readUintAndAdvancePosition( - Ver4DictConstants::PROBABILITY_SIZE, &entryPos); - if (mHasHistoricalInfo) { - const int timestamp = buffer->readUintAndAdvancePosition( - Ver4DictConstants::TIME_STAMP_FIELD_SIZE, &entryPos); - const int level = buffer->readUintAndAdvancePosition( - Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, &entryPos); - const int count = buffer->readUintAndAdvancePosition( - Ver4DictConstants::WORD_COUNT_FIELD_SIZE, &entryPos); - const HistoricalInfo historicalInfo(timestamp, level, count); - return ProbabilityEntry(flags, probability, &historicalInfo); - } else { - return ProbabilityEntry(flags, probability); - } -} - -bool ProbabilityDictContent::setProbabilityEntry(const int terminalId, - const ProbabilityEntry *const probabilityEntry) { - if (terminalId < 0) { - return false; - } - const int entryPos = getEntryPos(terminalId); - if (terminalId >= mSize) { - ProbabilityEntry dummyEntry; - // Write new entry. - int writingPos = getBuffer()->getTailPosition(); - while (writingPos <= entryPos) { - // Fulfilling with dummy entries until writingPos. - if (!writeEntry(&dummyEntry, writingPos)) { - AKLOGE("Cannot write dummy entry. pos: %d, mSize: %d", writingPos, mSize); - return false; - } - writingPos += getEntrySize(); - mSize++; - } - } - return writeEntry(probabilityEntry, entryPos); -} - -bool ProbabilityDictContent::flushToFile(FILE *const file) const { - if (getEntryPos(mSize) < getBuffer()->getTailPosition()) { - ProbabilityDictContent probabilityDictContentToWrite(mHasHistoricalInfo); - for (int i = 0; i < mSize; ++i) { - const ProbabilityEntry probabilityEntry = getProbabilityEntry(i); - if (!probabilityDictContentToWrite.setProbabilityEntry(i, &probabilityEntry)) { - AKLOGE("Cannot set probability entry in flushToFile. terminalId: %d", i); - return false; - } - } - return probabilityDictContentToWrite.flush(file); - } else { - return flush(file); - } -} - -bool ProbabilityDictContent::runGC( - const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap, - const ProbabilityDictContent *const originalProbabilityDictContent) { - mSize = 0; - for (TerminalPositionLookupTable::TerminalIdMap::const_iterator it = terminalIdMap->begin(); - it != terminalIdMap->end(); ++it) { - const ProbabilityEntry probabilityEntry = - originalProbabilityDictContent->getProbabilityEntry(it->first); - if (!setProbabilityEntry(it->second, &probabilityEntry)) { - AKLOGE("Cannot set probability entry in runGC. terminalId: %d", it->second); - return false; - } - mSize++; - } - return true; -} - -int ProbabilityDictContent::getEntrySize() const { - if (mHasHistoricalInfo) { - return Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE - + Ver4DictConstants::PROBABILITY_SIZE - + Ver4DictConstants::TIME_STAMP_FIELD_SIZE - + Ver4DictConstants::WORD_LEVEL_FIELD_SIZE - + Ver4DictConstants::WORD_COUNT_FIELD_SIZE; - } else { - return Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE - + Ver4DictConstants::PROBABILITY_SIZE; - } -} - -int ProbabilityDictContent::getEntryPos(const int terminalId) const { - return terminalId * getEntrySize(); -} - -bool ProbabilityDictContent::writeEntry(const ProbabilityEntry *const probabilityEntry, - const int entryPos) { - BufferWithExtendableBuffer *const bufferToWrite = getWritableBuffer(); - int writingPos = entryPos; - if (!bufferToWrite->writeUintAndAdvancePosition(probabilityEntry->getFlags(), - Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE, &writingPos)) { - AKLOGE("Cannot write flags in probability dict content. pos: %d", writingPos); - return false; - } - if (!bufferToWrite->writeUintAndAdvancePosition(probabilityEntry->getProbability(), - Ver4DictConstants::PROBABILITY_SIZE, &writingPos)) { - AKLOGE("Cannot write probability in probability dict content. pos: %d", writingPos); - return false; - } - if (mHasHistoricalInfo) { - const HistoricalInfo *const historicalInfo = probabilityEntry->getHistoricalInfo(); - if (!bufferToWrite->writeUintAndAdvancePosition(historicalInfo->getTimeStamp(), - Ver4DictConstants::TIME_STAMP_FIELD_SIZE, &writingPos)) { - AKLOGE("Cannot write timestamp in probability dict content. pos: %d", writingPos); - return false; - } - if (!bufferToWrite->writeUintAndAdvancePosition(historicalInfo->getLevel(), - Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, &writingPos)) { - AKLOGE("Cannot write level in probability dict content. pos: %d", writingPos); - return false; - } - if (!bufferToWrite->writeUintAndAdvancePosition(historicalInfo->getCount(), - Ver4DictConstants::WORD_COUNT_FIELD_SIZE, &writingPos)) { - AKLOGE("Cannot write count in probability dict content. pos: %d", writingPos); - return false; - } - } - return true; -} - -} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h deleted file mode 100644 index 80e992c1c..000000000 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright (C) 2013, The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef LATINIME_PROBABILITY_DICT_CONTENT_H -#define LATINIME_PROBABILITY_DICT_CONTENT_H - -#include -#include - -#include "defines.h" -#include "suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h" -#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h" -#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" -#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" - -namespace latinime { - -class ProbabilityEntry; - -class ProbabilityDictContent : public SingleDictContent { - public: - ProbabilityDictContent(uint8_t *const buffer, const int bufferSize, - const bool hasHistoricalInfo) - : SingleDictContent(buffer, bufferSize), - mHasHistoricalInfo(hasHistoricalInfo), - mSize(getBuffer()->getTailPosition() / getEntrySize()) {} - - ProbabilityDictContent(const bool hasHistoricalInfo) - : mHasHistoricalInfo(hasHistoricalInfo), mSize(0) {} - - const ProbabilityEntry getProbabilityEntry(const int terminalId) const; - - bool setProbabilityEntry(const int terminalId, const ProbabilityEntry *const probabilityEntry); - - bool flushToFile(FILE *const file) const; - - bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap, - const ProbabilityDictContent *const originalProbabilityDictContent); - - private: - DISALLOW_COPY_AND_ASSIGN(ProbabilityDictContent); - - int getEntrySize() const; - - int getEntryPos(const int terminalId) const; - - bool writeEntry(const ProbabilityEntry *const probabilityEntry, const int entryPos); - - bool mHasHistoricalInfo; - int mSize; -}; -} // namespace latinime -#endif /* LATINIME_PROBABILITY_DICT_CONTENT_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp index 125ae1711..3c8008dc4 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp @@ -157,11 +157,6 @@ bool Ver4DictBuffers::flushDictBuffers(FILE *const file) const { AKLOGE("Terminal position lookup table cannot be written."); return false; } - // Write probability dict content. - if (!mProbabilityDictContent.flushToFile(file)) { - AKLOGE("Probability dict content cannot be written."); - return false; - } // Write language model content. if (!mLanguageModelDictContent.save(file)) { AKLOGE("Language model dict content cannot be written."); @@ -196,10 +191,6 @@ Ver4DictBuffers::Ver4DictBuffers(MmappedBuffer::MmappedBufferPtr &&headerBuffer, contentBuffers[Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX], contentBufferSizes[ Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX]), - mProbabilityDictContent( - contentBuffers[Ver4DictConstants::PROBABILITY_BUFFER_INDEX], - contentBufferSizes[Ver4DictConstants::PROBABILITY_BUFFER_INDEX], - mHeaderPolicy.hasHistoricalInfoOfWords()), mLanguageModelDictContent( ReadWriteByteArrayView( contentBuffers[Ver4DictConstants::LANGUAGE_MODEL_BUFFER_INDEX], @@ -216,7 +207,6 @@ Ver4DictBuffers::Ver4DictBuffers(const HeaderPolicy *const headerPolicy, const i : mHeaderBuffer(nullptr), mDictBuffer(nullptr), mHeaderPolicy(headerPolicy), mExpandableHeaderBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE), mExpandableTrieBuffer(maxTrieSize), mTerminalPositionLookupTable(), - mProbabilityDictContent(headerPolicy->hasHistoricalInfoOfWords()), mLanguageModelDictContent(headerPolicy->hasHistoricalInfoOfWords()), mBigramDictContent(headerPolicy->hasHistoricalInfoOfWords()), mShortcutDictContent(), mIsUpdatable(true) {} diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h index d524dd6b7..68027dcb8 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h @@ -24,7 +24,6 @@ #include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h" #include "suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h" -#include "suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h" #include "suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h" #include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" @@ -53,7 +52,7 @@ class Ver4DictBuffers { AK_FORCE_INLINE bool isNearSizeLimit() const { return mExpandableTrieBuffer.isNearSizeLimit() || mTerminalPositionLookupTable.isNearSizeLimit() - || mProbabilityDictContent.isNearSizeLimit() + || mLanguageModelDictContent.isNearSizeLimit() || mBigramDictContent.isNearSizeLimit() || mShortcutDictContent.isNearSizeLimit(); } @@ -82,14 +81,6 @@ class Ver4DictBuffers { return &mTerminalPositionLookupTable; } - AK_FORCE_INLINE ProbabilityDictContent *getMutableProbabilityDictContent() { - return &mProbabilityDictContent; - } - - AK_FORCE_INLINE const ProbabilityDictContent *getProbabilityDictContent() const { - return &mProbabilityDictContent; - } - AK_FORCE_INLINE LanguageModelDictContent *getMutableLanguageModelDictContent() { return &mLanguageModelDictContent; } @@ -144,7 +135,6 @@ class Ver4DictBuffers { BufferWithExtendableBuffer mExpandableHeaderBuffer; BufferWithExtendableBuffer mExpandableTrieBuffer; TerminalPositionLookupTable mTerminalPositionLookupTable; - ProbabilityDictContent mProbabilityDictContent; LanguageModelDictContent mLanguageModelDictContent; BigramDictContent mBigramDictContent; ShortcutDictContent mShortcutDictContent; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp index e7e31e96f..93d4e562d 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp @@ -27,19 +27,18 @@ const int Ver4DictConstants::MAX_DICTIONARY_SIZE = 8 * 1024 * 1024; // limited to 1MB to prevent from inefficient traversing. const int Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE = 1 * 1024 * 1024; -// NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT for Trie, TerminalAddressLookupTable and Probability. +// NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT for Trie and TerminalAddressLookupTable. +// NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT for language model. // NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT for bigram and shortcut. const size_t Ver4DictConstants::NUM_OF_CONTENT_BUFFERS_IN_BODY_FILE = - NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT * 3 + NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT * 2 + NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT + NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT * 2; const int Ver4DictConstants::TRIE_BUFFER_INDEX = 0; const int Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX = TRIE_BUFFER_INDEX + NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT; -const int Ver4DictConstants::PROBABILITY_BUFFER_INDEX = - TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX + NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT; const int Ver4DictConstants::LANGUAGE_MODEL_BUFFER_INDEX = - PROBABILITY_BUFFER_INDEX + NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT; + TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX + NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT; const int Ver4DictConstants::BIGRAM_BUFFERS_INDEX = LANGUAGE_MODEL_BUFFER_INDEX + NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT; const int Ver4DictConstants::SHORTCUT_BUFFERS_INDEX = diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h index e75db9f75..6950ca70f 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h @@ -35,7 +35,6 @@ class Ver4DictConstants { static const size_t NUM_OF_CONTENT_BUFFERS_IN_BODY_FILE; static const int TRIE_BUFFER_INDEX; static const int TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX; - static const int PROBABILITY_BUFFER_INDEX; static const int LANGUAGE_MODEL_BUFFER_INDEX; static const int BIGRAM_BUFFERS_INDEX; static const int SHORTCUT_BUFFERS_INDEX; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp index 0a435e91c..731092efd 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp @@ -18,7 +18,7 @@ #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h" #include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h" -#include "suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h" +#include "suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h" #include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" @@ -61,8 +61,9 @@ const PtNodeParams Ver4PatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProce terminalIdFieldPos += mBuffer->getOriginalBufferSize(); } terminalId = Ver4PatriciaTrieReadingUtils::getTerminalIdAndAdvancePosition(dictBuf, &pos); + // TODO: Quit reading probability here. const ProbabilityEntry probabilityEntry = - mProbabilityDictContent->getProbabilityEntry(terminalId); + mLanguageModelDictContent->getProbabilityEntry(terminalId); if (probabilityEntry.hasHistoricalInfo()) { probability = ForgettingCurveUtils::decodeProbability( probabilityEntry.getHistoricalInfo(), mHeaderPolicy); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h index 22ed4a6c0..a91ad5728 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h @@ -25,18 +25,18 @@ namespace latinime { class BufferWithExtendableBuffer; class HeaderPolicy; -class ProbabilityDictContent; +class LanguageModelDictContent; /* * This class is used for helping to read nodes of ver4 patricia trie. This class handles moved - * node and reads node attributes including probability form probabilityBuffer. + * node and reads node attributes including probability form language model. */ class Ver4PatriciaTrieNodeReader : public PtNodeReader { public: Ver4PatriciaTrieNodeReader(const BufferWithExtendableBuffer *const buffer, - const ProbabilityDictContent *const probabilityDictContent, + const LanguageModelDictContent *const languageModelDictContent, const HeaderPolicy *const headerPolicy) - : mBuffer(buffer), mProbabilityDictContent(probabilityDictContent), + : mBuffer(buffer), mLanguageModelDictContent(languageModelDictContent), mHeaderPolicy(headerPolicy) {} ~Ver4PatriciaTrieNodeReader() {} @@ -50,7 +50,7 @@ class Ver4PatriciaTrieNodeReader : public PtNodeReader { DISALLOW_COPY_AND_ASSIGN(Ver4PatriciaTrieNodeReader); const BufferWithExtendableBuffer *const mBuffer; - const ProbabilityDictContent *const mProbabilityDictContent; + const LanguageModelDictContent *const mLanguageModelDictContent; const HeaderPolicy *const mHeaderPolicy; const PtNodeParams fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos, diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp index 3d8da9173..1a311b156 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp @@ -143,11 +143,11 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeUnigramProperty( return false; } const ProbabilityEntry originalProbabilityEntry = - mBuffers->getProbabilityDictContent()->getProbabilityEntry( + mBuffers->getLanguageModelDictContent()->getProbabilityEntry( toBeUpdatedPtNodeParams->getTerminalId()); const ProbabilityEntry probabilityEntry = createUpdatedEntryFrom(&originalProbabilityEntry, unigramProperty); - return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry( + return mBuffers->getMutableLanguageModelDictContent()->setProbabilityEntry( toBeUpdatedPtNodeParams->getTerminalId(), &probabilityEntry); } @@ -158,14 +158,14 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbabilityAndGetNeedsToKeepPtNodeA return false; } const ProbabilityEntry originalProbabilityEntry = - mBuffers->getProbabilityDictContent()->getProbabilityEntry( + mBuffers->getLanguageModelDictContent()->getProbabilityEntry( toBeUpdatedPtNodeParams->getTerminalId()); if (originalProbabilityEntry.hasHistoricalInfo()) { const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave( originalProbabilityEntry.getHistoricalInfo(), mHeaderPolicy); const ProbabilityEntry probabilityEntry = originalProbabilityEntry.createEntryWithUpdatedHistoricalInfo(&historicalInfo); - if (!mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry( + if (!mBuffers->getMutableLanguageModelDictContent()->setProbabilityEntry( toBeUpdatedPtNodeParams->getTerminalId(), &probabilityEntry)) { AKLOGE("Cannot write updated probability entry. terminalId: %d", toBeUpdatedPtNodeParams->getTerminalId()); @@ -218,8 +218,8 @@ bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition( ProbabilityEntry newProbabilityEntry; const ProbabilityEntry probabilityEntryToWrite = createUpdatedEntryFrom( &newProbabilityEntry, unigramProperty); - return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(terminalId, - &probabilityEntryToWrite); + return mBuffers->getMutableLanguageModelDictContent()->setProbabilityEntry( + terminalId, &probabilityEntryToWrite); } bool Ver4PatriciaTrieNodeWriter::addNewBigramEntry( diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp index 4bf8050e1..2b92d5bea 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp @@ -452,7 +452,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code std::vector codePointVector(ptNodeParams.getCodePoints(), ptNodeParams.getCodePoints() + ptNodeParams.getCodePointCount()); const ProbabilityEntry probabilityEntry = - mBuffers->getProbabilityDictContent()->getProbabilityEntry( + mBuffers->getLanguageModelDictContent()->getProbabilityEntry( ptNodeParams.getTerminalId()); const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo(); // Fetch bigram information. diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h index 76b3404c4..faad4290d 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h @@ -46,7 +46,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { mBuffers->getTerminalPositionLookupTable(), mHeaderPolicy), mShortcutPolicy(mBuffers->getMutableShortcutDictContent(), mBuffers->getTerminalPositionLookupTable()), - mNodeReader(mDictBuffer, mBuffers->getProbabilityDictContent(), mHeaderPolicy), + mNodeReader(mDictBuffer, mBuffers->getLanguageModelDictContent(), mHeaderPolicy), mPtNodeArrayReader(mDictBuffer), mNodeWriter(mDictBuffer, mBuffers.get(), mHeaderPolicy, &mNodeReader, &mPtNodeArrayReader, &mBigramPolicy, &mShortcutPolicy), diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp index 0e658f8e3..4220312e0 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp @@ -75,7 +75,7 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, const HeaderPolicy *const headerPolicy, Ver4DictBuffers *const buffersToWrite, int *const outUnigramCount, int *const outBigramCount) { Ver4PatriciaTrieNodeReader ptNodeReader(mBuffers->getTrieBuffer(), - mBuffers->getProbabilityDictContent(), headerPolicy); + mBuffers->getLanguageModelDictContent(), headerPolicy); Ver4PtNodeArrayReader ptNodeArrayReader(mBuffers->getTrieBuffer()); Ver4BigramListPolicy bigramPolicy(mBuffers->getMutableBigramDictContent(), mBuffers->getTerminalPositionLookupTable(), headerPolicy); @@ -138,7 +138,7 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, // Create policy instances for the GCed dictionary. Ver4PatriciaTrieNodeReader newPtNodeReader(buffersToWrite->getTrieBuffer(), - buffersToWrite->getProbabilityDictContent(), headerPolicy); + buffersToWrite->getLanguageModelDictContent(), headerPolicy); Ver4PtNodeArrayReader newPtNodeArrayreader(buffersToWrite->getTrieBuffer()); Ver4BigramListPolicy newBigramPolicy(buffersToWrite->getMutableBigramDictContent(), buffersToWrite->getTerminalPositionLookupTable(), headerPolicy); @@ -154,8 +154,8 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, return false; } // Run GC for probability dict content. - if (!buffersToWrite->getMutableProbabilityDictContent()->runGC(&terminalIdMap, - mBuffers->getProbabilityDictContent())) { + if (!buffersToWrite->getMutableLanguageModelDictContent()->runGC(&terminalIdMap, + mBuffers->getLanguageModelDictContent(), nullptr /* outNgramCount */)) { return false; } // Run GC for bigram dict content. @@ -201,7 +201,7 @@ bool Ver4PatriciaTrieWritingHelper::truncateUnigrams( continue; } const ProbabilityEntry probabilityEntry = - mBuffers->getProbabilityDictContent()->getProbabilityEntry(i); + mBuffers->getLanguageModelDictContent()->getProbabilityEntry(i); const int probability = probabilityEntry.hasHistoricalInfo() ? ForgettingCurveUtils::decodeProbability( probabilityEntry.getHistoricalInfo(), mBuffers->getHeaderPolicy()) : diff --git a/native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_test.cpp b/native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_test.cpp index bf08344dd..6eef2040b 100644 --- a/native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_test.cpp +++ b/native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_test.cpp @@ -30,9 +30,9 @@ TEST(LanguageModelDictContentTest, TestUnigramProbability) { const int probability = 10; const int wordId = 100; const ProbabilityEntry probabilityEntry(flag, probability); - LanguageModelDictContent.setProbabilityEntry(WordIdArrayView(), wordId, &probabilityEntry); + LanguageModelDictContent.setProbabilityEntry(wordId, &probabilityEntry); const ProbabilityEntry entry = - LanguageModelDictContent.getProbabilityEntry(WordIdArrayView(), wordId); + LanguageModelDictContent.getProbabilityEntry(wordId); EXPECT_EQ(flag, entry.getFlags()); EXPECT_EQ(probability, entry.getProbability()); } @@ -47,9 +47,8 @@ TEST(LanguageModelDictContentTest, TestUnigramProbabilityWithHistoricalInfo) { const int wordId = 100; const HistoricalInfo historicalInfo(timestamp, level, count); const ProbabilityEntry probabilityEntry(flag, NOT_A_PROBABILITY, &historicalInfo); - LanguageModelDictContent.setProbabilityEntry(WordIdArrayView(), wordId, &probabilityEntry); - const ProbabilityEntry entry = - LanguageModelDictContent.getProbabilityEntry(WordIdArrayView(), wordId); + LanguageModelDictContent.setProbabilityEntry(wordId, &probabilityEntry); + const ProbabilityEntry entry = LanguageModelDictContent.getProbabilityEntry(wordId); EXPECT_EQ(flag, entry.getFlags()); EXPECT_EQ(timestamp, entry.getHistoricalInfo()->getTimeStamp()); EXPECT_EQ(level, entry.getHistoricalInfo()->getLevel());