From 6b0561f9d26215209e8e8895f5c35982af5158f0 Mon Sep 17 00:00:00 2001 From: Keisuke Kuroyanagi Date: Wed, 29 Oct 2014 21:05:41 +0900 Subject: [PATCH] Add a class to have global counters for LanguageModelDictContent. Bug: 14425059 Change-Id: I08ec19903432356b6028853fd73b4eefce20218e --- native/jni/NativeFileList.mk | 2 + .../content/language_model_dict_content.cpp | 9 +- .../v4/content/language_model_dict_content.h | 14 ++- ...age_model_dict_content_global_counters.cpp | 30 ++++++ ...guage_model_dict_content_global_counters.h | 97 +++++++++++++++++++ .../structure/v4/ver4_dict_buffers.cpp | 2 +- .../structure/v4/ver4_dict_constants.cpp | 2 +- ...odel_dict_content_global_counters_test.cpp | 60 ++++++++++++ 8 files changed, 209 insertions(+), 7 deletions(-) create mode 100644 native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters.cpp create mode 100644 native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters.h create mode 100644 native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters_test.cpp diff --git a/native/jni/NativeFileList.mk b/native/jni/NativeFileList.mk index 7299ed3c0..ca40ba8a5 100644 --- a/native/jni/NativeFileList.mk +++ b/native/jni/NativeFileList.mk @@ -72,6 +72,7 @@ LATIN_IME_CORE_SRC_FILES := \ ver4_pt_node_array_reader.cpp) \ $(addprefix suggest/policyimpl/dictionary/structure/v4/content/, \ language_model_dict_content.cpp \ + language_model_dict_content_global_counters.cpp \ shortcut_dict_content.cpp \ sparse_table_dict_content.cpp \ terminal_position_lookup_table.cpp) \ @@ -128,6 +129,7 @@ LATIN_IME_CORE_TEST_FILES := \ suggest/core/layout/normal_distribution_2d_test.cpp \ suggest/policyimpl/dictionary/header/header_read_write_utils_test.cpp \ suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_test.cpp \ + 
suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters_test.cpp \ suggest/policyimpl/dictionary/structure/v4/content/probability_entry_test.cpp \ suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table_test.cpp \ suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer_test.cpp \ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp index 509bd683b..05a3a6356 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp @@ -24,9 +24,11 @@ namespace latinime { const int LanguageModelDictContent::DUMMY_PROBABILITY_FOR_VALID_WORDS = 1; +const int LanguageModelDictContent::TRIE_MAP_BUFFER_INDEX = 0; +const int LanguageModelDictContent::GLOBAL_COUNTERS_BUFFER_INDEX = 1; bool LanguageModelDictContent::save(FILE *const file) const { - return mTrieMap.save(file); + return mTrieMap.save(file) && mGlobalCounters.save(file); } bool LanguageModelDictContent::runGC( @@ -212,6 +214,9 @@ bool LanguageModelDictContent::updateAllEntriesOnInputWord(const WordIdArrayView if (!setProbabilityEntry(wordId, &updatedUnigramProbabilityEntry)) { return false; } + mGlobalCounters.incrementTotalCount(); + mGlobalCounters.updateMaxValueOfCounters( + updatedUnigramProbabilityEntry.getHistoricalInfo()->getCount()); for (size_t i = 0; i < prevWordIds.size(); ++i) { if (prevWordIds[i] == NOT_A_WORD_ID) { break; @@ -225,6 +230,8 @@ bool LanguageModelDictContent::updateAllEntriesOnInputWord(const WordIdArrayView if (!setNgramProbabilityEntry(limitedPrevWordIds, wordId, &updatedNgramProbabilityEntry)) { return false; } + mGlobalCounters.updateMaxValueOfCounters( + 
updatedNgramProbabilityEntry.getHistoricalInfo()->getCount()); if (!originalNgramProbabilityEntry.isValid()) { entryCountersToUpdate->incrementNgramCount(i + 2); } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h index 1cccf92d2..5b92b96e3 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h @@ -22,6 +22,7 @@ #include "defines.h" #include "suggest/core/dictionary/word_attributes.h" +#include "suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters.h" #include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h" #include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" @@ -131,15 +132,17 @@ class LanguageModelDictContent { const ProbabilityEntry mProbabilityEntry; }; - LanguageModelDictContent(const ReadWriteByteArrayView trieMapBuffer, + LanguageModelDictContent(const ReadWriteByteArrayView *const buffers, const bool hasHistoricalInfo) - : mTrieMap(trieMapBuffer), mHasHistoricalInfo(hasHistoricalInfo) {} + : mTrieMap(buffers[TRIE_MAP_BUFFER_INDEX]), + mGlobalCounters(buffers[GLOBAL_COUNTERS_BUFFER_INDEX]), + mHasHistoricalInfo(hasHistoricalInfo) {} explicit LanguageModelDictContent(const bool hasHistoricalInfo) - : mTrieMap(), mHasHistoricalInfo(hasHistoricalInfo) {} + : mTrieMap(), mGlobalCounters(), mHasHistoricalInfo(hasHistoricalInfo) {} bool isNearSizeLimit() const { - return mTrieMap.isNearSizeLimit(); + return mTrieMap.isNearSizeLimit() || mGlobalCounters.needsToHalveCounters(); } bool save(FILE *const file) const; @@ -218,8 +221,11 @@ class LanguageModelDictContent { // TODO: 
Remove static const int DUMMY_PROBABILITY_FOR_VALID_WORDS; + static const int TRIE_MAP_BUFFER_INDEX; + static const int GLOBAL_COUNTERS_BUFFER_INDEX; TrieMap mTrieMap; + LanguageModelDictContentGlobalCounters mGlobalCounters; const bool mHasHistoricalInfo; bool runGCInner(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap, diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters.cpp new file mode 100644 index 000000000..9518ab419 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters.cpp @@ -0,0 +1,30 @@ +/* + * Copyright (C) 2014, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters.h" + +#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" + +namespace latinime { + +const int LanguageModelDictContentGlobalCounters::COUNTER_VALUE_NEAR_LIMIT_THRESHOLD = + (1 << (Ver4DictConstants::WORD_COUNT_FIELD_SIZE * CHAR_BIT)) - 64; +const int LanguageModelDictContentGlobalCounters::TOTAL_COUNT_VALUE_NEAR_LIMIT_THRESHOLD = 1 << 30; +const int LanguageModelDictContentGlobalCounters::COUNTER_SIZE_IN_BYTES = 4; +const int LanguageModelDictContentGlobalCounters::TOTAL_COUNT_INDEX = 0; +const int LanguageModelDictContentGlobalCounters::MAX_VALUE_OF_COUNTERS_INDEX = 1; + +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters.h new file mode 100644 index 000000000..9953aa425 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters.h @@ -0,0 +1,97 @@ +/* + * Copyright (C) 2014, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LATINIME_LANGUAGE_MODEL_DICT_CONTENT_GLOBAL_COUNTERS_H +#define LATINIME_LANGUAGE_MODEL_DICT_CONTENT_GLOBAL_COUNTERS_H + +#include <cstdio> + +#include "defines.h" +#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" +#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h" +#include "utils/byte_array_view.h" + +namespace latinime { + +class LanguageModelDictContentGlobalCounters { + public: + explicit LanguageModelDictContentGlobalCounters(const ReadWriteByteArrayView buffer) + : mBuffer(buffer, 0 /* maxAdditionalBufferSize */), + mTotalCount(readValue(mBuffer, TOTAL_COUNT_INDEX)), + mMaxValueOfCounters(readValue(mBuffer, MAX_VALUE_OF_COUNTERS_INDEX)) {} + + LanguageModelDictContentGlobalCounters() + : mBuffer(0 /* maxAdditionalBufferSize */), mTotalCount(0), mMaxValueOfCounters(0) {} + + bool needsToHalveCounters() const { + return mMaxValueOfCounters >= COUNTER_VALUE_NEAR_LIMIT_THRESHOLD + || mTotalCount >= TOTAL_COUNT_VALUE_NEAR_LIMIT_THRESHOLD; + } + + int getTotalCount() const { + return mTotalCount; + } + + bool save(FILE *const file) const { + BufferWithExtendableBuffer bufferToWrite( + BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE); + if (!bufferToWrite.writeUint(mTotalCount, COUNTER_SIZE_IN_BYTES, + TOTAL_COUNT_INDEX * COUNTER_SIZE_IN_BYTES)) { + return false; + } + if (!bufferToWrite.writeUint(mMaxValueOfCounters, COUNTER_SIZE_IN_BYTES, + MAX_VALUE_OF_COUNTERS_INDEX * COUNTER_SIZE_IN_BYTES)) { + return false; + } + return DictFileWritingUtils::writeBufferToFileTail(file, &bufferToWrite); + } + + void incrementTotalCount() { + mTotalCount += 1; + } + + void updateMaxValueOfCounters(const int count) { + mMaxValueOfCounters = std::max(count, mMaxValueOfCounters); + } + + void halveCounters() { + mMaxValueOfCounters /= 2; + mTotalCount /= 2; + } + +private: + DISALLOW_COPY_AND_ASSIGN(LanguageModelDictContentGlobalCounters); + + const static int COUNTER_VALUE_NEAR_LIMIT_THRESHOLD; + const 
static int TOTAL_COUNT_VALUE_NEAR_LIMIT_THRESHOLD; + const static int COUNTER_SIZE_IN_BYTES; + const static int TOTAL_COUNT_INDEX; + const static int MAX_VALUE_OF_COUNTERS_INDEX; + + BufferWithExtendableBuffer mBuffer; + int mTotalCount; + int mMaxValueOfCounters; + + static int readValue(const BufferWithExtendableBuffer &buffer, const int index) { + const int pos = COUNTER_SIZE_IN_BYTES * index; + if (pos + COUNTER_SIZE_IN_BYTES > buffer.getTailPosition()) { + return 0; + } + return buffer.readUint(COUNTER_SIZE_IN_BYTES, pos); + } +}; +} // namespace latinime +#endif /* LATINIME_LANGUAGE_MODEL_DICT_CONTENT_GLOBAL_COUNTERS_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp index 45f88e9b2..4d088dcab 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp @@ -179,7 +179,7 @@ Ver4DictBuffers::Ver4DictBuffers(MmappedBuffer::MmappedBufferPtr &&headerBuffer, BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE), mTerminalPositionLookupTable( contentBuffers[Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX]), - mLanguageModelDictContent(contentBuffers[Ver4DictConstants::LANGUAGE_MODEL_BUFFER_INDEX], + mLanguageModelDictContent(&contentBuffers[Ver4DictConstants::LANGUAGE_MODEL_BUFFER_INDEX], mHeaderPolicy.hasHistoricalInfoOfWords()), mShortcutDictContent(&contentBuffers[Ver4DictConstants::SHORTCUT_BUFFERS_INDEX]), mIsUpdatable(mDictBuffer->isUpdatable()) {} diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp index 8e6cb974b..eb6080a24 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp +++ 
b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp @@ -67,6 +67,6 @@ const int Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK = 0x80; const size_t Ver4DictConstants::NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT = 1; const size_t Ver4DictConstants::NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT = 3; -const size_t Ver4DictConstants::NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT = 1; +const size_t Ver4DictConstants::NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT = 2; } // namespace latinime diff --git a/native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters_test.cpp b/native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters_test.cpp new file mode 100644 index 000000000..44b5a8aaa --- /dev/null +++ b/native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters_test.cpp @@ -0,0 +1,60 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters.h" + +#include <gtest/gtest.h> + +#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" + +namespace latinime { +namespace { + +TEST(LanguageModelDictContentGlobalCountersTest, TestUpdateMaxValueOfCounters) { + LanguageModelDictContentGlobalCounters globalCounters; + + EXPECT_FALSE(globalCounters.needsToHalveCounters()); + globalCounters.updateMaxValueOfCounters(10); + EXPECT_FALSE(globalCounters.needsToHalveCounters()); + const int count = (1 << (Ver4DictConstants::WORD_COUNT_FIELD_SIZE * CHAR_BIT)) - 1; + globalCounters.updateMaxValueOfCounters(count); + EXPECT_TRUE(globalCounters.needsToHalveCounters()); + globalCounters.halveCounters(); + EXPECT_FALSE(globalCounters.needsToHalveCounters()); +} + +TEST(LanguageModelDictContentGlobalCountersTest, TestIncrementTotalCount) { + LanguageModelDictContentGlobalCounters globalCounters; + + EXPECT_EQ(0, globalCounters.getTotalCount()); + globalCounters.incrementTotalCount(); + EXPECT_EQ(1, globalCounters.getTotalCount()); + for (int i = 1; i < 50; ++i) { + globalCounters.incrementTotalCount(); + } + EXPECT_EQ(50, globalCounters.getTotalCount()); + globalCounters.halveCounters(); + EXPECT_EQ(25, globalCounters.getTotalCount()); + globalCounters.halveCounters(); + EXPECT_EQ(12, globalCounters.getTotalCount()); + for (int i = 0; i < 4; ++i) { + globalCounters.halveCounters(); + } + EXPECT_EQ(0, globalCounters.getTotalCount()); +} + +} // namespace +} // namespace latinime