am afe67611
: Merge "Add a class to have global counters for LanguageModelDictContent."
* commit 'afe67611c384e395175e52e6a3a37fc20d42ea64': Add a class to have global counters for LanguageModelDictContent.
This commit is contained in:
commit
01760cc6a6
8 changed files with 209 additions and 7 deletions
|
@ -72,6 +72,7 @@ LATIN_IME_CORE_SRC_FILES := \
|
||||||
ver4_pt_node_array_reader.cpp) \
|
ver4_pt_node_array_reader.cpp) \
|
||||||
$(addprefix suggest/policyimpl/dictionary/structure/v4/content/, \
|
$(addprefix suggest/policyimpl/dictionary/structure/v4/content/, \
|
||||||
language_model_dict_content.cpp \
|
language_model_dict_content.cpp \
|
||||||
|
language_model_dict_content_global_counters.cpp \
|
||||||
shortcut_dict_content.cpp \
|
shortcut_dict_content.cpp \
|
||||||
sparse_table_dict_content.cpp \
|
sparse_table_dict_content.cpp \
|
||||||
terminal_position_lookup_table.cpp) \
|
terminal_position_lookup_table.cpp) \
|
||||||
|
@ -128,6 +129,7 @@ LATIN_IME_CORE_TEST_FILES := \
|
||||||
suggest/core/layout/normal_distribution_2d_test.cpp \
|
suggest/core/layout/normal_distribution_2d_test.cpp \
|
||||||
suggest/policyimpl/dictionary/header/header_read_write_utils_test.cpp \
|
suggest/policyimpl/dictionary/header/header_read_write_utils_test.cpp \
|
||||||
suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_test.cpp \
|
suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_test.cpp \
|
||||||
|
suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters_test.cpp \
|
||||||
suggest/policyimpl/dictionary/structure/v4/content/probability_entry_test.cpp \
|
suggest/policyimpl/dictionary/structure/v4/content/probability_entry_test.cpp \
|
||||||
suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table_test.cpp \
|
suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table_test.cpp \
|
||||||
suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer_test.cpp \
|
suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer_test.cpp \
|
||||||
|
|
|
@ -24,9 +24,11 @@
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
const int LanguageModelDictContent::DUMMY_PROBABILITY_FOR_VALID_WORDS = 1;
|
const int LanguageModelDictContent::DUMMY_PROBABILITY_FOR_VALID_WORDS = 1;
|
||||||
|
const int LanguageModelDictContent::TRIE_MAP_BUFFER_INDEX = 0;
|
||||||
|
const int LanguageModelDictContent::GLOBAL_COUNTERS_BUFFER_INDEX = 1;
|
||||||
|
|
||||||
bool LanguageModelDictContent::save(FILE *const file) const {
|
bool LanguageModelDictContent::save(FILE *const file) const {
|
||||||
return mTrieMap.save(file);
|
return mTrieMap.save(file) && mGlobalCounters.save(file);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool LanguageModelDictContent::runGC(
|
bool LanguageModelDictContent::runGC(
|
||||||
|
@ -212,6 +214,9 @@ bool LanguageModelDictContent::updateAllEntriesOnInputWord(const WordIdArrayView
|
||||||
if (!setProbabilityEntry(wordId, &updatedUnigramProbabilityEntry)) {
|
if (!setProbabilityEntry(wordId, &updatedUnigramProbabilityEntry)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
mGlobalCounters.incrementTotalCount();
|
||||||
|
mGlobalCounters.updateMaxValueOfCounters(
|
||||||
|
updatedUnigramProbabilityEntry.getHistoricalInfo()->getCount());
|
||||||
for (size_t i = 0; i < prevWordIds.size(); ++i) {
|
for (size_t i = 0; i < prevWordIds.size(); ++i) {
|
||||||
if (prevWordIds[i] == NOT_A_WORD_ID) {
|
if (prevWordIds[i] == NOT_A_WORD_ID) {
|
||||||
break;
|
break;
|
||||||
|
@ -225,6 +230,8 @@ bool LanguageModelDictContent::updateAllEntriesOnInputWord(const WordIdArrayView
|
||||||
if (!setNgramProbabilityEntry(limitedPrevWordIds, wordId, &updatedNgramProbabilityEntry)) {
|
if (!setNgramProbabilityEntry(limitedPrevWordIds, wordId, &updatedNgramProbabilityEntry)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
// Feed the count of the n-gram entry that was just stored into the global maximum.
// The unigram entry's count was already reported right after setProbabilityEntry()
// earlier in this function; reporting it again here would leave n-gram counts
// untracked by needsToHalveCounters().
mGlobalCounters.updateMaxValueOfCounters(
        updatedNgramProbabilityEntry.getHistoricalInfo()->getCount());
|
||||||
if (!originalNgramProbabilityEntry.isValid()) {
|
if (!originalNgramProbabilityEntry.isValid()) {
|
||||||
entryCountersToUpdate->incrementNgramCount(i + 2);
|
entryCountersToUpdate->incrementNgramCount(i + 2);
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,6 +22,7 @@
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "suggest/core/dictionary/word_attributes.h"
|
#include "suggest/core/dictionary/word_attributes.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
|
||||||
|
@ -131,15 +132,17 @@ class LanguageModelDictContent {
|
||||||
const ProbabilityEntry mProbabilityEntry;
|
const ProbabilityEntry mProbabilityEntry;
|
||||||
};
|
};
|
||||||
|
|
||||||
LanguageModelDictContent(const ReadWriteByteArrayView trieMapBuffer,
|
LanguageModelDictContent(const ReadWriteByteArrayView *const buffers,
|
||||||
const bool hasHistoricalInfo)
|
const bool hasHistoricalInfo)
|
||||||
: mTrieMap(trieMapBuffer), mHasHistoricalInfo(hasHistoricalInfo) {}
|
: mTrieMap(buffers[TRIE_MAP_BUFFER_INDEX]),
|
||||||
|
mGlobalCounters(buffers[GLOBAL_COUNTERS_BUFFER_INDEX]),
|
||||||
|
mHasHistoricalInfo(hasHistoricalInfo) {}
|
||||||
|
|
||||||
explicit LanguageModelDictContent(const bool hasHistoricalInfo)
|
explicit LanguageModelDictContent(const bool hasHistoricalInfo)
|
||||||
: mTrieMap(), mHasHistoricalInfo(hasHistoricalInfo) {}
|
: mTrieMap(), mGlobalCounters(), mHasHistoricalInfo(hasHistoricalInfo) {}
|
||||||
|
|
||||||
bool isNearSizeLimit() const {
|
bool isNearSizeLimit() const {
|
||||||
return mTrieMap.isNearSizeLimit();
|
return mTrieMap.isNearSizeLimit() || mGlobalCounters.needsToHalveCounters();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool save(FILE *const file) const;
|
bool save(FILE *const file) const;
|
||||||
|
@ -218,8 +221,11 @@ class LanguageModelDictContent {
|
||||||
|
|
||||||
// TODO: Remove
|
// TODO: Remove
|
||||||
static const int DUMMY_PROBABILITY_FOR_VALID_WORDS;
|
static const int DUMMY_PROBABILITY_FOR_VALID_WORDS;
|
||||||
|
static const int TRIE_MAP_BUFFER_INDEX;
|
||||||
|
static const int GLOBAL_COUNTERS_BUFFER_INDEX;
|
||||||
|
|
||||||
TrieMap mTrieMap;
|
TrieMap mTrieMap;
|
||||||
|
LanguageModelDictContentGlobalCounters mGlobalCounters;
|
||||||
const bool mHasHistoricalInfo;
|
const bool mHasHistoricalInfo;
|
||||||
|
|
||||||
bool runGCInner(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
|
bool runGCInner(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
|
||||||
|
|
|
@ -0,0 +1,30 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2014, The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters.h"
|
||||||
|
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
// Largest per-entry count tolerated before needsToHalveCounters() reports true:
// 64 below 2^(number of bits in a word count field).
const int LanguageModelDictContentGlobalCounters::COUNTER_VALUE_NEAR_LIMIT_THRESHOLD =
        (1 << (Ver4DictConstants::WORD_COUNT_FIELD_SIZE * CHAR_BIT)) - 64;

// Total count at or above which needsToHalveCounters() reports true.
const int LanguageModelDictContentGlobalCounters::TOTAL_COUNT_VALUE_NEAR_LIMIT_THRESHOLD = 1 << 30;

// Width in bytes of each counter field read from / written to the buffer.
const int LanguageModelDictContentGlobalCounters::COUNTER_SIZE_IN_BYTES = 4;

// Slot indices of the counters; a slot starts at index * COUNTER_SIZE_IN_BYTES.
const int LanguageModelDictContentGlobalCounters::TOTAL_COUNT_INDEX = 0;
const int LanguageModelDictContentGlobalCounters::MAX_VALUE_OF_COUNTERS_INDEX = 1;
|
||||||
|
|
||||||
|
} // namespace latinime
|
|
@ -0,0 +1,97 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2014, The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LATINIME_LANGUAGE_MODEL_DICT_CONTENT_GLOBAL_COUNTERS_H
|
||||||
|
#define LATINIME_LANGUAGE_MODEL_DICT_CONTENT_GLOBAL_COUNTERS_H
|
||||||
|
|
||||||
|
#include <cstdio>
|
||||||
|
|
||||||
|
#include "defines.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
|
||||||
|
#include "utils/byte_array_view.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
class LanguageModelDictContentGlobalCounters {
|
||||||
|
public:
|
||||||
|
explicit LanguageModelDictContentGlobalCounters(const ReadWriteByteArrayView buffer)
|
||||||
|
: mBuffer(buffer, 0 /* maxAdditionalBufferSize */),
|
||||||
|
mTotalCount(readValue(mBuffer, TOTAL_COUNT_INDEX)),
|
||||||
|
mMaxValueOfCounters(readValue(mBuffer, MAX_VALUE_OF_COUNTERS_INDEX)) {}
|
||||||
|
|
||||||
|
LanguageModelDictContentGlobalCounters()
|
||||||
|
: mBuffer(0 /* maxAdditionalBufferSize */), mTotalCount(0), mMaxValueOfCounters(0) {}
|
||||||
|
|
||||||
|
bool needsToHalveCounters() const {
|
||||||
|
return mMaxValueOfCounters >= COUNTER_VALUE_NEAR_LIMIT_THRESHOLD
|
||||||
|
|| mTotalCount >= TOTAL_COUNT_VALUE_NEAR_LIMIT_THRESHOLD;
|
||||||
|
}
|
||||||
|
|
||||||
|
int getTotalCount() const {
|
||||||
|
return mTotalCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool save(FILE *const file) const {
|
||||||
|
BufferWithExtendableBuffer bufferToWrite(
|
||||||
|
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
|
||||||
|
if (!bufferToWrite.writeUint(mTotalCount, COUNTER_SIZE_IN_BYTES,
|
||||||
|
TOTAL_COUNT_INDEX * COUNTER_SIZE_IN_BYTES)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (!bufferToWrite.writeUint(mMaxValueOfCounters, COUNTER_SIZE_IN_BYTES,
|
||||||
|
MAX_VALUE_OF_COUNTERS_INDEX * COUNTER_SIZE_IN_BYTES)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return DictFileWritingUtils::writeBufferToFileTail(file, &bufferToWrite);
|
||||||
|
}
|
||||||
|
|
||||||
|
void incrementTotalCount() {
|
||||||
|
mTotalCount += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
void updateMaxValueOfCounters(const int count) {
|
||||||
|
mMaxValueOfCounters = std::max(count, mMaxValueOfCounters);
|
||||||
|
}
|
||||||
|
|
||||||
|
void halveCounters() {
|
||||||
|
mMaxValueOfCounters /= 2;
|
||||||
|
mTotalCount /= 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
DISALLOW_COPY_AND_ASSIGN(LanguageModelDictContentGlobalCounters);
|
||||||
|
|
||||||
|
const static int COUNTER_VALUE_NEAR_LIMIT_THRESHOLD;
|
||||||
|
const static int TOTAL_COUNT_VALUE_NEAR_LIMIT_THRESHOLD;
|
||||||
|
const static int COUNTER_SIZE_IN_BYTES;
|
||||||
|
const static int TOTAL_COUNT_INDEX;
|
||||||
|
const static int MAX_VALUE_OF_COUNTERS_INDEX;
|
||||||
|
|
||||||
|
BufferWithExtendableBuffer mBuffer;
|
||||||
|
int mTotalCount;
|
||||||
|
int mMaxValueOfCounters;
|
||||||
|
|
||||||
|
static int readValue(const BufferWithExtendableBuffer &buffer, const int index) {
|
||||||
|
const int pos = COUNTER_SIZE_IN_BYTES * index;
|
||||||
|
if (pos + COUNTER_SIZE_IN_BYTES > buffer.getTailPosition()) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return buffer.readUint(COUNTER_SIZE_IN_BYTES, pos);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
} // namespace latinime
|
||||||
|
#endif /* LATINIME_LANGUAGE_MODEL_DICT_CONTENT_GLOBAL_COUNTERS_H */
|
|
@ -179,7 +179,7 @@ Ver4DictBuffers::Ver4DictBuffers(MmappedBuffer::MmappedBufferPtr &&headerBuffer,
|
||||||
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
|
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
|
||||||
mTerminalPositionLookupTable(
|
mTerminalPositionLookupTable(
|
||||||
contentBuffers[Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX]),
|
contentBuffers[Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX]),
|
||||||
mLanguageModelDictContent(contentBuffers[Ver4DictConstants::LANGUAGE_MODEL_BUFFER_INDEX],
|
mLanguageModelDictContent(&contentBuffers[Ver4DictConstants::LANGUAGE_MODEL_BUFFER_INDEX],
|
||||||
mHeaderPolicy.hasHistoricalInfoOfWords()),
|
mHeaderPolicy.hasHistoricalInfoOfWords()),
|
||||||
mShortcutDictContent(&contentBuffers[Ver4DictConstants::SHORTCUT_BUFFERS_INDEX]),
|
mShortcutDictContent(&contentBuffers[Ver4DictConstants::SHORTCUT_BUFFERS_INDEX]),
|
||||||
mIsUpdatable(mDictBuffer->isUpdatable()) {}
|
mIsUpdatable(mDictBuffer->isUpdatable()) {}
|
||||||
|
|
|
@ -67,6 +67,6 @@ const int Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK = 0x80;
|
||||||
|
|
||||||
const size_t Ver4DictConstants::NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT = 1;
|
const size_t Ver4DictConstants::NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT = 1;
|
||||||
const size_t Ver4DictConstants::NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT = 3;
|
const size_t Ver4DictConstants::NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT = 3;
|
||||||
const size_t Ver4DictConstants::NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT = 1;
|
const size_t Ver4DictConstants::NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT = 2;
|
||||||
|
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
|
|
|
@ -0,0 +1,60 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2014 The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_global_counters.h"
|
||||||
|
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
// Checks that needsToHalveCounters() flips to true only once a near-limit count has been
// reported, and flips back after halveCounters().
TEST(LanguageModelDictContentGlobalCountersTest, TestUpdateMaxValueOfCounters) {
    // Largest value that fits in a word count field.
    const int maxFieldValue = (1 << (Ver4DictConstants::WORD_COUNT_FIELD_SIZE * CHAR_BIT)) - 1;
    const int smallCount = 10;

    LanguageModelDictContentGlobalCounters counters;
    EXPECT_FALSE(counters.needsToHalveCounters());

    // A small count must not trigger halving.
    counters.updateMaxValueOfCounters(smallCount);
    EXPECT_FALSE(counters.needsToHalveCounters());

    // The maximum field value is expected to be past the near-limit threshold.
    counters.updateMaxValueOfCounters(maxFieldValue);
    EXPECT_TRUE(counters.needsToHalveCounters());

    // One halving is expected to bring the maximum back under the threshold.
    counters.halveCounters();
    EXPECT_FALSE(counters.needsToHalveCounters());
}
|
||||||
|
|
||||||
|
// Checks that the total count grows by one per increment and is halved with integer
// division (50 -> 25 -> 12 -> ... -> 0).
TEST(LanguageModelDictContentGlobalCountersTest, TestIncrementTotalCount) {
    LanguageModelDictContentGlobalCounters counters;
    EXPECT_EQ(0, counters.getTotalCount());

    counters.incrementTotalCount();
    EXPECT_EQ(1, counters.getTotalCount());

    // 49 more increments bring the total to 50.
    for (int remaining = 49; remaining > 0; --remaining) {
        counters.incrementTotalCount();
    }
    EXPECT_EQ(50, counters.getTotalCount());

    counters.halveCounters();
    EXPECT_EQ(25, counters.getTotalCount());

    // 25 / 2 truncates to 12.
    counters.halveCounters();
    EXPECT_EQ(12, counters.getTotalCount());

    // Four more halvings: 12 -> 6 -> 3 -> 1 -> 0.
    for (int round = 0; round != 4; ++round) {
        counters.halveCounters();
    }
    EXPECT_EQ(0, counters.getTotalCount());
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
} // namespace latinime
|
Loading…
Reference in a new issue