am 583bbb16: am 203ba408: Merge "Remove ProbabilityDictContent and use LanguageModelDictContent" into lmp-dev

* commit '583bbb16dcb564bc00871587788cd119898e0650':
  Remove ProbabilityDictContent and use LanguageModelDictContent
This commit is contained in:
Keisuke Kuroyanagi 2014-08-05 13:25:54 +00:00 committed by Android Git Automerger
commit 9becc8f56b
16 changed files with 43 additions and 282 deletions

View file

@ -73,7 +73,6 @@ LATIN_IME_CORE_SRC_FILES := \
$(addprefix suggest/policyimpl/dictionary/structure/v4/content/, \ $(addprefix suggest/policyimpl/dictionary/structure/v4/content/, \
bigram_dict_content.cpp \ bigram_dict_content.cpp \
language_model_dict_content.cpp \ language_model_dict_content.cpp \
probability_dict_content.cpp \
shortcut_dict_content.cpp \ shortcut_dict_content.cpp \
sparse_table_dict_content.cpp \ sparse_table_dict_content.cpp \
terminal_position_lookup_table.cpp) \ terminal_position_lookup_table.cpp) \

View file

@ -30,7 +30,7 @@ bool LanguageModelDictContent::runGC(
0 /* nextLevelBitmapEntryIndex */, outNgramCount); 0 /* nextLevelBitmapEntryIndex */, outNgramCount);
} }
ProbabilityEntry LanguageModelDictContent::getProbabilityEntry( ProbabilityEntry LanguageModelDictContent::getNgramProbabilityEntry(
const WordIdArrayView prevWordIds, const int wordId) const { const WordIdArrayView prevWordIds, const int wordId) const {
if (!prevWordIds.empty()) { if (!prevWordIds.empty()) {
// TODO: Read n-gram entry. // TODO: Read n-gram entry.
@ -44,7 +44,7 @@ ProbabilityEntry LanguageModelDictContent::getProbabilityEntry(
return ProbabilityEntry::decode(result.mValue, mHasHistoricalInfo); return ProbabilityEntry::decode(result.mValue, mHasHistoricalInfo);
} }
bool LanguageModelDictContent::setProbabilityEntry(const WordIdArrayView prevWordIds, bool LanguageModelDictContent::setNgramProbabilityEntry(const WordIdArrayView prevWordIds,
const int terminalId, const ProbabilityEntry *const probabilityEntry) { const int terminalId, const ProbabilityEntry *const probabilityEntry) {
if (!prevWordIds.empty()) { if (!prevWordIds.empty()) {
// TODO: Add n-gram entry. // TODO: Add n-gram entry.

View file

@ -53,9 +53,18 @@ class LanguageModelDictContent {
const LanguageModelDictContent *const originalContent, const LanguageModelDictContent *const originalContent,
int *const outNgramCount); int *const outNgramCount);
ProbabilityEntry getProbabilityEntry(const WordIdArrayView prevWordIds, const int wordId) const; ProbabilityEntry getProbabilityEntry(const int wordId) const {
return getNgramProbabilityEntry(WordIdArrayView(), wordId);
}
bool setProbabilityEntry(const WordIdArrayView prevWordIds, const int wordId, bool setProbabilityEntry(const int wordId, const ProbabilityEntry *const probabilityEntry) {
return setNgramProbabilityEntry(WordIdArrayView(), wordId, probabilityEntry);
}
ProbabilityEntry getNgramProbabilityEntry(const WordIdArrayView prevWordIds,
const int wordId) const;
bool setNgramProbabilityEntry(const WordIdArrayView prevWordIds, const int wordId,
const ProbabilityEntry *const probabilityEntry); const ProbabilityEntry *const probabilityEntry);
private: private:

View file

@ -1,159 +0,0 @@
/*
* Copyright (C) 2013 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
namespace latinime {
/**
 * Reads the probability entry stored for the given terminal id.
 *
 * @param terminalId id of the terminal whose entry is read.
 * @return the decoded entry; an entry with flags 0 and NOT_A_PROBABILITY when terminalId is
 *         negative or not smaller than mSize.
 */
const ProbabilityEntry ProbabilityDictContent::getProbabilityEntry(const int terminalId) const {
    const bool isInRange = terminalId >= 0 && terminalId < mSize;
    if (!isInRange) {
        // This method can be called with invalid terminal id during GC.
        return ProbabilityEntry(0 /* flags */, NOT_A_PROBABILITY);
    }
    const BufferWithExtendableBuffer *const contentBuffer = getBuffer();
    // Fields are laid out back to back; each read advances readingPos to the next field.
    int readingPos = getEntryPos(terminalId);
    const int flags = contentBuffer->readUintAndAdvancePosition(
            Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE, &readingPos);
    const int probability = contentBuffer->readUintAndAdvancePosition(
            Ver4DictConstants::PROBABILITY_SIZE, &readingPos);
    if (!mHasHistoricalInfo) {
        return ProbabilityEntry(flags, probability);
    }
    // Historical info follows the probability field: timestamp, level, then count.
    const int timestamp = contentBuffer->readUintAndAdvancePosition(
            Ver4DictConstants::TIME_STAMP_FIELD_SIZE, &readingPos);
    const int level = contentBuffer->readUintAndAdvancePosition(
            Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, &readingPos);
    const int count = contentBuffer->readUintAndAdvancePosition(
            Ver4DictConstants::WORD_COUNT_FIELD_SIZE, &readingPos);
    const HistoricalInfo historicalInfo(timestamp, level, count);
    return ProbabilityEntry(flags, probability, &historicalInfo);
}
bool ProbabilityDictContent::setProbabilityEntry(const int terminalId,
const ProbabilityEntry *const probabilityEntry) {
if (terminalId < 0) {
return false;
}
const int entryPos = getEntryPos(terminalId);
if (terminalId >= mSize) {
ProbabilityEntry dummyEntry;
// Write new entry.
int writingPos = getBuffer()->getTailPosition();
while (writingPos <= entryPos) {
// Fulfilling with dummy entries until writingPos.
if (!writeEntry(&dummyEntry, writingPos)) {
AKLOGE("Cannot write dummy entry. pos: %d, mSize: %d", writingPos, mSize);
return false;
}
writingPos += getEntrySize();
mSize++;
}
}
return writeEntry(probabilityEntry, entryPos);
}
bool ProbabilityDictContent::flushToFile(FILE *const file) const {
if (getEntryPos(mSize) < getBuffer()->getTailPosition()) {
ProbabilityDictContent probabilityDictContentToWrite(mHasHistoricalInfo);
for (int i = 0; i < mSize; ++i) {
const ProbabilityEntry probabilityEntry = getProbabilityEntry(i);
if (!probabilityDictContentToWrite.setProbabilityEntry(i, &probabilityEntry)) {
AKLOGE("Cannot set probability entry in flushToFile. terminalId: %d", i);
return false;
}
}
return probabilityDictContentToWrite.flush(file);
} else {
return flush(file);
}
}
/**
 * Rebuilds this content from another content using a terminal id mapping.
 *
 * Resets mSize to 0, then for every (first, second) pair in terminalIdMap reads the entry stored
 * under `first` in the original content and stores it under `second` in this content.
 *
 * @param terminalIdMap mapping whose keys index the original content and whose values index
 *        this content.
 * @param originalProbabilityDictContent content the entries are read from.
 * @return false as soon as one entry cannot be stored, true otherwise.
 */
bool ProbabilityDictContent::runGC(
        const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
        const ProbabilityDictContent *const originalProbabilityDictContent) {
    mSize = 0;
    for (const auto &idPair : *terminalIdMap) {
        const int sourceTerminalId = idPair.first;
        const int destTerminalId = idPair.second;
        const ProbabilityEntry entry =
                originalProbabilityDictContent->getProbabilityEntry(sourceTerminalId);
        if (!setProbabilityEntry(destTerminalId, &entry)) {
            AKLOGE("Cannot set probability entry in runGC. terminalId: %d", destTerminalId);
            return false;
        }
        // NOTE(review): setProbabilityEntry() also increments mSize while padding up to a new
        // slot, so mSize may end up larger than the number of mapped ids — confirm intended.
        mSize++;
    }
    return true;
}
/**
 * Returns the size of one stored entry: the flags and probability fields, plus the timestamp,
 * level and count fields when this content carries historical info.
 */
int ProbabilityDictContent::getEntrySize() const {
    int entrySize = Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE
            + Ver4DictConstants::PROBABILITY_SIZE;
    if (mHasHistoricalInfo) {
        entrySize += Ver4DictConstants::TIME_STAMP_FIELD_SIZE
                + Ver4DictConstants::WORD_LEVEL_FIELD_SIZE
                + Ver4DictConstants::WORD_COUNT_FIELD_SIZE;
    }
    return entrySize;
}
int ProbabilityDictContent::getEntryPos(const int terminalId) const {
return terminalId * getEntrySize();
}
bool ProbabilityDictContent::writeEntry(const ProbabilityEntry *const probabilityEntry,
const int entryPos) {
BufferWithExtendableBuffer *const bufferToWrite = getWritableBuffer();
int writingPos = entryPos;
if (!bufferToWrite->writeUintAndAdvancePosition(probabilityEntry->getFlags(),
Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE, &writingPos)) {
AKLOGE("Cannot write flags in probability dict content. pos: %d", writingPos);
return false;
}
if (!bufferToWrite->writeUintAndAdvancePosition(probabilityEntry->getProbability(),
Ver4DictConstants::PROBABILITY_SIZE, &writingPos)) {
AKLOGE("Cannot write probability in probability dict content. pos: %d", writingPos);
return false;
}
if (mHasHistoricalInfo) {
const HistoricalInfo *const historicalInfo = probabilityEntry->getHistoricalInfo();
if (!bufferToWrite->writeUintAndAdvancePosition(historicalInfo->getTimeStamp(),
Ver4DictConstants::TIME_STAMP_FIELD_SIZE, &writingPos)) {
AKLOGE("Cannot write timestamp in probability dict content. pos: %d", writingPos);
return false;
}
if (!bufferToWrite->writeUintAndAdvancePosition(historicalInfo->getLevel(),
Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, &writingPos)) {
AKLOGE("Cannot write level in probability dict content. pos: %d", writingPos);
return false;
}
if (!bufferToWrite->writeUintAndAdvancePosition(historicalInfo->getCount(),
Ver4DictConstants::WORD_COUNT_FIELD_SIZE, &writingPos)) {
AKLOGE("Cannot write count in probability dict content. pos: %d", writingPos);
return false;
}
}
return true;
}
} // namespace latinime

View file

@ -1,66 +0,0 @@
/*
* Copyright (C) 2013, The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LATINIME_PROBABILITY_DICT_CONTENT_H
#define LATINIME_PROBABILITY_DICT_CONTENT_H
#include <cstdint>
#include <cstdio>
#include "defines.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
namespace latinime {
class ProbabilityEntry;
/*
 * Dict content that stores one fixed-size probability entry per terminal id. The entry for a
 * terminal id is located at terminalId * getEntrySize() in the underlying buffer.
 */
class ProbabilityDictContent : public SingleDictContent {
 public:
    // Wraps an existing buffer. The entry count is derived from the buffer's tail position
    // divided by the entry size.
    ProbabilityDictContent(uint8_t *const buffer, const int bufferSize,
            const bool hasHistoricalInfo)
            : SingleDictContent(buffer, bufferSize),
              mHasHistoricalInfo(hasHistoricalInfo),
              // getEntrySize() reads mHasHistoricalInfo, which is declared (and therefore
              // initialized) before mSize.
              mSize(getBuffer()->getTailPosition() / getEntrySize()) {}

    // Creates a content with no entries. Marked explicit so that a bool is never implicitly
    // converted into a ProbabilityDictContent.
    explicit ProbabilityDictContent(const bool hasHistoricalInfo)
            : mHasHistoricalInfo(hasHistoricalInfo), mSize(0) {}

    // Returns the entry stored for terminalId.
    const ProbabilityEntry getProbabilityEntry(const int terminalId) const;

    // Stores probabilityEntry for terminalId. Returns false on failure.
    bool setProbabilityEntry(const int terminalId, const ProbabilityEntry *const probabilityEntry);

    // Writes this content to file. Returns false on failure.
    bool flushToFile(FILE *const file) const;

    // Rebuilds this content from originalProbabilityDictContent using terminalIdMap.
    // Returns false on failure.
    bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
            const ProbabilityDictContent *const originalProbabilityDictContent);

 private:
    DISALLOW_COPY_AND_ASSIGN(ProbabilityDictContent);

    // Size of a single stored entry; depends on mHasHistoricalInfo.
    int getEntrySize() const;

    // Buffer position of the entry for terminalId.
    int getEntryPos(const int terminalId) const;

    // Serializes probabilityEntry at entryPos. Returns false on failure.
    bool writeEntry(const ProbabilityEntry *const probabilityEntry, const int entryPos);

    // Whether entries carry historical info fields in addition to flags and probability.
    bool mHasHistoricalInfo;
    // Number of entries tracked by this content.
    int mSize;
};
} // namespace latinime
#endif /* LATINIME_PROBABILITY_DICT_CONTENT_H */

View file

@ -157,11 +157,6 @@ bool Ver4DictBuffers::flushDictBuffers(FILE *const file) const {
AKLOGE("Terminal position lookup table cannot be written."); AKLOGE("Terminal position lookup table cannot be written.");
return false; return false;
} }
// Write probability dict content.
if (!mProbabilityDictContent.flushToFile(file)) {
AKLOGE("Probability dict content cannot be written.");
return false;
}
// Write language model content. // Write language model content.
if (!mLanguageModelDictContent.save(file)) { if (!mLanguageModelDictContent.save(file)) {
AKLOGE("Language model dict content cannot be written."); AKLOGE("Language model dict content cannot be written.");
@ -196,10 +191,6 @@ Ver4DictBuffers::Ver4DictBuffers(MmappedBuffer::MmappedBufferPtr &&headerBuffer,
contentBuffers[Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX], contentBuffers[Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX],
contentBufferSizes[ contentBufferSizes[
Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX]), Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX]),
mProbabilityDictContent(
contentBuffers[Ver4DictConstants::PROBABILITY_BUFFER_INDEX],
contentBufferSizes[Ver4DictConstants::PROBABILITY_BUFFER_INDEX],
mHeaderPolicy.hasHistoricalInfoOfWords()),
mLanguageModelDictContent( mLanguageModelDictContent(
ReadWriteByteArrayView( ReadWriteByteArrayView(
contentBuffers[Ver4DictConstants::LANGUAGE_MODEL_BUFFER_INDEX], contentBuffers[Ver4DictConstants::LANGUAGE_MODEL_BUFFER_INDEX],
@ -216,7 +207,6 @@ Ver4DictBuffers::Ver4DictBuffers(const HeaderPolicy *const headerPolicy, const i
: mHeaderBuffer(nullptr), mDictBuffer(nullptr), mHeaderPolicy(headerPolicy), : mHeaderBuffer(nullptr), mDictBuffer(nullptr), mHeaderPolicy(headerPolicy),
mExpandableHeaderBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE), mExpandableHeaderBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
mExpandableTrieBuffer(maxTrieSize), mTerminalPositionLookupTable(), mExpandableTrieBuffer(maxTrieSize), mTerminalPositionLookupTable(),
mProbabilityDictContent(headerPolicy->hasHistoricalInfoOfWords()),
mLanguageModelDictContent(headerPolicy->hasHistoricalInfoOfWords()), mLanguageModelDictContent(headerPolicy->hasHistoricalInfoOfWords()),
mBigramDictContent(headerPolicy->hasHistoricalInfoOfWords()), mShortcutDictContent(), mBigramDictContent(headerPolicy->hasHistoricalInfoOfWords()), mShortcutDictContent(),
mIsUpdatable(true) {} mIsUpdatable(true) {}

View file

@ -24,7 +24,6 @@
#include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h" #include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h" #include "suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h" #include "suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h" #include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
@ -53,7 +52,7 @@ class Ver4DictBuffers {
AK_FORCE_INLINE bool isNearSizeLimit() const { AK_FORCE_INLINE bool isNearSizeLimit() const {
return mExpandableTrieBuffer.isNearSizeLimit() return mExpandableTrieBuffer.isNearSizeLimit()
|| mTerminalPositionLookupTable.isNearSizeLimit() || mTerminalPositionLookupTable.isNearSizeLimit()
|| mProbabilityDictContent.isNearSizeLimit() || mLanguageModelDictContent.isNearSizeLimit()
|| mBigramDictContent.isNearSizeLimit() || mBigramDictContent.isNearSizeLimit()
|| mShortcutDictContent.isNearSizeLimit(); || mShortcutDictContent.isNearSizeLimit();
} }
@ -82,14 +81,6 @@ class Ver4DictBuffers {
return &mTerminalPositionLookupTable; return &mTerminalPositionLookupTable;
} }
AK_FORCE_INLINE ProbabilityDictContent *getMutableProbabilityDictContent() {
return &mProbabilityDictContent;
}
AK_FORCE_INLINE const ProbabilityDictContent *getProbabilityDictContent() const {
return &mProbabilityDictContent;
}
AK_FORCE_INLINE LanguageModelDictContent *getMutableLanguageModelDictContent() { AK_FORCE_INLINE LanguageModelDictContent *getMutableLanguageModelDictContent() {
return &mLanguageModelDictContent; return &mLanguageModelDictContent;
} }
@ -144,7 +135,6 @@ class Ver4DictBuffers {
BufferWithExtendableBuffer mExpandableHeaderBuffer; BufferWithExtendableBuffer mExpandableHeaderBuffer;
BufferWithExtendableBuffer mExpandableTrieBuffer; BufferWithExtendableBuffer mExpandableTrieBuffer;
TerminalPositionLookupTable mTerminalPositionLookupTable; TerminalPositionLookupTable mTerminalPositionLookupTable;
ProbabilityDictContent mProbabilityDictContent;
LanguageModelDictContent mLanguageModelDictContent; LanguageModelDictContent mLanguageModelDictContent;
BigramDictContent mBigramDictContent; BigramDictContent mBigramDictContent;
ShortcutDictContent mShortcutDictContent; ShortcutDictContent mShortcutDictContent;

View file

@ -27,19 +27,18 @@ const int Ver4DictConstants::MAX_DICTIONARY_SIZE = 8 * 1024 * 1024;
// limited to 1MB to prevent from inefficient traversing. // limited to 1MB to prevent from inefficient traversing.
const int Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE = 1 * 1024 * 1024; const int Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE = 1 * 1024 * 1024;
// NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT for Trie, TerminalAddressLookupTable and Probability. // NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT for Trie and TerminalAddressLookupTable.
// NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT for language model.
// NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT for bigram and shortcut. // NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT for bigram and shortcut.
const size_t Ver4DictConstants::NUM_OF_CONTENT_BUFFERS_IN_BODY_FILE = const size_t Ver4DictConstants::NUM_OF_CONTENT_BUFFERS_IN_BODY_FILE =
NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT * 3 NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT * 2
+ NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT + NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT
+ NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT * 2; + NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT * 2;
const int Ver4DictConstants::TRIE_BUFFER_INDEX = 0; const int Ver4DictConstants::TRIE_BUFFER_INDEX = 0;
const int Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX = const int Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX =
TRIE_BUFFER_INDEX + NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT; TRIE_BUFFER_INDEX + NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT;
const int Ver4DictConstants::PROBABILITY_BUFFER_INDEX =
TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX + NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT;
const int Ver4DictConstants::LANGUAGE_MODEL_BUFFER_INDEX = const int Ver4DictConstants::LANGUAGE_MODEL_BUFFER_INDEX =
PROBABILITY_BUFFER_INDEX + NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT; TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX + NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT;
const int Ver4DictConstants::BIGRAM_BUFFERS_INDEX = const int Ver4DictConstants::BIGRAM_BUFFERS_INDEX =
LANGUAGE_MODEL_BUFFER_INDEX + NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT; LANGUAGE_MODEL_BUFFER_INDEX + NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT;
const int Ver4DictConstants::SHORTCUT_BUFFERS_INDEX = const int Ver4DictConstants::SHORTCUT_BUFFERS_INDEX =

View file

@ -35,7 +35,6 @@ class Ver4DictConstants {
static const size_t NUM_OF_CONTENT_BUFFERS_IN_BODY_FILE; static const size_t NUM_OF_CONTENT_BUFFERS_IN_BODY_FILE;
static const int TRIE_BUFFER_INDEX; static const int TRIE_BUFFER_INDEX;
static const int TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX; static const int TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX;
static const int PROBABILITY_BUFFER_INDEX;
static const int LANGUAGE_MODEL_BUFFER_INDEX; static const int LANGUAGE_MODEL_BUFFER_INDEX;
static const int BIGRAM_BUFFERS_INDEX; static const int BIGRAM_BUFFERS_INDEX;
static const int SHORTCUT_BUFFERS_INDEX; static const int SHORTCUT_BUFFERS_INDEX;

View file

@ -18,7 +18,7 @@
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h" #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h" #include "suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h" #include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
@ -61,8 +61,9 @@ const PtNodeParams Ver4PatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProce
terminalIdFieldPos += mBuffer->getOriginalBufferSize(); terminalIdFieldPos += mBuffer->getOriginalBufferSize();
} }
terminalId = Ver4PatriciaTrieReadingUtils::getTerminalIdAndAdvancePosition(dictBuf, &pos); terminalId = Ver4PatriciaTrieReadingUtils::getTerminalIdAndAdvancePosition(dictBuf, &pos);
// TODO: Quit reading probability here.
const ProbabilityEntry probabilityEntry = const ProbabilityEntry probabilityEntry =
mProbabilityDictContent->getProbabilityEntry(terminalId); mLanguageModelDictContent->getProbabilityEntry(terminalId);
if (probabilityEntry.hasHistoricalInfo()) { if (probabilityEntry.hasHistoricalInfo()) {
probability = ForgettingCurveUtils::decodeProbability( probability = ForgettingCurveUtils::decodeProbability(
probabilityEntry.getHistoricalInfo(), mHeaderPolicy); probabilityEntry.getHistoricalInfo(), mHeaderPolicy);

View file

@ -25,18 +25,18 @@ namespace latinime {
class BufferWithExtendableBuffer; class BufferWithExtendableBuffer;
class HeaderPolicy; class HeaderPolicy;
class ProbabilityDictContent; class LanguageModelDictContent;
/* /*
* This class is used for helping to read nodes of ver4 patricia trie. This class handles moved * This class is used for helping to read nodes of ver4 patricia trie. This class handles moved
* node and reads node attributes including probability from probabilityBuffer. * node and reads node attributes including probability from language model.
*/ */
class Ver4PatriciaTrieNodeReader : public PtNodeReader { class Ver4PatriciaTrieNodeReader : public PtNodeReader {
public: public:
Ver4PatriciaTrieNodeReader(const BufferWithExtendableBuffer *const buffer, Ver4PatriciaTrieNodeReader(const BufferWithExtendableBuffer *const buffer,
const ProbabilityDictContent *const probabilityDictContent, const LanguageModelDictContent *const languageModelDictContent,
const HeaderPolicy *const headerPolicy) const HeaderPolicy *const headerPolicy)
: mBuffer(buffer), mProbabilityDictContent(probabilityDictContent), : mBuffer(buffer), mLanguageModelDictContent(languageModelDictContent),
mHeaderPolicy(headerPolicy) {} mHeaderPolicy(headerPolicy) {}
~Ver4PatriciaTrieNodeReader() {} ~Ver4PatriciaTrieNodeReader() {}
@ -50,7 +50,7 @@ class Ver4PatriciaTrieNodeReader : public PtNodeReader {
DISALLOW_COPY_AND_ASSIGN(Ver4PatriciaTrieNodeReader); DISALLOW_COPY_AND_ASSIGN(Ver4PatriciaTrieNodeReader);
const BufferWithExtendableBuffer *const mBuffer; const BufferWithExtendableBuffer *const mBuffer;
const ProbabilityDictContent *const mProbabilityDictContent; const LanguageModelDictContent *const mLanguageModelDictContent;
const HeaderPolicy *const mHeaderPolicy; const HeaderPolicy *const mHeaderPolicy;
const PtNodeParams fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos, const PtNodeParams fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos,

View file

@ -143,11 +143,11 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeUnigramProperty(
return false; return false;
} }
const ProbabilityEntry originalProbabilityEntry = const ProbabilityEntry originalProbabilityEntry =
mBuffers->getProbabilityDictContent()->getProbabilityEntry( mBuffers->getLanguageModelDictContent()->getProbabilityEntry(
toBeUpdatedPtNodeParams->getTerminalId()); toBeUpdatedPtNodeParams->getTerminalId());
const ProbabilityEntry probabilityEntry = createUpdatedEntryFrom(&originalProbabilityEntry, const ProbabilityEntry probabilityEntry = createUpdatedEntryFrom(&originalProbabilityEntry,
unigramProperty); unigramProperty);
return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry( return mBuffers->getMutableLanguageModelDictContent()->setProbabilityEntry(
toBeUpdatedPtNodeParams->getTerminalId(), &probabilityEntry); toBeUpdatedPtNodeParams->getTerminalId(), &probabilityEntry);
} }
@ -158,14 +158,14 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbabilityAndGetNeedsToKeepPtNodeA
return false; return false;
} }
const ProbabilityEntry originalProbabilityEntry = const ProbabilityEntry originalProbabilityEntry =
mBuffers->getProbabilityDictContent()->getProbabilityEntry( mBuffers->getLanguageModelDictContent()->getProbabilityEntry(
toBeUpdatedPtNodeParams->getTerminalId()); toBeUpdatedPtNodeParams->getTerminalId());
if (originalProbabilityEntry.hasHistoricalInfo()) { if (originalProbabilityEntry.hasHistoricalInfo()) {
const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave( const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave(
originalProbabilityEntry.getHistoricalInfo(), mHeaderPolicy); originalProbabilityEntry.getHistoricalInfo(), mHeaderPolicy);
const ProbabilityEntry probabilityEntry = const ProbabilityEntry probabilityEntry =
originalProbabilityEntry.createEntryWithUpdatedHistoricalInfo(&historicalInfo); originalProbabilityEntry.createEntryWithUpdatedHistoricalInfo(&historicalInfo);
if (!mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry( if (!mBuffers->getMutableLanguageModelDictContent()->setProbabilityEntry(
toBeUpdatedPtNodeParams->getTerminalId(), &probabilityEntry)) { toBeUpdatedPtNodeParams->getTerminalId(), &probabilityEntry)) {
AKLOGE("Cannot write updated probability entry. terminalId: %d", AKLOGE("Cannot write updated probability entry. terminalId: %d",
toBeUpdatedPtNodeParams->getTerminalId()); toBeUpdatedPtNodeParams->getTerminalId());
@ -218,8 +218,8 @@ bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition(
ProbabilityEntry newProbabilityEntry; ProbabilityEntry newProbabilityEntry;
const ProbabilityEntry probabilityEntryToWrite = createUpdatedEntryFrom( const ProbabilityEntry probabilityEntryToWrite = createUpdatedEntryFrom(
&newProbabilityEntry, unigramProperty); &newProbabilityEntry, unigramProperty);
return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(terminalId, return mBuffers->getMutableLanguageModelDictContent()->setProbabilityEntry(
&probabilityEntryToWrite); terminalId, &probabilityEntryToWrite);
} }
bool Ver4PatriciaTrieNodeWriter::addNewBigramEntry( bool Ver4PatriciaTrieNodeWriter::addNewBigramEntry(

View file

@ -452,7 +452,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code
std::vector<int> codePointVector(ptNodeParams.getCodePoints(), std::vector<int> codePointVector(ptNodeParams.getCodePoints(),
ptNodeParams.getCodePoints() + ptNodeParams.getCodePointCount()); ptNodeParams.getCodePoints() + ptNodeParams.getCodePointCount());
const ProbabilityEntry probabilityEntry = const ProbabilityEntry probabilityEntry =
mBuffers->getProbabilityDictContent()->getProbabilityEntry( mBuffers->getLanguageModelDictContent()->getProbabilityEntry(
ptNodeParams.getTerminalId()); ptNodeParams.getTerminalId());
const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo(); const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo();
// Fetch bigram information. // Fetch bigram information.

View file

@ -46,7 +46,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
mBuffers->getTerminalPositionLookupTable(), mHeaderPolicy), mBuffers->getTerminalPositionLookupTable(), mHeaderPolicy),
mShortcutPolicy(mBuffers->getMutableShortcutDictContent(), mShortcutPolicy(mBuffers->getMutableShortcutDictContent(),
mBuffers->getTerminalPositionLookupTable()), mBuffers->getTerminalPositionLookupTable()),
mNodeReader(mDictBuffer, mBuffers->getProbabilityDictContent(), mHeaderPolicy), mNodeReader(mDictBuffer, mBuffers->getLanguageModelDictContent(), mHeaderPolicy),
mPtNodeArrayReader(mDictBuffer), mPtNodeArrayReader(mDictBuffer),
mNodeWriter(mDictBuffer, mBuffers.get(), mHeaderPolicy, &mNodeReader, mNodeWriter(mDictBuffer, mBuffers.get(), mHeaderPolicy, &mNodeReader,
&mPtNodeArrayReader, &mBigramPolicy, &mShortcutPolicy), &mPtNodeArrayReader, &mBigramPolicy, &mShortcutPolicy),

View file

@ -75,7 +75,7 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
const HeaderPolicy *const headerPolicy, Ver4DictBuffers *const buffersToWrite, const HeaderPolicy *const headerPolicy, Ver4DictBuffers *const buffersToWrite,
int *const outUnigramCount, int *const outBigramCount) { int *const outUnigramCount, int *const outBigramCount) {
Ver4PatriciaTrieNodeReader ptNodeReader(mBuffers->getTrieBuffer(), Ver4PatriciaTrieNodeReader ptNodeReader(mBuffers->getTrieBuffer(),
mBuffers->getProbabilityDictContent(), headerPolicy); mBuffers->getLanguageModelDictContent(), headerPolicy);
Ver4PtNodeArrayReader ptNodeArrayReader(mBuffers->getTrieBuffer()); Ver4PtNodeArrayReader ptNodeArrayReader(mBuffers->getTrieBuffer());
Ver4BigramListPolicy bigramPolicy(mBuffers->getMutableBigramDictContent(), Ver4BigramListPolicy bigramPolicy(mBuffers->getMutableBigramDictContent(),
mBuffers->getTerminalPositionLookupTable(), headerPolicy); mBuffers->getTerminalPositionLookupTable(), headerPolicy);
@ -138,7 +138,7 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
// Create policy instances for the GCed dictionary. // Create policy instances for the GCed dictionary.
Ver4PatriciaTrieNodeReader newPtNodeReader(buffersToWrite->getTrieBuffer(), Ver4PatriciaTrieNodeReader newPtNodeReader(buffersToWrite->getTrieBuffer(),
buffersToWrite->getProbabilityDictContent(), headerPolicy); buffersToWrite->getLanguageModelDictContent(), headerPolicy);
Ver4PtNodeArrayReader newPtNodeArrayreader(buffersToWrite->getTrieBuffer()); Ver4PtNodeArrayReader newPtNodeArrayreader(buffersToWrite->getTrieBuffer());
Ver4BigramListPolicy newBigramPolicy(buffersToWrite->getMutableBigramDictContent(), Ver4BigramListPolicy newBigramPolicy(buffersToWrite->getMutableBigramDictContent(),
buffersToWrite->getTerminalPositionLookupTable(), headerPolicy); buffersToWrite->getTerminalPositionLookupTable(), headerPolicy);
@ -154,8 +154,8 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
return false; return false;
} }
// Run GC for probability dict content. // Run GC for probability dict content.
if (!buffersToWrite->getMutableProbabilityDictContent()->runGC(&terminalIdMap, if (!buffersToWrite->getMutableLanguageModelDictContent()->runGC(&terminalIdMap,
mBuffers->getProbabilityDictContent())) { mBuffers->getLanguageModelDictContent(), nullptr /* outNgramCount */)) {
return false; return false;
} }
// Run GC for bigram dict content. // Run GC for bigram dict content.
@ -201,7 +201,7 @@ bool Ver4PatriciaTrieWritingHelper::truncateUnigrams(
continue; continue;
} }
const ProbabilityEntry probabilityEntry = const ProbabilityEntry probabilityEntry =
mBuffers->getProbabilityDictContent()->getProbabilityEntry(i); mBuffers->getLanguageModelDictContent()->getProbabilityEntry(i);
const int probability = probabilityEntry.hasHistoricalInfo() ? const int probability = probabilityEntry.hasHistoricalInfo() ?
ForgettingCurveUtils::decodeProbability( ForgettingCurveUtils::decodeProbability(
probabilityEntry.getHistoricalInfo(), mBuffers->getHeaderPolicy()) : probabilityEntry.getHistoricalInfo(), mBuffers->getHeaderPolicy()) :

View file

@ -30,9 +30,9 @@ TEST(LanguageModelDictContentTest, TestUnigramProbability) {
const int probability = 10; const int probability = 10;
const int wordId = 100; const int wordId = 100;
const ProbabilityEntry probabilityEntry(flag, probability); const ProbabilityEntry probabilityEntry(flag, probability);
LanguageModelDictContent.setProbabilityEntry(WordIdArrayView(), wordId, &probabilityEntry); LanguageModelDictContent.setProbabilityEntry(wordId, &probabilityEntry);
const ProbabilityEntry entry = const ProbabilityEntry entry =
LanguageModelDictContent.getProbabilityEntry(WordIdArrayView(), wordId); LanguageModelDictContent.getProbabilityEntry(wordId);
EXPECT_EQ(flag, entry.getFlags()); EXPECT_EQ(flag, entry.getFlags());
EXPECT_EQ(probability, entry.getProbability()); EXPECT_EQ(probability, entry.getProbability());
} }
@ -47,9 +47,8 @@ TEST(LanguageModelDictContentTest, TestUnigramProbabilityWithHistoricalInfo) {
const int wordId = 100; const int wordId = 100;
const HistoricalInfo historicalInfo(timestamp, level, count); const HistoricalInfo historicalInfo(timestamp, level, count);
const ProbabilityEntry probabilityEntry(flag, NOT_A_PROBABILITY, &historicalInfo); const ProbabilityEntry probabilityEntry(flag, NOT_A_PROBABILITY, &historicalInfo);
LanguageModelDictContent.setProbabilityEntry(WordIdArrayView(), wordId, &probabilityEntry); LanguageModelDictContent.setProbabilityEntry(wordId, &probabilityEntry);
const ProbabilityEntry entry = const ProbabilityEntry entry = LanguageModelDictContent.getProbabilityEntry(wordId);
LanguageModelDictContent.getProbabilityEntry(WordIdArrayView(), wordId);
EXPECT_EQ(flag, entry.getFlags()); EXPECT_EQ(flag, entry.getFlags());
EXPECT_EQ(timestamp, entry.getHistoricalInfo()->getTimeStamp()); EXPECT_EQ(timestamp, entry.getHistoricalInfo()->getTimeStamp());
EXPECT_EQ(level, entry.getHistoricalInfo()->getLevel()); EXPECT_EQ(level, entry.getHistoricalInfo()->getLevel());