Extend unigram probability field to support historical info.
Bug: 11073222
Change-Id: I9668db89ae5e90271f3d70c86cea458832275bff
main
parent
4fa5588d1b
commit
16412cca66
|
@ -127,7 +127,8 @@ bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNo
|
|||
ptNodeParams->getHeadPos(), writingPos));
|
||||
mValidPtNodeCount++;
|
||||
// Writes current PtNode.
|
||||
return mPtNodeWriter->writePtNodeAndAdvancePosition(ptNodeParams, &writingPos);
|
||||
return mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(ptNodeParams,
|
||||
0 /* timestamp */, &writingPos);
|
||||
}
|
||||
|
||||
bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields
|
||||
|
|
|
@ -16,55 +16,72 @@
|
|||
|
||||
#include "suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h"
|
||||
|
||||
#include "suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
int ProbabilityDictContent::getProbability(const int terminalId) const {
|
||||
void ProbabilityDictContent::getProbabilityEntry(const int terminalId,
|
||||
ProbabilityEntry *const outProbabilityEntry) const {
|
||||
if (terminalId < 0 || terminalId >= mSize) {
|
||||
return NOT_A_PROBABILITY;
|
||||
outProbabilityEntry->setProbability(0 /* flags */, NOT_A_PROBABILITY);
|
||||
AKLOGE("Terminal id (%d) is not in the probability dict content. mSize: %d", terminalId,
|
||||
mSize);
|
||||
return;
|
||||
}
|
||||
const BufferWithExtendableBuffer *const buffer = getBuffer();
|
||||
int entryPos = getEntryPos(terminalId);
|
||||
const int flags = buffer->readUintAndAdvancePosition(
|
||||
Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE, &entryPos);
|
||||
const int probability = buffer->readUintAndAdvancePosition(
|
||||
Ver4DictConstants::PROBABILITY_SIZE, &entryPos);
|
||||
if (mHasHistoricalInfo) {
|
||||
const int timestamp = buffer->readUintAndAdvancePosition(
|
||||
Ver4DictConstants::TIME_STAMP_FIELD_SIZE, &entryPos);
|
||||
const int level = buffer->readUintAndAdvancePosition(
|
||||
Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, &entryPos);
|
||||
const int count = buffer->readUintAndAdvancePosition(
|
||||
Ver4DictConstants::WORD_COUNT_FIELD_SIZE, &entryPos);
|
||||
outProbabilityEntry->setProbabilityWithHistricalInfo(flags, probability, timestamp, level,
|
||||
count);
|
||||
} else {
|
||||
outProbabilityEntry->setProbability(flags, probability);
|
||||
}
|
||||
const int probabilityFieldPos =
|
||||
getEntryPos(terminalId) + Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE;
|
||||
return getBuffer()->readUint(Ver4DictConstants::PROBABILITY_SIZE, probabilityFieldPos);
|
||||
}
|
||||
|
||||
bool ProbabilityDictContent::setProbability(const int terminalId, const int probability) {
|
||||
bool ProbabilityDictContent::setProbabilityEntry(const int terminalId,
|
||||
const ProbabilityEntry *const probabilityEntry) {
|
||||
if (terminalId < 0) {
|
||||
return false;
|
||||
}
|
||||
const int entryPos = getEntryPos(terminalId);
|
||||
if (terminalId >= mSize) {
|
||||
ProbabilityEntry dummyEntry;
|
||||
// Write new entry.
|
||||
int writingPos = getBuffer()->getTailPosition();
|
||||
while (writingPos <= getEntryPos(terminalId)) {
|
||||
const int dummyFlags = 0;
|
||||
if (!getWritableBuffer()->writeUintAndAdvancePosition(dummyFlags,
|
||||
Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE, &writingPos)) {
|
||||
return false;
|
||||
}
|
||||
const int dummyProbability = 0;
|
||||
if (!getWritableBuffer()->writeUintAndAdvancePosition(dummyProbability,
|
||||
Ver4DictConstants::PROBABILITY_SIZE, &writingPos)) {
|
||||
while (writingPos <= entryPos) {
|
||||
// Fulfilling with dummy entries until writingPos.
|
||||
if (!writeEntry(&dummyEntry, writingPos)) {
|
||||
AKLOGE("Cannot write dummy entry. pos: %d, mSize: %d", writingPos, mSize);
|
||||
return false;
|
||||
}
|
||||
writingPos += getEntrySize();
|
||||
mSize++;
|
||||
}
|
||||
}
|
||||
const int probabilityWritingPos = getEntryPos(terminalId)
|
||||
+ Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE;
|
||||
return getWritableBuffer()->writeUint(probability,
|
||||
Ver4DictConstants::PROBABILITY_SIZE, probabilityWritingPos);
|
||||
return writeEntry(probabilityEntry, entryPos);
|
||||
}
|
||||
|
||||
bool ProbabilityDictContent::flushToFile(const char *const dictDirPath) const {
|
||||
if (getEntryPos(mSize) < getBuffer()->getTailPosition()) {
|
||||
ProbabilityDictContent probabilityDictContentToWrite;
|
||||
ProbabilityDictContent probabilityDictContentToWrite(mHasHistoricalInfo);
|
||||
ProbabilityEntry probabilityEntry;
|
||||
for (int i = 0; i < mSize; ++i) {
|
||||
if (!probabilityDictContentToWrite.setProbability(i, getProbability(i))) {
|
||||
getProbabilityEntry(i, &probabilityEntry);
|
||||
if (!probabilityDictContentToWrite.setProbabilityEntry(i, &probabilityEntry)) {
|
||||
AKLOGE("Cannot set probability entry in flushToFile. terminalId: %d", i);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
@ -79,10 +96,12 @@ bool ProbabilityDictContent::runGC(
|
|||
const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
|
||||
const ProbabilityDictContent *const originalProbabilityDictContent) {
|
||||
mSize = 0;
|
||||
ProbabilityEntry probabilityEntry;
|
||||
for (TerminalPositionLookupTable::TerminalIdMap::const_iterator it = terminalIdMap->begin();
|
||||
it != terminalIdMap->end(); ++it) {
|
||||
if (!setProbability(it->second,
|
||||
originalProbabilityDictContent->getProbability(it->first))) {
|
||||
originalProbabilityDictContent->getProbabilityEntry(it->first, &probabilityEntry);
|
||||
if (!setProbabilityEntry(it->second, &probabilityEntry)) {
|
||||
AKLOGE("Cannot set probability entry in runGC. terminalId: %d", it->second);
|
||||
return false;
|
||||
}
|
||||
mSize++;
|
||||
|
@ -90,9 +109,55 @@ bool ProbabilityDictContent::runGC(
|
|||
return true;
|
||||
}
|
||||
|
||||
int ProbabilityDictContent::getEntrySize() const {
|
||||
if (mHasHistoricalInfo) {
|
||||
return Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE
|
||||
+ Ver4DictConstants::PROBABILITY_SIZE
|
||||
+ Ver4DictConstants::TIME_STAMP_FIELD_SIZE
|
||||
+ Ver4DictConstants::WORD_LEVEL_FIELD_SIZE
|
||||
+ Ver4DictConstants::WORD_COUNT_FIELD_SIZE;
|
||||
} else {
|
||||
return Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE
|
||||
+ Ver4DictConstants::PROBABILITY_SIZE;
|
||||
}
|
||||
}
|
||||
|
||||
int ProbabilityDictContent::getEntryPos(const int terminalId) const {
|
||||
return terminalId * (Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE
|
||||
+ Ver4DictConstants::PROBABILITY_SIZE);
|
||||
return terminalId * getEntrySize();
|
||||
}
|
||||
|
||||
bool ProbabilityDictContent::writeEntry(const ProbabilityEntry *const probabilityEntry,
|
||||
const int entryPos) {
|
||||
BufferWithExtendableBuffer *const bufferToWrite = getWritableBuffer();
|
||||
int writingPos = entryPos;
|
||||
if (!bufferToWrite->writeUintAndAdvancePosition(probabilityEntry->getFlags(),
|
||||
Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE, &writingPos)) {
|
||||
AKLOGE("Cannot write flags in probability dict content. pos: %d", writingPos);
|
||||
return false;
|
||||
}
|
||||
if (!bufferToWrite->writeUintAndAdvancePosition(probabilityEntry->getProbability(),
|
||||
Ver4DictConstants::PROBABILITY_SIZE, &writingPos)) {
|
||||
AKLOGE("Cannot write probability in probability dict content. pos: %d", writingPos);
|
||||
return false;
|
||||
}
|
||||
if (mHasHistoricalInfo) {
|
||||
if (!bufferToWrite->writeUintAndAdvancePosition(probabilityEntry->getTimeStamp(),
|
||||
Ver4DictConstants::TIME_STAMP_FIELD_SIZE, &writingPos)) {
|
||||
AKLOGE("Cannot write timestamp in probability dict content. pos: %d", writingPos);
|
||||
return false;
|
||||
}
|
||||
if (!bufferToWrite->writeUintAndAdvancePosition(probabilityEntry->getLevel(),
|
||||
Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, &writingPos)) {
|
||||
AKLOGE("Cannot write level in probability dict content. pos: %d", writingPos);
|
||||
return false;
|
||||
}
|
||||
if (!bufferToWrite->writeUintAndAdvancePosition(probabilityEntry->getCount(),
|
||||
Ver4DictConstants::WORD_COUNT_FIELD_SIZE, &writingPos)) {
|
||||
AKLOGE("Cannot write count in probability dict content. pos: %d", writingPos);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace latinime
|
||||
|
|
|
@ -25,18 +25,23 @@
|
|||
|
||||
namespace latinime {
|
||||
|
||||
class ProbabilityEntry;
|
||||
|
||||
class ProbabilityDictContent : public SingleDictContent {
|
||||
public:
|
||||
ProbabilityDictContent(const char *const dictDirPath, const bool isUpdatable)
|
||||
ProbabilityDictContent(const char *const dictDirPath, const bool hasHistoricalInfo,
|
||||
const bool isUpdatable)
|
||||
: SingleDictContent(dictDirPath, Ver4DictConstants::FREQ_FILE_EXTENSION, isUpdatable),
|
||||
mSize(getBuffer()->getTailPosition() / (Ver4DictConstants::PROBABILITY_SIZE
|
||||
+ Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE)) {}
|
||||
mHasHistoricalInfo(hasHistoricalInfo),
|
||||
mSize(getBuffer()->getTailPosition() / getEntrySize()) {}
|
||||
|
||||
ProbabilityDictContent() : mSize(0) {}
|
||||
ProbabilityDictContent(const bool hasHistoricalInfo)
|
||||
: mHasHistoricalInfo(hasHistoricalInfo), mSize(0) {}
|
||||
|
||||
int getProbability(const int terminalId) const;
|
||||
void getProbabilityEntry(const int terminalId,
|
||||
ProbabilityEntry *const outProbabilityEntry) const;
|
||||
|
||||
bool setProbability(const int terminalId, const int probability);
|
||||
bool setProbabilityEntry(const int terminalId, const ProbabilityEntry *const probabilityEntry);
|
||||
|
||||
bool flushToFile(const char *const dictDirPath) const;
|
||||
|
||||
|
@ -46,8 +51,13 @@ class ProbabilityDictContent : public SingleDictContent {
|
|||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(ProbabilityDictContent);
|
||||
|
||||
int getEntrySize() const;
|
||||
|
||||
int getEntryPos(const int terminalId) const;
|
||||
|
||||
bool writeEntry(const ProbabilityEntry *const probabilityEntry, const int entryPos);
|
||||
|
||||
bool mHasHistoricalInfo;
|
||||
int mSize;
|
||||
};
|
||||
} // namespace latinime
|
||||
|
|
|
@ -0,0 +1,78 @@
|
|||
/*
|
||||
* Copyright (C) 2013, The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef LATINIME_PROBABILITY_ENTRY_H
|
||||
#define LATINIME_PROBABILITY_ENTRY_H
|
||||
|
||||
#include "defines.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
class ProbabilityEntry {
|
||||
public:
|
||||
ProbabilityEntry()
|
||||
: mFlags(0), mProbability(NOT_A_PROBABILITY),
|
||||
mTimestamp(Ver4DictConstants::NOT_A_TIME_STAMP), mLevel(0), mCount(0) {}
|
||||
|
||||
void setProbability(const int flags, const int probability) {
|
||||
mFlags = flags;
|
||||
mProbability = probability;
|
||||
mTimestamp = Ver4DictConstants::NOT_A_TIME_STAMP;
|
||||
mLevel = 0;
|
||||
mCount = 0;
|
||||
}
|
||||
|
||||
void setProbabilityWithHistricalInfo(const int flags, const int probability,
|
||||
const int timestamp, const int level, const int count) {
|
||||
mFlags = flags;
|
||||
mProbability = probability;
|
||||
mTimestamp = timestamp;
|
||||
mLevel = level;
|
||||
mCount = count;
|
||||
}
|
||||
|
||||
int getFlags() const {
|
||||
return mFlags;
|
||||
}
|
||||
|
||||
int getProbability() const {
|
||||
return mProbability;
|
||||
}
|
||||
|
||||
int getTimeStamp() const {
|
||||
return mTimestamp;
|
||||
}
|
||||
|
||||
int getLevel() const {
|
||||
return mLevel;
|
||||
}
|
||||
|
||||
int getCount() const {
|
||||
return mCount;
|
||||
}
|
||||
|
||||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(ProbabilityEntry);
|
||||
|
||||
int mFlags;
|
||||
int mProbability;
|
||||
int mTimestamp;
|
||||
int mLevel;
|
||||
int mCount;
|
||||
};
|
||||
} // namespace latinime
|
||||
#endif /* LATINIME_PROBABILITY_ENTRY_H */
|
|
@ -126,7 +126,7 @@ class Ver4DictBuffers {
|
|||
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
|
||||
// TODO: Quit using header size.
|
||||
mTerminalPositionLookupTable(dictDirPath, isUpdatable, mHeaderSize),
|
||||
mProbabilityDictContent(dictDirPath, isUpdatable),
|
||||
mProbabilityDictContent(dictDirPath, false /* hasHistoricalInfo */, isUpdatable),
|
||||
mBigramDictContent(dictDirPath, isUpdatable),
|
||||
mShortcutDictContent(dictDirPath, isUpdatable),
|
||||
mIsUpdatable(isUpdatable) {}
|
||||
|
@ -135,7 +135,8 @@ class Ver4DictBuffers {
|
|||
: mDictBuffer(0), mHeaderSize(0),
|
||||
mExpandableHeaderBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
|
||||
mExpandableTrieBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
|
||||
mTerminalPositionLookupTable(), mProbabilityDictContent(),
|
||||
mTerminalPositionLookupTable(),
|
||||
mProbabilityDictContent(false /* hasHistoricalInfo */),
|
||||
mBigramDictContent(), mShortcutDictContent(), mIsUpdatable(true) {}
|
||||
|
||||
const MmappedBuffer::MmappedBufferPtr mDictBuffer;
|
||||
|
|
|
@ -45,6 +45,8 @@ const int Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE = 3;
|
|||
const int Ver4DictConstants::NOT_A_TERMINAL_ADDRESS = 0;
|
||||
const int Ver4DictConstants::TERMINAL_ID_FIELD_SIZE = 4;
|
||||
const int Ver4DictConstants::TIME_STAMP_FIELD_SIZE = 4;
|
||||
const int Ver4DictConstants::WORD_LEVEL_FIELD_SIZE = 1;
|
||||
const int Ver4DictConstants::WORD_COUNT_FIELD_SIZE = 1;
|
||||
|
||||
const int Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE = 4;
|
||||
const int Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE = 4;
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||
|
||||
|
@ -59,7 +60,9 @@ const PtNodeParams Ver4PatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProce
|
|||
terminalIdFieldPos += mBuffer->getOriginalBufferSize();
|
||||
}
|
||||
terminalId = Ver4PatriciaTrieReadingUtils::getTerminalIdAndAdvancePosition(dictBuf, &pos);
|
||||
probability = mProbabilityDictContent->getProbability(terminalId);
|
||||
ProbabilityEntry probabilityEntry;
|
||||
mProbabilityDictContent->getProbabilityEntry(terminalId, &probabilityEntry);
|
||||
probability = probabilityEntry.getProbability();
|
||||
}
|
||||
int childrenPosFieldPos = pos;
|
||||
if (usesAdditionalBuffer) {
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h"
|
||||
|
@ -119,8 +120,12 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbability(
|
|||
}
|
||||
const int probabilityToWrite = getUpdatedProbability(toBeUpdatedPtNodeParams->getProbability(),
|
||||
newProbability);
|
||||
return mBuffers->getUpdatableProbabilityDictContent()->setProbability(
|
||||
toBeUpdatedPtNodeParams->getTerminalId(), probabilityToWrite);
|
||||
ProbabilityEntry probabilityEntry;
|
||||
mBuffers->getProbabilityDictContent()->getProbabilityEntry(
|
||||
toBeUpdatedPtNodeParams->getTerminalId(), &probabilityEntry);
|
||||
probabilityEntry.setProbability(probabilityEntry.getFlags(), probabilityToWrite);
|
||||
return mBuffers->getUpdatableProbabilityDictContent()->setProbabilityEntry(
|
||||
toBeUpdatedPtNodeParams->getTerminalId(), &probabilityEntry);
|
||||
}
|
||||
|
||||
bool Ver4PatriciaTrieNodeWriter::updateChildrenPosition(
|
||||
|
@ -153,8 +158,10 @@ bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition(
|
|||
// Write probability.
|
||||
const int probabilityToWrite = getUpdatedProbability(NOT_A_PROBABILITY,
|
||||
ptNodeParams->getProbability());
|
||||
return mBuffers->getUpdatableProbabilityDictContent()->setProbability(terminalId,
|
||||
probabilityToWrite);
|
||||
ProbabilityEntry probabilityEntry;
|
||||
probabilityEntry.setProbability(0 /* flags */, probabilityToWrite);
|
||||
return mBuffers->getUpdatableProbabilityDictContent()->setProbabilityEntry(terminalId,
|
||||
&probabilityEntry);
|
||||
}
|
||||
|
||||
bool Ver4PatriciaTrieNodeWriter::addNewBigramEntry(
|
||||
|
@ -258,13 +265,6 @@ bool Ver4PatriciaTrieNodeWriter::writePtNodeAndGetTerminalIdAndAdvancePosition(
|
|||
Ver4DictConstants::TERMINAL_ID_FIELD_SIZE, ptNodeWritingPos)) {
|
||||
return false;
|
||||
}
|
||||
// Write probability.
|
||||
if (ptNodeParams->getProbability() != NOT_A_PROBABILITY) {
|
||||
if (!mBuffers->getUpdatableProbabilityDictContent()->setProbability(
|
||||
terminalId, ptNodeParams->getProbability())) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (outTerminalId) {
|
||||
*outTerminalId = terminalId;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue