Merge "Implement shortcut adding method."

main
Keisuke Kuroyanagi 2013-12-05 11:56:39 +00:00 committed by Android (Google) Code Review
commit f2d689c4da
9 changed files with 175 additions and 48 deletions

View File

@ -29,7 +29,7 @@ namespace latinime {
class Ver4ShortcutListPolicy : public DictionaryShortcutsStructurePolicy {
public:
Ver4ShortcutListPolicy(const ShortcutDictContent *const shortcutDictContent,
Ver4ShortcutListPolicy(ShortcutDictContent *const shortcutDictContent,
const TerminalPositionLookupTable *const terminalPositionLookupTable)
: mShortcutDictContent(shortcutDictContent),
mTerminalPositionLookupTable(terminalPositionLookupTable) {}
@ -44,16 +44,11 @@ class Ver4ShortcutListPolicy : public DictionaryShortcutsStructurePolicy {
void getNextShortcut(const int maxCodePointCount, int *const outCodePoint,
int *const outCodePointCount, bool *const outIsWhitelist, bool *const outHasNext,
int *const pos) const {
int shortcutFlags = 0;
if (outCodePoint && outCodePointCount) {
int probability = 0;
mShortcutDictContent->getShortcutEntryAndAdvancePosition(maxCodePointCount,
outCodePoint, outCodePointCount, &shortcutFlags, pos);
}
if (outHasNext) {
*outHasNext = ShortcutListReadingUtils::hasNext(shortcutFlags);
}
outCodePoint, outCodePointCount, &probability, outHasNext, pos);
if (outIsWhitelist) {
*outIsWhitelist = ShortcutListReadingUtils::isWhitelist(shortcutFlags);
*outIsWhitelist = ShortcutListReadingUtils::isWhitelist(probability);
}
}
@ -61,10 +56,52 @@ class Ver4ShortcutListPolicy : public DictionaryShortcutsStructurePolicy {
// Do nothing because we don't need to skip shortcut lists in ver4 dictionaries.
}
// Adds a shortcut target for the terminal identified by terminalId, or updates the
// probability of an already registered target.
//
// Three cases are handled:
//  1. The terminal has no shortcut list (head position is NOT_A_DICT_POS): a new list
//     holding only this entry is created.
//  2. The terminal has a list but the target is not in it: a new list is created, the new
//     entry is written at its head and the existing list is copied right after it.
//  3. The target is already in the list: only its probability is updated in place.
//
// codePoints / codePointCount describe the shortcut target; probability is the value stored
// in the entry's flags field. Returns true on success, false if any write failed.
bool addNewShortcut(const int terminalId, const int *const codePoints, const int codePointCount,
        const int probability) {
    const int shortcutListPos = mShortcutDictContent->getShortcutListHeadPos(terminalId);
    if (shortcutListPos == NOT_A_DICT_POS) {
        // Create shortcut list.
        if (!mShortcutDictContent->createNewShortcutList(terminalId)) {
            AKLOGE("Cannot create new shortcut list. terminal id: %d", terminalId);
            return false;
        }
        // The head position has just been registered by createNewShortcutList().
        const int writingPos = mShortcutDictContent->getShortcutListHeadPos(terminalId);
        // This is the only entry of the list, so it must not be flagged as having a successor.
        return mShortcutDictContent->writeShortcutEntry(codePoints, codePointCount, probability,
                false /* hasNext */, writingPos);
    }
    const int entryPos = mShortcutDictContent->findShortcutEntryAndGetPos(shortcutListPos,
            codePoints, codePointCount);
    if (entryPos == NOT_A_DICT_POS) {
        // Add new entry to the shortcut list.
        // Create new shortcut list.
        if (!mShortcutDictContent->createNewShortcutList(terminalId)) {
            AKLOGE("Cannot create new shortcut list. terminal id: %d", terminalId);
            return false;
        }
        int writingPos = mShortcutDictContent->getShortcutListHeadPos(terminalId);
        // The new entry becomes the head; the old entries follow it, hence hasNext is true.
        if (!mShortcutDictContent->writeShortcutEntryAndAdvancePosition(codePoints,
                codePointCount, probability, true /* hasNext */, &writingPos)) {
            AKLOGE("Cannot write shortcut entry. terminal id: %d, pos: %d", terminalId,
                    writingPos);
            return false;
        }
        // writingPos now points just past the new entry; append the existing list there.
        return mShortcutDictContent->copyShortcutList(shortcutListPos, writingPos);
    }
    // Overwrite existing entry.
    // The target code points already match, so only the probability has to change.
    // setProbability() keeps the has-next bit that is stored in the entry; forcing it to
    // true here would corrupt the list whenever the matched entry is the last one.
    if (!mShortcutDictContent->setProbability(probability, entryPos)) {
        AKLOGE("Cannot overwrite shortcut entry. terminal id: %d, pos: %d", terminalId,
                entryPos);
        return false;
    }
    return true;
}
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4ShortcutListPolicy);
const ShortcutDictContent *const mShortcutDictContent;
ShortcutDictContent *const mShortcutDictContent;
const TerminalPositionLookupTable *const mTerminalPositionLookupTable;
};
} // namespace latinime

View File

@ -21,12 +21,16 @@
namespace latinime {
void ShortcutDictContent::getShortcutEntryAndAdvancePosition(const int maxCodePointCount,
int *const outCodePoint, int *const outCodePointCount, int *const outShortcutFlags,
int *const shortcutEntryPos) const {
int *const outCodePoint, int *const outCodePointCount, int *const outProbability,
bool *const outhasNext, int *const shortcutEntryPos) const {
const BufferWithExtendableBuffer *const shortcutListBuffer = getContentBuffer();
if (outShortcutFlags) {
*outShortcutFlags = shortcutListBuffer->readUintAndAdvancePosition(
const int shortcutFlags = shortcutListBuffer->readUintAndAdvancePosition(
Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE, shortcutEntryPos);
if (outProbability) {
*outProbability = shortcutFlags & Ver4DictConstants::SHORTCUT_PROBABILITY_MASK;
}
if (outhasNext) {
*outhasNext = shortcutFlags & Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK;
}
if (outCodePoint && outCodePointCount) {
shortcutListBuffer->readCodePointsAndAdvancePosition(
@ -59,50 +63,113 @@ bool ShortcutDictContent::runGC(
continue;
}
const int shortcutListPos = getContentBuffer()->getTailPosition();
// Copy shortcut list with GC from original content.
if (!copyShortcutList(originalShortcutListPos, originalShortcutDictContent,
// Copy shortcut list from original content.
if (!copyShortcutListFromDictContent(originalShortcutListPos, originalShortcutDictContent,
shortcutListPos)) {
AKLOGE("Cannot copy shortcut list during GC. original pos: %d, pos: %d",
originalShortcutListPos, shortcutListPos);
return false;
}
// Set shortcut list position to the lookup table.
if (!getUpdatableAddressLookupTable()->set(it->second, shortcutListPos)) {
AKLOGE("Cannot set shortcut list position. terminal id: %d, pos: %d",
it->second, shortcutListPos);
return false;
}
}
return true;
}
bool ShortcutDictContent::copyShortcutList(const int shortcutListPos,
// Registers the current tail of the content buffer as the shortcut list head position for
// terminalId. Nothing is written to the content buffer here; the caller is expected to write
// the list entries starting at that position. Returns the result of the lookup table update.
bool ShortcutDictContent::createNewShortcutList(const int terminalId) {
    const int newListHeadPos = getContentBuffer()->getTailPosition();
    return getUpdatableAddressLookupTable()->set(terminalId, newListHeadPos);
}
bool ShortcutDictContent::copyShortcutList(const int shortcutListPos, const int toPos) {
return copyShortcutListFromDictContent(shortcutListPos, this, toPos);
}
bool ShortcutDictContent::copyShortcutListFromDictContent(const int shortcutListPos,
const ShortcutDictContent *const sourceShortcutDictContent, const int toPos) {
bool hasNext = true;
int readingPos = shortcutListPos;
int writingPos = toPos;
int codePoints[MAX_WORD_LENGTH];
while (hasNext) {
int shortcutFlags = 0;
int probability = 0;
int codePointCount = 0;
sourceShortcutDictContent->getShortcutEntryAndAdvancePosition(MAX_WORD_LENGTH,
codePoints, &codePointCount, &shortcutFlags, &readingPos);
if (!writeShortcutEntryAndAdvancePosition(codePoints, codePointCount, shortcutFlags,
&writingPos)) {
codePoints, &codePointCount, &probability, &hasNext, &readingPos);
if (!writeShortcutEntryAndAdvancePosition(codePoints, codePointCount, probability,
hasNext, &writingPos)) {
AKLOGE("Cannot write shortcut entry to copy. pos: %d", writingPos);
return false;
}
}
return true;
}
bool ShortcutDictContent::writeShortcutEntryAndAdvancePosition(const int *const codePoint,
const int codePointCount, const int shortcutFlags, int *const shortcutEntryPos) {
bool ShortcutDictContent::setProbability(const int probability, const int shortcutEntryPos) {
BufferWithExtendableBuffer *const shortcutListBuffer = getWritableContentBuffer();
const int shortcutFlags = shortcutListBuffer->readUint(
Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE, shortcutEntryPos);
const bool hasNext = shortcutFlags & Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK;
const int shortcutFlagsToWrite = createAndGetShortcutFlags(probability, hasNext);
return shortcutListBuffer->writeUint(shortcutFlagsToWrite,
Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE, shortcutEntryPos);
}
bool ShortcutDictContent::writeShortcutEntryAndAdvancePosition(const int *const codePoint,
const int codePointCount, const int probability, const bool hasNext,
int *const shortcutEntryPos) {
BufferWithExtendableBuffer *const shortcutListBuffer = getWritableContentBuffer();
const int shortcutFlags = createAndGetShortcutFlags(probability, hasNext);
if (!shortcutListBuffer->writeUintAndAdvancePosition(shortcutFlags,
Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE, shortcutEntryPos)) {
AKLOGE("Cannot write shortcut flags. flags; %x, pos: %d", shortcutFlags, *shortcutEntryPos);
return false;
}
if (!shortcutListBuffer->writeCodePointsAndAdvancePosition(codePoint, codePointCount,
true /* writesTerminator */, shortcutEntryPos)) {
AKLOGE("Cannot write shortcut target code points. pos: %d", *shortcutEntryPos);
return false;
}
return true;
}
// Find a shortcut entry that has specified target and return its position.
int ShortcutDictContent::findShortcutEntryAndGetPos(const int shortcutListPos,
const int *const targetCodePointsToFind, const int codePointCount) const {
bool hasNext = true;
int readingPos = shortcutListPos;
int targetCodePoints[MAX_WORD_LENGTH];
while (hasNext) {
const int entryPos = readingPos;
int probability = 0;
int targetCodePointCount = 0;
getShortcutEntryAndAdvancePosition(MAX_WORD_LENGTH, targetCodePoints, &targetCodePointCount,
&probability, &hasNext, &readingPos);
if (targetCodePointCount != codePointCount) {
continue;
}
bool matched = true;
for (int i = 0; i < codePointCount; ++i) {
if (targetCodePointsToFind[i] != targetCodePoints[i]) {
matched = false;
break;
}
}
if (matched) {
return entryPos;
}
}
return NOT_A_DICT_POS;
}
// Builds the value of a shortcut entry's flags field: the probability restricted to
// SHORTCUT_PROBABILITY_MASK, with SHORTCUT_HAS_NEXT_MASK set when hasNext is true.
int ShortcutDictContent::createAndGetShortcutFlags(const int probability,
        const bool hasNext) const {
    int flags = probability & Ver4DictConstants::SHORTCUT_PROBABILITY_MASK;
    if (hasNext) {
        flags |= Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK;
    }
    return flags;
}
} // namespace latinime

View File

@ -39,8 +39,8 @@ class ShortcutDictContent : public SparseTableDictContent {
Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE) {}
void getShortcutEntryAndAdvancePosition(const int maxCodePointCount,
int *const outCodePoint, int *const outCodePointCount, int *const outShortcutFlags,
int *const shortcutEntryPos) const;
int *const outCodePoint, int *const outCodePointCount, int *const outProbability,
bool *const outhasNext, int *const shortcutEntryPos) const;
// Returns head position of shortcut list for a PtNode specified by terminalId.
int getShortcutListHeadPos(const int terminalId) const;
@ -50,14 +50,33 @@ class ShortcutDictContent : public SparseTableDictContent {
bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
const ShortcutDictContent *const originalShortcutDictContent);
bool createNewShortcutList(const int terminalId);
bool copyShortcutList(const int shortcutListPos, const int toPos);
bool setProbability(const int probability, const int shortcutEntryPos);
// Writes one shortcut entry at shortcutEntryPos. Same as
// writeShortcutEntryAndAdvancePosition(), except that the caller's position is taken by
// value and therefore is not advanced.
bool writeShortcutEntry(const int *const codePoint, const int codePointCount,
        const int probability, const bool hasNext, const int shortcutEntryPos) {
    // Work on a local copy so the advanced position is simply discarded.
    int scratchPos = shortcutEntryPos;
    const bool written = writeShortcutEntryAndAdvancePosition(codePoint, codePointCount,
            probability, hasNext, &scratchPos);
    return written;
}
bool writeShortcutEntryAndAdvancePosition(const int *const codePoint,
const int codePointCount, const int probability, const bool hasNext,
int *const shortcutEntryPos);
int findShortcutEntryAndGetPos(const int shortcutListPos,
const int *const targetCodePointsToFind, const int codePointCount) const;
private:
DISALLOW_COPY_AND_ASSIGN(ShortcutDictContent);
bool copyShortcutList(const int shortcutListPos,
bool copyShortcutListFromDictContent(const int shortcutListPos,
const ShortcutDictContent *const sourceShortcutDictContent, const int toPos);
bool writeShortcutEntryAndAdvancePosition(const int *const codePoint,
const int codePointCount, const int shortcutFlags, int *const shortcutEntryPos);
int createAndGetShortcutFlags(const int probability, const bool hasNext) const;
};
} // namespace latinime
#endif /* LATINIME_SHORTCUT_DICT_CONTENT_H */

View File

@ -74,7 +74,7 @@ class Ver4DictBuffers {
return &mExpandableTrieBuffer;
}
AK_FORCE_INLINE TerminalPositionLookupTable *getUpdatableTerminalPositionLookupTable() {
AK_FORCE_INLINE TerminalPositionLookupTable *getMutableTerminalPositionLookupTable() {
return &mTerminalPositionLookupTable;
}
@ -82,7 +82,7 @@ class Ver4DictBuffers {
return &mTerminalPositionLookupTable;
}
AK_FORCE_INLINE ProbabilityDictContent *getUpdatableProbabilityDictContent() {
AK_FORCE_INLINE ProbabilityDictContent *getMutableProbabilityDictContent() {
return &mProbabilityDictContent;
}
@ -90,7 +90,7 @@ class Ver4DictBuffers {
return &mProbabilityDictContent;
}
AK_FORCE_INLINE BigramDictContent *getUpdatableBigramDictContent() {
AK_FORCE_INLINE BigramDictContent *getMutableBigramDictContent() {
return &mBigramDictContent;
}
@ -98,7 +98,7 @@ class Ver4DictBuffers {
return &mBigramDictContent;
}
AK_FORCE_INLINE ShortcutDictContent *getUpdatableShortcutDictContent() {
AK_FORCE_INLINE ShortcutDictContent *getMutableShortcutDictContent() {
return &mShortcutDictContent;
}

View File

@ -64,5 +64,7 @@ const int Ver4DictConstants::BIGRAM_HAS_NEXT_MASK = 0x80;
const int Ver4DictConstants::BIGRAM_LARGE_PROBABILITY_FIELD_SIZE = 1;
// Size of the flags field that starts every shortcut entry (presumably in bytes -- TODO
// confirm against BufferWithExtendableBuffer).
const int Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE = 1;
// Bits of the shortcut flags field that carry the shortcut probability.
const int Ver4DictConstants::SHORTCUT_PROBABILITY_MASK = 0x0F;
// Bit of the shortcut flags field that is set when another entry follows in the list.
const int Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK = 0x80;
} // namespace latinime

View File

@ -62,6 +62,8 @@ class Ver4DictConstants {
static const int BIGRAM_LARGE_PROBABILITY_FIELD_SIZE;
static const int SHORTCUT_FLAGS_FIELD_SIZE;
static const int SHORTCUT_PROBABILITY_MASK;
static const int SHORTCUT_HAS_NEXT_MASK;
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictConstants);

View File

@ -53,7 +53,7 @@ bool Ver4PatriciaTrieNodeWriter::markPtNodeAsDeleted(
}
if (toBeUpdatedPtNodeParams->isTerminal()) {
// The PtNode is a terminal. Delete entry from the terminal position lookup table.
return mBuffers->getUpdatableTerminalPositionLookupTable()->setTerminalPtNodePosition(
return mBuffers->getMutableTerminalPositionLookupTable()->setTerminalPtNodePosition(
toBeUpdatedPtNodeParams->getTerminalId(), NOT_A_DICT_POS /* ptNodePos */);
} else {
return true;
@ -117,7 +117,7 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbability(
toBeUpdatedPtNodeParams->getTerminalId());
const ProbabilityEntry probabilityEntry = createUpdatedEntryFrom(&originalProbabilityEntry,
newProbability, timestamp);
return mBuffers->getUpdatableProbabilityDictContent()->setProbabilityEntry(
return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(
toBeUpdatedPtNodeParams->getTerminalId(), &probabilityEntry);
}
@ -152,7 +152,7 @@ bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition(
ProbabilityEntry newProbabilityEntry;
const ProbabilityEntry probabilityEntryToWrite = createUpdatedEntryFrom(
&newProbabilityEntry, ptNodeParams->getProbability(), timestamp);
return mBuffers->getUpdatableProbabilityDictContent()->setProbabilityEntry(terminalId,
return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(terminalId,
&probabilityEntryToWrite);
}
@ -248,7 +248,7 @@ bool Ver4PatriciaTrieNodeWriter::writePtNodeAndGetTerminalIdAndAdvancePosition(
const int isTerminal = terminalId != Ver4DictConstants::NOT_A_TERMINAL_ID;
if (isTerminal) {
// Update the lookup table.
if (!mBuffers->getUpdatableTerminalPositionLookupTable()->setTerminalPtNodePosition(
if (!mBuffers->getMutableTerminalPositionLookupTable()->setTerminalPtNodePosition(
terminalId, nodePos)) {
return false;
}

View File

@ -40,10 +40,10 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
Ver4PatriciaTriePolicy(const Ver4DictBuffers::Ver4DictBuffersPtr &buffers)
: mBuffers(buffers), mHeaderPolicy(mBuffers.get()->getHeaderPolicy()),
mDictBuffer(mBuffers.get()->getWritableTrieBuffer()),
mBigramPolicy(mBuffers.get()->getUpdatableBigramDictContent(),
mBigramPolicy(mBuffers.get()->getMutableBigramDictContent(),
mBuffers.get()->getTerminalPositionLookupTable(), mHeaderPolicy,
mHeaderPolicy->isDecayingDict()),
mShortcutPolicy(mBuffers.get()->getShortcutDictContent(),
mShortcutPolicy(mBuffers.get()->getMutableShortcutDictContent(),
mBuffers.get()->getTerminalPositionLookupTable()),
mNodeReader(mDictBuffer, mBuffers.get()->getProbabilityDictContent()),
mNodeWriter(mDictBuffer, mBuffers.get(), &mNodeReader, &mBigramPolicy,

View File

@ -80,9 +80,9 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
int *const outUnigramCount, int *const outBigramCount, const bool needsToDecay) {
Ver4PatriciaTrieNodeReader ptNodeReader(mBuffers->getTrieBuffer(),
mBuffers->getProbabilityDictContent());
Ver4BigramListPolicy bigramPolicy(mBuffers->getUpdatableBigramDictContent(),
Ver4BigramListPolicy bigramPolicy(mBuffers->getMutableBigramDictContent(),
mBuffers->getTerminalPositionLookupTable(), headerPolicy, needsToDecay);
Ver4ShortcutListPolicy shortcutPolicy(mBuffers->getShortcutDictContent(),
Ver4ShortcutListPolicy shortcutPolicy(mBuffers->getMutableShortcutDictContent(),
mBuffers->getTerminalPositionLookupTable());
Ver4PatriciaTrieNodeWriter ptNodeWriter(mBuffers->getWritableTrieBuffer(),
mBuffers, &ptNodeReader, &bigramPolicy, &shortcutPolicy,
@ -133,32 +133,32 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
// Create policy instances for the GCed dictionary.
Ver4PatriciaTrieNodeReader newPtNodeReader(buffersToWrite->getTrieBuffer(),
buffersToWrite->getProbabilityDictContent());
Ver4BigramListPolicy newBigramPolicy(buffersToWrite->getUpdatableBigramDictContent(),
Ver4BigramListPolicy newBigramPolicy(buffersToWrite->getMutableBigramDictContent(),
buffersToWrite->getTerminalPositionLookupTable(), headerPolicy,
false /* needsToDecay */);
Ver4ShortcutListPolicy newShortcutPolicy(buffersToWrite->getShortcutDictContent(),
Ver4ShortcutListPolicy newShortcutPolicy(buffersToWrite->getMutableShortcutDictContent(),
buffersToWrite->getTerminalPositionLookupTable());
Ver4PatriciaTrieNodeWriter newPtNodeWriter(buffersToWrite->getWritableTrieBuffer(),
buffersToWrite, &newPtNodeReader, &newBigramPolicy, &newShortcutPolicy,
false /* needsToDecayWhenUpdating */);
// Re-assign terminal IDs for valid terminal PtNodes.
TerminalPositionLookupTable::TerminalIdMap terminalIdMap;
if(!buffersToWrite->getUpdatableTerminalPositionLookupTable()->runGCTerminalIds(
if(!buffersToWrite->getMutableTerminalPositionLookupTable()->runGCTerminalIds(
&terminalIdMap)) {
return false;
}
// Run GC for probability dict content.
if (!buffersToWrite->getUpdatableProbabilityDictContent()->runGC(&terminalIdMap,
if (!buffersToWrite->getMutableProbabilityDictContent()->runGC(&terminalIdMap,
mBuffers->getProbabilityDictContent())) {
return false;
}
// Run GC for bigram dict content.
if(!buffersToWrite->getUpdatableBigramDictContent()->runGC(&terminalIdMap,
if(!buffersToWrite->getMutableBigramDictContent()->runGC(&terminalIdMap,
mBuffers->getBigramDictContent(), outBigramCount)) {
return false;
}
// Run GC for shortcut dict content.
if(!buffersToWrite->getUpdatableShortcutDictContent()->runGC(&terminalIdMap,
if(!buffersToWrite->getMutableShortcutDictContent()->runGC(&terminalIdMap,
mBuffers->getShortcutDictContent())) {
return false;
}