Merge "Employ "bigram link" for handling moved bigram target."

This commit is contained in:
Keisuke Kuroyanagi 2013-09-13 09:46:27 +00:00 committed by Android (Google) Code Review
commit 04bf3cd4e0
8 changed files with 128 additions and 72 deletions

View file

@ -18,6 +18,42 @@
namespace latinime {
// Maximum number of bigram links that may be followed while resolving one bigram target.
// followBigramLinkAndGetCurrentBigramPtNodePos() gives up and reports the link as invalid once
// this count is exceeded.
const int DynamicBigramListPolicy::BIGRAM_LINK_COUNT_LIMIT = 10000;
void DynamicBigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability,
bool *const outHasNext, int *const pos) const {
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos);
const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
if (usesAdditionalBuffer) {
*pos -= mBuffer->getOriginalBufferSize();
}
const BigramListReadWriteUtils::BigramFlags flags =
BigramListReadWriteUtils::getFlagsAndForwardPointer(buffer, pos);
int originalBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
buffer, flags, pos);
if (usesAdditionalBuffer && originalBigramPos != NOT_A_VALID_WORD_POS) {
originalBigramPos += mBuffer->getOriginalBufferSize();
}
*outBigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
*outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(flags);
*outHasNext = BigramListReadWriteUtils::hasNext(flags);
if (usesAdditionalBuffer) {
*pos += mBuffer->getOriginalBufferSize();
}
}
// Advances *pos past the bigram list that starts at *pos.
//
// pos: in/out reading position. When it lies in the additional buffer, the original buffer size
//      is subtracted before the list is skipped and added back afterwards.
void DynamicBigramListPolicy::skipAllBigrams(int *const pos) const {
    const bool inAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos);
    const uint8_t *const dictBuf = mBuffer->getBuffer(inAdditionalBuffer);
    if (!inAdditionalBuffer) {
        // Position already refers to the original buffer; no translation is needed.
        BigramListReadWriteUtils::skipExistingBigrams(dictBuf, pos);
        return;
    }
    *pos -= mBuffer->getOriginalBufferSize();
    BigramListReadWriteUtils::skipExistingBigrams(dictBuf, pos);
    *pos += mBuffer->getOriginalBufferSize();
}
bool DynamicBigramListPolicy::copyAllBigrams(int *const fromPos, int *const toPos) {
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*fromPos);
if (usesAdditionalBuffer) {
@ -28,15 +64,16 @@ bool DynamicBigramListPolicy::copyAllBigrams(int *const fromPos, int *const toPo
// The buffer address can be changed after calling buffer writing methods.
const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
flags = BigramListReadWriteUtils::getFlagsAndForwardPointer(buffer, fromPos);
int bigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
int originalBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
buffer, flags, fromPos);
if (bigramPos == NOT_A_VALID_WORD_POS) {
if (originalBigramPos == NOT_A_VALID_WORD_POS) {
// skip invalid bigram entry.
continue;
}
if (usesAdditionalBuffer) {
bigramPos += mBuffer->getOriginalBufferSize();
originalBigramPos += mBuffer->getOriginalBufferSize();
}
const int bigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
BigramListReadWriteUtils::BigramFlags newBigramFlags;
uint32_t newBigramOffset;
int newBigramOffsetFieldSize;
@ -133,11 +170,12 @@ bool DynamicBigramListPolicy::removeBigram(const int bigramListPos, const int ta
if (usesAdditionalBuffer) {
bigramOffsetFieldPos += mBuffer->getOriginalBufferSize();
}
int bigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
int originalBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
buffer, flags, &pos);
if (usesAdditionalBuffer && bigramPos != NOT_A_VALID_WORD_POS) {
bigramPos += mBuffer->getOriginalBufferSize();
if (usesAdditionalBuffer && originalBigramPos != NOT_A_VALID_WORD_POS) {
originalBigramPos += mBuffer->getOriginalBufferSize();
}
const int bigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
if (bigramPos != targetBigramPos) {
continue;
}
@ -152,4 +190,26 @@ bool DynamicBigramListPolicy::removeBigram(const int bigramListPos, const int ta
return false;
}
// Resolves a bigram target position to the PtNode position reached by following bigram links.
//
// A node reader is loaded at originalBigramPos. While the reader reports a bigram linked node
// position other than NOT_A_DICT_POS, that link is followed and the linked node is loaded.
//
// originalBigramPos: bigram target position as read from a bigram list entry.
// Returns the position of the last node in the link chain, or NOT_A_VALID_WORD_POS when
// originalBigramPos is NOT_A_VALID_WORD_POS or when more than BIGRAM_LINK_COUNT_LIMIT links
// had to be followed.
int DynamicBigramListPolicy::followBigramLinkAndGetCurrentBigramPtNodePos(
        const int originalBigramPos) const {
    if (originalBigramPos == NOT_A_VALID_WORD_POS) {
        return NOT_A_VALID_WORD_POS;
    }
    int currentPos = originalBigramPos;
    DynamicPatriciaTrieNodeReader nodeReader(mBuffer, this /* bigramsPolicy */, mShortcutPolicy);
    nodeReader.fetchNodeInfoFromBuffer(currentPos);
    int bigramLinkCount = 0;
    while (nodeReader.getBigramLinkedNodePos() != NOT_A_DICT_POS) {
        currentPos = nodeReader.getBigramLinkedNodePos();
        nodeReader.fetchNodeInfoFromBuffer(currentPos);
        bigramLinkCount++;
        if (bigramLinkCount > BIGRAM_LINK_COUNT_LIMIT) {
            // Bound the walk; presumably this guards against corrupted or cyclic link chains.
            // Log the position the walk started from (the only position name in scope here).
            AKLOGI("Bigram link is invalid. start position: %d", originalBigramPos);
            ASSERT(false);
            return NOT_A_VALID_WORD_POS;
        }
    }
    return currentPos;
}
} // namespace latinime

View file

@ -21,7 +21,9 @@
#include "defines.h"
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
namespace latinime {
@ -31,43 +33,16 @@ namespace latinime {
*/
class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
public:
DynamicBigramListPolicy(BufferWithExtendableBuffer *const buffer)
: mBuffer(buffer) {}
DynamicBigramListPolicy(BufferWithExtendableBuffer *const buffer,
const DictionaryShortcutsStructurePolicy *const shortcutPolicy)
: mBuffer(buffer), mShortcutPolicy(shortcutPolicy) {}
~DynamicBigramListPolicy() {}
void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext,
int *const pos) const {
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos);
const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
if (usesAdditionalBuffer) {
*pos -= mBuffer->getOriginalBufferSize();
}
const BigramListReadWriteUtils::BigramFlags flags =
BigramListReadWriteUtils::getFlagsAndForwardPointer(buffer, pos);
*outBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
buffer, flags, pos);
if (usesAdditionalBuffer && *outBigramPos != NOT_A_VALID_WORD_POS) {
*outBigramPos += mBuffer->getOriginalBufferSize();
}
*outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(flags);
*outHasNext = BigramListReadWriteUtils::hasNext(flags);
if (usesAdditionalBuffer) {
*pos += mBuffer->getOriginalBufferSize();
}
}
int *const pos) const;
void skipAllBigrams(int *const pos) const {
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos);
const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
if (usesAdditionalBuffer) {
*pos -= mBuffer->getOriginalBufferSize();
}
BigramListReadWriteUtils::skipExistingBigrams(buffer, pos);
if (usesAdditionalBuffer) {
*pos += mBuffer->getOriginalBufferSize();
}
}
void skipAllBigrams(int *const pos) const;
// Copy bigrams from the bigram list that starts at fromPos to toPos and advance these
// positions after bigram lists. This method skips invalid bigram entries.
@ -81,7 +56,13 @@ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicBigramListPolicy);
static const int BIGRAM_LINK_COUNT_LIMIT;
BufferWithExtendableBuffer *const mBuffer;
const DictionaryShortcutsStructurePolicy *const mShortcutPolicy;
// Follow bigram link and return the position of bigram target PtNode that is currently valid.
int followBigramLinkAndGetCurrentBigramPtNodePos(const int originalBigramPos) const;
};
} // namespace latinime
#endif // LATINIME_DYNAMIC_BIGRAM_LIST_POLICY_H

View file

@ -62,6 +62,11 @@ void DynamicPatriciaTrieNodeReader::fetchNodeInfoFromBufferAndProcessMovedNode(c
if (usesAdditionalBuffer && mChildrenPos != NOT_A_DICT_POS) {
mChildrenPos += mBuffer->getOriginalBufferSize();
}
if (mSiblingPos == NOT_A_VALID_WORD_POS && DynamicPatriciaTrieReadingUtils::isMoved(mFlags)) {
mBigramLinkedNodePos = mChildrenPos;
} else {
mBigramLinkedNodePos = NOT_A_DICT_POS;
}
if (usesAdditionalBuffer) {
pos += mBuffer->getOriginalBufferSize();
}

View file

@ -39,11 +39,11 @@ class DynamicPatriciaTrieNodeReader {
const DictionaryBigramsStructurePolicy *const bigramsPolicy,
const DictionaryShortcutsStructurePolicy *const shortcutsPolicy)
: mBuffer(buffer), mBigramsPolicy(bigramsPolicy),
mShortcutsPolicy(shortcutsPolicy), mNodePos(NOT_A_VALID_WORD_POS),
mHeadPos(NOT_A_DICT_POS), mFlags(0), mParentPos(NOT_A_DICT_POS), mCodePointCount(0),
mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(NOT_A_PROBABILITY),
mChildrenPosFieldPos(NOT_A_DICT_POS), mChildrenPos(NOT_A_DICT_POS),
mShortcutPos(NOT_A_DICT_POS), mBigramPos(NOT_A_DICT_POS),
mShortcutsPolicy(shortcutsPolicy), mHeadPos(NOT_A_VALID_WORD_POS), mFlags(0),
mParentPos(NOT_A_DICT_POS), mCodePointCount(0), mProbabilityFieldPos(NOT_A_DICT_POS),
mProbability(NOT_A_PROBABILITY), mChildrenPosFieldPos(NOT_A_DICT_POS),
mChildrenPos(NOT_A_DICT_POS), mBigramLinkedNodePos(NOT_A_DICT_POS),
mShortcutPos(NOT_A_DICT_POS), mBigramPos(NOT_A_DICT_POS),
mSiblingPos(NOT_A_VALID_WORD_POS) {}
~DynamicPatriciaTrieNodeReader() {}
@ -56,13 +56,9 @@ class DynamicPatriciaTrieNodeReader {
AK_FORCE_INLINE void fetchNodeInfoFromBufferAndGetNodeCodePoints(const int nodePos,
const int maxCodePointCount, int *const outCodePoints) {
mNodePos = nodePos;
mSiblingPos = NOT_A_VALID_WORD_POS;
fetchNodeInfoFromBufferAndProcessMovedNode(mNodePos, maxCodePointCount, outCodePoints);
}
AK_FORCE_INLINE int getNodePos() const {
return mNodePos;
mBigramLinkedNodePos = NOT_A_DICT_POS;
fetchNodeInfoFromBufferAndProcessMovedNode(nodePos, maxCodePointCount, outCodePoints);
}
// HeadPos is different from NodePos when the current PtNode is a moved PtNode.
@ -119,6 +115,11 @@ class DynamicPatriciaTrieNodeReader {
return mChildrenPos;
}
// Bigram linked node position.
AK_FORCE_INLINE int getBigramLinkedNodePos() const {
return mBigramLinkedNodePos;
}
// Shortcut list position
AK_FORCE_INLINE int getShortcutPos() const {
return mShortcutPos;
@ -140,7 +141,6 @@ class DynamicPatriciaTrieNodeReader {
const BufferWithExtendableBuffer *const mBuffer;
const DictionaryBigramsStructurePolicy *const mBigramsPolicy;
const DictionaryShortcutsStructurePolicy *const mShortcutsPolicy;
int mNodePos;
int mHeadPos;
DynamicPatriciaTrieReadingUtils::NodeFlags mFlags;
int mParentPos;
@ -149,6 +149,7 @@ class DynamicPatriciaTrieNodeReader {
int mProbability;
int mChildrenPosFieldPos;
int mChildrenPos;
int mBigramLinkedNodePos;
int mShortcutPos;
int mBigramPos;
int mSiblingPos;

View file

@ -38,7 +38,7 @@ void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const d
readingHelper.initWithNodeArrayPos(dicNode->getChildrenPos());
const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
while (!readingHelper.isEnd()) {
childDicNodes->pushLeavingChild(dicNode, nodeReader->getNodePos(),
childDicNodes->pushLeavingChild(dicNode, nodeReader->getHeadPos(),
nodeReader->getChildrenPos(), nodeReader->getProbability(),
nodeReader->isTerminal() && !nodeReader->isDeleted(),
nodeReader->hasChildren(), nodeReader->isBlacklisted() || nodeReader->isNotAWord(),
@ -122,7 +122,7 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const in
// All characters are matched.
if (length == readingHelper.getTotalCodePointCount()) {
// Terminal position is found.
return nodeReader->getNodePos();
return nodeReader->getHeadPos();
}
if (!nodeReader->hasChildren()) {
return NOT_A_VALID_WORD_POS;

View file

@ -36,8 +36,8 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
: mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer()),
mBufferWithExtendableBuffer(mBuffer->getBuffer() + mHeaderPolicy.getSize(),
mBuffer->getBufferSize() - mHeaderPolicy.getSize()),
mBigramListPolicy(&mBufferWithExtendableBuffer),
mShortcutListPolicy(&mBufferWithExtendableBuffer) {}
mShortcutListPolicy(&mBufferWithExtendableBuffer),
mBigramListPolicy(&mBufferWithExtendableBuffer, &mShortcutListPolicy) {}
~DynamicPatriciaTriePolicy() {
delete mBuffer;
@ -91,8 +91,8 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
const MmappedBuffer *const mBuffer;
const HeaderPolicy mHeaderPolicy;
BufferWithExtendableBuffer mBufferWithExtendableBuffer;
DynamicBigramListPolicy mBigramListPolicy;
DynamicShortcutListPolicy mShortcutListPolicy;
DynamicBigramListPolicy mBigramListPolicy;
};
} // namespace latinime
#endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H

View file

@ -63,7 +63,7 @@ bool DynamicPatriciaTrieWritingHelper::addUnigramWord(
codePointCount - readingHelper->getTotalCodePointCount());
}
// Advance to the children nodes.
parentPos = nodeReader->getNodePos();
parentPos = nodeReader->getHeadPos();
readingHelper->readChildNode();
}
if (readingHelper->isError()) {
@ -100,8 +100,9 @@ bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, con
}
bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
const DynamicPatriciaTrieNodeReader *const originalNode, const int movedPos) {
int pos = originalNode->getNodePos();
const DynamicPatriciaTrieNodeReader *const originalNode, const int movedPos,
const int bigramLinkedNodePos) {
int pos = originalNode->getHeadPos();
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos);
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
if (usesAdditionalBuffer) {
@ -113,18 +114,24 @@ bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, true /* isMoved */,
false /* isDeleted */);
int writingPos = originalNode->getNodePos();
int writingPos = originalNode->getHeadPos();
// Update flags.
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
&writingPos)) {
return false;
}
// Update moved position, which is stored in the parent offset field.
const int movedPosOffset = movedPos - originalNode->getNodePos();
const int movedPosOffset = movedPos - originalNode->getHeadPos();
if (!DynamicPatriciaTrieWritingUtils::writeParentOffsetAndAdvancePosition(
mBuffer, movedPosOffset, &writingPos)) {
return false;
}
// Update bigram linked node position, which is stored in the children position field.
int childrenPosFieldPos = originalNode->getChildrenPosFieldPos();
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(
mBuffer, bigramLinkedNodePos, &childrenPosFieldPos)) {
return false;
}
if (originalNode->hasChildren()) {
// Update children's parent position.
DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy);
@ -248,7 +255,7 @@ bool DynamicPatriciaTrieWritingHelper::setPtNodeProbability(
} else {
// Make the node terminal and write the probability.
int movedPos = mBuffer->getTailPosition();
if (!markNodeAsMovedAndSetPosition(originalPtNode, movedPos)) {
if (!markNodeAsMovedAndSetPosition(originalPtNode, movedPos, movedPos)) {
return false;
}
if (!writePtNodeToBufferByCopyingPtNodeInfo(originalPtNode, originalPtNode->getParentPos(),
@ -268,7 +275,7 @@ bool DynamicPatriciaTrieWritingHelper::createChildrenPtNodeArrayAndAChildPtNode(
newPtNodeArrayPos, &childrenPosFieldPos)) {
return false;
}
return createNewPtNodeArrayWithAChildPtNode(parentNode->getNodePos(), codePoints,
return createNewPtNodeArrayWithAChildPtNode(parentNode->getHeadPos(), codePoints,
codePointCount, probability);
}
@ -305,11 +312,8 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
// Reallocating PtNode: abcde, newNode: abc.
// abc (1st, terminal) __ de (2nd)
const bool addsExtraChild = newNodeCodePointCount > overlappingCodePointCount;
const int firstPtNodePos = mBuffer->getTailPosition();
if (!markNodeAsMovedAndSetPosition(reallocatingPtNode, firstPtNodePos)) {
return false;
}
int writingPos = firstPtNodePos;
const int firstPartOfReallocatedPtNodePos = mBuffer->getTailPosition();
int writingPos = firstPartOfReallocatedPtNodePos;
// Write the 1st part of the reallocating node. The children position will be updated later
// with actual children position.
const int newProbability = addsExtraChild ? NOT_A_PROBABILITY : probabilityOfNewPtNode;
@ -325,15 +329,15 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
return false;
}
// Write the 2nd part of the reallocating node.
if (!writePtNodeToBufferByCopyingPtNodeInfo(reallocatingPtNode,
reallocatingPtNode->getNodePos(),
const int secondPartOfReallocatedPtNodePos = writingPos;
if (!writePtNodeToBufferByCopyingPtNodeInfo(reallocatingPtNode, firstPartOfReallocatedPtNodePos,
reallocatingPtNodeCodePoints + overlappingCodePointCount,
reallocatingPtNode->getCodePointCount() - overlappingCodePointCount,
reallocatingPtNode->getProbability(), &writingPos)) {
return false;
}
if (addsExtraChild) {
if (!writePtNodeToBuffer(reallocatingPtNode->getNodePos(),
if (!writePtNodeToBuffer(firstPartOfReallocatedPtNodePos,
newNodeCodePoints + overlappingCodePointCount,
newNodeCodePointCount - overlappingCodePointCount, probabilityOfNewPtNode,
&writingPos)) {
@ -344,9 +348,14 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) {
return false;
}
// Update original reallocatingPtNode as moved.
if (!markNodeAsMovedAndSetPosition(reallocatingPtNode, firstPartOfReallocatedPtNodePos,
secondPartOfReallocatedPtNodePos)) {
return false;
}
// Load node info. Information of the 1st part will be fetched.
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
nodeReader.fetchNodeInfoFromBuffer(firstPtNodePos);
nodeReader.fetchNodeInfoFromBuffer(firstPartOfReallocatedPtNodePos);
// Update children position.
int childrenPosFieldPos = nodeReader.getChildrenPosFieldPos();
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer,

View file

@ -54,7 +54,7 @@ class DynamicPatriciaTrieWritingHelper {
DynamicShortcutListPolicy *const mShortcutPolicy;
bool markNodeAsMovedAndSetPosition(const DynamicPatriciaTrieNodeReader *const nodeToUpdate,
const int movedPos);
const int movedPos, const int bigramLinkedNodePos);
bool writePtNodeWithFullInfoToBuffer(const bool isBlacklisted, const bool isNotAWord,
const int parentPos, const int *const codePoints, const int codePointCount,