Merge "Use extendable buffer for reading node info."
This commit is contained in:
commit
2ce9f78b6e
4 changed files with 45 additions and 27 deletions
|
@ -19,34 +19,44 @@
|
||||||
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
|
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
|
||||||
#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
|
#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
|
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/utils/extendable_buffer.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
void DynamicPatriciaTrieNodeReader::fetchNodeInfoFromBufferAndProcessMovedNode(const int nodePos,
|
void DynamicPatriciaTrieNodeReader::fetchNodeInfoFromBufferAndProcessMovedNode(const int nodePos,
|
||||||
const int maxCodePointCount, int *const outCodePoints) {
|
const int maxCodePointCount, int *const outCodePoints) {
|
||||||
int pos = nodePos;
|
const bool usesAdditionalBuffer = nodePos >= mOriginalDictSize;
|
||||||
mFlags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
|
const uint8_t *const dictBuf =
|
||||||
|
usesAdditionalBuffer ? mExtendableBuffer->getBuffer() : mDictRoot;
|
||||||
|
int pos = (usesAdditionalBuffer) ? nodePos - mOriginalDictSize : nodePos;
|
||||||
|
mFlags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
|
||||||
const int parentPos =
|
const int parentPos =
|
||||||
DynamicPatriciaTrieReadingUtils::getParentPosAndAdvancePosition(mDictRoot, &pos);
|
DynamicPatriciaTrieReadingUtils::getParentPosAndAdvancePosition(dictBuf, &pos);
|
||||||
mParentPos = (parentPos != 0) ? mNodePos + parentPos : NOT_A_DICT_POS;
|
mParentPos = (parentPos != 0) ? mNodePos + parentPos : NOT_A_DICT_POS;
|
||||||
if (outCodePoints != 0) {
|
if (outCodePoints != 0) {
|
||||||
mCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
|
mCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
|
||||||
mDictRoot, mFlags, maxCodePointCount, outCodePoints, &pos);
|
dictBuf, mFlags, maxCodePointCount, outCodePoints, &pos);
|
||||||
} else {
|
} else {
|
||||||
mCodePointCount = PatriciaTrieReadingUtils::skipCharacters(
|
mCodePointCount = PatriciaTrieReadingUtils::skipCharacters(
|
||||||
mDictRoot, mFlags, MAX_WORD_LENGTH, &pos);
|
dictBuf, mFlags, MAX_WORD_LENGTH, &pos);
|
||||||
}
|
}
|
||||||
if (isTerminal()) {
|
if (isTerminal()) {
|
||||||
mProbability = PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
|
mProbability = PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictBuf, &pos);
|
||||||
} else {
|
} else {
|
||||||
mProbability = NOT_A_PROBABILITY;
|
mProbability = NOT_A_PROBABILITY;
|
||||||
}
|
}
|
||||||
if (hasChildren()) {
|
if (hasChildren()) {
|
||||||
mChildrenPos = DynamicPatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
|
mChildrenPos = DynamicPatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
|
||||||
mDictRoot, mFlags, &pos);
|
dictBuf, mFlags, &pos);
|
||||||
|
if (usesAdditionalBuffer && mChildrenPos != NOT_A_DICT_POS) {
|
||||||
|
mChildrenPos += mOriginalDictSize;
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
mChildrenPos = NOT_A_DICT_POS;
|
mChildrenPos = NOT_A_DICT_POS;
|
||||||
}
|
}
|
||||||
|
if (usesAdditionalBuffer) {
|
||||||
|
pos += mOriginalDictSize;
|
||||||
|
}
|
||||||
if (PatriciaTrieReadingUtils::hasShortcutTargets(mFlags)) {
|
if (PatriciaTrieReadingUtils::hasShortcutTargets(mFlags)) {
|
||||||
mShortcutPos = pos;
|
mShortcutPos = pos;
|
||||||
mShortcutsPolicy->skipAllShortcuts(&pos);
|
mShortcutsPolicy->skipAllShortcuts(&pos);
|
||||||
|
|
|
@ -27,6 +27,7 @@ namespace latinime {
|
||||||
|
|
||||||
class DictionaryBigramsStructurePolicy;
|
class DictionaryBigramsStructurePolicy;
|
||||||
class DictionaryShortcutsStructurePolicy;
|
class DictionaryShortcutsStructurePolicy;
|
||||||
|
class ExtendableBuffer;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This class is used for helping to read nodes of dynamic patricia trie. This class handles moved
|
* This class is used for helping to read nodes of dynamic patricia trie. This class handles moved
|
||||||
|
@ -34,12 +35,14 @@ class DictionaryShortcutsStructurePolicy;
|
||||||
*/
|
*/
|
||||||
class DynamicPatriciaTrieNodeReader {
|
class DynamicPatriciaTrieNodeReader {
|
||||||
public:
|
public:
|
||||||
DynamicPatriciaTrieNodeReader(const uint8_t *const dictRoot,
|
DynamicPatriciaTrieNodeReader(const uint8_t *const dictRoot, const int originalDictSize,
|
||||||
|
const ExtendableBuffer *const extendableBuffer,
|
||||||
const DictionaryBigramsStructurePolicy *const bigramsPolicy,
|
const DictionaryBigramsStructurePolicy *const bigramsPolicy,
|
||||||
const DictionaryShortcutsStructurePolicy *const shortcutsPolicy)
|
const DictionaryShortcutsStructurePolicy *const shortcutsPolicy)
|
||||||
: mDictRoot(dictRoot), mBigramsPolicy(bigramsPolicy),
|
: mDictRoot(dictRoot), mOriginalDictSize(originalDictSize),
|
||||||
|
mExtendableBuffer(extendableBuffer), mBigramsPolicy(bigramsPolicy),
|
||||||
mShortcutsPolicy(shortcutsPolicy), mNodePos(NOT_A_VALID_WORD_POS), mFlags(0),
|
mShortcutsPolicy(shortcutsPolicy), mNodePos(NOT_A_VALID_WORD_POS), mFlags(0),
|
||||||
mParentPos(NOT_A_DICT_POS), mCodePointCount(0), mProbability(NOT_A_PROBABILITY),
|
mParentPos(NOT_A_DICT_POS), mCodePointCount(0), mProbability(NOT_A_PROBABILITY),
|
||||||
mChildrenPos(NOT_A_DICT_POS), mShortcutPos(NOT_A_DICT_POS),
|
mChildrenPos(NOT_A_DICT_POS), mShortcutPos(NOT_A_DICT_POS),
|
||||||
mBigramPos(NOT_A_DICT_POS), mSiblingPos(NOT_A_VALID_WORD_POS) {}
|
mBigramPos(NOT_A_DICT_POS), mSiblingPos(NOT_A_VALID_WORD_POS) {}
|
||||||
|
|
||||||
|
@ -123,6 +126,8 @@ class DynamicPatriciaTrieNodeReader {
|
||||||
|
|
||||||
// TODO: Consolidate mDictRoot.
|
// TODO: Consolidate mDictRoot.
|
||||||
const uint8_t *const mDictRoot;
|
const uint8_t *const mDictRoot;
|
||||||
|
const int mOriginalDictSize;
|
||||||
|
const ExtendableBuffer *const mExtendableBuffer;
|
||||||
const DictionaryBigramsStructurePolicy *const mBigramsPolicy;
|
const DictionaryBigramsStructurePolicy *const mBigramsPolicy;
|
||||||
const DictionaryShortcutsStructurePolicy *const mShortcutsPolicy;
|
const DictionaryShortcutsStructurePolicy *const mShortcutsPolicy;
|
||||||
int mNodePos;
|
int mNodePos;
|
||||||
|
|
|
@ -33,8 +33,8 @@ void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const d
|
||||||
if (!dicNode->hasChildren()) {
|
if (!dicNode->hasChildren()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(),
|
DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, mOriginalDictSize, &mExtendableBuffer,
|
||||||
getShortcutsStructurePolicy());
|
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
||||||
int mergedNodeCodePoints[MAX_WORD_LENGTH];
|
int mergedNodeCodePoints[MAX_WORD_LENGTH];
|
||||||
int nextPos = dicNode->getChildrenPos();
|
int nextPos = dicNode->getChildrenPos();
|
||||||
int totalChildCount = 0;
|
int totalChildCount = 0;
|
||||||
|
@ -79,8 +79,8 @@ int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCoun
|
||||||
int mergedNodeCodePoints[maxCodePointCount];
|
int mergedNodeCodePoints[maxCodePointCount];
|
||||||
int codePointCount = 0;
|
int codePointCount = 0;
|
||||||
|
|
||||||
DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(),
|
DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, mOriginalDictSize, &mExtendableBuffer,
|
||||||
getShortcutsStructurePolicy());
|
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
||||||
// First, read terminal node and get its probability.
|
// First, read terminal node and get its probability.
|
||||||
nodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(nodePos, maxCodePointCount,
|
nodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(nodePos, maxCodePointCount,
|
||||||
mergedNodeCodePoints);
|
mergedNodeCodePoints);
|
||||||
|
@ -124,8 +124,8 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const in
|
||||||
int mergedNodeCodePoints[MAX_WORD_LENGTH];
|
int mergedNodeCodePoints[MAX_WORD_LENGTH];
|
||||||
int currentLength = 0;
|
int currentLength = 0;
|
||||||
int pos = getRootPosition();
|
int pos = getRootPosition();
|
||||||
DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(),
|
DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, mOriginalDictSize, &mExtendableBuffer,
|
||||||
getShortcutsStructurePolicy());
|
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
||||||
while (currentLength < length) {
|
while (currentLength < length) {
|
||||||
// When foundMatchedNode becomes true, currentLength is increased at least once.
|
// When foundMatchedNode becomes true, currentLength is increased at least once.
|
||||||
bool foundMatchedNode = false;
|
bool foundMatchedNode = false;
|
||||||
|
@ -198,8 +198,8 @@ int DynamicPatriciaTriePolicy::getUnigramProbability(const int nodePos) const {
|
||||||
if (nodePos == NOT_A_VALID_WORD_POS) {
|
if (nodePos == NOT_A_VALID_WORD_POS) {
|
||||||
return NOT_A_PROBABILITY;
|
return NOT_A_PROBABILITY;
|
||||||
}
|
}
|
||||||
DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(),
|
DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, mOriginalDictSize, &mExtendableBuffer,
|
||||||
getShortcutsStructurePolicy());
|
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
||||||
nodeReader.fetchNodeInfoFromBuffer(nodePos);
|
nodeReader.fetchNodeInfoFromBuffer(nodePos);
|
||||||
if (nodeReader.isDeleted() || nodeReader.isBlacklisted() || nodeReader.isNotAWord()) {
|
if (nodeReader.isDeleted() || nodeReader.isBlacklisted() || nodeReader.isNotAWord()) {
|
||||||
return NOT_A_PROBABILITY;
|
return NOT_A_PROBABILITY;
|
||||||
|
@ -211,8 +211,8 @@ int DynamicPatriciaTriePolicy::getShortcutPositionOfNode(const int nodePos) cons
|
||||||
if (nodePos == NOT_A_VALID_WORD_POS) {
|
if (nodePos == NOT_A_VALID_WORD_POS) {
|
||||||
return NOT_A_DICT_POS;
|
return NOT_A_DICT_POS;
|
||||||
}
|
}
|
||||||
DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(),
|
DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, mOriginalDictSize, &mExtendableBuffer,
|
||||||
getShortcutsStructurePolicy());
|
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
||||||
nodeReader.fetchNodeInfoFromBuffer(nodePos);
|
nodeReader.fetchNodeInfoFromBuffer(nodePos);
|
||||||
if (nodeReader.isDeleted()) {
|
if (nodeReader.isDeleted()) {
|
||||||
return NOT_A_DICT_POS;
|
return NOT_A_DICT_POS;
|
||||||
|
@ -224,8 +224,8 @@ int DynamicPatriciaTriePolicy::getBigramsPositionOfNode(const int nodePos) const
|
||||||
if (nodePos == NOT_A_VALID_WORD_POS) {
|
if (nodePos == NOT_A_VALID_WORD_POS) {
|
||||||
return NOT_A_DICT_POS;
|
return NOT_A_DICT_POS;
|
||||||
}
|
}
|
||||||
DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(),
|
DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, mOriginalDictSize, &mExtendableBuffer,
|
||||||
getShortcutsStructurePolicy());
|
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
||||||
nodeReader.fetchNodeInfoFromBuffer(nodePos);
|
nodeReader.fetchNodeInfoFromBuffer(nodePos);
|
||||||
if (nodeReader.isDeleted()) {
|
if (nodeReader.isDeleted()) {
|
||||||
return NOT_A_DICT_POS;
|
return NOT_A_DICT_POS;
|
||||||
|
|
|
@ -21,9 +21,9 @@
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
|
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/bigram/bigram_list_policy.h"
|
#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/header/header_policy.h"
|
#include "suggest/policyimpl/dictionary/header/header_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h"
|
#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/extendable_buffer.h"
|
#include "suggest/policyimpl/dictionary/utils/extendable_buffer.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
|
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
|
||||||
|
|
||||||
|
@ -37,7 +37,9 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
DynamicPatriciaTriePolicy(const MmappedBuffer *const buffer)
|
DynamicPatriciaTriePolicy(const MmappedBuffer *const buffer)
|
||||||
: mBuffer(buffer), mExtendableBuffer(), mHeaderPolicy(mBuffer->getBuffer()),
|
: mBuffer(buffer), mExtendableBuffer(), mHeaderPolicy(mBuffer->getBuffer()),
|
||||||
mDictRoot(mBuffer->getBuffer() + mHeaderPolicy.getSize()),
|
mDictRoot(mBuffer->getBuffer() + mHeaderPolicy.getSize()),
|
||||||
mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot) {}
|
mOriginalDictSize(mBuffer->getBufferSize() - mHeaderPolicy.getSize()),
|
||||||
|
mBigramListPolicy(mDictRoot, mOriginalDictSize, &mExtendableBuffer),
|
||||||
|
mShortcutListPolicy(mDictRoot, mOriginalDictSize, &mExtendableBuffer) {}
|
||||||
|
|
||||||
~DynamicPatriciaTriePolicy() {
|
~DynamicPatriciaTriePolicy() {
|
||||||
delete mBuffer;
|
delete mBuffer;
|
||||||
|
@ -93,8 +95,9 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
// TODO: Consolidate mDictRoot.
|
// TODO: Consolidate mDictRoot.
|
||||||
// CAVEAT!: Be careful about array out of bound access with mDictRoot
|
// CAVEAT!: Be careful about array out of bound access with mDictRoot
|
||||||
const uint8_t *const mDictRoot;
|
const uint8_t *const mDictRoot;
|
||||||
const BigramListPolicy mBigramListPolicy;
|
const int mOriginalDictSize;
|
||||||
const ShortcutListPolicy mShortcutListPolicy;
|
const DynamicBigramListPolicy mBigramListPolicy;
|
||||||
|
const DynamicShortcutListPolicy mShortcutListPolicy;
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H
|
#endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H
|
||||||
|
|
Loading…
Reference in a new issue