am 535eefa9: Merge "Implement ver4 dict PtNode reading methods."
* commit '535eefa9bd350e317d7985b84ccf989483938120': Implement ver4 dict PtNode reading methods.main
commit
28785ebc30
|
@ -87,6 +87,7 @@ LATIN_IME_CORE_SRC_FILES := \
|
||||||
dynamic_patricia_trie_writing_utils.cpp) \
|
dynamic_patricia_trie_writing_utils.cpp) \
|
||||||
$(addprefix suggest/policyimpl/dictionary/structure/v4/, \
|
$(addprefix suggest/policyimpl/dictionary/structure/v4/, \
|
||||||
ver4_dict_constants.cpp \
|
ver4_dict_constants.cpp \
|
||||||
|
ver4_patricia_trie_node_reader.cpp \
|
||||||
ver4_patricia_trie_policy.cpp \
|
ver4_patricia_trie_policy.cpp \
|
||||||
ver4_patricia_trie_reading_utils.cpp ) \
|
ver4_patricia_trie_reading_utils.cpp ) \
|
||||||
$(addprefix suggest/policyimpl/dictionary/utils/, \
|
$(addprefix suggest/policyimpl/dictionary/utils/, \
|
||||||
|
|
|
@ -32,7 +32,7 @@ class PtNodeParams {
|
||||||
// Invalid PtNode.
|
// Invalid PtNode.
|
||||||
PtNodeParams() : mHeadPos(NOT_A_DICT_POS), mFlags(0), mParentPos(NOT_A_DICT_POS),
|
PtNodeParams() : mHeadPos(NOT_A_DICT_POS), mFlags(0), mParentPos(NOT_A_DICT_POS),
|
||||||
mCodePointCount(0), mCodePoints(), mTerminalIdFieldPos(NOT_A_DICT_POS),
|
mCodePointCount(0), mCodePoints(), mTerminalIdFieldPos(NOT_A_DICT_POS),
|
||||||
mTerminalId(Ver4DictConstants::NOT_A_TERMINAL), mProbabilityFieldPos(NOT_A_DICT_POS),
|
mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID), mProbabilityFieldPos(NOT_A_DICT_POS),
|
||||||
mProbability(NOT_A_PROBABILITY), mChildrenPosFieldPos(NOT_A_DICT_POS),
|
mProbability(NOT_A_PROBABILITY), mChildrenPosFieldPos(NOT_A_DICT_POS),
|
||||||
mChildrenPos(NOT_A_DICT_POS), mBigramLinkedNodePos(NOT_A_DICT_POS),
|
mChildrenPos(NOT_A_DICT_POS), mBigramLinkedNodePos(NOT_A_DICT_POS),
|
||||||
mShortcutPos(NOT_A_DICT_POS), mBigramPos(NOT_A_DICT_POS),
|
mShortcutPos(NOT_A_DICT_POS), mBigramPos(NOT_A_DICT_POS),
|
||||||
|
@ -53,6 +53,7 @@ class PtNodeParams {
|
||||||
memcpy(mCodePoints, ptNodeParams.getCodePoints(), sizeof(int) * mCodePointCount);
|
memcpy(mCodePoints, ptNodeParams.getCodePoints(), sizeof(int) * mCodePointCount);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// PtNode without terminal id.
|
||||||
PtNodeParams(const int headPos, const PatriciaTrieReadingUtils::NodeFlags flags,
|
PtNodeParams(const int headPos, const PatriciaTrieReadingUtils::NodeFlags flags,
|
||||||
const int parentPos, const int codePointCount, const int *const codePoints,
|
const int parentPos, const int codePointCount, const int *const codePoints,
|
||||||
const int probabilityFieldPos, const int probability, const int childrenPosFieldPos,
|
const int probabilityFieldPos, const int probability, const int childrenPosFieldPos,
|
||||||
|
@ -60,7 +61,8 @@ class PtNodeParams {
|
||||||
const int bigramPos, const int siblingPos)
|
const int bigramPos, const int siblingPos)
|
||||||
: mHeadPos(headPos), mFlags(flags), mParentPos(parentPos),
|
: mHeadPos(headPos), mFlags(flags), mParentPos(parentPos),
|
||||||
mCodePointCount(codePointCount), mCodePoints(),
|
mCodePointCount(codePointCount), mCodePoints(),
|
||||||
mTerminalIdFieldPos(NOT_A_DICT_POS), mTerminalId(Ver4DictConstants::NOT_A_TERMINAL),
|
mTerminalIdFieldPos(NOT_A_DICT_POS),
|
||||||
|
mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID),
|
||||||
mProbabilityFieldPos(probabilityFieldPos), mProbability(probability),
|
mProbabilityFieldPos(probabilityFieldPos), mProbability(probability),
|
||||||
mChildrenPosFieldPos(childrenPosFieldPos), mChildrenPos(childrenPos),
|
mChildrenPosFieldPos(childrenPosFieldPos), mChildrenPos(childrenPos),
|
||||||
mBigramLinkedNodePos(bigramLinkedNodePos), mShortcutPos(shortcutPos),
|
mBigramLinkedNodePos(bigramLinkedNodePos), mShortcutPos(shortcutPos),
|
||||||
|
@ -68,6 +70,22 @@ class PtNodeParams {
|
||||||
memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount);
|
memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// PtNode with a terminal id.
|
||||||
|
PtNodeParams(const int headPos, const PatriciaTrieReadingUtils::NodeFlags flags,
|
||||||
|
const int parentPos, const int codePointCount, const int *const codePoints,
|
||||||
|
const int terminalIdFieldPos, const int terminalId, const int probability,
|
||||||
|
const int childrenPosFieldPos, const int childrenPos, const int bigramLinkedNodePos,
|
||||||
|
const int siblingPos)
|
||||||
|
: mHeadPos(headPos), mFlags(flags), mParentPos(parentPos),
|
||||||
|
mCodePointCount(codePointCount), mCodePoints(),
|
||||||
|
mTerminalIdFieldPos(terminalIdFieldPos), mTerminalId(terminalId),
|
||||||
|
mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(probability),
|
||||||
|
mChildrenPosFieldPos(childrenPosFieldPos), mChildrenPos(childrenPos),
|
||||||
|
mBigramLinkedNodePos(bigramLinkedNodePos), mShortcutPos(terminalId),
|
||||||
|
mBigramPos(terminalId), mSiblingPos(siblingPos) {
|
||||||
|
memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount);
|
||||||
|
}
|
||||||
|
|
||||||
// Reports whether these params describe a real PtNode, i.e. one with at least one code point.
AK_FORCE_INLINE bool isValid() const {
    const bool hasCodePoints = mCodePointCount > 0;
    return hasCodePoints;
}
|
||||||
|
|
|
@ -30,7 +30,7 @@ const char *const Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION = ".sh
|
||||||
const char *const Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION =
|
const char *const Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION =
|
||||||
".shortcut_index_shortcut";
|
".shortcut_index_shortcut";
|
||||||
|
|
||||||
const int Ver4DictConstants::NOT_A_TERMINAL = -1;
|
const int Ver4DictConstants::NOT_A_TERMINAL_ID = -1;
|
||||||
const int Ver4DictConstants::PROBABILITY_SIZE = 1;
|
const int Ver4DictConstants::PROBABILITY_SIZE = 1;
|
||||||
const int Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE = 1;
|
const int Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE = 1;
|
||||||
|
|
||||||
|
|
|
@ -34,7 +34,7 @@ class Ver4DictConstants {
|
||||||
static const char *const SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION;
|
static const char *const SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION;
|
||||||
static const char *const SHORTCUT_CONTENT_TABLE_FILE_EXTENSION;
|
static const char *const SHORTCUT_CONTENT_TABLE_FILE_EXTENSION;
|
||||||
|
|
||||||
static const int NOT_A_TERMINAL;
|
static const int NOT_A_TERMINAL_ID;
|
||||||
static const int PROBABILITY_SIZE;
|
static const int PROBABILITY_SIZE;
|
||||||
static const int FLAGS_IN_PROBABILITY_FILE_SIZE;
|
static const int FLAGS_IN_PROBABILITY_FILE_SIZE;
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,95 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2013, The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
|
||||||
|
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
// Reads the PtNode stored at ptNodePos and returns its parameters.
//
// When the node is flagged as moved, the read is redirected to the destination node, whose
// position is stored in the parent position field. The sibling position is carried over, and
// if no sibling position was supplied the children position of this node becomes the
// bigram-linked node position. An out-of-range ptNodePos is logged, asserted on, and answered
// with an invalid (default constructed) PtNodeParams.
const PtNodeParams Ver4PatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProcessMovedPtNode(
        const int ptNodePos, const int siblingNodePos, const int bigramLinkedNodePos) const {
    const bool isOutOfRange = ptNodePos < 0 || ptNodePos >= mBuffer->getTailPosition();
    if (isOutOfRange) {
        // Reading invalid position because of bug or broken dictionary.
        AKLOGE("Fetching PtNode info from invalid dictionary position: %d, dictionary size: %d",
                ptNodePos, mBuffer->getTailPosition());
        ASSERT(false);
        return PtNodeParams();
    }

    const bool inAdditionalBuffer = mBuffer->isInAdditionalBuffer(ptNodePos);
    const uint8_t *const rawBuffer = mBuffer->getBuffer(inAdditionalBuffer);
    // Reads from the additional buffer use positions relative to that buffer, while the
    // positions reported to callers include the original buffer size. This offset converts
    // between the two; it is zero for the original buffer.
    const int globalOffset = inAdditionalBuffer ? mBuffer->getOriginalBufferSize() : 0;
    int readPos = ptNodePos - globalOffset;

    const PatriciaTrieReadingUtils::NodeFlags nodeFlags =
            PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(rawBuffer, &readPos);
    const int parentOffset =
            DynamicPatriciaTrieReadingUtils::getParentPtNodePosOffsetAndAdvancePosition(
                    rawBuffer, &readPos);
    const int parentPos =
            DynamicPatriciaTrieReadingUtils::getParentPtNodePos(parentOffset, ptNodePos);
    int codePoints[MAX_WORD_LENGTH];
    const int codePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
            rawBuffer, nodeFlags, MAX_WORD_LENGTH, codePoints, &readPos);

    // Only terminal nodes have a terminal id; the probability is looked up through it.
    int terminalIdFieldPos = NOT_A_DICT_POS;
    int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
    int probability = NOT_A_PROBABILITY;
    if (PatriciaTrieReadingUtils::isTerminal(nodeFlags)) {
        terminalIdFieldPos = readPos + globalOffset;
        terminalId = Ver4PatriciaTrieReadingUtils::getTerminalIdAndAdvancePosition(
                rawBuffer, &readPos);
        probability = mProbabilityDictContent->getProbability(terminalId);
    }

    const int childrenPosFieldPos = readPos + globalOffset;
    int childrenPos = DynamicPatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
            rawBuffer, &readPos);
    if (childrenPos != NOT_A_DICT_POS) {
        childrenPos += globalOffset;
    }

    const bool isMovedNode = DynamicPatriciaTrieReadingUtils::isMoved(nodeFlags);
    const bool siblingPosUnset = (siblingNodePos == NOT_A_DICT_POS);
    const int nextBigramLinkedNodePos =
            (siblingPosUnset && isMovedNode) ? childrenPos : bigramLinkedNodePos;
    // Sibling position is the tail position of original PtNode.
    const int nextSiblingNodePos = siblingPosUnset ? (readPos + globalOffset) : siblingNodePos;

    if (isMovedNode) {
        // The destination position is stored at the same place as the parent position.
        return fetchPtNodeInfoFromBufferAndProcessMovedPtNode(parentPos, nextSiblingNodePos,
                nextBigramLinkedNodePos);
    }
    return PtNodeParams(ptNodePos, nodeFlags, parentPos, codePointCount, codePoints,
            terminalIdFieldPos, terminalId, probability, childrenPosFieldPos, childrenPos,
            nextBigramLinkedNodePos, nextSiblingNodePos);
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,58 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2013, The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LATINIME_VER4_PATRICIA_TRIE_NODE_READER_H
|
||||||
|
#define LATINIME_VER4_PATRICIA_TRIE_NODE_READER_H
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#include "defines.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
class BufferWithExtendableBuffer;
|
||||||
|
class ProbabilityDictContent;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This class is used for helping to read nodes of ver4 patricia trie. This class handles moved
|
||||||
|
* node and reads node attributes including probability from probabilityBuffer.
|
||||||
|
*/
|
||||||
|
class Ver4PatriciaTrieNodeReader : public PtNodeReader {
|
||||||
|
public:
|
||||||
|
Ver4PatriciaTrieNodeReader(const BufferWithExtendableBuffer *const buffer,
|
||||||
|
const ProbabilityDictContent *const probabilityDictContent)
|
||||||
|
: mBuffer(buffer), mProbabilityDictContent(probabilityDictContent) {}
|
||||||
|
|
||||||
|
~Ver4PatriciaTrieNodeReader() {}
|
||||||
|
|
||||||
|
virtual const PtNodeParams fetchNodeInfoInBufferFromPtNodePos(const int ptNodePos) const {
|
||||||
|
return fetchPtNodeInfoFromBufferAndProcessMovedPtNode(ptNodePos,
|
||||||
|
NOT_A_DICT_POS /* siblingNodePos */, NOT_A_DICT_POS /* bigramLinkedNodePos */);
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
DISALLOW_COPY_AND_ASSIGN(Ver4PatriciaTrieNodeReader);
|
||||||
|
|
||||||
|
const BufferWithExtendableBuffer *const mBuffer;
|
||||||
|
const ProbabilityDictContent *const mProbabilityDictContent;
|
||||||
|
|
||||||
|
const PtNodeParams fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos,
|
||||||
|
const int siblingNodePos, const int bigramLinkedNodePos) const;
|
||||||
|
};
|
||||||
|
} // namespace latinime
|
||||||
|
#endif /* LATINIME_VER4_PATRICIA_TRIE_NODE_READER_H */
|
|
@ -16,46 +16,111 @@
|
||||||
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h"
|
||||||
|
|
||||||
|
#include "suggest/core/dicnode/dic_node.h"
|
||||||
|
#include "suggest/core/dicnode/dic_node_vector.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
|
void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
|
||||||
DicNodeVector *const childDicNodes) const {
|
DicNodeVector *const childDicNodes) const {
|
||||||
// TODO: Implement.
|
if (!dicNode->hasChildren()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
DynamicPatriciaTrieReadingHelper readingHelper(&mDictBuffer, &mNodeReader);
|
||||||
|
readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPtNodeArrayPos());
|
||||||
|
while (!readingHelper.isEnd()) {
|
||||||
|
const PtNodeParams ptNodeParams = readingHelper.getPtNodeParams();
|
||||||
|
if (!ptNodeParams.isValid()) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
bool isTerminal = ptNodeParams.isTerminal() && !ptNodeParams.isDeleted();
|
||||||
|
if (isTerminal && mHeaderPolicy.isDecayingDict()) {
|
||||||
|
// A DecayingDict may have a terminal PtNode that has a terminal DicNode whose
|
||||||
|
// probability is NOT_A_PROBABILITY. In such case, we don't want to treat it as a
|
||||||
|
// valid terminal DicNode.
|
||||||
|
isTerminal = getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY)
|
||||||
|
!= NOT_A_PROBABILITY;
|
||||||
|
}
|
||||||
|
childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getHeadPos(),
|
||||||
|
ptNodeParams.getChildrenPos(), ptNodeParams.getProbability(), isTerminal,
|
||||||
|
ptNodeParams.hasChildren(),
|
||||||
|
ptNodeParams.isBlacklisted()
|
||||||
|
|| ptNodeParams.isNotAWord() /* isBlacklistedOrNotAWord */,
|
||||||
|
ptNodeParams.getCodePointCount(), ptNodeParams.getCodePoints());
|
||||||
|
readingHelper.readNextSiblingNode(ptNodeParams);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
|
int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
|
||||||
const int ptNodePos, const int maxCodePointCount, int *const outCodePoints,
|
const int ptNodePos, const int maxCodePointCount, int *const outCodePoints,
|
||||||
int *const outUnigramProbability) const {
|
int *const outUnigramProbability) const {
|
||||||
// TODO: Implement.
|
DynamicPatriciaTrieReadingHelper readingHelper(&mDictBuffer, &mNodeReader);
|
||||||
return 0;
|
readingHelper.initWithPtNodePos(ptNodePos);
|
||||||
|
return readingHelper.getCodePointsAndProbabilityAndReturnCodePointCount(
|
||||||
|
maxCodePointCount, outCodePoints, outUnigramProbability);
|
||||||
}
|
}
|
||||||
|
|
||||||
int Ver4PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
|
int Ver4PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
|
||||||
const int length, const bool forceLowerCaseSearch) const {
|
const int length, const bool forceLowerCaseSearch) const {
|
||||||
// TODO: Implement.
|
DynamicPatriciaTrieReadingHelper readingHelper(&mDictBuffer, &mNodeReader);
|
||||||
return NOT_A_DICT_POS;
|
readingHelper.initWithPtNodeArrayPos(getRootPosition());
|
||||||
|
return readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch);
|
||||||
}
|
}
|
||||||
|
|
||||||
int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability,
|
int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability,
|
||||||
const int bigramProbability) const {
|
const int bigramProbability) const {
|
||||||
// TODO: Implement.
|
if (mHeaderPolicy.isDecayingDict()) {
|
||||||
|
// Both probabilities are encoded. Decode them and get probability.
|
||||||
|
return ForgettingCurveUtils::getProbability(unigramProbability, bigramProbability);
|
||||||
|
} else {
|
||||||
|
if (unigramProbability == NOT_A_PROBABILITY) {
|
||||||
return NOT_A_PROBABILITY;
|
return NOT_A_PROBABILITY;
|
||||||
|
} else if (bigramProbability == NOT_A_PROBABILITY) {
|
||||||
|
return ProbabilityUtils::backoff(unigramProbability);
|
||||||
|
} else {
|
||||||
|
// bigramProbability is a bigram probability delta.
|
||||||
|
return ProbabilityUtils::computeProbabilityForBigram(unigramProbability,
|
||||||
|
bigramProbability);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int Ver4PatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos) const {
|
int Ver4PatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos) const {
|
||||||
// TODO: Implement.
|
if (ptNodePos == NOT_A_DICT_POS) {
|
||||||
return NOT_A_PROBABILITY;
|
return NOT_A_PROBABILITY;
|
||||||
}
|
}
|
||||||
|
const PtNodeParams ptNodeParams(mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos));
|
||||||
|
if (ptNodeParams.isDeleted() || ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord()) {
|
||||||
|
return NOT_A_PROBABILITY;
|
||||||
|
}
|
||||||
|
return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
|
||||||
|
}
|
||||||
|
|
||||||
int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
|
int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
|
||||||
// TODO: Implement.
|
if (ptNodePos == NOT_A_DICT_POS) {
|
||||||
return NOT_A_DICT_POS;
|
return NOT_A_DICT_POS;
|
||||||
}
|
}
|
||||||
|
const PtNodeParams ptNodeParams(mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos));
|
||||||
|
if (ptNodeParams.isDeleted()) {
|
||||||
|
return NOT_A_DICT_POS;
|
||||||
|
}
|
||||||
|
return ptNodeParams.getTerminalId();
|
||||||
|
}
|
||||||
|
|
||||||
int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
|
int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
|
||||||
// TODO: Implement.
|
if (ptNodePos == NOT_A_DICT_POS) {
|
||||||
return NOT_A_DICT_POS;
|
return NOT_A_DICT_POS;
|
||||||
}
|
}
|
||||||
|
const PtNodeParams ptNodeParams(mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos));
|
||||||
|
if (ptNodeParams.isDeleted()) {
|
||||||
|
return NOT_A_DICT_POS;
|
||||||
|
}
|
||||||
|
return ptNodeParams.getTerminalId();
|
||||||
|
}
|
||||||
|
|
||||||
bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int length,
|
bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int length,
|
||||||
const int probability) {
|
const int probability) {
|
||||||
|
|
|
@ -21,6 +21,7 @@
|
||||||
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
|
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/header/header_policy.h"
|
#include "suggest/policyimpl/dictionary/header/header_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
@ -33,7 +34,11 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
public:
|
public:
|
||||||
// Sets up reading state on top of the given buffers.
// The header policy parses the raw dictionary buffer as a version 4 header. mDictBuffer then
// covers the rest of that raw buffer: it starts mHeaderPolicy.getSize() bytes in and is that
// many bytes shorter. mNodeReader reads PtNodes from mDictBuffer and takes probabilities from
// the probability dict content. Later initializers use earlier members, so the member
// declaration order matters.
Ver4PatriciaTriePolicy(const Ver4DictBuffers::Ver4DictBuffersPtr &buffers)
        : mBuffers(buffers),
          mHeaderPolicy(mBuffers.get()->getRawDictBuffer(), FormatUtils::VERSION_4),
          mDictBuffer(mBuffers.get()->getRawDictBuffer() + mHeaderPolicy.getSize(),
                  mBuffers.get()->getRawDictBufferSize() - mHeaderPolicy.getSize(),
                  BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
          mNodeReader(&mDictBuffer, mBuffers.get()->getProbabilityDictContent()) {}
|
||||||
|
|
||||||
AK_FORCE_INLINE int getRootPosition() const {
|
AK_FORCE_INLINE int getRootPosition() const {
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -91,6 +96,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
|
|
||||||
const Ver4DictBuffers::Ver4DictBuffersPtr mBuffers;
|
const Ver4DictBuffers::Ver4DictBuffersPtr mBuffers;
|
||||||
const HeaderPolicy mHeaderPolicy;
|
const HeaderPolicy mHeaderPolicy;
|
||||||
|
BufferWithExtendableBuffer mDictBuffer;
|
||||||
|
Ver4PatriciaTrieNodeReader mNodeReader;
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif // LATINIME_VER4_PATRICIA_TRIE_POLICY_H
|
#endif // LATINIME_VER4_PATRICIA_TRIE_POLICY_H
|
||||||
|
|
|
@ -18,9 +18,15 @@
|
||||||
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
|
/* static */ int Ver4PatriciaTrieReadingUtils::getTerminalIdAndAdvancePosition(
|
||||||
|
const uint8_t *const buffer, int *pos) {
|
||||||
|
return ByteArrayUtils::readUint32AndAdvancePosition(buffer, pos);
|
||||||
|
}
|
||||||
|
|
||||||
/* static */ int Ver4PatriciaTrieReadingUtils::getProbability(
|
/* static */ int Ver4PatriciaTrieReadingUtils::getProbability(
|
||||||
const BufferWithExtendableBuffer *const probabilityBuffer, const int terminalId) {
|
const BufferWithExtendableBuffer *const probabilityBuffer, const int terminalId) {
|
||||||
int pos = terminalId * (Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE
|
int pos = terminalId * (Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE
|
||||||
|
|
|
@ -27,6 +27,9 @@ class BufferWithExtendableBuffer;
|
||||||
|
|
||||||
class Ver4PatriciaTrieReadingUtils {
|
class Ver4PatriciaTrieReadingUtils {
|
||||||
public:
|
public:
|
||||||
|
static int getTerminalIdAndAdvancePosition(const uint8_t *const buffer,
|
||||||
|
int *const pos);
|
||||||
|
|
||||||
static int getProbability(const BufferWithExtendableBuffer *const probabilityBuffer,
|
static int getProbability(const BufferWithExtendableBuffer *const probabilityBuffer,
|
||||||
const int terminalId);
|
const int terminalId);
|
||||||
|
|
||||||
|
|
|
@ -33,6 +33,7 @@ import java.io.IOException;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Locale;
|
import java.util.Locale;
|
||||||
|
|
||||||
|
// TODO: Add a test to evaluate the speed of operations of Ver4 dictionary.
|
||||||
@LargeTest
|
@LargeTest
|
||||||
public class Ver4BinaryDictionaryTests extends AndroidTestCase {
|
public class Ver4BinaryDictionaryTests extends AndroidTestCase {
|
||||||
private static final String TAG = Ver4BinaryDictionaryTests.class.getSimpleName();
|
private static final String TAG = Ver4BinaryDictionaryTests.class.getSimpleName();
|
||||||
|
@ -90,4 +91,33 @@ public class Ver4BinaryDictionaryTests extends AndroidTestCase {
|
||||||
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
||||||
assertTrue(binaryDictionary.isValidDictionary());
|
assertTrue(binaryDictionary.isValidDictionary());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: Add large tests.
|
||||||
|
public void testReadProbability() {
|
||||||
|
final String dictVersion = Long.toString(System.currentTimeMillis());
|
||||||
|
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
|
||||||
|
getDictionaryOptions(TEST_LOCALE, dictVersion));
|
||||||
|
|
||||||
|
final int frequency = 100;
|
||||||
|
dict.add("a", frequency, null, false /* isNotAWord */);
|
||||||
|
dict.add("aaa", frequency, null, false /* isNotAWord */);
|
||||||
|
dict.add("ab", frequency, null, false /* isNotAWord */);
|
||||||
|
|
||||||
|
DictEncoder encoder = new Ver4DictEncoder(getContext().getCacheDir());
|
||||||
|
try {
|
||||||
|
encoder.writeDictionary(dict, FORMAT_OPTIONS);
|
||||||
|
} catch (IOException e) {
|
||||||
|
Log.e(TAG, "IOException while writing dictionary", e);
|
||||||
|
} catch (UnsupportedFormatException e) {
|
||||||
|
Log.e(TAG, "Unsupported format", e);
|
||||||
|
}
|
||||||
|
File trieFile = getTrieFile(TEST_LOCALE, dictVersion);
|
||||||
|
BinaryDictionary binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(),
|
||||||
|
0 /* offset */, trieFile.length(), true /* useFullEditDistance */,
|
||||||
|
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
||||||
|
assertTrue(binaryDictionary.isValidDictionary());
|
||||||
|
assertEquals(frequency, binaryDictionary.getFrequency("a"));
|
||||||
|
assertEquals(frequency, binaryDictionary.getFrequency("aaa"));
|
||||||
|
assertEquals(frequency, binaryDictionary.getFrequency("ab"));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue