Move dicnode to AOSP
Bug: 8187060 Change-Id: I72398fa45b12683bd46d23c5ca69e6bcd5ca2b7emain
parent
c1dec87a76
commit
38c26dd0bf
|
@ -26,7 +26,10 @@ include $(CLEAR_VARS)
|
||||||
LATIN_IME_SRC_DIR := src
|
LATIN_IME_SRC_DIR := src
|
||||||
LATIN_IME_SRC_FULLPATH_DIR := $(LOCAL_PATH)/$(LATIN_IME_SRC_DIR)
|
LATIN_IME_SRC_FULLPATH_DIR := $(LOCAL_PATH)/$(LATIN_IME_SRC_DIR)
|
||||||
|
|
||||||
LOCAL_C_INCLUDES += $(LATIN_IME_SRC_FULLPATH_DIR) $(LATIN_IME_SRC_FULLPATH_DIR)/suggest
|
LOCAL_C_INCLUDES += \
|
||||||
|
$(LATIN_IME_SRC_FULLPATH_DIR) \
|
||||||
|
$(LATIN_IME_SRC_FULLPATH_DIR)/suggest \
|
||||||
|
$(LATIN_IME_SRC_FULLPATH_DIR)/suggest/core/dicnode
|
||||||
|
|
||||||
LOCAL_CFLAGS += -Werror -Wall -Wextra -Weffc++ -Wformat=2 -Wcast-qual -Wcast-align \
|
LOCAL_CFLAGS += -Werror -Wall -Wextra -Weffc++ -Wformat=2 -Wcast-qual -Wcast-align \
|
||||||
-Wwrite-strings -Wfloat-equal -Wpointer-arith -Winit-self -Wredundant-decls -Wno-system-headers
|
-Wwrite-strings -Wfloat-equal -Wpointer-arith -Winit-self -Wredundant-decls -Wno-system-headers
|
||||||
|
@ -59,6 +62,8 @@ LATIN_IME_CORE_SRC_FILES := \
|
||||||
proximity_info_state_utils.cpp \
|
proximity_info_state_utils.cpp \
|
||||||
unigram_dictionary.cpp \
|
unigram_dictionary.cpp \
|
||||||
words_priority_queue.cpp \
|
words_priority_queue.cpp \
|
||||||
|
suggest/core/dicnode/dic_node.cpp \
|
||||||
|
suggest/core/dicnode/dic_node_utils.cpp \
|
||||||
suggest/gesture_suggest.cpp \
|
suggest/gesture_suggest.cpp \
|
||||||
suggest/typing_suggest.cpp
|
suggest/typing_suggest.cpp
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,44 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2012 The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "dic_node.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
DicNode::DicNode(const DicNode &dicNode)
|
||||||
|
:
|
||||||
|
#if DEBUG_DICT
|
||||||
|
mProfiler(dicNode.mProfiler),
|
||||||
|
#endif
|
||||||
|
mDicNodeProperties(dicNode.mDicNodeProperties), mDicNodeState(dicNode.mDicNodeState),
|
||||||
|
mIsCachedForNextSuggestion(dicNode.mIsCachedForNextSuggestion), mIsUsed(dicNode.mIsUsed),
|
||||||
|
mReleaseListener(0) {
|
||||||
|
/* empty */
|
||||||
|
}
|
||||||
|
|
||||||
|
DicNode &DicNode::operator=(const DicNode &dicNode) {
|
||||||
|
#if DEBUG_DICT
|
||||||
|
mProfiler = dicNode.mProfiler;
|
||||||
|
#endif
|
||||||
|
mDicNodeProperties = dicNode.mDicNodeProperties;
|
||||||
|
mDicNodeState = dicNode.mDicNodeState;
|
||||||
|
mIsCachedForNextSuggestion = dicNode.mIsCachedForNextSuggestion;
|
||||||
|
mIsUsed = dicNode.mIsUsed;
|
||||||
|
mReleaseListener = dicNode.mReleaseListener;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace latinime
|
|
@ -0,0 +1,572 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2012 The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LATINIME_DIC_NODE_H
|
||||||
|
#define LATINIME_DIC_NODE_H
|
||||||
|
|
||||||
|
#include "char_utils.h"
|
||||||
|
#include "defines.h"
|
||||||
|
#include "dic_node_state.h"
|
||||||
|
#include "dic_node_profiler.h"
|
||||||
|
#include "dic_node_properties.h"
|
||||||
|
#include "dic_node_release_listener.h"
|
||||||
|
|
||||||
|
#if DEBUG_DICT
|
||||||
|
#define LOGI_SHOW_ADD_COST_PROP \
|
||||||
|
do { char charBuf[50]; \
|
||||||
|
INTS_TO_CHARS(getOutputWordBuf(), getDepth(), charBuf); \
|
||||||
|
AKLOGI("%20s, \"%c\", size = %03d, total = %03d, index(0) = %02d, dist = %.4f, %s,,", \
|
||||||
|
__FUNCTION__, getNodeCodePoint(), inputSize, getTotalInputIndex(), \
|
||||||
|
getInputIndex(0), getNormalizedCompoundDistance(), charBuf); } while (0)
|
||||||
|
#define DUMP_WORD_AND_SCORE(header) \
|
||||||
|
do { char charBuf[50]; char prevWordCharBuf[50]; \
|
||||||
|
INTS_TO_CHARS(getOutputWordBuf(), getDepth(), charBuf); \
|
||||||
|
INTS_TO_CHARS(mDicNodeState.mDicNodeStatePrevWord.mPrevWord, \
|
||||||
|
mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength(), prevWordCharBuf); \
|
||||||
|
AKLOGI("#%8s, %5f, %5f, %5f, %5f, %s, %s, %d,,", header, \
|
||||||
|
getSpatialDistanceForScoring(), getLanguageDistanceForScoring(), \
|
||||||
|
getNormalizedCompoundDistance(), getRawLength(), prevWordCharBuf, charBuf, \
|
||||||
|
getInputIndex(0)); \
|
||||||
|
} while (0)
|
||||||
|
#else
|
||||||
|
#define LOGI_SHOW_ADD_COST_PROP
|
||||||
|
#define DUMP_WORD_AND_SCORE(header)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
// Naming convention
|
||||||
|
// - Distance: "Weighted" edit distance -- used both for spatial and language.
|
||||||
|
// - Compound Distance: Spatial Distance + Language Distance -- used for pruning and scoring
|
||||||
|
// - Cost: delta/diff for Distance -- used both for spatial and language
|
||||||
|
// - Length: "Non-weighted" -- used only for spatial
|
||||||
|
// - Probability: "Non-weighted" -- used only for language
|
||||||
|
|
||||||
|
// This struct is purely a bucket to return values. No instances of this struct should be kept.
|
||||||
|
struct DicNode_InputStateG {
|
||||||
|
bool mNeedsToUpdateInputStateG;
|
||||||
|
int mPointerId;
|
||||||
|
int16_t mInputIndex;
|
||||||
|
int mPrevCodePoint;
|
||||||
|
float mTerminalDiffCost;
|
||||||
|
float mRawLength;
|
||||||
|
DoubleLetterLevel mDoubleLetterLevel;
|
||||||
|
};
|
||||||
|
|
||||||
|
class DicNode {
|
||||||
|
// Caveat: We define Weighting as a friend class of DicNode to let Weighting change
|
||||||
|
// the distance of DicNode.
|
||||||
|
// Caution!!! In general, we avoid using the "friend" access modifier.
|
||||||
|
// This is an exception to explicitly hide DicNode::addCost() from all classes but Weighting.
|
||||||
|
friend class Weighting;
|
||||||
|
|
||||||
|
public:
|
||||||
|
#if DEBUG_DICT
|
||||||
|
DicNodeProfiler mProfiler;
|
||||||
|
#endif
|
||||||
|
//////////////////
|
||||||
|
// Memory utils //
|
||||||
|
//////////////////
|
||||||
|
AK_FORCE_INLINE static void managedDelete(DicNode *node) {
|
||||||
|
node->remove();
|
||||||
|
}
|
||||||
|
// end
|
||||||
|
/////////////////
|
||||||
|
|
||||||
|
AK_FORCE_INLINE DicNode()
|
||||||
|
:
|
||||||
|
#if DEBUG_DICT
|
||||||
|
mProfiler(),
|
||||||
|
#endif
|
||||||
|
mDicNodeProperties(), mDicNodeState(), mIsCachedForNextSuggestion(false),
|
||||||
|
mIsUsed(false), mReleaseListener(0) {}
|
||||||
|
|
||||||
|
DicNode(const DicNode &dicNode);
|
||||||
|
DicNode &operator=(const DicNode &dicNode);
|
||||||
|
virtual ~DicNode() {}
|
||||||
|
|
||||||
|
// TODO: minimize arguments by looking binary_format
|
||||||
|
// Init for copy
|
||||||
|
void initByCopy(const DicNode *dicNode) {
|
||||||
|
mIsUsed = true;
|
||||||
|
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
|
||||||
|
mDicNodeProperties.init(&dicNode->mDicNodeProperties);
|
||||||
|
mDicNodeState.init(&dicNode->mDicNodeState);
|
||||||
|
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: minimize arguments by looking binary_format
|
||||||
|
// Init for root with prevWordNodePos which is used for bigram
|
||||||
|
void initAsRoot(const int pos, const int childrenPos, const int childrenCount,
|
||||||
|
const int prevWordNodePos) {
|
||||||
|
mIsUsed = true;
|
||||||
|
mIsCachedForNextSuggestion = false;
|
||||||
|
mDicNodeProperties.init(
|
||||||
|
pos, 0, childrenPos, 0, 0, 0, childrenCount, 0, 0, false, false, true, 0, 0);
|
||||||
|
mDicNodeState.init(prevWordNodePos);
|
||||||
|
PROF_NODE_RESET(mProfiler);
|
||||||
|
}
|
||||||
|
|
||||||
|
void initAsPassingChild(DicNode *parentNode) {
|
||||||
|
mIsUsed = true;
|
||||||
|
mIsCachedForNextSuggestion = parentNode->mIsCachedForNextSuggestion;
|
||||||
|
const int c = parentNode->getNodeTypedCodePoint();
|
||||||
|
mDicNodeProperties.init(&parentNode->mDicNodeProperties, c);
|
||||||
|
mDicNodeState.init(&parentNode->mDicNodeState);
|
||||||
|
PROF_NODE_COPY(&parentNode->mProfiler, mProfiler);
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: minimize arguments by looking binary_format
|
||||||
|
// Init for root with previous word
|
||||||
|
void initAsRootWithPreviousWord(DicNode *dicNode, const int pos, const int childrenPos,
|
||||||
|
const int childrenCount) {
|
||||||
|
mIsUsed = true;
|
||||||
|
mIsCachedForNextSuggestion = false;
|
||||||
|
mDicNodeProperties.init(
|
||||||
|
pos, 0, childrenPos, 0, 0, 0, childrenCount, 0, 0, false, false, true, 0, 0);
|
||||||
|
// TODO: Move to dicNodeState?
|
||||||
|
mDicNodeState.mDicNodeStateOutput.init(); // reset for next word
|
||||||
|
mDicNodeState.mDicNodeStateInput.init(
|
||||||
|
&dicNode->mDicNodeState.mDicNodeStateInput, true /* resetTerminalDiffCost */);
|
||||||
|
mDicNodeState.mDicNodeStateScoring.init(
|
||||||
|
&dicNode->mDicNodeState.mDicNodeStateScoring);
|
||||||
|
mDicNodeState.mDicNodeStatePrevWord.init(
|
||||||
|
dicNode->mDicNodeState.mDicNodeStatePrevWord.getPrevWordCount() + 1,
|
||||||
|
dicNode->mDicNodeProperties.getProbability(),
|
||||||
|
dicNode->mDicNodeProperties.getPos(),
|
||||||
|
dicNode->mDicNodeState.mDicNodeStatePrevWord.mPrevWord,
|
||||||
|
dicNode->mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength(),
|
||||||
|
dicNode->getOutputWordBuf(),
|
||||||
|
dicNode->mDicNodeProperties.getDepth(),
|
||||||
|
dicNode->mDicNodeState.mDicNodeStatePrevWord.mPrevSpacePositions,
|
||||||
|
mDicNodeState.mDicNodeStateInput.getInputIndex(0) /* lastInputIndex */);
|
||||||
|
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: minimize arguments by looking binary_format
|
||||||
|
void initAsChild(DicNode *dicNode, const int pos, const uint8_t flags, const int childrenPos,
|
||||||
|
const int attributesPos, const int siblingPos, const int nodeCodePoint,
|
||||||
|
const int childrenCount, const int probability, const int bigramProbability,
|
||||||
|
const bool isTerminal, const bool hasMultipleChars, const bool hasChildren,
|
||||||
|
const uint16_t additionalSubwordLength, const int *additionalSubword) {
|
||||||
|
mIsUsed = true;
|
||||||
|
uint16_t newDepth = static_cast<uint16_t>(dicNode->getDepth() + 1);
|
||||||
|
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
|
||||||
|
const uint16_t newLeavingDepth = static_cast<uint16_t>(
|
||||||
|
dicNode->mDicNodeProperties.getLeavingDepth() + additionalSubwordLength);
|
||||||
|
mDicNodeProperties.init(pos, flags, childrenPos, attributesPos, siblingPos, nodeCodePoint,
|
||||||
|
childrenCount, probability, bigramProbability, isTerminal, hasMultipleChars,
|
||||||
|
hasChildren, newDepth, newLeavingDepth);
|
||||||
|
mDicNodeState.init(&dicNode->mDicNodeState, additionalSubwordLength, additionalSubword);
|
||||||
|
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
|
||||||
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE void remove() {
|
||||||
|
mIsUsed = false;
|
||||||
|
if (mReleaseListener) {
|
||||||
|
mReleaseListener->onReleased(this);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool isUsed() const {
|
||||||
|
return mIsUsed;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool isRoot() const {
|
||||||
|
return getDepth() == 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool hasChildren() const {
|
||||||
|
return mDicNodeProperties.hasChildren();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool isLeavingNode() const {
|
||||||
|
ASSERT(getDepth() <= getLeavingDepth());
|
||||||
|
return getDepth() == getLeavingDepth();
|
||||||
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE bool isFirstLetter() const {
|
||||||
|
return getDepth() == 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool isCached() const {
|
||||||
|
return mIsCachedForNextSuggestion;
|
||||||
|
}
|
||||||
|
|
||||||
|
void setCached() {
|
||||||
|
mIsCachedForNextSuggestion = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Used to expand the node in DicNodeUtils
|
||||||
|
int getNodeTypedCodePoint() const {
|
||||||
|
return mDicNodeState.mDicNodeStateOutput.getCodePointAt(getDepth());
|
||||||
|
}
|
||||||
|
|
||||||
|
bool isImpossibleBigramWord() const {
|
||||||
|
const int probability = mDicNodeProperties.getProbability();
|
||||||
|
if (probability == 0) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
const int prevWordLen = mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength()
|
||||||
|
- mDicNodeState.mDicNodeStatePrevWord.getPrevWordStart() - 1;
|
||||||
|
const int currentWordLen = getDepth();
|
||||||
|
return (prevWordLen == 1 && currentWordLen == 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool isCapitalized() const {
|
||||||
|
const int c = getOutputWordBuf()[0];
|
||||||
|
return isAsciiUpper(c);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool isFirstWord() const {
|
||||||
|
return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos() == NOT_VALID_WORD;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool isCompletion(const int inputSize) const {
|
||||||
|
return mDicNodeState.mDicNodeStateInput.getInputIndex(0) >= inputSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool canDoLookAheadCorrection(const int inputSize) const {
|
||||||
|
return mDicNodeState.mDicNodeStateInput.getInputIndex(0) < inputSize - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Used to get bigram probability in DicNodeUtils
|
||||||
|
int getPos() const {
|
||||||
|
return mDicNodeProperties.getPos();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Used to get bigram probability in DicNodeUtils
|
||||||
|
int getPrevWordPos() const {
|
||||||
|
return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Used in DicNodeUtils
|
||||||
|
int getChildrenPos() const {
|
||||||
|
return mDicNodeProperties.getChildrenPos();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Used in DicNodeUtils
|
||||||
|
int getChildrenCount() const {
|
||||||
|
return mDicNodeProperties.getChildrenCount();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Used in DicNodeUtils
|
||||||
|
int getProbability() const {
|
||||||
|
return mDicNodeProperties.getProbability();
|
||||||
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE bool isTerminalWordNode() const {
|
||||||
|
const bool isTerminalNodes = mDicNodeProperties.isTerminal();
|
||||||
|
const int currentNodeDepth = getDepth();
|
||||||
|
const int terminalNodeDepth = mDicNodeProperties.getLeavingDepth();
|
||||||
|
return isTerminalNodes && currentNodeDepth > 0 && currentNodeDepth == terminalNodeDepth;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool shouldBeFilterdBySafetyNetForBigram() const {
|
||||||
|
const uint16_t currentDepth = getDepth();
|
||||||
|
const int prevWordLen = mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength()
|
||||||
|
- mDicNodeState.mDicNodeStatePrevWord.getPrevWordStart() - 1;
|
||||||
|
return !(currentDepth > 0 && (currentDepth != 1 || prevWordLen != 1));
|
||||||
|
}
|
||||||
|
|
||||||
|
uint16_t getLeavingDepth() const {
|
||||||
|
return mDicNodeProperties.getLeavingDepth();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool isTotalInputSizeExceedingLimit() const {
|
||||||
|
const int prevWordsLen = mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength();
|
||||||
|
const int currentWordDepth = getDepth();
|
||||||
|
// TODO: 3 can be 2? Needs to be investigated.
|
||||||
|
// TODO: Have a const variable for 3 (or 2)
|
||||||
|
return prevWordsLen + currentWordDepth > MAX_WORD_LENGTH - 3;
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: This may be defective. Needs to be revised.
|
||||||
|
bool truncateNode(const DicNode *const topNode, const int inputCommitPoint) {
|
||||||
|
const int prevWordLenOfTop = mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength();
|
||||||
|
int newPrevWordStartIndex = inputCommitPoint;
|
||||||
|
int charCount = 0;
|
||||||
|
// Find new word start index
|
||||||
|
for (int i = 0; i < prevWordLenOfTop; ++i) {
|
||||||
|
const int c = mDicNodeState.mDicNodeStatePrevWord.getPrevWordCodePointAt(i);
|
||||||
|
// TODO: Check other separators.
|
||||||
|
if (c != KEYCODE_SPACE && c != KEYCODE_SINGLE_QUOTE) {
|
||||||
|
if (charCount == inputCommitPoint) {
|
||||||
|
newPrevWordStartIndex = i;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
++charCount;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!mDicNodeState.mDicNodeStatePrevWord.startsWith(
|
||||||
|
&topNode->mDicNodeState.mDicNodeStatePrevWord, newPrevWordStartIndex - 1)) {
|
||||||
|
// Node mismatch.
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
mDicNodeState.mDicNodeStateInput.truncate(inputCommitPoint);
|
||||||
|
mDicNodeState.mDicNodeStatePrevWord.truncate(newPrevWordStartIndex);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void outputResult(int *dest) const {
|
||||||
|
const uint16_t prevWordLength = mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength();
|
||||||
|
const uint16_t currentDepth = getDepth();
|
||||||
|
DicNodeUtils::appendTwoWords(mDicNodeState.mDicNodeStatePrevWord.mPrevWord,
|
||||||
|
prevWordLength, getOutputWordBuf(), currentDepth, dest);
|
||||||
|
DUMP_WORD_AND_SCORE("OUTPUT");
|
||||||
|
}
|
||||||
|
|
||||||
|
void outputSpacePositionsResult(int *spaceIndices) const {
|
||||||
|
mDicNodeState.mDicNodeStatePrevWord.outputSpacePositions(spaceIndices);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool hasMultipleWords() const {
|
||||||
|
return mDicNodeState.mDicNodeStatePrevWord.getPrevWordCount() > 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
float getProximityCorrectionCount() const {
|
||||||
|
return static_cast<float>(mDicNodeState.mDicNodeStateScoring.getProximityCorrectionCount());
|
||||||
|
}
|
||||||
|
|
||||||
|
float getEditCorrectionCount() const {
|
||||||
|
return static_cast<float>(mDicNodeState.mDicNodeStateScoring.getEditCorrectionCount());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Used to prune nodes
|
||||||
|
float getNormalizedCompoundDistance() const {
|
||||||
|
return mDicNodeState.mDicNodeStateScoring.getNormalizedCompoundDistance();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Used to prune nodes
|
||||||
|
float getNormalizedSpatialDistance() const {
|
||||||
|
return mDicNodeState.mDicNodeStateScoring.getSpatialDistance()
|
||||||
|
/ static_cast<float>(getInputIndex(0) + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Used to prune nodes
|
||||||
|
float getCompoundDistance() const {
|
||||||
|
return mDicNodeState.mDicNodeStateScoring.getCompoundDistance();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Used to prune nodes
|
||||||
|
float getCompoundDistance(const float languageWeight) const {
|
||||||
|
return mDicNodeState.mDicNodeStateScoring.getCompoundDistance(languageWeight);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Note that "cost" means delta for "distance" that is weighted.
|
||||||
|
float getTotalPrevWordsLanguageCost() const {
|
||||||
|
return mDicNodeState.mDicNodeStateScoring.getTotalPrevWordsLanguageCost();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Used to commit input partially
|
||||||
|
int getPrevWordNodePos() const {
|
||||||
|
return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos();
|
||||||
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE const int *getOutputWordBuf() const {
|
||||||
|
return mDicNodeState.mDicNodeStateOutput.mWordBuf;
|
||||||
|
}
|
||||||
|
|
||||||
|
int getPrevCodePointG(int pointerId) const {
|
||||||
|
return mDicNodeState.mDicNodeStateInput.getPrevCodePoint(pointerId);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Whether the current codepoint can be an intentional omission, in which case the traversal
|
||||||
|
// algorithm will always check for a possible omission here.
|
||||||
|
bool canBeIntentionalOmission() const {
|
||||||
|
return isIntentionalOmissionCodePoint(getNodeCodePoint());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Whether the omission is so frequent that it should incur zero cost.
|
||||||
|
bool isZeroCostOmission() const {
|
||||||
|
// TODO: do not hardcode and read from header
|
||||||
|
return (getNodeCodePoint() == KEYCODE_SINGLE_QUOTE);
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: remove
|
||||||
|
float getTerminalDiffCostG(int path) const {
|
||||||
|
return mDicNodeState.mDicNodeStateInput.getTerminalDiffCost(path);
|
||||||
|
}
|
||||||
|
|
||||||
|
//////////////////////
|
||||||
|
// Temporary getter //
|
||||||
|
// TODO: Remove //
|
||||||
|
//////////////////////
|
||||||
|
// TODO: Remove once touch path is merged into ProximityInfoState
|
||||||
|
int getNodeCodePoint() const {
|
||||||
|
return mDicNodeProperties.getNodeCodePoint();
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////
|
||||||
|
// Utils for cost calculation //
|
||||||
|
////////////////////////////////
|
||||||
|
AK_FORCE_INLINE bool isSameNodeCodePoint(const DicNode *const dicNode) const {
|
||||||
|
return mDicNodeProperties.getNodeCodePoint()
|
||||||
|
== dicNode->mDicNodeProperties.getNodeCodePoint();
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: remove
|
||||||
|
// TODO: rename getNextInputIndex
|
||||||
|
int16_t getInputIndex(int pointerId) const {
|
||||||
|
return mDicNodeState.mDicNodeStateInput.getInputIndex(pointerId);
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////
|
||||||
|
// Getter of features for scoring //
|
||||||
|
////////////////////////////////////
|
||||||
|
float getSpatialDistanceForScoring() const {
|
||||||
|
return mDicNodeState.mDicNodeStateScoring.getSpatialDistance();
|
||||||
|
}
|
||||||
|
|
||||||
|
float getLanguageDistanceForScoring() const {
|
||||||
|
return mDicNodeState.mDicNodeStateScoring.getLanguageDistance();
|
||||||
|
}
|
||||||
|
|
||||||
|
float getLanguageDistanceRatePerWordForScoring() const {
|
||||||
|
const float langDist = getLanguageDistanceForScoring();
|
||||||
|
const float totalWordCount =
|
||||||
|
static_cast<float>(mDicNodeState.mDicNodeStatePrevWord.getPrevWordCount() + 1);
|
||||||
|
return langDist / totalWordCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
float getRawLength() const {
|
||||||
|
return mDicNodeState.mDicNodeStateScoring.getRawLength();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool isLessThanOneErrorForScoring() const {
|
||||||
|
return mDicNodeState.mDicNodeStateScoring.getEditCorrectionCount()
|
||||||
|
+ mDicNodeState.mDicNodeStateScoring.getProximityCorrectionCount() <= 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
DoubleLetterLevel getDoubleLetterLevel() const {
|
||||||
|
return mDicNodeState.mDicNodeStateScoring.getDoubleLetterLevel();
|
||||||
|
}
|
||||||
|
|
||||||
|
void setDoubleLetterLevel(DoubleLetterLevel doubleLetterLevel) {
|
||||||
|
mDicNodeState.mDicNodeStateScoring.setDoubleLetterLevel(doubleLetterLevel);
|
||||||
|
}
|
||||||
|
|
||||||
|
uint8_t getFlags() const {
|
||||||
|
return mDicNodeProperties.getFlags();
|
||||||
|
}
|
||||||
|
|
||||||
|
int getAttributesPos() const {
|
||||||
|
return mDicNodeProperties.getAttributesPos();
|
||||||
|
}
|
||||||
|
|
||||||
|
inline uint16_t getDepth() const {
|
||||||
|
return mDicNodeProperties.getDepth();
|
||||||
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE void dump(const char *tag) const {
|
||||||
|
#if DEBUG_DICT
|
||||||
|
DUMP_WORD_AND_SCORE(tag);
|
||||||
|
#if DEBUG_DUMP_ERROR
|
||||||
|
mProfiler.dump();
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
void setReleaseListener(DicNodeReleaseListener *releaseListener) {
|
||||||
|
mReleaseListener = releaseListener;
|
||||||
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE bool compare(const DicNode *right) {
|
||||||
|
if (!isUsed() && !right->isUsed()) {
|
||||||
|
// Compare pointer values here for stable comparison
|
||||||
|
return this > right;
|
||||||
|
}
|
||||||
|
if (!isUsed()) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if (!right->isUsed()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
const float diff =
|
||||||
|
right->getNormalizedCompoundDistance() - getNormalizedCompoundDistance();
|
||||||
|
static const float MIN_DIFF = 0.000001f;
|
||||||
|
if (diff > MIN_DIFF) {
|
||||||
|
return true;
|
||||||
|
} else if (diff < -MIN_DIFF) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
const int depth = getDepth();
|
||||||
|
const int depthDiff = right->getDepth() - depth;
|
||||||
|
if (depthDiff != 0) {
|
||||||
|
return depthDiff > 0;
|
||||||
|
}
|
||||||
|
for (int i = 0; i < depth; ++i) {
|
||||||
|
const int codePoint = mDicNodeState.mDicNodeStateOutput.getCodePointAt(i);
|
||||||
|
const int rightCodePoint = right->mDicNodeState.mDicNodeStateOutput.getCodePointAt(i);
|
||||||
|
if (codePoint != rightCodePoint) {
|
||||||
|
return rightCodePoint > codePoint;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Compare pointer values here for stable comparison
|
||||||
|
return this > right;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
DicNodeProperties mDicNodeProperties;
|
||||||
|
DicNodeState mDicNodeState;
|
||||||
|
// TODO: Remove
|
||||||
|
bool mIsCachedForNextSuggestion;
|
||||||
|
bool mIsUsed;
|
||||||
|
DicNodeReleaseListener *mReleaseListener;
|
||||||
|
|
||||||
|
AK_FORCE_INLINE int getTotalInputIndex() const {
|
||||||
|
int index = 0;
|
||||||
|
for (int i = 0; i < MAX_POINTER_COUNT_G; i++) {
|
||||||
|
index += mDicNodeState.mDicNodeStateInput.getInputIndex(i);
|
||||||
|
}
|
||||||
|
return index;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Caveat: Must not be called outside Weighting
|
||||||
|
// This restriction is guaranteed by "friend"
|
||||||
|
AK_FORCE_INLINE void addCost(const float spatialCost, const float languageCost,
|
||||||
|
const bool doNormalization, const int inputSize, const bool isEditCorrection,
|
||||||
|
const bool isProximityCorrection) {
|
||||||
|
if (DEBUG_GEO_FULL) {
|
||||||
|
LOGI_SHOW_ADD_COST_PROP;
|
||||||
|
}
|
||||||
|
mDicNodeState.mDicNodeStateScoring.addCost(spatialCost, languageCost, doNormalization,
|
||||||
|
inputSize, getTotalInputIndex(), isEditCorrection, isProximityCorrection);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Caveat: Must not be called outside Weighting
|
||||||
|
// This restriction is guaranteed by "friend"
|
||||||
|
AK_FORCE_INLINE void forwardInputIndex(const int pointerId, const int count,
|
||||||
|
const bool overwritesPrevCodePointByNodeCodePoint) {
|
||||||
|
if (count == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
mDicNodeState.mDicNodeStateInput.forwardInputIndex(pointerId, count);
|
||||||
|
if (overwritesPrevCodePointByNodeCodePoint) {
|
||||||
|
mDicNodeState.mDicNodeStateInput.setPrevCodePoint(0, getNodeCodePoint());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE void updateInputIndexG(DicNode_InputStateG *inputStateG) {
|
||||||
|
mDicNodeState.mDicNodeStateInput.updateInputIndexG(inputStateG->mPointerId,
|
||||||
|
inputStateG->mInputIndex, inputStateG->mPrevCodePoint,
|
||||||
|
inputStateG->mTerminalDiffCost, inputStateG->mRawLength);
|
||||||
|
mDicNodeState.mDicNodeStateScoring.addRawLength(inputStateG->mRawLength);
|
||||||
|
mDicNodeState.mDicNodeStateScoring.setDoubleLetterLevel(inputStateG->mDoubleLetterLevel);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
} // namespace latinime
|
||||||
|
#endif // LATINIME_DIC_NODE_H
|
|
@ -0,0 +1,213 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2012 The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LATINIME_DIC_NODE_PRIORITY_QUEUE_H
|
||||||
|
#define LATINIME_DIC_NODE_PRIORITY_QUEUE_H
|
||||||
|
|
||||||
|
#include <queue>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "defines.h"
|
||||||
|
#include "dic_node.h"
|
||||||
|
#include "dic_node_release_listener.h"
|
||||||
|
|
||||||
|
#define MAX_DIC_NODE_PRIORITY_QUEUE_CAPACITY 200
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
class DicNodePriorityQueue : public DicNodeReleaseListener {
|
||||||
|
public:
|
||||||
|
AK_FORCE_INLINE DicNodePriorityQueue()
|
||||||
|
: MAX_CAPACITY(MAX_DIC_NODE_PRIORITY_QUEUE_CAPACITY),
|
||||||
|
mMaxSize(MAX_DIC_NODE_PRIORITY_QUEUE_CAPACITY), mDicNodesBuf(), mUnusedNodeIndices(),
|
||||||
|
mNextUnusedNodeId(0), mDicNodesQueue() {
|
||||||
|
mDicNodesBuf.resize(MAX_CAPACITY + 1);
|
||||||
|
mUnusedNodeIndices.resize(MAX_CAPACITY + 1);
|
||||||
|
reset();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Non virtual inline destructor -- never inherit this class
|
||||||
|
AK_FORCE_INLINE ~DicNodePriorityQueue() {}
|
||||||
|
|
||||||
|
int getSize() const {
|
||||||
|
return static_cast<int>(mDicNodesQueue.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
int getMaxSize() const {
|
||||||
|
return mMaxSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE void setMaxSize(const int maxSize) {
|
||||||
|
mMaxSize = min(maxSize, MAX_CAPACITY);
|
||||||
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE void reset() {
|
||||||
|
clearAndResize(MAX_CAPACITY);
|
||||||
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE void clear() {
|
||||||
|
clearAndResize(mMaxSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE void clearAndResize(const int maxSize) {
|
||||||
|
while (!mDicNodesQueue.empty()) {
|
||||||
|
mDicNodesQueue.pop();
|
||||||
|
}
|
||||||
|
setMaxSize(maxSize);
|
||||||
|
for (int i = 0; i < MAX_CAPACITY + 1; ++i) {
|
||||||
|
mDicNodesBuf[i].remove();
|
||||||
|
mDicNodesBuf[i].setReleaseListener(this);
|
||||||
|
mUnusedNodeIndices[i] = i == MAX_CAPACITY ? NOT_A_NODE_ID : static_cast<int>(i) + 1;
|
||||||
|
}
|
||||||
|
mNextUnusedNodeId = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE DicNode *newDicNode(DicNode *dicNode) {
|
||||||
|
DicNode *newNode = searchEmptyDicNode();
|
||||||
|
if (newNode) {
|
||||||
|
DicNodeUtils::initByCopy(dicNode, newNode);
|
||||||
|
return newNode;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Copy
|
||||||
|
AK_FORCE_INLINE DicNode *copyPush(DicNode *dicNode) {
|
||||||
|
return copyPush(dicNode, mMaxSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE void copyPop(DicNode *dest) {
|
||||||
|
if (mDicNodesQueue.empty()) {
|
||||||
|
ASSERT(false);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
DicNode *node = mDicNodesQueue.top();
|
||||||
|
if (dest) {
|
||||||
|
DicNodeUtils::initByCopy(node, dest);
|
||||||
|
}
|
||||||
|
node->remove();
|
||||||
|
mDicNodesQueue.pop();
|
||||||
|
}
|
||||||
|
|
||||||
|
void onReleased(DicNode *dicNode) {
|
||||||
|
const int index = static_cast<int>(dicNode - &mDicNodesBuf[0]);
|
||||||
|
if (mUnusedNodeIndices[index] != NOT_A_NODE_ID) {
|
||||||
|
// it's already released
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
mUnusedNodeIndices[index] = mNextUnusedNodeId;
|
||||||
|
mNextUnusedNodeId = index;
|
||||||
|
ASSERT(index >= 0 && index < (MAX_CAPACITY + 1));
|
||||||
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE void dump() const {
|
||||||
|
AKLOGI("\n\n\n\n\n===========================");
|
||||||
|
for (int i = 0; i < MAX_CAPACITY + 1; ++i) {
|
||||||
|
if (mDicNodesBuf[i].isUsed()) {
|
||||||
|
mDicNodesBuf[i].dump("QUEUE: ");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
AKLOGI("===========================\n\n\n\n\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
DISALLOW_COPY_AND_ASSIGN(DicNodePriorityQueue);
|
||||||
|
static const int NOT_A_NODE_ID = -1;
|
||||||
|
|
||||||
|
AK_FORCE_INLINE static bool compareDicNode(DicNode *left, DicNode *right) {
|
||||||
|
return left->compare(right);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct DicNodeComparator {
|
||||||
|
bool operator ()(DicNode *left, DicNode *right) {
|
||||||
|
return compareDicNode(left, right);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef std::priority_queue<DicNode *, std::vector<DicNode *>, DicNodeComparator> DicNodesQueue;
|
||||||
|
const int MAX_CAPACITY;
|
||||||
|
int mMaxSize;
|
||||||
|
std::vector<DicNode> mDicNodesBuf; // of each element of mDicNodesBuf respectively
|
||||||
|
std::vector<int> mUnusedNodeIndices;
|
||||||
|
int mNextUnusedNodeId;
|
||||||
|
DicNodesQueue mDicNodesQueue;
|
||||||
|
|
||||||
|
inline bool isFull(const int maxSize) const {
|
||||||
|
return getSize() >= maxSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE void pop() {
|
||||||
|
copyPop(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE bool betterThanWorstDicNode(DicNode *dicNode) const {
|
||||||
|
DicNode *worstNode = mDicNodesQueue.top();
|
||||||
|
if (!worstNode) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return compareDicNode(dicNode, worstNode);
|
||||||
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE DicNode *searchEmptyDicNode() {
|
||||||
|
// TODO: Currently O(n) but should be improved to O(1)
|
||||||
|
if (MAX_CAPACITY == 0) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
if (mNextUnusedNodeId == NOT_A_NODE_ID) {
|
||||||
|
AKLOGI("No unused node found.");
|
||||||
|
for (int i = 0; i < MAX_CAPACITY + 1; ++i) {
|
||||||
|
AKLOGI("Dump node availability, %d, %d, %d",
|
||||||
|
i, mDicNodesBuf[i].isUsed(), mUnusedNodeIndices[i]);
|
||||||
|
}
|
||||||
|
ASSERT(false);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
DicNode *dicNode = &mDicNodesBuf[mNextUnusedNodeId];
|
||||||
|
markNodeAsUsed(dicNode);
|
||||||
|
return dicNode;
|
||||||
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE void markNodeAsUsed(DicNode *dicNode) {
|
||||||
|
const int index = static_cast<int>(dicNode - &mDicNodesBuf[0]);
|
||||||
|
mNextUnusedNodeId = mUnusedNodeIndices[index];
|
||||||
|
mUnusedNodeIndices[index] = NOT_A_NODE_ID;
|
||||||
|
ASSERT(index >= 0 && index < (MAX_CAPACITY + 1));
|
||||||
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE DicNode *pushPoolNodeWithMaxSize(DicNode *dicNode, const int maxSize) {
|
||||||
|
if (!dicNode) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
if (!isFull(maxSize)) {
|
||||||
|
mDicNodesQueue.push(dicNode);
|
||||||
|
return dicNode;
|
||||||
|
}
|
||||||
|
if (betterThanWorstDicNode(dicNode)) {
|
||||||
|
pop();
|
||||||
|
mDicNodesQueue.push(dicNode);
|
||||||
|
return dicNode;
|
||||||
|
}
|
||||||
|
dicNode->remove();
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Copy
|
||||||
|
AK_FORCE_INLINE DicNode *copyPush(DicNode *dicNode, const int maxSize) {
|
||||||
|
return pushPoolNodeWithMaxSize(newDicNode(dicNode), maxSize);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
} // namespace latinime
|
||||||
|
#endif // LATINIME_DIC_NODE_PRIORITY_QUEUE_H
|
|
@ -0,0 +1,181 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2012 The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LATINIME_DIC_NODE_PROFILER_H
|
||||||
|
#define LATINIME_DIC_NODE_PROFILER_H
|
||||||
|
|
||||||
|
#include "defines.h"
|
||||||
|
|
||||||
|
#if DEBUG_DICT
|
||||||
|
#define PROF_SPACE_SUBSTITUTION(profiler) profiler.profSpaceSubstitution()
|
||||||
|
#define PROF_SPACE_OMISSION(profiler) profiler.profSpaceOmission()
|
||||||
|
#define PROF_ADDITIONAL_PROXIMITY(profiler) profiler.profAdditionalProximity()
|
||||||
|
#define PROF_SUBSTITUTION(profiler) profiler.profSubstitution()
|
||||||
|
#define PROF_OMISSION(profiler) profiler.profOmission()
|
||||||
|
#define PROF_INSERTION(profiler) profiler.profInsertion()
|
||||||
|
#define PROF_MATCH(profiler) profiler.profMatch()
|
||||||
|
#define PROF_COMPLETION(profiler) profiler.profCompletion()
|
||||||
|
#define PROF_TRANSPOSITION(profiler) profiler.profTransposition()
|
||||||
|
#define PROF_NEARESTKEY(profiler) profiler.profNearestKey()
|
||||||
|
#define PROF_TERMINAL(profiler) profiler.profTerminal()
|
||||||
|
#define PROF_NEW_WORD(profiler) profiler.profNewWord()
|
||||||
|
#define PROF_NEW_WORD_BIGRAM(profiler) profiler.profNewWordBigram()
|
||||||
|
#define PROF_NODE_RESET(profiler) profiler.reset()
|
||||||
|
#define PROF_NODE_COPY(src, dest) dest.copy(src)
|
||||||
|
#else
|
||||||
|
#define PROF_SPACE_SUBSTITUTION(profiler)
|
||||||
|
#define PROF_SPACE_OMISSION(profiler)
|
||||||
|
#define PROF_ADDITONAL_PROXIMITY(profiler)
|
||||||
|
#define PROF_SUBSTITUTION(profiler)
|
||||||
|
#define PROF_OMISSION(profiler)
|
||||||
|
#define PROF_INSERTION(profiler)
|
||||||
|
#define PROF_MATCH(profiler)
|
||||||
|
#define PROF_COMPLETION(profiler)
|
||||||
|
#define PROF_TRANSPOSITION(profiler)
|
||||||
|
#define PROF_NEARESTKEY(profiler)
|
||||||
|
#define PROF_TERMINAL(profiler)
|
||||||
|
#define PROF_NEW_WORD(profiler)
|
||||||
|
#define PROF_NEW_WORD_BIGRAM(profiler)
|
||||||
|
#define PROF_NODE_RESET(profiler)
|
||||||
|
#define PROF_NODE_COPY(src, dest)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
class DicNodeProfiler {
|
||||||
|
public:
|
||||||
|
#if DEBUG_DICT
|
||||||
|
AK_FORCE_INLINE DicNodeProfiler()
|
||||||
|
: mProfOmission(0), mProfInsertion(0), mProfTransposition(0),
|
||||||
|
mProfAdditionalProximity(0), mProfSubstitution(0),
|
||||||
|
mProfSpaceSubstitution(0), mProfSpaceOmission(0),
|
||||||
|
mProfMatch(0), mProfCompletion(0), mProfTerminal(0),
|
||||||
|
mProfNearestKey(0), mProfNewWord(0), mProfNewWordBigram(0) {}
|
||||||
|
|
||||||
|
int mProfOmission;
|
||||||
|
int mProfInsertion;
|
||||||
|
int mProfTransposition;
|
||||||
|
int mProfAdditionalProximity;
|
||||||
|
int mProfSubstitution;
|
||||||
|
int mProfSpaceSubstitution;
|
||||||
|
int mProfSpaceOmission;
|
||||||
|
int mProfMatch;
|
||||||
|
int mProfCompletion;
|
||||||
|
int mProfTerminal;
|
||||||
|
int mProfNearestKey;
|
||||||
|
int mProfNewWord;
|
||||||
|
int mProfNewWordBigram;
|
||||||
|
|
||||||
|
void profSpaceSubstitution() {
|
||||||
|
++mProfSpaceSubstitution;
|
||||||
|
}
|
||||||
|
|
||||||
|
void profSpaceOmission() {
|
||||||
|
++mProfSpaceOmission;
|
||||||
|
}
|
||||||
|
|
||||||
|
void profAdditionalProximity() {
|
||||||
|
++mProfAdditionalProximity;
|
||||||
|
}
|
||||||
|
|
||||||
|
void profSubstitution() {
|
||||||
|
++mProfSubstitution;
|
||||||
|
}
|
||||||
|
|
||||||
|
void profOmission() {
|
||||||
|
++mProfOmission;
|
||||||
|
}
|
||||||
|
|
||||||
|
void profInsertion() {
|
||||||
|
++mProfInsertion;
|
||||||
|
}
|
||||||
|
|
||||||
|
void profMatch() {
|
||||||
|
++mProfMatch;
|
||||||
|
}
|
||||||
|
|
||||||
|
void profCompletion() {
|
||||||
|
++mProfCompletion;
|
||||||
|
}
|
||||||
|
|
||||||
|
void profTransposition() {
|
||||||
|
++mProfTransposition;
|
||||||
|
}
|
||||||
|
|
||||||
|
void profNearestKey() {
|
||||||
|
++mProfNearestKey;
|
||||||
|
}
|
||||||
|
|
||||||
|
void profTerminal() {
|
||||||
|
++mProfTerminal;
|
||||||
|
}
|
||||||
|
|
||||||
|
void profNewWord() {
|
||||||
|
++mProfNewWord;
|
||||||
|
}
|
||||||
|
|
||||||
|
void profNewWordBigram() {
|
||||||
|
++mProfNewWordBigram;
|
||||||
|
}
|
||||||
|
|
||||||
|
void reset() {
|
||||||
|
mProfSpaceSubstitution = 0;
|
||||||
|
mProfSpaceOmission = 0;
|
||||||
|
mProfAdditionalProximity = 0;
|
||||||
|
mProfSubstitution = 0;
|
||||||
|
mProfOmission = 0;
|
||||||
|
mProfInsertion = 0;
|
||||||
|
mProfMatch = 0;
|
||||||
|
mProfCompletion = 0;
|
||||||
|
mProfTransposition = 0;
|
||||||
|
mProfNearestKey = 0;
|
||||||
|
mProfTerminal = 0;
|
||||||
|
mProfNewWord = 0;
|
||||||
|
mProfNewWordBigram = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void copy(const DicNodeProfiler *const profiler) {
|
||||||
|
mProfSpaceSubstitution = profiler->mProfSpaceSubstitution;
|
||||||
|
mProfSpaceOmission = profiler->mProfSpaceOmission;
|
||||||
|
mProfAdditionalProximity = profiler->mProfAdditionalProximity;
|
||||||
|
mProfSubstitution = profiler->mProfSubstitution;
|
||||||
|
mProfOmission = profiler->mProfOmission;
|
||||||
|
mProfInsertion = profiler->mProfInsertion;
|
||||||
|
mProfMatch = profiler->mProfMatch;
|
||||||
|
mProfCompletion = profiler->mProfCompletion;
|
||||||
|
mProfTransposition = profiler->mProfTransposition;
|
||||||
|
mProfNearestKey = profiler->mProfNearestKey;
|
||||||
|
mProfTerminal = profiler->mProfTerminal;
|
||||||
|
mProfNewWord = profiler->mProfNewWord;
|
||||||
|
mProfNewWordBigram = profiler->mProfNewWordBigram;
|
||||||
|
}
|
||||||
|
|
||||||
|
void dump() const {
|
||||||
|
AKLOGI("O %d, I %d, T %d, AP %d, S %d, SS %d, SO %d, M %d, C %d, TE %d, NW = %d, NWB = %d",
|
||||||
|
mProfOmission, mProfInsertion, mProfTransposition, mProfAdditionalProximity,
|
||||||
|
mProfSubstitution, mProfSpaceSubstitution, mProfSpaceOmission, mProfMatch,
|
||||||
|
mProfCompletion, mProfTerminal, mProfNewWord, mProfNewWordBigram);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
DicNodeProfiler() {}
|
||||||
|
#endif
|
||||||
|
private:
|
||||||
|
// Caution!!!
|
||||||
|
// Use a default copy constructor and an assign operator because shallow copies are ok
|
||||||
|
// for this class
|
||||||
|
};
|
||||||
|
}
|
||||||
|
#endif // LATINIME_DIC_NODE_PROFILER_H
|
|
@ -0,0 +1,173 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2012 The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LATINIME_DIC_NODE_PROPERTIES_H
|
||||||
|
#define LATINIME_DIC_NODE_PROPERTIES_H
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#include "defines.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Node for traversing the lexicon trie.
|
||||||
|
*/
|
||||||
|
class DicNodeProperties {
|
||||||
|
public:
|
||||||
|
AK_FORCE_INLINE DicNodeProperties()
|
||||||
|
: mPos(0), mFlags(0), mChildrenPos(0), mAttributesPos(0), mSiblingPos(0),
|
||||||
|
mChildrenCount(0), mProbability(0), mBigramProbability(0), mNodeCodePoint(0),
|
||||||
|
mDepth(0), mLeavingDepth(0), mIsTerminal(false), mHasMultipleChars(false),
|
||||||
|
mHasChildren(false) {
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual ~DicNodeProperties() {}
|
||||||
|
|
||||||
|
// Should be called only once per DicNode is initialized.
|
||||||
|
void init(const int pos, const uint8_t flags, const int childrenPos, const int attributesPos,
|
||||||
|
const int siblingPos, const int nodeCodePoint, const int childrenCount,
|
||||||
|
const int probability, const int bigramProbability, const bool isTerminal,
|
||||||
|
const bool hasMultipleChars, const bool hasChildren, const uint16_t depth,
|
||||||
|
const uint16_t terminalDepth) {
|
||||||
|
mPos = pos;
|
||||||
|
mFlags = flags;
|
||||||
|
mChildrenPos = childrenPos;
|
||||||
|
mAttributesPos = attributesPos;
|
||||||
|
mSiblingPos = siblingPos;
|
||||||
|
mNodeCodePoint = nodeCodePoint;
|
||||||
|
mChildrenCount = childrenCount;
|
||||||
|
mProbability = probability;
|
||||||
|
mBigramProbability = bigramProbability;
|
||||||
|
mIsTerminal = isTerminal;
|
||||||
|
mHasMultipleChars = hasMultipleChars;
|
||||||
|
mHasChildren = hasChildren;
|
||||||
|
mDepth = depth;
|
||||||
|
mLeavingDepth = terminalDepth;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Init for copy
|
||||||
|
void init(const DicNodeProperties *const nodeProp) {
|
||||||
|
mPos = nodeProp->mPos;
|
||||||
|
mFlags = nodeProp->mFlags;
|
||||||
|
mChildrenPos = nodeProp->mChildrenPos;
|
||||||
|
mAttributesPos = nodeProp->mAttributesPos;
|
||||||
|
mSiblingPos = nodeProp->mSiblingPos;
|
||||||
|
mNodeCodePoint = nodeProp->mNodeCodePoint;
|
||||||
|
mChildrenCount = nodeProp->mChildrenCount;
|
||||||
|
mProbability = nodeProp->mProbability;
|
||||||
|
mBigramProbability = nodeProp->mBigramProbability;
|
||||||
|
mIsTerminal = nodeProp->mIsTerminal;
|
||||||
|
mHasMultipleChars = nodeProp->mHasMultipleChars;
|
||||||
|
mHasChildren = nodeProp->mHasChildren;
|
||||||
|
mDepth = nodeProp->mDepth;
|
||||||
|
mLeavingDepth = nodeProp->mLeavingDepth;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Init as passing child
|
||||||
|
void init(const DicNodeProperties *const nodeProp, const int codePoint) {
|
||||||
|
mPos = nodeProp->mPos;
|
||||||
|
mFlags = nodeProp->mFlags;
|
||||||
|
mChildrenPos = nodeProp->mChildrenPos;
|
||||||
|
mAttributesPos = nodeProp->mAttributesPos;
|
||||||
|
mSiblingPos = nodeProp->mSiblingPos;
|
||||||
|
mNodeCodePoint = codePoint; // Overwrite the node char of a passing child
|
||||||
|
mChildrenCount = nodeProp->mChildrenCount;
|
||||||
|
mProbability = nodeProp->mProbability;
|
||||||
|
mBigramProbability = nodeProp->mBigramProbability;
|
||||||
|
mIsTerminal = nodeProp->mIsTerminal;
|
||||||
|
mHasMultipleChars = nodeProp->mHasMultipleChars;
|
||||||
|
mHasChildren = nodeProp->mHasChildren;
|
||||||
|
mDepth = nodeProp->mDepth + 1; // Increment the depth of a passing child
|
||||||
|
mLeavingDepth = nodeProp->mLeavingDepth;
|
||||||
|
}
|
||||||
|
|
||||||
|
int getPos() const {
|
||||||
|
return mPos;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint8_t getFlags() const {
|
||||||
|
return mFlags;
|
||||||
|
}
|
||||||
|
|
||||||
|
int getChildrenPos() const {
|
||||||
|
return mChildrenPos;
|
||||||
|
}
|
||||||
|
|
||||||
|
int getAttributesPos() const {
|
||||||
|
return mAttributesPos;
|
||||||
|
}
|
||||||
|
|
||||||
|
int getChildrenCount() const {
|
||||||
|
return mChildrenCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
int getProbability() const {
|
||||||
|
return mProbability;
|
||||||
|
}
|
||||||
|
|
||||||
|
int getNodeCodePoint() const {
|
||||||
|
return mNodeCodePoint;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint16_t getDepth() const {
|
||||||
|
return mDepth;
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: Move to output?
|
||||||
|
uint16_t getLeavingDepth() const {
|
||||||
|
return mLeavingDepth;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool isTerminal() const {
|
||||||
|
return mIsTerminal;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool hasMultipleChars() const {
|
||||||
|
return mHasMultipleChars;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool hasChildren() const {
|
||||||
|
return mChildrenCount > 0 || mDepth != mLeavingDepth;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
// Caution!!!
|
||||||
|
// Use a default copy constructor and an assign operator because shallow copies are ok
|
||||||
|
// for this class
|
||||||
|
|
||||||
|
// Not used
|
||||||
|
int getSiblingPos() const {
|
||||||
|
return mSiblingPos;
|
||||||
|
}
|
||||||
|
|
||||||
|
int mPos;
|
||||||
|
uint8_t mFlags;
|
||||||
|
int mChildrenPos;
|
||||||
|
int mAttributesPos;
|
||||||
|
int mSiblingPos;
|
||||||
|
int mChildrenCount;
|
||||||
|
int mProbability;
|
||||||
|
int mBigramProbability; // not used for now
|
||||||
|
int mNodeCodePoint;
|
||||||
|
uint16_t mDepth;
|
||||||
|
uint16_t mLeavingDepth;
|
||||||
|
bool mIsTerminal;
|
||||||
|
bool mHasMultipleChars;
|
||||||
|
bool mHasChildren;
|
||||||
|
};
|
||||||
|
} // namespace latinime
|
||||||
|
#endif // LATINIME_DIC_NODE_PROPERTIES_H
|
|
@ -0,0 +1,33 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2012 The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LATINIME_DIC_NODE_RELEASE_LISTENER_H
|
||||||
|
#define LATINIME_DIC_NODE_RELEASE_LISTENER_H
|
||||||
|
|
||||||
|
#include "defines.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
class DicNodeReleaseListener {
|
||||||
|
public:
|
||||||
|
DicNodeReleaseListener() {}
|
||||||
|
virtual ~DicNodeReleaseListener() {}
|
||||||
|
virtual void onReleased(DicNode *dicNode) = 0;
|
||||||
|
private:
|
||||||
|
DISALLOW_COPY_AND_ASSIGN(DicNodeReleaseListener);
|
||||||
|
};
|
||||||
|
} // namespace latinime
|
||||||
|
#endif // LATINIME_DIC_NODE_RELEASE_LISTENER_H
|
|
@ -0,0 +1,71 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2012 The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LATINIME_DIC_NODE_STATE_H
|
||||||
|
#define LATINIME_DIC_NODE_STATE_H
|
||||||
|
|
||||||
|
#include "defines.h"
|
||||||
|
#include "dic_node_state_input.h"
|
||||||
|
#include "dic_node_state_output.h"
|
||||||
|
#include "dic_node_state_prevword.h"
|
||||||
|
#include "dic_node_state_scoring.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
class DicNodeState {
|
||||||
|
public:
|
||||||
|
DicNodeStateInput mDicNodeStateInput;
|
||||||
|
DicNodeStateOutput mDicNodeStateOutput;
|
||||||
|
DicNodeStatePrevWord mDicNodeStatePrevWord;
|
||||||
|
DicNodeStateScoring mDicNodeStateScoring;
|
||||||
|
|
||||||
|
AK_FORCE_INLINE DicNodeState()
|
||||||
|
: mDicNodeStateInput(), mDicNodeStateOutput(), mDicNodeStatePrevWord(),
|
||||||
|
mDicNodeStateScoring() {
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual ~DicNodeState() {}
|
||||||
|
|
||||||
|
// Init with prevWordPos
|
||||||
|
void init(const int prevWordPos) {
|
||||||
|
mDicNodeStateInput.init();
|
||||||
|
mDicNodeStateOutput.init();
|
||||||
|
mDicNodeStatePrevWord.init(prevWordPos);
|
||||||
|
mDicNodeStateScoring.init();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Init by copy
|
||||||
|
AK_FORCE_INLINE void init(const DicNodeState *const src) {
|
||||||
|
mDicNodeStateInput.init(&src->mDicNodeStateInput);
|
||||||
|
mDicNodeStateOutput.init(&src->mDicNodeStateOutput);
|
||||||
|
mDicNodeStatePrevWord.init(&src->mDicNodeStatePrevWord);
|
||||||
|
mDicNodeStateScoring.init(&src->mDicNodeStateScoring);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Init by copy and adding subword
|
||||||
|
void init(const DicNodeState *const src, const uint16_t additionalSubwordLength,
|
||||||
|
const int *const additionalSubword) {
|
||||||
|
init(src);
|
||||||
|
mDicNodeStateOutput.addSubword(additionalSubwordLength, additionalSubword);
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
// Caution!!!
|
||||||
|
// Use a default copy constructor and an assign operator because shallow copies are ok
|
||||||
|
// for this class
|
||||||
|
};
|
||||||
|
} // namespace latinime
|
||||||
|
#endif // LATINIME_DIC_NODE_STATE_H
|
|
@ -0,0 +1,100 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2012 The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LATINIME_DIC_NODE_STATE_INPUT_H
|
||||||
|
#define LATINIME_DIC_NODE_STATE_INPUT_H
|
||||||
|
|
||||||
|
#include "defines.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
// TODO: Have a .cpp for this class
|
||||||
|
class DicNodeStateInput {
|
||||||
|
public:
|
||||||
|
DicNodeStateInput() {}
|
||||||
|
virtual ~DicNodeStateInput() {}
|
||||||
|
|
||||||
|
// TODO: Merge into DicNodeStatePrevWord::truncate
|
||||||
|
void truncate(const int commitPoint) {
|
||||||
|
mInputIndex[0] -= commitPoint;
|
||||||
|
}
|
||||||
|
|
||||||
|
void init() {
|
||||||
|
for (int i = 0; i < MAX_POINTER_COUNT_G; i++) {
|
||||||
|
// TODO: The initial value for mInputIndex should be -1?
|
||||||
|
//mInputIndex[i] = i == 0 ? 0 : -1;
|
||||||
|
mInputIndex[i] = 0;
|
||||||
|
mPrevCodePoint[i] = NOT_A_CODE_POINT;
|
||||||
|
mTerminalDiffCost[i] = static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void init(const DicNodeStateInput *const src, const bool resetTerminalDiffCost) {
|
||||||
|
for (int i = 0; i < MAX_POINTER_COUNT_G; i++) {
|
||||||
|
mInputIndex[i] = src->mInputIndex[i];
|
||||||
|
mPrevCodePoint[i] = src->mPrevCodePoint[i];
|
||||||
|
mTerminalDiffCost[i] = resetTerminalDiffCost ?
|
||||||
|
static_cast<float>(MAX_VALUE_FOR_WEIGHTING) : src->mTerminalDiffCost[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void updateInputIndexG(const int pointerId, const int inputIndex,
|
||||||
|
const int prevCodePoint, const float terminalDiffCost, const float rawLength) {
|
||||||
|
mInputIndex[pointerId] = inputIndex;
|
||||||
|
mPrevCodePoint[pointerId] = prevCodePoint;
|
||||||
|
mTerminalDiffCost[pointerId] = terminalDiffCost;
|
||||||
|
}
|
||||||
|
|
||||||
|
void init(const DicNodeStateInput *const src) {
|
||||||
|
init(src, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
// For transposition
|
||||||
|
void setPrevCodePoint(const int pointerId, const int c) {
|
||||||
|
mPrevCodePoint[pointerId] = c;
|
||||||
|
}
|
||||||
|
|
||||||
|
void forwardInputIndex(const int pointerId, const int val) {
|
||||||
|
if (mInputIndex[pointerId] < 0) {
|
||||||
|
mInputIndex[pointerId] = val;
|
||||||
|
} else {
|
||||||
|
mInputIndex[pointerId] = mInputIndex[pointerId] + val;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int getInputIndex(const int pointerId) const {
|
||||||
|
// when "inputIndex" exceeds "inputSize", auto-completion needs to be done
|
||||||
|
return mInputIndex[pointerId];
|
||||||
|
}
|
||||||
|
|
||||||
|
int getPrevCodePoint(const int pointerId) const {
|
||||||
|
return mPrevCodePoint[pointerId];
|
||||||
|
}
|
||||||
|
|
||||||
|
float getTerminalDiffCost(const int pointerId) const {
|
||||||
|
return mTerminalDiffCost[pointerId];
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
// Caution!!!
|
||||||
|
// Use a default copy constructor and an assign operator because shallow copies are ok
|
||||||
|
// for this class
|
||||||
|
int mInputIndex[MAX_POINTER_COUNT_G];
|
||||||
|
int mPrevCodePoint[MAX_POINTER_COUNT_G];
|
||||||
|
float mTerminalDiffCost[MAX_POINTER_COUNT_G];
|
||||||
|
};
|
||||||
|
} // namespace latinime
|
||||||
|
#endif // LATINIME_DIC_NODE_STATE_INPUT_H
|
|
@ -0,0 +1,75 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2012 The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LATINIME_DIC_NODE_STATE_OUTPUT_H
|
||||||
|
#define LATINIME_DIC_NODE_STATE_OUTPUT_H
|
||||||
|
|
||||||
|
#include <cstring> // for memcpy()
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#include "defines.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
class DicNodeStateOutput {
|
||||||
|
public:
|
||||||
|
DicNodeStateOutput() : mOutputtedLength(0) {
|
||||||
|
init();
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual ~DicNodeStateOutput() {}
|
||||||
|
|
||||||
|
void init() {
|
||||||
|
mOutputtedLength = 0;
|
||||||
|
mWordBuf[0] = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void init(const DicNodeStateOutput *const stateOutput) {
|
||||||
|
memcpy(mWordBuf, stateOutput->mWordBuf,
|
||||||
|
stateOutput->mOutputtedLength * sizeof(mWordBuf[0]));
|
||||||
|
mOutputtedLength = stateOutput->mOutputtedLength;
|
||||||
|
if (mOutputtedLength < MAX_WORD_LENGTH) {
|
||||||
|
mWordBuf[mOutputtedLength] = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void addSubword(const uint16_t additionalSubwordLength, const int *const additionalSubword) {
|
||||||
|
if (additionalSubword) {
|
||||||
|
memcpy(&mWordBuf[mOutputtedLength], additionalSubword,
|
||||||
|
additionalSubwordLength * sizeof(mWordBuf[0]));
|
||||||
|
mOutputtedLength = static_cast<uint16_t>(mOutputtedLength + additionalSubwordLength);
|
||||||
|
if (mOutputtedLength < MAX_WORD_LENGTH) {
|
||||||
|
mWordBuf[mOutputtedLength] = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: Remove
|
||||||
|
int getCodePointAt(const int id) const {
|
||||||
|
return mWordBuf[id];
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: Move to private
|
||||||
|
int mWordBuf[MAX_WORD_LENGTH];
|
||||||
|
|
||||||
|
private:
|
||||||
|
// Caution!!!
|
||||||
|
// Use a default copy constructor and an assign operator because shallow copies are ok
|
||||||
|
// for this class
|
||||||
|
uint16_t mOutputtedLength;
|
||||||
|
};
|
||||||
|
} // namespace latinime
|
||||||
|
#endif // LATINIME_DIC_NODE_STATE_OUTPUT_H
|
|
@ -0,0 +1,156 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2012 The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LATINIME_DIC_NODE_STATE_PREVWORD_H
|
||||||
|
#define LATINIME_DIC_NODE_STATE_PREVWORD_H
|
||||||
|
|
||||||
|
#include <cstring> // for memset()
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#include "defines.h"
|
||||||
|
#include "dic_node_utils.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
class DicNodeStatePrevWord {
|
||||||
|
public:
|
||||||
|
AK_FORCE_INLINE DicNodeStatePrevWord()
|
||||||
|
: mPrevWordCount(0), mPrevWordLength(0), mPrevWordStart(0), mPrevWordProbability(0),
|
||||||
|
mPrevWordNodePos(0) {
|
||||||
|
memset(mPrevWord, 0, sizeof(mPrevWord));
|
||||||
|
memset(mPrevSpacePositions, 0, sizeof(mPrevSpacePositions));
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual ~DicNodeStatePrevWord() {}
|
||||||
|
|
||||||
|
void init() {
|
||||||
|
mPrevWordLength = 0;
|
||||||
|
mPrevWordCount = 0;
|
||||||
|
mPrevWordStart = 0;
|
||||||
|
mPrevWordProbability = -1;
|
||||||
|
mPrevWordNodePos = NOT_VALID_WORD;
|
||||||
|
memset(mPrevSpacePositions, 0, sizeof(mPrevSpacePositions));
|
||||||
|
}
|
||||||
|
|
||||||
|
void init(const int prevWordNodePos) {
|
||||||
|
mPrevWordLength = 0;
|
||||||
|
mPrevWordCount = 0;
|
||||||
|
mPrevWordStart = 0;
|
||||||
|
mPrevWordProbability = -1;
|
||||||
|
mPrevWordNodePos = prevWordNodePos;
|
||||||
|
memset(mPrevSpacePositions, 0, sizeof(mPrevSpacePositions));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Init by copy
|
||||||
|
AK_FORCE_INLINE void init(const DicNodeStatePrevWord *const prevWord) {
|
||||||
|
mPrevWordLength = prevWord->mPrevWordLength;
|
||||||
|
mPrevWordCount = prevWord->mPrevWordCount;
|
||||||
|
mPrevWordStart = prevWord->mPrevWordStart;
|
||||||
|
mPrevWordProbability = prevWord->mPrevWordProbability;
|
||||||
|
mPrevWordNodePos = prevWord->mPrevWordNodePos;
|
||||||
|
memcpy(mPrevWord, prevWord->mPrevWord, prevWord->mPrevWordLength * sizeof(mPrevWord[0]));
|
||||||
|
memcpy(mPrevSpacePositions, prevWord->mPrevSpacePositions, sizeof(mPrevSpacePositions));
|
||||||
|
}
|
||||||
|
|
||||||
|
void init(const int16_t prevWordCount, const int16_t prevWordProbability,
|
||||||
|
const int prevWordNodePos, const int *const src0, const int16_t length0,
|
||||||
|
const int *const src1, const int16_t length1, const int *const prevSpacePositions,
|
||||||
|
const int lastInputIndex) {
|
||||||
|
mPrevWordCount = prevWordCount;
|
||||||
|
mPrevWordProbability = prevWordProbability;
|
||||||
|
mPrevWordNodePos = prevWordNodePos;
|
||||||
|
const int twoWordsLen =
|
||||||
|
DicNodeUtils::appendTwoWords(src0, length0, src1, length1, mPrevWord);
|
||||||
|
mPrevWord[twoWordsLen] = KEYCODE_SPACE;
|
||||||
|
mPrevWordStart = length0;
|
||||||
|
mPrevWordLength = static_cast<int16_t>(twoWordsLen + 1);
|
||||||
|
memcpy(mPrevSpacePositions, prevSpacePositions, sizeof(mPrevSpacePositions));
|
||||||
|
mPrevSpacePositions[mPrevWordCount - 1] = lastInputIndex;
|
||||||
|
}
|
||||||
|
|
||||||
|
void truncate(const int offset) {
|
||||||
|
// TODO: memmove
|
||||||
|
if (mPrevWordLength < offset) {
|
||||||
|
memset(mPrevWord, 0, sizeof(mPrevWord));
|
||||||
|
mPrevWordLength = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const int newPrevWordLength = mPrevWordLength - offset;
|
||||||
|
memmove(mPrevWord, &mPrevWord[offset], newPrevWordLength * sizeof(mPrevWord[0]));
|
||||||
|
mPrevWordLength = newPrevWordLength;
|
||||||
|
}
|
||||||
|
|
||||||
|
void outputSpacePositions(int *spaceIndices) const {
|
||||||
|
// Convert uint16_t to int
|
||||||
|
for (int i = 0; i < MAX_RESULTS; i++) {
|
||||||
|
spaceIndices[i] = mPrevSpacePositions[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: remove
|
||||||
|
int16_t getPrevWordLength() const {
|
||||||
|
return mPrevWordLength;
|
||||||
|
}
|
||||||
|
|
||||||
|
int16_t getPrevWordCount() const {
|
||||||
|
return mPrevWordCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
int16_t getPrevWordStart() const {
|
||||||
|
return mPrevWordStart;
|
||||||
|
}
|
||||||
|
|
||||||
|
int16_t getPrevWordProbability() const {
|
||||||
|
return mPrevWordProbability;
|
||||||
|
}
|
||||||
|
|
||||||
|
int getPrevWordNodePos() const {
|
||||||
|
return mPrevWordNodePos;
|
||||||
|
}
|
||||||
|
|
||||||
|
int getPrevWordCodePointAt(const int id) const {
|
||||||
|
return mPrevWord[id];
|
||||||
|
}
|
||||||
|
|
||||||
|
bool startsWith(const DicNodeStatePrevWord *const prefix, const int prefixLen) const {
|
||||||
|
if (prefixLen > mPrevWordLength) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
for (int i = 0; i < prefixLen; ++i) {
|
||||||
|
if (mPrevWord[i] != prefix->mPrevWord[i]) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: Move to private
|
||||||
|
int mPrevWord[MAX_WORD_LENGTH];
|
||||||
|
// TODO: Move to private
|
||||||
|
int mPrevSpacePositions[MAX_RESULTS];
|
||||||
|
|
||||||
|
private:
|
||||||
|
// Caution!!!
|
||||||
|
// Use a default copy constructor and an assign operator because shallow copies are ok
|
||||||
|
// for this class
|
||||||
|
int16_t mPrevWordCount;
|
||||||
|
int16_t mPrevWordLength;
|
||||||
|
int16_t mPrevWordStart;
|
||||||
|
int16_t mPrevWordProbability;
|
||||||
|
int mPrevWordNodePos;
|
||||||
|
};
|
||||||
|
} // namespace latinime
|
||||||
|
#endif // LATINIME_DIC_NODE_STATE_PREVWORD_H
|
|
@ -0,0 +1,166 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2012 The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LATINIME_DIC_NODE_STATE_SCORING_H
|
||||||
|
#define LATINIME_DIC_NODE_STATE_SCORING_H
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#include "defines.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
class DicNodeStateScoring {
|
||||||
|
public:
|
||||||
|
AK_FORCE_INLINE DicNodeStateScoring()
|
||||||
|
: mDoubleLetterLevel(NOT_A_DOUBLE_LETTER),
|
||||||
|
mEditCorrectionCount(0), mProximityCorrectionCount(0),
|
||||||
|
mNormalizedCompoundDistance(0.0f), mSpatialDistance(0.0f), mLanguageDistance(0.0f),
|
||||||
|
mTotalPrevWordsLanguageCost(0.0f), mRawLength(0.0f) {
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual ~DicNodeStateScoring() {}
|
||||||
|
|
||||||
|
void init() {
|
||||||
|
mEditCorrectionCount = 0;
|
||||||
|
mProximityCorrectionCount = 0;
|
||||||
|
mNormalizedCompoundDistance = 0.0f;
|
||||||
|
mSpatialDistance = 0.0f;
|
||||||
|
mLanguageDistance = 0.0f;
|
||||||
|
mTotalPrevWordsLanguageCost = 0.0f;
|
||||||
|
mRawLength = 0.0f;
|
||||||
|
mDoubleLetterLevel = NOT_A_DOUBLE_LETTER;
|
||||||
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE void init(const DicNodeStateScoring *const scoring) {
|
||||||
|
mEditCorrectionCount = scoring->mEditCorrectionCount;
|
||||||
|
mProximityCorrectionCount = scoring->mProximityCorrectionCount;
|
||||||
|
mNormalizedCompoundDistance = scoring->mNormalizedCompoundDistance;
|
||||||
|
mSpatialDistance = scoring->mSpatialDistance;
|
||||||
|
mLanguageDistance = scoring->mLanguageDistance;
|
||||||
|
mTotalPrevWordsLanguageCost = scoring->mTotalPrevWordsLanguageCost;
|
||||||
|
mRawLength = scoring->mRawLength;
|
||||||
|
mDoubleLetterLevel = scoring->mDoubleLetterLevel;
|
||||||
|
}
|
||||||
|
|
||||||
|
void addCost(const float spatialCost, const float languageCost, const bool doNormalization,
|
||||||
|
const int inputSize, const int totalInputIndex, const bool isEditCorrection,
|
||||||
|
const bool isProximityCorrection) {
|
||||||
|
addDistance(spatialCost, languageCost, doNormalization, inputSize, totalInputIndex);
|
||||||
|
if (isEditCorrection) {
|
||||||
|
++mEditCorrectionCount;
|
||||||
|
}
|
||||||
|
if (isProximityCorrection) {
|
||||||
|
++mProximityCorrectionCount;
|
||||||
|
}
|
||||||
|
if (languageCost > 0.0f) {
|
||||||
|
setTotalPrevWordsLanguageCost(mTotalPrevWordsLanguageCost + languageCost);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void addRawLength(const float rawLength) {
|
||||||
|
mRawLength += rawLength;
|
||||||
|
}
|
||||||
|
|
||||||
|
float getCompoundDistance() const {
|
||||||
|
return getCompoundDistance(1.0f);
|
||||||
|
}
|
||||||
|
|
||||||
|
float getCompoundDistance(const float languageWeight) const {
|
||||||
|
return mSpatialDistance + mLanguageDistance * languageWeight;
|
||||||
|
}
|
||||||
|
|
||||||
|
float getNormalizedCompoundDistance() const {
|
||||||
|
return mNormalizedCompoundDistance;
|
||||||
|
}
|
||||||
|
|
||||||
|
float getSpatialDistance() const {
|
||||||
|
return mSpatialDistance;
|
||||||
|
}
|
||||||
|
|
||||||
|
float getLanguageDistance() const {
|
||||||
|
return mLanguageDistance;
|
||||||
|
}
|
||||||
|
|
||||||
|
int16_t getEditCorrectionCount() const {
|
||||||
|
return mEditCorrectionCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
int16_t getProximityCorrectionCount() const {
|
||||||
|
return mProximityCorrectionCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
float getRawLength() const {
|
||||||
|
return mRawLength;
|
||||||
|
}
|
||||||
|
|
||||||
|
DoubleLetterLevel getDoubleLetterLevel() const {
|
||||||
|
return mDoubleLetterLevel;
|
||||||
|
}
|
||||||
|
|
||||||
|
void setDoubleLetterLevel(DoubleLetterLevel doubleLetterLevel) {
|
||||||
|
switch(doubleLetterLevel) {
|
||||||
|
case NOT_A_DOUBLE_LETTER:
|
||||||
|
break;
|
||||||
|
case A_DOUBLE_LETTER:
|
||||||
|
if (mDoubleLetterLevel != A_STRONG_DOUBLE_LETTER) {
|
||||||
|
mDoubleLetterLevel = doubleLetterLevel;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case A_STRONG_DOUBLE_LETTER:
|
||||||
|
mDoubleLetterLevel = doubleLetterLevel;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
float getTotalPrevWordsLanguageCost() const {
|
||||||
|
return mTotalPrevWordsLanguageCost;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
// Caution!!!
|
||||||
|
// Use a default copy constructor and an assign operator because shallow copies are ok
|
||||||
|
// for this class
|
||||||
|
DoubleLetterLevel mDoubleLetterLevel;
|
||||||
|
|
||||||
|
int16_t mEditCorrectionCount;
|
||||||
|
int16_t mProximityCorrectionCount;
|
||||||
|
|
||||||
|
float mNormalizedCompoundDistance;
|
||||||
|
float mSpatialDistance;
|
||||||
|
float mLanguageDistance;
|
||||||
|
float mTotalPrevWordsLanguageCost;
|
||||||
|
float mRawLength;
|
||||||
|
|
||||||
|
AK_FORCE_INLINE void addDistance(float spatialDistance, float languageDistance,
|
||||||
|
bool doNormalization, int inputSize, int totalInputIndex) {
|
||||||
|
mSpatialDistance += spatialDistance;
|
||||||
|
mLanguageDistance += languageDistance;
|
||||||
|
if (!doNormalization) {
|
||||||
|
mNormalizedCompoundDistance = mSpatialDistance + mLanguageDistance;
|
||||||
|
} else {
|
||||||
|
mNormalizedCompoundDistance = (mSpatialDistance + mLanguageDistance)
|
||||||
|
/ static_cast<float>(max(1, totalInputIndex));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//TODO: remove
|
||||||
|
AK_FORCE_INLINE void setTotalPrevWordsLanguageCost(float totalPrevWordsLanguageCost) {
|
||||||
|
mTotalPrevWordsLanguageCost = totalPrevWordsLanguageCost;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
} // namespace latinime
|
||||||
|
#endif // LATINIME_DIC_NODE_STATE_SCORING_H
|
|
@ -0,0 +1,335 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2012 The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <cstring>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "binary_format.h"
|
||||||
|
#include "dic_node.h"
|
||||||
|
#include "dic_node_utils.h"
|
||||||
|
#include "dic_node_vector.h"
|
||||||
|
#include "proximity_info.h"
|
||||||
|
#include "proximity_info_state.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
///////////////////////////////
|
||||||
|
// Node initialization utils //
|
||||||
|
///////////////////////////////
|
||||||
|
|
||||||
|
/* static */ void DicNodeUtils::initAsRoot(const int rootPos, const uint8_t *const dicRoot,
|
||||||
|
const int prevWordNodePos, DicNode *newRootNode) {
|
||||||
|
int curPos = rootPos;
|
||||||
|
const int pos = curPos;
|
||||||
|
const int childrenCount = BinaryFormat::getGroupCountAndForwardPointer(dicRoot, &curPos);
|
||||||
|
const int childrenPos = curPos;
|
||||||
|
newRootNode->initAsRoot(pos, childrenPos, childrenCount, prevWordNodePos);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*static */ void DicNodeUtils::initAsRootWithPreviousWord(const int rootPos,
|
||||||
|
const uint8_t *const dicRoot, DicNode *prevWordLastNode, DicNode *newRootNode) {
|
||||||
|
int curPos = rootPos;
|
||||||
|
const int pos = curPos;
|
||||||
|
const int childrenCount = BinaryFormat::getGroupCountAndForwardPointer(dicRoot, &curPos);
|
||||||
|
const int childrenPos = curPos;
|
||||||
|
newRootNode->initAsRootWithPreviousWord(prevWordLastNode, pos, childrenPos, childrenCount);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* static */ void DicNodeUtils::initByCopy(DicNode *srcNode, DicNode *destNode) {
|
||||||
|
destNode->initByCopy(srcNode);
|
||||||
|
}
|
||||||
|
|
||||||
|
///////////////////////////////////
|
||||||
|
// Traverse node expansion utils //
|
||||||
|
///////////////////////////////////
|
||||||
|
|
||||||
|
/* static */ void DicNodeUtils::createAndGetPassingChildNode(DicNode *dicNode,
|
||||||
|
const ProximityInfoState *pInfoState, const int pointIndex, const bool exactOnly,
|
||||||
|
DicNodeVector *childDicNodes) {
|
||||||
|
// Passing multiple chars node. No need to traverse child
|
||||||
|
const int codePoint = dicNode->getNodeTypedCodePoint();
|
||||||
|
const int baseLowerCaseCodePoint = toBaseLowerCase(codePoint);
|
||||||
|
const bool isMatch = isMatchedNodeCodePoint(pInfoState, pointIndex, exactOnly, codePoint);
|
||||||
|
if (isMatch || isIntentionalOmissionCodePoint(baseLowerCaseCodePoint)) {
|
||||||
|
childDicNodes->pushPassingChild(dicNode);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* static */ int DicNodeUtils::createAndGetLeavingChildNode(DicNode *dicNode, int pos,
|
||||||
|
const uint8_t *const dicRoot, const int terminalDepth, const ProximityInfoState *pInfoState,
|
||||||
|
const int pointIndex, const bool exactOnly, const std::vector<int> *const codePointsFilter,
|
||||||
|
const ProximityInfo *const pInfo, DicNodeVector *childDicNodes) {
|
||||||
|
int nextPos = pos;
|
||||||
|
const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(dicRoot, &pos);
|
||||||
|
const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags));
|
||||||
|
const bool isTerminal = (0 != (BinaryFormat::FLAG_IS_TERMINAL & flags));
|
||||||
|
const bool hasChildren = BinaryFormat::hasChildrenInFlags(flags);
|
||||||
|
|
||||||
|
int codePoint = BinaryFormat::getCodePointAndForwardPointer(dicRoot, &pos);
|
||||||
|
ASSERT(NOT_A_CODE_POINT != codePoint);
|
||||||
|
const int nodeCodePoint = codePoint;
|
||||||
|
// TODO: optimize this
|
||||||
|
int additionalWordBuf[MAX_WORD_LENGTH];
|
||||||
|
uint16_t additionalSubwordLength = 0;
|
||||||
|
additionalWordBuf[additionalSubwordLength++] = codePoint;
|
||||||
|
|
||||||
|
do {
|
||||||
|
const int nextCodePoint = hasMultipleChars
|
||||||
|
? BinaryFormat::getCodePointAndForwardPointer(dicRoot, &pos) : NOT_A_CODE_POINT;
|
||||||
|
const bool isLastChar = (NOT_A_CODE_POINT == nextCodePoint);
|
||||||
|
if (!isLastChar) {
|
||||||
|
additionalWordBuf[additionalSubwordLength++] = nextCodePoint;
|
||||||
|
}
|
||||||
|
codePoint = nextCodePoint;
|
||||||
|
} while (NOT_A_CODE_POINT != codePoint);
|
||||||
|
|
||||||
|
const int probability =
|
||||||
|
isTerminal ? BinaryFormat::readProbabilityWithoutMovingPointer(dicRoot, pos) : -1;
|
||||||
|
pos = BinaryFormat::skipProbability(flags, pos);
|
||||||
|
int childrenPos = hasChildren ? BinaryFormat::readChildrenPosition(dicRoot, flags, pos) : 0;
|
||||||
|
const int attributesPos = BinaryFormat::skipChildrenPosition(flags, pos);
|
||||||
|
const int siblingPos = BinaryFormat::skipChildrenPosAndAttributes(dicRoot, flags, pos);
|
||||||
|
|
||||||
|
if (isDicNodeFilteredOut(nodeCodePoint, pInfo, codePointsFilter)) {
|
||||||
|
return siblingPos;
|
||||||
|
}
|
||||||
|
if (!isMatchedNodeCodePoint(pInfoState, pointIndex, exactOnly, nodeCodePoint)) {
|
||||||
|
return siblingPos;
|
||||||
|
}
|
||||||
|
const int childrenCount = hasChildren
|
||||||
|
? BinaryFormat::getGroupCountAndForwardPointer(dicRoot, &childrenPos) : 0;
|
||||||
|
childDicNodes->pushLeavingChild(dicNode, nextPos, flags, childrenPos, attributesPos, siblingPos,
|
||||||
|
nodeCodePoint, childrenCount, probability, -1 /* bigramProbability */, isTerminal,
|
||||||
|
hasMultipleChars, hasChildren, additionalSubwordLength, additionalWordBuf);
|
||||||
|
return siblingPos;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* static */ bool DicNodeUtils::isDicNodeFilteredOut(const int nodeCodePoint,
|
||||||
|
const ProximityInfo *const pInfo, const std::vector<int> *const codePointsFilter) {
|
||||||
|
const int filterSize = codePointsFilter ? codePointsFilter->size() : 0;
|
||||||
|
if (filterSize <= 0) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (pInfo && (pInfo->getKeyIndexOf(nodeCodePoint) == NOT_AN_INDEX
|
||||||
|
|| isIntentionalOmissionCodePoint(nodeCodePoint))) {
|
||||||
|
// If normalized nodeCodePoint is not on the keyboard or skippable, this child is never
|
||||||
|
// filtered.
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
const int lowerCodePoint = toLowerCase(nodeCodePoint);
|
||||||
|
const int baseLowerCodePoint = toBaseCodePoint(lowerCodePoint);
|
||||||
|
// TODO: Avoid linear search
|
||||||
|
for (int i = 0; i < filterSize; ++i) {
|
||||||
|
// Checking if a normalized code point is in filter characters when pInfo is not
|
||||||
|
// null. When pInfo is null, nodeCodePoint is used to check filtering without
|
||||||
|
// normalizing.
|
||||||
|
if ((pInfo && ((*codePointsFilter)[i] == lowerCodePoint
|
||||||
|
|| (*codePointsFilter)[i] == baseLowerCodePoint))
|
||||||
|
|| (!pInfo && (*codePointsFilter)[i] == nodeCodePoint)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* static */ void DicNodeUtils::createAndGetAllLeavingChildNodes(DicNode *dicNode,
|
||||||
|
const uint8_t *const dicRoot, const ProximityInfoState *pInfoState, const int pointIndex,
|
||||||
|
const bool exactOnly, const std::vector<int> *const codePointsFilter,
|
||||||
|
const ProximityInfo *const pInfo, DicNodeVector *childDicNodes) {
|
||||||
|
const int terminalDepth = dicNode->getLeavingDepth();
|
||||||
|
const int childCount = dicNode->getChildrenCount();
|
||||||
|
int nextPos = dicNode->getChildrenPos();
|
||||||
|
for (int i = 0; i < childCount; i++) {
|
||||||
|
const int filterSize = codePointsFilter ? codePointsFilter->size() : 0;
|
||||||
|
nextPos = createAndGetLeavingChildNode(dicNode, nextPos, dicRoot, terminalDepth, pInfoState,
|
||||||
|
pointIndex, exactOnly, codePointsFilter, pInfo, childDicNodes);
|
||||||
|
if (!pInfo && filterSize > 0 && childDicNodes->exceeds(filterSize)) {
|
||||||
|
// All code points have been found.
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* static */ void DicNodeUtils::getAllChildDicNodes(DicNode *dicNode, const uint8_t *const dicRoot,
|
||||||
|
DicNodeVector *childDicNodes) {
|
||||||
|
getProximityChildDicNodes(dicNode, dicRoot, 0, 0, false, childDicNodes);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* static */ void DicNodeUtils::getProximityChildDicNodes(DicNode *dicNode,
|
||||||
|
const uint8_t *const dicRoot, const ProximityInfoState *pInfoState, const int pointIndex,
|
||||||
|
bool exactOnly, DicNodeVector *childDicNodes) {
|
||||||
|
if (dicNode->isTotalInputSizeExceedingLimit()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (!dicNode->isLeavingNode()) {
|
||||||
|
DicNodeUtils::createAndGetPassingChildNode(dicNode, pInfoState, pointIndex, exactOnly,
|
||||||
|
childDicNodes);
|
||||||
|
} else {
|
||||||
|
DicNodeUtils::createAndGetAllLeavingChildNodes(dicNode, dicRoot, pInfoState, pointIndex,
|
||||||
|
exactOnly, 0 /* codePointsFilter */, 0 /* pInfo */,
|
||||||
|
childDicNodes);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
///////////////////
|
||||||
|
// Scoring utils //
|
||||||
|
///////////////////
|
||||||
|
/**
|
||||||
|
* Computes the combined bigram / unigram cost for the given dicNode.
|
||||||
|
*/
|
||||||
|
/* static */ float DicNodeUtils::getBigramNodeImprobability(const uint8_t *const dicRoot,
|
||||||
|
const DicNode *const node, hash_map_compat<int, int16_t> *bigramCacheMap) {
|
||||||
|
if (node->isImpossibleBigramWord()) {
|
||||||
|
return static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
|
||||||
|
}
|
||||||
|
const int probability = getBigramNodeProbability(dicRoot, node, bigramCacheMap);
|
||||||
|
// TODO: This equation to calculate the improbability looks unreasonable. Investigate this.
|
||||||
|
const float cost = static_cast<float>(MAX_PROBABILITY - probability)
|
||||||
|
/ static_cast<float>(MAX_PROBABILITY);
|
||||||
|
return cost;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* static */ int DicNodeUtils::getBigramNodeProbability(const uint8_t *const dicRoot,
|
||||||
|
const DicNode *const node, hash_map_compat<int, int16_t> *bigramCacheMap) {
|
||||||
|
const int unigramProbability = node->getProbability();
|
||||||
|
const int encodedDiffOfBigramProbability =
|
||||||
|
getBigramNodeEncodedDiffProbability(dicRoot, node, bigramCacheMap);
|
||||||
|
if (NOT_A_PROBABILITY == encodedDiffOfBigramProbability) {
|
||||||
|
return backoff(unigramProbability);
|
||||||
|
}
|
||||||
|
return BinaryFormat::computeProbabilityForBigram(
|
||||||
|
unigramProbability, encodedDiffOfBigramProbability);
|
||||||
|
}
|
||||||
|
|
||||||
|
///////////////////////////////////////
|
||||||
|
// Bigram / Unigram dictionary utils //
|
||||||
|
///////////////////////////////////////
|
||||||
|
|
||||||
|
/* static */ int16_t DicNodeUtils::getBigramNodeEncodedDiffProbability(const uint8_t *const dicRoot,
|
||||||
|
const DicNode *const node, hash_map_compat<int, int16_t> *bigramCacheMap) {
|
||||||
|
const int wordPos = node->getPos();
|
||||||
|
const int prevWordPos = node->getPrevWordPos();
|
||||||
|
return getBigramProbability(dicRoot, prevWordPos, wordPos, bigramCacheMap);
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: Move this to BigramDictionary
|
||||||
|
/* static */ int16_t DicNodeUtils::getBigramProbability(const uint8_t *const dicRoot, int pos,
|
||||||
|
const int nextPos, hash_map_compat<int, int16_t> *bigramCacheMap) {
|
||||||
|
// TODO: this is painfully slow compared to the method used in the previous version of the
|
||||||
|
// algorithm. Switch to that method.
|
||||||
|
if (NOT_VALID_WORD == pos) return NOT_A_PROBABILITY;
|
||||||
|
if (NOT_VALID_WORD == nextPos) return NOT_A_PROBABILITY;
|
||||||
|
|
||||||
|
// Create a hash code for the given node pair (based on Josh Bloch's effective Java).
|
||||||
|
// TODO: Use a real hash map data structure that deals with collisions.
|
||||||
|
int hash = 17;
|
||||||
|
hash = hash * 31 + pos;
|
||||||
|
hash = hash * 31 + nextPos;
|
||||||
|
|
||||||
|
hash_map_compat<int, int16_t>::const_iterator mapPos = bigramCacheMap->find(hash);
|
||||||
|
if (mapPos != bigramCacheMap->end()) {
|
||||||
|
return mapPos->second;
|
||||||
|
}
|
||||||
|
if (NOT_VALID_WORD == pos) {
|
||||||
|
return NOT_A_PROBABILITY;
|
||||||
|
}
|
||||||
|
const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(dicRoot, &pos);
|
||||||
|
if (0 == (flags & BinaryFormat::FLAG_HAS_BIGRAMS)) {
|
||||||
|
return NOT_A_PROBABILITY;
|
||||||
|
}
|
||||||
|
if (0 == (flags & BinaryFormat::FLAG_HAS_MULTIPLE_CHARS)) {
|
||||||
|
BinaryFormat::getCodePointAndForwardPointer(dicRoot, &pos);
|
||||||
|
} else {
|
||||||
|
pos = BinaryFormat::skipOtherCharacters(dicRoot, pos);
|
||||||
|
}
|
||||||
|
pos = BinaryFormat::skipChildrenPosition(flags, pos);
|
||||||
|
pos = BinaryFormat::skipProbability(flags, pos);
|
||||||
|
uint8_t bigramFlags;
|
||||||
|
int count = 0;
|
||||||
|
do {
|
||||||
|
bigramFlags = BinaryFormat::getFlagsAndForwardPointer(dicRoot, &pos);
|
||||||
|
const int bigramPos = BinaryFormat::getAttributeAddressAndForwardPointer(dicRoot,
|
||||||
|
bigramFlags, &pos);
|
||||||
|
if (bigramPos == nextPos) {
|
||||||
|
const int16_t probability = BinaryFormat::MASK_ATTRIBUTE_PROBABILITY & bigramFlags;
|
||||||
|
if (static_cast<int>(bigramCacheMap->size()) < MAX_BIGRAM_MAP_SIZE) {
|
||||||
|
(*bigramCacheMap)[hash] = probability;
|
||||||
|
}
|
||||||
|
return probability;
|
||||||
|
}
|
||||||
|
count++;
|
||||||
|
} while ((0 != (BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags))
|
||||||
|
&& count < MAX_BIGRAMS_CONSIDERED_PER_CONTEXT);
|
||||||
|
if (static_cast<int>(bigramCacheMap->size()) < MAX_BIGRAM_MAP_SIZE) {
|
||||||
|
// TODO: does this -1 mean NOT_VALID_WORD?
|
||||||
|
(*bigramCacheMap)[hash] = -1;
|
||||||
|
}
|
||||||
|
return NOT_A_PROBABILITY;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* static */ int DicNodeUtils::getWordPos(const uint8_t *const dicRoot, const int *word,
|
||||||
|
const int wordLength) {
|
||||||
|
if (!word) {
|
||||||
|
return NOT_VALID_WORD;
|
||||||
|
}
|
||||||
|
return BinaryFormat::getTerminalPosition(
|
||||||
|
dicRoot, word, wordLength, false /* forceLowerCaseSearch */);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* static */ bool DicNodeUtils::isMatchedNodeCodePoint(const ProximityInfoState *pInfoState,
|
||||||
|
const int pointIndex, const bool exactOnly, const int nodeCodePoint) {
|
||||||
|
if (!pInfoState) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if (exactOnly) {
|
||||||
|
return pInfoState->getPrimaryCodePointAt(pointIndex) == nodeCodePoint;
|
||||||
|
}
|
||||||
|
const ProximityType matchedId = pInfoState->getProximityType(pointIndex, nodeCodePoint,
|
||||||
|
true /* checkProximityChars */);
|
||||||
|
return isProximityChar(matchedId);
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////
|
||||||
|
// Char utils //
|
||||||
|
////////////////
|
||||||
|
|
||||||
|
// TODO: Move to char_utils?
|
||||||
|
/* static */ int DicNodeUtils::appendTwoWords(const int *const src0, const int16_t length0,
|
||||||
|
const int *const src1, const int16_t length1, int *dest) {
|
||||||
|
int actualLength0 = 0;
|
||||||
|
for (int i = 0; i < length0; ++i) {
|
||||||
|
if (src0[i] == 0) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
actualLength0 = i + 1;
|
||||||
|
}
|
||||||
|
actualLength0 = min(actualLength0, MAX_WORD_LENGTH);
|
||||||
|
memcpy(dest, src0, actualLength0 * sizeof(dest[0]));
|
||||||
|
if (!src1 || length1 == 0) {
|
||||||
|
return actualLength0;
|
||||||
|
}
|
||||||
|
int actualLength1 = 0;
|
||||||
|
for (int i = 0; i < length1; ++i) {
|
||||||
|
if (src1[i] == 0) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
actualLength1 = i + 1;
|
||||||
|
}
|
||||||
|
actualLength1 = min(actualLength1, MAX_WORD_LENGTH - actualLength0 - 1);
|
||||||
|
memcpy(&dest[actualLength0], src1, actualLength1 * sizeof(dest[0]));
|
||||||
|
return actualLength0 + actualLength1;
|
||||||
|
}
|
||||||
|
} // namespace latinime
|
|
@ -0,0 +1,88 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2012 The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LATINIME_DIC_NODE_UTILS_H
|
||||||
|
#define LATINIME_DIC_NODE_UTILS_H
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "defines.h"
|
||||||
|
#include "hash_map_compat.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
class DicNode;
|
||||||
|
class DicNodeVector;
|
||||||
|
class ProximityInfo;
|
||||||
|
class ProximityInfoState;
|
||||||
|
|
||||||
|
class DicNodeUtils {
|
||||||
|
public:
|
||||||
|
static int appendTwoWords(const int *src0, const int16_t length0, const int *src1,
|
||||||
|
const int16_t length1, int *dest);
|
||||||
|
static void initAsRoot(const int rootPos, const uint8_t *const dicRoot,
|
||||||
|
const int prevWordNodePos, DicNode *newRootNode);
|
||||||
|
static void initAsRootWithPreviousWord(const int rootPos, const uint8_t *const dicRoot,
|
||||||
|
DicNode *prevWordLastNode, DicNode *newRootNode);
|
||||||
|
static void initByCopy(DicNode *srcNode, DicNode *destNode);
|
||||||
|
static void getAllChildDicNodes(DicNode *dicNode, const uint8_t *const dicRoot,
|
||||||
|
DicNodeVector *childDicNodes);
|
||||||
|
static int getWordPos(const uint8_t *const dicRoot, const int *word, const int prevWordLength);
|
||||||
|
static float getBigramNodeImprobability(const uint8_t *const dicRoot,
|
||||||
|
const DicNode *const node, hash_map_compat<int, int16_t> *const bigramCacheMap);
|
||||||
|
static bool isDicNodeFilteredOut(const int nodeCodePoint, const ProximityInfo *const pInfo,
|
||||||
|
const std::vector<int> *const codePointsFilter);
|
||||||
|
// TODO: Move to private
|
||||||
|
static void getProximityChildDicNodes(DicNode *dicNode, const uint8_t *const dicRoot,
|
||||||
|
const ProximityInfoState *pInfoState, const int pointIndex, bool exactOnly,
|
||||||
|
DicNodeVector *childDicNodes);
|
||||||
|
|
||||||
|
// TODO: Move to proximity info
|
||||||
|
static bool isProximityChar(ProximityType type) {
|
||||||
|
return type == MATCH_CHAR || type == PROXIMITY_CHAR || type == ADDITIONAL_PROXIMITY_CHAR;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
DISALLOW_IMPLICIT_CONSTRUCTORS(DicNodeUtils);
|
||||||
|
// Max cache size for the space omission error correction bigram lookup
|
||||||
|
static const int MAX_BIGRAM_MAP_SIZE = 20000;
|
||||||
|
// Max number of bigrams to look up
|
||||||
|
static const int MAX_BIGRAMS_CONSIDERED_PER_CONTEXT = 500;
|
||||||
|
|
||||||
|
static int getBigramNodeProbability(const uint8_t *const dicRoot, const DicNode *const node,
|
||||||
|
hash_map_compat<int, int16_t> *bigramCacheMap);
|
||||||
|
static int16_t getBigramNodeEncodedDiffProbability(const uint8_t *const dicRoot,
|
||||||
|
const DicNode *const node, hash_map_compat<int, int16_t> *bigramCacheMap);
|
||||||
|
static void createAndGetPassingChildNode(DicNode *dicNode, const ProximityInfoState *pInfoState,
|
||||||
|
const int pointIndex, const bool exactOnly, DicNodeVector *childDicNodes);
|
||||||
|
static void createAndGetAllLeavingChildNodes(DicNode *dicNode, const uint8_t *const dicRoot,
|
||||||
|
const ProximityInfoState *pInfoState, const int pointIndex, const bool exactOnly,
|
||||||
|
const std::vector<int> *const codePointsFilter,
|
||||||
|
const ProximityInfo *const pInfo, DicNodeVector *childDicNodes);
|
||||||
|
static int createAndGetLeavingChildNode(DicNode *dicNode, int pos, const uint8_t *const dicRoot,
|
||||||
|
const int terminalDepth, const ProximityInfoState *pInfoState, const int pointIndex,
|
||||||
|
const bool exactOnly, const std::vector<int> *const codePointsFilter,
|
||||||
|
const ProximityInfo *const pInfo, DicNodeVector *childDicNodes);
|
||||||
|
static int16_t getBigramProbability(const uint8_t *const dicRoot, int pos, const int nextPos,
|
||||||
|
hash_map_compat<int, int16_t> *bigramCacheMap);
|
||||||
|
|
||||||
|
// TODO: Move to proximity info
|
||||||
|
static bool isMatchedNodeCodePoint(const ProximityInfoState *pInfoState, const int pointIndex,
|
||||||
|
const bool exactOnly, const int nodeCodePoint);
|
||||||
|
};
|
||||||
|
} // namespace latinime
|
||||||
|
#endif // LATINIME_DIC_NODE_UTILS_H
|
|
@ -0,0 +1,95 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2012 The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LATINIME_DIC_NODE_VECTOR_H
|
||||||
|
#define LATINIME_DIC_NODE_VECTOR_H
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "defines.h"
|
||||||
|
#include "dic_node.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
class DicNodeVector {
|
||||||
|
public:
|
||||||
|
#ifdef FLAG_DBG
|
||||||
|
// 0 will introduce resizing the vector.
|
||||||
|
static const int DEFAULT_NODES_SIZE_FOR_OPTIMIZATION = 0;
|
||||||
|
#else
|
||||||
|
static const int DEFAULT_NODES_SIZE_FOR_OPTIMIZATION = 60;
|
||||||
|
#endif
|
||||||
|
AK_FORCE_INLINE DicNodeVector() : mDicNodes(0), mLock(false), mEmptyNode() {}
|
||||||
|
|
||||||
|
// Specify the capacity of the vector
|
||||||
|
AK_FORCE_INLINE DicNodeVector(const int size) : mDicNodes(0), mLock(false), mEmptyNode() {
|
||||||
|
mDicNodes.reserve(size);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Non virtual inline destructor -- never inherit this class
|
||||||
|
AK_FORCE_INLINE ~DicNodeVector() {}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE void clear() {
|
||||||
|
mDicNodes.clear();
|
||||||
|
mLock = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
int getSizeAndLock() {
|
||||||
|
mLock = true;
|
||||||
|
return static_cast<int>(mDicNodes.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
bool exceeds(const size_t limit) const {
|
||||||
|
return mDicNodes.size() >= limit;
|
||||||
|
}
|
||||||
|
|
||||||
|
void pushPassingChild(DicNode *dicNode) {
|
||||||
|
ASSERT(!mLock);
|
||||||
|
mDicNodes.push_back(mEmptyNode);
|
||||||
|
mDicNodes.back().initAsPassingChild(dicNode);
|
||||||
|
}
|
||||||
|
|
||||||
|
void pushLeavingChild(DicNode *dicNode, const int pos, const uint8_t flags,
|
||||||
|
const int childrenPos, const int attributesPos, const int siblingPos,
|
||||||
|
const int nodeCodePoint, const int childrenCount, const int probability,
|
||||||
|
const int bigramProbability, const bool isTerminal, const bool hasMultipleChars,
|
||||||
|
const bool hasChildren, const uint16_t additionalSubwordLength,
|
||||||
|
const int *additionalSubword) {
|
||||||
|
ASSERT(!mLock);
|
||||||
|
mDicNodes.push_back(mEmptyNode);
|
||||||
|
mDicNodes.back().initAsChild(dicNode, pos, flags, childrenPos, attributesPos, siblingPos,
|
||||||
|
nodeCodePoint, childrenCount, probability, -1 /* bigramProbability */, isTerminal,
|
||||||
|
hasMultipleChars, hasChildren, additionalSubwordLength, additionalSubword);
|
||||||
|
}
|
||||||
|
|
||||||
|
DicNode *operator[](const int id) {
|
||||||
|
ASSERT(id < static_cast<int>(mDicNodes.size()));
|
||||||
|
return &mDicNodes[id];
|
||||||
|
}
|
||||||
|
|
||||||
|
DicNode *front() {
|
||||||
|
ASSERT(1 <= static_cast<int>(mDicNodes.size()));
|
||||||
|
return &mDicNodes[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
DISALLOW_COPY_AND_ASSIGN(DicNodeVector);
|
||||||
|
std::vector<DicNode> mDicNodes;
|
||||||
|
bool mLock;
|
||||||
|
DicNode mEmptyNode;
|
||||||
|
};
|
||||||
|
} // namespace latinime
|
||||||
|
#endif // LATINIME_DIC_NODE_VECTOR_H
|
Loading…
Reference in New Issue