am bde25a4a: am 38c26dd0: Move dicnode to AOSP
* commit 'bde25a4a6334f4120ec696c693ab9bf12ee979e4': Move dicnode to AOSPmain
commit
fdf3c49c8b
|
@ -26,7 +26,10 @@ include $(CLEAR_VARS)
|
|||
LATIN_IME_SRC_DIR := src
|
||||
LATIN_IME_SRC_FULLPATH_DIR := $(LOCAL_PATH)/$(LATIN_IME_SRC_DIR)
|
||||
|
||||
LOCAL_C_INCLUDES += $(LATIN_IME_SRC_FULLPATH_DIR) $(LATIN_IME_SRC_FULLPATH_DIR)/suggest
|
||||
LOCAL_C_INCLUDES += \
|
||||
$(LATIN_IME_SRC_FULLPATH_DIR) \
|
||||
$(LATIN_IME_SRC_FULLPATH_DIR)/suggest \
|
||||
$(LATIN_IME_SRC_FULLPATH_DIR)/suggest/core/dicnode
|
||||
|
||||
LOCAL_CFLAGS += -Werror -Wall -Wextra -Weffc++ -Wformat=2 -Wcast-qual -Wcast-align \
|
||||
-Wwrite-strings -Wfloat-equal -Wpointer-arith -Winit-self -Wredundant-decls -Wno-system-headers
|
||||
|
@ -59,6 +62,8 @@ LATIN_IME_CORE_SRC_FILES := \
|
|||
proximity_info_state_utils.cpp \
|
||||
unigram_dictionary.cpp \
|
||||
words_priority_queue.cpp \
|
||||
suggest/core/dicnode/dic_node.cpp \
|
||||
suggest/core/dicnode/dic_node_utils.cpp \
|
||||
suggest/gesture_suggest.cpp \
|
||||
suggest/typing_suggest.cpp
|
||||
|
||||
|
|
|
@ -0,0 +1,44 @@
|
|||
/*
|
||||
* Copyright (C) 2012 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dic_node.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
DicNode::DicNode(const DicNode &dicNode)
|
||||
:
|
||||
#if DEBUG_DICT
|
||||
mProfiler(dicNode.mProfiler),
|
||||
#endif
|
||||
mDicNodeProperties(dicNode.mDicNodeProperties), mDicNodeState(dicNode.mDicNodeState),
|
||||
mIsCachedForNextSuggestion(dicNode.mIsCachedForNextSuggestion), mIsUsed(dicNode.mIsUsed),
|
||||
mReleaseListener(0) {
|
||||
/* empty */
|
||||
}
|
||||
|
||||
DicNode &DicNode::operator=(const DicNode &dicNode) {
|
||||
#if DEBUG_DICT
|
||||
mProfiler = dicNode.mProfiler;
|
||||
#endif
|
||||
mDicNodeProperties = dicNode.mDicNodeProperties;
|
||||
mDicNodeState = dicNode.mDicNodeState;
|
||||
mIsCachedForNextSuggestion = dicNode.mIsCachedForNextSuggestion;
|
||||
mIsUsed = dicNode.mIsUsed;
|
||||
mReleaseListener = dicNode.mReleaseListener;
|
||||
return *this;
|
||||
}
|
||||
|
||||
} // namespace latinime
|
|
@ -0,0 +1,572 @@
|
|||
/*
|
||||
* Copyright (C) 2012 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef LATINIME_DIC_NODE_H
|
||||
#define LATINIME_DIC_NODE_H
|
||||
|
||||
#include "char_utils.h"
|
||||
#include "defines.h"
|
||||
#include "dic_node_state.h"
|
||||
#include "dic_node_profiler.h"
|
||||
#include "dic_node_properties.h"
|
||||
#include "dic_node_release_listener.h"
|
||||
|
||||
#if DEBUG_DICT
|
||||
#define LOGI_SHOW_ADD_COST_PROP \
|
||||
do { char charBuf[50]; \
|
||||
INTS_TO_CHARS(getOutputWordBuf(), getDepth(), charBuf); \
|
||||
AKLOGI("%20s, \"%c\", size = %03d, total = %03d, index(0) = %02d, dist = %.4f, %s,,", \
|
||||
__FUNCTION__, getNodeCodePoint(), inputSize, getTotalInputIndex(), \
|
||||
getInputIndex(0), getNormalizedCompoundDistance(), charBuf); } while (0)
|
||||
#define DUMP_WORD_AND_SCORE(header) \
|
||||
do { char charBuf[50]; char prevWordCharBuf[50]; \
|
||||
INTS_TO_CHARS(getOutputWordBuf(), getDepth(), charBuf); \
|
||||
INTS_TO_CHARS(mDicNodeState.mDicNodeStatePrevWord.mPrevWord, \
|
||||
mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength(), prevWordCharBuf); \
|
||||
AKLOGI("#%8s, %5f, %5f, %5f, %5f, %s, %s, %d,,", header, \
|
||||
getSpatialDistanceForScoring(), getLanguageDistanceForScoring(), \
|
||||
getNormalizedCompoundDistance(), getRawLength(), prevWordCharBuf, charBuf, \
|
||||
getInputIndex(0)); \
|
||||
} while (0)
|
||||
#else
|
||||
#define LOGI_SHOW_ADD_COST_PROP
|
||||
#define DUMP_WORD_AND_SCORE(header)
|
||||
#endif
|
||||
|
||||
namespace latinime {
|
||||
|
||||
// Naming convention
|
||||
// - Distance: "Weighted" edit distance -- used both for spatial and language.
|
||||
// - Compound Distance: Spatial Distance + Language Distance -- used for pruning and scoring
|
||||
// - Cost: delta/diff for Distance -- used both for spatial and language
|
||||
// - Length: "Non-weighted" -- used only for spatial
|
||||
// - Probability: "Non-weighted" -- used only for language
|
||||
|
||||
// This struct is purely a bucket to return values. No instances of this struct should be kept.
|
||||
struct DicNode_InputStateG {
|
||||
bool mNeedsToUpdateInputStateG;
|
||||
int mPointerId;
|
||||
int16_t mInputIndex;
|
||||
int mPrevCodePoint;
|
||||
float mTerminalDiffCost;
|
||||
float mRawLength;
|
||||
DoubleLetterLevel mDoubleLetterLevel;
|
||||
};
|
||||
|
||||
class DicNode {
|
||||
// Caveat: We define Weighting as a friend class of DicNode to let Weighting change
|
||||
// the distance of DicNode.
|
||||
// Caution!!! In general, we avoid using the "friend" access modifier.
|
||||
// This is an exception to explicitly hide DicNode::addCost() from all classes but Weighting.
|
||||
friend class Weighting;
|
||||
|
||||
public:
|
||||
#if DEBUG_DICT
|
||||
DicNodeProfiler mProfiler;
|
||||
#endif
|
||||
//////////////////
|
||||
// Memory utils //
|
||||
//////////////////
|
||||
AK_FORCE_INLINE static void managedDelete(DicNode *node) {
|
||||
node->remove();
|
||||
}
|
||||
// end
|
||||
/////////////////
|
||||
|
||||
AK_FORCE_INLINE DicNode()
|
||||
:
|
||||
#if DEBUG_DICT
|
||||
mProfiler(),
|
||||
#endif
|
||||
mDicNodeProperties(), mDicNodeState(), mIsCachedForNextSuggestion(false),
|
||||
mIsUsed(false), mReleaseListener(0) {}
|
||||
|
||||
DicNode(const DicNode &dicNode);
|
||||
DicNode &operator=(const DicNode &dicNode);
|
||||
virtual ~DicNode() {}
|
||||
|
||||
// TODO: minimize arguments by looking binary_format
|
||||
// Init for copy
|
||||
void initByCopy(const DicNode *dicNode) {
|
||||
mIsUsed = true;
|
||||
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
|
||||
mDicNodeProperties.init(&dicNode->mDicNodeProperties);
|
||||
mDicNodeState.init(&dicNode->mDicNodeState);
|
||||
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
|
||||
}
|
||||
|
||||
// TODO: minimize arguments by looking binary_format
|
||||
// Init for root with prevWordNodePos which is used for bigram
|
||||
void initAsRoot(const int pos, const int childrenPos, const int childrenCount,
|
||||
const int prevWordNodePos) {
|
||||
mIsUsed = true;
|
||||
mIsCachedForNextSuggestion = false;
|
||||
mDicNodeProperties.init(
|
||||
pos, 0, childrenPos, 0, 0, 0, childrenCount, 0, 0, false, false, true, 0, 0);
|
||||
mDicNodeState.init(prevWordNodePos);
|
||||
PROF_NODE_RESET(mProfiler);
|
||||
}
|
||||
|
||||
void initAsPassingChild(DicNode *parentNode) {
|
||||
mIsUsed = true;
|
||||
mIsCachedForNextSuggestion = parentNode->mIsCachedForNextSuggestion;
|
||||
const int c = parentNode->getNodeTypedCodePoint();
|
||||
mDicNodeProperties.init(&parentNode->mDicNodeProperties, c);
|
||||
mDicNodeState.init(&parentNode->mDicNodeState);
|
||||
PROF_NODE_COPY(&parentNode->mProfiler, mProfiler);
|
||||
}
|
||||
|
||||
// TODO: minimize arguments by looking binary_format
|
||||
// Init for root with previous word
|
||||
void initAsRootWithPreviousWord(DicNode *dicNode, const int pos, const int childrenPos,
|
||||
const int childrenCount) {
|
||||
mIsUsed = true;
|
||||
mIsCachedForNextSuggestion = false;
|
||||
mDicNodeProperties.init(
|
||||
pos, 0, childrenPos, 0, 0, 0, childrenCount, 0, 0, false, false, true, 0, 0);
|
||||
// TODO: Move to dicNodeState?
|
||||
mDicNodeState.mDicNodeStateOutput.init(); // reset for next word
|
||||
mDicNodeState.mDicNodeStateInput.init(
|
||||
&dicNode->mDicNodeState.mDicNodeStateInput, true /* resetTerminalDiffCost */);
|
||||
mDicNodeState.mDicNodeStateScoring.init(
|
||||
&dicNode->mDicNodeState.mDicNodeStateScoring);
|
||||
mDicNodeState.mDicNodeStatePrevWord.init(
|
||||
dicNode->mDicNodeState.mDicNodeStatePrevWord.getPrevWordCount() + 1,
|
||||
dicNode->mDicNodeProperties.getProbability(),
|
||||
dicNode->mDicNodeProperties.getPos(),
|
||||
dicNode->mDicNodeState.mDicNodeStatePrevWord.mPrevWord,
|
||||
dicNode->mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength(),
|
||||
dicNode->getOutputWordBuf(),
|
||||
dicNode->mDicNodeProperties.getDepth(),
|
||||
dicNode->mDicNodeState.mDicNodeStatePrevWord.mPrevSpacePositions,
|
||||
mDicNodeState.mDicNodeStateInput.getInputIndex(0) /* lastInputIndex */);
|
||||
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
|
||||
}
|
||||
|
||||
// TODO: minimize arguments by looking binary_format
|
||||
void initAsChild(DicNode *dicNode, const int pos, const uint8_t flags, const int childrenPos,
|
||||
const int attributesPos, const int siblingPos, const int nodeCodePoint,
|
||||
const int childrenCount, const int probability, const int bigramProbability,
|
||||
const bool isTerminal, const bool hasMultipleChars, const bool hasChildren,
|
||||
const uint16_t additionalSubwordLength, const int *additionalSubword) {
|
||||
mIsUsed = true;
|
||||
uint16_t newDepth = static_cast<uint16_t>(dicNode->getDepth() + 1);
|
||||
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
|
||||
const uint16_t newLeavingDepth = static_cast<uint16_t>(
|
||||
dicNode->mDicNodeProperties.getLeavingDepth() + additionalSubwordLength);
|
||||
mDicNodeProperties.init(pos, flags, childrenPos, attributesPos, siblingPos, nodeCodePoint,
|
||||
childrenCount, probability, bigramProbability, isTerminal, hasMultipleChars,
|
||||
hasChildren, newDepth, newLeavingDepth);
|
||||
mDicNodeState.init(&dicNode->mDicNodeState, additionalSubwordLength, additionalSubword);
|
||||
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE void remove() {
|
||||
mIsUsed = false;
|
||||
if (mReleaseListener) {
|
||||
mReleaseListener->onReleased(this);
|
||||
}
|
||||
}
|
||||
|
||||
bool isUsed() const {
|
||||
return mIsUsed;
|
||||
}
|
||||
|
||||
bool isRoot() const {
|
||||
return getDepth() == 0;
|
||||
}
|
||||
|
||||
bool hasChildren() const {
|
||||
return mDicNodeProperties.hasChildren();
|
||||
}
|
||||
|
||||
bool isLeavingNode() const {
|
||||
ASSERT(getDepth() <= getLeavingDepth());
|
||||
return getDepth() == getLeavingDepth();
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE bool isFirstLetter() const {
|
||||
return getDepth() == 1;
|
||||
}
|
||||
|
||||
bool isCached() const {
|
||||
return mIsCachedForNextSuggestion;
|
||||
}
|
||||
|
||||
void setCached() {
|
||||
mIsCachedForNextSuggestion = true;
|
||||
}
|
||||
|
||||
// Used to expand the node in DicNodeUtils
|
||||
int getNodeTypedCodePoint() const {
|
||||
return mDicNodeState.mDicNodeStateOutput.getCodePointAt(getDepth());
|
||||
}
|
||||
|
||||
bool isImpossibleBigramWord() const {
|
||||
const int probability = mDicNodeProperties.getProbability();
|
||||
if (probability == 0) {
|
||||
return true;
|
||||
}
|
||||
const int prevWordLen = mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength()
|
||||
- mDicNodeState.mDicNodeStatePrevWord.getPrevWordStart() - 1;
|
||||
const int currentWordLen = getDepth();
|
||||
return (prevWordLen == 1 && currentWordLen == 1);
|
||||
}
|
||||
|
||||
bool isCapitalized() const {
|
||||
const int c = getOutputWordBuf()[0];
|
||||
return isAsciiUpper(c);
|
||||
}
|
||||
|
||||
bool isFirstWord() const {
|
||||
return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos() == NOT_VALID_WORD;
|
||||
}
|
||||
|
||||
bool isCompletion(const int inputSize) const {
|
||||
return mDicNodeState.mDicNodeStateInput.getInputIndex(0) >= inputSize;
|
||||
}
|
||||
|
||||
bool canDoLookAheadCorrection(const int inputSize) const {
|
||||
return mDicNodeState.mDicNodeStateInput.getInputIndex(0) < inputSize - 1;
|
||||
}
|
||||
|
||||
// Used to get bigram probability in DicNodeUtils
|
||||
int getPos() const {
|
||||
return mDicNodeProperties.getPos();
|
||||
}
|
||||
|
||||
// Used to get bigram probability in DicNodeUtils
|
||||
int getPrevWordPos() const {
|
||||
return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos();
|
||||
}
|
||||
|
||||
// Used in DicNodeUtils
|
||||
int getChildrenPos() const {
|
||||
return mDicNodeProperties.getChildrenPos();
|
||||
}
|
||||
|
||||
// Used in DicNodeUtils
|
||||
int getChildrenCount() const {
|
||||
return mDicNodeProperties.getChildrenCount();
|
||||
}
|
||||
|
||||
// Used in DicNodeUtils
|
||||
int getProbability() const {
|
||||
return mDicNodeProperties.getProbability();
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE bool isTerminalWordNode() const {
|
||||
const bool isTerminalNodes = mDicNodeProperties.isTerminal();
|
||||
const int currentNodeDepth = getDepth();
|
||||
const int terminalNodeDepth = mDicNodeProperties.getLeavingDepth();
|
||||
return isTerminalNodes && currentNodeDepth > 0 && currentNodeDepth == terminalNodeDepth;
|
||||
}
|
||||
|
||||
bool shouldBeFilterdBySafetyNetForBigram() const {
|
||||
const uint16_t currentDepth = getDepth();
|
||||
const int prevWordLen = mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength()
|
||||
- mDicNodeState.mDicNodeStatePrevWord.getPrevWordStart() - 1;
|
||||
return !(currentDepth > 0 && (currentDepth != 1 || prevWordLen != 1));
|
||||
}
|
||||
|
||||
uint16_t getLeavingDepth() const {
|
||||
return mDicNodeProperties.getLeavingDepth();
|
||||
}
|
||||
|
||||
bool isTotalInputSizeExceedingLimit() const {
|
||||
const int prevWordsLen = mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength();
|
||||
const int currentWordDepth = getDepth();
|
||||
// TODO: 3 can be 2? Needs to be investigated.
|
||||
// TODO: Have a const variable for 3 (or 2)
|
||||
return prevWordsLen + currentWordDepth > MAX_WORD_LENGTH - 3;
|
||||
}
|
||||
|
||||
// TODO: This may be defective. Needs to be revised.
|
||||
bool truncateNode(const DicNode *const topNode, const int inputCommitPoint) {
|
||||
const int prevWordLenOfTop = mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength();
|
||||
int newPrevWordStartIndex = inputCommitPoint;
|
||||
int charCount = 0;
|
||||
// Find new word start index
|
||||
for (int i = 0; i < prevWordLenOfTop; ++i) {
|
||||
const int c = mDicNodeState.mDicNodeStatePrevWord.getPrevWordCodePointAt(i);
|
||||
// TODO: Check other separators.
|
||||
if (c != KEYCODE_SPACE && c != KEYCODE_SINGLE_QUOTE) {
|
||||
if (charCount == inputCommitPoint) {
|
||||
newPrevWordStartIndex = i;
|
||||
break;
|
||||
}
|
||||
++charCount;
|
||||
}
|
||||
}
|
||||
if (!mDicNodeState.mDicNodeStatePrevWord.startsWith(
|
||||
&topNode->mDicNodeState.mDicNodeStatePrevWord, newPrevWordStartIndex - 1)) {
|
||||
// Node mismatch.
|
||||
return false;
|
||||
}
|
||||
mDicNodeState.mDicNodeStateInput.truncate(inputCommitPoint);
|
||||
mDicNodeState.mDicNodeStatePrevWord.truncate(newPrevWordStartIndex);
|
||||
return true;
|
||||
}
|
||||
|
||||
void outputResult(int *dest) const {
|
||||
const uint16_t prevWordLength = mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength();
|
||||
const uint16_t currentDepth = getDepth();
|
||||
DicNodeUtils::appendTwoWords(mDicNodeState.mDicNodeStatePrevWord.mPrevWord,
|
||||
prevWordLength, getOutputWordBuf(), currentDepth, dest);
|
||||
DUMP_WORD_AND_SCORE("OUTPUT");
|
||||
}
|
||||
|
||||
void outputSpacePositionsResult(int *spaceIndices) const {
|
||||
mDicNodeState.mDicNodeStatePrevWord.outputSpacePositions(spaceIndices);
|
||||
}
|
||||
|
||||
bool hasMultipleWords() const {
|
||||
return mDicNodeState.mDicNodeStatePrevWord.getPrevWordCount() > 0;
|
||||
}
|
||||
|
||||
float getProximityCorrectionCount() const {
|
||||
return static_cast<float>(mDicNodeState.mDicNodeStateScoring.getProximityCorrectionCount());
|
||||
}
|
||||
|
||||
float getEditCorrectionCount() const {
|
||||
return static_cast<float>(mDicNodeState.mDicNodeStateScoring.getEditCorrectionCount());
|
||||
}
|
||||
|
||||
// Used to prune nodes
|
||||
float getNormalizedCompoundDistance() const {
|
||||
return mDicNodeState.mDicNodeStateScoring.getNormalizedCompoundDistance();
|
||||
}
|
||||
|
||||
// Used to prune nodes
|
||||
float getNormalizedSpatialDistance() const {
|
||||
return mDicNodeState.mDicNodeStateScoring.getSpatialDistance()
|
||||
/ static_cast<float>(getInputIndex(0) + 1);
|
||||
}
|
||||
|
||||
// Used to prune nodes
|
||||
float getCompoundDistance() const {
|
||||
return mDicNodeState.mDicNodeStateScoring.getCompoundDistance();
|
||||
}
|
||||
|
||||
// Used to prune nodes
|
||||
float getCompoundDistance(const float languageWeight) const {
|
||||
return mDicNodeState.mDicNodeStateScoring.getCompoundDistance(languageWeight);
|
||||
}
|
||||
|
||||
// Note that "cost" means delta for "distance" that is weighted.
|
||||
float getTotalPrevWordsLanguageCost() const {
|
||||
return mDicNodeState.mDicNodeStateScoring.getTotalPrevWordsLanguageCost();
|
||||
}
|
||||
|
||||
// Used to commit input partially
|
||||
int getPrevWordNodePos() const {
|
||||
return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos();
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE const int *getOutputWordBuf() const {
|
||||
return mDicNodeState.mDicNodeStateOutput.mWordBuf;
|
||||
}
|
||||
|
||||
int getPrevCodePointG(int pointerId) const {
|
||||
return mDicNodeState.mDicNodeStateInput.getPrevCodePoint(pointerId);
|
||||
}
|
||||
|
||||
// Whether the current codepoint can be an intentional omission, in which case the traversal
|
||||
// algorithm will always check for a possible omission here.
|
||||
bool canBeIntentionalOmission() const {
|
||||
return isIntentionalOmissionCodePoint(getNodeCodePoint());
|
||||
}
|
||||
|
||||
// Whether the omission is so frequent that it should incur zero cost.
|
||||
bool isZeroCostOmission() const {
|
||||
// TODO: do not hardcode and read from header
|
||||
return (getNodeCodePoint() == KEYCODE_SINGLE_QUOTE);
|
||||
}
|
||||
|
||||
// TODO: remove
|
||||
float getTerminalDiffCostG(int path) const {
|
||||
return mDicNodeState.mDicNodeStateInput.getTerminalDiffCost(path);
|
||||
}
|
||||
|
||||
//////////////////////
|
||||
// Temporary getter //
|
||||
// TODO: Remove //
|
||||
//////////////////////
|
||||
// TODO: Remove once touch path is merged into ProximityInfoState
|
||||
int getNodeCodePoint() const {
|
||||
return mDicNodeProperties.getNodeCodePoint();
|
||||
}
|
||||
|
||||
////////////////////////////////
|
||||
// Utils for cost calculation //
|
||||
////////////////////////////////
|
||||
AK_FORCE_INLINE bool isSameNodeCodePoint(const DicNode *const dicNode) const {
|
||||
return mDicNodeProperties.getNodeCodePoint()
|
||||
== dicNode->mDicNodeProperties.getNodeCodePoint();
|
||||
}
|
||||
|
||||
// TODO: remove
|
||||
// TODO: rename getNextInputIndex
|
||||
int16_t getInputIndex(int pointerId) const {
|
||||
return mDicNodeState.mDicNodeStateInput.getInputIndex(pointerId);
|
||||
}
|
||||
|
||||
////////////////////////////////////
|
||||
// Getter of features for scoring //
|
||||
////////////////////////////////////
|
||||
float getSpatialDistanceForScoring() const {
|
||||
return mDicNodeState.mDicNodeStateScoring.getSpatialDistance();
|
||||
}
|
||||
|
||||
float getLanguageDistanceForScoring() const {
|
||||
return mDicNodeState.mDicNodeStateScoring.getLanguageDistance();
|
||||
}
|
||||
|
||||
float getLanguageDistanceRatePerWordForScoring() const {
|
||||
const float langDist = getLanguageDistanceForScoring();
|
||||
const float totalWordCount =
|
||||
static_cast<float>(mDicNodeState.mDicNodeStatePrevWord.getPrevWordCount() + 1);
|
||||
return langDist / totalWordCount;
|
||||
}
|
||||
|
||||
float getRawLength() const {
|
||||
return mDicNodeState.mDicNodeStateScoring.getRawLength();
|
||||
}
|
||||
|
||||
bool isLessThanOneErrorForScoring() const {
|
||||
return mDicNodeState.mDicNodeStateScoring.getEditCorrectionCount()
|
||||
+ mDicNodeState.mDicNodeStateScoring.getProximityCorrectionCount() <= 1;
|
||||
}
|
||||
|
||||
DoubleLetterLevel getDoubleLetterLevel() const {
|
||||
return mDicNodeState.mDicNodeStateScoring.getDoubleLetterLevel();
|
||||
}
|
||||
|
||||
void setDoubleLetterLevel(DoubleLetterLevel doubleLetterLevel) {
|
||||
mDicNodeState.mDicNodeStateScoring.setDoubleLetterLevel(doubleLetterLevel);
|
||||
}
|
||||
|
||||
uint8_t getFlags() const {
|
||||
return mDicNodeProperties.getFlags();
|
||||
}
|
||||
|
||||
int getAttributesPos() const {
|
||||
return mDicNodeProperties.getAttributesPos();
|
||||
}
|
||||
|
||||
inline uint16_t getDepth() const {
|
||||
return mDicNodeProperties.getDepth();
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE void dump(const char *tag) const {
|
||||
#if DEBUG_DICT
|
||||
DUMP_WORD_AND_SCORE(tag);
|
||||
#if DEBUG_DUMP_ERROR
|
||||
mProfiler.dump();
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
void setReleaseListener(DicNodeReleaseListener *releaseListener) {
|
||||
mReleaseListener = releaseListener;
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE bool compare(const DicNode *right) {
|
||||
if (!isUsed() && !right->isUsed()) {
|
||||
// Compare pointer values here for stable comparison
|
||||
return this > right;
|
||||
}
|
||||
if (!isUsed()) {
|
||||
return true;
|
||||
}
|
||||
if (!right->isUsed()) {
|
||||
return false;
|
||||
}
|
||||
const float diff =
|
||||
right->getNormalizedCompoundDistance() - getNormalizedCompoundDistance();
|
||||
static const float MIN_DIFF = 0.000001f;
|
||||
if (diff > MIN_DIFF) {
|
||||
return true;
|
||||
} else if (diff < -MIN_DIFF) {
|
||||
return false;
|
||||
}
|
||||
const int depth = getDepth();
|
||||
const int depthDiff = right->getDepth() - depth;
|
||||
if (depthDiff != 0) {
|
||||
return depthDiff > 0;
|
||||
}
|
||||
for (int i = 0; i < depth; ++i) {
|
||||
const int codePoint = mDicNodeState.mDicNodeStateOutput.getCodePointAt(i);
|
||||
const int rightCodePoint = right->mDicNodeState.mDicNodeStateOutput.getCodePointAt(i);
|
||||
if (codePoint != rightCodePoint) {
|
||||
return rightCodePoint > codePoint;
|
||||
}
|
||||
}
|
||||
// Compare pointer values here for stable comparison
|
||||
return this > right;
|
||||
}
|
||||
|
||||
private:
|
||||
DicNodeProperties mDicNodeProperties;
|
||||
DicNodeState mDicNodeState;
|
||||
// TODO: Remove
|
||||
bool mIsCachedForNextSuggestion;
|
||||
bool mIsUsed;
|
||||
DicNodeReleaseListener *mReleaseListener;
|
||||
|
||||
AK_FORCE_INLINE int getTotalInputIndex() const {
|
||||
int index = 0;
|
||||
for (int i = 0; i < MAX_POINTER_COUNT_G; i++) {
|
||||
index += mDicNodeState.mDicNodeStateInput.getInputIndex(i);
|
||||
}
|
||||
return index;
|
||||
}
|
||||
|
||||
// Caveat: Must not be called outside Weighting
|
||||
// This restriction is guaranteed by "friend"
|
||||
AK_FORCE_INLINE void addCost(const float spatialCost, const float languageCost,
|
||||
const bool doNormalization, const int inputSize, const bool isEditCorrection,
|
||||
const bool isProximityCorrection) {
|
||||
if (DEBUG_GEO_FULL) {
|
||||
LOGI_SHOW_ADD_COST_PROP;
|
||||
}
|
||||
mDicNodeState.mDicNodeStateScoring.addCost(spatialCost, languageCost, doNormalization,
|
||||
inputSize, getTotalInputIndex(), isEditCorrection, isProximityCorrection);
|
||||
}
|
||||
|
||||
// Caveat: Must not be called outside Weighting
|
||||
// This restriction is guaranteed by "friend"
|
||||
AK_FORCE_INLINE void forwardInputIndex(const int pointerId, const int count,
|
||||
const bool overwritesPrevCodePointByNodeCodePoint) {
|
||||
if (count == 0) {
|
||||
return;
|
||||
}
|
||||
mDicNodeState.mDicNodeStateInput.forwardInputIndex(pointerId, count);
|
||||
if (overwritesPrevCodePointByNodeCodePoint) {
|
||||
mDicNodeState.mDicNodeStateInput.setPrevCodePoint(0, getNodeCodePoint());
|
||||
}
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE void updateInputIndexG(DicNode_InputStateG *inputStateG) {
|
||||
mDicNodeState.mDicNodeStateInput.updateInputIndexG(inputStateG->mPointerId,
|
||||
inputStateG->mInputIndex, inputStateG->mPrevCodePoint,
|
||||
inputStateG->mTerminalDiffCost, inputStateG->mRawLength);
|
||||
mDicNodeState.mDicNodeStateScoring.addRawLength(inputStateG->mRawLength);
|
||||
mDicNodeState.mDicNodeStateScoring.setDoubleLetterLevel(inputStateG->mDoubleLetterLevel);
|
||||
}
|
||||
};
|
||||
} // namespace latinime
|
||||
#endif // LATINIME_DIC_NODE_H
|
|
@ -0,0 +1,213 @@
|
|||
/*
|
||||
* Copyright (C) 2012 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef LATINIME_DIC_NODE_PRIORITY_QUEUE_H
|
||||
#define LATINIME_DIC_NODE_PRIORITY_QUEUE_H
|
||||
|
||||
#include <queue>
|
||||
#include <vector>
|
||||
|
||||
#include "defines.h"
|
||||
#include "dic_node.h"
|
||||
#include "dic_node_release_listener.h"
|
||||
|
||||
#define MAX_DIC_NODE_PRIORITY_QUEUE_CAPACITY 200
|
||||
|
||||
namespace latinime {
|
||||
|
||||
class DicNodePriorityQueue : public DicNodeReleaseListener {
|
||||
public:
|
||||
AK_FORCE_INLINE DicNodePriorityQueue()
|
||||
: MAX_CAPACITY(MAX_DIC_NODE_PRIORITY_QUEUE_CAPACITY),
|
||||
mMaxSize(MAX_DIC_NODE_PRIORITY_QUEUE_CAPACITY), mDicNodesBuf(), mUnusedNodeIndices(),
|
||||
mNextUnusedNodeId(0), mDicNodesQueue() {
|
||||
mDicNodesBuf.resize(MAX_CAPACITY + 1);
|
||||
mUnusedNodeIndices.resize(MAX_CAPACITY + 1);
|
||||
reset();
|
||||
}
|
||||
|
||||
// Non virtual inline destructor -- never inherit this class
|
||||
AK_FORCE_INLINE ~DicNodePriorityQueue() {}
|
||||
|
||||
int getSize() const {
|
||||
return static_cast<int>(mDicNodesQueue.size());
|
||||
}
|
||||
|
||||
int getMaxSize() const {
|
||||
return mMaxSize;
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE void setMaxSize(const int maxSize) {
|
||||
mMaxSize = min(maxSize, MAX_CAPACITY);
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE void reset() {
|
||||
clearAndResize(MAX_CAPACITY);
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE void clear() {
|
||||
clearAndResize(mMaxSize);
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE void clearAndResize(const int maxSize) {
|
||||
while (!mDicNodesQueue.empty()) {
|
||||
mDicNodesQueue.pop();
|
||||
}
|
||||
setMaxSize(maxSize);
|
||||
for (int i = 0; i < MAX_CAPACITY + 1; ++i) {
|
||||
mDicNodesBuf[i].remove();
|
||||
mDicNodesBuf[i].setReleaseListener(this);
|
||||
mUnusedNodeIndices[i] = i == MAX_CAPACITY ? NOT_A_NODE_ID : static_cast<int>(i) + 1;
|
||||
}
|
||||
mNextUnusedNodeId = 0;
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE DicNode *newDicNode(DicNode *dicNode) {
|
||||
DicNode *newNode = searchEmptyDicNode();
|
||||
if (newNode) {
|
||||
DicNodeUtils::initByCopy(dicNode, newNode);
|
||||
return newNode;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Copy
|
||||
AK_FORCE_INLINE DicNode *copyPush(DicNode *dicNode) {
|
||||
return copyPush(dicNode, mMaxSize);
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE void copyPop(DicNode *dest) {
|
||||
if (mDicNodesQueue.empty()) {
|
||||
ASSERT(false);
|
||||
return;
|
||||
}
|
||||
DicNode *node = mDicNodesQueue.top();
|
||||
if (dest) {
|
||||
DicNodeUtils::initByCopy(node, dest);
|
||||
}
|
||||
node->remove();
|
||||
mDicNodesQueue.pop();
|
||||
}
|
||||
|
||||
void onReleased(DicNode *dicNode) {
|
||||
const int index = static_cast<int>(dicNode - &mDicNodesBuf[0]);
|
||||
if (mUnusedNodeIndices[index] != NOT_A_NODE_ID) {
|
||||
// it's already released
|
||||
return;
|
||||
}
|
||||
mUnusedNodeIndices[index] = mNextUnusedNodeId;
|
||||
mNextUnusedNodeId = index;
|
||||
ASSERT(index >= 0 && index < (MAX_CAPACITY + 1));
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE void dump() const {
|
||||
AKLOGI("\n\n\n\n\n===========================");
|
||||
for (int i = 0; i < MAX_CAPACITY + 1; ++i) {
|
||||
if (mDicNodesBuf[i].isUsed()) {
|
||||
mDicNodesBuf[i].dump("QUEUE: ");
|
||||
}
|
||||
}
|
||||
AKLOGI("===========================\n\n\n\n\n");
|
||||
}
|
||||
|
||||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(DicNodePriorityQueue);
|
||||
static const int NOT_A_NODE_ID = -1;
|
||||
|
||||
AK_FORCE_INLINE static bool compareDicNode(DicNode *left, DicNode *right) {
|
||||
return left->compare(right);
|
||||
}
|
||||
|
||||
struct DicNodeComparator {
|
||||
bool operator ()(DicNode *left, DicNode *right) {
|
||||
return compareDicNode(left, right);
|
||||
}
|
||||
};
|
||||
|
||||
typedef std::priority_queue<DicNode *, std::vector<DicNode *>, DicNodeComparator> DicNodesQueue;
|
||||
const int MAX_CAPACITY;
|
||||
int mMaxSize;
|
||||
std::vector<DicNode> mDicNodesBuf; // of each element of mDicNodesBuf respectively
|
||||
std::vector<int> mUnusedNodeIndices;
|
||||
int mNextUnusedNodeId;
|
||||
DicNodesQueue mDicNodesQueue;
|
||||
|
||||
inline bool isFull(const int maxSize) const {
|
||||
return getSize() >= maxSize;
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE void pop() {
|
||||
copyPop(0);
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE bool betterThanWorstDicNode(DicNode *dicNode) const {
|
||||
DicNode *worstNode = mDicNodesQueue.top();
|
||||
if (!worstNode) {
|
||||
return true;
|
||||
}
|
||||
return compareDicNode(dicNode, worstNode);
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE DicNode *searchEmptyDicNode() {
|
||||
// TODO: Currently O(n) but should be improved to O(1)
|
||||
if (MAX_CAPACITY == 0) {
|
||||
return 0;
|
||||
}
|
||||
if (mNextUnusedNodeId == NOT_A_NODE_ID) {
|
||||
AKLOGI("No unused node found.");
|
||||
for (int i = 0; i < MAX_CAPACITY + 1; ++i) {
|
||||
AKLOGI("Dump node availability, %d, %d, %d",
|
||||
i, mDicNodesBuf[i].isUsed(), mUnusedNodeIndices[i]);
|
||||
}
|
||||
ASSERT(false);
|
||||
return 0;
|
||||
}
|
||||
DicNode *dicNode = &mDicNodesBuf[mNextUnusedNodeId];
|
||||
markNodeAsUsed(dicNode);
|
||||
return dicNode;
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE void markNodeAsUsed(DicNode *dicNode) {
|
||||
const int index = static_cast<int>(dicNode - &mDicNodesBuf[0]);
|
||||
mNextUnusedNodeId = mUnusedNodeIndices[index];
|
||||
mUnusedNodeIndices[index] = NOT_A_NODE_ID;
|
||||
ASSERT(index >= 0 && index < (MAX_CAPACITY + 1));
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE DicNode *pushPoolNodeWithMaxSize(DicNode *dicNode, const int maxSize) {
|
||||
if (!dicNode) {
|
||||
return 0;
|
||||
}
|
||||
if (!isFull(maxSize)) {
|
||||
mDicNodesQueue.push(dicNode);
|
||||
return dicNode;
|
||||
}
|
||||
if (betterThanWorstDicNode(dicNode)) {
|
||||
pop();
|
||||
mDicNodesQueue.push(dicNode);
|
||||
return dicNode;
|
||||
}
|
||||
dicNode->remove();
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Copy
|
||||
AK_FORCE_INLINE DicNode *copyPush(DicNode *dicNode, const int maxSize) {
|
||||
return pushPoolNodeWithMaxSize(newDicNode(dicNode), maxSize);
|
||||
}
|
||||
};
|
||||
} // namespace latinime
|
||||
#endif // LATINIME_DIC_NODE_PRIORITY_QUEUE_H
|
|
@ -0,0 +1,181 @@
|
|||
/*
|
||||
* Copyright (C) 2012 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef LATINIME_DIC_NODE_PROFILER_H
|
||||
#define LATINIME_DIC_NODE_PROFILER_H
|
||||
|
||||
#include "defines.h"
|
||||
|
||||
#if DEBUG_DICT
|
||||
#define PROF_SPACE_SUBSTITUTION(profiler) profiler.profSpaceSubstitution()
|
||||
#define PROF_SPACE_OMISSION(profiler) profiler.profSpaceOmission()
|
||||
#define PROF_ADDITIONAL_PROXIMITY(profiler) profiler.profAdditionalProximity()
|
||||
#define PROF_SUBSTITUTION(profiler) profiler.profSubstitution()
|
||||
#define PROF_OMISSION(profiler) profiler.profOmission()
|
||||
#define PROF_INSERTION(profiler) profiler.profInsertion()
|
||||
#define PROF_MATCH(profiler) profiler.profMatch()
|
||||
#define PROF_COMPLETION(profiler) profiler.profCompletion()
|
||||
#define PROF_TRANSPOSITION(profiler) profiler.profTransposition()
|
||||
#define PROF_NEARESTKEY(profiler) profiler.profNearestKey()
|
||||
#define PROF_TERMINAL(profiler) profiler.profTerminal()
|
||||
#define PROF_NEW_WORD(profiler) profiler.profNewWord()
|
||||
#define PROF_NEW_WORD_BIGRAM(profiler) profiler.profNewWordBigram()
|
||||
#define PROF_NODE_RESET(profiler) profiler.reset()
|
||||
#define PROF_NODE_COPY(src, dest) dest.copy(src)
|
||||
#else
|
||||
#define PROF_SPACE_SUBSTITUTION(profiler)
|
||||
#define PROF_SPACE_OMISSION(profiler)
|
||||
#define PROF_ADDITONAL_PROXIMITY(profiler)
|
||||
#define PROF_SUBSTITUTION(profiler)
|
||||
#define PROF_OMISSION(profiler)
|
||||
#define PROF_INSERTION(profiler)
|
||||
#define PROF_MATCH(profiler)
|
||||
#define PROF_COMPLETION(profiler)
|
||||
#define PROF_TRANSPOSITION(profiler)
|
||||
#define PROF_NEARESTKEY(profiler)
|
||||
#define PROF_TERMINAL(profiler)
|
||||
#define PROF_NEW_WORD(profiler)
|
||||
#define PROF_NEW_WORD_BIGRAM(profiler)
|
||||
#define PROF_NODE_RESET(profiler)
|
||||
#define PROF_NODE_COPY(src, dest)
|
||||
#endif
|
||||
|
||||
namespace latinime {
|
||||
|
||||
class DicNodeProfiler {
|
||||
public:
|
||||
#if DEBUG_DICT
|
||||
AK_FORCE_INLINE DicNodeProfiler()
|
||||
: mProfOmission(0), mProfInsertion(0), mProfTransposition(0),
|
||||
mProfAdditionalProximity(0), mProfSubstitution(0),
|
||||
mProfSpaceSubstitution(0), mProfSpaceOmission(0),
|
||||
mProfMatch(0), mProfCompletion(0), mProfTerminal(0),
|
||||
mProfNearestKey(0), mProfNewWord(0), mProfNewWordBigram(0) {}
|
||||
|
||||
int mProfOmission;
|
||||
int mProfInsertion;
|
||||
int mProfTransposition;
|
||||
int mProfAdditionalProximity;
|
||||
int mProfSubstitution;
|
||||
int mProfSpaceSubstitution;
|
||||
int mProfSpaceOmission;
|
||||
int mProfMatch;
|
||||
int mProfCompletion;
|
||||
int mProfTerminal;
|
||||
int mProfNearestKey;
|
||||
int mProfNewWord;
|
||||
int mProfNewWordBigram;
|
||||
|
||||
void profSpaceSubstitution() {
|
||||
++mProfSpaceSubstitution;
|
||||
}
|
||||
|
||||
void profSpaceOmission() {
|
||||
++mProfSpaceOmission;
|
||||
}
|
||||
|
||||
void profAdditionalProximity() {
|
||||
++mProfAdditionalProximity;
|
||||
}
|
||||
|
||||
void profSubstitution() {
|
||||
++mProfSubstitution;
|
||||
}
|
||||
|
||||
void profOmission() {
|
||||
++mProfOmission;
|
||||
}
|
||||
|
||||
void profInsertion() {
|
||||
++mProfInsertion;
|
||||
}
|
||||
|
||||
void profMatch() {
|
||||
++mProfMatch;
|
||||
}
|
||||
|
||||
void profCompletion() {
|
||||
++mProfCompletion;
|
||||
}
|
||||
|
||||
void profTransposition() {
|
||||
++mProfTransposition;
|
||||
}
|
||||
|
||||
void profNearestKey() {
|
||||
++mProfNearestKey;
|
||||
}
|
||||
|
||||
void profTerminal() {
|
||||
++mProfTerminal;
|
||||
}
|
||||
|
||||
void profNewWord() {
|
||||
++mProfNewWord;
|
||||
}
|
||||
|
||||
void profNewWordBigram() {
|
||||
++mProfNewWordBigram;
|
||||
}
|
||||
|
||||
void reset() {
|
||||
mProfSpaceSubstitution = 0;
|
||||
mProfSpaceOmission = 0;
|
||||
mProfAdditionalProximity = 0;
|
||||
mProfSubstitution = 0;
|
||||
mProfOmission = 0;
|
||||
mProfInsertion = 0;
|
||||
mProfMatch = 0;
|
||||
mProfCompletion = 0;
|
||||
mProfTransposition = 0;
|
||||
mProfNearestKey = 0;
|
||||
mProfTerminal = 0;
|
||||
mProfNewWord = 0;
|
||||
mProfNewWordBigram = 0;
|
||||
}
|
||||
|
||||
void copy(const DicNodeProfiler *const profiler) {
|
||||
mProfSpaceSubstitution = profiler->mProfSpaceSubstitution;
|
||||
mProfSpaceOmission = profiler->mProfSpaceOmission;
|
||||
mProfAdditionalProximity = profiler->mProfAdditionalProximity;
|
||||
mProfSubstitution = profiler->mProfSubstitution;
|
||||
mProfOmission = profiler->mProfOmission;
|
||||
mProfInsertion = profiler->mProfInsertion;
|
||||
mProfMatch = profiler->mProfMatch;
|
||||
mProfCompletion = profiler->mProfCompletion;
|
||||
mProfTransposition = profiler->mProfTransposition;
|
||||
mProfNearestKey = profiler->mProfNearestKey;
|
||||
mProfTerminal = profiler->mProfTerminal;
|
||||
mProfNewWord = profiler->mProfNewWord;
|
||||
mProfNewWordBigram = profiler->mProfNewWordBigram;
|
||||
}
|
||||
|
||||
void dump() const {
|
||||
AKLOGI("O %d, I %d, T %d, AP %d, S %d, SS %d, SO %d, M %d, C %d, TE %d, NW = %d, NWB = %d",
|
||||
mProfOmission, mProfInsertion, mProfTransposition, mProfAdditionalProximity,
|
||||
mProfSubstitution, mProfSpaceSubstitution, mProfSpaceOmission, mProfMatch,
|
||||
mProfCompletion, mProfTerminal, mProfNewWord, mProfNewWordBigram);
|
||||
}
|
||||
#else
|
||||
DicNodeProfiler() {}
|
||||
#endif
|
||||
private:
|
||||
// Caution!!!
|
||||
// Use a default copy constructor and an assign operator because shallow copies are ok
|
||||
// for this class
|
||||
};
|
||||
}
|
||||
#endif // LATINIME_DIC_NODE_PROFILER_H
|
|
@ -0,0 +1,173 @@
|
|||
/*
|
||||
* Copyright (C) 2012 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef LATINIME_DIC_NODE_PROPERTIES_H
|
||||
#define LATINIME_DIC_NODE_PROPERTIES_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "defines.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
/**
|
||||
* Node for traversing the lexicon trie.
|
||||
*/
|
||||
class DicNodeProperties {
|
||||
public:
|
||||
AK_FORCE_INLINE DicNodeProperties()
|
||||
: mPos(0), mFlags(0), mChildrenPos(0), mAttributesPos(0), mSiblingPos(0),
|
||||
mChildrenCount(0), mProbability(0), mBigramProbability(0), mNodeCodePoint(0),
|
||||
mDepth(0), mLeavingDepth(0), mIsTerminal(false), mHasMultipleChars(false),
|
||||
mHasChildren(false) {
|
||||
}
|
||||
|
||||
virtual ~DicNodeProperties() {}
|
||||
|
||||
// Should be called only once per DicNode is initialized.
|
||||
void init(const int pos, const uint8_t flags, const int childrenPos, const int attributesPos,
|
||||
const int siblingPos, const int nodeCodePoint, const int childrenCount,
|
||||
const int probability, const int bigramProbability, const bool isTerminal,
|
||||
const bool hasMultipleChars, const bool hasChildren, const uint16_t depth,
|
||||
const uint16_t terminalDepth) {
|
||||
mPos = pos;
|
||||
mFlags = flags;
|
||||
mChildrenPos = childrenPos;
|
||||
mAttributesPos = attributesPos;
|
||||
mSiblingPos = siblingPos;
|
||||
mNodeCodePoint = nodeCodePoint;
|
||||
mChildrenCount = childrenCount;
|
||||
mProbability = probability;
|
||||
mBigramProbability = bigramProbability;
|
||||
mIsTerminal = isTerminal;
|
||||
mHasMultipleChars = hasMultipleChars;
|
||||
mHasChildren = hasChildren;
|
||||
mDepth = depth;
|
||||
mLeavingDepth = terminalDepth;
|
||||
}
|
||||
|
||||
// Init for copy
|
||||
void init(const DicNodeProperties *const nodeProp) {
|
||||
mPos = nodeProp->mPos;
|
||||
mFlags = nodeProp->mFlags;
|
||||
mChildrenPos = nodeProp->mChildrenPos;
|
||||
mAttributesPos = nodeProp->mAttributesPos;
|
||||
mSiblingPos = nodeProp->mSiblingPos;
|
||||
mNodeCodePoint = nodeProp->mNodeCodePoint;
|
||||
mChildrenCount = nodeProp->mChildrenCount;
|
||||
mProbability = nodeProp->mProbability;
|
||||
mBigramProbability = nodeProp->mBigramProbability;
|
||||
mIsTerminal = nodeProp->mIsTerminal;
|
||||
mHasMultipleChars = nodeProp->mHasMultipleChars;
|
||||
mHasChildren = nodeProp->mHasChildren;
|
||||
mDepth = nodeProp->mDepth;
|
||||
mLeavingDepth = nodeProp->mLeavingDepth;
|
||||
}
|
||||
|
||||
// Init as passing child
|
||||
void init(const DicNodeProperties *const nodeProp, const int codePoint) {
|
||||
mPos = nodeProp->mPos;
|
||||
mFlags = nodeProp->mFlags;
|
||||
mChildrenPos = nodeProp->mChildrenPos;
|
||||
mAttributesPos = nodeProp->mAttributesPos;
|
||||
mSiblingPos = nodeProp->mSiblingPos;
|
||||
mNodeCodePoint = codePoint; // Overwrite the node char of a passing child
|
||||
mChildrenCount = nodeProp->mChildrenCount;
|
||||
mProbability = nodeProp->mProbability;
|
||||
mBigramProbability = nodeProp->mBigramProbability;
|
||||
mIsTerminal = nodeProp->mIsTerminal;
|
||||
mHasMultipleChars = nodeProp->mHasMultipleChars;
|
||||
mHasChildren = nodeProp->mHasChildren;
|
||||
mDepth = nodeProp->mDepth + 1; // Increment the depth of a passing child
|
||||
mLeavingDepth = nodeProp->mLeavingDepth;
|
||||
}
|
||||
|
||||
int getPos() const {
|
||||
return mPos;
|
||||
}
|
||||
|
||||
uint8_t getFlags() const {
|
||||
return mFlags;
|
||||
}
|
||||
|
||||
int getChildrenPos() const {
|
||||
return mChildrenPos;
|
||||
}
|
||||
|
||||
int getAttributesPos() const {
|
||||
return mAttributesPos;
|
||||
}
|
||||
|
||||
int getChildrenCount() const {
|
||||
return mChildrenCount;
|
||||
}
|
||||
|
||||
int getProbability() const {
|
||||
return mProbability;
|
||||
}
|
||||
|
||||
int getNodeCodePoint() const {
|
||||
return mNodeCodePoint;
|
||||
}
|
||||
|
||||
uint16_t getDepth() const {
|
||||
return mDepth;
|
||||
}
|
||||
|
||||
// TODO: Move to output?
|
||||
uint16_t getLeavingDepth() const {
|
||||
return mLeavingDepth;
|
||||
}
|
||||
|
||||
bool isTerminal() const {
|
||||
return mIsTerminal;
|
||||
}
|
||||
|
||||
bool hasMultipleChars() const {
|
||||
return mHasMultipleChars;
|
||||
}
|
||||
|
||||
bool hasChildren() const {
|
||||
return mChildrenCount > 0 || mDepth != mLeavingDepth;
|
||||
}
|
||||
|
||||
private:
|
||||
// Caution!!!
|
||||
// Use a default copy constructor and an assign operator because shallow copies are ok
|
||||
// for this class
|
||||
|
||||
// Not used
|
||||
int getSiblingPos() const {
|
||||
return mSiblingPos;
|
||||
}
|
||||
|
||||
int mPos;
|
||||
uint8_t mFlags;
|
||||
int mChildrenPos;
|
||||
int mAttributesPos;
|
||||
int mSiblingPos;
|
||||
int mChildrenCount;
|
||||
int mProbability;
|
||||
int mBigramProbability; // not used for now
|
||||
int mNodeCodePoint;
|
||||
uint16_t mDepth;
|
||||
uint16_t mLeavingDepth;
|
||||
bool mIsTerminal;
|
||||
bool mHasMultipleChars;
|
||||
bool mHasChildren;
|
||||
};
|
||||
} // namespace latinime
|
||||
#endif // LATINIME_DIC_NODE_PROPERTIES_H
|
|
@ -0,0 +1,33 @@
|
|||
/*
|
||||
* Copyright (C) 2012 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef LATINIME_DIC_NODE_RELEASE_LISTENER_H
|
||||
#define LATINIME_DIC_NODE_RELEASE_LISTENER_H
|
||||
|
||||
#include "defines.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
class DicNodeReleaseListener {
|
||||
public:
|
||||
DicNodeReleaseListener() {}
|
||||
virtual ~DicNodeReleaseListener() {}
|
||||
virtual void onReleased(DicNode *dicNode) = 0;
|
||||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(DicNodeReleaseListener);
|
||||
};
|
||||
} // namespace latinime
|
||||
#endif // LATINIME_DIC_NODE_RELEASE_LISTENER_H
|
|
@ -0,0 +1,71 @@
|
|||
/*
|
||||
* Copyright (C) 2012 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef LATINIME_DIC_NODE_STATE_H
|
||||
#define LATINIME_DIC_NODE_STATE_H
|
||||
|
||||
#include "defines.h"
|
||||
#include "dic_node_state_input.h"
|
||||
#include "dic_node_state_output.h"
|
||||
#include "dic_node_state_prevword.h"
|
||||
#include "dic_node_state_scoring.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
class DicNodeState {
|
||||
public:
|
||||
DicNodeStateInput mDicNodeStateInput;
|
||||
DicNodeStateOutput mDicNodeStateOutput;
|
||||
DicNodeStatePrevWord mDicNodeStatePrevWord;
|
||||
DicNodeStateScoring mDicNodeStateScoring;
|
||||
|
||||
AK_FORCE_INLINE DicNodeState()
|
||||
: mDicNodeStateInput(), mDicNodeStateOutput(), mDicNodeStatePrevWord(),
|
||||
mDicNodeStateScoring() {
|
||||
}
|
||||
|
||||
virtual ~DicNodeState() {}
|
||||
|
||||
// Init with prevWordPos
|
||||
void init(const int prevWordPos) {
|
||||
mDicNodeStateInput.init();
|
||||
mDicNodeStateOutput.init();
|
||||
mDicNodeStatePrevWord.init(prevWordPos);
|
||||
mDicNodeStateScoring.init();
|
||||
}
|
||||
|
||||
// Init by copy
|
||||
AK_FORCE_INLINE void init(const DicNodeState *const src) {
|
||||
mDicNodeStateInput.init(&src->mDicNodeStateInput);
|
||||
mDicNodeStateOutput.init(&src->mDicNodeStateOutput);
|
||||
mDicNodeStatePrevWord.init(&src->mDicNodeStatePrevWord);
|
||||
mDicNodeStateScoring.init(&src->mDicNodeStateScoring);
|
||||
}
|
||||
|
||||
// Init by copy and adding subword
|
||||
void init(const DicNodeState *const src, const uint16_t additionalSubwordLength,
|
||||
const int *const additionalSubword) {
|
||||
init(src);
|
||||
mDicNodeStateOutput.addSubword(additionalSubwordLength, additionalSubword);
|
||||
}
|
||||
|
||||
private:
|
||||
// Caution!!!
|
||||
// Use a default copy constructor and an assign operator because shallow copies are ok
|
||||
// for this class
|
||||
};
|
||||
} // namespace latinime
|
||||
#endif // LATINIME_DIC_NODE_STATE_H
|
|
@ -0,0 +1,100 @@
|
|||
/*
|
||||
* Copyright (C) 2012 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef LATINIME_DIC_NODE_STATE_INPUT_H
|
||||
#define LATINIME_DIC_NODE_STATE_INPUT_H
|
||||
|
||||
#include "defines.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
// TODO: Have a .cpp for this class
|
||||
class DicNodeStateInput {
|
||||
public:
|
||||
DicNodeStateInput() {}
|
||||
virtual ~DicNodeStateInput() {}
|
||||
|
||||
// TODO: Merge into DicNodeStatePrevWord::truncate
|
||||
void truncate(const int commitPoint) {
|
||||
mInputIndex[0] -= commitPoint;
|
||||
}
|
||||
|
||||
void init() {
|
||||
for (int i = 0; i < MAX_POINTER_COUNT_G; i++) {
|
||||
// TODO: The initial value for mInputIndex should be -1?
|
||||
//mInputIndex[i] = i == 0 ? 0 : -1;
|
||||
mInputIndex[i] = 0;
|
||||
mPrevCodePoint[i] = NOT_A_CODE_POINT;
|
||||
mTerminalDiffCost[i] = static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
|
||||
}
|
||||
}
|
||||
|
||||
void init(const DicNodeStateInput *const src, const bool resetTerminalDiffCost) {
|
||||
for (int i = 0; i < MAX_POINTER_COUNT_G; i++) {
|
||||
mInputIndex[i] = src->mInputIndex[i];
|
||||
mPrevCodePoint[i] = src->mPrevCodePoint[i];
|
||||
mTerminalDiffCost[i] = resetTerminalDiffCost ?
|
||||
static_cast<float>(MAX_VALUE_FOR_WEIGHTING) : src->mTerminalDiffCost[i];
|
||||
}
|
||||
}
|
||||
|
||||
void updateInputIndexG(const int pointerId, const int inputIndex,
|
||||
const int prevCodePoint, const float terminalDiffCost, const float rawLength) {
|
||||
mInputIndex[pointerId] = inputIndex;
|
||||
mPrevCodePoint[pointerId] = prevCodePoint;
|
||||
mTerminalDiffCost[pointerId] = terminalDiffCost;
|
||||
}
|
||||
|
||||
void init(const DicNodeStateInput *const src) {
|
||||
init(src, false);
|
||||
}
|
||||
|
||||
// For transposition
|
||||
void setPrevCodePoint(const int pointerId, const int c) {
|
||||
mPrevCodePoint[pointerId] = c;
|
||||
}
|
||||
|
||||
void forwardInputIndex(const int pointerId, const int val) {
|
||||
if (mInputIndex[pointerId] < 0) {
|
||||
mInputIndex[pointerId] = val;
|
||||
} else {
|
||||
mInputIndex[pointerId] = mInputIndex[pointerId] + val;
|
||||
}
|
||||
}
|
||||
|
||||
int getInputIndex(const int pointerId) const {
|
||||
// when "inputIndex" exceeds "inputSize", auto-completion needs to be done
|
||||
return mInputIndex[pointerId];
|
||||
}
|
||||
|
||||
int getPrevCodePoint(const int pointerId) const {
|
||||
return mPrevCodePoint[pointerId];
|
||||
}
|
||||
|
||||
float getTerminalDiffCost(const int pointerId) const {
|
||||
return mTerminalDiffCost[pointerId];
|
||||
}
|
||||
|
||||
private:
|
||||
// Caution!!!
|
||||
// Use a default copy constructor and an assign operator because shallow copies are ok
|
||||
// for this class
|
||||
int mInputIndex[MAX_POINTER_COUNT_G];
|
||||
int mPrevCodePoint[MAX_POINTER_COUNT_G];
|
||||
float mTerminalDiffCost[MAX_POINTER_COUNT_G];
|
||||
};
|
||||
} // namespace latinime
|
||||
#endif // LATINIME_DIC_NODE_STATE_INPUT_H
|
|
@ -0,0 +1,75 @@
|
|||
/*
|
||||
* Copyright (C) 2012 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef LATINIME_DIC_NODE_STATE_OUTPUT_H
|
||||
#define LATINIME_DIC_NODE_STATE_OUTPUT_H
|
||||
|
||||
#include <cstring> // for memcpy()
|
||||
#include <stdint.h>
|
||||
|
||||
#include "defines.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
class DicNodeStateOutput {
|
||||
public:
|
||||
DicNodeStateOutput() : mOutputtedLength(0) {
|
||||
init();
|
||||
}
|
||||
|
||||
virtual ~DicNodeStateOutput() {}
|
||||
|
||||
void init() {
|
||||
mOutputtedLength = 0;
|
||||
mWordBuf[0] = 0;
|
||||
}
|
||||
|
||||
void init(const DicNodeStateOutput *const stateOutput) {
|
||||
memcpy(mWordBuf, stateOutput->mWordBuf,
|
||||
stateOutput->mOutputtedLength * sizeof(mWordBuf[0]));
|
||||
mOutputtedLength = stateOutput->mOutputtedLength;
|
||||
if (mOutputtedLength < MAX_WORD_LENGTH) {
|
||||
mWordBuf[mOutputtedLength] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void addSubword(const uint16_t additionalSubwordLength, const int *const additionalSubword) {
|
||||
if (additionalSubword) {
|
||||
memcpy(&mWordBuf[mOutputtedLength], additionalSubword,
|
||||
additionalSubwordLength * sizeof(mWordBuf[0]));
|
||||
mOutputtedLength = static_cast<uint16_t>(mOutputtedLength + additionalSubwordLength);
|
||||
if (mOutputtedLength < MAX_WORD_LENGTH) {
|
||||
mWordBuf[mOutputtedLength] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Remove
|
||||
int getCodePointAt(const int id) const {
|
||||
return mWordBuf[id];
|
||||
}
|
||||
|
||||
// TODO: Move to private
|
||||
int mWordBuf[MAX_WORD_LENGTH];
|
||||
|
||||
private:
|
||||
// Caution!!!
|
||||
// Use a default copy constructor and an assign operator because shallow copies are ok
|
||||
// for this class
|
||||
uint16_t mOutputtedLength;
|
||||
};
|
||||
} // namespace latinime
|
||||
#endif // LATINIME_DIC_NODE_STATE_OUTPUT_H
|
|
@ -0,0 +1,156 @@
|
|||
/*
|
||||
* Copyright (C) 2012 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef LATINIME_DIC_NODE_STATE_PREVWORD_H
|
||||
#define LATINIME_DIC_NODE_STATE_PREVWORD_H
|
||||
|
||||
#include <cstring> // for memset()
|
||||
#include <stdint.h>
|
||||
|
||||
#include "defines.h"
|
||||
#include "dic_node_utils.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
class DicNodeStatePrevWord {
|
||||
public:
|
||||
AK_FORCE_INLINE DicNodeStatePrevWord()
|
||||
: mPrevWordCount(0), mPrevWordLength(0), mPrevWordStart(0), mPrevWordProbability(0),
|
||||
mPrevWordNodePos(0) {
|
||||
memset(mPrevWord, 0, sizeof(mPrevWord));
|
||||
memset(mPrevSpacePositions, 0, sizeof(mPrevSpacePositions));
|
||||
}
|
||||
|
||||
virtual ~DicNodeStatePrevWord() {}
|
||||
|
||||
void init() {
|
||||
mPrevWordLength = 0;
|
||||
mPrevWordCount = 0;
|
||||
mPrevWordStart = 0;
|
||||
mPrevWordProbability = -1;
|
||||
mPrevWordNodePos = NOT_VALID_WORD;
|
||||
memset(mPrevSpacePositions, 0, sizeof(mPrevSpacePositions));
|
||||
}
|
||||
|
||||
void init(const int prevWordNodePos) {
|
||||
mPrevWordLength = 0;
|
||||
mPrevWordCount = 0;
|
||||
mPrevWordStart = 0;
|
||||
mPrevWordProbability = -1;
|
||||
mPrevWordNodePos = prevWordNodePos;
|
||||
memset(mPrevSpacePositions, 0, sizeof(mPrevSpacePositions));
|
||||
}
|
||||
|
||||
// Init by copy
|
||||
AK_FORCE_INLINE void init(const DicNodeStatePrevWord *const prevWord) {
|
||||
mPrevWordLength = prevWord->mPrevWordLength;
|
||||
mPrevWordCount = prevWord->mPrevWordCount;
|
||||
mPrevWordStart = prevWord->mPrevWordStart;
|
||||
mPrevWordProbability = prevWord->mPrevWordProbability;
|
||||
mPrevWordNodePos = prevWord->mPrevWordNodePos;
|
||||
memcpy(mPrevWord, prevWord->mPrevWord, prevWord->mPrevWordLength * sizeof(mPrevWord[0]));
|
||||
memcpy(mPrevSpacePositions, prevWord->mPrevSpacePositions, sizeof(mPrevSpacePositions));
|
||||
}
|
||||
|
||||
void init(const int16_t prevWordCount, const int16_t prevWordProbability,
|
||||
const int prevWordNodePos, const int *const src0, const int16_t length0,
|
||||
const int *const src1, const int16_t length1, const int *const prevSpacePositions,
|
||||
const int lastInputIndex) {
|
||||
mPrevWordCount = prevWordCount;
|
||||
mPrevWordProbability = prevWordProbability;
|
||||
mPrevWordNodePos = prevWordNodePos;
|
||||
const int twoWordsLen =
|
||||
DicNodeUtils::appendTwoWords(src0, length0, src1, length1, mPrevWord);
|
||||
mPrevWord[twoWordsLen] = KEYCODE_SPACE;
|
||||
mPrevWordStart = length0;
|
||||
mPrevWordLength = static_cast<int16_t>(twoWordsLen + 1);
|
||||
memcpy(mPrevSpacePositions, prevSpacePositions, sizeof(mPrevSpacePositions));
|
||||
mPrevSpacePositions[mPrevWordCount - 1] = lastInputIndex;
|
||||
}
|
||||
|
||||
void truncate(const int offset) {
|
||||
// TODO: memmove
|
||||
if (mPrevWordLength < offset) {
|
||||
memset(mPrevWord, 0, sizeof(mPrevWord));
|
||||
mPrevWordLength = 0;
|
||||
return;
|
||||
}
|
||||
const int newPrevWordLength = mPrevWordLength - offset;
|
||||
memmove(mPrevWord, &mPrevWord[offset], newPrevWordLength * sizeof(mPrevWord[0]));
|
||||
mPrevWordLength = newPrevWordLength;
|
||||
}
|
||||
|
||||
void outputSpacePositions(int *spaceIndices) const {
|
||||
// Convert uint16_t to int
|
||||
for (int i = 0; i < MAX_RESULTS; i++) {
|
||||
spaceIndices[i] = mPrevSpacePositions[i];
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: remove
|
||||
int16_t getPrevWordLength() const {
|
||||
return mPrevWordLength;
|
||||
}
|
||||
|
||||
int16_t getPrevWordCount() const {
|
||||
return mPrevWordCount;
|
||||
}
|
||||
|
||||
int16_t getPrevWordStart() const {
|
||||
return mPrevWordStart;
|
||||
}
|
||||
|
||||
int16_t getPrevWordProbability() const {
|
||||
return mPrevWordProbability;
|
||||
}
|
||||
|
||||
int getPrevWordNodePos() const {
|
||||
return mPrevWordNodePos;
|
||||
}
|
||||
|
||||
int getPrevWordCodePointAt(const int id) const {
|
||||
return mPrevWord[id];
|
||||
}
|
||||
|
||||
bool startsWith(const DicNodeStatePrevWord *const prefix, const int prefixLen) const {
|
||||
if (prefixLen > mPrevWordLength) {
|
||||
return false;
|
||||
}
|
||||
for (int i = 0; i < prefixLen; ++i) {
|
||||
if (mPrevWord[i] != prefix->mPrevWord[i]) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// TODO: Move to private
|
||||
int mPrevWord[MAX_WORD_LENGTH];
|
||||
// TODO: Move to private
|
||||
int mPrevSpacePositions[MAX_RESULTS];
|
||||
|
||||
private:
|
||||
// Caution!!!
|
||||
// Use a default copy constructor and an assign operator because shallow copies are ok
|
||||
// for this class
|
||||
int16_t mPrevWordCount;
|
||||
int16_t mPrevWordLength;
|
||||
int16_t mPrevWordStart;
|
||||
int16_t mPrevWordProbability;
|
||||
int mPrevWordNodePos;
|
||||
};
|
||||
} // namespace latinime
|
||||
#endif // LATINIME_DIC_NODE_STATE_PREVWORD_H
|
|
@ -0,0 +1,166 @@
|
|||
/*
|
||||
* Copyright (C) 2012 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef LATINIME_DIC_NODE_STATE_SCORING_H
|
||||
#define LATINIME_DIC_NODE_STATE_SCORING_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "defines.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
class DicNodeStateScoring {
|
||||
public:
|
||||
AK_FORCE_INLINE DicNodeStateScoring()
|
||||
: mDoubleLetterLevel(NOT_A_DOUBLE_LETTER),
|
||||
mEditCorrectionCount(0), mProximityCorrectionCount(0),
|
||||
mNormalizedCompoundDistance(0.0f), mSpatialDistance(0.0f), mLanguageDistance(0.0f),
|
||||
mTotalPrevWordsLanguageCost(0.0f), mRawLength(0.0f) {
|
||||
}
|
||||
|
||||
virtual ~DicNodeStateScoring() {}
|
||||
|
||||
void init() {
|
||||
mEditCorrectionCount = 0;
|
||||
mProximityCorrectionCount = 0;
|
||||
mNormalizedCompoundDistance = 0.0f;
|
||||
mSpatialDistance = 0.0f;
|
||||
mLanguageDistance = 0.0f;
|
||||
mTotalPrevWordsLanguageCost = 0.0f;
|
||||
mRawLength = 0.0f;
|
||||
mDoubleLetterLevel = NOT_A_DOUBLE_LETTER;
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE void init(const DicNodeStateScoring *const scoring) {
|
||||
mEditCorrectionCount = scoring->mEditCorrectionCount;
|
||||
mProximityCorrectionCount = scoring->mProximityCorrectionCount;
|
||||
mNormalizedCompoundDistance = scoring->mNormalizedCompoundDistance;
|
||||
mSpatialDistance = scoring->mSpatialDistance;
|
||||
mLanguageDistance = scoring->mLanguageDistance;
|
||||
mTotalPrevWordsLanguageCost = scoring->mTotalPrevWordsLanguageCost;
|
||||
mRawLength = scoring->mRawLength;
|
||||
mDoubleLetterLevel = scoring->mDoubleLetterLevel;
|
||||
}
|
||||
|
||||
void addCost(const float spatialCost, const float languageCost, const bool doNormalization,
|
||||
const int inputSize, const int totalInputIndex, const bool isEditCorrection,
|
||||
const bool isProximityCorrection) {
|
||||
addDistance(spatialCost, languageCost, doNormalization, inputSize, totalInputIndex);
|
||||
if (isEditCorrection) {
|
||||
++mEditCorrectionCount;
|
||||
}
|
||||
if (isProximityCorrection) {
|
||||
++mProximityCorrectionCount;
|
||||
}
|
||||
if (languageCost > 0.0f) {
|
||||
setTotalPrevWordsLanguageCost(mTotalPrevWordsLanguageCost + languageCost);
|
||||
}
|
||||
}
|
||||
|
||||
void addRawLength(const float rawLength) {
|
||||
mRawLength += rawLength;
|
||||
}
|
||||
|
||||
float getCompoundDistance() const {
|
||||
return getCompoundDistance(1.0f);
|
||||
}
|
||||
|
||||
float getCompoundDistance(const float languageWeight) const {
|
||||
return mSpatialDistance + mLanguageDistance * languageWeight;
|
||||
}
|
||||
|
||||
float getNormalizedCompoundDistance() const {
|
||||
return mNormalizedCompoundDistance;
|
||||
}
|
||||
|
||||
float getSpatialDistance() const {
|
||||
return mSpatialDistance;
|
||||
}
|
||||
|
||||
float getLanguageDistance() const {
|
||||
return mLanguageDistance;
|
||||
}
|
||||
|
||||
int16_t getEditCorrectionCount() const {
|
||||
return mEditCorrectionCount;
|
||||
}
|
||||
|
||||
int16_t getProximityCorrectionCount() const {
|
||||
return mProximityCorrectionCount;
|
||||
}
|
||||
|
||||
float getRawLength() const {
|
||||
return mRawLength;
|
||||
}
|
||||
|
||||
DoubleLetterLevel getDoubleLetterLevel() const {
|
||||
return mDoubleLetterLevel;
|
||||
}
|
||||
|
||||
void setDoubleLetterLevel(DoubleLetterLevel doubleLetterLevel) {
|
||||
switch(doubleLetterLevel) {
|
||||
case NOT_A_DOUBLE_LETTER:
|
||||
break;
|
||||
case A_DOUBLE_LETTER:
|
||||
if (mDoubleLetterLevel != A_STRONG_DOUBLE_LETTER) {
|
||||
mDoubleLetterLevel = doubleLetterLevel;
|
||||
}
|
||||
break;
|
||||
case A_STRONG_DOUBLE_LETTER:
|
||||
mDoubleLetterLevel = doubleLetterLevel;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
float getTotalPrevWordsLanguageCost() const {
|
||||
return mTotalPrevWordsLanguageCost;
|
||||
}
|
||||
|
||||
private:
|
||||
// Caution!!!
|
||||
// Use a default copy constructor and an assign operator because shallow copies are ok
|
||||
// for this class
|
||||
DoubleLetterLevel mDoubleLetterLevel;
|
||||
|
||||
int16_t mEditCorrectionCount;
|
||||
int16_t mProximityCorrectionCount;
|
||||
|
||||
float mNormalizedCompoundDistance;
|
||||
float mSpatialDistance;
|
||||
float mLanguageDistance;
|
||||
float mTotalPrevWordsLanguageCost;
|
||||
float mRawLength;
|
||||
|
||||
AK_FORCE_INLINE void addDistance(float spatialDistance, float languageDistance,
|
||||
bool doNormalization, int inputSize, int totalInputIndex) {
|
||||
mSpatialDistance += spatialDistance;
|
||||
mLanguageDistance += languageDistance;
|
||||
if (!doNormalization) {
|
||||
mNormalizedCompoundDistance = mSpatialDistance + mLanguageDistance;
|
||||
} else {
|
||||
mNormalizedCompoundDistance = (mSpatialDistance + mLanguageDistance)
|
||||
/ static_cast<float>(max(1, totalInputIndex));
|
||||
}
|
||||
}
|
||||
|
||||
//TODO: remove
|
||||
AK_FORCE_INLINE void setTotalPrevWordsLanguageCost(float totalPrevWordsLanguageCost) {
|
||||
mTotalPrevWordsLanguageCost = totalPrevWordsLanguageCost;
|
||||
}
|
||||
};
|
||||
} // namespace latinime
|
||||
#endif // LATINIME_DIC_NODE_STATE_SCORING_H
|
|
@ -0,0 +1,335 @@
|
|||
/*
|
||||
* Copyright (C) 2012 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <cstring>
|
||||
#include <vector>
|
||||
|
||||
#include "binary_format.h"
|
||||
#include "dic_node.h"
|
||||
#include "dic_node_utils.h"
|
||||
#include "dic_node_vector.h"
|
||||
#include "proximity_info.h"
|
||||
#include "proximity_info_state.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
///////////////////////////////
|
||||
// Node initialization utils //
|
||||
///////////////////////////////
|
||||
|
||||
/* static */ void DicNodeUtils::initAsRoot(const int rootPos, const uint8_t *const dicRoot,
|
||||
const int prevWordNodePos, DicNode *newRootNode) {
|
||||
int curPos = rootPos;
|
||||
const int pos = curPos;
|
||||
const int childrenCount = BinaryFormat::getGroupCountAndForwardPointer(dicRoot, &curPos);
|
||||
const int childrenPos = curPos;
|
||||
newRootNode->initAsRoot(pos, childrenPos, childrenCount, prevWordNodePos);
|
||||
}
|
||||
|
||||
/*static */ void DicNodeUtils::initAsRootWithPreviousWord(const int rootPos,
|
||||
const uint8_t *const dicRoot, DicNode *prevWordLastNode, DicNode *newRootNode) {
|
||||
int curPos = rootPos;
|
||||
const int pos = curPos;
|
||||
const int childrenCount = BinaryFormat::getGroupCountAndForwardPointer(dicRoot, &curPos);
|
||||
const int childrenPos = curPos;
|
||||
newRootNode->initAsRootWithPreviousWord(prevWordLastNode, pos, childrenPos, childrenCount);
|
||||
}
|
||||
|
||||
/* static */ void DicNodeUtils::initByCopy(DicNode *srcNode, DicNode *destNode) {
|
||||
destNode->initByCopy(srcNode);
|
||||
}
|
||||
|
||||
///////////////////////////////////
|
||||
// Traverse node expansion utils //
|
||||
///////////////////////////////////
|
||||
|
||||
/* static */ void DicNodeUtils::createAndGetPassingChildNode(DicNode *dicNode,
|
||||
const ProximityInfoState *pInfoState, const int pointIndex, const bool exactOnly,
|
||||
DicNodeVector *childDicNodes) {
|
||||
// Passing multiple chars node. No need to traverse child
|
||||
const int codePoint = dicNode->getNodeTypedCodePoint();
|
||||
const int baseLowerCaseCodePoint = toBaseLowerCase(codePoint);
|
||||
const bool isMatch = isMatchedNodeCodePoint(pInfoState, pointIndex, exactOnly, codePoint);
|
||||
if (isMatch || isIntentionalOmissionCodePoint(baseLowerCaseCodePoint)) {
|
||||
childDicNodes->pushPassingChild(dicNode);
|
||||
}
|
||||
}
|
||||
|
||||
/* static */ int DicNodeUtils::createAndGetLeavingChildNode(DicNode *dicNode, int pos,
|
||||
const uint8_t *const dicRoot, const int terminalDepth, const ProximityInfoState *pInfoState,
|
||||
const int pointIndex, const bool exactOnly, const std::vector<int> *const codePointsFilter,
|
||||
const ProximityInfo *const pInfo, DicNodeVector *childDicNodes) {
|
||||
int nextPos = pos;
|
||||
const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(dicRoot, &pos);
|
||||
const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags));
|
||||
const bool isTerminal = (0 != (BinaryFormat::FLAG_IS_TERMINAL & flags));
|
||||
const bool hasChildren = BinaryFormat::hasChildrenInFlags(flags);
|
||||
|
||||
int codePoint = BinaryFormat::getCodePointAndForwardPointer(dicRoot, &pos);
|
||||
ASSERT(NOT_A_CODE_POINT != codePoint);
|
||||
const int nodeCodePoint = codePoint;
|
||||
// TODO: optimize this
|
||||
int additionalWordBuf[MAX_WORD_LENGTH];
|
||||
uint16_t additionalSubwordLength = 0;
|
||||
additionalWordBuf[additionalSubwordLength++] = codePoint;
|
||||
|
||||
do {
|
||||
const int nextCodePoint = hasMultipleChars
|
||||
? BinaryFormat::getCodePointAndForwardPointer(dicRoot, &pos) : NOT_A_CODE_POINT;
|
||||
const bool isLastChar = (NOT_A_CODE_POINT == nextCodePoint);
|
||||
if (!isLastChar) {
|
||||
additionalWordBuf[additionalSubwordLength++] = nextCodePoint;
|
||||
}
|
||||
codePoint = nextCodePoint;
|
||||
} while (NOT_A_CODE_POINT != codePoint);
|
||||
|
||||
const int probability =
|
||||
isTerminal ? BinaryFormat::readProbabilityWithoutMovingPointer(dicRoot, pos) : -1;
|
||||
pos = BinaryFormat::skipProbability(flags, pos);
|
||||
int childrenPos = hasChildren ? BinaryFormat::readChildrenPosition(dicRoot, flags, pos) : 0;
|
||||
const int attributesPos = BinaryFormat::skipChildrenPosition(flags, pos);
|
||||
const int siblingPos = BinaryFormat::skipChildrenPosAndAttributes(dicRoot, flags, pos);
|
||||
|
||||
if (isDicNodeFilteredOut(nodeCodePoint, pInfo, codePointsFilter)) {
|
||||
return siblingPos;
|
||||
}
|
||||
if (!isMatchedNodeCodePoint(pInfoState, pointIndex, exactOnly, nodeCodePoint)) {
|
||||
return siblingPos;
|
||||
}
|
||||
const int childrenCount = hasChildren
|
||||
? BinaryFormat::getGroupCountAndForwardPointer(dicRoot, &childrenPos) : 0;
|
||||
childDicNodes->pushLeavingChild(dicNode, nextPos, flags, childrenPos, attributesPos, siblingPos,
|
||||
nodeCodePoint, childrenCount, probability, -1 /* bigramProbability */, isTerminal,
|
||||
hasMultipleChars, hasChildren, additionalSubwordLength, additionalWordBuf);
|
||||
return siblingPos;
|
||||
}
|
||||
|
||||
/* static */ bool DicNodeUtils::isDicNodeFilteredOut(const int nodeCodePoint,
|
||||
const ProximityInfo *const pInfo, const std::vector<int> *const codePointsFilter) {
|
||||
const int filterSize = codePointsFilter ? codePointsFilter->size() : 0;
|
||||
if (filterSize <= 0) {
|
||||
return false;
|
||||
}
|
||||
if (pInfo && (pInfo->getKeyIndexOf(nodeCodePoint) == NOT_AN_INDEX
|
||||
|| isIntentionalOmissionCodePoint(nodeCodePoint))) {
|
||||
// If normalized nodeCodePoint is not on the keyboard or skippable, this child is never
|
||||
// filtered.
|
||||
return false;
|
||||
}
|
||||
const int lowerCodePoint = toLowerCase(nodeCodePoint);
|
||||
const int baseLowerCodePoint = toBaseCodePoint(lowerCodePoint);
|
||||
// TODO: Avoid linear search
|
||||
for (int i = 0; i < filterSize; ++i) {
|
||||
// Checking if a normalized code point is in filter characters when pInfo is not
|
||||
// null. When pInfo is null, nodeCodePoint is used to check filtering without
|
||||
// normalizing.
|
||||
if ((pInfo && ((*codePointsFilter)[i] == lowerCodePoint
|
||||
|| (*codePointsFilter)[i] == baseLowerCodePoint))
|
||||
|| (!pInfo && (*codePointsFilter)[i] == nodeCodePoint)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/* static */ void DicNodeUtils::createAndGetAllLeavingChildNodes(DicNode *dicNode,
|
||||
const uint8_t *const dicRoot, const ProximityInfoState *pInfoState, const int pointIndex,
|
||||
const bool exactOnly, const std::vector<int> *const codePointsFilter,
|
||||
const ProximityInfo *const pInfo, DicNodeVector *childDicNodes) {
|
||||
const int terminalDepth = dicNode->getLeavingDepth();
|
||||
const int childCount = dicNode->getChildrenCount();
|
||||
int nextPos = dicNode->getChildrenPos();
|
||||
for (int i = 0; i < childCount; i++) {
|
||||
const int filterSize = codePointsFilter ? codePointsFilter->size() : 0;
|
||||
nextPos = createAndGetLeavingChildNode(dicNode, nextPos, dicRoot, terminalDepth, pInfoState,
|
||||
pointIndex, exactOnly, codePointsFilter, pInfo, childDicNodes);
|
||||
if (!pInfo && filterSize > 0 && childDicNodes->exceeds(filterSize)) {
|
||||
// All code points have been found.
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* static */ void DicNodeUtils::getAllChildDicNodes(DicNode *dicNode, const uint8_t *const dicRoot,
|
||||
DicNodeVector *childDicNodes) {
|
||||
getProximityChildDicNodes(dicNode, dicRoot, 0, 0, false, childDicNodes);
|
||||
}
|
||||
|
||||
/* static */ void DicNodeUtils::getProximityChildDicNodes(DicNode *dicNode,
|
||||
const uint8_t *const dicRoot, const ProximityInfoState *pInfoState, const int pointIndex,
|
||||
bool exactOnly, DicNodeVector *childDicNodes) {
|
||||
if (dicNode->isTotalInputSizeExceedingLimit()) {
|
||||
return;
|
||||
}
|
||||
if (!dicNode->isLeavingNode()) {
|
||||
DicNodeUtils::createAndGetPassingChildNode(dicNode, pInfoState, pointIndex, exactOnly,
|
||||
childDicNodes);
|
||||
} else {
|
||||
DicNodeUtils::createAndGetAllLeavingChildNodes(dicNode, dicRoot, pInfoState, pointIndex,
|
||||
exactOnly, 0 /* codePointsFilter */, 0 /* pInfo */,
|
||||
childDicNodes);
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////
|
||||
// Scoring utils //
|
||||
///////////////////
|
||||
/**
|
||||
* Computes the combined bigram / unigram cost for the given dicNode.
|
||||
*/
|
||||
/* static */ float DicNodeUtils::getBigramNodeImprobability(const uint8_t *const dicRoot,
|
||||
const DicNode *const node, hash_map_compat<int, int16_t> *bigramCacheMap) {
|
||||
if (node->isImpossibleBigramWord()) {
|
||||
return static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
|
||||
}
|
||||
const int probability = getBigramNodeProbability(dicRoot, node, bigramCacheMap);
|
||||
// TODO: This equation to calculate the improbability looks unreasonable. Investigate this.
|
||||
const float cost = static_cast<float>(MAX_PROBABILITY - probability)
|
||||
/ static_cast<float>(MAX_PROBABILITY);
|
||||
return cost;
|
||||
}
|
||||
|
||||
/* static */ int DicNodeUtils::getBigramNodeProbability(const uint8_t *const dicRoot,
|
||||
const DicNode *const node, hash_map_compat<int, int16_t> *bigramCacheMap) {
|
||||
const int unigramProbability = node->getProbability();
|
||||
const int encodedDiffOfBigramProbability =
|
||||
getBigramNodeEncodedDiffProbability(dicRoot, node, bigramCacheMap);
|
||||
if (NOT_A_PROBABILITY == encodedDiffOfBigramProbability) {
|
||||
return backoff(unigramProbability);
|
||||
}
|
||||
return BinaryFormat::computeProbabilityForBigram(
|
||||
unigramProbability, encodedDiffOfBigramProbability);
|
||||
}
|
||||
|
||||
///////////////////////////////////////
|
||||
// Bigram / Unigram dictionary utils //
|
||||
///////////////////////////////////////
|
||||
|
||||
/* static */ int16_t DicNodeUtils::getBigramNodeEncodedDiffProbability(const uint8_t *const dicRoot,
|
||||
const DicNode *const node, hash_map_compat<int, int16_t> *bigramCacheMap) {
|
||||
const int wordPos = node->getPos();
|
||||
const int prevWordPos = node->getPrevWordPos();
|
||||
return getBigramProbability(dicRoot, prevWordPos, wordPos, bigramCacheMap);
|
||||
}
|
||||
|
||||
// TODO: Move this to BigramDictionary
|
||||
/* static */ int16_t DicNodeUtils::getBigramProbability(const uint8_t *const dicRoot, int pos,
|
||||
const int nextPos, hash_map_compat<int, int16_t> *bigramCacheMap) {
|
||||
// TODO: this is painfully slow compared to the method used in the previous version of the
|
||||
// algorithm. Switch to that method.
|
||||
if (NOT_VALID_WORD == pos) return NOT_A_PROBABILITY;
|
||||
if (NOT_VALID_WORD == nextPos) return NOT_A_PROBABILITY;
|
||||
|
||||
// Create a hash code for the given node pair (based on Josh Bloch's effective Java).
|
||||
// TODO: Use a real hash map data structure that deals with collisions.
|
||||
int hash = 17;
|
||||
hash = hash * 31 + pos;
|
||||
hash = hash * 31 + nextPos;
|
||||
|
||||
hash_map_compat<int, int16_t>::const_iterator mapPos = bigramCacheMap->find(hash);
|
||||
if (mapPos != bigramCacheMap->end()) {
|
||||
return mapPos->second;
|
||||
}
|
||||
if (NOT_VALID_WORD == pos) {
|
||||
return NOT_A_PROBABILITY;
|
||||
}
|
||||
const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(dicRoot, &pos);
|
||||
if (0 == (flags & BinaryFormat::FLAG_HAS_BIGRAMS)) {
|
||||
return NOT_A_PROBABILITY;
|
||||
}
|
||||
if (0 == (flags & BinaryFormat::FLAG_HAS_MULTIPLE_CHARS)) {
|
||||
BinaryFormat::getCodePointAndForwardPointer(dicRoot, &pos);
|
||||
} else {
|
||||
pos = BinaryFormat::skipOtherCharacters(dicRoot, pos);
|
||||
}
|
||||
pos = BinaryFormat::skipChildrenPosition(flags, pos);
|
||||
pos = BinaryFormat::skipProbability(flags, pos);
|
||||
uint8_t bigramFlags;
|
||||
int count = 0;
|
||||
do {
|
||||
bigramFlags = BinaryFormat::getFlagsAndForwardPointer(dicRoot, &pos);
|
||||
const int bigramPos = BinaryFormat::getAttributeAddressAndForwardPointer(dicRoot,
|
||||
bigramFlags, &pos);
|
||||
if (bigramPos == nextPos) {
|
||||
const int16_t probability = BinaryFormat::MASK_ATTRIBUTE_PROBABILITY & bigramFlags;
|
||||
if (static_cast<int>(bigramCacheMap->size()) < MAX_BIGRAM_MAP_SIZE) {
|
||||
(*bigramCacheMap)[hash] = probability;
|
||||
}
|
||||
return probability;
|
||||
}
|
||||
count++;
|
||||
} while ((0 != (BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags))
|
||||
&& count < MAX_BIGRAMS_CONSIDERED_PER_CONTEXT);
|
||||
if (static_cast<int>(bigramCacheMap->size()) < MAX_BIGRAM_MAP_SIZE) {
|
||||
// TODO: does this -1 mean NOT_VALID_WORD?
|
||||
(*bigramCacheMap)[hash] = -1;
|
||||
}
|
||||
return NOT_A_PROBABILITY;
|
||||
}
|
||||
|
||||
/* static */ int DicNodeUtils::getWordPos(const uint8_t *const dicRoot, const int *word,
|
||||
const int wordLength) {
|
||||
if (!word) {
|
||||
return NOT_VALID_WORD;
|
||||
}
|
||||
return BinaryFormat::getTerminalPosition(
|
||||
dicRoot, word, wordLength, false /* forceLowerCaseSearch */);
|
||||
}
|
||||
|
||||
/* static */ bool DicNodeUtils::isMatchedNodeCodePoint(const ProximityInfoState *pInfoState,
|
||||
const int pointIndex, const bool exactOnly, const int nodeCodePoint) {
|
||||
if (!pInfoState) {
|
||||
return true;
|
||||
}
|
||||
if (exactOnly) {
|
||||
return pInfoState->getPrimaryCodePointAt(pointIndex) == nodeCodePoint;
|
||||
}
|
||||
const ProximityType matchedId = pInfoState->getProximityType(pointIndex, nodeCodePoint,
|
||||
true /* checkProximityChars */);
|
||||
return isProximityChar(matchedId);
|
||||
}
|
||||
|
||||
////////////////
|
||||
// Char utils //
|
||||
////////////////
|
||||
|
||||
// TODO: Move to char_utils?
|
||||
/* static */ int DicNodeUtils::appendTwoWords(const int *const src0, const int16_t length0,
|
||||
const int *const src1, const int16_t length1, int *dest) {
|
||||
int actualLength0 = 0;
|
||||
for (int i = 0; i < length0; ++i) {
|
||||
if (src0[i] == 0) {
|
||||
break;
|
||||
}
|
||||
actualLength0 = i + 1;
|
||||
}
|
||||
actualLength0 = min(actualLength0, MAX_WORD_LENGTH);
|
||||
memcpy(dest, src0, actualLength0 * sizeof(dest[0]));
|
||||
if (!src1 || length1 == 0) {
|
||||
return actualLength0;
|
||||
}
|
||||
int actualLength1 = 0;
|
||||
for (int i = 0; i < length1; ++i) {
|
||||
if (src1[i] == 0) {
|
||||
break;
|
||||
}
|
||||
actualLength1 = i + 1;
|
||||
}
|
||||
actualLength1 = min(actualLength1, MAX_WORD_LENGTH - actualLength0 - 1);
|
||||
memcpy(&dest[actualLength0], src1, actualLength1 * sizeof(dest[0]));
|
||||
return actualLength0 + actualLength1;
|
||||
}
|
||||
} // namespace latinime
|
|
@ -0,0 +1,88 @@
|
|||
/*
|
||||
* Copyright (C) 2012 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef LATINIME_DIC_NODE_UTILS_H
|
||||
#define LATINIME_DIC_NODE_UTILS_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <vector>
|
||||
|
||||
#include "defines.h"
|
||||
#include "hash_map_compat.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
class DicNode;
|
||||
class DicNodeVector;
|
||||
class ProximityInfo;
|
||||
class ProximityInfoState;
|
||||
|
||||
class DicNodeUtils {
|
||||
public:
|
||||
static int appendTwoWords(const int *src0, const int16_t length0, const int *src1,
|
||||
const int16_t length1, int *dest);
|
||||
static void initAsRoot(const int rootPos, const uint8_t *const dicRoot,
|
||||
const int prevWordNodePos, DicNode *newRootNode);
|
||||
static void initAsRootWithPreviousWord(const int rootPos, const uint8_t *const dicRoot,
|
||||
DicNode *prevWordLastNode, DicNode *newRootNode);
|
||||
static void initByCopy(DicNode *srcNode, DicNode *destNode);
|
||||
static void getAllChildDicNodes(DicNode *dicNode, const uint8_t *const dicRoot,
|
||||
DicNodeVector *childDicNodes);
|
||||
static int getWordPos(const uint8_t *const dicRoot, const int *word, const int prevWordLength);
|
||||
static float getBigramNodeImprobability(const uint8_t *const dicRoot,
|
||||
const DicNode *const node, hash_map_compat<int, int16_t> *const bigramCacheMap);
|
||||
static bool isDicNodeFilteredOut(const int nodeCodePoint, const ProximityInfo *const pInfo,
|
||||
const std::vector<int> *const codePointsFilter);
|
||||
// TODO: Move to private
|
||||
static void getProximityChildDicNodes(DicNode *dicNode, const uint8_t *const dicRoot,
|
||||
const ProximityInfoState *pInfoState, const int pointIndex, bool exactOnly,
|
||||
DicNodeVector *childDicNodes);
|
||||
|
||||
// TODO: Move to proximity info
|
||||
static bool isProximityChar(ProximityType type) {
|
||||
return type == MATCH_CHAR || type == PROXIMITY_CHAR || type == ADDITIONAL_PROXIMITY_CHAR;
|
||||
}
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(DicNodeUtils);
|
||||
// Max cache size for the space omission error correction bigram lookup
|
||||
static const int MAX_BIGRAM_MAP_SIZE = 20000;
|
||||
// Max number of bigrams to look up
|
||||
static const int MAX_BIGRAMS_CONSIDERED_PER_CONTEXT = 500;
|
||||
|
||||
static int getBigramNodeProbability(const uint8_t *const dicRoot, const DicNode *const node,
|
||||
hash_map_compat<int, int16_t> *bigramCacheMap);
|
||||
static int16_t getBigramNodeEncodedDiffProbability(const uint8_t *const dicRoot,
|
||||
const DicNode *const node, hash_map_compat<int, int16_t> *bigramCacheMap);
|
||||
static void createAndGetPassingChildNode(DicNode *dicNode, const ProximityInfoState *pInfoState,
|
||||
const int pointIndex, const bool exactOnly, DicNodeVector *childDicNodes);
|
||||
static void createAndGetAllLeavingChildNodes(DicNode *dicNode, const uint8_t *const dicRoot,
|
||||
const ProximityInfoState *pInfoState, const int pointIndex, const bool exactOnly,
|
||||
const std::vector<int> *const codePointsFilter,
|
||||
const ProximityInfo *const pInfo, DicNodeVector *childDicNodes);
|
||||
static int createAndGetLeavingChildNode(DicNode *dicNode, int pos, const uint8_t *const dicRoot,
|
||||
const int terminalDepth, const ProximityInfoState *pInfoState, const int pointIndex,
|
||||
const bool exactOnly, const std::vector<int> *const codePointsFilter,
|
||||
const ProximityInfo *const pInfo, DicNodeVector *childDicNodes);
|
||||
static int16_t getBigramProbability(const uint8_t *const dicRoot, int pos, const int nextPos,
|
||||
hash_map_compat<int, int16_t> *bigramCacheMap);
|
||||
|
||||
// TODO: Move to proximity info
|
||||
static bool isMatchedNodeCodePoint(const ProximityInfoState *pInfoState, const int pointIndex,
|
||||
const bool exactOnly, const int nodeCodePoint);
|
||||
};
|
||||
} // namespace latinime
|
||||
#endif // LATINIME_DIC_NODE_UTILS_H
|
|
@ -0,0 +1,95 @@
|
|||
/*
|
||||
* Copyright (C) 2012 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef LATINIME_DIC_NODE_VECTOR_H
|
||||
#define LATINIME_DIC_NODE_VECTOR_H
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "defines.h"
|
||||
#include "dic_node.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
class DicNodeVector {
|
||||
public:
|
||||
#ifdef FLAG_DBG
|
||||
// 0 will introduce resizing the vector.
|
||||
static const int DEFAULT_NODES_SIZE_FOR_OPTIMIZATION = 0;
|
||||
#else
|
||||
static const int DEFAULT_NODES_SIZE_FOR_OPTIMIZATION = 60;
|
||||
#endif
|
||||
AK_FORCE_INLINE DicNodeVector() : mDicNodes(0), mLock(false), mEmptyNode() {}
|
||||
|
||||
// Specify the capacity of the vector
|
||||
AK_FORCE_INLINE DicNodeVector(const int size) : mDicNodes(0), mLock(false), mEmptyNode() {
|
||||
mDicNodes.reserve(size);
|
||||
}
|
||||
|
||||
// Non virtual inline destructor -- never inherit this class
|
||||
AK_FORCE_INLINE ~DicNodeVector() {}
|
||||
|
||||
AK_FORCE_INLINE void clear() {
|
||||
mDicNodes.clear();
|
||||
mLock = false;
|
||||
}
|
||||
|
||||
int getSizeAndLock() {
|
||||
mLock = true;
|
||||
return static_cast<int>(mDicNodes.size());
|
||||
}
|
||||
|
||||
bool exceeds(const size_t limit) const {
|
||||
return mDicNodes.size() >= limit;
|
||||
}
|
||||
|
||||
void pushPassingChild(DicNode *dicNode) {
|
||||
ASSERT(!mLock);
|
||||
mDicNodes.push_back(mEmptyNode);
|
||||
mDicNodes.back().initAsPassingChild(dicNode);
|
||||
}
|
||||
|
||||
void pushLeavingChild(DicNode *dicNode, const int pos, const uint8_t flags,
|
||||
const int childrenPos, const int attributesPos, const int siblingPos,
|
||||
const int nodeCodePoint, const int childrenCount, const int probability,
|
||||
const int bigramProbability, const bool isTerminal, const bool hasMultipleChars,
|
||||
const bool hasChildren, const uint16_t additionalSubwordLength,
|
||||
const int *additionalSubword) {
|
||||
ASSERT(!mLock);
|
||||
mDicNodes.push_back(mEmptyNode);
|
||||
mDicNodes.back().initAsChild(dicNode, pos, flags, childrenPos, attributesPos, siblingPos,
|
||||
nodeCodePoint, childrenCount, probability, -1 /* bigramProbability */, isTerminal,
|
||||
hasMultipleChars, hasChildren, additionalSubwordLength, additionalSubword);
|
||||
}
|
||||
|
||||
DicNode *operator[](const int id) {
|
||||
ASSERT(id < static_cast<int>(mDicNodes.size()));
|
||||
return &mDicNodes[id];
|
||||
}
|
||||
|
||||
DicNode *front() {
|
||||
ASSERT(1 <= static_cast<int>(mDicNodes.size()));
|
||||
return &mDicNodes[0];
|
||||
}
|
||||
|
||||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(DicNodeVector);
|
||||
std::vector<DicNode> mDicNodes;
|
||||
bool mLock;
|
||||
DicNode mEmptyNode;
|
||||
};
|
||||
} // namespace latinime
|
||||
#endif // LATINIME_DIC_NODE_VECTOR_H
|
Loading…
Reference in New Issue