am f99f1a75: Merge "Check bigrams for lowercased previous word if original is not found"
* commit 'f99f1a757b7b7f0bb6375ed7087d762b7e0bf7d1': Check bigrams for lowercased previous word if original is not found
This commit is contained in:
commit 4b4e5f6c16
3 changed files with 9 additions and 11 deletions
|
@ -280,15 +280,6 @@ namespace latinime {
|
||||||
return NOT_A_PROBABILITY;
|
return NOT_A_PROBABILITY;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* static */ int DicNodeUtils::getWordPos(const uint8_t *const dicRoot, const int *word,
|
|
||||||
const int wordLength) {
|
|
||||||
if (!word) {
|
|
||||||
return NOT_VALID_WORD;
|
|
||||||
}
|
|
||||||
return BinaryFormat::getTerminalPosition(
|
|
||||||
dicRoot, word, wordLength, false /* forceLowerCaseSearch */);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* static */ bool DicNodeUtils::isMatchedNodeCodePoint(const ProximityInfoState *pInfoState,
|
/* static */ bool DicNodeUtils::isMatchedNodeCodePoint(const ProximityInfoState *pInfoState,
|
||||||
const int pointIndex, const bool exactOnly, const int nodeCodePoint) {
|
const int pointIndex, const bool exactOnly, const int nodeCodePoint) {
|
||||||
if (!pInfoState) {
|
if (!pInfoState) {
|
||||||
|
|
|
@ -41,7 +41,6 @@ class DicNodeUtils {
|
||||||
static void initByCopy(DicNode *srcNode, DicNode *destNode);
|
static void initByCopy(DicNode *srcNode, DicNode *destNode);
|
||||||
static void getAllChildDicNodes(DicNode *dicNode, const uint8_t *const dicRoot,
|
static void getAllChildDicNodes(DicNode *dicNode, const uint8_t *const dicRoot,
|
||||||
DicNodeVector *childDicNodes);
|
DicNodeVector *childDicNodes);
|
||||||
static int getWordPos(const uint8_t *const dicRoot, const int *word, const int prevWordLength);
|
|
||||||
static float getBigramNodeImprobability(const uint8_t *const dicRoot,
|
static float getBigramNodeImprobability(const uint8_t *const dicRoot,
|
||||||
const DicNode *const node, hash_map_compat<int, int16_t> *const bigramCacheMap);
|
const DicNode *const node, hash_map_compat<int, int16_t> *const bigramCacheMap);
|
||||||
static bool isDicNodeFilteredOut(const int nodeCodePoint, const ProximityInfo *const pInfo,
|
static bool isDicNodeFilteredOut(const int nodeCodePoint, const ProximityInfo *const pInfo,
|
||||||
|
|
|
@ -69,7 +69,15 @@ void DicTraverseSession::init(const Dictionary *const dictionary, const int *pre
|
||||||
mPrevWordPos = NOT_VALID_WORD;
|
mPrevWordPos = NOT_VALID_WORD;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
mPrevWordPos = DicNodeUtils::getWordPos(dictionary->getOffsetDict(), prevWord, prevWordLength);
|
// TODO: merge following similar calls to getTerminalPosition into one case-insensitive call.
|
||||||
|
mPrevWordPos = BinaryFormat::getTerminalPosition(dictionary->getOffsetDict(), prevWord,
|
||||||
|
prevWordLength, false /* forceLowerCaseSearch */);
|
||||||
|
if (mPrevWordPos == NOT_VALID_WORD) {
|
||||||
|
// Check bigrams for lower-cased previous word if original was not found. Useful for
|
||||||
|
// auto-capitalized words like "The [current_word]".
|
||||||
|
mPrevWordPos = BinaryFormat::getTerminalPosition(dictionary->getOffsetDict(), prevWord,
|
||||||
|
prevWordLength, true /* forceLowerCaseSearch */);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void DicTraverseSession::setupForGetSuggestions(const ProximityInfo *pInfo,
|
void DicTraverseSession::setupForGetSuggestions(const ProximityInfo *pInfo,
|
||||||
|
|
Loading…
Reference in a new issue