Check bigrams for lowercased previous word if original is not found
Bug: 8592527 Change-Id: I70c4b81263d8b2cb7050364c124589f66900cfce
This commit is contained in:
parent
f595a0626d
commit
5fa33a701d
3 changed files with 9 additions and 11 deletions
|
@ -280,15 +280,6 @@ namespace latinime {
|
|||
return NOT_A_PROBABILITY;
|
||||
}
|
||||
|
||||
/* static */ int DicNodeUtils::getWordPos(const uint8_t *const dicRoot, const int *word,
|
||||
const int wordLength) {
|
||||
if (!word) {
|
||||
return NOT_VALID_WORD;
|
||||
}
|
||||
return BinaryFormat::getTerminalPosition(
|
||||
dicRoot, word, wordLength, false /* forceLowerCaseSearch */);
|
||||
}
|
||||
|
||||
/* static */ bool DicNodeUtils::isMatchedNodeCodePoint(const ProximityInfoState *pInfoState,
|
||||
const int pointIndex, const bool exactOnly, const int nodeCodePoint) {
|
||||
if (!pInfoState) {
|
||||
|
|
|
@ -41,7 +41,6 @@ class DicNodeUtils {
|
|||
static void initByCopy(DicNode *srcNode, DicNode *destNode);
|
||||
static void getAllChildDicNodes(DicNode *dicNode, const uint8_t *const dicRoot,
|
||||
DicNodeVector *childDicNodes);
|
||||
static int getWordPos(const uint8_t *const dicRoot, const int *word, const int prevWordLength);
|
||||
static float getBigramNodeImprobability(const uint8_t *const dicRoot,
|
||||
const DicNode *const node, hash_map_compat<int, int16_t> *const bigramCacheMap);
|
||||
static bool isDicNodeFilteredOut(const int nodeCodePoint, const ProximityInfo *const pInfo,
|
||||
|
|
|
@ -69,7 +69,15 @@ void DicTraverseSession::init(const Dictionary *const dictionary, const int *pre
|
|||
mPrevWordPos = NOT_VALID_WORD;
|
||||
return;
|
||||
}
|
||||
mPrevWordPos = DicNodeUtils::getWordPos(dictionary->getOffsetDict(), prevWord, prevWordLength);
|
||||
// TODO: merge following similar calls to getTerminalPosition into one case-insensitive call.
|
||||
mPrevWordPos = BinaryFormat::getTerminalPosition(dictionary->getOffsetDict(), prevWord,
|
||||
prevWordLength, false /* forceLowerCaseSearch */);
|
||||
if (mPrevWordPos == NOT_VALID_WORD) {
|
||||
// Check bigrams for lower-cased previous word if original was not found. Useful for
|
||||
// auto-capitalized words like "The [current_word]".
|
||||
mPrevWordPos = BinaryFormat::getTerminalPosition(dictionary->getOffsetDict(), prevWord,
|
||||
prevWordLength, true /* forceLowerCaseSearch */);
|
||||
}
|
||||
}
|
||||
|
||||
void DicTraverseSession::setupForGetSuggestions(const ProximityInfo *pInfo,
|
||||
|
|
Loading…
Reference in a new issue