Merge "Tag the whitelisted entries in native code." into jb-mr1-dev

main
Jean Chalard 2012-08-13 03:24:47 -07:00 committed by Android (Google) Code Review
commit 8c12bd87cc
6 changed files with 49 additions and 21 deletions

View File

@ -52,6 +52,8 @@ class BinaryFormat {
// Mask for attribute frequency, stored on 4 bits inside the flags byte. // Mask for attribute frequency, stored on 4 bits inside the flags byte.
static const int MASK_ATTRIBUTE_FREQUENCY = 0x0F; static const int MASK_ATTRIBUTE_FREQUENCY = 0x0F;
// The numeric value of the shortcut frequency that means 'whitelist'.
static const int WHITELIST_SHORTCUT_FREQUENCY = 15;
// Mask and flags for attribute address type selection. // Mask and flags for attribute address type selection.
static const int MASK_ATTRIBUTE_ADDRESS_TYPE = 0x30; static const int MASK_ATTRIBUTE_ADDRESS_TYPE = 0x30;
@ -99,6 +101,7 @@ class BinaryFormat {
static bool hasChildrenInFlags(const uint8_t flags); static bool hasChildrenInFlags(const uint8_t flags);
static int getAttributeAddressAndForwardPointer(const uint8_t *const dict, const uint8_t flags, static int getAttributeAddressAndForwardPointer(const uint8_t *const dict, const uint8_t flags,
int *pos); int *pos);
static int getAttributeFrequencyFromFlags(const int flags);
static int getTerminalPosition(const uint8_t *const root, const int32_t *const inWord, static int getTerminalPosition(const uint8_t *const root, const int32_t *const inWord,
const int length, const bool forceLowerCaseSearch); const int length, const bool forceLowerCaseSearch);
static int getWordAtAddress(const uint8_t *const root, const int address, const int maxDepth, static int getWordAtAddress(const uint8_t *const root, const int address, const int maxDepth,
@ -340,6 +343,10 @@ inline int BinaryFormat::getAttributeAddressAndForwardPointer(const uint8_t *con
} }
} }
inline int BinaryFormat::getAttributeFrequencyFromFlags(const int flags) {
return flags & MASK_ATTRIBUTE_FREQUENCY;
}
// This function gets the byte position of the last chargroup of the exact matching word in the // This function gets the byte position of the last chargroup of the exact matching word in the
// dictionary. If no match is found, it returns NOT_VALID_WORD. // dictionary. If no match is found, it returns NOT_VALID_WORD.
inline int BinaryFormat::getTerminalPosition(const uint8_t *const root, inline int BinaryFormat::getTerminalPosition(const uint8_t *const root,

View File

@ -157,6 +157,10 @@ void Correction::checkState() {
} }
} }
bool Correction::sameAsTyped() {
return mProximityInfoState.sameAsTyped(mWord, mOutputIndex);
}
int Correction::getFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray, int Correction::getFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray,
const int wordCount, const bool isSpaceProximity, const unsigned short *word) { const int wordCount, const bool isSpaceProximity, const unsigned short *word) {
return Correction::RankingAlgorithm::calcFreqForSplitMultipleWords(freqArray, wordLengthArray, return Correction::RankingAlgorithm::calcFreqForSplitMultipleWords(freqArray, wordLengthArray,

View File

@ -105,6 +105,7 @@ class Correction {
const int spaceProximityPos, const int missingSpacePos, const bool useFullEditDistance, const int spaceProximityPos, const int missingSpacePos, const bool useFullEditDistance,
const bool doAutoCompletion, const int maxErrors); const bool doAutoCompletion, const int maxErrors);
void checkState(); void checkState();
bool sameAsTyped();
bool initProcessState(const int index); bool initProcessState(const int index);
int getInputIndex(); int getInputIndex();

View File

@ -160,6 +160,21 @@ class ProximityInfoState {
return mTouchPositionCorrectionEnabled; return mTouchPositionCorrectionEnabled;
} }
inline bool sameAsTyped(const unsigned short *word, int length) const {
if (length != mInputLength) {
return false;
}
const int *inputCodes = mInputCodes;
while (length--) {
if (static_cast<unsigned int>(*inputCodes) != static_cast<unsigned int>(*word)) {
return false;
}
inputCodes += MAX_PROXIMITY_CHARS_SIZE_INTERNAL;
word++;
}
return true;
}
private: private:
DISALLOW_COPY_AND_ASSIGN(ProximityInfoState); DISALLOW_COPY_AND_ASSIGN(ProximityInfoState);
///////////////////////////////////////// /////////////////////////////////////////
@ -179,21 +194,6 @@ class ProximityInfoState {
return mInputXCoordinates && mInputYCoordinates; return mInputXCoordinates && mInputYCoordinates;
} }
bool sameAsTyped(const unsigned short *word, int length) const {
if (length != mInputLength) {
return false;
}
const int *inputCodes = mInputCodes;
while (length--) {
if ((unsigned int) *inputCodes != (unsigned int) *word) {
return false;
}
inputCodes += MAX_PROXIMITY_CHARS_SIZE_INTERNAL;
word++;
}
return true;
}
// const // const
const ProximityInfo *mProximityInfo; const ProximityInfo *mProximityInfo;
bool mHasTouchPositionCorrectionData; bool mHasTouchPositionCorrectionData;

View File

@ -46,7 +46,7 @@ class TerminalAttributes {
// Gets the shortcut target itself as a uint16_t string. For parameters and return value // Gets the shortcut target itself as a uint16_t string. For parameters and return value
// see BinaryFormat::getWordAtAddress. // see BinaryFormat::getWordAtAddress.
// TODO: make the output an uint32_t* to handle the whole unicode range. // TODO: make the output an uint32_t* to handle the whole unicode range.
inline int getNextShortcutTarget(const int maxDepth, uint16_t *outWord) { inline int getNextShortcutTarget(const int maxDepth, uint16_t *outWord, int *outFreq) {
const int shortcutFlags = BinaryFormat::getFlagsAndForwardPointer(mDict, &mPos); const int shortcutFlags = BinaryFormat::getFlagsAndForwardPointer(mDict, &mPos);
mHasNextShortcutTarget = mHasNextShortcutTarget =
0 != (shortcutFlags & BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT); 0 != (shortcutFlags & BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT);
@ -56,6 +56,7 @@ class TerminalAttributes {
if (NOT_A_CHARACTER == charCode) break; if (NOT_A_CHARACTER == charCode) break;
outWord[i] = (uint16_t)charCode; outWord[i] = (uint16_t)charCode;
} }
*outFreq = BinaryFormat::getAttributeFrequencyFromFlags(shortcutFlags);
mPos += BinaryFormat::CHARACTER_ARRAY_TERMINATOR_SIZE; mPos += BinaryFormat::CHARACTER_ARRAY_TERMINATOR_SIZE;
return i; return i;
} }

View File

@ -391,8 +391,12 @@ inline void UnigramDictionary::onTerminal(const int probability,
const int finalProbability = const int finalProbability =
correction->getFinalProbability(probability, &wordPointer, &wordLength); correction->getFinalProbability(probability, &wordPointer, &wordLength);
if (finalProbability != NOT_A_PROBABILITY) { if (finalProbability != NOT_A_PROBABILITY) {
if (0 != finalProbability) {
// If the probability is 0, we don't want to add this word. However we still
// want to add its shortcuts (including a possible whitelist entry) if any.
addWord(wordPointer, wordLength, finalProbability, masterQueue, addWord(wordPointer, wordLength, finalProbability, masterQueue,
Dictionary::KIND_CORRECTION); Dictionary::KIND_CORRECTION);
}
const int shortcutProbability = finalProbability > 0 ? finalProbability - 1 : 0; const int shortcutProbability = finalProbability > 0 ? finalProbability - 1 : 0;
// Please note that the shortcut candidates will be added to the master queue only. // Please note that the shortcut candidates will be added to the master queue only.
@ -407,10 +411,21 @@ inline void UnigramDictionary::onTerminal(const int probability,
// with the same score. For the moment we use -1 to make sure the shortcut will // with the same score. For the moment we use -1 to make sure the shortcut will
// never be in front of the word. // never be in front of the word.
uint16_t shortcutTarget[MAX_WORD_LENGTH_INTERNAL]; uint16_t shortcutTarget[MAX_WORD_LENGTH_INTERNAL];
int shortcutFrequency;
const int shortcutTargetStringLength = iterator.getNextShortcutTarget( const int shortcutTargetStringLength = iterator.getNextShortcutTarget(
MAX_WORD_LENGTH_INTERNAL, shortcutTarget); MAX_WORD_LENGTH_INTERNAL, shortcutTarget, &shortcutFrequency);
addWord(shortcutTarget, shortcutTargetStringLength, shortcutProbability, int shortcutScore;
masterQueue, Dictionary::KIND_CORRECTION); int kind;
if (shortcutFrequency == BinaryFormat::WHITELIST_SHORTCUT_FREQUENCY
&& correction->sameAsTyped()) {
shortcutScore = S_INT_MAX;
kind = Dictionary::KIND_WHITELIST;
} else {
shortcutScore = shortcutProbability;
kind = Dictionary::KIND_CORRECTION;
}
addWord(shortcutTarget, shortcutTargetStringLength, shortcutScore,
masterQueue, kind);
} }
} }
} }