Tag the whitelisted entries in native code.

Since this is already used in Java land, this actually does
activate the whitelist path, and the code is now fully
functional. We still have to remove the old whitelist resource
and to compile the dictionary that includes the whitelist.

Bug: 6906525
Change-Id: Iacde5313e303b9ed792940efaf6bcfa4ee1317bd
main
Jean Chalard 2012-08-10 16:52:27 +09:00
parent 814510305c
commit b14fc88e48
6 changed files with 49 additions and 21 deletions

View File

@ -52,6 +52,8 @@ class BinaryFormat {
// Mask for attribute frequency, stored on 4 bits inside the flags byte.
static const int MASK_ATTRIBUTE_FREQUENCY = 0x0F;
// The numeric value of the shortcut frequency that means 'whitelist'.
static const int WHITELIST_SHORTCUT_FREQUENCY = 15;
// Mask and flags for attribute address type selection.
static const int MASK_ATTRIBUTE_ADDRESS_TYPE = 0x30;
@ -99,6 +101,7 @@ class BinaryFormat {
static bool hasChildrenInFlags(const uint8_t flags);
static int getAttributeAddressAndForwardPointer(const uint8_t *const dict, const uint8_t flags,
int *pos);
static int getAttributeFrequencyFromFlags(const int flags);
static int getTerminalPosition(const uint8_t *const root, const int32_t *const inWord,
const int length, const bool forceLowerCaseSearch);
static int getWordAtAddress(const uint8_t *const root, const int address, const int maxDepth,
@ -340,6 +343,10 @@ inline int BinaryFormat::getAttributeAddressAndForwardPointer(const uint8_t *con
}
}
inline int BinaryFormat::getAttributeFrequencyFromFlags(const int flags) {
return flags & MASK_ATTRIBUTE_FREQUENCY;
}
// This function gets the byte position of the last chargroup of the exact matching word in the
// dictionary. If no match is found, it returns NOT_VALID_WORD.
inline int BinaryFormat::getTerminalPosition(const uint8_t *const root,

View File

@ -157,6 +157,10 @@ void Correction::checkState() {
}
}
bool Correction::sameAsTyped() {
return mProximityInfoState.sameAsTyped(mWord, mOutputIndex);
}
int Correction::getFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray,
const int wordCount, const bool isSpaceProximity, const unsigned short *word) {
return Correction::RankingAlgorithm::calcFreqForSplitMultipleWords(freqArray, wordLengthArray,

View File

@ -105,6 +105,7 @@ class Correction {
const int spaceProximityPos, const int missingSpacePos, const bool useFullEditDistance,
const bool doAutoCompletion, const int maxErrors);
void checkState();
bool sameAsTyped();
bool initProcessState(const int index);
int getInputIndex();

View File

@ -160,6 +160,21 @@ class ProximityInfoState {
return mTouchPositionCorrectionEnabled;
}
inline bool sameAsTyped(const unsigned short *word, int length) const {
if (length != mInputLength) {
return false;
}
const int *inputCodes = mInputCodes;
while (length--) {
if (static_cast<unsigned int>(*inputCodes) != static_cast<unsigned int>(*word)) {
return false;
}
inputCodes += MAX_PROXIMITY_CHARS_SIZE_INTERNAL;
word++;
}
return true;
}
private:
DISALLOW_COPY_AND_ASSIGN(ProximityInfoState);
/////////////////////////////////////////
@ -179,21 +194,6 @@ class ProximityInfoState {
return mInputXCoordinates && mInputYCoordinates;
}
bool sameAsTyped(const unsigned short *word, int length) const {
if (length != mInputLength) {
return false;
}
const int *inputCodes = mInputCodes;
while (length--) {
if ((unsigned int) *inputCodes != (unsigned int) *word) {
return false;
}
inputCodes += MAX_PROXIMITY_CHARS_SIZE_INTERNAL;
word++;
}
return true;
}
// const
const ProximityInfo *mProximityInfo;
bool mHasTouchPositionCorrectionData;

View File

@ -46,7 +46,7 @@ class TerminalAttributes {
// Gets the shortcut target itself as a uint16_t string. For parameters and return value
// see BinaryFormat::getWordAtAddress.
// TODO: make the output an uint32_t* to handle the whole unicode range.
inline int getNextShortcutTarget(const int maxDepth, uint16_t *outWord) {
inline int getNextShortcutTarget(const int maxDepth, uint16_t *outWord, int *outFreq) {
const int shortcutFlags = BinaryFormat::getFlagsAndForwardPointer(mDict, &mPos);
mHasNextShortcutTarget =
0 != (shortcutFlags & BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT);
@ -56,6 +56,7 @@ class TerminalAttributes {
if (NOT_A_CHARACTER == charCode) break;
outWord[i] = (uint16_t)charCode;
}
*outFreq = BinaryFormat::getAttributeFrequencyFromFlags(shortcutFlags);
mPos += BinaryFormat::CHARACTER_ARRAY_TERMINATOR_SIZE;
return i;
}

View File

@ -391,8 +391,12 @@ inline void UnigramDictionary::onTerminal(const int probability,
const int finalProbability =
correction->getFinalProbability(probability, &wordPointer, &wordLength);
if (finalProbability != NOT_A_PROBABILITY) {
addWord(wordPointer, wordLength, finalProbability, masterQueue,
Dictionary::KIND_CORRECTION);
if (0 != finalProbability) {
// If the probability is 0, we don't want to add this word. However we still
// want to add its shortcuts (including a possible whitelist entry) if any.
addWord(wordPointer, wordLength, finalProbability, masterQueue,
Dictionary::KIND_CORRECTION);
}
const int shortcutProbability = finalProbability > 0 ? finalProbability - 1 : 0;
// Please note that the shortcut candidates will be added to the master queue only.
@ -407,10 +411,21 @@ inline void UnigramDictionary::onTerminal(const int probability,
// with the same score. For the moment we use -1 to make sure the shortcut will
// never be in front of the word.
uint16_t shortcutTarget[MAX_WORD_LENGTH_INTERNAL];
int shortcutFrequency;
const int shortcutTargetStringLength = iterator.getNextShortcutTarget(
MAX_WORD_LENGTH_INTERNAL, shortcutTarget);
addWord(shortcutTarget, shortcutTargetStringLength, shortcutProbability,
masterQueue, Dictionary::KIND_CORRECTION);
MAX_WORD_LENGTH_INTERNAL, shortcutTarget, &shortcutFrequency);
int shortcutScore;
int kind;
if (shortcutFrequency == BinaryFormat::WHITELIST_SHORTCUT_FREQUENCY
&& correction->sameAsTyped()) {
shortcutScore = S_INT_MAX;
kind = Dictionary::KIND_WHITELIST;
} else {
shortcutScore = shortcutProbability;
kind = Dictionary::KIND_CORRECTION;
}
addWord(shortcutTarget, shortcutTargetStringLength, shortcutScore,
masterQueue, kind);
}
}
}