Merge "Tag the whitelisted entries in native code." into jb-mr1-dev
commit
8c12bd87cc
|
@ -52,6 +52,8 @@ class BinaryFormat {
|
||||||
|
|
||||||
// Mask for attribute frequency, stored on 4 bits inside the flags byte.
|
// Mask for attribute frequency, stored on 4 bits inside the flags byte.
|
||||||
static const int MASK_ATTRIBUTE_FREQUENCY = 0x0F;
|
static const int MASK_ATTRIBUTE_FREQUENCY = 0x0F;
|
||||||
|
// The numeric value of the shortcut frequency that means 'whitelist'.
|
||||||
|
static const int WHITELIST_SHORTCUT_FREQUENCY = 15;
|
||||||
|
|
||||||
// Mask and flags for attribute address type selection.
|
// Mask and flags for attribute address type selection.
|
||||||
static const int MASK_ATTRIBUTE_ADDRESS_TYPE = 0x30;
|
static const int MASK_ATTRIBUTE_ADDRESS_TYPE = 0x30;
|
||||||
|
@ -99,6 +101,7 @@ class BinaryFormat {
|
||||||
static bool hasChildrenInFlags(const uint8_t flags);
|
static bool hasChildrenInFlags(const uint8_t flags);
|
||||||
static int getAttributeAddressAndForwardPointer(const uint8_t *const dict, const uint8_t flags,
|
static int getAttributeAddressAndForwardPointer(const uint8_t *const dict, const uint8_t flags,
|
||||||
int *pos);
|
int *pos);
|
||||||
|
static int getAttributeFrequencyFromFlags(const int flags);
|
||||||
static int getTerminalPosition(const uint8_t *const root, const int32_t *const inWord,
|
static int getTerminalPosition(const uint8_t *const root, const int32_t *const inWord,
|
||||||
const int length, const bool forceLowerCaseSearch);
|
const int length, const bool forceLowerCaseSearch);
|
||||||
static int getWordAtAddress(const uint8_t *const root, const int address, const int maxDepth,
|
static int getWordAtAddress(const uint8_t *const root, const int address, const int maxDepth,
|
||||||
|
@ -340,6 +343,10 @@ inline int BinaryFormat::getAttributeAddressAndForwardPointer(const uint8_t *con
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline int BinaryFormat::getAttributeFrequencyFromFlags(const int flags) {
|
||||||
|
return flags & MASK_ATTRIBUTE_FREQUENCY;
|
||||||
|
}
|
||||||
|
|
||||||
// This function gets the byte position of the last chargroup of the exact matching word in the
|
// This function gets the byte position of the last chargroup of the exact matching word in the
|
||||||
// dictionary. If no match is found, it returns NOT_VALID_WORD.
|
// dictionary. If no match is found, it returns NOT_VALID_WORD.
|
||||||
inline int BinaryFormat::getTerminalPosition(const uint8_t *const root,
|
inline int BinaryFormat::getTerminalPosition(const uint8_t *const root,
|
||||||
|
|
|
@ -157,6 +157,10 @@ void Correction::checkState() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool Correction::sameAsTyped() {
|
||||||
|
return mProximityInfoState.sameAsTyped(mWord, mOutputIndex);
|
||||||
|
}
|
||||||
|
|
||||||
int Correction::getFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray,
|
int Correction::getFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray,
|
||||||
const int wordCount, const bool isSpaceProximity, const unsigned short *word) {
|
const int wordCount, const bool isSpaceProximity, const unsigned short *word) {
|
||||||
return Correction::RankingAlgorithm::calcFreqForSplitMultipleWords(freqArray, wordLengthArray,
|
return Correction::RankingAlgorithm::calcFreqForSplitMultipleWords(freqArray, wordLengthArray,
|
||||||
|
|
|
@ -105,6 +105,7 @@ class Correction {
|
||||||
const int spaceProximityPos, const int missingSpacePos, const bool useFullEditDistance,
|
const int spaceProximityPos, const int missingSpacePos, const bool useFullEditDistance,
|
||||||
const bool doAutoCompletion, const int maxErrors);
|
const bool doAutoCompletion, const int maxErrors);
|
||||||
void checkState();
|
void checkState();
|
||||||
|
bool sameAsTyped();
|
||||||
bool initProcessState(const int index);
|
bool initProcessState(const int index);
|
||||||
|
|
||||||
int getInputIndex();
|
int getInputIndex();
|
||||||
|
|
|
@ -160,6 +160,21 @@ class ProximityInfoState {
|
||||||
return mTouchPositionCorrectionEnabled;
|
return mTouchPositionCorrectionEnabled;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline bool sameAsTyped(const unsigned short *word, int length) const {
|
||||||
|
if (length != mInputLength) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
const int *inputCodes = mInputCodes;
|
||||||
|
while (length--) {
|
||||||
|
if (static_cast<unsigned int>(*inputCodes) != static_cast<unsigned int>(*word)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
inputCodes += MAX_PROXIMITY_CHARS_SIZE_INTERNAL;
|
||||||
|
word++;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_COPY_AND_ASSIGN(ProximityInfoState);
|
DISALLOW_COPY_AND_ASSIGN(ProximityInfoState);
|
||||||
/////////////////////////////////////////
|
/////////////////////////////////////////
|
||||||
|
@ -179,21 +194,6 @@ class ProximityInfoState {
|
||||||
return mInputXCoordinates && mInputYCoordinates;
|
return mInputXCoordinates && mInputYCoordinates;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool sameAsTyped(const unsigned short *word, int length) const {
|
|
||||||
if (length != mInputLength) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
const int *inputCodes = mInputCodes;
|
|
||||||
while (length--) {
|
|
||||||
if ((unsigned int) *inputCodes != (unsigned int) *word) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
inputCodes += MAX_PROXIMITY_CHARS_SIZE_INTERNAL;
|
|
||||||
word++;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// const
|
// const
|
||||||
const ProximityInfo *mProximityInfo;
|
const ProximityInfo *mProximityInfo;
|
||||||
bool mHasTouchPositionCorrectionData;
|
bool mHasTouchPositionCorrectionData;
|
||||||
|
|
|
@ -46,7 +46,7 @@ class TerminalAttributes {
|
||||||
// Gets the shortcut target itself as a uint16_t string. For parameters and return value
|
// Gets the shortcut target itself as a uint16_t string. For parameters and return value
|
||||||
// see BinaryFormat::getWordAtAddress.
|
// see BinaryFormat::getWordAtAddress.
|
||||||
// TODO: make the output an uint32_t* to handle the whole unicode range.
|
// TODO: make the output an uint32_t* to handle the whole unicode range.
|
||||||
inline int getNextShortcutTarget(const int maxDepth, uint16_t *outWord) {
|
inline int getNextShortcutTarget(const int maxDepth, uint16_t *outWord, int *outFreq) {
|
||||||
const int shortcutFlags = BinaryFormat::getFlagsAndForwardPointer(mDict, &mPos);
|
const int shortcutFlags = BinaryFormat::getFlagsAndForwardPointer(mDict, &mPos);
|
||||||
mHasNextShortcutTarget =
|
mHasNextShortcutTarget =
|
||||||
0 != (shortcutFlags & BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT);
|
0 != (shortcutFlags & BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT);
|
||||||
|
@ -56,6 +56,7 @@ class TerminalAttributes {
|
||||||
if (NOT_A_CHARACTER == charCode) break;
|
if (NOT_A_CHARACTER == charCode) break;
|
||||||
outWord[i] = (uint16_t)charCode;
|
outWord[i] = (uint16_t)charCode;
|
||||||
}
|
}
|
||||||
|
*outFreq = BinaryFormat::getAttributeFrequencyFromFlags(shortcutFlags);
|
||||||
mPos += BinaryFormat::CHARACTER_ARRAY_TERMINATOR_SIZE;
|
mPos += BinaryFormat::CHARACTER_ARRAY_TERMINATOR_SIZE;
|
||||||
return i;
|
return i;
|
||||||
}
|
}
|
||||||
|
|
|
@ -391,8 +391,12 @@ inline void UnigramDictionary::onTerminal(const int probability,
|
||||||
const int finalProbability =
|
const int finalProbability =
|
||||||
correction->getFinalProbability(probability, &wordPointer, &wordLength);
|
correction->getFinalProbability(probability, &wordPointer, &wordLength);
|
||||||
if (finalProbability != NOT_A_PROBABILITY) {
|
if (finalProbability != NOT_A_PROBABILITY) {
|
||||||
addWord(wordPointer, wordLength, finalProbability, masterQueue,
|
if (0 != finalProbability) {
|
||||||
Dictionary::KIND_CORRECTION);
|
// If the probability is 0, we don't want to add this word. However we still
|
||||||
|
// want to add its shortcuts (including a possible whitelist entry) if any.
|
||||||
|
addWord(wordPointer, wordLength, finalProbability, masterQueue,
|
||||||
|
Dictionary::KIND_CORRECTION);
|
||||||
|
}
|
||||||
|
|
||||||
const int shortcutProbability = finalProbability > 0 ? finalProbability - 1 : 0;
|
const int shortcutProbability = finalProbability > 0 ? finalProbability - 1 : 0;
|
||||||
// Please note that the shortcut candidates will be added to the master queue only.
|
// Please note that the shortcut candidates will be added to the master queue only.
|
||||||
|
@ -407,10 +411,21 @@ inline void UnigramDictionary::onTerminal(const int probability,
|
||||||
// with the same score. For the moment we use -1 to make sure the shortcut will
|
// with the same score. For the moment we use -1 to make sure the shortcut will
|
||||||
// never be in front of the word.
|
// never be in front of the word.
|
||||||
uint16_t shortcutTarget[MAX_WORD_LENGTH_INTERNAL];
|
uint16_t shortcutTarget[MAX_WORD_LENGTH_INTERNAL];
|
||||||
|
int shortcutFrequency;
|
||||||
const int shortcutTargetStringLength = iterator.getNextShortcutTarget(
|
const int shortcutTargetStringLength = iterator.getNextShortcutTarget(
|
||||||
MAX_WORD_LENGTH_INTERNAL, shortcutTarget);
|
MAX_WORD_LENGTH_INTERNAL, shortcutTarget, &shortcutFrequency);
|
||||||
addWord(shortcutTarget, shortcutTargetStringLength, shortcutProbability,
|
int shortcutScore;
|
||||||
masterQueue, Dictionary::KIND_CORRECTION);
|
int kind;
|
||||||
|
if (shortcutFrequency == BinaryFormat::WHITELIST_SHORTCUT_FREQUENCY
|
||||||
|
&& correction->sameAsTyped()) {
|
||||||
|
shortcutScore = S_INT_MAX;
|
||||||
|
kind = Dictionary::KIND_WHITELIST;
|
||||||
|
} else {
|
||||||
|
shortcutScore = shortcutProbability;
|
||||||
|
kind = Dictionary::KIND_CORRECTION;
|
||||||
|
}
|
||||||
|
addWord(shortcutTarget, shortcutTargetStringLength, shortcutScore,
|
||||||
|
masterQueue, kind);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue