Suggest excessive characters

bug: 3193883

Change-Id: Iea7a0fce7ce62d8779a7c7e4613d50db30d82b07
main
satok 2010-12-08 16:04:16 +09:00
parent e1c216a1a3
commit cdbbea735f
4 changed files with 65 additions and 67 deletions

View File

@ -34,6 +34,8 @@ import java.util.List;
*/ */
public class Suggest implements Dictionary.WordCallback { public class Suggest implements Dictionary.WordCallback {
public static final String TAG = "Suggest";
public static final int APPROX_MAX_WORD_LENGTH = 32; public static final int APPROX_MAX_WORD_LENGTH = 32;
public static final int CORRECTION_NONE = 0; public static final int CORRECTION_NONE = 0;
@ -188,33 +190,6 @@ public class Suggest implements Dictionary.WordCallback {
} }
} }
/**
 * Decides whether {@code suggestion} shares enough characters with {@code original}.
 *
 * Characters are compared position by position over the shorter of the two lengths,
 * after passing each one through {@code ExpandableDictionary.toLowerCase}.
 *
 * @param original the word to compare against
 * @param suggestion the candidate word
 * @return {@code true} when the shorter length is at most 2; otherwise {@code true} when
 *         the match count is at least 2 (shorter length up to 4) or strictly greater than
 *         half the shorter length (integer division) for longer inputs
 */
private boolean haveSufficientCommonality(String original, CharSequence suggestion) {
final int originalLength = original.length();
final int suggestionLength = suggestion.length();
final int minLength = Math.min(originalLength, suggestionLength);
// Very short inputs are always accepted.
if (minLength <= 2) return true;
// Number of positions where both words hold the same (lowercased) character.
int matching = 0;
int lessMatching = 0; // Count matches if we skip one character
int i;
for (i = 0; i < minLength; i++) {
final char origChar = ExpandableDictionary.toLowerCase(original.charAt(i));
if (origChar == ExpandableDictionary.toLowerCase(suggestion.charAt(i))) {
matching++;
lessMatching++;
} else if (i + 1 < suggestionLength
&& origChar == ExpandableDictionary.toLowerCase(suggestion.charAt(i + 1))) {
// No match at the same index, but the original character equals the
// suggestion's next character; only the lenient counter is credited.
lessMatching++;
}
}
// lessMatching is incremented whenever matching is, so it is never the smaller value.
matching = Math.max(matching, lessMatching);
if (minLength <= 4) {
return matching >= 2;
} else {
// Requires strictly more than half (integer division) of the shorter length.
return matching > minLength / 2;
}
}
/** /**
* Returns a list of words that match the list of character codes passed in. * Returns a list of words that match the list of character codes passed in.
* This list will be overwritten the next time this function is called. * This list will be overwritten the next time this function is called.
@ -311,6 +286,11 @@ public class Suggest implements Dictionary.WordCallback {
// the normalized score of the second suggestion, behave less aggressive. // the normalized score of the second suggestion, behave less aggressive.
final double normalizedScore = LatinIMEUtil.calcNormalizedScore( final double normalizedScore = LatinIMEUtil.calcNormalizedScore(
mOriginalWord, mSuggestions.get(0), mPriorities[0]); mOriginalWord, mSuggestions.get(0), mPriorities[0]);
if (LatinImeLogger.sDBG) {
Log.d(TAG, "Normalized " + mOriginalWord + "," + mSuggestions.get(0) + ","
+ mPriorities[0] + normalizedScore
+ "(" + mAutoCompleteThreshold + ")");
}
if (normalizedScore >= mAutoCompleteThreshold) { if (normalizedScore >= mAutoCompleteThreshold) {
mHaveCorrection = true; mHaveCorrection = true;
} }
@ -319,15 +299,6 @@ public class Suggest implements Dictionary.WordCallback {
if (mOriginalWord != null) { if (mOriginalWord != null) {
mSuggestions.add(0, mOriginalWord.toString()); mSuggestions.add(0, mOriginalWord.toString());
} }
// Check if the first suggestion has a minimum number of characters in common
if (wordComposer.size() > 1 && mSuggestions.size() > 1
&& (mCorrectionMode == CORRECTION_FULL
|| mCorrectionMode == CORRECTION_FULL_BIGRAM)) {
if (!haveSufficientCommonality(mLowerOriginalWord, mSuggestions.get(1))) {
mHaveCorrection = false;
}
}
if (mAutoTextEnabled) { if (mAutoTextEnabled) {
int i = 0; int i = 0;
int max = 6; int max = 6;

View File

@ -23,10 +23,12 @@
#ifndef LOG_TAG #ifndef LOG_TAG
#define LOG_TAG "LatinIME: " #define LOG_TAG "LatinIME: "
#endif #endif
#define DEBUG_DICT 1 #define DEBUG_DICT true
#define DEBUG_SHOW_FOUND_WORD false
#else // FLAG_DBG #else // FLAG_DBG
#define LOGI #define LOGI
#define DEBUG_DICT 0 #define DEBUG_DICT false
#define DEBUG_SHOW_FOUND_WORD false
#endif // FLAG_DBG #endif // FLAG_DBG
// 22-bit address = ~4MB dictionary size limit, which on average would be about 200k-300k words // 22-bit address = ~4MB dictionary size limit, which on average would be about 200k-300k words
@ -48,7 +50,8 @@
#define NOT_VALID_WORD -99 #define NOT_VALID_WORD -99
#define SUGGEST_MISSING_CHARACTERS true #define SUGGEST_MISSING_CHARACTERS true
#define SUGGEST_MISSING_CHARACTERS_THRESHOLD 5
#define SUGGEST_EXCESSIVE_CHARACTERS true
// This should be greater than or equal to MAX_WORD_LENGTH defined in BinaryDictionary.java // This should be greater than or equal to MAX_WORD_LENGTH defined in BinaryDictionary.java
// This is only used for the size of array. Not to be used in c functions. // This is only used for the size of array. Not to be used in c functions.

View File

@ -46,19 +46,30 @@ int UnigramDictionary::getSuggestions(int *codes, int codesSize, unsigned short
initSuggestions(codes, codesSize, outWords, frequencies); initSuggestions(codes, codesSize, outWords, frequencies);
int suggestedWordsCount = getSuggestionCandidates(codesSize, -1, nextLetters, int suggestedWordsCount = getSuggestionCandidates(codesSize, -1, -1, nextLetters,
nextLettersSize); nextLettersSize);
// If there aren't sufficient suggestions, search for words by allowing wild cards at // If there aren't sufficient suggestions, search for words by allowing wild cards at
// the different character positions. This feature is not ready for prime-time as we need // the different character positions. This feature is not ready for prime-time as we need
// to figure out the best ranking for such words compared to proximity corrections and // to figure out the best ranking for such words compared to proximity corrections and
// completions. // completions.
if (SUGGEST_MISSING_CHARACTERS && suggestedWordsCount < SUGGEST_MISSING_CHARACTERS_THRESHOLD) { if (SUGGEST_MISSING_CHARACTERS) {
for (int i = 0; i < codesSize; ++i) { for (int i = 0; i < codesSize; ++i) {
int tempCount = getSuggestionCandidates(codesSize, i, NULL, 0); if (DEBUG_DICT) LOGI("--- Suggest missing characters %d", i);
const int tempCount = getSuggestionCandidates(codesSize, i, -1, NULL, 0);
if (tempCount > suggestedWordsCount) {
suggestedWordsCount = tempCount;
}
}
}
// Suggest excessive characters
if (SUGGEST_EXCESSIVE_CHARACTERS) {
for (int i = 0; i < codesSize; ++i) {
if (DEBUG_DICT) LOGI("--- Suggest excessive characters %d", i);
const int tempCount = getSuggestionCandidates(codesSize, -1, i, NULL, 0);
if (tempCount > suggestedWordsCount) { if (tempCount > suggestedWordsCount) {
suggestedWordsCount = tempCount; suggestedWordsCount = tempCount;
break;
} }
} }
} }
@ -86,14 +97,14 @@ void UnigramDictionary::initSuggestions(int *codes, int codesSize, unsigned shor
mMaxEditDistance = mInputLength < 5 ? 2 : mInputLength / 2; mMaxEditDistance = mInputLength < 5 ? 2 : mInputLength / 2;
} }
int UnigramDictionary::getSuggestionCandidates(int inputLength, int skipPos, int UnigramDictionary::getSuggestionCandidates(int inputLength, int skipPos, int excessivePos,
int *nextLetters, int nextLettersSize) { int *nextLetters, int nextLettersSize) {
if (DEBUG_DICT) LOGI("getSuggestionCandidates"); if (DEBUG_DICT) LOGI("getSuggestionCandidates");
int initialPos = 0; int initialPos = 0;
if (IS_LATEST_DICT_VERSION) { if (IS_LATEST_DICT_VERSION) {
initialPos = DICTIONARY_HEADER_SIZE; initialPos = DICTIONARY_HEADER_SIZE;
} }
getWords(initialPos, inputLength, skipPos, nextLetters, nextLettersSize); getWords(initialPos, inputLength, skipPos, excessivePos, nextLetters, nextLettersSize);
// Get the word count // Get the word count
int suggestedWordsCount = 0; int suggestedWordsCount = 0;
@ -115,7 +126,7 @@ bool UnigramDictionary::addWord(unsigned short *word, int length, int frequency)
if (DEBUG_DICT) { if (DEBUG_DICT) {
char s[length + 1]; char s[length + 1];
for (int i = 0; i <= length; i++) s[i] = word[i]; for (int i = 0; i <= length; i++) s[i] = word[i];
LOGI("Found word = %s, freq = %d : \n", s, frequency); if (DEBUG_SHOW_FOUND_WORD) LOGI("Found word = %s, freq = %d : \n", s, frequency);
} }
if (length > MAX_WORD_LENGTH) { if (length > MAX_WORD_LENGTH) {
if (DEBUG_DICT) LOGI("Exceeded max word length."); if (DEBUG_DICT) LOGI("Exceeded max word length.");
@ -132,6 +143,11 @@ bool UnigramDictionary::addWord(unsigned short *word, int length, int frequency)
insertAt++; insertAt++;
} }
if (insertAt < MAX_WORDS) { if (insertAt < MAX_WORDS) {
if (DEBUG_DICT) {
char s[length + 1];
for (int i = 0; i <= length; i++) s[i] = word[i];
LOGI("Added word = %s, freq = %d : \n", s, frequency);
}
memmove((char*) mFrequencies + (insertAt + 1) * sizeof(mFrequencies[0]), memmove((char*) mFrequencies + (insertAt + 1) * sizeof(mFrequencies[0]),
(char*) mFrequencies + insertAt * sizeof(mFrequencies[0]), (char*) mFrequencies + insertAt * sizeof(mFrequencies[0]),
(MAX_WORDS - insertAt - 1) * sizeof(mFrequencies[0])); (MAX_WORDS - insertAt - 1) * sizeof(mFrequencies[0]));
@ -181,16 +197,16 @@ static const char QUOTE = '\'';
// Keep this for comparing spec to new getWords // Keep this for comparing spec to new getWords
void UnigramDictionary::getWordsOld(const int initialPos, const int inputLength, const int skipPos, void UnigramDictionary::getWordsOld(const int initialPos, const int inputLength, const int skipPos,
int *nextLetters, const int nextLettersSize) { const int excessivePos, int *nextLetters, const int nextLettersSize) {
int initialPosition = initialPos; int initialPosition = initialPos;
const int count = Dictionary::getCount(DICT, &initialPosition); const int count = Dictionary::getCount(DICT, &initialPosition);
getWordsRec(count, initialPosition, 0, getWordsRec(count, initialPosition, 0,
min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH), min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH),
mInputLength <= 0, 1, 0, 0, skipPos, nextLetters, nextLettersSize); mInputLength <= 0, 1, 0, 0, skipPos, excessivePos, nextLetters, nextLettersSize);
} }
void UnigramDictionary::getWords(const int rootPos, const int inputLength, const int skipPos, void UnigramDictionary::getWords(const int rootPos, const int inputLength, const int skipPos,
int *nextLetters, const int nextLettersSize) { const int excessivePos, int *nextLetters, const int nextLettersSize) {
int rootPosition = rootPos; int rootPosition = rootPos;
const int MAX_DEPTH = min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH); const int MAX_DEPTH = min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);
// Get the number of child of root, then increment the position // Get the number of child of root, then increment the position
@ -216,9 +232,9 @@ void UnigramDictionary::getWords(const int rootPos, const int inputLength, const
// depth will never be greater than MAX_DEPTH because in that case, // depth will never be greater than MAX_DEPTH because in that case,
// needsToTraverseChildrenNodes should be false // needsToTraverseChildrenNodes should be false
const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos, depth, const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos, depth,
MAX_DEPTH, traverseAllNodes, snr, inputIndex, diffs, skipPos, nextLetters, MAX_DEPTH, traverseAllNodes, snr, inputIndex, diffs, skipPos, excessivePos,
nextLettersSize, &childCount, &firstChildPos, &traverseAllNodes, &snr, nextLetters, nextLettersSize, &childCount, &firstChildPos, &traverseAllNodes,
&inputIndex, &diffs, &siblingPos); &snr, &inputIndex, &diffs, &siblingPos);
// Next sibling pos // Next sibling pos
mStackSiblingPos[depth] = siblingPos; mStackSiblingPos[depth] = siblingPos;
if (needsToTraverseChildrenNodes) { if (needsToTraverseChildrenNodes) {
@ -232,7 +248,7 @@ void UnigramDictionary::getWords(const int rootPos, const int inputLength, const
mStackSiblingPos[depth] = firstChildPos; mStackSiblingPos[depth] = firstChildPos;
} }
} else { } else {
// Goes to parent node // Goes to parent sibling node
--depth; --depth;
} }
} }
@ -241,7 +257,8 @@ void UnigramDictionary::getWords(const int rootPos, const int inputLength, const
// snr : frequency? // snr : frequency?
void UnigramDictionary::getWordsRec(const int childrenCount, const int pos, const int depth, void UnigramDictionary::getWordsRec(const int childrenCount, const int pos, const int depth,
const int maxDepth, const bool traverseAllNodes, const int snr, const int inputIndex, const int maxDepth, const bool traverseAllNodes, const int snr, const int inputIndex,
const int diffs, const int skipPos, int *nextLetters, const int nextLettersSize) { const int diffs, const int skipPos, const int excessivePos, int *nextLetters,
const int nextLettersSize) {
int siblingPos = pos; int siblingPos = pos;
for (int i = 0; i < childrenCount; ++i) { for (int i = 0; i < childrenCount; ++i) {
int newCount; int newCount;
@ -253,14 +270,16 @@ void UnigramDictionary::getWordsRec(const int childrenCount, const int pos, cons
int newDiffs; int newDiffs;
int newSiblingPos; int newSiblingPos;
const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos, depth, maxDepth, const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos, depth, maxDepth,
traverseAllNodes, snr, inputIndex, diffs, skipPos, nextLetters, nextLettersSize, traverseAllNodes, snr, inputIndex, diffs, skipPos, excessivePos, nextLetters,
nextLettersSize,
&newCount, &newChildPosition, &newTraverseAllNodes, &newSnr, &newCount, &newChildPosition, &newTraverseAllNodes, &newSnr,
&newInputIndex, &newDiffs, &newSiblingPos); &newInputIndex, &newDiffs, &newSiblingPos);
siblingPos = newSiblingPos; siblingPos = newSiblingPos;
if (needsToTraverseChildrenNodes) { if (needsToTraverseChildrenNodes) {
getWordsRec(newCount, newChildPosition, newDepth, maxDepth, newTraverseAllNodes, getWordsRec(newCount, newChildPosition, newDepth, maxDepth, newTraverseAllNodes,
newSnr, newInputIndex, newDiffs, skipPos, nextLetters, nextLettersSize); newSnr, newInputIndex, newDiffs, skipPos, excessivePos, nextLetters,
nextLettersSize);
} }
} }
} }
@ -312,14 +331,18 @@ inline int UnigramDictionary::getMatchedProximityId(const int *currentChars,
} }
inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth, inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth,
const int maxDepth, const bool traverseAllNodes, const int snr, const int inputIndex, const int maxDepth, const bool traverseAllNodes, const int snr, int inputIndex,
const int diffs, const int skipPos, int *nextLetters, const int nextLettersSize, const int diffs, const int skipPos, const int excessivePos, int *nextLetters,
int *newCount, int *newChildPosition, bool *newTraverseAllNodes, const int nextLettersSize, int *newCount, int *newChildPosition, bool *newTraverseAllNodes,
int *newSnr, int*newInputIndex, int *newDiffs, int *nextSiblingPosition) { int *newSnr, int*newInputIndex, int *newDiffs, int *nextSiblingPosition) {
if (DEBUG_DICT) assert(skipPos < 0 || excessivePos < 0);
unsigned short c; unsigned short c;
int childPosition; int childPosition;
bool terminal; bool terminal;
int freq; int freq;
if (excessivePos == depth) ++inputIndex;
*nextSiblingPosition = Dictionary::setDictionaryValues(DICT, IS_LATEST_DICT_VERSION, pos, &c, *nextSiblingPosition = Dictionary::setDictionaryValues(DICT, IS_LATEST_DICT_VERSION, pos, &c,
&childPosition, &terminal, &freq); &childPosition, &terminal, &freq);

View File

@ -31,7 +31,7 @@ public:
private: private:
void initSuggestions(int *codes, int codesSize, unsigned short *outWords, int *frequencies); void initSuggestions(int *codes, int codesSize, unsigned short *outWords, int *frequencies);
int getSuggestionCandidates(int inputLength, int skipPos, int *nextLetters, int getSuggestionCandidates(int inputLength, int skipPos, int excessivePos, int *nextLetters,
int nextLettersSize); int nextLettersSize);
void getVersionNumber(); void getVersionNumber();
bool checkIfDictVersionIsLatest(); bool checkIfDictVersionIsLatest();
@ -43,12 +43,12 @@ private:
unsigned short toLowerCase(unsigned short c); unsigned short toLowerCase(unsigned short c);
void getWordsRec(const int childrenCount, const int pos, const int depth, const int maxDepth, void getWordsRec(const int childrenCount, const int pos, const int depth, const int maxDepth,
const bool traverseAllNodes, const int snr, const int inputIndex, const int diffs, const bool traverseAllNodes, const int snr, const int inputIndex, const int diffs,
const int skipPos, int *nextLetters, const int nextLettersSize); const int skipPos, const int excessivePos, int *nextLetters, const int nextLettersSize);
void getWords(const int rootPos, const int inputLength, const int skipPos, void getWords(const int rootPos, const int inputLength, const int skipPos,
int *nextLetters, const int nextLettersSize); const int excessivePos, int *nextLetters, const int nextLettersSize);
// Keep getWordsOld for comparing performance between getWords and getWordsOld // Keep getWordsOld for comparing performance between getWords and getWordsOld
void getWordsOld(const int initialPos, const int inputLength, const int skipPos, void getWordsOld(const int initialPos, const int inputLength, const int skipPos,
int *nextLetters, const int nextLettersSize); const int excessivePos, int *nextLetters, const int nextLettersSize);
void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize); void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize);
void onTerminalWhenUserTypedLengthIsGreaterThanInputLength(unsigned short *word, void onTerminalWhenUserTypedLengthIsGreaterThanInputLength(unsigned short *word,
const int mInputLength, const int depth, const int snr, int *nextLetters, const int mInputLength, const int depth, const int snr, int *nextLetters,
@ -59,10 +59,11 @@ private:
const int inputIndex, const int skipPos, const int depth); const int inputIndex, const int skipPos, const int depth);
int getMatchedProximityId(const int *currentChars, const unsigned short c, const int skipPos); int getMatchedProximityId(const int *currentChars, const unsigned short c, const int skipPos);
bool processCurrentNode(const int pos, const int depth, bool processCurrentNode(const int pos, const int depth,
const int maxDepth, const bool traverseAllNodes, const int snr, const int inputIndex, const int maxDepth, const bool traverseAllNodes, const int snr, int inputIndex,
const int diffs, const int skipPos, int *nextLetters, const int nextLettersSize, const int diffs, const int skipPos, const int excessivePos, int *nextLetters,
int *newCount, int *newChildPosition, bool *newTraverseAllNodes, const int nextLettersSize, int *newCount, int *newChildPosition,
int *newSnr, int*newInputIndex, int *newDiffs, int *nextSiblingPosition); bool *newTraverseAllNodes, int *newSnr, int*newInputIndex, int *newDiffs,
int *nextSiblingPosition);
const unsigned char *DICT; const unsigned char *DICT;
const int MAX_WORDS; const int MAX_WORDS;
const int MAX_WORD_LENGTH; const int MAX_WORD_LENGTH;