Merge "Suggest excessive characters"
This commit is contained in:
commit
59cd73b916
4 changed files with 65 additions and 67 deletions
|
@ -34,6 +34,8 @@ import java.util.List;
|
|||
*/
|
||||
public class Suggest implements Dictionary.WordCallback {
|
||||
|
||||
public static final String TAG = "Suggest";
|
||||
|
||||
public static final int APPROX_MAX_WORD_LENGTH = 32;
|
||||
|
||||
public static final int CORRECTION_NONE = 0;
|
||||
|
@ -188,33 +190,6 @@ public class Suggest implements Dictionary.WordCallback {
|
|||
}
|
||||
}
|
||||
|
||||
private boolean haveSufficientCommonality(String original, CharSequence suggestion) {
|
||||
final int originalLength = original.length();
|
||||
final int suggestionLength = suggestion.length();
|
||||
final int minLength = Math.min(originalLength, suggestionLength);
|
||||
if (minLength <= 2) return true;
|
||||
int matching = 0;
|
||||
int lessMatching = 0; // Count matches if we skip one character
|
||||
int i;
|
||||
for (i = 0; i < minLength; i++) {
|
||||
final char origChar = ExpandableDictionary.toLowerCase(original.charAt(i));
|
||||
if (origChar == ExpandableDictionary.toLowerCase(suggestion.charAt(i))) {
|
||||
matching++;
|
||||
lessMatching++;
|
||||
} else if (i + 1 < suggestionLength
|
||||
&& origChar == ExpandableDictionary.toLowerCase(suggestion.charAt(i + 1))) {
|
||||
lessMatching++;
|
||||
}
|
||||
}
|
||||
matching = Math.max(matching, lessMatching);
|
||||
|
||||
if (minLength <= 4) {
|
||||
return matching >= 2;
|
||||
} else {
|
||||
return matching > minLength / 2;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a list of words that match the list of character codes passed in.
|
||||
* This list will be overwritten the next time this function is called.
|
||||
|
@ -311,6 +286,11 @@ public class Suggest implements Dictionary.WordCallback {
|
|||
// the normalized score of the second suggestion, behave less aggressive.
|
||||
final double normalizedScore = LatinIMEUtil.calcNormalizedScore(
|
||||
mOriginalWord, mSuggestions.get(0), mPriorities[0]);
|
||||
if (LatinImeLogger.sDBG) {
|
||||
Log.d(TAG, "Normalized " + mOriginalWord + "," + mSuggestions.get(0) + ","
|
||||
+ mPriorities[0] + normalizedScore
|
||||
+ "(" + mAutoCompleteThreshold + ")");
|
||||
}
|
||||
if (normalizedScore >= mAutoCompleteThreshold) {
|
||||
mHaveCorrection = true;
|
||||
}
|
||||
|
@ -319,15 +299,6 @@ public class Suggest implements Dictionary.WordCallback {
|
|||
if (mOriginalWord != null) {
|
||||
mSuggestions.add(0, mOriginalWord.toString());
|
||||
}
|
||||
|
||||
// Check if the first suggestion has a minimum number of characters in common
|
||||
if (wordComposer.size() > 1 && mSuggestions.size() > 1
|
||||
&& (mCorrectionMode == CORRECTION_FULL
|
||||
|| mCorrectionMode == CORRECTION_FULL_BIGRAM)) {
|
||||
if (!haveSufficientCommonality(mLowerOriginalWord, mSuggestions.get(1))) {
|
||||
mHaveCorrection = false;
|
||||
}
|
||||
}
|
||||
if (mAutoTextEnabled) {
|
||||
int i = 0;
|
||||
int max = 6;
|
||||
|
|
|
@ -23,10 +23,12 @@
|
|||
#ifndef LOG_TAG
|
||||
#define LOG_TAG "LatinIME: "
|
||||
#endif
|
||||
#define DEBUG_DICT 1
|
||||
#define DEBUG_DICT true
|
||||
#define DEBUG_SHOW_FOUND_WORD false
|
||||
#else // FLAG_DBG
|
||||
#define LOGI
|
||||
#define DEBUG_DICT 0
|
||||
#define DEBUG_DICT false
|
||||
#define DEBUG_SHOW_FOUND_WORD false
|
||||
#endif // FLAG_DBG
|
||||
|
||||
// 22-bit address = ~4MB dictionary size limit, which on average would be about 200k-300k words
|
||||
|
@ -48,7 +50,8 @@
|
|||
#define NOT_VALID_WORD -99
|
||||
|
||||
#define SUGGEST_MISSING_CHARACTERS true
|
||||
#define SUGGEST_MISSING_CHARACTERS_THRESHOLD 5
|
||||
|
||||
#define SUGGEST_EXCESSIVE_CHARACTERS true
|
||||
|
||||
// This should be greater than or equal to MAX_WORD_LENGTH defined in BinaryDictionary.java
|
||||
// This is only used for the size of array. Not to be used in c functions.
|
||||
|
|
|
@ -46,19 +46,30 @@ int UnigramDictionary::getSuggestions(int *codes, int codesSize, unsigned short
|
|||
|
||||
initSuggestions(codes, codesSize, outWords, frequencies);
|
||||
|
||||
int suggestedWordsCount = getSuggestionCandidates(codesSize, -1, nextLetters,
|
||||
int suggestedWordsCount = getSuggestionCandidates(codesSize, -1, -1, nextLetters,
|
||||
nextLettersSize);
|
||||
|
||||
// If there aren't sufficient suggestions, search for words by allowing wild cards at
|
||||
// the different character positions. This feature is not ready for prime-time as we need
|
||||
// to figure out the best ranking for such words compared to proximity corrections and
|
||||
// completions.
|
||||
if (SUGGEST_MISSING_CHARACTERS && suggestedWordsCount < SUGGEST_MISSING_CHARACTERS_THRESHOLD) {
|
||||
if (SUGGEST_MISSING_CHARACTERS) {
|
||||
for (int i = 0; i < codesSize; ++i) {
|
||||
int tempCount = getSuggestionCandidates(codesSize, i, NULL, 0);
|
||||
if (DEBUG_DICT) LOGI("--- Suggest missing characters %d", i);
|
||||
const int tempCount = getSuggestionCandidates(codesSize, i, -1, NULL, 0);
|
||||
if (tempCount > suggestedWordsCount) {
|
||||
suggestedWordsCount = tempCount;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Suggest excessive characters
|
||||
if (SUGGEST_EXCESSIVE_CHARACTERS) {
|
||||
for (int i = 0; i < codesSize; ++i) {
|
||||
if (DEBUG_DICT) LOGI("--- Suggest excessive characters %d", i);
|
||||
const int tempCount = getSuggestionCandidates(codesSize, -1, i, NULL, 0);
|
||||
if (tempCount > suggestedWordsCount) {
|
||||
suggestedWordsCount = tempCount;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -86,14 +97,14 @@ void UnigramDictionary::initSuggestions(int *codes, int codesSize, unsigned shor
|
|||
mMaxEditDistance = mInputLength < 5 ? 2 : mInputLength / 2;
|
||||
}
|
||||
|
||||
int UnigramDictionary::getSuggestionCandidates(int inputLength, int skipPos,
|
||||
int UnigramDictionary::getSuggestionCandidates(int inputLength, int skipPos, int excessivePos,
|
||||
int *nextLetters, int nextLettersSize) {
|
||||
if (DEBUG_DICT) LOGI("getSuggestionCandidates");
|
||||
int initialPos = 0;
|
||||
if (IS_LATEST_DICT_VERSION) {
|
||||
initialPos = DICTIONARY_HEADER_SIZE;
|
||||
}
|
||||
getWords(initialPos, inputLength, skipPos, nextLetters, nextLettersSize);
|
||||
getWords(initialPos, inputLength, skipPos, excessivePos, nextLetters, nextLettersSize);
|
||||
|
||||
// Get the word count
|
||||
int suggestedWordsCount = 0;
|
||||
|
@ -115,7 +126,7 @@ bool UnigramDictionary::addWord(unsigned short *word, int length, int frequency)
|
|||
if (DEBUG_DICT) {
|
||||
char s[length + 1];
|
||||
for (int i = 0; i <= length; i++) s[i] = word[i];
|
||||
LOGI("Found word = %s, freq = %d : \n", s, frequency);
|
||||
if (DEBUG_SHOW_FOUND_WORD) LOGI("Found word = %s, freq = %d : \n", s, frequency);
|
||||
}
|
||||
if (length > MAX_WORD_LENGTH) {
|
||||
if (DEBUG_DICT) LOGI("Exceeded max word length.");
|
||||
|
@ -132,6 +143,11 @@ bool UnigramDictionary::addWord(unsigned short *word, int length, int frequency)
|
|||
insertAt++;
|
||||
}
|
||||
if (insertAt < MAX_WORDS) {
|
||||
if (DEBUG_DICT) {
|
||||
char s[length + 1];
|
||||
for (int i = 0; i <= length; i++) s[i] = word[i];
|
||||
LOGI("Added word = %s, freq = %d : \n", s, frequency);
|
||||
}
|
||||
memmove((char*) mFrequencies + (insertAt + 1) * sizeof(mFrequencies[0]),
|
||||
(char*) mFrequencies + insertAt * sizeof(mFrequencies[0]),
|
||||
(MAX_WORDS - insertAt - 1) * sizeof(mFrequencies[0]));
|
||||
|
@ -181,16 +197,16 @@ static const char QUOTE = '\'';
|
|||
|
||||
// Keep this for comparing spec to new getWords
|
||||
void UnigramDictionary::getWordsOld(const int initialPos, const int inputLength, const int skipPos,
|
||||
int *nextLetters, const int nextLettersSize) {
|
||||
const int excessivePos, int *nextLetters, const int nextLettersSize) {
|
||||
int initialPosition = initialPos;
|
||||
const int count = Dictionary::getCount(DICT, &initialPosition);
|
||||
getWordsRec(count, initialPosition, 0,
|
||||
min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH),
|
||||
mInputLength <= 0, 1, 0, 0, skipPos, nextLetters, nextLettersSize);
|
||||
mInputLength <= 0, 1, 0, 0, skipPos, excessivePos, nextLetters, nextLettersSize);
|
||||
}
|
||||
|
||||
void UnigramDictionary::getWords(const int rootPos, const int inputLength, const int skipPos,
|
||||
int *nextLetters, const int nextLettersSize) {
|
||||
const int excessivePos, int *nextLetters, const int nextLettersSize) {
|
||||
int rootPosition = rootPos;
|
||||
const int MAX_DEPTH = min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);
|
||||
// Get the number of child of root, then increment the position
|
||||
|
@ -216,9 +232,9 @@ void UnigramDictionary::getWords(const int rootPos, const int inputLength, const
|
|||
// depth will never be greater than MAX_DEPTH because in that case,
|
||||
// needsToTraverseChildrenNodes should be false
|
||||
const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos, depth,
|
||||
MAX_DEPTH, traverseAllNodes, snr, inputIndex, diffs, skipPos, nextLetters,
|
||||
nextLettersSize, &childCount, &firstChildPos, &traverseAllNodes, &snr,
|
||||
&inputIndex, &diffs, &siblingPos);
|
||||
MAX_DEPTH, traverseAllNodes, snr, inputIndex, diffs, skipPos, excessivePos,
|
||||
nextLetters, nextLettersSize, &childCount, &firstChildPos, &traverseAllNodes,
|
||||
&snr, &inputIndex, &diffs, &siblingPos);
|
||||
// Next sibling pos
|
||||
mStackSiblingPos[depth] = siblingPos;
|
||||
if (needsToTraverseChildrenNodes) {
|
||||
|
@ -232,7 +248,7 @@ void UnigramDictionary::getWords(const int rootPos, const int inputLength, const
|
|||
mStackSiblingPos[depth] = firstChildPos;
|
||||
}
|
||||
} else {
|
||||
// Goes to parent node
|
||||
// Goes to parent sibling node
|
||||
--depth;
|
||||
}
|
||||
}
|
||||
|
@ -241,7 +257,8 @@ void UnigramDictionary::getWords(const int rootPos, const int inputLength, const
|
|||
// snr : frequency?
|
||||
void UnigramDictionary::getWordsRec(const int childrenCount, const int pos, const int depth,
|
||||
const int maxDepth, const bool traverseAllNodes, const int snr, const int inputIndex,
|
||||
const int diffs, const int skipPos, int *nextLetters, const int nextLettersSize) {
|
||||
const int diffs, const int skipPos, const int excessivePos, int *nextLetters,
|
||||
const int nextLettersSize) {
|
||||
int siblingPos = pos;
|
||||
for (int i = 0; i < childrenCount; ++i) {
|
||||
int newCount;
|
||||
|
@ -253,14 +270,16 @@ void UnigramDictionary::getWordsRec(const int childrenCount, const int pos, cons
|
|||
int newDiffs;
|
||||
int newSiblingPos;
|
||||
const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos, depth, maxDepth,
|
||||
traverseAllNodes, snr, inputIndex, diffs, skipPos, nextLetters, nextLettersSize,
|
||||
traverseAllNodes, snr, inputIndex, diffs, skipPos, excessivePos, nextLetters,
|
||||
nextLettersSize,
|
||||
&newCount, &newChildPosition, &newTraverseAllNodes, &newSnr,
|
||||
&newInputIndex, &newDiffs, &newSiblingPos);
|
||||
siblingPos = newSiblingPos;
|
||||
|
||||
if (needsToTraverseChildrenNodes) {
|
||||
getWordsRec(newCount, newChildPosition, newDepth, maxDepth, newTraverseAllNodes,
|
||||
newSnr, newInputIndex, newDiffs, skipPos, nextLetters, nextLettersSize);
|
||||
newSnr, newInputIndex, newDiffs, skipPos, excessivePos, nextLetters,
|
||||
nextLettersSize);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -312,14 +331,18 @@ inline int UnigramDictionary::getMatchedProximityId(const int *currentChars,
|
|||
}
|
||||
|
||||
inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth,
|
||||
const int maxDepth, const bool traverseAllNodes, const int snr, const int inputIndex,
|
||||
const int diffs, const int skipPos, int *nextLetters, const int nextLettersSize,
|
||||
int *newCount, int *newChildPosition, bool *newTraverseAllNodes,
|
||||
const int maxDepth, const bool traverseAllNodes, const int snr, int inputIndex,
|
||||
const int diffs, const int skipPos, const int excessivePos, int *nextLetters,
|
||||
const int nextLettersSize, int *newCount, int *newChildPosition, bool *newTraverseAllNodes,
|
||||
int *newSnr, int*newInputIndex, int *newDiffs, int *nextSiblingPosition) {
|
||||
if (DEBUG_DICT) assert(skipPos < 0 || excessivePos < 0);
|
||||
unsigned short c;
|
||||
int childPosition;
|
||||
bool terminal;
|
||||
int freq;
|
||||
|
||||
if (excessivePos == depth) ++inputIndex;
|
||||
|
||||
*nextSiblingPosition = Dictionary::setDictionaryValues(DICT, IS_LATEST_DICT_VERSION, pos, &c,
|
||||
&childPosition, &terminal, &freq);
|
||||
|
||||
|
|
|
@ -31,7 +31,7 @@ public:
|
|||
|
||||
private:
|
||||
void initSuggestions(int *codes, int codesSize, unsigned short *outWords, int *frequencies);
|
||||
int getSuggestionCandidates(int inputLength, int skipPos, int *nextLetters,
|
||||
int getSuggestionCandidates(int inputLength, int skipPos, int excessivePos, int *nextLetters,
|
||||
int nextLettersSize);
|
||||
void getVersionNumber();
|
||||
bool checkIfDictVersionIsLatest();
|
||||
|
@ -43,12 +43,12 @@ private:
|
|||
unsigned short toLowerCase(unsigned short c);
|
||||
void getWordsRec(const int childrenCount, const int pos, const int depth, const int maxDepth,
|
||||
const bool traverseAllNodes, const int snr, const int inputIndex, const int diffs,
|
||||
const int skipPos, int *nextLetters, const int nextLettersSize);
|
||||
const int skipPos, const int excessivePos, int *nextLetters, const int nextLettersSize);
|
||||
void getWords(const int rootPos, const int inputLength, const int skipPos,
|
||||
int *nextLetters, const int nextLettersSize);
|
||||
const int excessivePos, int *nextLetters, const int nextLettersSize);
|
||||
// Keep getWordsOld for comparing performance between getWords and getWordsOld
|
||||
void getWordsOld(const int initialPos, const int inputLength, const int skipPos,
|
||||
int *nextLetters, const int nextLettersSize);
|
||||
const int excessivePos, int *nextLetters, const int nextLettersSize);
|
||||
void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize);
|
||||
void onTerminalWhenUserTypedLengthIsGreaterThanInputLength(unsigned short *word,
|
||||
const int mInputLength, const int depth, const int snr, int *nextLetters,
|
||||
|
@ -59,10 +59,11 @@ private:
|
|||
const int inputIndex, const int skipPos, const int depth);
|
||||
int getMatchedProximityId(const int *currentChars, const unsigned short c, const int skipPos);
|
||||
bool processCurrentNode(const int pos, const int depth,
|
||||
const int maxDepth, const bool traverseAllNodes, const int snr, const int inputIndex,
|
||||
const int diffs, const int skipPos, int *nextLetters, const int nextLettersSize,
|
||||
int *newCount, int *newChildPosition, bool *newTraverseAllNodes,
|
||||
int *newSnr, int*newInputIndex, int *newDiffs, int *nextSiblingPosition);
|
||||
const int maxDepth, const bool traverseAllNodes, const int snr, int inputIndex,
|
||||
const int diffs, const int skipPos, const int excessivePos, int *nextLetters,
|
||||
const int nextLettersSize, int *newCount, int *newChildPosition,
|
||||
bool *newTraverseAllNodes, int *newSnr, int*newInputIndex, int *newDiffs,
|
||||
int *nextSiblingPosition);
|
||||
const unsigned char *DICT;
|
||||
const int MAX_WORDS;
|
||||
const int MAX_WORD_LENGTH;
|
||||
|
|
Loading…
Reference in a new issue