Merge "Add the suggestion algorithm of words with space proximity" into honeycomb-mr1

This commit is contained in:
satok 2011-03-04 22:24:05 -08:00 committed by Android (Google) Code Review
commit f9a5bfa147
7 changed files with 113 additions and 45 deletions

View file

@ -174,7 +174,7 @@ public class Keyboard {
mDefaultHeight = mDefaultWidth; mDefaultHeight = mDefaultWidth;
mId = id; mId = id;
loadKeyboard(context, xmlLayoutResId); loadKeyboard(context, xmlLayoutResId);
mProximityInfo = new ProximityInfo(mDisplayWidth, mDisplayHeight, GRID_WIDTH, GRID_HEIGHT); mProximityInfo = new ProximityInfo(GRID_WIDTH, GRID_HEIGHT);
} }
public int getProximityInfo() { public int getProximityInfo() {
@ -378,7 +378,7 @@ public class Keyboard {
mGridNeighbors[(y / mCellHeight) * GRID_WIDTH + (x / mCellWidth)] = cell; mGridNeighbors[(y / mCellHeight) * GRID_WIDTH + (x / mCellWidth)] = cell;
} }
} }
mProximityInfo.setProximityInfo(mGridNeighbors); mProximityInfo.setProximityInfo(mGridNeighbors, getMinWidth(), getHeight(), mKeys);
} }
public boolean isInside(Key key, int x, int y) { public boolean isInside(Key key, int x, int y) {

View file

@ -18,18 +18,17 @@ package com.android.inputmethod.keyboard;
import com.android.inputmethod.latin.Utils; import com.android.inputmethod.latin.Utils;
import java.util.Arrays;
import java.util.List;
public class ProximityInfo { public class ProximityInfo {
public static final int MAX_PROXIMITY_CHARS_SIZE = 16; public static final int MAX_PROXIMITY_CHARS_SIZE = 16;
private final int mDisplayWidth;
private final int mDisplayHeight;
private final int mGridWidth; private final int mGridWidth;
private final int mGridHeight; private final int mGridHeight;
private final int mGridSize; private final int mGridSize;
ProximityInfo(int displayWidth, int displayHeight, int gridWidth, int gridHeight) { ProximityInfo(int gridWidth, int gridHeight) {
mDisplayWidth = displayWidth;
mDisplayHeight = displayHeight;
mGridWidth = gridWidth; mGridWidth = gridWidth;
mGridHeight = gridHeight; mGridHeight = gridHeight;
mGridSize = mGridWidth * mGridHeight; mGridSize = mGridWidth * mGridHeight;
@ -43,20 +42,19 @@ public class ProximityInfo {
int displayHeight, int gridWidth, int gridHeight, int[] proximityCharsArray); int displayHeight, int gridWidth, int gridHeight, int[] proximityCharsArray);
private native void releaseProximityInfoNative(int nativeProximityInfo); private native void releaseProximityInfoNative(int nativeProximityInfo);
public final void setProximityInfo(int[][] gridNeighbors) { public final void setProximityInfo(int[][] gridNeighborKeyIndexes, int keyboardWidth,
int keyboardHeight, List<Key> keys) {
int[] proximityCharsArray = new int[mGridSize * MAX_PROXIMITY_CHARS_SIZE]; int[] proximityCharsArray = new int[mGridSize * MAX_PROXIMITY_CHARS_SIZE];
Arrays.fill(proximityCharsArray, KeyDetector.NOT_A_CODE);
for (int i = 0; i < mGridSize; ++i) { for (int i = 0; i < mGridSize; ++i) {
final int proximityCharsLength = gridNeighbors[i].length; final int proximityCharsLength = gridNeighborKeyIndexes[i].length;
for (int j = 0; j < MAX_PROXIMITY_CHARS_SIZE; ++j) { for (int j = 0; j < proximityCharsLength; ++j) {
int charCode = KeyDetector.NOT_A_KEY; proximityCharsArray[i * MAX_PROXIMITY_CHARS_SIZE + j] =
if (j < proximityCharsLength) { keys.get(gridNeighborKeyIndexes[i][j]).mCode;
charCode = gridNeighbors[i][j];
}
proximityCharsArray[i * MAX_PROXIMITY_CHARS_SIZE + j] = charCode;
} }
} }
mNativeProximityInfo = setProximityInfoNative(MAX_PROXIMITY_CHARS_SIZE, mNativeProximityInfo = setProximityInfoNative(MAX_PROXIMITY_CHARS_SIZE,
mDisplayWidth, mDisplayHeight, mGridWidth, mGridHeight, proximityCharsArray); keyboardWidth, keyboardHeight, mGridWidth, mGridHeight, proximityCharsArray);
} }
// TODO: Get rid of this function's input (keyboard). // TODO: Get rid of this function's input (keyboard).

View file

@ -129,10 +129,13 @@ static void prof_out(void) {
#define DICTIONARY_HEADER_SIZE 2 #define DICTIONARY_HEADER_SIZE 2
#define NOT_VALID_WORD -99 #define NOT_VALID_WORD -99
#define KEYCODE_SPACE ' '
#define SUGGEST_WORDS_WITH_MISSING_CHARACTER true #define SUGGEST_WORDS_WITH_MISSING_CHARACTER true
#define SUGGEST_WORDS_WITH_MISSING_SPACE_CHARACTER true #define SUGGEST_WORDS_WITH_MISSING_SPACE_CHARACTER true
#define SUGGEST_WORDS_WITH_EXCESSIVE_CHARACTER true #define SUGGEST_WORDS_WITH_EXCESSIVE_CHARACTER true
#define SUGGEST_WORDS_WITH_TRANSPOSED_CHARACTERS true #define SUGGEST_WORDS_WITH_TRANSPOSED_CHARACTERS true
#define SUGGEST_WORDS_WITH_SPACE_PROXIMITY true
// The following "rate"s are used as a multiplier before dividing by 100, so they are in percent. // The following "rate"s are used as a multiplier before dividing by 100, so they are in percent.
#define WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE 75 #define WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE 75

View file

@ -17,18 +17,48 @@
#include <stdio.h> #include <stdio.h>
#include <string.h> #include <string.h>
#define LOG_TAG "LatinIME: proximity_info.cpp"
#include "proximity_info.h" #include "proximity_info.h"
namespace latinime { namespace latinime {
ProximityInfo::ProximityInfo(int maxProximityCharsSize, int displayWidth, int displayHeight, ProximityInfo::ProximityInfo(const int maxProximityCharsSize, const int keyboardWidth,
int gridWidth, int gridHeight, uint32_t const *proximityCharsArray) const int keyboardHeight, const int gridWidth, const int gridHeight,
: MAX_PROXIMITY_CHARS_SIZE(maxProximityCharsSize), DISPLAY_WIDTH(displayWidth), const uint32_t *proximityCharsArray)
DISPLAY_HEIGHT(displayHeight), GRID_WIDTH(gridWidth), GRID_HEIGHT(gridHeight) { : MAX_PROXIMITY_CHARS_SIZE(maxProximityCharsSize), KEYBOARD_WIDTH(keyboardWidth),
mProximityCharsArray = new uint32_t[GRID_WIDTH * GRID_HEIGHT * MAX_PROXIMITY_CHARS_SIZE]; KEYBOARD_HEIGHT(keyboardHeight), GRID_WIDTH(gridWidth), GRID_HEIGHT(gridHeight),
memcpy(mProximityCharsArray, proximityCharsArray, sizeof(mProximityCharsArray)); CELL_WIDTH((keyboardWidth + gridWidth - 1) / gridWidth),
CELL_HEIGHT((keyboardHeight + gridHeight - 1) / gridHeight) {
const int len = GRID_WIDTH * GRID_HEIGHT * MAX_PROXIMITY_CHARS_SIZE;
mProximityCharsArray = new uint32_t[len];
if (DEBUG_PROXIMITY_INFO) {
LOGI("Create proximity info array %d", len);
}
memcpy(mProximityCharsArray, proximityCharsArray, len * sizeof(mProximityCharsArray[0]));
} }
ProximityInfo::~ProximityInfo() { ProximityInfo::~ProximityInfo() {
delete[] mProximityCharsArray; delete[] mProximityCharsArray;
} }
// Maps a touch position to the offset of its grid cell's first entry in
// mProximityCharsArray. Cells are laid out row by row (row * GRID_WIDTH + column)
// and each cell occupies MAX_PROXIMITY_CHARS_SIZE consecutive entries, so the
// complete cell number -- not just the column -- must be scaled by the per-cell size.
// No range checking is done here; x and y are used exactly as given.
inline int ProximityInfo::getStartIndexFromCoordinates(const int x, const int y) const {
    const int row = y / CELL_HEIGHT;
    const int column = x / CELL_WIDTH;
    return (row * GRID_WIDTH + column) * MAX_PROXIMITY_CHARS_SIZE;
}
bool ProximityInfo::hasSpaceProximity(const int x, const int y) const {
const int startIndex = getStartIndexFromCoordinates(x, y);
if (DEBUG_PROXIMITY_INFO) {
LOGI("hasSpaceProximity: index %d", startIndex);
}
for (int i = 0; i < MAX_PROXIMITY_CHARS_SIZE; ++i) {
if (DEBUG_PROXIMITY_INFO) {
LOGI("Index: %d", mProximityCharsArray[startIndex + i]);
}
if (mProximityCharsArray[startIndex + i] == KEYCODE_SPACE) {
return true;
}
}
return false;
}
} // namespace latinime

View file

@ -25,15 +25,20 @@ namespace latinime {
class ProximityInfo { class ProximityInfo {
public: public:
ProximityInfo(int maxProximityCharsSize, int displayWidth, int displayHeight, int gridWidth, ProximityInfo(const int maxProximityCharsSize, const int keyboardWidth,
int gridHeight, uint32_t const *proximityCharsArray); const int keybaordHeight, const int gridWidth, const int gridHeight,
const uint32_t *proximityCharsArray);
~ProximityInfo(); ~ProximityInfo();
bool hasSpaceProximity(const int x, const int y) const;
private: private:
const int MAX_PROXIMITY_CHARS_SIZE; int getStartIndexFromCoordinates(const int x, const int y) const;
const int DISPLAY_WIDTH; const int CELL_WIDTH;
const int DISPLAY_HEIGHT; const int CELL_HEIGHT;
const int KEYBOARD_WIDTH;
const int KEYBOARD_HEIGHT;
const int GRID_WIDTH; const int GRID_WIDTH;
const int GRID_HEIGHT; const int GRID_HEIGHT;
const int MAX_PROXIMITY_CHARS_SIZE;
uint32_t *mProximityCharsArray; uint32_t *mProximityCharsArray;
}; };
}; // namespace latinime }; // namespace latinime

View file

@ -142,7 +142,7 @@ int UnigramDictionary::getSuggestions(const ProximityInfo *proximityInfo, const
outWords, frequencies); outWords, frequencies);
} }
PROF_START(6); PROF_START(20);
// Get the word count // Get the word count
int suggestedWordsCount = 0; int suggestedWordsCount = 0;
while (suggestedWordsCount < MAX_WORDS && mFrequencies[suggestedWordsCount] > 0) { while (suggestedWordsCount < MAX_WORDS && mFrequencies[suggestedWordsCount] > 0) {
@ -158,7 +158,7 @@ int UnigramDictionary::getSuggestions(const ProximityInfo *proximityInfo, const
} }
} }
} }
PROF_END(6); PROF_END(20);
PROF_CLOSE; PROF_CLOSE;
return suggestedWordsCount; return suggestedWordsCount;
} }
@ -172,12 +172,6 @@ void UnigramDictionary::getWordSuggestions(const ProximityInfo *proximityInfo,
initSuggestions(codes, codesSize, outWords, frequencies); initSuggestions(codes, codesSize, outWords, frequencies);
if (DEBUG_DICT) assert(codesSize == mInputLength); if (DEBUG_DICT) assert(codesSize == mInputLength);
if (DEBUG_PROXIMITY_INFO) {
for (int i = 0; i < codesSize; ++i) {
LOGI("Input[%d] x = %d, y = %d", i, xcoordinates[i], ycoordinates[i]);
}
}
const int MAX_DEPTH = min(mInputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH); const int MAX_DEPTH = min(mInputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);
PROF_END(0); PROF_END(0);
@ -227,6 +221,25 @@ void UnigramDictionary::getWordSuggestions(const ProximityInfo *proximityInfo,
} }
} }
PROF_END(5); PROF_END(5);
PROF_START(6);
if (SUGGEST_WORDS_WITH_SPACE_PROXIMITY) {
// The first and last "mistyped spaces" are taken care of by excessive character handling
for (int i = 1; i < codesSize - 1; ++i) {
if (DEBUG_DICT) LOGI("--- Suggest words with proximity space %d", i);
const int x = xcoordinates[i];
const int y = ycoordinates[i];
if (DEBUG_PROXIMITY_INFO)
LOGI("Input[%d] x = %d, y = %d, has space proximity = %d",
i, x, y, proximityInfo->hasSpaceProximity(x, y));
if (proximityInfo->hasSpaceProximity(x, y)) {
getMistypedSpaceWords(mInputLength, i);
}
}
}
PROF_END(6);
} }
void UnigramDictionary::initSuggestions(const int *codes, const int codesSize, void UnigramDictionary::initSuggestions(const int *codes, const int codesSize,
@ -387,27 +400,31 @@ inline static void multiplyRate(const int rate, int *freq) {
} }
} }
bool UnigramDictionary::getMissingSpaceWords(const int inputLength, const int missingSpacePos) { bool UnigramDictionary::getSplitTwoWordsSuggestion(const int inputLength,
if (missingSpacePos <= 0 || missingSpacePos >= inputLength const int firstWordStartPos, const int firstWordLength, const int secondWordStartPos,
|| inputLength >= MAX_WORD_LENGTH) return false; const int secondWordLength) {
const int newWordLength = inputLength + 1; if (inputLength >= MAX_WORD_LENGTH) return false;
if (0 >= firstWordLength || 0 >= secondWordLength || firstWordStartPos >= secondWordStartPos
|| firstWordStartPos < 0 || secondWordStartPos >= inputLength)
return false;
const int newWordLength = firstWordLength + secondWordLength + 1;
// Allocating variable length array on stack // Allocating variable length array on stack
unsigned short word[newWordLength]; unsigned short word[newWordLength];
const int firstFreq = getBestWordFreq(0, missingSpacePos, mWord); const int firstFreq = getBestWordFreq(firstWordStartPos, firstWordLength, mWord);
if (DEBUG_DICT) LOGI("First freq: %d", firstFreq); if (DEBUG_DICT) LOGI("First freq: %d", firstFreq);
if (firstFreq <= 0) return false; if (firstFreq <= 0) return false;
for (int i = 0; i < missingSpacePos; ++i) { for (int i = 0; i < firstWordLength; ++i) {
word[i] = mWord[i]; word[i] = mWord[i];
} }
const int secondFreq = getBestWordFreq(missingSpacePos, inputLength - missingSpacePos, mWord); const int secondFreq = getBestWordFreq(secondWordStartPos, secondWordLength, mWord);
if (DEBUG_DICT) LOGI("Second freq: %d", secondFreq); if (DEBUG_DICT) LOGI("Second freq: %d", secondFreq);
if (secondFreq <= 0) return false; if (secondFreq <= 0) return false;
word[missingSpacePos] = SPACE; word[firstWordLength] = SPACE;
for (int i = (missingSpacePos + 1); i < newWordLength; ++i) { for (int i = (firstWordLength + 1); i < newWordLength; ++i) {
word[i] = mWord[i - missingSpacePos - 1]; word[i] = mWord[i - firstWordLength - 1];
} }
int pairFreq = ((firstFreq + secondFreq) / 2); int pairFreq = ((firstFreq + secondFreq) / 2);
@ -417,6 +434,17 @@ bool UnigramDictionary::getMissingSpaceWords(const int inputLength, const int mi
return true; return true;
} }
// Handles the "space was left out" case by delegating to getSplitTwoWordsSuggestion:
// the first word covers input positions [0, missingSpacePos) and the second word
// covers [missingSpacePos, inputLength). Returns whatever the delegate returns.
bool UnigramDictionary::getMissingSpaceWords(const int inputLength, const int missingSpacePos) {
    const int firstStart = 0;
    const int firstLength = missingSpacePos;
    const int secondStart = missingSpacePos;
    const int secondLength = inputLength - missingSpacePos;
    return getSplitTwoWordsSuggestion(
            inputLength, firstStart, firstLength, secondStart, secondLength);
}
// Handles the "a neighbouring key was hit instead of space" case by delegating to
// getSplitTwoWordsSuggestion: the character at spaceProximityPos is dropped, the first
// word covers [0, spaceProximityPos) and the second word covers
// [spaceProximityPos + 1, inputLength). Returns whatever the delegate returns.
bool UnigramDictionary::getMistypedSpaceWords(const int inputLength, const int spaceProximityPos) {
    const int firstStart = 0;
    const int firstLength = spaceProximityPos;
    const int secondStart = spaceProximityPos + 1;
    const int secondLength = inputLength - spaceProximityPos - 1;
    return getSplitTwoWordsSuggestion(
            inputLength, firstStart, firstLength, secondStart, secondLength);
}
// Keep this for comparing spec to new getWords // Keep this for comparing spec to new getWords
void UnigramDictionary::getWordsOld(const int initialPos, const int inputLength, const int skipPos, void UnigramDictionary::getWordsOld(const int initialPos, const int inputLength, const int skipPos,
const int excessivePos, const int transposedPos,int *nextLetters, const int excessivePos, const int transposedPos,int *nextLetters,

View file

@ -64,7 +64,11 @@ private:
const bool traverseAllNodes, const int snr, const int inputIndex, const int diffs, const bool traverseAllNodes, const int snr, const int inputIndex, const int diffs,
const int skipPos, const int excessivePos, const int transposedPos, int *nextLetters, const int skipPos, const int excessivePos, const int transposedPos, int *nextLetters,
const int nextLettersSize); const int nextLettersSize);
bool getSplitTwoWordsSuggestion(const int inputLength,
const int firstWordStartPos, const int firstWordLength,
const int secondWordStartPos, const int secondWordLength);
bool getMissingSpaceWords(const int inputLength, const int missingSpacePos); bool getMissingSpaceWords(const int inputLength, const int missingSpacePos);
bool getMistypedSpaceWords(const int inputLength, const int spaceProximityPos);
// Keep getWordsOld for comparing performance between getWords and getWordsOld // Keep getWordsOld for comparing performance between getWords and getWordsOld
void getWordsOld(const int initialPos, const int inputLength, const int skipPos, void getWordsOld(const int initialPos, const int inputLength, const int skipPos,
const int excessivePos, const int transposedPos, int *nextLetters, const int excessivePos, const int transposedPos, int *nextLetters,