Merge "Use 32-bit code points for suggestions output"

main
Ken Wakasa 2012-10-31 09:36:00 -07:00 committed by Android (Google) Code Review
commit 01f6a61e51
20 changed files with 329 additions and 363 deletions

View File

@ -51,8 +51,7 @@ public final class BinaryDictionary extends Dictionary {
private long mNativeDict; private long mNativeDict;
private final Locale mLocale; private final Locale mLocale;
private final int[] mInputCodePoints = new int[MAX_WORD_LENGTH]; private final int[] mInputCodePoints = new int[MAX_WORD_LENGTH];
// TODO: The below should be int[] mOutputCodePoints private final int[] mOutputCodePoints = new int[MAX_WORD_LENGTH * MAX_RESULTS];
private final char[] mOutputChars = new char[MAX_WORD_LENGTH * MAX_RESULTS];
private final int[] mSpaceIndices = new int[MAX_SPACES]; private final int[] mSpaceIndices = new int[MAX_SPACES];
private final int[] mOutputScores = new int[MAX_RESULTS]; private final int[] mOutputScores = new int[MAX_RESULTS];
private final int[] mOutputTypes = new int[MAX_RESULTS]; private final int[] mOutputTypes = new int[MAX_RESULTS];
@ -88,9 +87,9 @@ public final class BinaryDictionary extends Dictionary {
* @param useFullEditDistance whether to use the full edit distance in suggestions * @param useFullEditDistance whether to use the full edit distance in suggestions
* @param dictType the dictionary type, as a human-readable string * @param dictType the dictionary type, as a human-readable string
*/ */
public BinaryDictionary(final Context context, public BinaryDictionary(final Context context, final String filename, final long offset,
final String filename, final long offset, final long length, final long length, final boolean useFullEditDistance, final Locale locale,
final boolean useFullEditDistance, final Locale locale, final String dictType) { final String dictType) {
super(dictType); super(dictType);
mLocale = locale; mLocale = locale;
mUseFullEditDistance = useFullEditDistance; mUseFullEditDistance = useFullEditDistance;
@ -109,10 +108,10 @@ public final class BinaryDictionary extends Dictionary {
private native int getSuggestionsNative(long dict, long proximityInfo, long traverseSession, private native int getSuggestionsNative(long dict, long proximityInfo, long traverseSession,
int[] xCoordinates, int[] yCoordinates, int[] times, int[] pointerIds, int[] xCoordinates, int[] yCoordinates, int[] times, int[] pointerIds,
int[] inputCodePoints, int codesSize, int commitPoint, boolean isGesture, int[] inputCodePoints, int codesSize, int commitPoint, boolean isGesture,
int[] prevWordCodePointArray, boolean useFullEditDistance, char[] outputChars, int[] prevWordCodePointArray, boolean useFullEditDistance, int[] outputCodePoints,
int[] outputScores, int[] outputIndices, int[] outputTypes); int[] outputScores, int[] outputIndices, int[] outputTypes);
private static native float calcNormalizedScoreNative(char[] before, char[] after, int score); private static native float calcNormalizedScoreNative(int[] before, int[] after, int score);
private static native int editDistanceNative(char[] before, char[] after); private static native int editDistanceNative(int[] before, int[] after);
// TODO: Move native dict into session // TODO: Move native dict into session
private final void loadDictionary(final String path, final long startOffset, private final void loadDictionary(final String path, final long startOffset,
@ -153,7 +152,8 @@ public final class BinaryDictionary extends Dictionary {
proximityInfo.getNativeProximityInfo(), getTraverseSession(sessionId).getSession(), proximityInfo.getNativeProximityInfo(), getTraverseSession(sessionId).getSession(),
ips.getXCoordinates(), ips.getYCoordinates(), ips.getTimes(), ips.getPointerIds(), ips.getXCoordinates(), ips.getYCoordinates(), ips.getTimes(), ips.getPointerIds(),
mInputCodePoints, codesSize, 0 /* commitPoint */, isGesture, prevWordCodePointArray, mInputCodePoints, codesSize, 0 /* commitPoint */, isGesture, prevWordCodePointArray,
mUseFullEditDistance, mOutputChars, mOutputScores, mSpaceIndices, mOutputTypes); mUseFullEditDistance, mOutputCodePoints, mOutputScores, mSpaceIndices,
mOutputTypes);
final int count = Math.min(tmpCount, MAX_PREDICTIONS); final int count = Math.min(tmpCount, MAX_PREDICTIONS);
final ArrayList<SuggestedWordInfo> suggestions = CollectionUtils.newArrayList(); final ArrayList<SuggestedWordInfo> suggestions = CollectionUtils.newArrayList();
@ -161,14 +161,14 @@ public final class BinaryDictionary extends Dictionary {
if (composerSize > 0 && mOutputScores[j] < 1) break; if (composerSize > 0 && mOutputScores[j] < 1) break;
final int start = j * MAX_WORD_LENGTH; final int start = j * MAX_WORD_LENGTH;
int len = 0; int len = 0;
while (len < MAX_WORD_LENGTH && mOutputChars[start + len] != 0) { while (len < MAX_WORD_LENGTH && mOutputCodePoints[start + len] != 0) {
++len; ++len;
} }
if (len > 0) { if (len > 0) {
final int score = SuggestedWordInfo.KIND_WHITELIST == mOutputTypes[j] final int score = SuggestedWordInfo.KIND_WHITELIST == mOutputTypes[j]
? SuggestedWordInfo.MAX_SCORE : mOutputScores[j]; ? SuggestedWordInfo.MAX_SCORE : mOutputScores[j];
suggestions.add(new SuggestedWordInfo( suggestions.add(new SuggestedWordInfo(new String(mOutputCodePoints, start, len),
new String(mOutputChars, start, len), score, mOutputTypes[j], mDictType)); score, mOutputTypes[j], mDictType));
} }
} }
return suggestions; return suggestions;
@ -180,14 +180,16 @@ public final class BinaryDictionary extends Dictionary {
public static float calcNormalizedScore(final String before, final String after, public static float calcNormalizedScore(final String before, final String after,
final int score) { final int score) {
return calcNormalizedScoreNative(before.toCharArray(), after.toCharArray(), score); return calcNormalizedScoreNative(StringUtils.toCodePointArray(before),
StringUtils.toCodePointArray(after), score);
} }
public static int editDistance(final String before, final String after) { public static int editDistance(final String before, final String after) {
if (before == null || after == null) { if (before == null || after == null) {
throw new IllegalArgumentException(); throw new IllegalArgumentException();
} }
return editDistanceNative(before.toCharArray(), after.toCharArray()); return editDistanceNative(StringUtils.toCodePointArray(before),
StringUtils.toCodePointArray(after));
} }
@Override @Override
@ -206,9 +208,9 @@ public final class BinaryDictionary extends Dictionary {
// calls when checking for changes in an entire dictionary. // calls when checking for changes in an entire dictionary.
public boolean isValidBigram(final String word1, final String word2) { public boolean isValidBigram(final String word1, final String word2) {
if (TextUtils.isEmpty(word1) || TextUtils.isEmpty(word2)) return false; if (TextUtils.isEmpty(word1) || TextUtils.isEmpty(word2)) return false;
final int[] chars1 = StringUtils.toCodePointArray(word1); final int[] codePoints1 = StringUtils.toCodePointArray(word1);
final int[] chars2 = StringUtils.toCodePointArray(word2); final int[] codePoints2 = StringUtils.toCodePointArray(word2);
return isValidBigramNative(mNativeDict, chars1, chars2); return isValidBigramNative(mNativeDict, codePoints1, codePoints2);
} }
@Override @Override

View File

@ -132,7 +132,7 @@ static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jobject object,
jintArray yCoordinatesArray, jintArray timesArray, jintArray pointerIdsArray, jintArray yCoordinatesArray, jintArray timesArray, jintArray pointerIdsArray,
jintArray inputCodePointsArray, jint arraySize, jint commitPoint, jboolean isGesture, jintArray inputCodePointsArray, jint arraySize, jint commitPoint, jboolean isGesture,
jintArray prevWordCodePointsForBigrams, jboolean useFullEditDistance, jintArray prevWordCodePointsForBigrams, jboolean useFullEditDistance,
jcharArray outputCharsArray, jintArray scoresArray, jintArray spaceIndicesArray, jintArray outputCodePointsArray, jintArray scoresArray, jintArray spaceIndicesArray,
jintArray outputTypesArray) { jintArray outputTypesArray) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return 0; if (!dictionary) return 0;
@ -162,16 +162,15 @@ static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jobject object,
} }
// Output values // Output values
// TODO: Should be "outputCodePointsLength" and "int outputCodePoints[]" const jsize outputCodePointsLength = env->GetArrayLength(outputCodePointsArray);
const jsize outputCharsLength = env->GetArrayLength(outputCharsArray); int outputCodePoints[outputCodePointsLength];
unsigned short outputChars[outputCharsLength];
const jsize scoresLength = env->GetArrayLength(scoresArray); const jsize scoresLength = env->GetArrayLength(scoresArray);
int scores[scoresLength]; int scores[scoresLength];
const jsize spaceIndicesLength = env->GetArrayLength(spaceIndicesArray); const jsize spaceIndicesLength = env->GetArrayLength(spaceIndicesArray);
int spaceIndices[spaceIndicesLength]; int spaceIndices[spaceIndicesLength];
const jsize outputTypesLength = env->GetArrayLength(outputTypesArray); const jsize outputTypesLength = env->GetArrayLength(outputTypesArray);
int outputTypes[outputTypesLength]; int outputTypes[outputTypesLength];
memset(outputChars, 0, sizeof(outputChars)); memset(outputCodePoints, 0, sizeof(outputCodePoints));
memset(scores, 0, sizeof(scores)); memset(scores, 0, sizeof(scores));
memset(spaceIndices, 0, sizeof(spaceIndices)); memset(spaceIndices, 0, sizeof(spaceIndices));
memset(outputTypes, 0, sizeof(outputTypes)); memset(outputTypes, 0, sizeof(outputTypes));
@ -180,16 +179,15 @@ static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jobject object,
if (isGesture || arraySize > 0) { if (isGesture || arraySize > 0) {
count = dictionary->getSuggestions(pInfo, traverseSession, xCoordinates, yCoordinates, count = dictionary->getSuggestions(pInfo, traverseSession, xCoordinates, yCoordinates,
times, pointerIds, inputCodePoints, arraySize, prevWordCodePoints, times, pointerIds, inputCodePoints, arraySize, prevWordCodePoints,
prevWordCodePointsLength, commitPoint, isGesture, useFullEditDistance, outputChars, prevWordCodePointsLength, commitPoint, isGesture, useFullEditDistance,
scores, spaceIndices, outputTypes); outputCodePoints, scores, spaceIndices, outputTypes);
} else { } else {
count = dictionary->getBigrams(prevWordCodePoints, prevWordCodePointsLength, count = dictionary->getBigrams(prevWordCodePoints, prevWordCodePointsLength,
inputCodePoints, arraySize, outputChars, scores, outputTypes); inputCodePoints, arraySize, outputCodePoints, scores, outputTypes);
} }
// Copy back the output values // Copy back the output values
// TODO: Should be SetIntArrayRegion() env->SetIntArrayRegion(outputCodePointsArray, 0, outputCodePointsLength, outputCodePoints);
env->SetCharArrayRegion(outputCharsArray, 0, outputCharsLength, outputChars);
env->SetIntArrayRegion(scoresArray, 0, scoresLength, scores); env->SetIntArrayRegion(scoresArray, 0, scoresLength, scores);
env->SetIntArrayRegion(spaceIndicesArray, 0, spaceIndicesLength, spaceIndices); env->SetIntArrayRegion(spaceIndicesArray, 0, spaceIndicesLength, spaceIndices);
env->SetIntArrayRegion(outputTypesArray, 0, outputTypesLength, outputTypes); env->SetIntArrayRegion(outputTypesArray, 0, outputTypesLength, outputTypes);
@ -221,29 +219,27 @@ static jboolean latinime_BinaryDictionary_isValidBigram(JNIEnv *env, jobject obj
} }
static jfloat latinime_BinaryDictionary_calcNormalizedScore(JNIEnv *env, jobject object, static jfloat latinime_BinaryDictionary_calcNormalizedScore(JNIEnv *env, jobject object,
jcharArray before, jcharArray after, jint score) { jintArray before, jintArray after, jint score) {
jsize beforeLength = env->GetArrayLength(before); jsize beforeLength = env->GetArrayLength(before);
jsize afterLength = env->GetArrayLength(after); jsize afterLength = env->GetArrayLength(after);
jchar beforeChars[beforeLength]; int beforeCodePoints[beforeLength];
jchar afterChars[afterLength]; int afterCodePoints[afterLength];
env->GetCharArrayRegion(before, 0, beforeLength, beforeChars); env->GetIntArrayRegion(before, 0, beforeLength, beforeCodePoints);
env->GetCharArrayRegion(after, 0, afterLength, afterChars); env->GetIntArrayRegion(after, 0, afterLength, afterCodePoints);
return Correction::RankingAlgorithm::calcNormalizedScore( return Correction::RankingAlgorithm::calcNormalizedScore(beforeCodePoints, beforeLength,
static_cast<unsigned short *>(beforeChars), beforeLength, afterCodePoints, afterLength, score);
static_cast<unsigned short *>(afterChars), afterLength, score);
} }
static jint latinime_BinaryDictionary_editDistance(JNIEnv *env, jobject object, static jint latinime_BinaryDictionary_editDistance(JNIEnv *env, jobject object, jintArray before,
jcharArray before, jcharArray after) { jintArray after) {
jsize beforeLength = env->GetArrayLength(before); jsize beforeLength = env->GetArrayLength(before);
jsize afterLength = env->GetArrayLength(after); jsize afterLength = env->GetArrayLength(after);
jchar beforeChars[beforeLength]; int beforeCodePoints[beforeLength];
jchar afterChars[afterLength]; int afterCodePoints[afterLength];
env->GetCharArrayRegion(before, 0, beforeLength, beforeChars); env->GetIntArrayRegion(before, 0, beforeLength, beforeCodePoints);
env->GetCharArrayRegion(after, 0, afterLength, afterChars); env->GetIntArrayRegion(after, 0, afterLength, afterCodePoints);
return Correction::RankingAlgorithm::editDistance( return Correction::RankingAlgorithm::editDistance(beforeCodePoints, beforeLength,
static_cast<unsigned short *>(beforeChars), beforeLength, afterCodePoints, afterLength);
static_cast<unsigned short *>(afterChars), afterLength);
} }
static void latinime_BinaryDictionary_close(JNIEnv *env, jobject object, jlong dict) { static void latinime_BinaryDictionary_close(JNIEnv *env, jobject object, jlong dict) {
@ -279,15 +275,15 @@ static JNINativeMethod sMethods[] = {
{"openNative", "(Ljava/lang/String;JJIIII)J", {"openNative", "(Ljava/lang/String;JJIIII)J",
reinterpret_cast<void *>(latinime_BinaryDictionary_open)}, reinterpret_cast<void *>(latinime_BinaryDictionary_open)},
{"closeNative", "(J)V", reinterpret_cast<void *>(latinime_BinaryDictionary_close)}, {"closeNative", "(J)V", reinterpret_cast<void *>(latinime_BinaryDictionary_close)},
{"getSuggestionsNative", "(JJJ[I[I[I[I[IIIZ[IZ[C[I[I[I)I", {"getSuggestionsNative", "(JJJ[I[I[I[I[IIIZ[IZ[I[I[I[I)I",
reinterpret_cast<void *>(latinime_BinaryDictionary_getSuggestions)}, reinterpret_cast<void *>(latinime_BinaryDictionary_getSuggestions)},
{"getFrequencyNative", "(J[I)I", {"getFrequencyNative", "(J[I)I",
reinterpret_cast<void *>(latinime_BinaryDictionary_getFrequency)}, reinterpret_cast<void *>(latinime_BinaryDictionary_getFrequency)},
{"isValidBigramNative", "(J[I[I)Z", {"isValidBigramNative", "(J[I[I)Z",
reinterpret_cast<void *>(latinime_BinaryDictionary_isValidBigram)}, reinterpret_cast<void *>(latinime_BinaryDictionary_isValidBigram)},
{"calcNormalizedScoreNative", "([C[CI)F", {"calcNormalizedScoreNative", "([I[II)F",
reinterpret_cast<void *>(latinime_BinaryDictionary_calcNormalizedScore)}, reinterpret_cast<void *>(latinime_BinaryDictionary_calcNormalizedScore)},
{"editDistanceNative", "([C[C)I", {"editDistanceNative", "([I[I)I",
reinterpret_cast<void *>(latinime_BinaryDictionary_editDistance)} reinterpret_cast<void *>(latinime_BinaryDictionary_editDistance)}
}; };

View File

@ -36,13 +36,13 @@ BigramDictionary::BigramDictionary(const unsigned char *dict, int maxWordLength,
BigramDictionary::~BigramDictionary() { BigramDictionary::~BigramDictionary() {
} }
bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequency, bool BigramDictionary::addWordBigram(int *word, int length, int frequency, int *bigramFreq,
int *bigramFreq, unsigned short *bigramChars, int *outputTypes) const { int *bigramCodePoints, int *outputTypes) const {
word[length] = 0; word[length] = 0;
if (DEBUG_DICT) { if (DEBUG_DICT) {
#ifdef FLAG_DBG #ifdef FLAG_DBG
char s[length + 1]; char s[length + 1];
for (int i = 0; i <= length; i++) s[i] = word[i]; for (int i = 0; i <= length; i++) s[i] = static_cast<char>(word[i]);
AKLOGI("Bigram: Found word = %s, freq = %d :", s, frequency); AKLOGI("Bigram: Found word = %s, freq = %d :", s, frequency);
#endif #endif
} }
@ -51,7 +51,8 @@ bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequ
int insertAt = 0; int insertAt = 0;
while (insertAt < MAX_PREDICTIONS) { while (insertAt < MAX_PREDICTIONS) {
if (frequency > bigramFreq[insertAt] || (bigramFreq[insertAt] == frequency if (frequency > bigramFreq[insertAt] || (bigramFreq[insertAt] == frequency
&& length < Dictionary::wideStrLen(bigramChars + insertAt * MAX_WORD_LENGTH))) { && length < Dictionary::wideStrLen(
bigramCodePoints + insertAt * MAX_WORD_LENGTH))) {
break; break;
} }
insertAt++; insertAt++;
@ -65,10 +66,10 @@ bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequ
(MAX_PREDICTIONS - insertAt - 1) * sizeof(bigramFreq[0])); (MAX_PREDICTIONS - insertAt - 1) * sizeof(bigramFreq[0]));
bigramFreq[insertAt] = frequency; bigramFreq[insertAt] = frequency;
outputTypes[insertAt] = Dictionary::KIND_PREDICTION; outputTypes[insertAt] = Dictionary::KIND_PREDICTION;
memmove(bigramChars + (insertAt + 1) * MAX_WORD_LENGTH, memmove(bigramCodePoints + (insertAt + 1) * MAX_WORD_LENGTH,
bigramChars + insertAt * MAX_WORD_LENGTH, bigramCodePoints + insertAt * MAX_WORD_LENGTH,
(MAX_PREDICTIONS - insertAt - 1) * sizeof(bigramChars[0]) * MAX_WORD_LENGTH); (MAX_PREDICTIONS - insertAt - 1) * sizeof(bigramCodePoints[0]) * MAX_WORD_LENGTH);
unsigned short *dest = bigramChars + insertAt * MAX_WORD_LENGTH; int *dest = bigramCodePoints + insertAt * MAX_WORD_LENGTH;
while (length--) { while (length--) {
*dest++ = *word++; *dest++ = *word++;
} }
@ -86,7 +87,7 @@ bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequ
* prevWordLength: its length. * prevWordLength: its length.
* inputCodes: what user typed, in the same format as for UnigramDictionary::getSuggestions. * inputCodes: what user typed, in the same format as for UnigramDictionary::getSuggestions.
* codesSize: the size of the codes array. * codesSize: the size of the codes array.
* bigramChars: an array for output, in the same format as outwords for getSuggestions. * bigramCodePoints: an array for output, in the same format as outwords for getSuggestions.
* bigramFreq: an array to output frequencies. * bigramFreq: an array to output frequencies.
* outputTypes: an array to output types. * outputTypes: an array to output types.
* This method returns the number of bigrams this word has, for backward compatibility. * This method returns the number of bigrams this word has, for backward compatibility.
@ -97,8 +98,8 @@ bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequ
* and the bigrams are used to boost unigram result scores, it makes little sense to * and the bigrams are used to boost unigram result scores, it makes little sense to
* reduce their scope to the ones that match the first letter. * reduce their scope to the ones that match the first letter.
*/ */
int BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, int *inputCodes, int BigramDictionary::getBigrams(const int *prevWord, int prevWordLength, int *inputCodes,
int codesSize, unsigned short *bigramChars, int *bigramFreq, int *outputTypes) const { int codesSize, int *bigramCodePoints, int *bigramFreq, int *outputTypes) const {
// TODO: remove unused arguments, and refrain from storing stuff in members of this class // TODO: remove unused arguments, and refrain from storing stuff in members of this class
// TODO: have "in" arguments before "out" ones, and make out args explicit in the name // TODO: have "in" arguments before "out" ones, and make out args explicit in the name
@ -117,7 +118,7 @@ int BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, in
int bigramCount = 0; int bigramCount = 0;
do { do {
bigramFlags = BinaryFormat::getFlagsAndForwardPointer(root, &pos); bigramFlags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
uint16_t bigramBuffer[MAX_WORD_LENGTH]; int bigramBuffer[MAX_WORD_LENGTH];
int unigramFreq = 0; int unigramFreq = 0;
const int bigramPos = BinaryFormat::getAttributeAddressAndForwardPointer(root, bigramFlags, const int bigramPos = BinaryFormat::getAttributeAddressAndForwardPointer(root, bigramFlags,
&pos); &pos);
@ -134,7 +135,7 @@ int BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, in
// here, but it can't get too bad. // here, but it can't get too bad.
const int frequency = const int frequency =
BinaryFormat::computeFrequencyForBigram(unigramFreq, bigramFreqTemp); BinaryFormat::computeFrequencyForBigram(unigramFreq, bigramFreqTemp);
if (addWordBigram(bigramBuffer, length, frequency, bigramFreq, bigramChars, if (addWordBigram(bigramBuffer, length, frequency, bigramFreq, bigramCodePoints,
outputTypes)) { outputTypes)) {
++bigramCount; ++bigramCount;
} }
@ -190,12 +191,12 @@ void BigramDictionary::fillBigramAddressToFrequencyMapAndFilter(const int32_t *p
} while (0 != (BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags)); } while (0 != (BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags));
} }
bool BigramDictionary::checkFirstCharacter(unsigned short *word, int *inputCodes) const { bool BigramDictionary::checkFirstCharacter(int *word, int *inputCodes) const {
// Checks whether this word starts with same character or neighboring characters of // Checks whether this word starts with same character or neighboring characters of
// what user typed. // what user typed.
int maxAlt = MAX_ALTERNATIVES; int maxAlt = MAX_ALTERNATIVES;
const unsigned short firstBaseChar = toBaseLowerCase(*word); const int firstBaseChar = toBaseLowerCase(*word);
while (maxAlt > 0) { while (maxAlt > 0) {
if (toBaseLowerCase(*inputCodes) == firstBaseChar) { if (toBaseLowerCase(*inputCodes) == firstBaseChar) {
return true; return true;

View File

@ -27,23 +27,23 @@ namespace latinime {
class BigramDictionary { class BigramDictionary {
public: public:
BigramDictionary(const unsigned char *dict, int maxWordLength, int maxPredictions); BigramDictionary(const unsigned char *dict, int maxWordLength, int maxPredictions);
int getBigrams(const int32_t *word, int length, int *inputCodes, int codesSize, int getBigrams(const int *word, int length, int *inputCodes, int codesSize, int *outWords,
unsigned short *outWords, int *frequencies, int *outputTypes) const; int *frequencies, int *outputTypes) const;
void fillBigramAddressToFrequencyMapAndFilter(const int32_t *prevWord, const int prevWordLength, void fillBigramAddressToFrequencyMapAndFilter(const int *prevWord, const int prevWordLength,
std::map<int, int> *map, uint8_t *filter) const; std::map<int, int> *map, uint8_t *filter) const;
bool isValidBigram(const int32_t *word1, int length1, const int32_t *word2, int length2) const; bool isValidBigram(const int *word1, int length1, const int *word2, int length2) const;
~BigramDictionary(); ~BigramDictionary();
private: private:
DISALLOW_IMPLICIT_CONSTRUCTORS(BigramDictionary); DISALLOW_IMPLICIT_CONSTRUCTORS(BigramDictionary);
bool addWordBigram(unsigned short *word, int length, int frequency, bool addWordBigram(int *word, int length, int frequency, int *bigramFreq, int *bigramCodePoints,
int *bigramFreq, unsigned short *bigramChars, int *outputTypes) const; int *outputTypes) const;
int getBigramAddress(int *pos, bool advance); int getBigramAddress(int *pos, bool advance);
int getBigramFreq(int *pos); int getBigramFreq(int *pos);
void searchForTerminalNode(int addressLookingFor, int frequency); void searchForTerminalNode(int addressLookingFor, int frequency);
bool getFirstBitOfByte(int *pos) { return (DICT[*pos] & 0x80) > 0; } bool getFirstBitOfByte(int *pos) { return (DICT[*pos] & 0x80) > 0; }
bool getSecondBitOfByte(int *pos) { return (DICT[*pos] & 0x40) > 0; } bool getSecondBitOfByte(int *pos) { return (DICT[*pos] & 0x40) > 0; }
bool checkFirstCharacter(unsigned short *word, int *inputCodes) const; bool checkFirstCharacter(int *word, int *inputCodes) const;
int getBigramListPositionForWord(const int32_t *prevWord, const int prevWordLength, int getBigramListPositionForWord(const int *prevWord, const int prevWordLength,
const bool forceLowerCaseSearch) const; const bool forceLowerCaseSearch) const;
const unsigned char *DICT; const unsigned char *DICT;

View File

@ -84,7 +84,7 @@ class BinaryFormat {
static unsigned int getFlags(const uint8_t *const dict); static unsigned int getFlags(const uint8_t *const dict);
static int getGroupCountAndForwardPointer(const uint8_t *const dict, int *pos); static int getGroupCountAndForwardPointer(const uint8_t *const dict, int *pos);
static uint8_t getFlagsAndForwardPointer(const uint8_t *const dict, int *pos); static uint8_t getFlagsAndForwardPointer(const uint8_t *const dict, int *pos);
static int32_t getCodePointAndForwardPointer(const uint8_t *const dict, int *pos); static int getCodePointAndForwardPointer(const uint8_t *const dict, int *pos);
static int readFrequencyWithoutMovingPointer(const uint8_t *const dict, const int pos); static int readFrequencyWithoutMovingPointer(const uint8_t *const dict, const int pos);
static int skipOtherCharacters(const uint8_t *const dict, const int pos); static int skipOtherCharacters(const uint8_t *const dict, const int pos);
static int skipChildrenPosition(const uint8_t flags, const int pos); static int skipChildrenPosition(const uint8_t flags, const int pos);
@ -98,10 +98,10 @@ class BinaryFormat {
static int getAttributeAddressAndForwardPointer(const uint8_t *const dict, const uint8_t flags, static int getAttributeAddressAndForwardPointer(const uint8_t *const dict, const uint8_t flags,
int *pos); int *pos);
static int getAttributeFrequencyFromFlags(const int flags); static int getAttributeFrequencyFromFlags(const int flags);
static int getTerminalPosition(const uint8_t *const root, const int32_t *const inWord, static int getTerminalPosition(const uint8_t *const root, const int *const inWord,
const int length, const bool forceLowerCaseSearch); const int length, const bool forceLowerCaseSearch);
static int getWordAtAddress(const uint8_t *const root, const int address, const int maxDepth, static int getWordAtAddress(const uint8_t *const root, const int address, const int maxDepth,
uint16_t *outWord, int *outUnigramFrequency); int *outWord, int *outUnigramFrequency);
static int computeFrequencyForBigram(const int unigramFreq, const int bigramFreq); static int computeFrequencyForBigram(const int unigramFreq, const int bigramFreq);
static int getProbability(const int position, const std::map<int, int> *bigramMap, static int getProbability(const int position, const std::map<int, int> *bigramMap,
const uint8_t *bigramFilter, const int unigramFreq); const uint8_t *bigramFilter, const int unigramFreq);
@ -176,17 +176,17 @@ inline uint8_t BinaryFormat::getFlagsAndForwardPointer(const uint8_t *const dict
return dict[(*pos)++]; return dict[(*pos)++];
} }
inline int32_t BinaryFormat::getCodePointAndForwardPointer(const uint8_t *const dict, int *pos) { inline int BinaryFormat::getCodePointAndForwardPointer(const uint8_t *const dict, int *pos) {
const int origin = *pos; const int origin = *pos;
const int32_t codePoint = dict[origin]; const int codePoint = dict[origin];
if (codePoint < MINIMAL_ONE_BYTE_CHARACTER_VALUE) { if (codePoint < MINIMAL_ONE_BYTE_CHARACTER_VALUE) {
if (codePoint == CHARACTER_ARRAY_TERMINATOR) { if (codePoint == CHARACTER_ARRAY_TERMINATOR) {
*pos = origin + 1; *pos = origin + 1;
return NOT_A_CODE_POINT; return NOT_A_CODE_POINT;
} else { } else {
*pos = origin + 3; *pos = origin + 3;
const int32_t char_1 = codePoint << 16; const int char_1 = codePoint << 16;
const int32_t char_2 = char_1 + (dict[origin + 1] << 8); const int char_2 = char_1 + (dict[origin + 1] << 8);
return char_2 + dict[origin + 2]; return char_2 + dict[origin + 2];
} }
} else { } else {
@ -202,7 +202,7 @@ inline int BinaryFormat::readFrequencyWithoutMovingPointer(const uint8_t *const
inline int BinaryFormat::skipOtherCharacters(const uint8_t *const dict, const int pos) { inline int BinaryFormat::skipOtherCharacters(const uint8_t *const dict, const int pos) {
int currentPos = pos; int currentPos = pos;
int32_t character = dict[currentPos++]; int character = dict[currentPos++];
while (CHARACTER_ARRAY_TERMINATOR != character) { while (CHARACTER_ARRAY_TERMINATOR != character) {
if (character < MINIMAL_ONE_BYTE_CHARACTER_VALUE) { if (character < MINIMAL_ONE_BYTE_CHARACTER_VALUE) {
currentPos += MULTIPLE_BYTE_CHARACTER_ADDITIONAL_SIZE; currentPos += MULTIPLE_BYTE_CHARACTER_ADDITIONAL_SIZE;
@ -352,8 +352,8 @@ inline int BinaryFormat::getAttributeFrequencyFromFlags(const int flags) {
// This function gets the byte position of the last chargroup of the exact matching word in the // This function gets the byte position of the last chargroup of the exact matching word in the
// dictionary. If no match is found, it returns NOT_VALID_WORD. // dictionary. If no match is found, it returns NOT_VALID_WORD.
inline int BinaryFormat::getTerminalPosition(const uint8_t *const root, inline int BinaryFormat::getTerminalPosition(const uint8_t *const root, const int *const inWord,
const int32_t *const inWord, const int length, const bool forceLowerCaseSearch) { const int length, const bool forceLowerCaseSearch) {
int pos = 0; int pos = 0;
int wordPos = 0; int wordPos = 0;
@ -362,14 +362,14 @@ inline int BinaryFormat::getTerminalPosition(const uint8_t *const root,
// there was no match (or we would have found it). // there was no match (or we would have found it).
if (wordPos >= length) return NOT_VALID_WORD; if (wordPos >= length) return NOT_VALID_WORD;
int charGroupCount = BinaryFormat::getGroupCountAndForwardPointer(root, &pos); int charGroupCount = BinaryFormat::getGroupCountAndForwardPointer(root, &pos);
const int32_t wChar = forceLowerCaseSearch ? toLowerCase(inWord[wordPos]) : inWord[wordPos]; const int wChar = forceLowerCaseSearch ? toLowerCase(inWord[wordPos]) : inWord[wordPos];
while (true) { while (true) {
// If there are no more character groups in this node, it means we could not // If there are no more character groups in this node, it means we could not
// find a matching character for this depth, therefore there is no match. // find a matching character for this depth, therefore there is no match.
if (0 >= charGroupCount) return NOT_VALID_WORD; if (0 >= charGroupCount) return NOT_VALID_WORD;
const int charGroupPos = pos; const int charGroupPos = pos;
const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos); const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
int32_t character = BinaryFormat::getCodePointAndForwardPointer(root, &pos); int character = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
if (character == wChar) { if (character == wChar) {
// This is the correct node. Only one character group may start with the same // This is the correct node. Only one character group may start with the same
// char within a node, so either we found our match in this node, or there is // char within a node, so either we found our match in this node, or there is
@ -439,7 +439,7 @@ inline int BinaryFormat::getTerminalPosition(const uint8_t *const root,
* Return value : the length of the word, or 0 if the word was not found. * Return value : the length of the word, or 0 if the word was not found.
*/ */
inline int BinaryFormat::getWordAtAddress(const uint8_t *const root, const int address, inline int BinaryFormat::getWordAtAddress(const uint8_t *const root, const int address,
const int maxDepth, uint16_t *outWord, int *outUnigramFrequency) { const int maxDepth, int *outWord, int *outUnigramFrequency) {
int pos = 0; int pos = 0;
int wordPos = 0; int wordPos = 0;
@ -457,13 +457,13 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t *const root, const int a
--charGroupCount) { --charGroupCount) {
const int startPos = pos; const int startPos = pos;
const uint8_t flags = getFlagsAndForwardPointer(root, &pos); const uint8_t flags = getFlagsAndForwardPointer(root, &pos);
const int32_t character = getCodePointAndForwardPointer(root, &pos); const int character = getCodePointAndForwardPointer(root, &pos);
if (address == startPos) { if (address == startPos) {
// We found the address. Copy the rest of the word in the buffer and return // We found the address. Copy the rest of the word in the buffer and return
// the length. // the length.
outWord[wordPos] = character; outWord[wordPos] = character;
if (FLAG_HAS_MULTIPLE_CHARS & flags) { if (FLAG_HAS_MULTIPLE_CHARS & flags) {
int32_t nextChar = getCodePointAndForwardPointer(root, &pos); int nextChar = getCodePointAndForwardPointer(root, &pos);
// We count chars in order to avoid infinite loops if the file is broken or // We count chars in order to avoid infinite loops if the file is broken or
// if there is some other bug // if there is some other bug
int charCount = maxDepth; int charCount = maxDepth;
@ -522,13 +522,12 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t *const root, const int a
if (0 != lastCandidateGroupPos) { if (0 != lastCandidateGroupPos) {
const uint8_t lastFlags = const uint8_t lastFlags =
getFlagsAndForwardPointer(root, &lastCandidateGroupPos); getFlagsAndForwardPointer(root, &lastCandidateGroupPos);
const int32_t lastChar = const int lastChar =
getCodePointAndForwardPointer(root, &lastCandidateGroupPos); getCodePointAndForwardPointer(root, &lastCandidateGroupPos);
// We copy all the characters in this group to the buffer // We copy all the characters in this group to the buffer
outWord[wordPos] = lastChar; outWord[wordPos] = lastChar;
if (FLAG_HAS_MULTIPLE_CHARS & lastFlags) { if (FLAG_HAS_MULTIPLE_CHARS & lastFlags) {
int32_t nextChar = int nextChar = getCodePointAndForwardPointer(root, &lastCandidateGroupPos);
getCodePointAndForwardPointer(root, &lastCandidateGroupPos);
int charCount = maxDepth; int charCount = maxDepth;
while (-1 != nextChar && --charCount > 0) { while (-1 != nextChar && --charCount > 0) {
outWord[++wordPos] = nextChar; outWord[++wordPos] = nextChar;

View File

@ -18,22 +18,23 @@
#define LATINIME_CHAR_UTILS_H #define LATINIME_CHAR_UTILS_H
#include <cctype> #include <cctype>
#include <stdint.h>
#include "defines.h"
namespace latinime { namespace latinime {
// Tells whether the code point c is one of the ASCII capital letters 'A'..'Z'.
inline static bool isAsciiUpper(int c) {
    // Note: isupper(...) reports false positives for some Cyrillic characters, causing them to
    // be incorrectly lower-cased using toAsciiLower(...) rather than latin_tolower(...).
    if (c < 'A') {
        return false;
    }
    return c <= 'Z';
}
// Shifts c by the distance between 'A' and 'a'. No range check is done here; callers are
// expected to have tested the value with isAsciiUpper(...) first.
inline static int toAsciiLower(int c) {
    const int offsetFromCapitalA = c - 'A';
    return offsetFromCapitalA + 'a';
}
// Tells whether the code point c lies in the 7-bit ASCII range [0, 0x7F].
// Written as an explicit range check instead of calling isascii(...): that function is a
// non-standard (obsolescent POSIX) extension, while this test yields the same answer for
// every int, including negative values and code points above 0x7F.
inline static bool isAscii(int c) {
    return c >= 0 && c <= 0x7F;
}
unsigned short latin_tolower(const unsigned short c); unsigned short latin_tolower(const unsigned short c);
@ -44,33 +45,32 @@ unsigned short latin_tolower(const unsigned short c);
* if c is not a combined character, or the base character if it * if c is not a combined character, or the base character if it
* is combined. * is combined.
*/ */
static const int BASE_CHARS_SIZE = 0x0500; static const int BASE_CHARS_SIZE = 0x0500;
extern const uint16_t BASE_CHARS[BASE_CHARS_SIZE]; extern const unsigned short BASE_CHARS[BASE_CHARS_SIZE];
inline static unsigned short toBaseChar(unsigned short c) { inline static int toBaseCodePoint(int c) {
if (c < BASE_CHARS_SIZE) { if (c < BASE_CHARS_SIZE) {
return BASE_CHARS[c]; return static_cast<int>(BASE_CHARS[c]);
} }
return c; return c;
} }
inline static unsigned short toLowerCase(const unsigned short c) { inline static int toLowerCase(const int c) {
if (isAsciiUpper(c)) { if (isAsciiUpper(c)) {
return toAsciiLower(c); return toAsciiLower(c);
} else if (isAscii(c)) { } else if (isAscii(c)) {
return c; return c;
} }
return latin_tolower(c); return static_cast<int>(latin_tolower(static_cast<unsigned short>(c)));
} }
inline static unsigned short toBaseLowerCase(const unsigned short c) { inline static int toBaseLowerCase(const int c) {
return toLowerCase(toBaseChar(c)); return toLowerCase(toBaseCodePoint(c));
} }
inline static bool isSkippableChar(const uint16_t character) { inline static bool isSkippableCodePoint(const int codePoint) {
// TODO: Do not hardcode here // TODO: Do not hardcode here
return character == '\'' || character == '-'; return codePoint == KEYCODE_SINGLE_QUOTE || codePoint == KEYCODE_HYPHEN_MINUS;
} }
} // namespace latinime } // namespace latinime

View File

@ -60,8 +60,8 @@ inline static void dumpEditDistance10ForDebug(int *editDistanceTable,
} }
} }
inline static void calcEditDistanceOneStep(int *editDistanceTable, const unsigned short *input, inline static void calcEditDistanceOneStep(int *editDistanceTable, const int *input,
const int inputSize, const unsigned short *output, const int outputLength) { const int inputSize, const int *output, const int outputLength) {
// TODO: Make sure that editDistance[0 ~ MAX_WORD_LENGTH_INTERNAL] is not touched. // TODO: Make sure that editDistance[0 ~ MAX_WORD_LENGTH_INTERNAL] is not touched.
// Let dp[i][j] be editDistanceTable[i * (inputSize + 1) + j]. // Let dp[i][j] be editDistanceTable[i * (inputSize + 1) + j].
// Assuming that dp[0][0] ... dp[outputLength - 1][inputSize] are already calculated, // Assuming that dp[0][0] ... dp[outputLength - 1][inputSize] are already calculated,
@ -71,10 +71,10 @@ inline static void calcEditDistanceOneStep(int *editDistanceTable, const unsigne
const int *const prevprev = const int *const prevprev =
outputLength >= 2 ? editDistanceTable + (outputLength - 2) * (inputSize + 1) : 0; outputLength >= 2 ? editDistanceTable + (outputLength - 2) * (inputSize + 1) : 0;
current[0] = outputLength; current[0] = outputLength;
const uint32_t co = toBaseLowerCase(output[outputLength - 1]); const int co = toBaseLowerCase(output[outputLength - 1]);
const uint32_t prevCO = outputLength >= 2 ? toBaseLowerCase(output[outputLength - 2]) : 0; const int prevCO = outputLength >= 2 ? toBaseLowerCase(output[outputLength - 2]) : 0;
for (int i = 1; i <= inputSize; ++i) { for (int i = 1; i <= inputSize; ++i) {
const uint32_t ci = toBaseLowerCase(input[i - 1]); const int ci = toBaseLowerCase(input[i - 1]);
const uint16_t cost = (ci == co) ? 0 : 1; const uint16_t cost = (ci == co) ? 0 : 1;
current[i] = min(current[i - 1] + 1, min(prev[i] + 1, prev[i - 1] + cost)); current[i] = min(current[i - 1] + 1, min(prev[i] + 1, prev[i - 1] + cost));
if (i >= 2 && prevprev && ci == prevCO && co == toBaseLowerCase(input[i - 2])) { if (i >= 2 && prevprev && ci == prevCO && co == toBaseLowerCase(input[i - 2])) {
@ -94,11 +94,9 @@ inline static int getCurrentEditDistance(int *editDistanceTable, const int editD
////////////////////// //////////////////////
// inline functions // // inline functions //
////////////////////// //////////////////////
static const char SINGLE_QUOTE = '\''; inline bool Correction::isSingleQuote(const int c) {
const int userTypedChar = mProximityInfoState.getPrimaryCodePointAt(mInputIndex);
inline bool Correction::isSingleQuote(const unsigned short c) { return (c == KEYCODE_SINGLE_QUOTE && userTypedChar != KEYCODE_SINGLE_QUOTE);
const unsigned short userTypedChar = mProximityInfoState.getPrimaryCharAt(mInputIndex);
return (c == SINGLE_QUOTE && userTypedChar != SINGLE_QUOTE);
} }
//////////////// ////////////////
@ -162,22 +160,22 @@ bool Correction::sameAsTyped() {
} }
int Correction::getFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray, int Correction::getFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray,
const int wordCount, const bool isSpaceProximity, const unsigned short *word) { const int wordCount, const bool isSpaceProximity, const int *word) {
return Correction::RankingAlgorithm::calcFreqForSplitMultipleWords(freqArray, wordLengthArray, return Correction::RankingAlgorithm::calcFreqForSplitMultipleWords(freqArray, wordLengthArray,
wordCount, this, isSpaceProximity, word); wordCount, this, isSpaceProximity, word);
} }
int Correction::getFinalProbability(const int probability, unsigned short **word, int *wordLength) { int Correction::getFinalProbability(const int probability, int **word, int *wordLength) {
return getFinalProbabilityInternal(probability, word, wordLength, mInputSize); return getFinalProbabilityInternal(probability, word, wordLength, mInputSize);
} }
int Correction::getFinalProbabilityForSubQueue(const int probability, unsigned short **word, int Correction::getFinalProbabilityForSubQueue(const int probability, int **word, int *wordLength,
int *wordLength, const int inputSize) { const int inputSize) {
return getFinalProbabilityInternal(probability, word, wordLength, inputSize); return getFinalProbabilityInternal(probability, word, wordLength, inputSize);
} }
int Correction::getFinalProbabilityInternal(const int probability, unsigned short **word, int Correction::getFinalProbabilityInternal(const int probability, int **word, int *wordLength,
int *wordLength, const int inputSize) { const int inputSize) {
const int outputIndex = mTerminalOutputIndex; const int outputIndex = mTerminalOutputIndex;
const int inputIndex = mTerminalInputIndex; const int inputIndex = mTerminalInputIndex;
*wordLength = outputIndex + 1; *wordLength = outputIndex + 1;
@ -273,15 +271,15 @@ bool Correction::needsToPrune() const {
|| (!mDoAutoCompletion && (mOutputIndex > mInputSize)); || (!mDoAutoCompletion && (mOutputIndex > mInputSize));
} }
void Correction::addCharToCurrentWord(const int32_t c) { void Correction::addCharToCurrentWord(const int c) {
mWord[mOutputIndex] = c; mWord[mOutputIndex] = c;
const unsigned short *primaryInputWord = mProximityInfoState.getPrimaryInputWord(); const int *primaryInputWord = mProximityInfoState.getPrimaryInputWord();
calcEditDistanceOneStep(mEditDistanceTable, primaryInputWord, mInputSize, calcEditDistanceOneStep(mEditDistanceTable, primaryInputWord, mInputSize, mWord,
mWord, mOutputIndex + 1); mOutputIndex + 1);
} }
Correction::CorrectionType Correction::processSkipChar( Correction::CorrectionType Correction::processSkipChar(const int c, const bool isTerminal,
const int32_t c, const bool isTerminal, const bool inputIndexIncremented) { const bool inputIndexIncremented) {
addCharToCurrentWord(c); addCharToCurrentWord(c);
mTerminalInputIndex = mInputIndex - (inputIndexIncremented ? 1 : 0); mTerminalInputIndex = mInputIndex - (inputIndexIncremented ? 1 : 0);
mTerminalOutputIndex = mOutputIndex; mTerminalOutputIndex = mOutputIndex;
@ -309,8 +307,7 @@ inline bool isProximityCharOrEquivalentChar(ProximityType type) {
return type == EQUIVALENT_CHAR || type == NEAR_PROXIMITY_CHAR; return type == EQUIVALENT_CHAR || type == NEAR_PROXIMITY_CHAR;
} }
Correction::CorrectionType Correction::processCharAndCalcState( Correction::CorrectionType Correction::processCharAndCalcState(const int c, const bool isTerminal) {
const int32_t c, const bool isTerminal) {
const int correctionCount = (mSkippedCount + mExcessiveCount + mTransposedCount); const int correctionCount = (mSkippedCount + mExcessiveCount + mTransposedCount);
if (correctionCount > mMaxErrors) { if (correctionCount > mMaxErrors) {
return processUnrelatedCorrectionType(); return processUnrelatedCorrectionType();
@ -628,10 +625,10 @@ Correction::CorrectionType Correction::processCharAndCalcState(
} }
} }
inline static int getQuoteCount(const unsigned short *word, const int length) { inline static int getQuoteCount(const int *word, const int length) {
int quoteCount = 0; int quoteCount = 0;
for (int i = 0; i < length; ++i) { for (int i = 0; i < length; ++i) {
if (word[i] == SINGLE_QUOTE) { if (word[i] == KEYCODE_SINGLE_QUOTE) {
++quoteCount; ++quoteCount;
} }
} }
@ -639,7 +636,7 @@ inline static int getQuoteCount(const unsigned short *word, const int length) {
} }
inline static bool isUpperCase(unsigned short c) { inline static bool isUpperCase(unsigned short c) {
return isAsciiUpper(toBaseChar(c)); return isAsciiUpper(toBaseCodePoint(c));
} }
////////////////////// //////////////////////
@ -672,7 +669,7 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex
// TODO: use mExcessiveCount // TODO: use mExcessiveCount
const int matchCount = inputSize - correction->mProximityCount - excessiveCount; const int matchCount = inputSize - correction->mProximityCount - excessiveCount;
const unsigned short *word = correction->mWord; const int *word = correction->mWord;
const bool skipped = skippedCount > 0; const bool skipped = skippedCount > 0;
const int quoteDiffCount = max(0, getQuoteCount(word, outputLength) const int quoteDiffCount = max(0, getQuoteCount(word, outputLength)
@ -911,7 +908,7 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex
/* static */ /* static */
int Correction::RankingAlgorithm::calcFreqForSplitMultipleWords( int Correction::RankingAlgorithm::calcFreqForSplitMultipleWords(
const int *freqArray, const int *wordLengthArray, const int wordCount, const int *freqArray, const int *wordLengthArray, const int wordCount,
const Correction *correction, const bool isSpaceProximity, const unsigned short *word) { const Correction *correction, const bool isSpaceProximity, const int *word) {
const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER; const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER;
bool firstCapitalizedWordDemotion = false; bool firstCapitalizedWordDemotion = false;
@ -1040,9 +1037,8 @@ int Correction::RankingAlgorithm::calcFreqForSplitMultipleWords(
} }
/* Damerau-Levenshtein distance */ /* Damerau-Levenshtein distance */
inline static int editDistanceInternal( inline static int editDistanceInternal(int *editDistanceTable, const int *before,
int *editDistanceTable, const unsigned short *before, const int beforeLength, const int *after, const int afterLength) {
const int beforeLength, const unsigned short *after, const int afterLength) {
// dp[li][lo] dp[a][b] = dp[ a * lo + b] // dp[li][lo] dp[a][b] = dp[ a * lo + b]
int *dp = editDistanceTable; int *dp = editDistanceTable;
const int li = beforeLength + 1; const int li = beforeLength + 1;
@ -1056,9 +1052,9 @@ inline static int editDistanceInternal(
for (int i = 0; i < li - 1; ++i) { for (int i = 0; i < li - 1; ++i) {
for (int j = 0; j < lo - 1; ++j) { for (int j = 0; j < lo - 1; ++j) {
const uint32_t ci = toBaseLowerCase(before[i]); const int ci = toBaseLowerCase(before[i]);
const uint32_t co = toBaseLowerCase(after[j]); const int co = toBaseLowerCase(after[j]);
const uint16_t cost = (ci == co) ? 0 : 1; const int cost = (ci == co) ? 0 : 1;
dp[(i + 1) * lo + (j + 1)] = min(dp[i * lo + (j + 1)] + 1, dp[(i + 1) * lo + (j + 1)] = min(dp[i * lo + (j + 1)] + 1,
min(dp[(i + 1) * lo + j] + 1, dp[i * lo + j] + cost)); min(dp[(i + 1) * lo + j] + 1, dp[i * lo + j] + cost));
if (i > 0 && j > 0 && ci == toBaseLowerCase(after[j - 1]) if (i > 0 && j > 0 && ci == toBaseLowerCase(after[j - 1])
@ -1080,8 +1076,8 @@ inline static int editDistanceInternal(
return dp[li * lo - 1]; return dp[li * lo - 1];
} }
int Correction::RankingAlgorithm::editDistance(const unsigned short *before, int Correction::RankingAlgorithm::editDistance(const int *before, const int beforeLength,
const int beforeLength, const unsigned short *after, const int afterLength) { const int *after, const int afterLength) {
int table[(beforeLength + 1) * (afterLength + 1)]; int table[(beforeLength + 1) * (afterLength + 1)];
return editDistanceInternal(table, before, beforeLength, after, afterLength); return editDistanceInternal(table, before, beforeLength, after, afterLength);
} }
@ -1109,9 +1105,8 @@ int Correction::RankingAlgorithm::editDistance(const unsigned short *before,
// So, we can normalize original score by dividing powf(2, min(b.l(),a.l())) * 255 * 2. // So, we can normalize original score by dividing powf(2, min(b.l(),a.l())) * 255 * 2.
/* static */ /* static */
float Correction::RankingAlgorithm::calcNormalizedScore(const unsigned short *before, float Correction::RankingAlgorithm::calcNormalizedScore(const int *before, const int beforeLength,
const int beforeLength, const unsigned short *after, const int afterLength, const int *after, const int afterLength, const int score) {
const int score) {
if (0 == beforeLength || 0 == afterLength) { if (0 == beforeLength || 0 == afterLength) {
return 0; return 0;
} }

View File

@ -78,14 +78,13 @@ class Correction {
return ++mTotalTraverseCount; return ++mTotalTraverseCount;
} }
int getFreqForSplitMultipleWords( int getFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray,
const int *freqArray, const int *wordLengthArray, const int wordCount, const int wordCount, const bool isSpaceProximity, const int *word);
const bool isSpaceProximity, const unsigned short *word); int getFinalProbability(const int probability, int **word, int *wordLength);
int getFinalProbability(const int probability, unsigned short **word, int *wordLength); int getFinalProbabilityForSubQueue(const int probability, int **word, int *wordLength,
int getFinalProbabilityForSubQueue(const int probability, unsigned short **word, const int inputSize);
int *wordLength, const int inputSize);
CorrectionType processCharAndCalcState(const int32_t c, const bool isTerminal); CorrectionType processCharAndCalcState(const int c, const bool isTerminal);
///////////////////////// /////////////////////////
// Tree helper methods // Tree helper methods
@ -110,28 +109,28 @@ class Correction {
const int inputSize); const int inputSize);
static int calcFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray, static int calcFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray,
const int wordCount, const Correction *correction, const bool isSpaceProximity, const int wordCount, const Correction *correction, const bool isSpaceProximity,
const unsigned short *word); const int *word);
static float calcNormalizedScore(const unsigned short *before, const int beforeLength, static float calcNormalizedScore(const int *before, const int beforeLength,
const unsigned short *after, const int afterLength, const int score); const int *after, const int afterLength, const int score);
static int editDistance(const unsigned short *before, static int editDistance(const int *before, const int beforeLength, const int *after,
const int beforeLength, const unsigned short *after, const int afterLength); const int afterLength);
private: private:
static const int MAX_INITIAL_SCORE = 255; static const int MAX_INITIAL_SCORE = 255;
}; };
// proximity info state // proximity info state
void initInputParams(const ProximityInfo *proximityInfo, const int32_t *inputCodes, void initInputParams(const ProximityInfo *proximityInfo, const int *inputCodes,
const int inputSize, const int *xCoordinates, const int *yCoordinates) { const int inputSize, const int *xCoordinates, const int *yCoordinates) {
mProximityInfoState.initInputParams(0, MAX_POINT_TO_KEY_LENGTH, mProximityInfoState.initInputParams(0, MAX_POINT_TO_KEY_LENGTH,
proximityInfo, inputCodes, inputSize, xCoordinates, yCoordinates, 0, 0, false); proximityInfo, inputCodes, inputSize, xCoordinates, yCoordinates, 0, 0, false);
} }
const unsigned short *getPrimaryInputWord() const { const int *getPrimaryInputWord() const {
return mProximityInfoState.getPrimaryInputWord(); return mProximityInfoState.getPrimaryInputWord();
} }
unsigned short getPrimaryCharAt(const int index) const { int getPrimaryCodePointAt(const int index) const {
return mProximityInfoState.getPrimaryCharAt(index); return mProximityInfoState.getPrimaryCodePointAt(index);
} }
private: private:
@ -214,13 +213,13 @@ class Correction {
inline void incrementInputIndex(); inline void incrementInputIndex();
inline void incrementOutputIndex(); inline void incrementOutputIndex();
inline void startToTraverseAllNodes(); inline void startToTraverseAllNodes();
inline bool isSingleQuote(const unsigned short c); inline bool isSingleQuote(const int c);
inline CorrectionType processSkipChar( inline CorrectionType processSkipChar(const int c, const bool isTerminal,
const int32_t c, const bool isTerminal, const bool inputIndexIncremented); const bool inputIndexIncremented);
inline CorrectionType processUnrelatedCorrectionType(); inline CorrectionType processUnrelatedCorrectionType();
inline void addCharToCurrentWord(const int32_t c); inline void addCharToCurrentWord(const int c);
inline int getFinalProbabilityInternal(const int probability, unsigned short **word, inline int getFinalProbabilityInternal(const int probability, int **word, int *wordLength,
int *wordLength, const int inputSize); const int inputSize);
static const int TYPED_LETTER_MULTIPLIER = 2; static const int TYPED_LETTER_MULTIPLIER = 2;
static const int FULL_WORD_MULTIPLIER = 2; static const int FULL_WORD_MULTIPLIER = 2;
@ -240,7 +239,7 @@ class Correction {
uint8_t mTotalTraverseCount; uint8_t mTotalTraverseCount;
// The following arrays are state buffer. // The following arrays are state buffer.
unsigned short mWord[MAX_WORD_LENGTH_INTERNAL]; int mWord[MAX_WORD_LENGTH_INTERNAL];
int mDistances[MAX_WORD_LENGTH_INTERNAL]; int mDistances[MAX_WORD_LENGTH_INTERNAL];
// Edit distance calculation requires a buffer with (N+1)^2 length for the input length N. // Edit distance calculation requires a buffer with (N+1)^2 length for the input length N.

View File

@ -30,17 +30,15 @@
#define DUMP_RESULT(words, frequencies, maxWordCount, maxWordLength) do { \ #define DUMP_RESULT(words, frequencies, maxWordCount, maxWordLength) do { \
dumpResult(words, frequencies, maxWordCount, maxWordLength); } while (0) dumpResult(words, frequencies, maxWordCount, maxWordLength); } while (0)
#define DUMP_WORD(word, length) do { dumpWord(word, length); } while (0) #define DUMP_WORD(word, length) do { dumpWord(word, length); } while (0)
#define DUMP_WORD_INT(word, length) do { dumpWordInt(word, length); } while (0) #define INTS_TO_CHARS(input, length, output) do { \
// TODO: INTS_TO_CHARS intArrayToCharArray(input, length, output); } while (0)
#define SHORTS_TO_CHARS(input, length, output) do { \
shortArrayToCharArray(input, length, output); } while (0)
static inline void dumpWordInfo(const unsigned short *word, const int length, static inline void dumpWordInfo(const int *word, const int length, const int rank,
const int rank, const int frequency) { const int frequency) {
static char charBuf[50]; static char charBuf[50];
int i = 0; int i = 0;
for (; i < length; ++i) { for (; i < length; ++i) {
const unsigned short c = word[i]; const int c = word[i];
if (c == 0) { if (c == 0) {
break; break;
} }
@ -53,8 +51,7 @@ static inline void dumpWordInfo(const unsigned short *word, const int length,
} }
} }
static inline void dumpResult( static inline void dumpResult(const int *outWords, const int *frequencies, const int maxWordCounts,
const unsigned short *outWords, const int *frequencies, const int maxWordCounts,
const int maxWordLength) { const int maxWordLength) {
AKLOGI("--- DUMP RESULT ---------"); AKLOGI("--- DUMP RESULT ---------");
for (int i = 0; i < maxWordCounts; ++i) { for (int i = 0; i < maxWordCounts; ++i) {
@ -63,11 +60,11 @@ static inline void dumpResult(
AKLOGI("-------------------------"); AKLOGI("-------------------------");
} }
static inline void dumpWord(const unsigned short *word, const int length) { static inline void dumpWord(const int *word, const int length) {
static char charBuf[50]; static char charBuf[50];
int i = 0; int i = 0;
for (; i < length; ++i) { for (; i < length; ++i) {
const unsigned short c = word[i]; const int c = word[i];
if (c == 0) { if (c == 0) {
break; break;
} }
@ -80,22 +77,10 @@ static inline void dumpWord(const unsigned short *word, const int length) {
} }
} }
static inline void dumpWordInt(const int *word, const int length) { static inline void intArrayToCharArray(const int *input, const int length, char *output) {
static char charBuf[50];
for (int i = 0; i < length; ++i) {
charBuf[i] = word[i];
}
charBuf[length] = 0;
AKLOGI("i[ %s ]", charBuf);
}
// TODO: Change this to intArrayToCharArray
static inline void shortArrayToCharArray(
const unsigned short *input, const int length, char *output) {
int i = 0; int i = 0;
for (;i < length; ++i) { for (; i < length; ++i) {
const unsigned short c = input[i]; const int c = input[i];
if (c == 0) { if (c == 0) {
break; break;
} }
@ -137,11 +122,9 @@ static inline void showStackTrace() {
#define AKLOGI(fmt, ...) #define AKLOGI(fmt, ...)
#define DUMP_RESULT(words, frequencies, maxWordCount, maxWordLength) #define DUMP_RESULT(words, frequencies, maxWordCount, maxWordLength)
#define DUMP_WORD(word, length) #define DUMP_WORD(word, length)
#define DUMP_WORD_INT(word, length)
#define ASSERT(success) #define ASSERT(success)
#define SHOW_STACK_TRACE #define SHOW_STACK_TRACE
// TODO: INTS_TO_CHARS #define INTS_TO_CHARS(input, length, output)
#define SHORTS_TO_CHARS(input, length, output)
#endif #endif
#ifdef FLAG_DO_PROFILE #ifdef FLAG_DO_PROFILE
@ -286,6 +269,8 @@ static inline void prof_out(void) {
#define NOT_A_PROBABILITY (-1) #define NOT_A_PROBABILITY (-1)
#define KEYCODE_SPACE ' ' #define KEYCODE_SPACE ' '
#define KEYCODE_SINGLE_QUOTE '\''
#define KEYCODE_HYPHEN_MINUS '-'
#define CALIBRATE_SCORE_BY_TOUCH_COORDINATES true #define CALIBRATE_SCORE_BY_TOUCH_COORDINATES true

View File

@ -54,11 +54,10 @@ Dictionary::~Dictionary() {
} }
int Dictionary::getSuggestions(ProximityInfo *proximityInfo, void *traverseSession, int Dictionary::getSuggestions(ProximityInfo *proximityInfo, void *traverseSession,
int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *codes,
int *codes, int codesSize, int *prevWordChars, int codesSize, int *prevWordChars, int prevWordLength, int commitPoint, bool isGesture,
int prevWordLength, int commitPoint, bool isGesture, bool useFullEditDistance, int *outWords, int *frequencies, int *spaceIndices,
bool useFullEditDistance, unsigned short *outWords, int *outputTypes) const {
int *frequencies, int *spaceIndices, int *outputTypes) const {
int result = 0; int result = 0;
if (isGesture) { if (isGesture) {
DicTraverseWrapper::initDicTraverseSession( DicTraverseWrapper::initDicTraverseSession(
@ -83,7 +82,7 @@ int Dictionary::getSuggestions(ProximityInfo *proximityInfo, void *traverseSessi
} }
int Dictionary::getBigrams(const int32_t *word, int length, int *codes, int codesSize, int Dictionary::getBigrams(const int32_t *word, int length, int *codes, int codesSize,
unsigned short *outWords, int *frequencies, int *outputTypes) const { int *outWords, int *frequencies, int *outputTypes) const {
if (length <= 0) return 0; if (length <= 0) return 0;
return mBigramDictionary->getBigrams(word, length, codes, codesSize, outWords, frequencies, return mBigramDictionary->getBigrams(word, length, codes, codesSize, outWords, frequencies,
outputTypes); outputTypes);

View File

@ -47,11 +47,11 @@ class Dictionary {
int getSuggestions(ProximityInfo *proximityInfo, void *traverseSession, int *xcoordinates, int getSuggestions(ProximityInfo *proximityInfo, void *traverseSession, int *xcoordinates,
int *ycoordinates, int *times, int *pointerIds, int *codes, int codesSize, int *ycoordinates, int *times, int *pointerIds, int *codes, int codesSize,
int *prevWordChars, int prevWordLength, int commitPoint, bool isGesture, int *prevWordChars, int prevWordLength, int commitPoint, bool isGesture,
bool useFullEditDistance, unsigned short *outWords, bool useFullEditDistance, int *outWords, int *frequencies, int *spaceIndices,
int *frequencies, int *spaceIndices, int *outputTypes) const; int *outputTypes) const;
int getBigrams(const int32_t *word, int length, int *codes, int codesSize, int getBigrams(const int32_t *word, int length, int *codes, int codesSize, int *outWords,
unsigned short *outWords, int *frequencies, int *outputTypes) const; int *frequencies, int *outputTypes) const;
int getFrequency(const int32_t *word, int length) const; int getFrequency(const int32_t *word, int length) const;
bool isValidBigram(const int32_t *word1, int length1, const int32_t *word2, int length2) const; bool isValidBigram(const int32_t *word1, int length1, const int32_t *word2, int length2) const;
@ -68,7 +68,7 @@ class Dictionary {
// public static utility methods // public static utility methods
// static inline methods should be defined in the header file // static inline methods should be defined in the header file
static int wideStrLen(unsigned short *str); static int wideStrLen(int *str);
private: private:
DISALLOW_IMPLICIT_CONSTRUCTORS(Dictionary); DISALLOW_IMPLICIT_CONSTRUCTORS(Dictionary);
@ -88,7 +88,7 @@ class Dictionary {
// public static utility methods // public static utility methods
// static inline methods should be defined in the header file // static inline methods should be defined in the header file
inline int Dictionary::wideStrLen(unsigned short *str) { inline int Dictionary::wideStrLen(int *str) {
if (!str) return 0; if (!str) return 0;
int length = 0; int length = 0;
while (*str) { while (*str) {

View File

@ -38,15 +38,14 @@ class GestureDecoderWrapper : public IncrementalDecoderInterface {
} }
int getSuggestions(ProximityInfo *pInfo, void *traverseSession, int *inputXs, int *inputYs, int getSuggestions(ProximityInfo *pInfo, void *traverseSession, int *inputXs, int *inputYs,
int *times, int *pointerIds, int *codes, int inputSize, int commitPoint, int *times, int *pointerIds, int *codes, int inputSize, int commitPoint, int *outWords,
unsigned short *outWords, int *frequencies, int *outputIndices, int *frequencies, int *outputIndices, int *outputTypes) const {
int *outputTypes) const {
if (!mIncrementalDecoderInterface) { if (!mIncrementalDecoderInterface) {
return 0; return 0;
} }
return mIncrementalDecoderInterface->getSuggestions( return mIncrementalDecoderInterface->getSuggestions(pInfo, traverseSession, inputXs,
pInfo, traverseSession, inputXs, inputYs, times, pointerIds, codes, inputYs, times, pointerIds, codes, inputSize, commitPoint, outWords, frequencies,
inputSize, commitPoint, outWords, frequencies, outputIndices, outputTypes); outputIndices, outputTypes);
} }
static void setGestureDecoderFactoryMethod( static void setGestureDecoderFactoryMethod(

View File

@ -28,10 +28,9 @@ class ProximityInfo;
class IncrementalDecoderInterface { class IncrementalDecoderInterface {
public: public:
virtual int getSuggestions(ProximityInfo *pInfo, void *traverseSession, virtual int getSuggestions(ProximityInfo *pInfo, void *traverseSession, int *inputXs,
int *inputXs, int *inputYs, int *times, int *pointerIds, int *codes, int *inputYs, int *times, int *pointerIds, int *codes, int inputSize, int commitPoint,
int inputSize, int commitPoint, unsigned short *outWords, int *frequencies, int *outWords, int *frequencies, int *outputIndices, int *outputTypes) const = 0;
int *outputIndices, int *outputTypes) const = 0;
IncrementalDecoderInterface() { }; IncrementalDecoderInterface() { };
virtual ~IncrementalDecoderInterface() { }; virtual ~IncrementalDecoderInterface() { };
private: private:

View File

@ -38,15 +38,14 @@ class IncrementalDecoderWrapper : public IncrementalDecoderInterface {
} }
int getSuggestions(ProximityInfo *pInfo, void *traverseSession, int *inputXs, int *inputYs, int getSuggestions(ProximityInfo *pInfo, void *traverseSession, int *inputXs, int *inputYs,
int *times, int *pointerIds, int *codes, int inputSize, int commitPoint, int *times, int *pointerIds, int *codes, int inputSize, int commitPoint, int *outWords,
unsigned short *outWords, int *frequencies, int *outputIndices, int *frequencies, int *outputIndices, int *outputTypes) const {
int *outputTypes) const {
if (!mIncrementalDecoderInterface) { if (!mIncrementalDecoderInterface) {
return 0; return 0;
} }
return mIncrementalDecoderInterface->getSuggestions( return mIncrementalDecoderInterface->getSuggestions(pInfo, traverseSession, inputXs,
pInfo, traverseSession, inputXs, inputYs, times, pointerIds, codes, inputYs, times, pointerIds, codes, inputSize, commitPoint, outWords, frequencies,
inputSize, commitPoint, outWords, frequencies, outputIndices, outputTypes); outputIndices, outputTypes);
} }
static void setIncrementalDecoderFactoryMethod( static void setIncrementalDecoderFactoryMethod(

View File

@ -34,7 +34,7 @@ const float ProximityInfoState::NOT_A_DISTANCE_FLOAT = -1.0f;
const int ProximityInfoState::NOT_A_CODE = -1; const int ProximityInfoState::NOT_A_CODE = -1;
void ProximityInfoState::initInputParams(const int pointerId, const float maxPointToKeyLength, void ProximityInfoState::initInputParams(const int pointerId, const float maxPointToKeyLength,
const ProximityInfo *proximityInfo, const int32_t *const inputCodes, const int inputSize, const ProximityInfo *proximityInfo, const int *const inputCodes, const int inputSize,
const int *const xCoordinates, const int *const yCoordinates, const int *const times, const int *const xCoordinates, const int *const yCoordinates, const int *const times,
const int *const pointerIds, const bool isGeometric) { const int *const pointerIds, const bool isGeometric) {
@ -63,7 +63,7 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi
// - mNormalizedSquaredDistances // - mNormalizedSquaredDistances
// TODO: Merge // TODO: Merge
for (int i = 0; i < inputSize; ++i) { for (int i = 0; i < inputSize; ++i) {
const int32_t primaryKey = inputCodes[i]; const int primaryKey = inputCodes[i];
const int x = xCoordinates[i]; const int x = xCoordinates[i];
const int y = yCoordinates[i]; const int y = yCoordinates[i];
int *proximities = &mInputCodes[i * MAX_PROXIMITY_CHARS_SIZE_INTERNAL]; int *proximities = &mInputCodes[i * MAX_PROXIMITY_CHARS_SIZE_INTERNAL];
@ -146,7 +146,7 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi
AKLOGI("Init ProximityInfoState: (%d)PID = %d", i, pid); AKLOGI("Init ProximityInfoState: (%d)PID = %d", i, pid);
} }
if (pointerId == pid) { if (pointerId == pid) {
const int c = isGeometric ? NOT_A_COORDINATE : getPrimaryCharAt(i); const int c = isGeometric ? NOT_A_COORDINATE : getPrimaryCodePointAt(i);
const int x = proximityOnly ? NOT_A_COORDINATE : xCoordinates[i]; const int x = proximityOnly ? NOT_A_COORDINATE : xCoordinates[i];
const int y = proximityOnly ? NOT_A_COORDINATE : yCoordinates[i]; const int y = proximityOnly ? NOT_A_COORDINATE : yCoordinates[i];
const int time = times ? times[i] : -1; const int time = times ? times[i] : -1;
@ -306,12 +306,12 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi
&& xCoordinates && yCoordinates; && xCoordinates && yCoordinates;
if (!isGeometric && pointerId == 0) { if (!isGeometric && pointerId == 0) {
for (int i = 0; i < inputSize; ++i) { for (int i = 0; i < inputSize; ++i) {
mPrimaryInputWord[i] = getPrimaryCharAt(i); mPrimaryInputWord[i] = getPrimaryCodePointAt(i);
} }
for (int i = 0; i < mInputSize && mTouchPositionCorrectionEnabled; ++i) { for (int i = 0; i < mInputSize && mTouchPositionCorrectionEnabled; ++i) {
const int *proximityChars = getProximityCharsAt(i); const int *proximityCodePoints = getProximityCodePointsAt(i);
const int primaryKey = proximityChars[0]; const int primaryKey = proximityCodePoints[0];
const int x = xCoordinates[i]; const int x = xCoordinates[i];
const int y = yCoordinates[i]; const int y = yCoordinates[i];
if (DEBUG_PROXIMITY_CHARS) { if (DEBUG_PROXIMITY_CHARS) {
@ -319,11 +319,12 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi
a += 0; a += 0;
AKLOGI("--- Primary = %c, x = %d, y = %d", primaryKey, x, y); AKLOGI("--- Primary = %c, x = %d, y = %d", primaryKey, x, y);
} }
for (int j = 0; j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL && proximityChars[j] > 0; ++j) { for (int j = 0; j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL && proximityCodePoints[j] > 0;
const int currentChar = proximityChars[j]; ++j) {
const int currentCodePoint = proximityCodePoints[j];
const float squaredDistance = const float squaredDistance =
hasInputCoordinates() ? calculateNormalizedSquaredDistance( hasInputCoordinates() ? calculateNormalizedSquaredDistance(
mProximityInfo->getKeyIndexOf(currentChar), i) : mProximityInfo->getKeyIndexOf(currentCodePoint), i) :
NOT_A_DISTANCE_FLOAT; NOT_A_DISTANCE_FLOAT;
if (squaredDistance >= 0.0f) { if (squaredDistance >= 0.0f) {
mNormalizedSquaredDistances[i * MAX_PROXIMITY_CHARS_SIZE_INTERNAL + j] = mNormalizedSquaredDistances[i * MAX_PROXIMITY_CHARS_SIZE_INTERNAL + j] =
@ -334,7 +335,7 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi
PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO; PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO;
} }
if (DEBUG_PROXIMITY_CHARS) { if (DEBUG_PROXIMITY_CHARS) {
AKLOGI("--- Proximity (%d) = %c", j, currentChar); AKLOGI("--- Proximity (%d) = %c", j, currentCodePoint);
} }
} }
} }
@ -449,7 +450,7 @@ float ProximityInfoState::getPointScore(
// Sampling touch point and pushing information to vectors. // Sampling touch point and pushing information to vectors.
// Returning if previous point is popped or not. // Returning if previous point is popped or not.
bool ProximityInfoState::pushTouchPoint(const int inputIndex, const int nodeChar, int x, int y, bool ProximityInfoState::pushTouchPoint(const int inputIndex, const int nodeCodePoint, int x, int y,
const int time, const bool sample, const bool isLastPoint, const float sumAngle, const int time, const bool sample, const bool isLastPoint, const float sumAngle,
NearKeysDistanceMap *const currentNearKeysDistances, NearKeysDistanceMap *const currentNearKeysDistances,
const NearKeysDistanceMap *const prevNearKeysDistances, const NearKeysDistanceMap *const prevNearKeysDistances,
@ -458,7 +459,7 @@ bool ProximityInfoState::pushTouchPoint(const int inputIndex, const int nodeChar
size_t size = mInputXs.size(); size_t size = mInputXs.size();
bool popped = false; bool popped = false;
if (nodeChar < 0 && sample) { if (nodeCodePoint < 0 && sample) {
const float nearest = updateNearKeysDistances(x, y, currentNearKeysDistances); const float nearest = updateNearKeysDistances(x, y, currentNearKeysDistances);
const float score = getPointScore(x, y, time, isLastPoint, nearest, sumAngle, const float score = getPointScore(x, y, time, isLastPoint, nearest, sumAngle,
currentNearKeysDistances, prevNearKeysDistances, prevPrevNearKeysDistances); currentNearKeysDistances, prevNearKeysDistances, prevPrevNearKeysDistances);
@ -487,8 +488,8 @@ bool ProximityInfoState::pushTouchPoint(const int inputIndex, const int nodeChar
} }
} }
if (nodeChar >= 0 && (x < 0 || y < 0)) { if (nodeCodePoint >= 0 && (x < 0 || y < 0)) {
const int keyId = mProximityInfo->getKeyIndexOf(nodeChar); const int keyId = mProximityInfo->getKeyIndexOf(nodeCodePoint);
if (keyId >= 0) { if (keyId >= 0) {
x = mProximityInfo->getKeyCenterXOfKeyIdG(keyId); x = mProximityInfo->getKeyCenterXOfKeyIdG(keyId);
y = mProximityInfo->getKeyCenterYOfKeyIdG(keyId); y = mProximityInfo->getKeyCenterYOfKeyIdG(keyId);
@ -543,7 +544,7 @@ float ProximityInfoState::getPointToKeyLength(const int inputIndex, const int co
const int index = inputIndex * mProximityInfo->getKeyCount() + keyId; const int index = inputIndex * mProximityInfo->getKeyCount() + keyId;
return min(mDistanceCache[index], mMaxPointToKeyLength); return min(mDistanceCache[index], mMaxPointToKeyLength);
} }
if (isSkippableChar(codePoint)) { if (isSkippableCodePoint(codePoint)) {
return 0.0f; return 0.0f;
} }
// If the char is not a key on the keyboard then return the max length. // If the char is not a key on the keyboard then return the max length.
@ -960,9 +961,9 @@ bool ProximityInfoState::suppressCharProbabilities(const int index0, const int i
return true; return true;
} }
// Get a word that is detected by tracing highest probability sequence into charBuf and returns // Get a word that is detected by tracing highest probability sequence into codePointBuf and
// probability of generating the word. // returns probability of generating the word.
float ProximityInfoState::getHighestProbabilitySequence(uint16_t *const charBuf) const { float ProximityInfoState::getHighestProbabilitySequence(int *const codePointBuf) const {
static const float DEMOTION_LOG_PROBABILITY = 0.3f; static const float DEMOTION_LOG_PROBABILITY = 0.3f;
int index = 0; int index = 0;
float sumLogProbability = 0.0f; float sumLogProbability = 0.0f;
@ -980,12 +981,12 @@ float ProximityInfoState::getHighestProbabilitySequence(uint16_t *const charBuf)
} }
} }
if (character != NOT_AN_INDEX) { if (character != NOT_AN_INDEX) {
charBuf[index] = mProximityInfo->getCodePointOf(character); codePointBuf[index] = mProximityInfo->getCodePointOf(character);
index++; index++;
} }
sumLogProbability += minLogProbability; sumLogProbability += minLogProbability;
} }
charBuf[index] = '\0'; codePointBuf[index] = '\0';
return sumLogProbability; return sumLogProbability;
} }

View File

@ -43,7 +43,7 @@ class ProximityInfoState {
// Defined in proximity_info_state.cpp // // Defined in proximity_info_state.cpp //
///////////////////////////////////////// /////////////////////////////////////////
void initInputParams(const int pointerId, const float maxPointToKeyLength, void initInputParams(const int pointerId, const float maxPointToKeyLength,
const ProximityInfo *proximityInfo, const int32_t *const inputCodes, const ProximityInfo *proximityInfo, const int *const inputCodes,
const int inputSize, const int *xCoordinates, const int *yCoordinates, const int inputSize, const int *xCoordinates, const int *yCoordinates,
const int *const times, const int *const pointerIds, const bool isGeometric); const int *const times, const int *const pointerIds, const bool isGeometric);
@ -65,15 +65,15 @@ class ProximityInfoState {
virtual ~ProximityInfoState() {} virtual ~ProximityInfoState() {}
inline unsigned short getPrimaryCharAt(const int index) const { inline int getPrimaryCodePointAt(const int index) const {
return getProximityCharsAt(index)[0]; return getProximityCodePointsAt(index)[0];
} }
inline bool existsCharInProximityAt(const int index, const int c) const { inline bool existsCodePointInProximityAt(const int index, const int c) const {
const int *chars = getProximityCharsAt(index); const int *codePoints = getProximityCodePointsAt(index);
int i = 0; int i = 0;
while (chars[i] > 0 && i < MAX_PROXIMITY_CHARS_SIZE_INTERNAL) { while (codePoints[i] > 0 && i < MAX_PROXIMITY_CHARS_SIZE_INTERNAL) {
if (chars[i++] == c) { if (codePoints[i++] == c) {
return true; return true;
} }
} }
@ -82,13 +82,13 @@ class ProximityInfoState {
inline bool existsAdjacentProximityChars(const int index) const { inline bool existsAdjacentProximityChars(const int index) const {
if (index < 0 || index >= mInputSize) return false; if (index < 0 || index >= mInputSize) return false;
const int currentChar = getPrimaryCharAt(index); const int currentCodePoint = getPrimaryCodePointAt(index);
const int leftIndex = index - 1; const int leftIndex = index - 1;
if (leftIndex >= 0 && existsCharInProximityAt(leftIndex, currentChar)) { if (leftIndex >= 0 && existsCodePointInProximityAt(leftIndex, currentCodePoint)) {
return true; return true;
} }
const int rightIndex = index + 1; const int rightIndex = index + 1;
if (rightIndex < mInputSize && existsCharInProximityAt(rightIndex, currentChar)) { if (rightIndex < mInputSize && existsCodePointInProximityAt(rightIndex, currentCodePoint)) {
return true; return true;
} }
return false; return false;
@ -106,15 +106,15 @@ class ProximityInfoState {
// Notice : accented characters do not have a proximity list, so they are alone // Notice : accented characters do not have a proximity list, so they are alone
// in their list. The non-accented version of the character should be considered // in their list. The non-accented version of the character should be considered
// "close", but not the other keys close to the non-accented version. // "close", but not the other keys close to the non-accented version.
inline ProximityType getMatchedProximityId(const int index, inline ProximityType getMatchedProximityId(const int index, const int c,
const unsigned short c, const bool checkProximityChars, int *proximityIndex = 0) const { const bool checkProximityChars, int *proximityIndex = 0) const {
const int *currentChars = getProximityCharsAt(index); const int *currentCodePoints = getProximityCodePointsAt(index);
const int firstChar = currentChars[0]; const int firstCodePoint = currentCodePoints[0];
const unsigned short baseLowerC = toBaseLowerCase(c); const int baseLowerC = toBaseLowerCase(c);
// The first char in the array is what user typed. If it matches right away, // The first char in the array is what user typed. If it matches right away,
// that means the user typed that same char for this pos. // that means the user typed that same char for this pos.
if (firstChar == baseLowerC || firstChar == c) { if (firstCodePoint == baseLowerC || firstCodePoint == c) {
return EQUIVALENT_CHAR; return EQUIVALENT_CHAR;
} }
@ -123,14 +123,14 @@ class ProximityInfoState {
// If the non-accented, lowercased version of that first character matches c, // If the non-accented, lowercased version of that first character matches c,
// then we have a non-accented version of the accented character the user // then we have a non-accented version of the accented character the user
// typed. Treat it as a close char. // typed. Treat it as a close char.
if (toBaseLowerCase(firstChar) == baseLowerC) if (toBaseLowerCase(firstCodePoint) == baseLowerC)
return NEAR_PROXIMITY_CHAR; return NEAR_PROXIMITY_CHAR;
// Not an exact nor an accent-alike match: search the list of close keys // Not an exact nor an accent-alike match: search the list of close keys
int j = 1; int j = 1;
while (j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL while (j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL
&& currentChars[j] > ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) { && currentCodePoints[j] > ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) {
const bool matched = (currentChars[j] == baseLowerC || currentChars[j] == c); const bool matched = (currentCodePoints[j] == baseLowerC || currentCodePoints[j] == c);
if (matched) { if (matched) {
if (proximityIndex) { if (proximityIndex) {
*proximityIndex = j; *proximityIndex = j;
@ -140,11 +140,12 @@ class ProximityInfoState {
++j; ++j;
} }
if (j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL if (j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL
&& currentChars[j] == ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) { && currentCodePoints[j] == ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) {
++j; ++j;
while (j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL while (j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL
&& currentChars[j] > ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) { && currentCodePoints[j] > ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) {
const bool matched = (currentChars[j] == baseLowerC || currentChars[j] == c); const bool matched =
(currentCodePoints[j] == baseLowerC || currentCodePoints[j] == c);
if (matched) { if (matched) {
if (proximityIndex) { if (proximityIndex) {
*proximityIndex = j; *proximityIndex = j;
@ -165,7 +166,7 @@ class ProximityInfoState {
inputIndex * MAX_PROXIMITY_CHARS_SIZE_INTERNAL + proximityIndex]; inputIndex * MAX_PROXIMITY_CHARS_SIZE_INTERNAL + proximityIndex];
} }
inline const unsigned short *getPrimaryInputWord() const { inline const int *getPrimaryInputWord() const {
return mPrimaryInputWord; return mPrimaryInputWord;
} }
@ -173,13 +174,13 @@ class ProximityInfoState {
return mTouchPositionCorrectionEnabled; return mTouchPositionCorrectionEnabled;
} }
inline bool sameAsTyped(const unsigned short *word, int length) const { inline bool sameAsTyped(const int *word, int length) const {
if (length != mInputSize) { if (length != mInputSize) {
return false; return false;
} }
const int *inputCodes = mInputCodes; const int *inputCodes = mInputCodes;
while (length--) { while (length--) {
if (static_cast<unsigned int>(*inputCodes) != static_cast<unsigned int>(*word)) { if (*inputCodes != *word) {
return false; return false;
} }
inputCodes += MAX_PROXIMITY_CHARS_SIZE_INTERNAL; inputCodes += MAX_PROXIMITY_CHARS_SIZE_INTERNAL;
@ -236,7 +237,7 @@ class ProximityInfoState {
// Returns angle of three points. x, y, and z are indices. // Returns angle of three points. x, y, and z are indices.
float getPointsAngle(const int index0, const int index1, const int index2) const; float getPointsAngle(const int index0, const int index1, const int index2) const;
float getHighestProbabilitySequence(uint16_t *const charBuf) const; float getHighestProbabilitySequence(int *const codePointBuf) const;
float getProbability(const int index, const int charCode) const; float getProbability(const int index, const int charCode) const;
@ -255,7 +256,7 @@ class ProximityInfoState {
float calculateSquaredDistanceFromSweetSpotCenter( float calculateSquaredDistanceFromSweetSpotCenter(
const int keyIndex, const int inputIndex) const; const int keyIndex, const int inputIndex) const;
bool pushTouchPoint(const int inputIndex, const int nodeChar, int x, int y, const int time, bool pushTouchPoint(const int inputIndex, const int nodeCodePoint, int x, int y, const int time,
const bool sample, const bool isLastPoint, const float sumAngle, const bool sample, const bool isLastPoint, const float sumAngle,
NearKeysDistanceMap *const currentNearKeysDistances, NearKeysDistanceMap *const currentNearKeysDistances,
const NearKeysDistanceMap *const prevNearKeysDistances, const NearKeysDistanceMap *const prevNearKeysDistances,
@ -269,7 +270,7 @@ class ProximityInfoState {
return mInputXs.size() > 0 && mInputYs.size() > 0; return mInputXs.size() > 0 && mInputYs.size() > 0;
} }
inline const int *getProximityCharsAt(const int index) const { inline const int *getProximityCodePointsAt(const int index) const {
return mInputCodes + (index * MAX_PROXIMITY_CHARS_SIZE_INTERNAL); return mInputCodes + (index * MAX_PROXIMITY_CHARS_SIZE_INTERNAL);
} }
@ -322,10 +323,10 @@ class ProximityInfoState {
// inputs including the current input point. // inputs including the current input point.
std::vector<NearKeycodesSet> mSearchKeysVector; std::vector<NearKeycodesSet> mSearchKeysVector;
bool mTouchPositionCorrectionEnabled; bool mTouchPositionCorrectionEnabled;
int32_t mInputCodes[MAX_PROXIMITY_CHARS_SIZE_INTERNAL * MAX_WORD_LENGTH_INTERNAL]; int mInputCodes[MAX_PROXIMITY_CHARS_SIZE_INTERNAL * MAX_WORD_LENGTH_INTERNAL];
int mNormalizedSquaredDistances[MAX_PROXIMITY_CHARS_SIZE_INTERNAL * MAX_WORD_LENGTH_INTERNAL]; int mNormalizedSquaredDistances[MAX_PROXIMITY_CHARS_SIZE_INTERNAL * MAX_WORD_LENGTH_INTERNAL];
int mInputSize; int mInputSize;
unsigned short mPrimaryInputWord[MAX_WORD_LENGTH_INTERNAL]; int mPrimaryInputWord[MAX_WORD_LENGTH_INTERNAL];
}; };
} // namespace latinime } // namespace latinime
#endif // LATINIME_PROXIMITY_INFO_STATE_H #endif // LATINIME_PROXIMITY_INFO_STATE_H

View File

@ -43,18 +43,16 @@ class TerminalAttributes {
return mHasNextShortcutTarget; return mHasNextShortcutTarget;
} }
// Gets the shortcut target itself as a uint16_t string. For parameters and return value // Gets the shortcut target itself as an int string. For parameters and return value
// see BinaryFormat::getWordAtAddress. // see BinaryFormat::getWordAtAddress.
// TODO: make the output an uint32_t* to handle the whole unicode range. inline int getNextShortcutTarget(const int maxDepth, int *outWord, int *outFreq) {
inline int getNextShortcutTarget(const int maxDepth, uint16_t *outWord, int *outFreq) {
const int shortcutFlags = BinaryFormat::getFlagsAndForwardPointer(mDict, &mPos); const int shortcutFlags = BinaryFormat::getFlagsAndForwardPointer(mDict, &mPos);
mHasNextShortcutTarget = mHasNextShortcutTarget = 0 != (shortcutFlags & BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT);
0 != (shortcutFlags & BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT);
unsigned int i; unsigned int i;
for (i = 0; i < MAX_WORD_LENGTH_INTERNAL; ++i) { for (i = 0; i < MAX_WORD_LENGTH_INTERNAL; ++i) {
const int codePoint = BinaryFormat::getCodePointAndForwardPointer(mDict, &mPos); const int codePoint = BinaryFormat::getCodePointAndForwardPointer(mDict, &mPos);
if (NOT_A_CODE_POINT == codePoint) break; if (NOT_A_CODE_POINT == codePoint) break;
outWord[i] = (uint16_t)codePoint; outWord[i] = codePoint;
} }
*outFreq = BinaryFormat::getAttributeFrequencyFromFlags(shortcutFlags); *outFreq = BinaryFormat::getAttributeFrequencyFromFlags(shortcutFlags);
return i; return i;

View File

@ -55,13 +55,13 @@ UnigramDictionary::UnigramDictionary(const uint8_t *const streamStart, int fullW
UnigramDictionary::~UnigramDictionary() { UnigramDictionary::~UnigramDictionary() {
} }
static inline unsigned int getCodesBufferSize(const int *codes, const int codesSize) { static inline int getCodesBufferSize(const int *codes, const int codesSize) {
return static_cast<unsigned int>(sizeof(*codes)) * codesSize; return sizeof(*codes) * codesSize;
} }
// TODO: This needs to take a const unsigned short* and not tinker with its contents // TODO: This needs to take a const int* and not tinker with its contents
static inline void addWord(unsigned short *word, int length, int frequency, static inline void addWord(int *word, int length, int frequency, WordsPriorityQueue *queue,
WordsPriorityQueue *queue, int type) { int type) {
queue->push(frequency, word, length, type); queue->push(frequency, word, length, type);
} }
@ -171,9 +171,7 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit
int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
const int *ycoordinates, const int *codes, const int codesSize, const int *ycoordinates, const int *codes, const int codesSize,
const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
const bool useFullEditDistance, unsigned short *outWords, int *frequencies, const bool useFullEditDistance, int *outWords, int *frequencies, int *outputTypes) const {
int *outputTypes) const {
WordsPriorityQueuePool queuePool(MAX_WORDS, SUB_QUEUE_MAX_WORDS, MAX_WORD_LENGTH); WordsPriorityQueuePool queuePool(MAX_WORDS, SUB_QUEUE_MAX_WORDS, MAX_WORD_LENGTH);
queuePool.clearAll(); queuePool.clearAll();
Correction masterCorrection; Correction masterCorrection;
@ -218,7 +216,7 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *x
AKLOGI("Returning %d words", suggestedWordsCount); AKLOGI("Returning %d words", suggestedWordsCount);
/// Print the returned words /// Print the returned words
for (int j = 0; j < suggestedWordsCount; ++j) { for (int j = 0; j < suggestedWordsCount; ++j) {
short unsigned int *w = outWords + j * MAX_WORD_LENGTH; int *w = outWords + j * MAX_WORD_LENGTH;
char s[MAX_WORD_LENGTH]; char s[MAX_WORD_LENGTH];
for (int i = 0; i <= MAX_WORD_LENGTH; i++) s[i] = w[i]; for (int i = 0; i <= MAX_WORD_LENGTH; i++) s[i] = w[i];
(void)s; // To suppress compiler warning (void)s; // To suppress compiler warning
@ -230,12 +228,11 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *x
return suggestedWordsCount; return suggestedWordsCount;
} }
void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
const int *xcoordinates, const int *ycoordinates, const int *codes, const int *ycoordinates, const int *codes, const int inputSize,
const int inputSize, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
const bool useFullEditDistance, Correction *correction, const bool useFullEditDistance, Correction *correction, WordsPriorityQueuePool *queuePool)
WordsPriorityQueuePool *queuePool) const { const {
PROF_OPEN; PROF_OPEN;
PROF_START(0); PROF_START(0);
PROF_END(0); PROF_END(0);
@ -284,7 +281,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
if (queue->size() > 0) { if (queue->size() > 0) {
WordsPriorityQueue::SuggestedWord *sw = queue->top(); WordsPriorityQueue::SuggestedWord *sw = queue->top();
const int score = sw->mScore; const int score = sw->mScore;
const unsigned short *word = sw->mWord; const int *word = sw->mWord;
const int wordLength = sw->mWordLength; const int wordLength = sw->mWordLength;
float ns = Correction::RankingAlgorithm::calcNormalizedScore( float ns = Correction::RankingAlgorithm::calcNormalizedScore(
correction->getPrimaryInputWord(), i, word, wordLength, score); correction->getPrimaryInputWord(), i, word, wordLength, score);
@ -303,7 +300,7 @@ void UnigramDictionary::initSuggestions(ProximityInfo *proximityInfo, const int
Correction *correction) const { Correction *correction) const {
if (DEBUG_DICT) { if (DEBUG_DICT) {
AKLOGI("initSuggest"); AKLOGI("initSuggest");
DUMP_WORD_INT(codes, inputSize); DUMP_WORD(codes, inputSize);
} }
correction->initInputParams(proximityInfo, codes, inputSize, xCoordinates, yCoordinates); correction->initInputParams(proximityInfo, codes, inputSize, xCoordinates, yCoordinates);
const int maxDepth = min(inputSize * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH); const int maxDepth = min(inputSize * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);
@ -376,7 +373,7 @@ inline void UnigramDictionary::onTerminal(const int probability,
const bool addToSubQueue = inputIndex < SUB_QUEUE_MAX_COUNT; const bool addToSubQueue = inputIndex < SUB_QUEUE_MAX_COUNT;
int wordLength; int wordLength;
unsigned short *wordPointer; int *wordPointer;
if ((currentWordIndex == FIRST_WORD_INDEX) && addToMasterQueue) { if ((currentWordIndex == FIRST_WORD_INDEX) && addToMasterQueue) {
WordsPriorityQueue *masterQueue = queuePool->getMasterQueue(); WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
@ -404,7 +401,7 @@ inline void UnigramDictionary::onTerminal(const int probability,
// so that the insert order is protected inside the queue for words // so that the insert order is protected inside the queue for words
// with the same score. For the moment we use -1 to make sure the shortcut will // with the same score. For the moment we use -1 to make sure the shortcut will
// never be in front of the word. // never be in front of the word.
uint16_t shortcutTarget[MAX_WORD_LENGTH_INTERNAL]; int shortcutTarget[MAX_WORD_LENGTH_INTERNAL];
int shortcutFrequency; int shortcutFrequency;
const int shortcutTargetStringLength = iterator.getNextShortcutTarget( const int shortcutTargetStringLength = iterator.getNextShortcutTarget(
MAX_WORD_LENGTH_INTERNAL, shortcutTarget, &shortcutFrequency); MAX_WORD_LENGTH_INTERNAL, shortcutTarget, &shortcutFrequency);
@ -444,7 +441,7 @@ int UnigramDictionary::getSubStringSuggestion(
const bool hasAutoCorrectionCandidate, const int currentWordIndex, const bool hasAutoCorrectionCandidate, const int currentWordIndex,
const int inputWordStartPos, const int inputWordLength, const int inputWordStartPos, const int inputWordLength,
const int outputWordStartPos, const bool isSpaceProximity, int *freqArray, const int outputWordStartPos, const bool isSpaceProximity, int *freqArray,
int *wordLengthArray, unsigned short *outputWord, int *outputWordLength) const { int *wordLengthArray, int *outputWord, int *outputWordLength) const {
if (inputWordLength > MULTIPLE_WORDS_SUGGESTION_MAX_WORD_LENGTH) { if (inputWordLength > MULTIPLE_WORDS_SUGGESTION_MAX_WORD_LENGTH) {
return FLAG_MULTIPLE_SUGGEST_ABORT; return FLAG_MULTIPLE_SUGGEST_ABORT;
} }
@ -487,13 +484,13 @@ int UnigramDictionary::getSubStringSuggestion(
// TODO: Remove the safety net above // // TODO: Remove the safety net above //
////////////////////////////////////////////// //////////////////////////////////////////////
unsigned short *tempOutputWord = 0; int *tempOutputWord = 0;
int nextWordLength = 0; int nextWordLength = 0;
// TODO: Optimize init suggestion // TODO: Optimize init suggestion
initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
inputSize, correction); inputSize, correction);
unsigned short word[MAX_WORD_LENGTH_INTERNAL]; int word[MAX_WORD_LENGTH_INTERNAL];
int freq = getMostFrequentWordLike( int freq = getMostFrequentWordLike(
inputWordStartPos, inputWordLength, correction, word); inputWordStartPos, inputWordLength, correction, word);
if (freq > 0) { if (freq > 0) {
@ -592,7 +589,7 @@ void UnigramDictionary::getMultiWordsSuggestionRec(ProximityInfo *proximityInfo,
const bool useFullEditDistance, const int inputSize, Correction *correction, const bool useFullEditDistance, const int inputSize, Correction *correction,
WordsPriorityQueuePool *queuePool, const bool hasAutoCorrectionCandidate, WordsPriorityQueuePool *queuePool, const bool hasAutoCorrectionCandidate,
const int startInputPos, const int startWordIndex, const int outputWordLength, const int startInputPos, const int startWordIndex, const int outputWordLength,
int *freqArray, int *wordLengthArray, unsigned short *outputWord) const { int *freqArray, int *wordLengthArray, int *outputWord) const {
if (startWordIndex >= (MULTIPLE_WORDS_SUGGESTION_MAX_WORDS - 1)) { if (startWordIndex >= (MULTIPLE_WORDS_SUGGESTION_MAX_WORDS - 1)) {
// Return if the last word index // Return if the last word index
return; return;
@ -678,7 +675,7 @@ void UnigramDictionary::getSplitMultipleWordsSuggestions(ProximityInfo *proximit
} }
// Allocating fixed length array on stack // Allocating fixed length array on stack
unsigned short outputWord[MAX_WORD_LENGTH]; int outputWord[MAX_WORD_LENGTH];
int freqArray[MULTIPLE_WORDS_SUGGESTION_MAX_WORDS]; int freqArray[MULTIPLE_WORDS_SUGGESTION_MAX_WORDS];
int wordLengthArray[MULTIPLE_WORDS_SUGGESTION_MAX_WORDS]; int wordLengthArray[MULTIPLE_WORDS_SUGGESTION_MAX_WORDS];
const int outputWordLength = 0; const int outputWordLength = 0;
@ -693,11 +690,11 @@ void UnigramDictionary::getSplitMultipleWordsSuggestions(ProximityInfo *proximit
// Wrapper for getMostFrequentWordLikeInner, which matches it to the previous // Wrapper for getMostFrequentWordLikeInner, which matches it to the previous
// interface. // interface.
inline int UnigramDictionary::getMostFrequentWordLike(const int startInputIndex, inline int UnigramDictionary::getMostFrequentWordLike(const int startInputIndex,
const int inputSize, Correction *correction, unsigned short *word) const { const int inputSize, Correction *correction, int *word) const {
uint16_t inWord[inputSize]; int inWord[inputSize];
for (int i = 0; i < inputSize; ++i) { for (int i = 0; i < inputSize; ++i) {
inWord[i] = (uint16_t)correction->getPrimaryCharAt(startInputIndex + i); inWord[i] = correction->getPrimaryCodePointAt(startInputIndex + i);
} }
return getMostFrequentWordLikeInner(inWord, inputSize, word); return getMostFrequentWordLikeInner(inWord, inputSize, word);
} }
@ -715,14 +712,14 @@ inline int UnigramDictionary::getMostFrequentWordLike(const int startInputIndex,
// In and out parameters may point to the same location. This function takes care // In and out parameters may point to the same location. This function takes care
// not to use any input parameters after it wrote into its outputs. // not to use any input parameters after it wrote into its outputs.
static inline bool testCharGroupForContinuedLikeness(const uint8_t flags, static inline bool testCharGroupForContinuedLikeness(const uint8_t flags,
const uint8_t *const root, const int startPos, const uint16_t *const inWord, const uint8_t *const root, const int startPos, const int *const inWord,
const int startInputIndex, const int inputSize, int32_t *outNewWord, int *outInputIndex, const int startInputIndex, const int inputSize, int *outNewWord, int *outInputIndex,
int *outPos) { int *outPos) {
const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags)); const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags));
int pos = startPos; int pos = startPos;
int32_t codePoint = BinaryFormat::getCodePointAndForwardPointer(root, &pos); int codePoint = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
int32_t baseChar = toBaseLowerCase(codePoint); int baseChar = toBaseLowerCase(codePoint);
const uint16_t wChar = toBaseLowerCase(inWord[startInputIndex]); const int wChar = toBaseLowerCase(inWord[startInputIndex]);
if (baseChar != wChar) { if (baseChar != wChar) {
*outPos = hasMultipleChars ? BinaryFormat::skipOtherCharacters(root, pos) : pos; *outPos = hasMultipleChars ? BinaryFormat::skipOtherCharacters(root, pos) : pos;
@ -753,8 +750,8 @@ static inline bool testCharGroupForContinuedLikeness(const uint8_t flags,
// It will compare the frequency to the max frequency, and if greater, will // It will compare the frequency to the max frequency, and if greater, will
// copy the word into the output buffer. In output value maxFreq, it will // copy the word into the output buffer. In output value maxFreq, it will
// write the new maximum frequency if it changed. // write the new maximum frequency if it changed.
static inline void onTerminalWordLike(const int freq, int32_t *newWord, const int length, static inline void onTerminalWordLike(const int freq, int *newWord, const int length, int *outWord,
short unsigned int *outWord, int *maxFreq) { int *maxFreq) {
if (freq > *maxFreq) { if (freq > *maxFreq) {
for (int q = 0; q < length; ++q) { for (int q = 0; q < length; ++q) {
outWord[q] = newWord[q]; outWord[q] = newWord[q];
@ -766,9 +763,9 @@ static inline void onTerminalWordLike(const int freq, int32_t *newWord, const in
// Will find the highest frequency of the words like the one passed as an argument, // Will find the highest frequency of the words like the one passed as an argument,
// that is, everything that only differs by case/accents. // that is, everything that only differs by case/accents.
int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t *const inWord, int UnigramDictionary::getMostFrequentWordLikeInner(const int *const inWord, const int inputSize,
const int inputSize, short unsigned int *outWord) const { int *outWord) const {
int32_t newWord[MAX_WORD_LENGTH_INTERNAL]; int newWord[MAX_WORD_LENGTH_INTERNAL];
int depth = 0; int depth = 0;
int maxFreq = -1; int maxFreq = -1;
const uint8_t *const root = DICT_ROOT; const uint8_t *const root = DICT_ROOT;
@ -828,7 +825,7 @@ int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t *const inWord
return maxFreq; return maxFreq;
} }
int UnigramDictionary::getFrequency(const int32_t *const inWord, const int length) const { int UnigramDictionary::getFrequency(const int *const inWord, const int length) const {
const uint8_t *const root = DICT_ROOT; const uint8_t *const root = DICT_ROOT;
int pos = BinaryFormat::getTerminalPosition(root, inWord, length, int pos = BinaryFormat::getTerminalPosition(root, inWord, length,
false /* forceLowerCaseSearch */); false /* forceLowerCaseSearch */);
@ -853,8 +850,7 @@ int UnigramDictionary::getFrequency(const int32_t *const inWord, const int lengt
} }
// TODO: remove this function. // TODO: remove this function.
int UnigramDictionary::getBigramPosition(int pos, unsigned short *word, int offset, int UnigramDictionary::getBigramPosition(int pos, int *word, int offset, int length) const {
int length) const {
return -1; return -1;
} }
@ -900,7 +896,7 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos,
// else if FLAG_IS_TERMINAL: the frequency // else if FLAG_IS_TERMINAL: the frequency
// else if MASK_GROUP_ADDRESS_TYPE is not NONE: the children address // else if MASK_GROUP_ADDRESS_TYPE is not NONE: the children address
// Note that you can't have a node that both is not a terminal and has no children. // Note that you can't have a node that both is not a terminal and has no children.
int32_t c = BinaryFormat::getCodePointAndForwardPointer(DICT_ROOT, &pos); int c = BinaryFormat::getCodePointAndForwardPointer(DICT_ROOT, &pos);
assert(NOT_A_CODE_POINT != c); assert(NOT_A_CODE_POINT != c);
// We are going to loop through each character and make it look like it's a different // We are going to loop through each character and make it look like it's a different
@ -914,7 +910,7 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos,
// We prefetch the next char. If 'c' is the last char of this node, we will have // We prefetch the next char. If 'c' is the last char of this node, we will have
// NOT_A_CODE_POINT in the next char. From this we can decide whether this virtual node // NOT_A_CODE_POINT in the next char. From this we can decide whether this virtual node
// should behave as a terminal or not and whether we have children. // should behave as a terminal or not and whether we have children.
const int32_t nextc = hasMultipleChars const int nextc = hasMultipleChars
? BinaryFormat::getCodePointAndForwardPointer(DICT_ROOT, &pos) : NOT_A_CODE_POINT; ? BinaryFormat::getCodePointAndForwardPointer(DICT_ROOT, &pos) : NOT_A_CODE_POINT;
const bool isLastChar = (NOT_A_CODE_POINT == nextc); const bool isLastChar = (NOT_A_CODE_POINT == nextc);
// If there are more chars in this nodes, then this virtual node is not a terminal. // If there are more chars in this nodes, then this virtual node is not a terminal.

View File

@ -41,12 +41,12 @@ class UnigramDictionary {
static const int FLAG_MULTIPLE_SUGGEST_CONTINUE = 2; static const int FLAG_MULTIPLE_SUGGEST_CONTINUE = 2;
UnigramDictionary(const uint8_t *const streamStart, int fullWordMultiplier, int maxWordLength, UnigramDictionary(const uint8_t *const streamStart, int fullWordMultiplier, int maxWordLength,
int maxWords, const unsigned int flags); int maxWords, const unsigned int flags);
int getFrequency(const int32_t *const inWord, const int length) const; int getFrequency(const int *const inWord, const int length) const;
int getBigramPosition(int pos, unsigned short *word, int offset, int length) const; int getBigramPosition(int pos, int *word, int offset, int length) const;
int getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, int getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
const int *ycoordinates, const int *codes, const int codesSize, const int *ycoordinates, const int *codes, const int codesSize,
const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
const bool useFullEditDistance, unsigned short *outWords, int *frequencies, const bool useFullEditDistance, int *outWords, int *frequencies,
int *outputTypes) const; int *outputTypes) const;
virtual ~UnigramDictionary(); virtual ~UnigramDictionary();
@ -93,9 +93,9 @@ class UnigramDictionary {
int *newChildPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool, int *newChildPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool,
const int currentWordIndex) const; const int currentWordIndex) const;
int getMostFrequentWordLike(const int startInputIndex, const int inputSize, int getMostFrequentWordLike(const int startInputIndex, const int inputSize,
Correction *correction, unsigned short *word) const; Correction *correction, int *word) const;
int getMostFrequentWordLikeInner(const uint16_t *const inWord, const int inputSize, int getMostFrequentWordLikeInner(const int *const inWord, const int inputSize,
short unsigned int *outWord) const; int *outWord) const;
int getSubStringSuggestion( int getSubStringSuggestion(
ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates,
const int *codes, const bool useFullEditDistance, Correction *correction, const int *codes, const bool useFullEditDistance, Correction *correction,
@ -103,14 +103,13 @@ class UnigramDictionary {
const bool hasAutoCorrectionCandidate, const int currentWordIndex, const bool hasAutoCorrectionCandidate, const int currentWordIndex,
const int inputWordStartPos, const int inputWordLength, const int inputWordStartPos, const int inputWordLength,
const int outputWordStartPos, const bool isSpaceProximity, int *freqArray, const int outputWordStartPos, const bool isSpaceProximity, int *freqArray,
int *wordLengthArray, unsigned short *outputWord, int *outputWordLength) const; int *wordLengthArray, int *outputWord, int *outputWordLength) const;
void getMultiWordsSuggestionRec(ProximityInfo *proximityInfo, void getMultiWordsSuggestionRec(ProximityInfo *proximityInfo, const int *xcoordinates,
const int *xcoordinates, const int *ycoordinates, const int *codes, const int *ycoordinates, const int *codes, const bool useFullEditDistance,
const bool useFullEditDistance, const int inputSize, const int inputSize, Correction *correction, WordsPriorityQueuePool *queuePool,
Correction *correction, WordsPriorityQueuePool *queuePool,
const bool hasAutoCorrectionCandidate, const int startPos, const int startWordIndex, const bool hasAutoCorrectionCandidate, const int startPos, const int startWordIndex,
const int outputWordLength, int *freqArray, int *wordLengthArray, const int outputWordLength, int *freqArray, int *wordLengthArray,
unsigned short *outputWord) const; int *outputWord) const;
const uint8_t *const DICT_ROOT; const uint8_t *const DICT_ROOT;
const int MAX_WORD_LENGTH; const int MAX_WORD_LENGTH;

View File

@ -30,15 +30,15 @@ class WordsPriorityQueue {
class SuggestedWord { class SuggestedWord {
public: public:
int mScore; int mScore;
unsigned short mWord[MAX_WORD_LENGTH_INTERNAL]; int mWord[MAX_WORD_LENGTH_INTERNAL];
int mWordLength; int mWordLength;
bool mUsed; bool mUsed;
int mType; int mType;
void setParams(int score, unsigned short *word, int wordLength, int type) { void setParams(int score, int *word, int wordLength, int type) {
mScore = score; mScore = score;
mWordLength = wordLength; mWordLength = wordLength;
memcpy(mWord, word, sizeof(unsigned short) * wordLength); memcpy(mWord, word, sizeof(int) * wordLength);
mUsed = true; mUsed = true;
mType = type; mType = type;
} }
@ -57,9 +57,9 @@ class WordsPriorityQueue {
delete[] mSuggestedWords; delete[] mSuggestedWords;
} }
void push(int score, unsigned short *word, int wordLength, int type) { void push(int score, int *word, int wordLength, int type) {
SuggestedWord *sw = 0; SuggestedWord *sw = 0;
if (mSuggestions.size() >= MAX_WORDS) { if (size() >= MAX_WORDS) {
sw = mSuggestions.top(); sw = mSuggestions.top();
const int minScore = sw->mScore; const int minScore = sw->mScore;
if (minScore >= score) { if (minScore >= score) {
@ -94,11 +94,10 @@ class WordsPriorityQueue {
return sw; return sw;
} }
int outputSuggestions(const unsigned short *before, const int beforeLength, int outputSuggestions(const int *before, const int beforeLength, int *frequencies,
int *frequencies, unsigned short *outputChars, int* outputTypes) { int *outputCodePoints, int* outputTypes) {
mHighestSuggestedWord = 0; mHighestSuggestedWord = 0;
const unsigned int size = min( const int size = min(MAX_WORDS, static_cast<int>(mSuggestions.size()));
MAX_WORDS, static_cast<unsigned int>(mSuggestions.size()));
SuggestedWord *swBuffer[size]; SuggestedWord *swBuffer[size];
int index = size - 1; int index = size - 1;
while (!mSuggestions.empty() && index >= 0) { while (!mSuggestions.empty() && index >= 0) {
@ -113,9 +112,9 @@ class WordsPriorityQueue {
} }
if (size >= 2) { if (size >= 2) {
SuggestedWord *nsMaxSw = 0; SuggestedWord *nsMaxSw = 0;
unsigned int maxIndex = 0; int maxIndex = 0;
float maxNs = 0; float maxNs = 0;
for (unsigned int i = 0; i < size; ++i) { for (int i = 0; i < size; ++i) {
SuggestedWord *tempSw = swBuffer[i]; SuggestedWord *tempSw = swBuffer[i];
if (!tempSw) { if (!tempSw) {
continue; continue;
@ -132,17 +131,17 @@ class WordsPriorityQueue {
swBuffer[0] = nsMaxSw; swBuffer[0] = nsMaxSw;
} }
} }
for (unsigned int i = 0; i < size; ++i) { for (int i = 0; i < size; ++i) {
SuggestedWord *sw = swBuffer[i]; SuggestedWord *sw = swBuffer[i];
if (!sw) { if (!sw) {
AKLOGE("SuggestedWord is null %d", i); AKLOGE("SuggestedWord is null %d", i);
continue; continue;
} }
const unsigned int wordLength = sw->mWordLength; const int wordLength = sw->mWordLength;
unsigned short *targetAddress = outputChars + i * MAX_WORD_LENGTH; int *targetAddress = outputCodePoints + i * MAX_WORD_LENGTH;
frequencies[i] = sw->mScore; frequencies[i] = sw->mScore;
outputTypes[i] = sw->mType; outputTypes[i] = sw->mType;
memcpy(targetAddress, sw->mWord, wordLength * sizeof(unsigned short)); memcpy(targetAddress, sw->mWord, wordLength * sizeof(int));
if (wordLength < MAX_WORD_LENGTH) { if (wordLength < MAX_WORD_LENGTH) {
targetAddress[wordLength] = 0; targetAddress[wordLength] = 0;
} }
@ -152,7 +151,7 @@ class WordsPriorityQueue {
} }
int size() const { int size() const {
return mSuggestions.size(); return static_cast<int>(mSuggestions.size());
} }
void clear() { void clear() {
@ -175,13 +174,13 @@ class WordsPriorityQueue {
DUMP_WORD(mHighestSuggestedWord->mWord, mHighestSuggestedWord->mWordLength); DUMP_WORD(mHighestSuggestedWord->mWord, mHighestSuggestedWord->mWordLength);
} }
float getHighestNormalizedScore(const unsigned short *before, const int beforeLength, float getHighestNormalizedScore(const int *before, const int beforeLength, int **outWord,
unsigned short **outWord, int *outScore, int *outLength) { int *outScore, int *outLength) {
if (!mHighestSuggestedWord) { if (!mHighestSuggestedWord) {
return 0.0; return 0.0;
} }
return getNormalizedScore( return getNormalizedScore(mHighestSuggestedWord, before, beforeLength, outWord, outScore,
mHighestSuggestedWord, before, beforeLength, outWord, outScore, outLength); outLength);
} }
private: private:
@ -192,9 +191,8 @@ class WordsPriorityQueue {
} }
}; };
SuggestedWord *getFreeSuggestedWord(int score, unsigned short *word, SuggestedWord *getFreeSuggestedWord(int score, int *word, int wordLength, int type) {
int wordLength, int type) { for (int i = 0; i < MAX_WORD_LENGTH; ++i) {
for (unsigned int i = 0; i < MAX_WORD_LENGTH; ++i) {
if (!mSuggestedWords[i].mUsed) { if (!mSuggestedWords[i].mUsed) {
mSuggestedWords[i].setParams(score, word, wordLength, type); mSuggestedWords[i].setParams(score, word, wordLength, type);
return &mSuggestedWords[i]; return &mSuggestedWords[i];
@ -203,10 +201,10 @@ class WordsPriorityQueue {
return 0; return 0;
} }
static float getNormalizedScore(SuggestedWord *sw, const unsigned short *before, static float getNormalizedScore(SuggestedWord *sw, const int *before, const int beforeLength,
const int beforeLength, unsigned short **outWord, int *outScore, int *outLength) { int **outWord, int *outScore, int *outLength) {
const int score = sw->mScore; const int score = sw->mScore;
unsigned short *word = sw->mWord; int *word = sw->mWord;
const int wordLength = sw->mWordLength; const int wordLength = sw->mWordLength;
if (outScore) { if (outScore) {
*outScore = score; *outScore = score;
@ -217,15 +215,15 @@ class WordsPriorityQueue {
if (outLength) { if (outLength) {
*outLength = wordLength; *outLength = wordLength;
} }
return Correction::RankingAlgorithm::calcNormalizedScore( return Correction::RankingAlgorithm::calcNormalizedScore(before, beforeLength, word,
before, beforeLength, word, wordLength, score); wordLength, score);
} }
typedef std::priority_queue<SuggestedWord *, std::vector<SuggestedWord *>, typedef std::priority_queue<SuggestedWord *, std::vector<SuggestedWord *>,
wordComparator> Suggestions; wordComparator> Suggestions;
Suggestions mSuggestions; Suggestions mSuggestions;
const unsigned int MAX_WORDS; const int MAX_WORDS;
const unsigned int MAX_WORD_LENGTH; const int MAX_WORD_LENGTH;
SuggestedWord *mSuggestedWords; SuggestedWord *mSuggestedWords;
SuggestedWord *mHighestSuggestedWord; SuggestedWord *mHighestSuggestedWord;
}; };