Use 32-bit code points for suggestions output
This is a multi-project commit with Ic43dd666
bug: 6526418
Change-Id: I39c1acb4e91d04cd8a4ec5a943c8cf575da75ebc
main
parent
0ea2c80d8d
commit
1e61493c50
|
@ -51,8 +51,7 @@ public final class BinaryDictionary extends Dictionary {
|
||||||
private long mNativeDict;
|
private long mNativeDict;
|
||||||
private final Locale mLocale;
|
private final Locale mLocale;
|
||||||
private final int[] mInputCodePoints = new int[MAX_WORD_LENGTH];
|
private final int[] mInputCodePoints = new int[MAX_WORD_LENGTH];
|
||||||
// TODO: The below should be int[] mOutputCodePoints
|
private final int[] mOutputCodePoints = new int[MAX_WORD_LENGTH * MAX_RESULTS];
|
||||||
private final char[] mOutputChars = new char[MAX_WORD_LENGTH * MAX_RESULTS];
|
|
||||||
private final int[] mSpaceIndices = new int[MAX_SPACES];
|
private final int[] mSpaceIndices = new int[MAX_SPACES];
|
||||||
private final int[] mOutputScores = new int[MAX_RESULTS];
|
private final int[] mOutputScores = new int[MAX_RESULTS];
|
||||||
private final int[] mOutputTypes = new int[MAX_RESULTS];
|
private final int[] mOutputTypes = new int[MAX_RESULTS];
|
||||||
|
@ -88,9 +87,9 @@ public final class BinaryDictionary extends Dictionary {
|
||||||
* @param useFullEditDistance whether to use the full edit distance in suggestions
|
* @param useFullEditDistance whether to use the full edit distance in suggestions
|
||||||
* @param dictType the dictionary type, as a human-readable string
|
* @param dictType the dictionary type, as a human-readable string
|
||||||
*/
|
*/
|
||||||
public BinaryDictionary(final Context context,
|
public BinaryDictionary(final Context context, final String filename, final long offset,
|
||||||
final String filename, final long offset, final long length,
|
final long length, final boolean useFullEditDistance, final Locale locale,
|
||||||
final boolean useFullEditDistance, final Locale locale, final String dictType) {
|
final String dictType) {
|
||||||
super(dictType);
|
super(dictType);
|
||||||
mLocale = locale;
|
mLocale = locale;
|
||||||
mUseFullEditDistance = useFullEditDistance;
|
mUseFullEditDistance = useFullEditDistance;
|
||||||
|
@ -109,10 +108,10 @@ public final class BinaryDictionary extends Dictionary {
|
||||||
private native int getSuggestionsNative(long dict, long proximityInfo, long traverseSession,
|
private native int getSuggestionsNative(long dict, long proximityInfo, long traverseSession,
|
||||||
int[] xCoordinates, int[] yCoordinates, int[] times, int[] pointerIds,
|
int[] xCoordinates, int[] yCoordinates, int[] times, int[] pointerIds,
|
||||||
int[] inputCodePoints, int codesSize, int commitPoint, boolean isGesture,
|
int[] inputCodePoints, int codesSize, int commitPoint, boolean isGesture,
|
||||||
int[] prevWordCodePointArray, boolean useFullEditDistance, char[] outputChars,
|
int[] prevWordCodePointArray, boolean useFullEditDistance, int[] outputCodePoints,
|
||||||
int[] outputScores, int[] outputIndices, int[] outputTypes);
|
int[] outputScores, int[] outputIndices, int[] outputTypes);
|
||||||
private static native float calcNormalizedScoreNative(char[] before, char[] after, int score);
|
private static native float calcNormalizedScoreNative(int[] before, int[] after, int score);
|
||||||
private static native int editDistanceNative(char[] before, char[] after);
|
private static native int editDistanceNative(int[] before, int[] after);
|
||||||
|
|
||||||
// TODO: Move native dict into session
|
// TODO: Move native dict into session
|
||||||
private final void loadDictionary(final String path, final long startOffset,
|
private final void loadDictionary(final String path, final long startOffset,
|
||||||
|
@ -153,7 +152,8 @@ public final class BinaryDictionary extends Dictionary {
|
||||||
proximityInfo.getNativeProximityInfo(), getTraverseSession(sessionId).getSession(),
|
proximityInfo.getNativeProximityInfo(), getTraverseSession(sessionId).getSession(),
|
||||||
ips.getXCoordinates(), ips.getYCoordinates(), ips.getTimes(), ips.getPointerIds(),
|
ips.getXCoordinates(), ips.getYCoordinates(), ips.getTimes(), ips.getPointerIds(),
|
||||||
mInputCodePoints, codesSize, 0 /* commitPoint */, isGesture, prevWordCodePointArray,
|
mInputCodePoints, codesSize, 0 /* commitPoint */, isGesture, prevWordCodePointArray,
|
||||||
mUseFullEditDistance, mOutputChars, mOutputScores, mSpaceIndices, mOutputTypes);
|
mUseFullEditDistance, mOutputCodePoints, mOutputScores, mSpaceIndices,
|
||||||
|
mOutputTypes);
|
||||||
final int count = Math.min(tmpCount, MAX_PREDICTIONS);
|
final int count = Math.min(tmpCount, MAX_PREDICTIONS);
|
||||||
|
|
||||||
final ArrayList<SuggestedWordInfo> suggestions = CollectionUtils.newArrayList();
|
final ArrayList<SuggestedWordInfo> suggestions = CollectionUtils.newArrayList();
|
||||||
|
@ -161,14 +161,14 @@ public final class BinaryDictionary extends Dictionary {
|
||||||
if (composerSize > 0 && mOutputScores[j] < 1) break;
|
if (composerSize > 0 && mOutputScores[j] < 1) break;
|
||||||
final int start = j * MAX_WORD_LENGTH;
|
final int start = j * MAX_WORD_LENGTH;
|
||||||
int len = 0;
|
int len = 0;
|
||||||
while (len < MAX_WORD_LENGTH && mOutputChars[start + len] != 0) {
|
while (len < MAX_WORD_LENGTH && mOutputCodePoints[start + len] != 0) {
|
||||||
++len;
|
++len;
|
||||||
}
|
}
|
||||||
if (len > 0) {
|
if (len > 0) {
|
||||||
final int score = SuggestedWordInfo.KIND_WHITELIST == mOutputTypes[j]
|
final int score = SuggestedWordInfo.KIND_WHITELIST == mOutputTypes[j]
|
||||||
? SuggestedWordInfo.MAX_SCORE : mOutputScores[j];
|
? SuggestedWordInfo.MAX_SCORE : mOutputScores[j];
|
||||||
suggestions.add(new SuggestedWordInfo(
|
suggestions.add(new SuggestedWordInfo(new String(mOutputCodePoints, start, len),
|
||||||
new String(mOutputChars, start, len), score, mOutputTypes[j], mDictType));
|
score, mOutputTypes[j], mDictType));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return suggestions;
|
return suggestions;
|
||||||
|
@ -180,14 +180,16 @@ public final class BinaryDictionary extends Dictionary {
|
||||||
|
|
||||||
public static float calcNormalizedScore(final String before, final String after,
|
public static float calcNormalizedScore(final String before, final String after,
|
||||||
final int score) {
|
final int score) {
|
||||||
return calcNormalizedScoreNative(before.toCharArray(), after.toCharArray(), score);
|
return calcNormalizedScoreNative(StringUtils.toCodePointArray(before),
|
||||||
|
StringUtils.toCodePointArray(after), score);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static int editDistance(final String before, final String after) {
|
public static int editDistance(final String before, final String after) {
|
||||||
if (before == null || after == null) {
|
if (before == null || after == null) {
|
||||||
throw new IllegalArgumentException();
|
throw new IllegalArgumentException();
|
||||||
}
|
}
|
||||||
return editDistanceNative(before.toCharArray(), after.toCharArray());
|
return editDistanceNative(StringUtils.toCodePointArray(before),
|
||||||
|
StringUtils.toCodePointArray(after));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -206,9 +208,9 @@ public final class BinaryDictionary extends Dictionary {
|
||||||
// calls when checking for changes in an entire dictionary.
|
// calls when checking for changes in an entire dictionary.
|
||||||
public boolean isValidBigram(final String word1, final String word2) {
|
public boolean isValidBigram(final String word1, final String word2) {
|
||||||
if (TextUtils.isEmpty(word1) || TextUtils.isEmpty(word2)) return false;
|
if (TextUtils.isEmpty(word1) || TextUtils.isEmpty(word2)) return false;
|
||||||
final int[] chars1 = StringUtils.toCodePointArray(word1);
|
final int[] codePoints1 = StringUtils.toCodePointArray(word1);
|
||||||
final int[] chars2 = StringUtils.toCodePointArray(word2);
|
final int[] codePoints2 = StringUtils.toCodePointArray(word2);
|
||||||
return isValidBigramNative(mNativeDict, chars1, chars2);
|
return isValidBigramNative(mNativeDict, codePoints1, codePoints2);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -132,7 +132,7 @@ static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jobject object,
|
||||||
jintArray yCoordinatesArray, jintArray timesArray, jintArray pointerIdsArray,
|
jintArray yCoordinatesArray, jintArray timesArray, jintArray pointerIdsArray,
|
||||||
jintArray inputCodePointsArray, jint arraySize, jint commitPoint, jboolean isGesture,
|
jintArray inputCodePointsArray, jint arraySize, jint commitPoint, jboolean isGesture,
|
||||||
jintArray prevWordCodePointsForBigrams, jboolean useFullEditDistance,
|
jintArray prevWordCodePointsForBigrams, jboolean useFullEditDistance,
|
||||||
jcharArray outputCharsArray, jintArray scoresArray, jintArray spaceIndicesArray,
|
jintArray outputCodePointsArray, jintArray scoresArray, jintArray spaceIndicesArray,
|
||||||
jintArray outputTypesArray) {
|
jintArray outputTypesArray) {
|
||||||
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
|
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
|
||||||
if (!dictionary) return 0;
|
if (!dictionary) return 0;
|
||||||
|
@ -162,16 +162,15 @@ static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jobject object,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Output values
|
// Output values
|
||||||
// TODO: Should be "outputCodePointsLength" and "int outputCodePoints[]"
|
const jsize outputCodePointsLength = env->GetArrayLength(outputCodePointsArray);
|
||||||
const jsize outputCharsLength = env->GetArrayLength(outputCharsArray);
|
int outputCodePoints[outputCodePointsLength];
|
||||||
unsigned short outputChars[outputCharsLength];
|
|
||||||
const jsize scoresLength = env->GetArrayLength(scoresArray);
|
const jsize scoresLength = env->GetArrayLength(scoresArray);
|
||||||
int scores[scoresLength];
|
int scores[scoresLength];
|
||||||
const jsize spaceIndicesLength = env->GetArrayLength(spaceIndicesArray);
|
const jsize spaceIndicesLength = env->GetArrayLength(spaceIndicesArray);
|
||||||
int spaceIndices[spaceIndicesLength];
|
int spaceIndices[spaceIndicesLength];
|
||||||
const jsize outputTypesLength = env->GetArrayLength(outputTypesArray);
|
const jsize outputTypesLength = env->GetArrayLength(outputTypesArray);
|
||||||
int outputTypes[outputTypesLength];
|
int outputTypes[outputTypesLength];
|
||||||
memset(outputChars, 0, sizeof(outputChars));
|
memset(outputCodePoints, 0, sizeof(outputCodePoints));
|
||||||
memset(scores, 0, sizeof(scores));
|
memset(scores, 0, sizeof(scores));
|
||||||
memset(spaceIndices, 0, sizeof(spaceIndices));
|
memset(spaceIndices, 0, sizeof(spaceIndices));
|
||||||
memset(outputTypes, 0, sizeof(outputTypes));
|
memset(outputTypes, 0, sizeof(outputTypes));
|
||||||
|
@ -180,16 +179,15 @@ static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jobject object,
|
||||||
if (isGesture || arraySize > 0) {
|
if (isGesture || arraySize > 0) {
|
||||||
count = dictionary->getSuggestions(pInfo, traverseSession, xCoordinates, yCoordinates,
|
count = dictionary->getSuggestions(pInfo, traverseSession, xCoordinates, yCoordinates,
|
||||||
times, pointerIds, inputCodePoints, arraySize, prevWordCodePoints,
|
times, pointerIds, inputCodePoints, arraySize, prevWordCodePoints,
|
||||||
prevWordCodePointsLength, commitPoint, isGesture, useFullEditDistance, outputChars,
|
prevWordCodePointsLength, commitPoint, isGesture, useFullEditDistance,
|
||||||
scores, spaceIndices, outputTypes);
|
outputCodePoints, scores, spaceIndices, outputTypes);
|
||||||
} else {
|
} else {
|
||||||
count = dictionary->getBigrams(prevWordCodePoints, prevWordCodePointsLength,
|
count = dictionary->getBigrams(prevWordCodePoints, prevWordCodePointsLength,
|
||||||
inputCodePoints, arraySize, outputChars, scores, outputTypes);
|
inputCodePoints, arraySize, outputCodePoints, scores, outputTypes);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Copy back the output values
|
// Copy back the output values
|
||||||
// TODO: Should be SetIntArrayRegion()
|
env->SetIntArrayRegion(outputCodePointsArray, 0, outputCodePointsLength, outputCodePoints);
|
||||||
env->SetCharArrayRegion(outputCharsArray, 0, outputCharsLength, outputChars);
|
|
||||||
env->SetIntArrayRegion(scoresArray, 0, scoresLength, scores);
|
env->SetIntArrayRegion(scoresArray, 0, scoresLength, scores);
|
||||||
env->SetIntArrayRegion(spaceIndicesArray, 0, spaceIndicesLength, spaceIndices);
|
env->SetIntArrayRegion(spaceIndicesArray, 0, spaceIndicesLength, spaceIndices);
|
||||||
env->SetIntArrayRegion(outputTypesArray, 0, outputTypesLength, outputTypes);
|
env->SetIntArrayRegion(outputTypesArray, 0, outputTypesLength, outputTypes);
|
||||||
|
@ -221,29 +219,27 @@ static jboolean latinime_BinaryDictionary_isValidBigram(JNIEnv *env, jobject obj
|
||||||
}
|
}
|
||||||
|
|
||||||
static jfloat latinime_BinaryDictionary_calcNormalizedScore(JNIEnv *env, jobject object,
|
static jfloat latinime_BinaryDictionary_calcNormalizedScore(JNIEnv *env, jobject object,
|
||||||
jcharArray before, jcharArray after, jint score) {
|
jintArray before, jintArray after, jint score) {
|
||||||
jsize beforeLength = env->GetArrayLength(before);
|
jsize beforeLength = env->GetArrayLength(before);
|
||||||
jsize afterLength = env->GetArrayLength(after);
|
jsize afterLength = env->GetArrayLength(after);
|
||||||
jchar beforeChars[beforeLength];
|
int beforeCodePoints[beforeLength];
|
||||||
jchar afterChars[afterLength];
|
int afterCodePoints[afterLength];
|
||||||
env->GetCharArrayRegion(before, 0, beforeLength, beforeChars);
|
env->GetIntArrayRegion(before, 0, beforeLength, beforeCodePoints);
|
||||||
env->GetCharArrayRegion(after, 0, afterLength, afterChars);
|
env->GetIntArrayRegion(after, 0, afterLength, afterCodePoints);
|
||||||
return Correction::RankingAlgorithm::calcNormalizedScore(
|
return Correction::RankingAlgorithm::calcNormalizedScore(beforeCodePoints, beforeLength,
|
||||||
static_cast<unsigned short *>(beforeChars), beforeLength,
|
afterCodePoints, afterLength, score);
|
||||||
static_cast<unsigned short *>(afterChars), afterLength, score);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static jint latinime_BinaryDictionary_editDistance(JNIEnv *env, jobject object,
|
static jint latinime_BinaryDictionary_editDistance(JNIEnv *env, jobject object, jintArray before,
|
||||||
jcharArray before, jcharArray after) {
|
jintArray after) {
|
||||||
jsize beforeLength = env->GetArrayLength(before);
|
jsize beforeLength = env->GetArrayLength(before);
|
||||||
jsize afterLength = env->GetArrayLength(after);
|
jsize afterLength = env->GetArrayLength(after);
|
||||||
jchar beforeChars[beforeLength];
|
int beforeCodePoints[beforeLength];
|
||||||
jchar afterChars[afterLength];
|
int afterCodePoints[afterLength];
|
||||||
env->GetCharArrayRegion(before, 0, beforeLength, beforeChars);
|
env->GetIntArrayRegion(before, 0, beforeLength, beforeCodePoints);
|
||||||
env->GetCharArrayRegion(after, 0, afterLength, afterChars);
|
env->GetIntArrayRegion(after, 0, afterLength, afterCodePoints);
|
||||||
return Correction::RankingAlgorithm::editDistance(
|
return Correction::RankingAlgorithm::editDistance(beforeCodePoints, beforeLength,
|
||||||
static_cast<unsigned short *>(beforeChars), beforeLength,
|
afterCodePoints, afterLength);
|
||||||
static_cast<unsigned short *>(afterChars), afterLength);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void latinime_BinaryDictionary_close(JNIEnv *env, jobject object, jlong dict) {
|
static void latinime_BinaryDictionary_close(JNIEnv *env, jobject object, jlong dict) {
|
||||||
|
@ -279,15 +275,15 @@ static JNINativeMethod sMethods[] = {
|
||||||
{"openNative", "(Ljava/lang/String;JJIIII)J",
|
{"openNative", "(Ljava/lang/String;JJIIII)J",
|
||||||
reinterpret_cast<void *>(latinime_BinaryDictionary_open)},
|
reinterpret_cast<void *>(latinime_BinaryDictionary_open)},
|
||||||
{"closeNative", "(J)V", reinterpret_cast<void *>(latinime_BinaryDictionary_close)},
|
{"closeNative", "(J)V", reinterpret_cast<void *>(latinime_BinaryDictionary_close)},
|
||||||
{"getSuggestionsNative", "(JJJ[I[I[I[I[IIIZ[IZ[C[I[I[I)I",
|
{"getSuggestionsNative", "(JJJ[I[I[I[I[IIIZ[IZ[I[I[I[I)I",
|
||||||
reinterpret_cast<void *>(latinime_BinaryDictionary_getSuggestions)},
|
reinterpret_cast<void *>(latinime_BinaryDictionary_getSuggestions)},
|
||||||
{"getFrequencyNative", "(J[I)I",
|
{"getFrequencyNative", "(J[I)I",
|
||||||
reinterpret_cast<void *>(latinime_BinaryDictionary_getFrequency)},
|
reinterpret_cast<void *>(latinime_BinaryDictionary_getFrequency)},
|
||||||
{"isValidBigramNative", "(J[I[I)Z",
|
{"isValidBigramNative", "(J[I[I)Z",
|
||||||
reinterpret_cast<void *>(latinime_BinaryDictionary_isValidBigram)},
|
reinterpret_cast<void *>(latinime_BinaryDictionary_isValidBigram)},
|
||||||
{"calcNormalizedScoreNative", "([C[CI)F",
|
{"calcNormalizedScoreNative", "([I[II)F",
|
||||||
reinterpret_cast<void *>(latinime_BinaryDictionary_calcNormalizedScore)},
|
reinterpret_cast<void *>(latinime_BinaryDictionary_calcNormalizedScore)},
|
||||||
{"editDistanceNative", "([C[C)I",
|
{"editDistanceNative", "([I[I)I",
|
||||||
reinterpret_cast<void *>(latinime_BinaryDictionary_editDistance)}
|
reinterpret_cast<void *>(latinime_BinaryDictionary_editDistance)}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -36,13 +36,13 @@ BigramDictionary::BigramDictionary(const unsigned char *dict, int maxWordLength,
|
||||||
BigramDictionary::~BigramDictionary() {
|
BigramDictionary::~BigramDictionary() {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequency,
|
bool BigramDictionary::addWordBigram(int *word, int length, int frequency, int *bigramFreq,
|
||||||
int *bigramFreq, unsigned short *bigramChars, int *outputTypes) const {
|
int *bigramCodePoints, int *outputTypes) const {
|
||||||
word[length] = 0;
|
word[length] = 0;
|
||||||
if (DEBUG_DICT) {
|
if (DEBUG_DICT) {
|
||||||
#ifdef FLAG_DBG
|
#ifdef FLAG_DBG
|
||||||
char s[length + 1];
|
char s[length + 1];
|
||||||
for (int i = 0; i <= length; i++) s[i] = word[i];
|
for (int i = 0; i <= length; i++) s[i] = static_cast<char>(word[i]);
|
||||||
AKLOGI("Bigram: Found word = %s, freq = %d :", s, frequency);
|
AKLOGI("Bigram: Found word = %s, freq = %d :", s, frequency);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
@ -51,7 +51,8 @@ bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequ
|
||||||
int insertAt = 0;
|
int insertAt = 0;
|
||||||
while (insertAt < MAX_PREDICTIONS) {
|
while (insertAt < MAX_PREDICTIONS) {
|
||||||
if (frequency > bigramFreq[insertAt] || (bigramFreq[insertAt] == frequency
|
if (frequency > bigramFreq[insertAt] || (bigramFreq[insertAt] == frequency
|
||||||
&& length < Dictionary::wideStrLen(bigramChars + insertAt * MAX_WORD_LENGTH))) {
|
&& length < Dictionary::wideStrLen(
|
||||||
|
bigramCodePoints + insertAt * MAX_WORD_LENGTH))) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
insertAt++;
|
insertAt++;
|
||||||
|
@ -65,10 +66,10 @@ bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequ
|
||||||
(MAX_PREDICTIONS - insertAt - 1) * sizeof(bigramFreq[0]));
|
(MAX_PREDICTIONS - insertAt - 1) * sizeof(bigramFreq[0]));
|
||||||
bigramFreq[insertAt] = frequency;
|
bigramFreq[insertAt] = frequency;
|
||||||
outputTypes[insertAt] = Dictionary::KIND_PREDICTION;
|
outputTypes[insertAt] = Dictionary::KIND_PREDICTION;
|
||||||
memmove(bigramChars + (insertAt + 1) * MAX_WORD_LENGTH,
|
memmove(bigramCodePoints + (insertAt + 1) * MAX_WORD_LENGTH,
|
||||||
bigramChars + insertAt * MAX_WORD_LENGTH,
|
bigramCodePoints + insertAt * MAX_WORD_LENGTH,
|
||||||
(MAX_PREDICTIONS - insertAt - 1) * sizeof(bigramChars[0]) * MAX_WORD_LENGTH);
|
(MAX_PREDICTIONS - insertAt - 1) * sizeof(bigramCodePoints[0]) * MAX_WORD_LENGTH);
|
||||||
unsigned short *dest = bigramChars + insertAt * MAX_WORD_LENGTH;
|
int *dest = bigramCodePoints + insertAt * MAX_WORD_LENGTH;
|
||||||
while (length--) {
|
while (length--) {
|
||||||
*dest++ = *word++;
|
*dest++ = *word++;
|
||||||
}
|
}
|
||||||
|
@ -86,7 +87,7 @@ bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequ
|
||||||
* prevWordLength: its length.
|
* prevWordLength: its length.
|
||||||
* inputCodes: what user typed, in the same format as for UnigramDictionary::getSuggestions.
|
* inputCodes: what user typed, in the same format as for UnigramDictionary::getSuggestions.
|
||||||
* codesSize: the size of the codes array.
|
* codesSize: the size of the codes array.
|
||||||
* bigramChars: an array for output, at the same format as outwords for getSuggestions.
|
* bigramCodePoints: an array for output, at the same format as outwords for getSuggestions.
|
||||||
* bigramFreq: an array to output frequencies.
|
* bigramFreq: an array to output frequencies.
|
||||||
* outputTypes: an array to output types.
|
* outputTypes: an array to output types.
|
||||||
* This method returns the number of bigrams this word has, for backward compatibility.
|
* This method returns the number of bigrams this word has, for backward compatibility.
|
||||||
|
@ -97,8 +98,8 @@ bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequ
|
||||||
* and the bigrams are used to boost unigram result scores, it makes little sense to
|
* and the bigrams are used to boost unigram result scores, it makes little sense to
|
||||||
* reduce their scope to the ones that match the first letter.
|
* reduce their scope to the ones that match the first letter.
|
||||||
*/
|
*/
|
||||||
int BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, int *inputCodes,
|
int BigramDictionary::getBigrams(const int *prevWord, int prevWordLength, int *inputCodes,
|
||||||
int codesSize, unsigned short *bigramChars, int *bigramFreq, int *outputTypes) const {
|
int codesSize, int *bigramCodePoints, int *bigramFreq, int *outputTypes) const {
|
||||||
// TODO: remove unused arguments, and refrain from storing stuff in members of this class
|
// TODO: remove unused arguments, and refrain from storing stuff in members of this class
|
||||||
// TODO: have "in" arguments before "out" ones, and make out args explicit in the name
|
// TODO: have "in" arguments before "out" ones, and make out args explicit in the name
|
||||||
|
|
||||||
|
@ -117,7 +118,7 @@ int BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, in
|
||||||
int bigramCount = 0;
|
int bigramCount = 0;
|
||||||
do {
|
do {
|
||||||
bigramFlags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
|
bigramFlags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
|
||||||
uint16_t bigramBuffer[MAX_WORD_LENGTH];
|
int bigramBuffer[MAX_WORD_LENGTH];
|
||||||
int unigramFreq = 0;
|
int unigramFreq = 0;
|
||||||
const int bigramPos = BinaryFormat::getAttributeAddressAndForwardPointer(root, bigramFlags,
|
const int bigramPos = BinaryFormat::getAttributeAddressAndForwardPointer(root, bigramFlags,
|
||||||
&pos);
|
&pos);
|
||||||
|
@ -134,7 +135,7 @@ int BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, in
|
||||||
// here, but it can't get too bad.
|
// here, but it can't get too bad.
|
||||||
const int frequency =
|
const int frequency =
|
||||||
BinaryFormat::computeFrequencyForBigram(unigramFreq, bigramFreqTemp);
|
BinaryFormat::computeFrequencyForBigram(unigramFreq, bigramFreqTemp);
|
||||||
if (addWordBigram(bigramBuffer, length, frequency, bigramFreq, bigramChars,
|
if (addWordBigram(bigramBuffer, length, frequency, bigramFreq, bigramCodePoints,
|
||||||
outputTypes)) {
|
outputTypes)) {
|
||||||
++bigramCount;
|
++bigramCount;
|
||||||
}
|
}
|
||||||
|
@ -190,12 +191,12 @@ void BigramDictionary::fillBigramAddressToFrequencyMapAndFilter(const int32_t *p
|
||||||
} while (0 != (BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags));
|
} while (0 != (BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags));
|
||||||
}
|
}
|
||||||
|
|
||||||
bool BigramDictionary::checkFirstCharacter(unsigned short *word, int *inputCodes) const {
|
bool BigramDictionary::checkFirstCharacter(int *word, int *inputCodes) const {
|
||||||
// Checks whether this word starts with same character or neighboring characters of
|
// Checks whether this word starts with same character or neighboring characters of
|
||||||
// what user typed.
|
// what user typed.
|
||||||
|
|
||||||
int maxAlt = MAX_ALTERNATIVES;
|
int maxAlt = MAX_ALTERNATIVES;
|
||||||
const unsigned short firstBaseChar = toBaseLowerCase(*word);
|
const int firstBaseChar = toBaseLowerCase(*word);
|
||||||
while (maxAlt > 0) {
|
while (maxAlt > 0) {
|
||||||
if (toBaseLowerCase(*inputCodes) == firstBaseChar) {
|
if (toBaseLowerCase(*inputCodes) == firstBaseChar) {
|
||||||
return true;
|
return true;
|
||||||
|
|
|
@ -27,23 +27,23 @@ namespace latinime {
|
||||||
class BigramDictionary {
|
class BigramDictionary {
|
||||||
public:
|
public:
|
||||||
BigramDictionary(const unsigned char *dict, int maxWordLength, int maxPredictions);
|
BigramDictionary(const unsigned char *dict, int maxWordLength, int maxPredictions);
|
||||||
int getBigrams(const int32_t *word, int length, int *inputCodes, int codesSize,
|
int getBigrams(const int *word, int length, int *inputCodes, int codesSize, int *outWords,
|
||||||
unsigned short *outWords, int *frequencies, int *outputTypes) const;
|
int *frequencies, int *outputTypes) const;
|
||||||
void fillBigramAddressToFrequencyMapAndFilter(const int32_t *prevWord, const int prevWordLength,
|
void fillBigramAddressToFrequencyMapAndFilter(const int *prevWord, const int prevWordLength,
|
||||||
std::map<int, int> *map, uint8_t *filter) const;
|
std::map<int, int> *map, uint8_t *filter) const;
|
||||||
bool isValidBigram(const int32_t *word1, int length1, const int32_t *word2, int length2) const;
|
bool isValidBigram(const int *word1, int length1, const int *word2, int length2) const;
|
||||||
~BigramDictionary();
|
~BigramDictionary();
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(BigramDictionary);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(BigramDictionary);
|
||||||
bool addWordBigram(unsigned short *word, int length, int frequency,
|
bool addWordBigram(int *word, int length, int frequency, int *bigramFreq, int *bigramCodePoints,
|
||||||
int *bigramFreq, unsigned short *bigramChars, int *outputTypes) const;
|
int *outputTypes) const;
|
||||||
int getBigramAddress(int *pos, bool advance);
|
int getBigramAddress(int *pos, bool advance);
|
||||||
int getBigramFreq(int *pos);
|
int getBigramFreq(int *pos);
|
||||||
void searchForTerminalNode(int addressLookingFor, int frequency);
|
void searchForTerminalNode(int addressLookingFor, int frequency);
|
||||||
bool getFirstBitOfByte(int *pos) { return (DICT[*pos] & 0x80) > 0; }
|
bool getFirstBitOfByte(int *pos) { return (DICT[*pos] & 0x80) > 0; }
|
||||||
bool getSecondBitOfByte(int *pos) { return (DICT[*pos] & 0x40) > 0; }
|
bool getSecondBitOfByte(int *pos) { return (DICT[*pos] & 0x40) > 0; }
|
||||||
bool checkFirstCharacter(unsigned short *word, int *inputCodes) const;
|
bool checkFirstCharacter(int *word, int *inputCodes) const;
|
||||||
int getBigramListPositionForWord(const int32_t *prevWord, const int prevWordLength,
|
int getBigramListPositionForWord(const int *prevWord, const int prevWordLength,
|
||||||
const bool forceLowerCaseSearch) const;
|
const bool forceLowerCaseSearch) const;
|
||||||
|
|
||||||
const unsigned char *DICT;
|
const unsigned char *DICT;
|
||||||
|
|
|
@ -84,7 +84,7 @@ class BinaryFormat {
|
||||||
static unsigned int getFlags(const uint8_t *const dict);
|
static unsigned int getFlags(const uint8_t *const dict);
|
||||||
static int getGroupCountAndForwardPointer(const uint8_t *const dict, int *pos);
|
static int getGroupCountAndForwardPointer(const uint8_t *const dict, int *pos);
|
||||||
static uint8_t getFlagsAndForwardPointer(const uint8_t *const dict, int *pos);
|
static uint8_t getFlagsAndForwardPointer(const uint8_t *const dict, int *pos);
|
||||||
static int32_t getCodePointAndForwardPointer(const uint8_t *const dict, int *pos);
|
static int getCodePointAndForwardPointer(const uint8_t *const dict, int *pos);
|
||||||
static int readFrequencyWithoutMovingPointer(const uint8_t *const dict, const int pos);
|
static int readFrequencyWithoutMovingPointer(const uint8_t *const dict, const int pos);
|
||||||
static int skipOtherCharacters(const uint8_t *const dict, const int pos);
|
static int skipOtherCharacters(const uint8_t *const dict, const int pos);
|
||||||
static int skipChildrenPosition(const uint8_t flags, const int pos);
|
static int skipChildrenPosition(const uint8_t flags, const int pos);
|
||||||
|
@ -98,10 +98,10 @@ class BinaryFormat {
|
||||||
static int getAttributeAddressAndForwardPointer(const uint8_t *const dict, const uint8_t flags,
|
static int getAttributeAddressAndForwardPointer(const uint8_t *const dict, const uint8_t flags,
|
||||||
int *pos);
|
int *pos);
|
||||||
static int getAttributeFrequencyFromFlags(const int flags);
|
static int getAttributeFrequencyFromFlags(const int flags);
|
||||||
static int getTerminalPosition(const uint8_t *const root, const int32_t *const inWord,
|
static int getTerminalPosition(const uint8_t *const root, const int *const inWord,
|
||||||
const int length, const bool forceLowerCaseSearch);
|
const int length, const bool forceLowerCaseSearch);
|
||||||
static int getWordAtAddress(const uint8_t *const root, const int address, const int maxDepth,
|
static int getWordAtAddress(const uint8_t *const root, const int address, const int maxDepth,
|
||||||
uint16_t *outWord, int *outUnigramFrequency);
|
int *outWord, int *outUnigramFrequency);
|
||||||
static int computeFrequencyForBigram(const int unigramFreq, const int bigramFreq);
|
static int computeFrequencyForBigram(const int unigramFreq, const int bigramFreq);
|
||||||
static int getProbability(const int position, const std::map<int, int> *bigramMap,
|
static int getProbability(const int position, const std::map<int, int> *bigramMap,
|
||||||
const uint8_t *bigramFilter, const int unigramFreq);
|
const uint8_t *bigramFilter, const int unigramFreq);
|
||||||
|
@ -176,17 +176,17 @@ inline uint8_t BinaryFormat::getFlagsAndForwardPointer(const uint8_t *const dict
|
||||||
return dict[(*pos)++];
|
return dict[(*pos)++];
|
||||||
}
|
}
|
||||||
|
|
||||||
inline int32_t BinaryFormat::getCodePointAndForwardPointer(const uint8_t *const dict, int *pos) {
|
inline int BinaryFormat::getCodePointAndForwardPointer(const uint8_t *const dict, int *pos) {
|
||||||
const int origin = *pos;
|
const int origin = *pos;
|
||||||
const int32_t codePoint = dict[origin];
|
const int codePoint = dict[origin];
|
||||||
if (codePoint < MINIMAL_ONE_BYTE_CHARACTER_VALUE) {
|
if (codePoint < MINIMAL_ONE_BYTE_CHARACTER_VALUE) {
|
||||||
if (codePoint == CHARACTER_ARRAY_TERMINATOR) {
|
if (codePoint == CHARACTER_ARRAY_TERMINATOR) {
|
||||||
*pos = origin + 1;
|
*pos = origin + 1;
|
||||||
return NOT_A_CODE_POINT;
|
return NOT_A_CODE_POINT;
|
||||||
} else {
|
} else {
|
||||||
*pos = origin + 3;
|
*pos = origin + 3;
|
||||||
const int32_t char_1 = codePoint << 16;
|
const int char_1 = codePoint << 16;
|
||||||
const int32_t char_2 = char_1 + (dict[origin + 1] << 8);
|
const int char_2 = char_1 + (dict[origin + 1] << 8);
|
||||||
return char_2 + dict[origin + 2];
|
return char_2 + dict[origin + 2];
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -202,7 +202,7 @@ inline int BinaryFormat::readFrequencyWithoutMovingPointer(const uint8_t *const
|
||||||
|
|
||||||
inline int BinaryFormat::skipOtherCharacters(const uint8_t *const dict, const int pos) {
|
inline int BinaryFormat::skipOtherCharacters(const uint8_t *const dict, const int pos) {
|
||||||
int currentPos = pos;
|
int currentPos = pos;
|
||||||
int32_t character = dict[currentPos++];
|
int character = dict[currentPos++];
|
||||||
while (CHARACTER_ARRAY_TERMINATOR != character) {
|
while (CHARACTER_ARRAY_TERMINATOR != character) {
|
||||||
if (character < MINIMAL_ONE_BYTE_CHARACTER_VALUE) {
|
if (character < MINIMAL_ONE_BYTE_CHARACTER_VALUE) {
|
||||||
currentPos += MULTIPLE_BYTE_CHARACTER_ADDITIONAL_SIZE;
|
currentPos += MULTIPLE_BYTE_CHARACTER_ADDITIONAL_SIZE;
|
||||||
|
@ -352,8 +352,8 @@ inline int BinaryFormat::getAttributeFrequencyFromFlags(const int flags) {
|
||||||
|
|
||||||
// This function gets the byte position of the last chargroup of the exact matching word in the
|
// This function gets the byte position of the last chargroup of the exact matching word in the
|
||||||
// dictionary. If no match is found, it returns NOT_VALID_WORD.
|
// dictionary. If no match is found, it returns NOT_VALID_WORD.
|
||||||
inline int BinaryFormat::getTerminalPosition(const uint8_t *const root,
|
inline int BinaryFormat::getTerminalPosition(const uint8_t *const root, const int *const inWord,
|
||||||
const int32_t *const inWord, const int length, const bool forceLowerCaseSearch) {
|
const int length, const bool forceLowerCaseSearch) {
|
||||||
int pos = 0;
|
int pos = 0;
|
||||||
int wordPos = 0;
|
int wordPos = 0;
|
||||||
|
|
||||||
|
@ -362,14 +362,14 @@ inline int BinaryFormat::getTerminalPosition(const uint8_t *const root,
|
||||||
// there was no match (or we would have found it).
|
// there was no match (or we would have found it).
|
||||||
if (wordPos >= length) return NOT_VALID_WORD;
|
if (wordPos >= length) return NOT_VALID_WORD;
|
||||||
int charGroupCount = BinaryFormat::getGroupCountAndForwardPointer(root, &pos);
|
int charGroupCount = BinaryFormat::getGroupCountAndForwardPointer(root, &pos);
|
||||||
const int32_t wChar = forceLowerCaseSearch ? toLowerCase(inWord[wordPos]) : inWord[wordPos];
|
const int wChar = forceLowerCaseSearch ? toLowerCase(inWord[wordPos]) : inWord[wordPos];
|
||||||
while (true) {
|
while (true) {
|
||||||
// If there are no more character groups in this node, it means we could not
|
// If there are no more character groups in this node, it means we could not
|
||||||
// find a matching character for this depth, therefore there is no match.
|
// find a matching character for this depth, therefore there is no match.
|
||||||
if (0 >= charGroupCount) return NOT_VALID_WORD;
|
if (0 >= charGroupCount) return NOT_VALID_WORD;
|
||||||
const int charGroupPos = pos;
|
const int charGroupPos = pos;
|
||||||
const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
|
const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
|
||||||
int32_t character = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
|
int character = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
|
||||||
if (character == wChar) {
|
if (character == wChar) {
|
||||||
// This is the correct node. Only one character group may start with the same
|
// This is the correct node. Only one character group may start with the same
|
||||||
// char within a node, so either we found our match in this node, or there is
|
// char within a node, so either we found our match in this node, or there is
|
||||||
|
@ -439,7 +439,7 @@ inline int BinaryFormat::getTerminalPosition(const uint8_t *const root,
|
||||||
* Return value : the length of the word, or 0 if the word was not found.
|
* Return value : the length of the word, or 0 if the word was not found.
|
||||||
*/
|
*/
|
||||||
inline int BinaryFormat::getWordAtAddress(const uint8_t *const root, const int address,
|
inline int BinaryFormat::getWordAtAddress(const uint8_t *const root, const int address,
|
||||||
const int maxDepth, uint16_t *outWord, int *outUnigramFrequency) {
|
const int maxDepth, int *outWord, int *outUnigramFrequency) {
|
||||||
int pos = 0;
|
int pos = 0;
|
||||||
int wordPos = 0;
|
int wordPos = 0;
|
||||||
|
|
||||||
|
@ -457,13 +457,13 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t *const root, const int a
|
||||||
--charGroupCount) {
|
--charGroupCount) {
|
||||||
const int startPos = pos;
|
const int startPos = pos;
|
||||||
const uint8_t flags = getFlagsAndForwardPointer(root, &pos);
|
const uint8_t flags = getFlagsAndForwardPointer(root, &pos);
|
||||||
const int32_t character = getCodePointAndForwardPointer(root, &pos);
|
const int character = getCodePointAndForwardPointer(root, &pos);
|
||||||
if (address == startPos) {
|
if (address == startPos) {
|
||||||
// We found the address. Copy the rest of the word in the buffer and return
|
// We found the address. Copy the rest of the word in the buffer and return
|
||||||
// the length.
|
// the length.
|
||||||
outWord[wordPos] = character;
|
outWord[wordPos] = character;
|
||||||
if (FLAG_HAS_MULTIPLE_CHARS & flags) {
|
if (FLAG_HAS_MULTIPLE_CHARS & flags) {
|
||||||
int32_t nextChar = getCodePointAndForwardPointer(root, &pos);
|
int nextChar = getCodePointAndForwardPointer(root, &pos);
|
||||||
// We count chars in order to avoid infinite loops if the file is broken or
|
// We count chars in order to avoid infinite loops if the file is broken or
|
||||||
// if there is some other bug
|
// if there is some other bug
|
||||||
int charCount = maxDepth;
|
int charCount = maxDepth;
|
||||||
|
@ -522,13 +522,12 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t *const root, const int a
|
||||||
if (0 != lastCandidateGroupPos) {
|
if (0 != lastCandidateGroupPos) {
|
||||||
const uint8_t lastFlags =
|
const uint8_t lastFlags =
|
||||||
getFlagsAndForwardPointer(root, &lastCandidateGroupPos);
|
getFlagsAndForwardPointer(root, &lastCandidateGroupPos);
|
||||||
const int32_t lastChar =
|
const int lastChar =
|
||||||
getCodePointAndForwardPointer(root, &lastCandidateGroupPos);
|
getCodePointAndForwardPointer(root, &lastCandidateGroupPos);
|
||||||
// We copy all the characters in this group to the buffer
|
// We copy all the characters in this group to the buffer
|
||||||
outWord[wordPos] = lastChar;
|
outWord[wordPos] = lastChar;
|
||||||
if (FLAG_HAS_MULTIPLE_CHARS & lastFlags) {
|
if (FLAG_HAS_MULTIPLE_CHARS & lastFlags) {
|
||||||
int32_t nextChar =
|
int nextChar = getCodePointAndForwardPointer(root, &lastCandidateGroupPos);
|
||||||
getCodePointAndForwardPointer(root, &lastCandidateGroupPos);
|
|
||||||
int charCount = maxDepth;
|
int charCount = maxDepth;
|
||||||
while (-1 != nextChar && --charCount > 0) {
|
while (-1 != nextChar && --charCount > 0) {
|
||||||
outWord[++wordPos] = nextChar;
|
outWord[++wordPos] = nextChar;
|
||||||
|
|
|
@ -18,22 +18,23 @@
|
||||||
#define LATINIME_CHAR_UTILS_H
|
#define LATINIME_CHAR_UTILS_H
|
||||||
|
|
||||||
#include <cctype>
|
#include <cctype>
|
||||||
#include <stdint.h>
|
|
||||||
|
#include "defines.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
inline static bool isAsciiUpper(unsigned short c) {
|
inline static bool isAsciiUpper(int c) {
|
||||||
// Note: isupper(...) reports false positives for some Cyrillic characters, causing them to
|
// Note: isupper(...) reports false positives for some Cyrillic characters, causing them to
|
||||||
// be incorrectly lower-cased using toAsciiLower(...) rather than latin_tolower(...).
|
// be incorrectly lower-cased using toAsciiLower(...) rather than latin_tolower(...).
|
||||||
return (c >= 'A' && c <= 'Z');
|
return (c >= 'A' && c <= 'Z');
|
||||||
}
|
}
|
||||||
|
|
||||||
inline static unsigned short toAsciiLower(unsigned short c) {
|
inline static int toAsciiLower(int c) {
|
||||||
return c - 'A' + 'a';
|
return c - 'A' + 'a';
|
||||||
}
|
}
|
||||||
|
|
||||||
inline static bool isAscii(unsigned short c) {
|
inline static bool isAscii(int c) {
|
||||||
return isascii(static_cast<int>(c)) != 0;
|
return isascii(c) != 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned short latin_tolower(const unsigned short c);
|
unsigned short latin_tolower(const unsigned short c);
|
||||||
|
@ -44,33 +45,32 @@ unsigned short latin_tolower(const unsigned short c);
|
||||||
* if c is not a combined character, or the base character if it
|
* if c is not a combined character, or the base character if it
|
||||||
* is combined.
|
* is combined.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static const int BASE_CHARS_SIZE = 0x0500;
|
static const int BASE_CHARS_SIZE = 0x0500;
|
||||||
extern const uint16_t BASE_CHARS[BASE_CHARS_SIZE];
|
extern const unsigned short BASE_CHARS[BASE_CHARS_SIZE];
|
||||||
|
|
||||||
inline static unsigned short toBaseChar(unsigned short c) {
|
inline static int toBaseCodePoint(int c) {
|
||||||
if (c < BASE_CHARS_SIZE) {
|
if (c < BASE_CHARS_SIZE) {
|
||||||
return BASE_CHARS[c];
|
return static_cast<int>(BASE_CHARS[c]);
|
||||||
}
|
}
|
||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline static unsigned short toLowerCase(const unsigned short c) {
|
inline static int toLowerCase(const int c) {
|
||||||
if (isAsciiUpper(c)) {
|
if (isAsciiUpper(c)) {
|
||||||
return toAsciiLower(c);
|
return toAsciiLower(c);
|
||||||
} else if (isAscii(c)) {
|
} else if (isAscii(c)) {
|
||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
return latin_tolower(c);
|
return static_cast<int>(latin_tolower(static_cast<unsigned short>(c)));
|
||||||
}
|
}
|
||||||
|
|
||||||
inline static unsigned short toBaseLowerCase(const unsigned short c) {
|
inline static int toBaseLowerCase(const int c) {
|
||||||
return toLowerCase(toBaseChar(c));
|
return toLowerCase(toBaseCodePoint(c));
|
||||||
}
|
}
|
||||||
|
|
||||||
inline static bool isSkippableChar(const uint16_t character) {
|
inline static bool isSkippableCodePoint(const int codePoint) {
|
||||||
// TODO: Do not hardcode here
|
// TODO: Do not hardcode here
|
||||||
return character == '\'' || character == '-';
|
return codePoint == KEYCODE_SINGLE_QUOTE || codePoint == KEYCODE_HYPHEN_MINUS;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
|
|
|
@ -60,8 +60,8 @@ inline static void dumpEditDistance10ForDebug(int *editDistanceTable,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline static void calcEditDistanceOneStep(int *editDistanceTable, const unsigned short *input,
|
inline static void calcEditDistanceOneStep(int *editDistanceTable, const int *input,
|
||||||
const int inputSize, const unsigned short *output, const int outputLength) {
|
const int inputSize, const int *output, const int outputLength) {
|
||||||
// TODO: Make sure that editDistance[0 ~ MAX_WORD_LENGTH_INTERNAL] is not touched.
|
// TODO: Make sure that editDistance[0 ~ MAX_WORD_LENGTH_INTERNAL] is not touched.
|
||||||
// Let dp[i][j] be editDistanceTable[i * (inputSize + 1) + j].
|
// Let dp[i][j] be editDistanceTable[i * (inputSize + 1) + j].
|
||||||
// Assuming that dp[0][0] ... dp[outputLength - 1][inputSize] are already calculated,
|
// Assuming that dp[0][0] ... dp[outputLength - 1][inputSize] are already calculated,
|
||||||
|
@ -71,10 +71,10 @@ inline static void calcEditDistanceOneStep(int *editDistanceTable, const unsigne
|
||||||
const int *const prevprev =
|
const int *const prevprev =
|
||||||
outputLength >= 2 ? editDistanceTable + (outputLength - 2) * (inputSize + 1) : 0;
|
outputLength >= 2 ? editDistanceTable + (outputLength - 2) * (inputSize + 1) : 0;
|
||||||
current[0] = outputLength;
|
current[0] = outputLength;
|
||||||
const uint32_t co = toBaseLowerCase(output[outputLength - 1]);
|
const int co = toBaseLowerCase(output[outputLength - 1]);
|
||||||
const uint32_t prevCO = outputLength >= 2 ? toBaseLowerCase(output[outputLength - 2]) : 0;
|
const int prevCO = outputLength >= 2 ? toBaseLowerCase(output[outputLength - 2]) : 0;
|
||||||
for (int i = 1; i <= inputSize; ++i) {
|
for (int i = 1; i <= inputSize; ++i) {
|
||||||
const uint32_t ci = toBaseLowerCase(input[i - 1]);
|
const int ci = toBaseLowerCase(input[i - 1]);
|
||||||
const uint16_t cost = (ci == co) ? 0 : 1;
|
const uint16_t cost = (ci == co) ? 0 : 1;
|
||||||
current[i] = min(current[i - 1] + 1, min(prev[i] + 1, prev[i - 1] + cost));
|
current[i] = min(current[i - 1] + 1, min(prev[i] + 1, prev[i - 1] + cost));
|
||||||
if (i >= 2 && prevprev && ci == prevCO && co == toBaseLowerCase(input[i - 2])) {
|
if (i >= 2 && prevprev && ci == prevCO && co == toBaseLowerCase(input[i - 2])) {
|
||||||
|
@ -94,11 +94,9 @@ inline static int getCurrentEditDistance(int *editDistanceTable, const int editD
|
||||||
//////////////////////
|
//////////////////////
|
||||||
// inline functions //
|
// inline functions //
|
||||||
//////////////////////
|
//////////////////////
|
||||||
static const char SINGLE_QUOTE = '\'';
|
inline bool Correction::isSingleQuote(const int c) {
|
||||||
|
const int userTypedChar = mProximityInfoState.getPrimaryCodePointAt(mInputIndex);
|
||||||
inline bool Correction::isSingleQuote(const unsigned short c) {
|
return (c == KEYCODE_SINGLE_QUOTE && userTypedChar != KEYCODE_SINGLE_QUOTE);
|
||||||
const unsigned short userTypedChar = mProximityInfoState.getPrimaryCharAt(mInputIndex);
|
|
||||||
return (c == SINGLE_QUOTE && userTypedChar != SINGLE_QUOTE);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////
|
////////////////
|
||||||
|
@ -162,22 +160,22 @@ bool Correction::sameAsTyped() {
|
||||||
}
|
}
|
||||||
|
|
||||||
int Correction::getFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray,
|
int Correction::getFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray,
|
||||||
const int wordCount, const bool isSpaceProximity, const unsigned short *word) {
|
const int wordCount, const bool isSpaceProximity, const int *word) {
|
||||||
return Correction::RankingAlgorithm::calcFreqForSplitMultipleWords(freqArray, wordLengthArray,
|
return Correction::RankingAlgorithm::calcFreqForSplitMultipleWords(freqArray, wordLengthArray,
|
||||||
wordCount, this, isSpaceProximity, word);
|
wordCount, this, isSpaceProximity, word);
|
||||||
}
|
}
|
||||||
|
|
||||||
int Correction::getFinalProbability(const int probability, unsigned short **word, int *wordLength) {
|
int Correction::getFinalProbability(const int probability, int **word, int *wordLength) {
|
||||||
return getFinalProbabilityInternal(probability, word, wordLength, mInputSize);
|
return getFinalProbabilityInternal(probability, word, wordLength, mInputSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
int Correction::getFinalProbabilityForSubQueue(const int probability, unsigned short **word,
|
int Correction::getFinalProbabilityForSubQueue(const int probability, int **word, int *wordLength,
|
||||||
int *wordLength, const int inputSize) {
|
const int inputSize) {
|
||||||
return getFinalProbabilityInternal(probability, word, wordLength, inputSize);
|
return getFinalProbabilityInternal(probability, word, wordLength, inputSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
int Correction::getFinalProbabilityInternal(const int probability, unsigned short **word,
|
int Correction::getFinalProbabilityInternal(const int probability, int **word, int *wordLength,
|
||||||
int *wordLength, const int inputSize) {
|
const int inputSize) {
|
||||||
const int outputIndex = mTerminalOutputIndex;
|
const int outputIndex = mTerminalOutputIndex;
|
||||||
const int inputIndex = mTerminalInputIndex;
|
const int inputIndex = mTerminalInputIndex;
|
||||||
*wordLength = outputIndex + 1;
|
*wordLength = outputIndex + 1;
|
||||||
|
@ -273,15 +271,15 @@ bool Correction::needsToPrune() const {
|
||||||
|| (!mDoAutoCompletion && (mOutputIndex > mInputSize));
|
|| (!mDoAutoCompletion && (mOutputIndex > mInputSize));
|
||||||
}
|
}
|
||||||
|
|
||||||
void Correction::addCharToCurrentWord(const int32_t c) {
|
void Correction::addCharToCurrentWord(const int c) {
|
||||||
mWord[mOutputIndex] = c;
|
mWord[mOutputIndex] = c;
|
||||||
const unsigned short *primaryInputWord = mProximityInfoState.getPrimaryInputWord();
|
const int *primaryInputWord = mProximityInfoState.getPrimaryInputWord();
|
||||||
calcEditDistanceOneStep(mEditDistanceTable, primaryInputWord, mInputSize,
|
calcEditDistanceOneStep(mEditDistanceTable, primaryInputWord, mInputSize, mWord,
|
||||||
mWord, mOutputIndex + 1);
|
mOutputIndex + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
Correction::CorrectionType Correction::processSkipChar(
|
Correction::CorrectionType Correction::processSkipChar(const int c, const bool isTerminal,
|
||||||
const int32_t c, const bool isTerminal, const bool inputIndexIncremented) {
|
const bool inputIndexIncremented) {
|
||||||
addCharToCurrentWord(c);
|
addCharToCurrentWord(c);
|
||||||
mTerminalInputIndex = mInputIndex - (inputIndexIncremented ? 1 : 0);
|
mTerminalInputIndex = mInputIndex - (inputIndexIncremented ? 1 : 0);
|
||||||
mTerminalOutputIndex = mOutputIndex;
|
mTerminalOutputIndex = mOutputIndex;
|
||||||
|
@ -309,8 +307,7 @@ inline bool isProximityCharOrEquivalentChar(ProximityType type) {
|
||||||
return type == EQUIVALENT_CHAR || type == NEAR_PROXIMITY_CHAR;
|
return type == EQUIVALENT_CHAR || type == NEAR_PROXIMITY_CHAR;
|
||||||
}
|
}
|
||||||
|
|
||||||
Correction::CorrectionType Correction::processCharAndCalcState(
|
Correction::CorrectionType Correction::processCharAndCalcState(const int c, const bool isTerminal) {
|
||||||
const int32_t c, const bool isTerminal) {
|
|
||||||
const int correctionCount = (mSkippedCount + mExcessiveCount + mTransposedCount);
|
const int correctionCount = (mSkippedCount + mExcessiveCount + mTransposedCount);
|
||||||
if (correctionCount > mMaxErrors) {
|
if (correctionCount > mMaxErrors) {
|
||||||
return processUnrelatedCorrectionType();
|
return processUnrelatedCorrectionType();
|
||||||
|
@ -628,10 +625,10 @@ Correction::CorrectionType Correction::processCharAndCalcState(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline static int getQuoteCount(const unsigned short *word, const int length) {
|
inline static int getQuoteCount(const int *word, const int length) {
|
||||||
int quoteCount = 0;
|
int quoteCount = 0;
|
||||||
for (int i = 0; i < length; ++i) {
|
for (int i = 0; i < length; ++i) {
|
||||||
if (word[i] == SINGLE_QUOTE) {
|
if (word[i] == KEYCODE_SINGLE_QUOTE) {
|
||||||
++quoteCount;
|
++quoteCount;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -639,7 +636,7 @@ inline static int getQuoteCount(const unsigned short *word, const int length) {
|
||||||
}
|
}
|
||||||
|
|
||||||
inline static bool isUpperCase(unsigned short c) {
|
inline static bool isUpperCase(unsigned short c) {
|
||||||
return isAsciiUpper(toBaseChar(c));
|
return isAsciiUpper(toBaseCodePoint(c));
|
||||||
}
|
}
|
||||||
|
|
||||||
//////////////////////
|
//////////////////////
|
||||||
|
@ -672,7 +669,7 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex
|
||||||
// TODO: use mExcessiveCount
|
// TODO: use mExcessiveCount
|
||||||
const int matchCount = inputSize - correction->mProximityCount - excessiveCount;
|
const int matchCount = inputSize - correction->mProximityCount - excessiveCount;
|
||||||
|
|
||||||
const unsigned short *word = correction->mWord;
|
const int *word = correction->mWord;
|
||||||
const bool skipped = skippedCount > 0;
|
const bool skipped = skippedCount > 0;
|
||||||
|
|
||||||
const int quoteDiffCount = max(0, getQuoteCount(word, outputLength)
|
const int quoteDiffCount = max(0, getQuoteCount(word, outputLength)
|
||||||
|
@ -911,7 +908,7 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex
|
||||||
/* static */
|
/* static */
|
||||||
int Correction::RankingAlgorithm::calcFreqForSplitMultipleWords(
|
int Correction::RankingAlgorithm::calcFreqForSplitMultipleWords(
|
||||||
const int *freqArray, const int *wordLengthArray, const int wordCount,
|
const int *freqArray, const int *wordLengthArray, const int wordCount,
|
||||||
const Correction *correction, const bool isSpaceProximity, const unsigned short *word) {
|
const Correction *correction, const bool isSpaceProximity, const int *word) {
|
||||||
const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER;
|
const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER;
|
||||||
|
|
||||||
bool firstCapitalizedWordDemotion = false;
|
bool firstCapitalizedWordDemotion = false;
|
||||||
|
@ -1040,9 +1037,8 @@ int Correction::RankingAlgorithm::calcFreqForSplitMultipleWords(
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Damerau-Levenshtein distance */
|
/* Damerau-Levenshtein distance */
|
||||||
inline static int editDistanceInternal(
|
inline static int editDistanceInternal(int *editDistanceTable, const int *before,
|
||||||
int *editDistanceTable, const unsigned short *before,
|
const int beforeLength, const int *after, const int afterLength) {
|
||||||
const int beforeLength, const unsigned short *after, const int afterLength) {
|
|
||||||
// dp[li][lo] dp[a][b] = dp[ a * lo + b]
|
// dp[li][lo] dp[a][b] = dp[ a * lo + b]
|
||||||
int *dp = editDistanceTable;
|
int *dp = editDistanceTable;
|
||||||
const int li = beforeLength + 1;
|
const int li = beforeLength + 1;
|
||||||
|
@ -1056,9 +1052,9 @@ inline static int editDistanceInternal(
|
||||||
|
|
||||||
for (int i = 0; i < li - 1; ++i) {
|
for (int i = 0; i < li - 1; ++i) {
|
||||||
for (int j = 0; j < lo - 1; ++j) {
|
for (int j = 0; j < lo - 1; ++j) {
|
||||||
const uint32_t ci = toBaseLowerCase(before[i]);
|
const int ci = toBaseLowerCase(before[i]);
|
||||||
const uint32_t co = toBaseLowerCase(after[j]);
|
const int co = toBaseLowerCase(after[j]);
|
||||||
const uint16_t cost = (ci == co) ? 0 : 1;
|
const int cost = (ci == co) ? 0 : 1;
|
||||||
dp[(i + 1) * lo + (j + 1)] = min(dp[i * lo + (j + 1)] + 1,
|
dp[(i + 1) * lo + (j + 1)] = min(dp[i * lo + (j + 1)] + 1,
|
||||||
min(dp[(i + 1) * lo + j] + 1, dp[i * lo + j] + cost));
|
min(dp[(i + 1) * lo + j] + 1, dp[i * lo + j] + cost));
|
||||||
if (i > 0 && j > 0 && ci == toBaseLowerCase(after[j - 1])
|
if (i > 0 && j > 0 && ci == toBaseLowerCase(after[j - 1])
|
||||||
|
@ -1080,8 +1076,8 @@ inline static int editDistanceInternal(
|
||||||
return dp[li * lo - 1];
|
return dp[li * lo - 1];
|
||||||
}
|
}
|
||||||
|
|
||||||
int Correction::RankingAlgorithm::editDistance(const unsigned short *before,
|
int Correction::RankingAlgorithm::editDistance(const int *before, const int beforeLength,
|
||||||
const int beforeLength, const unsigned short *after, const int afterLength) {
|
const int *after, const int afterLength) {
|
||||||
int table[(beforeLength + 1) * (afterLength + 1)];
|
int table[(beforeLength + 1) * (afterLength + 1)];
|
||||||
return editDistanceInternal(table, before, beforeLength, after, afterLength);
|
return editDistanceInternal(table, before, beforeLength, after, afterLength);
|
||||||
}
|
}
|
||||||
|
@ -1109,9 +1105,8 @@ int Correction::RankingAlgorithm::editDistance(const unsigned short *before,
|
||||||
// So, we can normalize original score by dividing powf(2, min(b.l(),a.l())) * 255 * 2.
|
// So, we can normalize original score by dividing powf(2, min(b.l(),a.l())) * 255 * 2.
|
||||||
|
|
||||||
/* static */
|
/* static */
|
||||||
float Correction::RankingAlgorithm::calcNormalizedScore(const unsigned short *before,
|
float Correction::RankingAlgorithm::calcNormalizedScore(const int *before, const int beforeLength,
|
||||||
const int beforeLength, const unsigned short *after, const int afterLength,
|
const int *after, const int afterLength, const int score) {
|
||||||
const int score) {
|
|
||||||
if (0 == beforeLength || 0 == afterLength) {
|
if (0 == beforeLength || 0 == afterLength) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -78,14 +78,13 @@ class Correction {
|
||||||
return ++mTotalTraverseCount;
|
return ++mTotalTraverseCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
int getFreqForSplitMultipleWords(
|
int getFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray,
|
||||||
const int *freqArray, const int *wordLengthArray, const int wordCount,
|
const int wordCount, const bool isSpaceProximity, const int *word);
|
||||||
const bool isSpaceProximity, const unsigned short *word);
|
int getFinalProbability(const int probability, int **word, int *wordLength);
|
||||||
int getFinalProbability(const int probability, unsigned short **word, int *wordLength);
|
int getFinalProbabilityForSubQueue(const int probability, int **word, int *wordLength,
|
||||||
int getFinalProbabilityForSubQueue(const int probability, unsigned short **word,
|
const int inputSize);
|
||||||
int *wordLength, const int inputSize);
|
|
||||||
|
|
||||||
CorrectionType processCharAndCalcState(const int32_t c, const bool isTerminal);
|
CorrectionType processCharAndCalcState(const int c, const bool isTerminal);
|
||||||
|
|
||||||
/////////////////////////
|
/////////////////////////
|
||||||
// Tree helper methods
|
// Tree helper methods
|
||||||
|
@ -110,28 +109,28 @@ class Correction {
|
||||||
const int inputSize);
|
const int inputSize);
|
||||||
static int calcFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray,
|
static int calcFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray,
|
||||||
const int wordCount, const Correction *correction, const bool isSpaceProximity,
|
const int wordCount, const Correction *correction, const bool isSpaceProximity,
|
||||||
const unsigned short *word);
|
const int *word);
|
||||||
static float calcNormalizedScore(const unsigned short *before, const int beforeLength,
|
static float calcNormalizedScore(const int *before, const int beforeLength,
|
||||||
const unsigned short *after, const int afterLength, const int score);
|
const int *after, const int afterLength, const int score);
|
||||||
static int editDistance(const unsigned short *before,
|
static int editDistance(const int *before, const int beforeLength, const int *after,
|
||||||
const int beforeLength, const unsigned short *after, const int afterLength);
|
const int afterLength);
|
||||||
private:
|
private:
|
||||||
static const int MAX_INITIAL_SCORE = 255;
|
static const int MAX_INITIAL_SCORE = 255;
|
||||||
};
|
};
|
||||||
|
|
||||||
// proximity info state
|
// proximity info state
|
||||||
void initInputParams(const ProximityInfo *proximityInfo, const int32_t *inputCodes,
|
void initInputParams(const ProximityInfo *proximityInfo, const int *inputCodes,
|
||||||
const int inputSize, const int *xCoordinates, const int *yCoordinates) {
|
const int inputSize, const int *xCoordinates, const int *yCoordinates) {
|
||||||
mProximityInfoState.initInputParams(0, MAX_POINT_TO_KEY_LENGTH,
|
mProximityInfoState.initInputParams(0, MAX_POINT_TO_KEY_LENGTH,
|
||||||
proximityInfo, inputCodes, inputSize, xCoordinates, yCoordinates, 0, 0, false);
|
proximityInfo, inputCodes, inputSize, xCoordinates, yCoordinates, 0, 0, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
const unsigned short *getPrimaryInputWord() const {
|
const int *getPrimaryInputWord() const {
|
||||||
return mProximityInfoState.getPrimaryInputWord();
|
return mProximityInfoState.getPrimaryInputWord();
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned short getPrimaryCharAt(const int index) const {
|
int getPrimaryCodePointAt(const int index) const {
|
||||||
return mProximityInfoState.getPrimaryCharAt(index);
|
return mProximityInfoState.getPrimaryCodePointAt(index);
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@ -214,13 +213,13 @@ class Correction {
|
||||||
inline void incrementInputIndex();
|
inline void incrementInputIndex();
|
||||||
inline void incrementOutputIndex();
|
inline void incrementOutputIndex();
|
||||||
inline void startToTraverseAllNodes();
|
inline void startToTraverseAllNodes();
|
||||||
inline bool isSingleQuote(const unsigned short c);
|
inline bool isSingleQuote(const int c);
|
||||||
inline CorrectionType processSkipChar(
|
inline CorrectionType processSkipChar(const int c, const bool isTerminal,
|
||||||
const int32_t c, const bool isTerminal, const bool inputIndexIncremented);
|
const bool inputIndexIncremented);
|
||||||
inline CorrectionType processUnrelatedCorrectionType();
|
inline CorrectionType processUnrelatedCorrectionType();
|
||||||
inline void addCharToCurrentWord(const int32_t c);
|
inline void addCharToCurrentWord(const int c);
|
||||||
inline int getFinalProbabilityInternal(const int probability, unsigned short **word,
|
inline int getFinalProbabilityInternal(const int probability, int **word, int *wordLength,
|
||||||
int *wordLength, const int inputSize);
|
const int inputSize);
|
||||||
|
|
||||||
static const int TYPED_LETTER_MULTIPLIER = 2;
|
static const int TYPED_LETTER_MULTIPLIER = 2;
|
||||||
static const int FULL_WORD_MULTIPLIER = 2;
|
static const int FULL_WORD_MULTIPLIER = 2;
|
||||||
|
@ -240,7 +239,7 @@ class Correction {
|
||||||
uint8_t mTotalTraverseCount;
|
uint8_t mTotalTraverseCount;
|
||||||
|
|
||||||
// The following arrays are state buffers.
|
// The following arrays are state buffers.
|
||||||
unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
|
int mWord[MAX_WORD_LENGTH_INTERNAL];
|
||||||
int mDistances[MAX_WORD_LENGTH_INTERNAL];
|
int mDistances[MAX_WORD_LENGTH_INTERNAL];
|
||||||
|
|
||||||
// Edit distance calculation requires a buffer with (N+1)^2 length for the input length N.
|
// Edit distance calculation requires a buffer with (N+1)^2 length for the input length N.
|
||||||
|
|
|
@ -30,17 +30,15 @@
|
||||||
#define DUMP_RESULT(words, frequencies, maxWordCount, maxWordLength) do { \
|
#define DUMP_RESULT(words, frequencies, maxWordCount, maxWordLength) do { \
|
||||||
dumpResult(words, frequencies, maxWordCount, maxWordLength); } while (0)
|
dumpResult(words, frequencies, maxWordCount, maxWordLength); } while (0)
|
||||||
#define DUMP_WORD(word, length) do { dumpWord(word, length); } while (0)
|
#define DUMP_WORD(word, length) do { dumpWord(word, length); } while (0)
|
||||||
#define DUMP_WORD_INT(word, length) do { dumpWordInt(word, length); } while (0)
|
#define INTS_TO_CHARS(input, length, output) do { \
|
||||||
// TODO: INTS_TO_CHARS
|
intArrayToCharArray(input, length, output); } while (0)
|
||||||
#define SHORTS_TO_CHARS(input, length, output) do { \
|
|
||||||
shortArrayToCharArray(input, length, output); } while (0)
|
|
||||||
|
|
||||||
static inline void dumpWordInfo(const unsigned short *word, const int length,
|
static inline void dumpWordInfo(const int *word, const int length, const int rank,
|
||||||
const int rank, const int frequency) {
|
const int frequency) {
|
||||||
static char charBuf[50];
|
static char charBuf[50];
|
||||||
int i = 0;
|
int i = 0;
|
||||||
for (; i < length; ++i) {
|
for (; i < length; ++i) {
|
||||||
const unsigned short c = word[i];
|
const int c = word[i];
|
||||||
if (c == 0) {
|
if (c == 0) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -53,8 +51,7 @@ static inline void dumpWordInfo(const unsigned short *word, const int length,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void dumpResult(
|
static inline void dumpResult(const int *outWords, const int *frequencies, const int maxWordCounts,
|
||||||
const unsigned short *outWords, const int *frequencies, const int maxWordCounts,
|
|
||||||
const int maxWordLength) {
|
const int maxWordLength) {
|
||||||
AKLOGI("--- DUMP RESULT ---------");
|
AKLOGI("--- DUMP RESULT ---------");
|
||||||
for (int i = 0; i < maxWordCounts; ++i) {
|
for (int i = 0; i < maxWordCounts; ++i) {
|
||||||
|
@ -63,11 +60,11 @@ static inline void dumpResult(
|
||||||
AKLOGI("-------------------------");
|
AKLOGI("-------------------------");
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void dumpWord(const unsigned short *word, const int length) {
|
static inline void dumpWord(const int *word, const int length) {
|
||||||
static char charBuf[50];
|
static char charBuf[50];
|
||||||
int i = 0;
|
int i = 0;
|
||||||
for (; i < length; ++i) {
|
for (; i < length; ++i) {
|
||||||
const unsigned short c = word[i];
|
const int c = word[i];
|
||||||
if (c == 0) {
|
if (c == 0) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -80,22 +77,10 @@ static inline void dumpWord(const unsigned short *word, const int length) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void dumpWordInt(const int *word, const int length) {
|
static inline void intArrayToCharArray(const int *input, const int length, char *output) {
|
||||||
static char charBuf[50];
|
|
||||||
|
|
||||||
for (int i = 0; i < length; ++i) {
|
|
||||||
charBuf[i] = word[i];
|
|
||||||
}
|
|
||||||
charBuf[length] = 0;
|
|
||||||
AKLOGI("i[ %s ]", charBuf);
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: Change this to intArrayToCharArray
|
|
||||||
static inline void shortArrayToCharArray(
|
|
||||||
const unsigned short *input, const int length, char *output) {
|
|
||||||
int i = 0;
|
int i = 0;
|
||||||
for (;i < length; ++i) {
|
for (; i < length; ++i) {
|
||||||
const unsigned short c = input[i];
|
const int c = input[i];
|
||||||
if (c == 0) {
|
if (c == 0) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -137,11 +122,9 @@ static inline void showStackTrace() {
|
||||||
#define AKLOGI(fmt, ...)
|
#define AKLOGI(fmt, ...)
|
||||||
#define DUMP_RESULT(words, frequencies, maxWordCount, maxWordLength)
|
#define DUMP_RESULT(words, frequencies, maxWordCount, maxWordLength)
|
||||||
#define DUMP_WORD(word, length)
|
#define DUMP_WORD(word, length)
|
||||||
#define DUMP_WORD_INT(word, length)
|
|
||||||
#define ASSERT(success)
|
#define ASSERT(success)
|
||||||
#define SHOW_STACK_TRACE
|
#define SHOW_STACK_TRACE
|
||||||
// TODO: INTS_TO_CHARS
|
#define INTS_TO_CHARS(input, length, output)
|
||||||
#define SHORTS_TO_CHARS(input, length, output)
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef FLAG_DO_PROFILE
|
#ifdef FLAG_DO_PROFILE
|
||||||
|
@ -286,6 +269,8 @@ static inline void prof_out(void) {
|
||||||
#define NOT_A_PROBABILITY (-1)
|
#define NOT_A_PROBABILITY (-1)
|
||||||
|
|
||||||
#define KEYCODE_SPACE ' '
|
#define KEYCODE_SPACE ' '
|
||||||
|
#define KEYCODE_SINGLE_QUOTE '\''
|
||||||
|
#define KEYCODE_HYPHEN_MINUS '-'
|
||||||
|
|
||||||
#define CALIBRATE_SCORE_BY_TOUCH_COORDINATES true
|
#define CALIBRATE_SCORE_BY_TOUCH_COORDINATES true
|
||||||
|
|
||||||
|
|
|
@ -54,11 +54,10 @@ Dictionary::~Dictionary() {
|
||||||
}
|
}
|
||||||
|
|
||||||
int Dictionary::getSuggestions(ProximityInfo *proximityInfo, void *traverseSession,
|
int Dictionary::getSuggestions(ProximityInfo *proximityInfo, void *traverseSession,
|
||||||
int *xcoordinates, int *ycoordinates, int *times, int *pointerIds,
|
int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *codes,
|
||||||
int *codes, int codesSize, int *prevWordChars,
|
int codesSize, int *prevWordChars, int prevWordLength, int commitPoint, bool isGesture,
|
||||||
int prevWordLength, int commitPoint, bool isGesture,
|
bool useFullEditDistance, int *outWords, int *frequencies, int *spaceIndices,
|
||||||
bool useFullEditDistance, unsigned short *outWords,
|
int *outputTypes) const {
|
||||||
int *frequencies, int *spaceIndices, int *outputTypes) const {
|
|
||||||
int result = 0;
|
int result = 0;
|
||||||
if (isGesture) {
|
if (isGesture) {
|
||||||
DicTraverseWrapper::initDicTraverseSession(
|
DicTraverseWrapper::initDicTraverseSession(
|
||||||
|
@ -83,7 +82,7 @@ int Dictionary::getSuggestions(ProximityInfo *proximityInfo, void *traverseSessi
|
||||||
}
|
}
|
||||||
|
|
||||||
int Dictionary::getBigrams(const int32_t *word, int length, int *codes, int codesSize,
|
int Dictionary::getBigrams(const int32_t *word, int length, int *codes, int codesSize,
|
||||||
unsigned short *outWords, int *frequencies, int *outputTypes) const {
|
int *outWords, int *frequencies, int *outputTypes) const {
|
||||||
if (length <= 0) return 0;
|
if (length <= 0) return 0;
|
||||||
return mBigramDictionary->getBigrams(word, length, codes, codesSize, outWords, frequencies,
|
return mBigramDictionary->getBigrams(word, length, codes, codesSize, outWords, frequencies,
|
||||||
outputTypes);
|
outputTypes);
|
||||||
|
|
|
@ -47,11 +47,11 @@ class Dictionary {
|
||||||
int getSuggestions(ProximityInfo *proximityInfo, void *traverseSession, int *xcoordinates,
|
int getSuggestions(ProximityInfo *proximityInfo, void *traverseSession, int *xcoordinates,
|
||||||
int *ycoordinates, int *times, int *pointerIds, int *codes, int codesSize,
|
int *ycoordinates, int *times, int *pointerIds, int *codes, int codesSize,
|
||||||
int *prevWordChars, int prevWordLength, int commitPoint, bool isGesture,
|
int *prevWordChars, int prevWordLength, int commitPoint, bool isGesture,
|
||||||
bool useFullEditDistance, unsigned short *outWords,
|
bool useFullEditDistance, int *outWords, int *frequencies, int *spaceIndices,
|
||||||
int *frequencies, int *spaceIndices, int *outputTypes) const;
|
int *outputTypes) const;
|
||||||
|
|
||||||
int getBigrams(const int32_t *word, int length, int *codes, int codesSize,
|
int getBigrams(const int32_t *word, int length, int *codes, int codesSize, int *outWords,
|
||||||
unsigned short *outWords, int *frequencies, int *outputTypes) const;
|
int *frequencies, int *outputTypes) const;
|
||||||
|
|
||||||
int getFrequency(const int32_t *word, int length) const;
|
int getFrequency(const int32_t *word, int length) const;
|
||||||
bool isValidBigram(const int32_t *word1, int length1, const int32_t *word2, int length2) const;
|
bool isValidBigram(const int32_t *word1, int length1, const int32_t *word2, int length2) const;
|
||||||
|
@ -68,7 +68,7 @@ class Dictionary {
|
||||||
|
|
||||||
// public static utility methods
|
// public static utility methods
|
||||||
// static inline methods should be defined in the header file
|
// static inline methods should be defined in the header file
|
||||||
static int wideStrLen(unsigned short *str);
|
static int wideStrLen(int *str);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(Dictionary);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(Dictionary);
|
||||||
|
@ -88,7 +88,7 @@ class Dictionary {
|
||||||
|
|
||||||
// public static utility methods
|
// public static utility methods
|
||||||
// static inline methods should be defined in the header file
|
// static inline methods should be defined in the header file
|
||||||
inline int Dictionary::wideStrLen(unsigned short *str) {
|
inline int Dictionary::wideStrLen(int *str) {
|
||||||
if (!str) return 0;
|
if (!str) return 0;
|
||||||
int length = 0;
|
int length = 0;
|
||||||
while (*str) {
|
while (*str) {
|
||||||
|
|
|
@ -38,15 +38,14 @@ class GestureDecoderWrapper : public IncrementalDecoderInterface {
|
||||||
}
|
}
|
||||||
|
|
||||||
int getSuggestions(ProximityInfo *pInfo, void *traverseSession, int *inputXs, int *inputYs,
|
int getSuggestions(ProximityInfo *pInfo, void *traverseSession, int *inputXs, int *inputYs,
|
||||||
int *times, int *pointerIds, int *codes, int inputSize, int commitPoint,
|
int *times, int *pointerIds, int *codes, int inputSize, int commitPoint, int *outWords,
|
||||||
unsigned short *outWords, int *frequencies, int *outputIndices,
|
int *frequencies, int *outputIndices, int *outputTypes) const {
|
||||||
int *outputTypes) const {
|
|
||||||
if (!mIncrementalDecoderInterface) {
|
if (!mIncrementalDecoderInterface) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
return mIncrementalDecoderInterface->getSuggestions(
|
return mIncrementalDecoderInterface->getSuggestions(pInfo, traverseSession, inputXs,
|
||||||
pInfo, traverseSession, inputXs, inputYs, times, pointerIds, codes,
|
inputYs, times, pointerIds, codes, inputSize, commitPoint, outWords, frequencies,
|
||||||
inputSize, commitPoint, outWords, frequencies, outputIndices, outputTypes);
|
outputIndices, outputTypes);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void setGestureDecoderFactoryMethod(
|
static void setGestureDecoderFactoryMethod(
|
||||||
|
|
|
@ -28,10 +28,9 @@ class ProximityInfo;
|
||||||
|
|
||||||
class IncrementalDecoderInterface {
|
class IncrementalDecoderInterface {
|
||||||
public:
|
public:
|
||||||
virtual int getSuggestions(ProximityInfo *pInfo, void *traverseSession,
|
virtual int getSuggestions(ProximityInfo *pInfo, void *traverseSession, int *inputXs,
|
||||||
int *inputXs, int *inputYs, int *times, int *pointerIds, int *codes,
|
int *inputYs, int *times, int *pointerIds, int *codes, int inputSize, int commitPoint,
|
||||||
int inputSize, int commitPoint, unsigned short *outWords, int *frequencies,
|
int *outWords, int *frequencies, int *outputIndices, int *outputTypes) const = 0;
|
||||||
int *outputIndices, int *outputTypes) const = 0;
|
|
||||||
IncrementalDecoderInterface() { };
|
IncrementalDecoderInterface() { };
|
||||||
virtual ~IncrementalDecoderInterface() { };
|
virtual ~IncrementalDecoderInterface() { };
|
||||||
private:
|
private:
|
||||||
|
|
|
@ -38,15 +38,14 @@ class IncrementalDecoderWrapper : public IncrementalDecoderInterface {
|
||||||
}
|
}
|
||||||
|
|
||||||
int getSuggestions(ProximityInfo *pInfo, void *traverseSession, int *inputXs, int *inputYs,
|
int getSuggestions(ProximityInfo *pInfo, void *traverseSession, int *inputXs, int *inputYs,
|
||||||
int *times, int *pointerIds, int *codes, int inputSize, int commitPoint,
|
int *times, int *pointerIds, int *codes, int inputSize, int commitPoint, int *outWords,
|
||||||
unsigned short *outWords, int *frequencies, int *outputIndices,
|
int *frequencies, int *outputIndices, int *outputTypes) const {
|
||||||
int *outputTypes) const {
|
|
||||||
if (!mIncrementalDecoderInterface) {
|
if (!mIncrementalDecoderInterface) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
return mIncrementalDecoderInterface->getSuggestions(
|
return mIncrementalDecoderInterface->getSuggestions(pInfo, traverseSession, inputXs,
|
||||||
pInfo, traverseSession, inputXs, inputYs, times, pointerIds, codes,
|
inputYs, times, pointerIds, codes, inputSize, commitPoint, outWords, frequencies,
|
||||||
inputSize, commitPoint, outWords, frequencies, outputIndices, outputTypes);
|
outputIndices, outputTypes);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void setIncrementalDecoderFactoryMethod(
|
static void setIncrementalDecoderFactoryMethod(
|
||||||
|
|
|
@ -34,7 +34,7 @@ const float ProximityInfoState::NOT_A_DISTANCE_FLOAT = -1.0f;
|
||||||
const int ProximityInfoState::NOT_A_CODE = -1;
|
const int ProximityInfoState::NOT_A_CODE = -1;
|
||||||
|
|
||||||
void ProximityInfoState::initInputParams(const int pointerId, const float maxPointToKeyLength,
|
void ProximityInfoState::initInputParams(const int pointerId, const float maxPointToKeyLength,
|
||||||
const ProximityInfo *proximityInfo, const int32_t *const inputCodes, const int inputSize,
|
const ProximityInfo *proximityInfo, const int *const inputCodes, const int inputSize,
|
||||||
const int *const xCoordinates, const int *const yCoordinates, const int *const times,
|
const int *const xCoordinates, const int *const yCoordinates, const int *const times,
|
||||||
const int *const pointerIds, const bool isGeometric) {
|
const int *const pointerIds, const bool isGeometric) {
|
||||||
|
|
||||||
|
@ -63,7 +63,7 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi
|
||||||
// - mNormalizedSquaredDistances
|
// - mNormalizedSquaredDistances
|
||||||
// TODO: Merge
|
// TODO: Merge
|
||||||
for (int i = 0; i < inputSize; ++i) {
|
for (int i = 0; i < inputSize; ++i) {
|
||||||
const int32_t primaryKey = inputCodes[i];
|
const int primaryKey = inputCodes[i];
|
||||||
const int x = xCoordinates[i];
|
const int x = xCoordinates[i];
|
||||||
const int y = yCoordinates[i];
|
const int y = yCoordinates[i];
|
||||||
int *proximities = &mInputCodes[i * MAX_PROXIMITY_CHARS_SIZE_INTERNAL];
|
int *proximities = &mInputCodes[i * MAX_PROXIMITY_CHARS_SIZE_INTERNAL];
|
||||||
|
@ -146,7 +146,7 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi
|
||||||
AKLOGI("Init ProximityInfoState: (%d)PID = %d", i, pid);
|
AKLOGI("Init ProximityInfoState: (%d)PID = %d", i, pid);
|
||||||
}
|
}
|
||||||
if (pointerId == pid) {
|
if (pointerId == pid) {
|
||||||
const int c = isGeometric ? NOT_A_COORDINATE : getPrimaryCharAt(i);
|
const int c = isGeometric ? NOT_A_COORDINATE : getPrimaryCodePointAt(i);
|
||||||
const int x = proximityOnly ? NOT_A_COORDINATE : xCoordinates[i];
|
const int x = proximityOnly ? NOT_A_COORDINATE : xCoordinates[i];
|
||||||
const int y = proximityOnly ? NOT_A_COORDINATE : yCoordinates[i];
|
const int y = proximityOnly ? NOT_A_COORDINATE : yCoordinates[i];
|
||||||
const int time = times ? times[i] : -1;
|
const int time = times ? times[i] : -1;
|
||||||
|
@ -306,12 +306,12 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi
|
||||||
&& xCoordinates && yCoordinates;
|
&& xCoordinates && yCoordinates;
|
||||||
if (!isGeometric && pointerId == 0) {
|
if (!isGeometric && pointerId == 0) {
|
||||||
for (int i = 0; i < inputSize; ++i) {
|
for (int i = 0; i < inputSize; ++i) {
|
||||||
mPrimaryInputWord[i] = getPrimaryCharAt(i);
|
mPrimaryInputWord[i] = getPrimaryCodePointAt(i);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int i = 0; i < mInputSize && mTouchPositionCorrectionEnabled; ++i) {
|
for (int i = 0; i < mInputSize && mTouchPositionCorrectionEnabled; ++i) {
|
||||||
const int *proximityChars = getProximityCharsAt(i);
|
const int *proximityCodePoints = getProximityCodePointsAt(i);
|
||||||
const int primaryKey = proximityChars[0];
|
const int primaryKey = proximityCodePoints[0];
|
||||||
const int x = xCoordinates[i];
|
const int x = xCoordinates[i];
|
||||||
const int y = yCoordinates[i];
|
const int y = yCoordinates[i];
|
||||||
if (DEBUG_PROXIMITY_CHARS) {
|
if (DEBUG_PROXIMITY_CHARS) {
|
||||||
|
@ -319,11 +319,12 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi
|
||||||
a += 0;
|
a += 0;
|
||||||
AKLOGI("--- Primary = %c, x = %d, y = %d", primaryKey, x, y);
|
AKLOGI("--- Primary = %c, x = %d, y = %d", primaryKey, x, y);
|
||||||
}
|
}
|
||||||
for (int j = 0; j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL && proximityChars[j] > 0; ++j) {
|
for (int j = 0; j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL && proximityCodePoints[j] > 0;
|
||||||
const int currentChar = proximityChars[j];
|
++j) {
|
||||||
|
const int currentCodePoint = proximityCodePoints[j];
|
||||||
const float squaredDistance =
|
const float squaredDistance =
|
||||||
hasInputCoordinates() ? calculateNormalizedSquaredDistance(
|
hasInputCoordinates() ? calculateNormalizedSquaredDistance(
|
||||||
mProximityInfo->getKeyIndexOf(currentChar), i) :
|
mProximityInfo->getKeyIndexOf(currentCodePoint), i) :
|
||||||
NOT_A_DISTANCE_FLOAT;
|
NOT_A_DISTANCE_FLOAT;
|
||||||
if (squaredDistance >= 0.0f) {
|
if (squaredDistance >= 0.0f) {
|
||||||
mNormalizedSquaredDistances[i * MAX_PROXIMITY_CHARS_SIZE_INTERNAL + j] =
|
mNormalizedSquaredDistances[i * MAX_PROXIMITY_CHARS_SIZE_INTERNAL + j] =
|
||||||
|
@ -334,7 +335,7 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi
|
||||||
PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO;
|
PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO;
|
||||||
}
|
}
|
||||||
if (DEBUG_PROXIMITY_CHARS) {
|
if (DEBUG_PROXIMITY_CHARS) {
|
||||||
AKLOGI("--- Proximity (%d) = %c", j, currentChar);
|
AKLOGI("--- Proximity (%d) = %c", j, currentCodePoint);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -449,7 +450,7 @@ float ProximityInfoState::getPointScore(
|
||||||
|
|
||||||
// Sampling touch point and pushing information to vectors.
|
// Sampling touch point and pushing information to vectors.
|
||||||
// Returning if previous point is popped or not.
|
// Returning if previous point is popped or not.
|
||||||
bool ProximityInfoState::pushTouchPoint(const int inputIndex, const int nodeChar, int x, int y,
|
bool ProximityInfoState::pushTouchPoint(const int inputIndex, const int nodeCodePoint, int x, int y,
|
||||||
const int time, const bool sample, const bool isLastPoint, const float sumAngle,
|
const int time, const bool sample, const bool isLastPoint, const float sumAngle,
|
||||||
NearKeysDistanceMap *const currentNearKeysDistances,
|
NearKeysDistanceMap *const currentNearKeysDistances,
|
||||||
const NearKeysDistanceMap *const prevNearKeysDistances,
|
const NearKeysDistanceMap *const prevNearKeysDistances,
|
||||||
|
@ -458,7 +459,7 @@ bool ProximityInfoState::pushTouchPoint(const int inputIndex, const int nodeChar
|
||||||
|
|
||||||
size_t size = mInputXs.size();
|
size_t size = mInputXs.size();
|
||||||
bool popped = false;
|
bool popped = false;
|
||||||
if (nodeChar < 0 && sample) {
|
if (nodeCodePoint < 0 && sample) {
|
||||||
const float nearest = updateNearKeysDistances(x, y, currentNearKeysDistances);
|
const float nearest = updateNearKeysDistances(x, y, currentNearKeysDistances);
|
||||||
const float score = getPointScore(x, y, time, isLastPoint, nearest, sumAngle,
|
const float score = getPointScore(x, y, time, isLastPoint, nearest, sumAngle,
|
||||||
currentNearKeysDistances, prevNearKeysDistances, prevPrevNearKeysDistances);
|
currentNearKeysDistances, prevNearKeysDistances, prevPrevNearKeysDistances);
|
||||||
|
@ -487,8 +488,8 @@ bool ProximityInfoState::pushTouchPoint(const int inputIndex, const int nodeChar
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (nodeChar >= 0 && (x < 0 || y < 0)) {
|
if (nodeCodePoint >= 0 && (x < 0 || y < 0)) {
|
||||||
const int keyId = mProximityInfo->getKeyIndexOf(nodeChar);
|
const int keyId = mProximityInfo->getKeyIndexOf(nodeCodePoint);
|
||||||
if (keyId >= 0) {
|
if (keyId >= 0) {
|
||||||
x = mProximityInfo->getKeyCenterXOfKeyIdG(keyId);
|
x = mProximityInfo->getKeyCenterXOfKeyIdG(keyId);
|
||||||
y = mProximityInfo->getKeyCenterYOfKeyIdG(keyId);
|
y = mProximityInfo->getKeyCenterYOfKeyIdG(keyId);
|
||||||
|
@ -543,7 +544,7 @@ float ProximityInfoState::getPointToKeyLength(const int inputIndex, const int co
|
||||||
const int index = inputIndex * mProximityInfo->getKeyCount() + keyId;
|
const int index = inputIndex * mProximityInfo->getKeyCount() + keyId;
|
||||||
return min(mDistanceCache[index], mMaxPointToKeyLength);
|
return min(mDistanceCache[index], mMaxPointToKeyLength);
|
||||||
}
|
}
|
||||||
if (isSkippableChar(codePoint)) {
|
if (isSkippableCodePoint(codePoint)) {
|
||||||
return 0.0f;
|
return 0.0f;
|
||||||
}
|
}
|
||||||
// If the char is not a key on the keyboard then return the max length.
|
// If the char is not a key on the keyboard then return the max length.
|
||||||
|
@ -960,9 +961,9 @@ bool ProximityInfoState::suppressCharProbabilities(const int index0, const int i
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get a word that is detected by tracing highest probability sequence into charBuf and returns
|
// Get a word that is detected by tracing highest probability sequence into codePointBuf and
|
||||||
// probability of generating the word.
|
// returns probability of generating the word.
|
||||||
float ProximityInfoState::getHighestProbabilitySequence(uint16_t *const charBuf) const {
|
float ProximityInfoState::getHighestProbabilitySequence(int *const codePointBuf) const {
|
||||||
static const float DEMOTION_LOG_PROBABILITY = 0.3f;
|
static const float DEMOTION_LOG_PROBABILITY = 0.3f;
|
||||||
int index = 0;
|
int index = 0;
|
||||||
float sumLogProbability = 0.0f;
|
float sumLogProbability = 0.0f;
|
||||||
|
@ -980,12 +981,12 @@ float ProximityInfoState::getHighestProbabilitySequence(uint16_t *const charBuf)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (character != NOT_AN_INDEX) {
|
if (character != NOT_AN_INDEX) {
|
||||||
charBuf[index] = mProximityInfo->getCodePointOf(character);
|
codePointBuf[index] = mProximityInfo->getCodePointOf(character);
|
||||||
index++;
|
index++;
|
||||||
}
|
}
|
||||||
sumLogProbability += minLogProbability;
|
sumLogProbability += minLogProbability;
|
||||||
}
|
}
|
||||||
charBuf[index] = '\0';
|
codePointBuf[index] = '\0';
|
||||||
return sumLogProbability;
|
return sumLogProbability;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -43,7 +43,7 @@ class ProximityInfoState {
|
||||||
// Defined in proximity_info_state.cpp //
|
// Defined in proximity_info_state.cpp //
|
||||||
/////////////////////////////////////////
|
/////////////////////////////////////////
|
||||||
void initInputParams(const int pointerId, const float maxPointToKeyLength,
|
void initInputParams(const int pointerId, const float maxPointToKeyLength,
|
||||||
const ProximityInfo *proximityInfo, const int32_t *const inputCodes,
|
const ProximityInfo *proximityInfo, const int *const inputCodes,
|
||||||
const int inputSize, const int *xCoordinates, const int *yCoordinates,
|
const int inputSize, const int *xCoordinates, const int *yCoordinates,
|
||||||
const int *const times, const int *const pointerIds, const bool isGeometric);
|
const int *const times, const int *const pointerIds, const bool isGeometric);
|
||||||
|
|
||||||
|
@ -65,15 +65,15 @@ class ProximityInfoState {
|
||||||
|
|
||||||
virtual ~ProximityInfoState() {}
|
virtual ~ProximityInfoState() {}
|
||||||
|
|
||||||
inline unsigned short getPrimaryCharAt(const int index) const {
|
inline int getPrimaryCodePointAt(const int index) const {
|
||||||
return getProximityCharsAt(index)[0];
|
return getProximityCodePointsAt(index)[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
inline bool existsCharInProximityAt(const int index, const int c) const {
|
inline bool existsCodePointInProximityAt(const int index, const int c) const {
|
||||||
const int *chars = getProximityCharsAt(index);
|
const int *codePoints = getProximityCodePointsAt(index);
|
||||||
int i = 0;
|
int i = 0;
|
||||||
while (chars[i] > 0 && i < MAX_PROXIMITY_CHARS_SIZE_INTERNAL) {
|
while (codePoints[i] > 0 && i < MAX_PROXIMITY_CHARS_SIZE_INTERNAL) {
|
||||||
if (chars[i++] == c) {
|
if (codePoints[i++] == c) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -82,13 +82,13 @@ class ProximityInfoState {
|
||||||
|
|
||||||
inline bool existsAdjacentProximityChars(const int index) const {
|
inline bool existsAdjacentProximityChars(const int index) const {
|
||||||
if (index < 0 || index >= mInputSize) return false;
|
if (index < 0 || index >= mInputSize) return false;
|
||||||
const int currentChar = getPrimaryCharAt(index);
|
const int currentCodePoint = getPrimaryCodePointAt(index);
|
||||||
const int leftIndex = index - 1;
|
const int leftIndex = index - 1;
|
||||||
if (leftIndex >= 0 && existsCharInProximityAt(leftIndex, currentChar)) {
|
if (leftIndex >= 0 && existsCodePointInProximityAt(leftIndex, currentCodePoint)) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
const int rightIndex = index + 1;
|
const int rightIndex = index + 1;
|
||||||
if (rightIndex < mInputSize && existsCharInProximityAt(rightIndex, currentChar)) {
|
if (rightIndex < mInputSize && existsCodePointInProximityAt(rightIndex, currentCodePoint)) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
|
@ -106,15 +106,15 @@ class ProximityInfoState {
|
||||||
// Notice : accented characters do not have a proximity list, so they are alone
|
// Notice : accented characters do not have a proximity list, so they are alone
|
||||||
// in their list. The non-accented version of the character should be considered
|
// in their list. The non-accented version of the character should be considered
|
||||||
// "close", but not the other keys close to the non-accented version.
|
// "close", but not the other keys close to the non-accented version.
|
||||||
inline ProximityType getMatchedProximityId(const int index,
|
inline ProximityType getMatchedProximityId(const int index, const int c,
|
||||||
const unsigned short c, const bool checkProximityChars, int *proximityIndex = 0) const {
|
const bool checkProximityChars, int *proximityIndex = 0) const {
|
||||||
const int *currentChars = getProximityCharsAt(index);
|
const int *currentCodePoints = getProximityCodePointsAt(index);
|
||||||
const int firstChar = currentChars[0];
|
const int firstCodePoint = currentCodePoints[0];
|
||||||
const unsigned short baseLowerC = toBaseLowerCase(c);
|
const int baseLowerC = toBaseLowerCase(c);
|
||||||
|
|
||||||
// The first char in the array is what user typed. If it matches right away,
|
// The first char in the array is what user typed. If it matches right away,
|
||||||
// that means the user typed that same char for this pos.
|
// that means the user typed that same char for this pos.
|
||||||
if (firstChar == baseLowerC || firstChar == c) {
|
if (firstCodePoint == baseLowerC || firstCodePoint == c) {
|
||||||
return EQUIVALENT_CHAR;
|
return EQUIVALENT_CHAR;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -123,14 +123,14 @@ class ProximityInfoState {
|
||||||
// If the non-accented, lowercased version of that first character matches c,
|
// If the non-accented, lowercased version of that first character matches c,
|
||||||
// then we have a non-accented version of the accented character the user
|
// then we have a non-accented version of the accented character the user
|
||||||
// typed. Treat it as a close char.
|
// typed. Treat it as a close char.
|
||||||
if (toBaseLowerCase(firstChar) == baseLowerC)
|
if (toBaseLowerCase(firstCodePoint) == baseLowerC)
|
||||||
return NEAR_PROXIMITY_CHAR;
|
return NEAR_PROXIMITY_CHAR;
|
||||||
|
|
||||||
// Not an exact nor an accent-alike match: search the list of close keys
|
// Not an exact nor an accent-alike match: search the list of close keys
|
||||||
int j = 1;
|
int j = 1;
|
||||||
while (j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL
|
while (j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL
|
||||||
&& currentChars[j] > ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) {
|
&& currentCodePoints[j] > ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) {
|
||||||
const bool matched = (currentChars[j] == baseLowerC || currentChars[j] == c);
|
const bool matched = (currentCodePoints[j] == baseLowerC || currentCodePoints[j] == c);
|
||||||
if (matched) {
|
if (matched) {
|
||||||
if (proximityIndex) {
|
if (proximityIndex) {
|
||||||
*proximityIndex = j;
|
*proximityIndex = j;
|
||||||
|
@ -140,11 +140,12 @@ class ProximityInfoState {
|
||||||
++j;
|
++j;
|
||||||
}
|
}
|
||||||
if (j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL
|
if (j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL
|
||||||
&& currentChars[j] == ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) {
|
&& currentCodePoints[j] == ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) {
|
||||||
++j;
|
++j;
|
||||||
while (j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL
|
while (j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL
|
||||||
&& currentChars[j] > ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) {
|
&& currentCodePoints[j] > ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) {
|
||||||
const bool matched = (currentChars[j] == baseLowerC || currentChars[j] == c);
|
const bool matched =
|
||||||
|
(currentCodePoints[j] == baseLowerC || currentCodePoints[j] == c);
|
||||||
if (matched) {
|
if (matched) {
|
||||||
if (proximityIndex) {
|
if (proximityIndex) {
|
||||||
*proximityIndex = j;
|
*proximityIndex = j;
|
||||||
|
@ -165,7 +166,7 @@ class ProximityInfoState {
|
||||||
inputIndex * MAX_PROXIMITY_CHARS_SIZE_INTERNAL + proximityIndex];
|
inputIndex * MAX_PROXIMITY_CHARS_SIZE_INTERNAL + proximityIndex];
|
||||||
}
|
}
|
||||||
|
|
||||||
inline const unsigned short *getPrimaryInputWord() const {
|
inline const int *getPrimaryInputWord() const {
|
||||||
return mPrimaryInputWord;
|
return mPrimaryInputWord;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -173,13 +174,13 @@ class ProximityInfoState {
|
||||||
return mTouchPositionCorrectionEnabled;
|
return mTouchPositionCorrectionEnabled;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline bool sameAsTyped(const unsigned short *word, int length) const {
|
inline bool sameAsTyped(const int *word, int length) const {
|
||||||
if (length != mInputSize) {
|
if (length != mInputSize) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
const int *inputCodes = mInputCodes;
|
const int *inputCodes = mInputCodes;
|
||||||
while (length--) {
|
while (length--) {
|
||||||
if (static_cast<unsigned int>(*inputCodes) != static_cast<unsigned int>(*word)) {
|
if (*inputCodes != *word) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
inputCodes += MAX_PROXIMITY_CHARS_SIZE_INTERNAL;
|
inputCodes += MAX_PROXIMITY_CHARS_SIZE_INTERNAL;
|
||||||
|
@ -236,7 +237,7 @@ class ProximityInfoState {
|
||||||
// Returns angle of three points. x, y, and z are indices.
|
// Returns angle of three points. x, y, and z are indices.
|
||||||
float getPointsAngle(const int index0, const int index1, const int index2) const;
|
float getPointsAngle(const int index0, const int index1, const int index2) const;
|
||||||
|
|
||||||
float getHighestProbabilitySequence(uint16_t *const charBuf) const;
|
float getHighestProbabilitySequence(int *const codePointBuf) const;
|
||||||
|
|
||||||
float getProbability(const int index, const int charCode) const;
|
float getProbability(const int index, const int charCode) const;
|
||||||
|
|
||||||
|
@ -255,7 +256,7 @@ class ProximityInfoState {
|
||||||
float calculateSquaredDistanceFromSweetSpotCenter(
|
float calculateSquaredDistanceFromSweetSpotCenter(
|
||||||
const int keyIndex, const int inputIndex) const;
|
const int keyIndex, const int inputIndex) const;
|
||||||
|
|
||||||
bool pushTouchPoint(const int inputIndex, const int nodeChar, int x, int y, const int time,
|
bool pushTouchPoint(const int inputIndex, const int nodeCodePoint, int x, int y, const int time,
|
||||||
const bool sample, const bool isLastPoint, const float sumAngle,
|
const bool sample, const bool isLastPoint, const float sumAngle,
|
||||||
NearKeysDistanceMap *const currentNearKeysDistances,
|
NearKeysDistanceMap *const currentNearKeysDistances,
|
||||||
const NearKeysDistanceMap *const prevNearKeysDistances,
|
const NearKeysDistanceMap *const prevNearKeysDistances,
|
||||||
|
@ -269,7 +270,7 @@ class ProximityInfoState {
|
||||||
return mInputXs.size() > 0 && mInputYs.size() > 0;
|
return mInputXs.size() > 0 && mInputYs.size() > 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline const int *getProximityCharsAt(const int index) const {
|
inline const int *getProximityCodePointsAt(const int index) const {
|
||||||
return mInputCodes + (index * MAX_PROXIMITY_CHARS_SIZE_INTERNAL);
|
return mInputCodes + (index * MAX_PROXIMITY_CHARS_SIZE_INTERNAL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -322,10 +323,10 @@ class ProximityInfoState {
|
||||||
// inputs including the current input point.
|
// inputs including the current input point.
|
||||||
std::vector<NearKeycodesSet> mSearchKeysVector;
|
std::vector<NearKeycodesSet> mSearchKeysVector;
|
||||||
bool mTouchPositionCorrectionEnabled;
|
bool mTouchPositionCorrectionEnabled;
|
||||||
int32_t mInputCodes[MAX_PROXIMITY_CHARS_SIZE_INTERNAL * MAX_WORD_LENGTH_INTERNAL];
|
int mInputCodes[MAX_PROXIMITY_CHARS_SIZE_INTERNAL * MAX_WORD_LENGTH_INTERNAL];
|
||||||
int mNormalizedSquaredDistances[MAX_PROXIMITY_CHARS_SIZE_INTERNAL * MAX_WORD_LENGTH_INTERNAL];
|
int mNormalizedSquaredDistances[MAX_PROXIMITY_CHARS_SIZE_INTERNAL * MAX_WORD_LENGTH_INTERNAL];
|
||||||
int mInputSize;
|
int mInputSize;
|
||||||
unsigned short mPrimaryInputWord[MAX_WORD_LENGTH_INTERNAL];
|
int mPrimaryInputWord[MAX_WORD_LENGTH_INTERNAL];
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif // LATINIME_PROXIMITY_INFO_STATE_H
|
#endif // LATINIME_PROXIMITY_INFO_STATE_H
|
||||||
|
|
|
@ -43,18 +43,16 @@ class TerminalAttributes {
|
||||||
return mHasNextShortcutTarget;
|
return mHasNextShortcutTarget;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Rendered diff hunk: each statement below appears twice (before the change, then after).
// The change widens outWord from uint16_t* to int* and drops the (uint16_t) cast, so the
// after-side stores each code point unmodified instead of keeping only its low 16 bits.
//
// What the visible body does:
//  - reads the shortcut flags via BinaryFormat::getFlagsAndForwardPointer(mDict, &mPos) and
//    sets mHasNextShortcutTarget from the FLAG_ATTRIBUTE_HAS_NEXT bit;
//  - copies code points into outWord until NOT_A_CODE_POINT is read or
//    MAX_WORD_LENGTH_INTERNAL entries have been written (no terminator is appended);
//  - writes the frequency decoded from the flags to *outFreq;
//  - returns the number of code points copied.
// NOTE(review): maxDepth is not referenced in the visible body; the copy is bounded by
// MAX_WORD_LENGTH_INTERNAL instead, so outWord must hold at least that many elements.
// NOTE(review): the hunk ends at `return i;` - the function's closing brace is outside it.
// Gets the shortcut target itself as a uint16_t string. For parameters and return value
|
// Gets the shortcut target itself as an int string. For parameters and return value
|
||||||
// see BinaryFormat::getWordAtAddress.
|
// see BinaryFormat::getWordAtAddress.
|
||||||
// TODO: make the output an uint32_t* to handle the whole unicode range.
|
inline int getNextShortcutTarget(const int maxDepth, int *outWord, int *outFreq) {
|
||||||
inline int getNextShortcutTarget(const int maxDepth, uint16_t *outWord, int *outFreq) {
|

||||||
const int shortcutFlags = BinaryFormat::getFlagsAndForwardPointer(mDict, &mPos);
|
const int shortcutFlags = BinaryFormat::getFlagsAndForwardPointer(mDict, &mPos);
|
||||||
mHasNextShortcutTarget =
|
mHasNextShortcutTarget = 0 != (shortcutFlags & BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT);
|
||||||
0 != (shortcutFlags & BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT);
|

||||||
unsigned int i;
|
unsigned int i;
|
||||||
for (i = 0; i < MAX_WORD_LENGTH_INTERNAL; ++i) {
|
for (i = 0; i < MAX_WORD_LENGTH_INTERNAL; ++i) {
|
||||||
const int codePoint = BinaryFormat::getCodePointAndForwardPointer(mDict, &mPos);
|
const int codePoint = BinaryFormat::getCodePointAndForwardPointer(mDict, &mPos);
|
||||||
if (NOT_A_CODE_POINT == codePoint) break;
|
if (NOT_A_CODE_POINT == codePoint) break;
|
||||||
outWord[i] = (uint16_t)codePoint;
|
outWord[i] = codePoint;
|
||||||
}
|
}
|
||||||
*outFreq = BinaryFormat::getAttributeFrequencyFromFlags(shortcutFlags);
|
*outFreq = BinaryFormat::getAttributeFrequencyFromFlags(shortcutFlags);
|
||||||
return i;
|
return i;
|
||||||
|
|
|
@ -55,13 +55,13 @@ UnigramDictionary::UnigramDictionary(const uint8_t *const streamStart, int fullW
|
||||||
UnigramDictionary::~UnigramDictionary() {
|
UnigramDictionary::~UnigramDictionary() {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Rendered diff hunk: each statement below appears twice (before the change, then after).
//
// Returns the size in bytes of a buffer holding codesSize elements of *codes
// (sizeof(*codes) * codesSize). `codes` is only used inside sizeof and is never dereferenced.
// Before: the element size is cast to unsigned int and the result is unsigned int.
// After: the product is formed with sizeof's size_t operand and then implicitly converted
// to the int return type.
// NOTE(review): a negative codesSize is not rejected on either side - confirm callers
// only pass non-negative sizes.
static inline unsigned int getCodesBufferSize(const int *codes, const int codesSize) {
|
static inline int getCodesBufferSize(const int *codes, const int codesSize) {
|
||||||
return static_cast<unsigned int>(sizeof(*codes)) * codesSize;
|
return sizeof(*codes) * codesSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Rendered diff hunk: each statement below appears twice (before the change, then after).
// The change switches `word` from unsigned short* to int* and reflows the signature.
//
// Thin forwarding helper: pushes `word` (with `length` elements) onto `queue`, passing
// `frequency` as the first argument of WordsPriorityQueue::push and `type` as the last.
// TODO: This needs to take a const unsigned short* and not tinker with its contents
|
// TODO: This needs to take a const int* and not tinker with its contents
|
||||||
static inline void addWord(unsigned short *word, int length, int frequency,
|
static inline void addWord(int *word, int length, int frequency, WordsPriorityQueue *queue,
|
||||||
WordsPriorityQueue *queue, int type) {
|
int type) {
|
||||||
queue->push(frequency, word, length, type);
|
queue->push(frequency, word, length, type);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -171,9 +171,7 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit
|
||||||
int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
|
int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
|
||||||
const int *ycoordinates, const int *codes, const int codesSize,
|
const int *ycoordinates, const int *codes, const int codesSize,
|
||||||
const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
|
const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
|
||||||
const bool useFullEditDistance, unsigned short *outWords, int *frequencies,
|
const bool useFullEditDistance, int *outWords, int *frequencies, int *outputTypes) const {
|
||||||
int *outputTypes) const {
|
|
||||||
|
|
||||||
WordsPriorityQueuePool queuePool(MAX_WORDS, SUB_QUEUE_MAX_WORDS, MAX_WORD_LENGTH);
|
WordsPriorityQueuePool queuePool(MAX_WORDS, SUB_QUEUE_MAX_WORDS, MAX_WORD_LENGTH);
|
||||||
queuePool.clearAll();
|
queuePool.clearAll();
|
||||||
Correction masterCorrection;
|
Correction masterCorrection;
|
||||||
|
@ -218,7 +216,7 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *x
|
||||||
AKLOGI("Returning %d words", suggestedWordsCount);
|
AKLOGI("Returning %d words", suggestedWordsCount);
|
||||||
/// Print the returned words
|
/// Print the returned words
|
||||||
// Rendered diff hunk (fragment of getSuggestions): each statement below appears twice
// (before the change, then after). Only the type of `w` changes
// (short unsigned int* -> int*).
//
// For each returned word j, `w` points at its slot in outWords (stride MAX_WORD_LENGTH)
// and the elements are narrowed into the local char buffer `s`.
// NOTE(review): `s` has MAX_WORD_LENGTH elements but the copy loop runs while
// i <= MAX_WORD_LENGTH, so its last iteration writes s[MAX_WORD_LENGTH], one element past
// the end of `s` (and reads w[MAX_WORD_LENGTH]). This looks like an off-by-one; `<` is
// presumably what was intended - confirm before changing.
for (int j = 0; j < suggestedWordsCount; ++j) {
|
for (int j = 0; j < suggestedWordsCount; ++j) {
|
||||||
short unsigned int *w = outWords + j * MAX_WORD_LENGTH;
|
int *w = outWords + j * MAX_WORD_LENGTH;
|
||||||
char s[MAX_WORD_LENGTH];
|
char s[MAX_WORD_LENGTH];
|
||||||
for (int i = 0; i <= MAX_WORD_LENGTH; i++) s[i] = w[i];
|
for (int i = 0; i <= MAX_WORD_LENGTH; i++) s[i] = w[i];
|
||||||
(void)s; // To suppress compiler warning
|
(void)s; // To suppress compiler warning
|
||||||
|
@ -230,12 +228,11 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *x
|
||||||
return suggestedWordsCount;
|
return suggestedWordsCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
|
void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
|
||||||
const int *xcoordinates, const int *ycoordinates, const int *codes,
|
const int *ycoordinates, const int *codes, const int inputSize,
|
||||||
const int inputSize, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
|
const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
|
||||||
const bool useFullEditDistance, Correction *correction,
|
const bool useFullEditDistance, Correction *correction, WordsPriorityQueuePool *queuePool)
|
||||||
WordsPriorityQueuePool *queuePool) const {
|
const {
|
||||||
|
|
||||||
PROF_OPEN;
|
PROF_OPEN;
|
||||||
PROF_START(0);
|
PROF_START(0);
|
||||||
PROF_END(0);
|
PROF_END(0);
|
||||||
|
@ -284,7 +281,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
|
||||||
if (queue->size() > 0) {
|
if (queue->size() > 0) {
|
||||||
WordsPriorityQueue::SuggestedWord *sw = queue->top();
|
WordsPriorityQueue::SuggestedWord *sw = queue->top();
|
||||||
const int score = sw->mScore;
|
const int score = sw->mScore;
|
||||||
const unsigned short *word = sw->mWord;
|
const int *word = sw->mWord;
|
||||||
const int wordLength = sw->mWordLength;
|
const int wordLength = sw->mWordLength;
|
||||||
float ns = Correction::RankingAlgorithm::calcNormalizedScore(
|
float ns = Correction::RankingAlgorithm::calcNormalizedScore(
|
||||||
correction->getPrimaryInputWord(), i, word, wordLength, score);
|
correction->getPrimaryInputWord(), i, word, wordLength, score);
|
||||||
|
@ -303,7 +300,7 @@ void UnigramDictionary::initSuggestions(ProximityInfo *proximityInfo, const int
|
||||||
Correction *correction) const {
|
Correction *correction) const {
|
||||||
if (DEBUG_DICT) {
|
if (DEBUG_DICT) {
|
||||||
AKLOGI("initSuggest");
|
AKLOGI("initSuggest");
|
||||||
DUMP_WORD_INT(codes, inputSize);
|
DUMP_WORD(codes, inputSize);
|
||||||
}
|
}
|
||||||
correction->initInputParams(proximityInfo, codes, inputSize, xCoordinates, yCoordinates);
|
correction->initInputParams(proximityInfo, codes, inputSize, xCoordinates, yCoordinates);
|
||||||
const int maxDepth = min(inputSize * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);
|
const int maxDepth = min(inputSize * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);
|
||||||
|
@ -376,7 +373,7 @@ inline void UnigramDictionary::onTerminal(const int probability,
|
||||||
const bool addToSubQueue = inputIndex < SUB_QUEUE_MAX_COUNT;
|
const bool addToSubQueue = inputIndex < SUB_QUEUE_MAX_COUNT;
|
||||||
|
|
||||||
int wordLength;
|
int wordLength;
|
||||||
unsigned short *wordPointer;
|
int *wordPointer;
|
||||||
|
|
||||||
if ((currentWordIndex == FIRST_WORD_INDEX) && addToMasterQueue) {
|
if ((currentWordIndex == FIRST_WORD_INDEX) && addToMasterQueue) {
|
||||||
WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
|
WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
|
||||||
|
@ -404,7 +401,7 @@ inline void UnigramDictionary::onTerminal(const int probability,
|
||||||
// so that the insert order is protected inside the queue for words
|
// so that the insert order is protected inside the queue for words
|
||||||
// with the same score. For the moment we use -1 to make sure the shortcut will
|
// with the same score. For the moment we use -1 to make sure the shortcut will
|
||||||
// never be in front of the word.
|
// never be in front of the word.
|
||||||
uint16_t shortcutTarget[MAX_WORD_LENGTH_INTERNAL];
|
int shortcutTarget[MAX_WORD_LENGTH_INTERNAL];
|
||||||
int shortcutFrequency;
|
int shortcutFrequency;
|
||||||
const int shortcutTargetStringLength = iterator.getNextShortcutTarget(
|
const int shortcutTargetStringLength = iterator.getNextShortcutTarget(
|
||||||
MAX_WORD_LENGTH_INTERNAL, shortcutTarget, &shortcutFrequency);
|
MAX_WORD_LENGTH_INTERNAL, shortcutTarget, &shortcutFrequency);
|
||||||
|
@ -444,7 +441,7 @@ int UnigramDictionary::getSubStringSuggestion(
|
||||||
const bool hasAutoCorrectionCandidate, const int currentWordIndex,
|
const bool hasAutoCorrectionCandidate, const int currentWordIndex,
|
||||||
const int inputWordStartPos, const int inputWordLength,
|
const int inputWordStartPos, const int inputWordLength,
|
||||||
const int outputWordStartPos, const bool isSpaceProximity, int *freqArray,
|
const int outputWordStartPos, const bool isSpaceProximity, int *freqArray,
|
||||||
int *wordLengthArray, unsigned short *outputWord, int *outputWordLength) const {
|
int *wordLengthArray, int *outputWord, int *outputWordLength) const {
|
||||||
if (inputWordLength > MULTIPLE_WORDS_SUGGESTION_MAX_WORD_LENGTH) {
|
if (inputWordLength > MULTIPLE_WORDS_SUGGESTION_MAX_WORD_LENGTH) {
|
||||||
return FLAG_MULTIPLE_SUGGEST_ABORT;
|
return FLAG_MULTIPLE_SUGGEST_ABORT;
|
||||||
}
|
}
|
||||||
|
@ -487,13 +484,13 @@ int UnigramDictionary::getSubStringSuggestion(
|
||||||
// TODO: Remove the safety net above //
|
// TODO: Remove the safety net above //
|
||||||
//////////////////////////////////////////////
|
//////////////////////////////////////////////
|
||||||
|
|
||||||
unsigned short *tempOutputWord = 0;
|
int *tempOutputWord = 0;
|
||||||
int nextWordLength = 0;
|
int nextWordLength = 0;
|
||||||
// TODO: Optimize init suggestion
|
// TODO: Optimize init suggestion
|
||||||
initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
|
initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
|
||||||
inputSize, correction);
|
inputSize, correction);
|
||||||
|
|
||||||
unsigned short word[MAX_WORD_LENGTH_INTERNAL];
|
int word[MAX_WORD_LENGTH_INTERNAL];
|
||||||
int freq = getMostFrequentWordLike(
|
int freq = getMostFrequentWordLike(
|
||||||
inputWordStartPos, inputWordLength, correction, word);
|
inputWordStartPos, inputWordLength, correction, word);
|
||||||
if (freq > 0) {
|
if (freq > 0) {
|
||||||
|
@ -592,7 +589,7 @@ void UnigramDictionary::getMultiWordsSuggestionRec(ProximityInfo *proximityInfo,
|
||||||
const bool useFullEditDistance, const int inputSize, Correction *correction,
|
const bool useFullEditDistance, const int inputSize, Correction *correction,
|
||||||
WordsPriorityQueuePool *queuePool, const bool hasAutoCorrectionCandidate,
|
WordsPriorityQueuePool *queuePool, const bool hasAutoCorrectionCandidate,
|
||||||
const int startInputPos, const int startWordIndex, const int outputWordLength,
|
const int startInputPos, const int startWordIndex, const int outputWordLength,
|
||||||
int *freqArray, int *wordLengthArray, unsigned short *outputWord) const {
|
int *freqArray, int *wordLengthArray, int *outputWord) const {
|
||||||
if (startWordIndex >= (MULTIPLE_WORDS_SUGGESTION_MAX_WORDS - 1)) {
|
if (startWordIndex >= (MULTIPLE_WORDS_SUGGESTION_MAX_WORDS - 1)) {
|
||||||
// Return if the last word index
|
// Return if the last word index
|
||||||
return;
|
return;
|
||||||
|
@ -678,7 +675,7 @@ void UnigramDictionary::getSplitMultipleWordsSuggestions(ProximityInfo *proximit
|
||||||
}
|
}
|
||||||
|
|
||||||
// Allocating fixed length array on stack
|
// Allocating fixed length array on stack
|
||||||
unsigned short outputWord[MAX_WORD_LENGTH];
|
int outputWord[MAX_WORD_LENGTH];
|
||||||
int freqArray[MULTIPLE_WORDS_SUGGESTION_MAX_WORDS];
|
int freqArray[MULTIPLE_WORDS_SUGGESTION_MAX_WORDS];
|
||||||
int wordLengthArray[MULTIPLE_WORDS_SUGGESTION_MAX_WORDS];
|
int wordLengthArray[MULTIPLE_WORDS_SUGGESTION_MAX_WORDS];
|
||||||
const int outputWordLength = 0;
|
const int outputWordLength = 0;
|
||||||
|
@ -693,11 +690,11 @@ void UnigramDictionary::getSplitMultipleWordsSuggestions(ProximityInfo *proximit
|
||||||
// Rendered diff hunk: each statement below appears twice (before the change, then after).
// The change widens the buffers from 16-bit types to int and replaces
// (uint16_t)correction->getPrimaryCharAt(...) with correction->getPrimaryCodePointAt(...).
//
// Builds a temporary copy of inputSize primary inputs taken from `correction`, starting at
// startInputIndex, and passes it to getMostFrequentWordLikeInner together with the output
// buffer `word`; that call's result is returned unchanged.
// NOTE(review): inWord is sized by the runtime parameter inputSize, i.e. a variable-length
// array, which is a compiler extension in C++ rather than standard; inputSize is not
// validated here.
// Wrapper for getMostFrequentWordLikeInner, which matches it to the previous
|
// Wrapper for getMostFrequentWordLikeInner, which matches it to the previous
|
||||||
// interface.
|
// interface.
|
||||||
inline int UnigramDictionary::getMostFrequentWordLike(const int startInputIndex,
|
inline int UnigramDictionary::getMostFrequentWordLike(const int startInputIndex,
|
||||||
const int inputSize, Correction *correction, unsigned short *word) const {
|
const int inputSize, Correction *correction, int *word) const {
|
||||||
uint16_t inWord[inputSize];
|
int inWord[inputSize];
|
||||||


||||||
for (int i = 0; i < inputSize; ++i) {
|
for (int i = 0; i < inputSize; ++i) {
|
||||||
inWord[i] = (uint16_t)correction->getPrimaryCharAt(startInputIndex + i);
|
inWord[i] = correction->getPrimaryCodePointAt(startInputIndex + i);
|
||||||
}
|
}
|
||||||
return getMostFrequentWordLikeInner(inWord, inputSize, word);
|
return getMostFrequentWordLikeInner(inWord, inputSize, word);
|
||||||
}
|
}
|
||||||
|
@ -715,14 +712,14 @@ inline int UnigramDictionary::getMostFrequentWordLike(const int startInputIndex,
|
||||||
// In and out parameters may point to the same location. This function takes care
|
// In and out parameters may point to the same location. This function takes care
|
||||||
// not to use any input parameters after it wrote into its outputs.
|
// not to use any input parameters after it wrote into its outputs.
|
||||||
static inline bool testCharGroupForContinuedLikeness(const uint8_t flags,
|
static inline bool testCharGroupForContinuedLikeness(const uint8_t flags,
|
||||||
const uint8_t *const root, const int startPos, const uint16_t *const inWord,
|
const uint8_t *const root, const int startPos, const int *const inWord,
|
||||||
const int startInputIndex, const int inputSize, int32_t *outNewWord, int *outInputIndex,
|
const int startInputIndex, const int inputSize, int *outNewWord, int *outInputIndex,
|
||||||
int *outPos) {
|
int *outPos) {
|
||||||
const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags));
|
const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags));
|
||||||
int pos = startPos;
|
int pos = startPos;
|
||||||
int32_t codePoint = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
|
int codePoint = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
|
||||||
int32_t baseChar = toBaseLowerCase(codePoint);
|
int baseChar = toBaseLowerCase(codePoint);
|
||||||
const uint16_t wChar = toBaseLowerCase(inWord[startInputIndex]);
|
const int wChar = toBaseLowerCase(inWord[startInputIndex]);
|
||||||
|
|
||||||
if (baseChar != wChar) {
|
if (baseChar != wChar) {
|
||||||
*outPos = hasMultipleChars ? BinaryFormat::skipOtherCharacters(root, pos) : pos;
|
*outPos = hasMultipleChars ? BinaryFormat::skipOtherCharacters(root, pos) : pos;
|
||||||
|
@ -753,8 +750,8 @@ static inline bool testCharGroupForContinuedLikeness(const uint8_t flags,
|
||||||
// It will compare the frequency to the max frequency, and if greater, will
|
// It will compare the frequency to the max frequency, and if greater, will
|
||||||
// copy the word into the output buffer. In output value maxFreq, it will
|
// copy the word into the output buffer. In output value maxFreq, it will
|
||||||
// write the new maximum frequency if it changed.
|
// write the new maximum frequency if it changed.
|
||||||
static inline void onTerminalWordLike(const int freq, int32_t *newWord, const int length,
|
static inline void onTerminalWordLike(const int freq, int *newWord, const int length, int *outWord,
|
||||||
short unsigned int *outWord, int *maxFreq) {
|
int *maxFreq) {
|
||||||
if (freq > *maxFreq) {
|
if (freq > *maxFreq) {
|
||||||
for (int q = 0; q < length; ++q) {
|
for (int q = 0; q < length; ++q) {
|
||||||
outWord[q] = newWord[q];
|
outWord[q] = newWord[q];
|
||||||
|
@ -766,9 +763,9 @@ static inline void onTerminalWordLike(const int freq, int32_t *newWord, const in
|
||||||
|
|
||||||
// Will find the highest frequency of the words like the one passed as an argument,
|
// Will find the highest frequency of the words like the one passed as an argument,
|
||||||
// that is, everything that only differs by case/accents.
|
// that is, everything that only differs by case/accents.
|
||||||
int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t *const inWord,
|
int UnigramDictionary::getMostFrequentWordLikeInner(const int *const inWord, const int inputSize,
|
||||||
const int inputSize, short unsigned int *outWord) const {
|
int *outWord) const {
|
||||||
int32_t newWord[MAX_WORD_LENGTH_INTERNAL];
|
int newWord[MAX_WORD_LENGTH_INTERNAL];
|
||||||
int depth = 0;
|
int depth = 0;
|
||||||
int maxFreq = -1;
|
int maxFreq = -1;
|
||||||
const uint8_t *const root = DICT_ROOT;
|
const uint8_t *const root = DICT_ROOT;
|
||||||
|
@ -828,7 +825,7 @@ int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t *const inWord
|
||||||
return maxFreq;
|
return maxFreq;
|
||||||
}
|
}
|
||||||
|
|
||||||
int UnigramDictionary::getFrequency(const int32_t *const inWord, const int length) const {
|
int UnigramDictionary::getFrequency(const int *const inWord, const int length) const {
|
||||||
const uint8_t *const root = DICT_ROOT;
|
const uint8_t *const root = DICT_ROOT;
|
||||||
int pos = BinaryFormat::getTerminalPosition(root, inWord, length,
|
int pos = BinaryFormat::getTerminalPosition(root, inWord, length,
|
||||||
false /* forceLowerCaseSearch */);
|
false /* forceLowerCaseSearch */);
|
||||||
|
@ -853,8 +850,7 @@ int UnigramDictionary::getFrequency(const int32_t *const inWord, const int lengt
|
||||||
}
|
}
|
||||||
|
|
||||||
// Rendered diff hunk: each statement below appears twice (before the change, then after).
// The change switches `word` from unsigned short* to int* and reflows the signature.
//
// Stub: none of the parameters are used and the function always returns -1.
// TODO: remove this function.
|
// TODO: remove this function.
|
||||||
int UnigramDictionary::getBigramPosition(int pos, unsigned short *word, int offset,
|
int UnigramDictionary::getBigramPosition(int pos, int *word, int offset, int length) const {
|
||||||
int length) const {
|

||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -900,7 +896,7 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos,
|
||||||
// else if FLAG_IS_TERMINAL: the frequency
|
// else if FLAG_IS_TERMINAL: the frequency
|
||||||
// else if MASK_GROUP_ADDRESS_TYPE is not NONE: the children address
|
// else if MASK_GROUP_ADDRESS_TYPE is not NONE: the children address
|
||||||
// Note that you can't have a node that both is not a terminal and has no children.
|
// Note that you can't have a node that both is not a terminal and has no children.
|
||||||
int32_t c = BinaryFormat::getCodePointAndForwardPointer(DICT_ROOT, &pos);
|
int c = BinaryFormat::getCodePointAndForwardPointer(DICT_ROOT, &pos);
|
||||||
assert(NOT_A_CODE_POINT != c);
|
assert(NOT_A_CODE_POINT != c);
|
||||||
|
|
||||||
// We are going to loop through each character and make it look like it's a different
|
// We are going to loop through each character and make it look like it's a different
|
||||||
|
@ -914,7 +910,7 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos,
|
||||||
// We prefetch the next char. If 'c' is the last char of this node, we will have
|
// We prefetch the next char. If 'c' is the last char of this node, we will have
|
||||||
// NOT_A_CODE_POINT in the next char. From this we can decide whether this virtual node
|
// NOT_A_CODE_POINT in the next char. From this we can decide whether this virtual node
|
||||||
// should behave as a terminal or not and whether we have children.
|
// should behave as a terminal or not and whether we have children.
|
||||||
const int32_t nextc = hasMultipleChars
|
const int nextc = hasMultipleChars
|
||||||
? BinaryFormat::getCodePointAndForwardPointer(DICT_ROOT, &pos) : NOT_A_CODE_POINT;
|
? BinaryFormat::getCodePointAndForwardPointer(DICT_ROOT, &pos) : NOT_A_CODE_POINT;
|
||||||
const bool isLastChar = (NOT_A_CODE_POINT == nextc);
|
const bool isLastChar = (NOT_A_CODE_POINT == nextc);
|
||||||
// If there are more chars in this nodes, then this virtual node is not a terminal.
|
// If there are more chars in this nodes, then this virtual node is not a terminal.
|
||||||
|
|
|
@ -41,12 +41,12 @@ class UnigramDictionary {
|
||||||
static const int FLAG_MULTIPLE_SUGGEST_CONTINUE = 2;
|
static const int FLAG_MULTIPLE_SUGGEST_CONTINUE = 2;
|
||||||
UnigramDictionary(const uint8_t *const streamStart, int fullWordMultiplier, int maxWordLength,
|
UnigramDictionary(const uint8_t *const streamStart, int fullWordMultiplier, int maxWordLength,
|
||||||
int maxWords, const unsigned int flags);
|
int maxWords, const unsigned int flags);
|
||||||
int getFrequency(const int32_t *const inWord, const int length) const;
|
int getFrequency(const int *const inWord, const int length) const;
|
||||||
int getBigramPosition(int pos, unsigned short *word, int offset, int length) const;
|
int getBigramPosition(int pos, int *word, int offset, int length) const;
|
||||||
int getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
|
int getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
|
||||||
const int *ycoordinates, const int *codes, const int codesSize,
|
const int *ycoordinates, const int *codes, const int codesSize,
|
||||||
const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
|
const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
|
||||||
const bool useFullEditDistance, unsigned short *outWords, int *frequencies,
|
const bool useFullEditDistance, int *outWords, int *frequencies,
|
||||||
int *outputTypes) const;
|
int *outputTypes) const;
|
||||||
virtual ~UnigramDictionary();
|
virtual ~UnigramDictionary();
|
||||||
|
|
||||||
|
@ -93,9 +93,9 @@ class UnigramDictionary {
|
||||||
int *newChildPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool,
|
int *newChildPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool,
|
||||||
const int currentWordIndex) const;
|
const int currentWordIndex) const;
|
||||||
int getMostFrequentWordLike(const int startInputIndex, const int inputSize,
|
int getMostFrequentWordLike(const int startInputIndex, const int inputSize,
|
||||||
Correction *correction, unsigned short *word) const;
|
Correction *correction, int *word) const;
|
||||||
int getMostFrequentWordLikeInner(const uint16_t *const inWord, const int inputSize,
|
int getMostFrequentWordLikeInner(const int *const inWord, const int inputSize,
|
||||||
short unsigned int *outWord) const;
|
int *outWord) const;
|
||||||
int getSubStringSuggestion(
|
int getSubStringSuggestion(
|
||||||
ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates,
|
ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates,
|
||||||
const int *codes, const bool useFullEditDistance, Correction *correction,
|
const int *codes, const bool useFullEditDistance, Correction *correction,
|
||||||
|
@ -103,14 +103,13 @@ class UnigramDictionary {
|
||||||
const bool hasAutoCorrectionCandidate, const int currentWordIndex,
|
const bool hasAutoCorrectionCandidate, const int currentWordIndex,
|
||||||
const int inputWordStartPos, const int inputWordLength,
|
const int inputWordStartPos, const int inputWordLength,
|
||||||
const int outputWordStartPos, const bool isSpaceProximity, int *freqArray,
|
const int outputWordStartPos, const bool isSpaceProximity, int *freqArray,
|
||||||
int *wordLengthArray, unsigned short *outputWord, int *outputWordLength) const;
|
int *wordLengthArray, int *outputWord, int *outputWordLength) const;
|
||||||
void getMultiWordsSuggestionRec(ProximityInfo *proximityInfo,
|
void getMultiWordsSuggestionRec(ProximityInfo *proximityInfo, const int *xcoordinates,
|
||||||
const int *xcoordinates, const int *ycoordinates, const int *codes,
|
const int *ycoordinates, const int *codes, const bool useFullEditDistance,
|
||||||
const bool useFullEditDistance, const int inputSize,
|
const int inputSize, Correction *correction, WordsPriorityQueuePool *queuePool,
|
||||||
Correction *correction, WordsPriorityQueuePool *queuePool,
|
|
||||||
const bool hasAutoCorrectionCandidate, const int startPos, const int startWordIndex,
|
const bool hasAutoCorrectionCandidate, const int startPos, const int startWordIndex,
|
||||||
const int outputWordLength, int *freqArray, int *wordLengthArray,
|
const int outputWordLength, int *freqArray, int *wordLengthArray,
|
||||||
unsigned short *outputWord) const;
|
int *outputWord) const;
|
||||||
|
|
||||||
const uint8_t *const DICT_ROOT;
|
const uint8_t *const DICT_ROOT;
|
||||||
const int MAX_WORD_LENGTH;
|
const int MAX_WORD_LENGTH;
|
||||||
|
|
|
@ -30,15 +30,15 @@ class WordsPriorityQueue {
|
||||||
class SuggestedWord {
|
class SuggestedWord {
|
||||||
public:
|
public:
|
||||||
int mScore;
|
int mScore;
|
||||||
unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
|
int mWord[MAX_WORD_LENGTH_INTERNAL];
|
||||||
int mWordLength;
|
int mWordLength;
|
||||||
bool mUsed;
|
bool mUsed;
|
||||||
int mType;
|
int mType;
|
||||||
|
|
||||||
// Rendered diff hunk: each statement below appears twice (before the change, then after).
// The change widens `word` from unsigned short* to int* and updates the memcpy element
// size to match.
//
// Fills this SuggestedWord: stores score, wordLength and type, copies wordLength elements
// from `word` into mWord, and marks the entry as used (mUsed = true).
// NOTE(review): mWord is declared with MAX_WORD_LENGTH_INTERNAL elements and wordLength is
// not checked against that before the memcpy - callers presumably guarantee it fits.
void setParams(int score, unsigned short *word, int wordLength, int type) {
|
void setParams(int score, int *word, int wordLength, int type) {
|
||||||
mScore = score;
|
mScore = score;
|
||||||
mWordLength = wordLength;
|
mWordLength = wordLength;
|
||||||
memcpy(mWord, word, sizeof(unsigned short) * wordLength);
|
memcpy(mWord, word, sizeof(int) * wordLength);
|
||||||
mUsed = true;
|
mUsed = true;
|
||||||
mType = type;
|
mType = type;
|
||||||
}
|
}
|
||||||
|
@ -57,9 +57,9 @@ class WordsPriorityQueue {
|
||||||
delete[] mSuggestedWords;
|
delete[] mSuggestedWords;
|
||||||
}
|
}
|
||||||
|
|
||||||
void push(int score, unsigned short *word, int wordLength, int type) {
|
void push(int score, int *word, int wordLength, int type) {
|
||||||
SuggestedWord *sw = 0;
|
SuggestedWord *sw = 0;
|
||||||
if (mSuggestions.size() >= MAX_WORDS) {
|
if (size() >= MAX_WORDS) {
|
||||||
sw = mSuggestions.top();
|
sw = mSuggestions.top();
|
||||||
const int minScore = sw->mScore;
|
const int minScore = sw->mScore;
|
||||||
if (minScore >= score) {
|
if (minScore >= score) {
|
||||||
|
@ -94,11 +94,10 @@ class WordsPriorityQueue {
|
||||||
return sw;
|
return sw;
|
||||||
}
|
}
|
||||||
|
|
||||||
int outputSuggestions(const unsigned short *before, const int beforeLength,
|
int outputSuggestions(const int *before, const int beforeLength, int *frequencies,
|
||||||
int *frequencies, unsigned short *outputChars, int* outputTypes) {
|
int *outputCodePoints, int* outputTypes) {
|
||||||
mHighestSuggestedWord = 0;
|
mHighestSuggestedWord = 0;
|
||||||
const unsigned int size = min(
|
const int size = min(MAX_WORDS, static_cast<int>(mSuggestions.size()));
|
||||||
MAX_WORDS, static_cast<unsigned int>(mSuggestions.size()));
|
|
||||||
SuggestedWord *swBuffer[size];
|
SuggestedWord *swBuffer[size];
|
||||||
int index = size - 1;
|
int index = size - 1;
|
||||||
while (!mSuggestions.empty() && index >= 0) {
|
while (!mSuggestions.empty() && index >= 0) {
|
||||||
|
@ -113,9 +112,9 @@ class WordsPriorityQueue {
|
||||||
}
|
}
|
||||||
if (size >= 2) {
|
if (size >= 2) {
|
||||||
SuggestedWord *nsMaxSw = 0;
|
SuggestedWord *nsMaxSw = 0;
|
||||||
unsigned int maxIndex = 0;
|
int maxIndex = 0;
|
||||||
float maxNs = 0;
|
float maxNs = 0;
|
||||||
for (unsigned int i = 0; i < size; ++i) {
|
for (int i = 0; i < size; ++i) {
|
||||||
SuggestedWord *tempSw = swBuffer[i];
|
SuggestedWord *tempSw = swBuffer[i];
|
||||||
if (!tempSw) {
|
if (!tempSw) {
|
||||||
continue;
|
continue;
|
||||||
|
@ -132,17 +131,17 @@ class WordsPriorityQueue {
|
||||||
swBuffer[0] = nsMaxSw;
|
swBuffer[0] = nsMaxSw;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (unsigned int i = 0; i < size; ++i) {
|
for (int i = 0; i < size; ++i) {
|
||||||
SuggestedWord *sw = swBuffer[i];
|
SuggestedWord *sw = swBuffer[i];
|
||||||
if (!sw) {
|
if (!sw) {
|
||||||
AKLOGE("SuggestedWord is null %d", i);
|
AKLOGE("SuggestedWord is null %d", i);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
const unsigned int wordLength = sw->mWordLength;
|
const int wordLength = sw->mWordLength;
|
||||||
unsigned short *targetAddress = outputChars + i * MAX_WORD_LENGTH;
|
int *targetAddress = outputCodePoints + i * MAX_WORD_LENGTH;
|
||||||
frequencies[i] = sw->mScore;
|
frequencies[i] = sw->mScore;
|
||||||
outputTypes[i] = sw->mType;
|
outputTypes[i] = sw->mType;
|
||||||
memcpy(targetAddress, sw->mWord, wordLength * sizeof(unsigned short));
|
memcpy(targetAddress, sw->mWord, wordLength * sizeof(int));
|
||||||
if (wordLength < MAX_WORD_LENGTH) {
|
if (wordLength < MAX_WORD_LENGTH) {
|
||||||
targetAddress[wordLength] = 0;
|
targetAddress[wordLength] = 0;
|
||||||
}
|
}
|
||||||
|
@ -152,7 +151,7 @@ class WordsPriorityQueue {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Rendered diff hunk: each statement below appears twice (before the change, then after).
//
// Returns the number of entries currently held in mSuggestions as an int.
// Before: the container's size was converted to int implicitly.
// After: the conversion is spelled out with static_cast<int>.
int size() const {
|
int size() const {
|
||||||
return mSuggestions.size();
|
return static_cast<int>(mSuggestions.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
void clear() {
|
void clear() {
|
||||||
|
@ -175,13 +174,13 @@ class WordsPriorityQueue {
|
||||||
DUMP_WORD(mHighestSuggestedWord->mWord, mHighestSuggestedWord->mWordLength);
|
DUMP_WORD(mHighestSuggestedWord->mWord, mHighestSuggestedWord->mWordLength);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Rendered diff hunk: each statement below appears twice (before the change, then after).
// The change widens `before` and `outWord` from unsigned short to int and reflows the
// signature and the call.
//
// Returns the normalized score of mHighestSuggestedWord relative to the input `before`
// (beforeLength elements) by delegating to getNormalizedScore, which also receives the
// outWord / outScore / outLength out-parameters.
// If no highest suggested word has been recorded (mHighestSuggestedWord is null), returns
// 0.0 immediately and does not touch the out-parameters.
float getHighestNormalizedScore(const unsigned short *before, const int beforeLength,
|
float getHighestNormalizedScore(const int *before, const int beforeLength, int **outWord,
|
||||||
unsigned short **outWord, int *outScore, int *outLength) {
|
int *outScore, int *outLength) {
|
||||||
if (!mHighestSuggestedWord) {
|
if (!mHighestSuggestedWord) {
|
||||||
return 0.0;
|
return 0.0;
|
||||||
}
|
}
|
||||||
return getNormalizedScore(
|
return getNormalizedScore(mHighestSuggestedWord, before, beforeLength, outWord, outScore,
|
||||||
mHighestSuggestedWord, before, beforeLength, outWord, outScore, outLength);
|
outLength);
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@ -192,9 +191,8 @@ class WordsPriorityQueue {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
SuggestedWord *getFreeSuggestedWord(int score, unsigned short *word,
|
SuggestedWord *getFreeSuggestedWord(int score, int *word, int wordLength, int type) {
|
||||||
int wordLength, int type) {
|
for (int i = 0; i < MAX_WORD_LENGTH; ++i) {
|
||||||
for (unsigned int i = 0; i < MAX_WORD_LENGTH; ++i) {
|
|
||||||
if (!mSuggestedWords[i].mUsed) {
|
if (!mSuggestedWords[i].mUsed) {
|
||||||
mSuggestedWords[i].setParams(score, word, wordLength, type);
|
mSuggestedWords[i].setParams(score, word, wordLength, type);
|
||||||
return &mSuggestedWords[i];
|
return &mSuggestedWords[i];
|
||||||
|
@ -203,10 +201,10 @@ class WordsPriorityQueue {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static float getNormalizedScore(SuggestedWord *sw, const unsigned short *before,
|
static float getNormalizedScore(SuggestedWord *sw, const int *before, const int beforeLength,
|
||||||
const int beforeLength, unsigned short **outWord, int *outScore, int *outLength) {
|
int **outWord, int *outScore, int *outLength) {
|
||||||
const int score = sw->mScore;
|
const int score = sw->mScore;
|
||||||
unsigned short *word = sw->mWord;
|
int *word = sw->mWord;
|
||||||
const int wordLength = sw->mWordLength;
|
const int wordLength = sw->mWordLength;
|
||||||
if (outScore) {
|
if (outScore) {
|
||||||
*outScore = score;
|
*outScore = score;
|
||||||
|
@ -217,15 +215,15 @@ class WordsPriorityQueue {
|
||||||
if (outLength) {
|
if (outLength) {
|
||||||
*outLength = wordLength;
|
*outLength = wordLength;
|
||||||
}
|
}
|
||||||
return Correction::RankingAlgorithm::calcNormalizedScore(
|
return Correction::RankingAlgorithm::calcNormalizedScore(before, beforeLength, word,
|
||||||
before, beforeLength, word, wordLength, score);
|
wordLength, score);
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef std::priority_queue<SuggestedWord *, std::vector<SuggestedWord *>,
|
typedef std::priority_queue<SuggestedWord *, std::vector<SuggestedWord *>,
|
||||||
wordComparator> Suggestions;
|
wordComparator> Suggestions;
|
||||||
Suggestions mSuggestions;
|
Suggestions mSuggestions;
|
||||||
const unsigned int MAX_WORDS;
|
const int MAX_WORDS;
|
||||||
const unsigned int MAX_WORD_LENGTH;
|
const int MAX_WORD_LENGTH;
|
||||||
SuggestedWord *mSuggestedWords;
|
SuggestedWord *mSuggestedWords;
|
||||||
SuggestedWord *mHighestSuggestedWord;
|
SuggestedWord *mHighestSuggestedWord;
|
||||||
};
|
};
|
||||||
|
|
Loading…
Reference in New Issue