Merge "Use 32-bit code points for suggestions output"
This commit is contained in:
commit
01f6a61e51
20 changed files with 329 additions and 363 deletions
|
@ -51,8 +51,7 @@ public final class BinaryDictionary extends Dictionary {
|
|||
private long mNativeDict;
|
||||
private final Locale mLocale;
|
||||
private final int[] mInputCodePoints = new int[MAX_WORD_LENGTH];
|
||||
// TODO: The below should be int[] mOutputCodePoints
|
||||
private final char[] mOutputChars = new char[MAX_WORD_LENGTH * MAX_RESULTS];
|
||||
private final int[] mOutputCodePoints = new int[MAX_WORD_LENGTH * MAX_RESULTS];
|
||||
private final int[] mSpaceIndices = new int[MAX_SPACES];
|
||||
private final int[] mOutputScores = new int[MAX_RESULTS];
|
||||
private final int[] mOutputTypes = new int[MAX_RESULTS];
|
||||
|
@ -88,9 +87,9 @@ public final class BinaryDictionary extends Dictionary {
|
|||
* @param useFullEditDistance whether to use the full edit distance in suggestions
|
||||
* @param dictType the dictionary type, as a human-readable string
|
||||
*/
|
||||
public BinaryDictionary(final Context context,
|
||||
final String filename, final long offset, final long length,
|
||||
final boolean useFullEditDistance, final Locale locale, final String dictType) {
|
||||
public BinaryDictionary(final Context context, final String filename, final long offset,
|
||||
final long length, final boolean useFullEditDistance, final Locale locale,
|
||||
final String dictType) {
|
||||
super(dictType);
|
||||
mLocale = locale;
|
||||
mUseFullEditDistance = useFullEditDistance;
|
||||
|
@ -109,10 +108,10 @@ public final class BinaryDictionary extends Dictionary {
|
|||
private native int getSuggestionsNative(long dict, long proximityInfo, long traverseSession,
|
||||
int[] xCoordinates, int[] yCoordinates, int[] times, int[] pointerIds,
|
||||
int[] inputCodePoints, int codesSize, int commitPoint, boolean isGesture,
|
||||
int[] prevWordCodePointArray, boolean useFullEditDistance, char[] outputChars,
|
||||
int[] prevWordCodePointArray, boolean useFullEditDistance, int[] outputCodePoints,
|
||||
int[] outputScores, int[] outputIndices, int[] outputTypes);
|
||||
private static native float calcNormalizedScoreNative(char[] before, char[] after, int score);
|
||||
private static native int editDistanceNative(char[] before, char[] after);
|
||||
private static native float calcNormalizedScoreNative(int[] before, int[] after, int score);
|
||||
private static native int editDistanceNative(int[] before, int[] after);
|
||||
|
||||
// TODO: Move native dict into session
|
||||
private final void loadDictionary(final String path, final long startOffset,
|
||||
|
@ -153,7 +152,8 @@ public final class BinaryDictionary extends Dictionary {
|
|||
proximityInfo.getNativeProximityInfo(), getTraverseSession(sessionId).getSession(),
|
||||
ips.getXCoordinates(), ips.getYCoordinates(), ips.getTimes(), ips.getPointerIds(),
|
||||
mInputCodePoints, codesSize, 0 /* commitPoint */, isGesture, prevWordCodePointArray,
|
||||
mUseFullEditDistance, mOutputChars, mOutputScores, mSpaceIndices, mOutputTypes);
|
||||
mUseFullEditDistance, mOutputCodePoints, mOutputScores, mSpaceIndices,
|
||||
mOutputTypes);
|
||||
final int count = Math.min(tmpCount, MAX_PREDICTIONS);
|
||||
|
||||
final ArrayList<SuggestedWordInfo> suggestions = CollectionUtils.newArrayList();
|
||||
|
@ -161,14 +161,14 @@ public final class BinaryDictionary extends Dictionary {
|
|||
if (composerSize > 0 && mOutputScores[j] < 1) break;
|
||||
final int start = j * MAX_WORD_LENGTH;
|
||||
int len = 0;
|
||||
while (len < MAX_WORD_LENGTH && mOutputChars[start + len] != 0) {
|
||||
while (len < MAX_WORD_LENGTH && mOutputCodePoints[start + len] != 0) {
|
||||
++len;
|
||||
}
|
||||
if (len > 0) {
|
||||
final int score = SuggestedWordInfo.KIND_WHITELIST == mOutputTypes[j]
|
||||
? SuggestedWordInfo.MAX_SCORE : mOutputScores[j];
|
||||
suggestions.add(new SuggestedWordInfo(
|
||||
new String(mOutputChars, start, len), score, mOutputTypes[j], mDictType));
|
||||
suggestions.add(new SuggestedWordInfo(new String(mOutputCodePoints, start, len),
|
||||
score, mOutputTypes[j], mDictType));
|
||||
}
|
||||
}
|
||||
return suggestions;
|
||||
|
@ -180,14 +180,16 @@ public final class BinaryDictionary extends Dictionary {
|
|||
|
||||
public static float calcNormalizedScore(final String before, final String after,
|
||||
final int score) {
|
||||
return calcNormalizedScoreNative(before.toCharArray(), after.toCharArray(), score);
|
||||
return calcNormalizedScoreNative(StringUtils.toCodePointArray(before),
|
||||
StringUtils.toCodePointArray(after), score);
|
||||
}
|
||||
|
||||
public static int editDistance(final String before, final String after) {
|
||||
if (before == null || after == null) {
|
||||
throw new IllegalArgumentException();
|
||||
}
|
||||
return editDistanceNative(before.toCharArray(), after.toCharArray());
|
||||
return editDistanceNative(StringUtils.toCodePointArray(before),
|
||||
StringUtils.toCodePointArray(after));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -206,9 +208,9 @@ public final class BinaryDictionary extends Dictionary {
|
|||
// calls when checking for changes in an entire dictionary.
|
||||
public boolean isValidBigram(final String word1, final String word2) {
|
||||
if (TextUtils.isEmpty(word1) || TextUtils.isEmpty(word2)) return false;
|
||||
final int[] chars1 = StringUtils.toCodePointArray(word1);
|
||||
final int[] chars2 = StringUtils.toCodePointArray(word2);
|
||||
return isValidBigramNative(mNativeDict, chars1, chars2);
|
||||
final int[] codePoints1 = StringUtils.toCodePointArray(word1);
|
||||
final int[] codePoints2 = StringUtils.toCodePointArray(word2);
|
||||
return isValidBigramNative(mNativeDict, codePoints1, codePoints2);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -132,7 +132,7 @@ static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jobject object,
|
|||
jintArray yCoordinatesArray, jintArray timesArray, jintArray pointerIdsArray,
|
||||
jintArray inputCodePointsArray, jint arraySize, jint commitPoint, jboolean isGesture,
|
||||
jintArray prevWordCodePointsForBigrams, jboolean useFullEditDistance,
|
||||
jcharArray outputCharsArray, jintArray scoresArray, jintArray spaceIndicesArray,
|
||||
jintArray outputCodePointsArray, jintArray scoresArray, jintArray spaceIndicesArray,
|
||||
jintArray outputTypesArray) {
|
||||
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
|
||||
if (!dictionary) return 0;
|
||||
|
@ -162,16 +162,15 @@ static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jobject object,
|
|||
}
|
||||
|
||||
// Output values
|
||||
// TODO: Should be "outputCodePointsLength" and "int outputCodePoints[]"
|
||||
const jsize outputCharsLength = env->GetArrayLength(outputCharsArray);
|
||||
unsigned short outputChars[outputCharsLength];
|
||||
const jsize outputCodePointsLength = env->GetArrayLength(outputCodePointsArray);
|
||||
int outputCodePoints[outputCodePointsLength];
|
||||
const jsize scoresLength = env->GetArrayLength(scoresArray);
|
||||
int scores[scoresLength];
|
||||
const jsize spaceIndicesLength = env->GetArrayLength(spaceIndicesArray);
|
||||
int spaceIndices[spaceIndicesLength];
|
||||
const jsize outputTypesLength = env->GetArrayLength(outputTypesArray);
|
||||
int outputTypes[outputTypesLength];
|
||||
memset(outputChars, 0, sizeof(outputChars));
|
||||
memset(outputCodePoints, 0, sizeof(outputCodePoints));
|
||||
memset(scores, 0, sizeof(scores));
|
||||
memset(spaceIndices, 0, sizeof(spaceIndices));
|
||||
memset(outputTypes, 0, sizeof(outputTypes));
|
||||
|
@ -180,16 +179,15 @@ static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jobject object,
|
|||
if (isGesture || arraySize > 0) {
|
||||
count = dictionary->getSuggestions(pInfo, traverseSession, xCoordinates, yCoordinates,
|
||||
times, pointerIds, inputCodePoints, arraySize, prevWordCodePoints,
|
||||
prevWordCodePointsLength, commitPoint, isGesture, useFullEditDistance, outputChars,
|
||||
scores, spaceIndices, outputTypes);
|
||||
prevWordCodePointsLength, commitPoint, isGesture, useFullEditDistance,
|
||||
outputCodePoints, scores, spaceIndices, outputTypes);
|
||||
} else {
|
||||
count = dictionary->getBigrams(prevWordCodePoints, prevWordCodePointsLength,
|
||||
inputCodePoints, arraySize, outputChars, scores, outputTypes);
|
||||
inputCodePoints, arraySize, outputCodePoints, scores, outputTypes);
|
||||
}
|
||||
|
||||
// Copy back the output values
|
||||
// TODO: Should be SetIntArrayRegion()
|
||||
env->SetCharArrayRegion(outputCharsArray, 0, outputCharsLength, outputChars);
|
||||
env->SetIntArrayRegion(outputCodePointsArray, 0, outputCodePointsLength, outputCodePoints);
|
||||
env->SetIntArrayRegion(scoresArray, 0, scoresLength, scores);
|
||||
env->SetIntArrayRegion(spaceIndicesArray, 0, spaceIndicesLength, spaceIndices);
|
||||
env->SetIntArrayRegion(outputTypesArray, 0, outputTypesLength, outputTypes);
|
||||
|
@ -221,29 +219,27 @@ static jboolean latinime_BinaryDictionary_isValidBigram(JNIEnv *env, jobject obj
|
|||
}
|
||||
|
||||
static jfloat latinime_BinaryDictionary_calcNormalizedScore(JNIEnv *env, jobject object,
|
||||
jcharArray before, jcharArray after, jint score) {
|
||||
jintArray before, jintArray after, jint score) {
|
||||
jsize beforeLength = env->GetArrayLength(before);
|
||||
jsize afterLength = env->GetArrayLength(after);
|
||||
jchar beforeChars[beforeLength];
|
||||
jchar afterChars[afterLength];
|
||||
env->GetCharArrayRegion(before, 0, beforeLength, beforeChars);
|
||||
env->GetCharArrayRegion(after, 0, afterLength, afterChars);
|
||||
return Correction::RankingAlgorithm::calcNormalizedScore(
|
||||
static_cast<unsigned short *>(beforeChars), beforeLength,
|
||||
static_cast<unsigned short *>(afterChars), afterLength, score);
|
||||
int beforeCodePoints[beforeLength];
|
||||
int afterCodePoints[afterLength];
|
||||
env->GetIntArrayRegion(before, 0, beforeLength, beforeCodePoints);
|
||||
env->GetIntArrayRegion(after, 0, afterLength, afterCodePoints);
|
||||
return Correction::RankingAlgorithm::calcNormalizedScore(beforeCodePoints, beforeLength,
|
||||
afterCodePoints, afterLength, score);
|
||||
}
|
||||
|
||||
static jint latinime_BinaryDictionary_editDistance(JNIEnv *env, jobject object,
|
||||
jcharArray before, jcharArray after) {
|
||||
static jint latinime_BinaryDictionary_editDistance(JNIEnv *env, jobject object, jintArray before,
|
||||
jintArray after) {
|
||||
jsize beforeLength = env->GetArrayLength(before);
|
||||
jsize afterLength = env->GetArrayLength(after);
|
||||
jchar beforeChars[beforeLength];
|
||||
jchar afterChars[afterLength];
|
||||
env->GetCharArrayRegion(before, 0, beforeLength, beforeChars);
|
||||
env->GetCharArrayRegion(after, 0, afterLength, afterChars);
|
||||
return Correction::RankingAlgorithm::editDistance(
|
||||
static_cast<unsigned short *>(beforeChars), beforeLength,
|
||||
static_cast<unsigned short *>(afterChars), afterLength);
|
||||
int beforeCodePoints[beforeLength];
|
||||
int afterCodePoints[afterLength];
|
||||
env->GetIntArrayRegion(before, 0, beforeLength, beforeCodePoints);
|
||||
env->GetIntArrayRegion(after, 0, afterLength, afterCodePoints);
|
||||
return Correction::RankingAlgorithm::editDistance(beforeCodePoints, beforeLength,
|
||||
afterCodePoints, afterLength);
|
||||
}
|
||||
|
||||
static void latinime_BinaryDictionary_close(JNIEnv *env, jobject object, jlong dict) {
|
||||
|
@ -279,15 +275,15 @@ static JNINativeMethod sMethods[] = {
|
|||
{"openNative", "(Ljava/lang/String;JJIIII)J",
|
||||
reinterpret_cast<void *>(latinime_BinaryDictionary_open)},
|
||||
{"closeNative", "(J)V", reinterpret_cast<void *>(latinime_BinaryDictionary_close)},
|
||||
{"getSuggestionsNative", "(JJJ[I[I[I[I[IIIZ[IZ[C[I[I[I)I",
|
||||
{"getSuggestionsNative", "(JJJ[I[I[I[I[IIIZ[IZ[I[I[I[I)I",
|
||||
reinterpret_cast<void *>(latinime_BinaryDictionary_getSuggestions)},
|
||||
{"getFrequencyNative", "(J[I)I",
|
||||
reinterpret_cast<void *>(latinime_BinaryDictionary_getFrequency)},
|
||||
{"isValidBigramNative", "(J[I[I)Z",
|
||||
reinterpret_cast<void *>(latinime_BinaryDictionary_isValidBigram)},
|
||||
{"calcNormalizedScoreNative", "([C[CI)F",
|
||||
{"calcNormalizedScoreNative", "([I[II)F",
|
||||
reinterpret_cast<void *>(latinime_BinaryDictionary_calcNormalizedScore)},
|
||||
{"editDistanceNative", "([C[C)I",
|
||||
{"editDistanceNative", "([I[I)I",
|
||||
reinterpret_cast<void *>(latinime_BinaryDictionary_editDistance)}
|
||||
};
|
||||
|
||||
|
|
|
@ -36,13 +36,13 @@ BigramDictionary::BigramDictionary(const unsigned char *dict, int maxWordLength,
|
|||
BigramDictionary::~BigramDictionary() {
|
||||
}
|
||||
|
||||
bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequency,
|
||||
int *bigramFreq, unsigned short *bigramChars, int *outputTypes) const {
|
||||
bool BigramDictionary::addWordBigram(int *word, int length, int frequency, int *bigramFreq,
|
||||
int *bigramCodePoints, int *outputTypes) const {
|
||||
word[length] = 0;
|
||||
if (DEBUG_DICT) {
|
||||
#ifdef FLAG_DBG
|
||||
char s[length + 1];
|
||||
for (int i = 0; i <= length; i++) s[i] = word[i];
|
||||
for (int i = 0; i <= length; i++) s[i] = static_cast<char>(word[i]);
|
||||
AKLOGI("Bigram: Found word = %s, freq = %d :", s, frequency);
|
||||
#endif
|
||||
}
|
||||
|
@ -51,7 +51,8 @@ bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequ
|
|||
int insertAt = 0;
|
||||
while (insertAt < MAX_PREDICTIONS) {
|
||||
if (frequency > bigramFreq[insertAt] || (bigramFreq[insertAt] == frequency
|
||||
&& length < Dictionary::wideStrLen(bigramChars + insertAt * MAX_WORD_LENGTH))) {
|
||||
&& length < Dictionary::wideStrLen(
|
||||
bigramCodePoints + insertAt * MAX_WORD_LENGTH))) {
|
||||
break;
|
||||
}
|
||||
insertAt++;
|
||||
|
@ -65,10 +66,10 @@ bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequ
|
|||
(MAX_PREDICTIONS - insertAt - 1) * sizeof(bigramFreq[0]));
|
||||
bigramFreq[insertAt] = frequency;
|
||||
outputTypes[insertAt] = Dictionary::KIND_PREDICTION;
|
||||
memmove(bigramChars + (insertAt + 1) * MAX_WORD_LENGTH,
|
||||
bigramChars + insertAt * MAX_WORD_LENGTH,
|
||||
(MAX_PREDICTIONS - insertAt - 1) * sizeof(bigramChars[0]) * MAX_WORD_LENGTH);
|
||||
unsigned short *dest = bigramChars + insertAt * MAX_WORD_LENGTH;
|
||||
memmove(bigramCodePoints + (insertAt + 1) * MAX_WORD_LENGTH,
|
||||
bigramCodePoints + insertAt * MAX_WORD_LENGTH,
|
||||
(MAX_PREDICTIONS - insertAt - 1) * sizeof(bigramCodePoints[0]) * MAX_WORD_LENGTH);
|
||||
int *dest = bigramCodePoints + insertAt * MAX_WORD_LENGTH;
|
||||
while (length--) {
|
||||
*dest++ = *word++;
|
||||
}
|
||||
|
@ -86,7 +87,7 @@ bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequ
|
|||
* prevWordLength: its length.
|
||||
* inputCodes: what user typed, in the same format as for UnigramDictionary::getSuggestions.
|
||||
* codesSize: the size of the codes array.
|
||||
* bigramChars: an array for output, in the same format as outwords for getSuggestions.
|
||||
* bigramCodePoints: an array for output, in the same format as outwords for getSuggestions.
|
||||
* bigramFreq: an array to output frequencies.
|
||||
* outputTypes: an array to output types.
|
||||
* This method returns the number of bigrams this word has, for backward compatibility.
|
||||
|
@ -97,8 +98,8 @@ bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequ
|
|||
* and the bigrams are used to boost unigram result scores, it makes little sense to
|
||||
* reduce their scope to the ones that match the first letter.
|
||||
*/
|
||||
int BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, int *inputCodes,
|
||||
int codesSize, unsigned short *bigramChars, int *bigramFreq, int *outputTypes) const {
|
||||
int BigramDictionary::getBigrams(const int *prevWord, int prevWordLength, int *inputCodes,
|
||||
int codesSize, int *bigramCodePoints, int *bigramFreq, int *outputTypes) const {
|
||||
// TODO: remove unused arguments, and refrain from storing stuff in members of this class
|
||||
// TODO: have "in" arguments before "out" ones, and make out args explicit in the name
|
||||
|
||||
|
@ -117,7 +118,7 @@ int BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, in
|
|||
int bigramCount = 0;
|
||||
do {
|
||||
bigramFlags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
|
||||
uint16_t bigramBuffer[MAX_WORD_LENGTH];
|
||||
int bigramBuffer[MAX_WORD_LENGTH];
|
||||
int unigramFreq = 0;
|
||||
const int bigramPos = BinaryFormat::getAttributeAddressAndForwardPointer(root, bigramFlags,
|
||||
&pos);
|
||||
|
@ -134,7 +135,7 @@ int BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, in
|
|||
// here, but it can't get too bad.
|
||||
const int frequency =
|
||||
BinaryFormat::computeFrequencyForBigram(unigramFreq, bigramFreqTemp);
|
||||
if (addWordBigram(bigramBuffer, length, frequency, bigramFreq, bigramChars,
|
||||
if (addWordBigram(bigramBuffer, length, frequency, bigramFreq, bigramCodePoints,
|
||||
outputTypes)) {
|
||||
++bigramCount;
|
||||
}
|
||||
|
@ -190,12 +191,12 @@ void BigramDictionary::fillBigramAddressToFrequencyMapAndFilter(const int32_t *p
|
|||
} while (0 != (BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags));
|
||||
}
|
||||
|
||||
bool BigramDictionary::checkFirstCharacter(unsigned short *word, int *inputCodes) const {
|
||||
bool BigramDictionary::checkFirstCharacter(int *word, int *inputCodes) const {
|
||||
// Checks whether this word starts with same character or neighboring characters of
|
||||
// what user typed.
|
||||
|
||||
int maxAlt = MAX_ALTERNATIVES;
|
||||
const unsigned short firstBaseChar = toBaseLowerCase(*word);
|
||||
const int firstBaseChar = toBaseLowerCase(*word);
|
||||
while (maxAlt > 0) {
|
||||
if (toBaseLowerCase(*inputCodes) == firstBaseChar) {
|
||||
return true;
|
||||
|
|
|
@ -27,23 +27,23 @@ namespace latinime {
|
|||
class BigramDictionary {
|
||||
public:
|
||||
BigramDictionary(const unsigned char *dict, int maxWordLength, int maxPredictions);
|
||||
int getBigrams(const int32_t *word, int length, int *inputCodes, int codesSize,
|
||||
unsigned short *outWords, int *frequencies, int *outputTypes) const;
|
||||
void fillBigramAddressToFrequencyMapAndFilter(const int32_t *prevWord, const int prevWordLength,
|
||||
int getBigrams(const int *word, int length, int *inputCodes, int codesSize, int *outWords,
|
||||
int *frequencies, int *outputTypes) const;
|
||||
void fillBigramAddressToFrequencyMapAndFilter(const int *prevWord, const int prevWordLength,
|
||||
std::map<int, int> *map, uint8_t *filter) const;
|
||||
bool isValidBigram(const int32_t *word1, int length1, const int32_t *word2, int length2) const;
|
||||
bool isValidBigram(const int *word1, int length1, const int *word2, int length2) const;
|
||||
~BigramDictionary();
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(BigramDictionary);
|
||||
bool addWordBigram(unsigned short *word, int length, int frequency,
|
||||
int *bigramFreq, unsigned short *bigramChars, int *outputTypes) const;
|
||||
bool addWordBigram(int *word, int length, int frequency, int *bigramFreq, int *bigramCodePoints,
|
||||
int *outputTypes) const;
|
||||
int getBigramAddress(int *pos, bool advance);
|
||||
int getBigramFreq(int *pos);
|
||||
void searchForTerminalNode(int addressLookingFor, int frequency);
|
||||
bool getFirstBitOfByte(int *pos) { return (DICT[*pos] & 0x80) > 0; }
|
||||
bool getSecondBitOfByte(int *pos) { return (DICT[*pos] & 0x40) > 0; }
|
||||
bool checkFirstCharacter(unsigned short *word, int *inputCodes) const;
|
||||
int getBigramListPositionForWord(const int32_t *prevWord, const int prevWordLength,
|
||||
bool checkFirstCharacter(int *word, int *inputCodes) const;
|
||||
int getBigramListPositionForWord(const int *prevWord, const int prevWordLength,
|
||||
const bool forceLowerCaseSearch) const;
|
||||
|
||||
const unsigned char *DICT;
|
||||
|
|
|
@ -84,7 +84,7 @@ class BinaryFormat {
|
|||
static unsigned int getFlags(const uint8_t *const dict);
|
||||
static int getGroupCountAndForwardPointer(const uint8_t *const dict, int *pos);
|
||||
static uint8_t getFlagsAndForwardPointer(const uint8_t *const dict, int *pos);
|
||||
static int32_t getCodePointAndForwardPointer(const uint8_t *const dict, int *pos);
|
||||
static int getCodePointAndForwardPointer(const uint8_t *const dict, int *pos);
|
||||
static int readFrequencyWithoutMovingPointer(const uint8_t *const dict, const int pos);
|
||||
static int skipOtherCharacters(const uint8_t *const dict, const int pos);
|
||||
static int skipChildrenPosition(const uint8_t flags, const int pos);
|
||||
|
@ -98,10 +98,10 @@ class BinaryFormat {
|
|||
static int getAttributeAddressAndForwardPointer(const uint8_t *const dict, const uint8_t flags,
|
||||
int *pos);
|
||||
static int getAttributeFrequencyFromFlags(const int flags);
|
||||
static int getTerminalPosition(const uint8_t *const root, const int32_t *const inWord,
|
||||
static int getTerminalPosition(const uint8_t *const root, const int *const inWord,
|
||||
const int length, const bool forceLowerCaseSearch);
|
||||
static int getWordAtAddress(const uint8_t *const root, const int address, const int maxDepth,
|
||||
uint16_t *outWord, int *outUnigramFrequency);
|
||||
int *outWord, int *outUnigramFrequency);
|
||||
static int computeFrequencyForBigram(const int unigramFreq, const int bigramFreq);
|
||||
static int getProbability(const int position, const std::map<int, int> *bigramMap,
|
||||
const uint8_t *bigramFilter, const int unigramFreq);
|
||||
|
@ -176,17 +176,17 @@ inline uint8_t BinaryFormat::getFlagsAndForwardPointer(const uint8_t *const dict
|
|||
return dict[(*pos)++];
|
||||
}
|
||||
|
||||
inline int32_t BinaryFormat::getCodePointAndForwardPointer(const uint8_t *const dict, int *pos) {
|
||||
inline int BinaryFormat::getCodePointAndForwardPointer(const uint8_t *const dict, int *pos) {
|
||||
const int origin = *pos;
|
||||
const int32_t codePoint = dict[origin];
|
||||
const int codePoint = dict[origin];
|
||||
if (codePoint < MINIMAL_ONE_BYTE_CHARACTER_VALUE) {
|
||||
if (codePoint == CHARACTER_ARRAY_TERMINATOR) {
|
||||
*pos = origin + 1;
|
||||
return NOT_A_CODE_POINT;
|
||||
} else {
|
||||
*pos = origin + 3;
|
||||
const int32_t char_1 = codePoint << 16;
|
||||
const int32_t char_2 = char_1 + (dict[origin + 1] << 8);
|
||||
const int char_1 = codePoint << 16;
|
||||
const int char_2 = char_1 + (dict[origin + 1] << 8);
|
||||
return char_2 + dict[origin + 2];
|
||||
}
|
||||
} else {
|
||||
|
@ -202,7 +202,7 @@ inline int BinaryFormat::readFrequencyWithoutMovingPointer(const uint8_t *const
|
|||
|
||||
inline int BinaryFormat::skipOtherCharacters(const uint8_t *const dict, const int pos) {
|
||||
int currentPos = pos;
|
||||
int32_t character = dict[currentPos++];
|
||||
int character = dict[currentPos++];
|
||||
while (CHARACTER_ARRAY_TERMINATOR != character) {
|
||||
if (character < MINIMAL_ONE_BYTE_CHARACTER_VALUE) {
|
||||
currentPos += MULTIPLE_BYTE_CHARACTER_ADDITIONAL_SIZE;
|
||||
|
@ -352,8 +352,8 @@ inline int BinaryFormat::getAttributeFrequencyFromFlags(const int flags) {
|
|||
|
||||
// This function gets the byte position of the last chargroup of the exact matching word in the
|
||||
// dictionary. If no match is found, it returns NOT_VALID_WORD.
|
||||
inline int BinaryFormat::getTerminalPosition(const uint8_t *const root,
|
||||
const int32_t *const inWord, const int length, const bool forceLowerCaseSearch) {
|
||||
inline int BinaryFormat::getTerminalPosition(const uint8_t *const root, const int *const inWord,
|
||||
const int length, const bool forceLowerCaseSearch) {
|
||||
int pos = 0;
|
||||
int wordPos = 0;
|
||||
|
||||
|
@ -362,14 +362,14 @@ inline int BinaryFormat::getTerminalPosition(const uint8_t *const root,
|
|||
// there was no match (or we would have found it).
|
||||
if (wordPos >= length) return NOT_VALID_WORD;
|
||||
int charGroupCount = BinaryFormat::getGroupCountAndForwardPointer(root, &pos);
|
||||
const int32_t wChar = forceLowerCaseSearch ? toLowerCase(inWord[wordPos]) : inWord[wordPos];
|
||||
const int wChar = forceLowerCaseSearch ? toLowerCase(inWord[wordPos]) : inWord[wordPos];
|
||||
while (true) {
|
||||
// If there are no more character groups in this node, it means we could not
|
||||
// find a matching character for this depth, therefore there is no match.
|
||||
if (0 >= charGroupCount) return NOT_VALID_WORD;
|
||||
const int charGroupPos = pos;
|
||||
const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
|
||||
int32_t character = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
|
||||
int character = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
|
||||
if (character == wChar) {
|
||||
// This is the correct node. Only one character group may start with the same
|
||||
// char within a node, so either we found our match in this node, or there is
|
||||
|
@ -439,7 +439,7 @@ inline int BinaryFormat::getTerminalPosition(const uint8_t *const root,
|
|||
* Return value : the length of the word, or 0 if the word was not found.
|
||||
*/
|
||||
inline int BinaryFormat::getWordAtAddress(const uint8_t *const root, const int address,
|
||||
const int maxDepth, uint16_t *outWord, int *outUnigramFrequency) {
|
||||
const int maxDepth, int *outWord, int *outUnigramFrequency) {
|
||||
int pos = 0;
|
||||
int wordPos = 0;
|
||||
|
||||
|
@ -457,13 +457,13 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t *const root, const int a
|
|||
--charGroupCount) {
|
||||
const int startPos = pos;
|
||||
const uint8_t flags = getFlagsAndForwardPointer(root, &pos);
|
||||
const int32_t character = getCodePointAndForwardPointer(root, &pos);
|
||||
const int character = getCodePointAndForwardPointer(root, &pos);
|
||||
if (address == startPos) {
|
||||
// We found the address. Copy the rest of the word in the buffer and return
|
||||
// the length.
|
||||
outWord[wordPos] = character;
|
||||
if (FLAG_HAS_MULTIPLE_CHARS & flags) {
|
||||
int32_t nextChar = getCodePointAndForwardPointer(root, &pos);
|
||||
int nextChar = getCodePointAndForwardPointer(root, &pos);
|
||||
// We count chars in order to avoid infinite loops if the file is broken or
|
||||
// if there is some other bug
|
||||
int charCount = maxDepth;
|
||||
|
@ -522,13 +522,12 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t *const root, const int a
|
|||
if (0 != lastCandidateGroupPos) {
|
||||
const uint8_t lastFlags =
|
||||
getFlagsAndForwardPointer(root, &lastCandidateGroupPos);
|
||||
const int32_t lastChar =
|
||||
const int lastChar =
|
||||
getCodePointAndForwardPointer(root, &lastCandidateGroupPos);
|
||||
// We copy all the characters in this group to the buffer
|
||||
outWord[wordPos] = lastChar;
|
||||
if (FLAG_HAS_MULTIPLE_CHARS & lastFlags) {
|
||||
int32_t nextChar =
|
||||
getCodePointAndForwardPointer(root, &lastCandidateGroupPos);
|
||||
int nextChar = getCodePointAndForwardPointer(root, &lastCandidateGroupPos);
|
||||
int charCount = maxDepth;
|
||||
while (-1 != nextChar && --charCount > 0) {
|
||||
outWord[++wordPos] = nextChar;
|
||||
|
|
|
@ -18,22 +18,23 @@
|
|||
#define LATINIME_CHAR_UTILS_H
|
||||
|
||||
#include <cctype>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "defines.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
inline static bool isAsciiUpper(unsigned short c) {
|
||||
inline static bool isAsciiUpper(int c) {
|
||||
// Note: isupper(...) reports false positives for some Cyrillic characters, causing them to
|
||||
// be incorrectly lower-cased using toAsciiLower(...) rather than latin_tolower(...).
|
||||
return (c >= 'A' && c <= 'Z');
|
||||
}
|
||||
|
||||
inline static unsigned short toAsciiLower(unsigned short c) {
|
||||
inline static int toAsciiLower(int c) {
|
||||
return c - 'A' + 'a';
|
||||
}
|
||||
|
||||
inline static bool isAscii(unsigned short c) {
|
||||
return isascii(static_cast<int>(c)) != 0;
|
||||
inline static bool isAscii(int c) {
|
||||
return isascii(c) != 0;
|
||||
}
|
||||
|
||||
unsigned short latin_tolower(const unsigned short c);
|
||||
|
@ -44,33 +45,32 @@ unsigned short latin_tolower(const unsigned short c);
|
|||
* if c is not a combined character, or the base character if it
|
||||
* is combined.
|
||||
*/
|
||||
|
||||
static const int BASE_CHARS_SIZE = 0x0500;
|
||||
extern const uint16_t BASE_CHARS[BASE_CHARS_SIZE];
|
||||
extern const unsigned short BASE_CHARS[BASE_CHARS_SIZE];
|
||||
|
||||
inline static unsigned short toBaseChar(unsigned short c) {
|
||||
inline static int toBaseCodePoint(int c) {
|
||||
if (c < BASE_CHARS_SIZE) {
|
||||
return BASE_CHARS[c];
|
||||
return static_cast<int>(BASE_CHARS[c]);
|
||||
}
|
||||
return c;
|
||||
}
|
||||
|
||||
inline static unsigned short toLowerCase(const unsigned short c) {
|
||||
inline static int toLowerCase(const int c) {
|
||||
if (isAsciiUpper(c)) {
|
||||
return toAsciiLower(c);
|
||||
} else if (isAscii(c)) {
|
||||
return c;
|
||||
}
|
||||
return latin_tolower(c);
|
||||
return static_cast<int>(latin_tolower(static_cast<unsigned short>(c)));
|
||||
}
|
||||
|
||||
inline static unsigned short toBaseLowerCase(const unsigned short c) {
|
||||
return toLowerCase(toBaseChar(c));
|
||||
inline static int toBaseLowerCase(const int c) {
|
||||
return toLowerCase(toBaseCodePoint(c));
|
||||
}
|
||||
|
||||
inline static bool isSkippableChar(const uint16_t character) {
|
||||
inline static bool isSkippableCodePoint(const int codePoint) {
|
||||
// TODO: Do not hardcode here
|
||||
return character == '\'' || character == '-';
|
||||
return codePoint == KEYCODE_SINGLE_QUOTE || codePoint == KEYCODE_HYPHEN_MINUS;
|
||||
}
|
||||
|
||||
} // namespace latinime
|
||||
|
|
|
@ -60,8 +60,8 @@ inline static void dumpEditDistance10ForDebug(int *editDistanceTable,
|
|||
}
|
||||
}
|
||||
|
||||
inline static void calcEditDistanceOneStep(int *editDistanceTable, const unsigned short *input,
|
||||
const int inputSize, const unsigned short *output, const int outputLength) {
|
||||
inline static void calcEditDistanceOneStep(int *editDistanceTable, const int *input,
|
||||
const int inputSize, const int *output, const int outputLength) {
|
||||
// TODO: Make sure that editDistance[0 ~ MAX_WORD_LENGTH_INTERNAL] is not touched.
|
||||
// Let dp[i][j] be editDistanceTable[i * (inputSize + 1) + j].
|
||||
// Assuming that dp[0][0] ... dp[outputLength - 1][inputSize] are already calculated,
|
||||
|
@ -71,10 +71,10 @@ inline static void calcEditDistanceOneStep(int *editDistanceTable, const unsigne
|
|||
const int *const prevprev =
|
||||
outputLength >= 2 ? editDistanceTable + (outputLength - 2) * (inputSize + 1) : 0;
|
||||
current[0] = outputLength;
|
||||
const uint32_t co = toBaseLowerCase(output[outputLength - 1]);
|
||||
const uint32_t prevCO = outputLength >= 2 ? toBaseLowerCase(output[outputLength - 2]) : 0;
|
||||
const int co = toBaseLowerCase(output[outputLength - 1]);
|
||||
const int prevCO = outputLength >= 2 ? toBaseLowerCase(output[outputLength - 2]) : 0;
|
||||
for (int i = 1; i <= inputSize; ++i) {
|
||||
const uint32_t ci = toBaseLowerCase(input[i - 1]);
|
||||
const int ci = toBaseLowerCase(input[i - 1]);
|
||||
const uint16_t cost = (ci == co) ? 0 : 1;
|
||||
current[i] = min(current[i - 1] + 1, min(prev[i] + 1, prev[i - 1] + cost));
|
||||
if (i >= 2 && prevprev && ci == prevCO && co == toBaseLowerCase(input[i - 2])) {
|
||||
|
@ -94,11 +94,9 @@ inline static int getCurrentEditDistance(int *editDistanceTable, const int editD
|
|||
//////////////////////
|
||||
// inline functions //
|
||||
//////////////////////
|
||||
static const char SINGLE_QUOTE = '\'';
|
||||
|
||||
inline bool Correction::isSingleQuote(const unsigned short c) {
|
||||
const unsigned short userTypedChar = mProximityInfoState.getPrimaryCharAt(mInputIndex);
|
||||
return (c == SINGLE_QUOTE && userTypedChar != SINGLE_QUOTE);
|
||||
inline bool Correction::isSingleQuote(const int c) {
|
||||
const int userTypedChar = mProximityInfoState.getPrimaryCodePointAt(mInputIndex);
|
||||
return (c == KEYCODE_SINGLE_QUOTE && userTypedChar != KEYCODE_SINGLE_QUOTE);
|
||||
}
|
||||
|
||||
////////////////
|
||||
|
@ -162,22 +160,22 @@ bool Correction::sameAsTyped() {
|
|||
}
|
||||
|
||||
int Correction::getFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray,
|
||||
const int wordCount, const bool isSpaceProximity, const unsigned short *word) {
|
||||
const int wordCount, const bool isSpaceProximity, const int *word) {
|
||||
return Correction::RankingAlgorithm::calcFreqForSplitMultipleWords(freqArray, wordLengthArray,
|
||||
wordCount, this, isSpaceProximity, word);
|
||||
}
|
||||
|
||||
int Correction::getFinalProbability(const int probability, unsigned short **word, int *wordLength) {
|
||||
int Correction::getFinalProbability(const int probability, int **word, int *wordLength) {
|
||||
return getFinalProbabilityInternal(probability, word, wordLength, mInputSize);
|
||||
}
|
||||
|
||||
int Correction::getFinalProbabilityForSubQueue(const int probability, unsigned short **word,
|
||||
int *wordLength, const int inputSize) {
|
||||
int Correction::getFinalProbabilityForSubQueue(const int probability, int **word, int *wordLength,
|
||||
const int inputSize) {
|
||||
return getFinalProbabilityInternal(probability, word, wordLength, inputSize);
|
||||
}
|
||||
|
||||
int Correction::getFinalProbabilityInternal(const int probability, unsigned short **word,
|
||||
int *wordLength, const int inputSize) {
|
||||
int Correction::getFinalProbabilityInternal(const int probability, int **word, int *wordLength,
|
||||
const int inputSize) {
|
||||
const int outputIndex = mTerminalOutputIndex;
|
||||
const int inputIndex = mTerminalInputIndex;
|
||||
*wordLength = outputIndex + 1;
|
||||
|
@ -273,15 +271,15 @@ bool Correction::needsToPrune() const {
|
|||
|| (!mDoAutoCompletion && (mOutputIndex > mInputSize));
|
||||
}
|
||||
|
||||
void Correction::addCharToCurrentWord(const int32_t c) {
|
||||
void Correction::addCharToCurrentWord(const int c) {
|
||||
mWord[mOutputIndex] = c;
|
||||
const unsigned short *primaryInputWord = mProximityInfoState.getPrimaryInputWord();
|
||||
calcEditDistanceOneStep(mEditDistanceTable, primaryInputWord, mInputSize,
|
||||
mWord, mOutputIndex + 1);
|
||||
const int *primaryInputWord = mProximityInfoState.getPrimaryInputWord();
|
||||
calcEditDistanceOneStep(mEditDistanceTable, primaryInputWord, mInputSize, mWord,
|
||||
mOutputIndex + 1);
|
||||
}
|
||||
|
||||
Correction::CorrectionType Correction::processSkipChar(
|
||||
const int32_t c, const bool isTerminal, const bool inputIndexIncremented) {
|
||||
Correction::CorrectionType Correction::processSkipChar(const int c, const bool isTerminal,
|
||||
const bool inputIndexIncremented) {
|
||||
addCharToCurrentWord(c);
|
||||
mTerminalInputIndex = mInputIndex - (inputIndexIncremented ? 1 : 0);
|
||||
mTerminalOutputIndex = mOutputIndex;
|
||||
|
@ -309,8 +307,7 @@ inline bool isProximityCharOrEquivalentChar(ProximityType type) {
|
|||
return type == EQUIVALENT_CHAR || type == NEAR_PROXIMITY_CHAR;
|
||||
}
|
||||
|
||||
Correction::CorrectionType Correction::processCharAndCalcState(
|
||||
const int32_t c, const bool isTerminal) {
|
||||
Correction::CorrectionType Correction::processCharAndCalcState(const int c, const bool isTerminal) {
|
||||
const int correctionCount = (mSkippedCount + mExcessiveCount + mTransposedCount);
|
||||
if (correctionCount > mMaxErrors) {
|
||||
return processUnrelatedCorrectionType();
|
||||
|
@ -628,10 +625,10 @@ Correction::CorrectionType Correction::processCharAndCalcState(
|
|||
}
|
||||
}
|
||||
|
||||
inline static int getQuoteCount(const unsigned short *word, const int length) {
|
||||
inline static int getQuoteCount(const int *word, const int length) {
|
||||
int quoteCount = 0;
|
||||
for (int i = 0; i < length; ++i) {
|
||||
if (word[i] == SINGLE_QUOTE) {
|
||||
if (word[i] == KEYCODE_SINGLE_QUOTE) {
|
||||
++quoteCount;
|
||||
}
|
||||
}
|
||||
|
@ -639,7 +636,7 @@ inline static int getQuoteCount(const unsigned short *word, const int length) {
|
|||
}
|
||||
|
||||
inline static bool isUpperCase(unsigned short c) {
|
||||
return isAsciiUpper(toBaseChar(c));
|
||||
return isAsciiUpper(toBaseCodePoint(c));
|
||||
}
|
||||
|
||||
//////////////////////
|
||||
|
@ -672,7 +669,7 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex
|
|||
// TODO: use mExcessiveCount
|
||||
const int matchCount = inputSize - correction->mProximityCount - excessiveCount;
|
||||
|
||||
const unsigned short *word = correction->mWord;
|
||||
const int *word = correction->mWord;
|
||||
const bool skipped = skippedCount > 0;
|
||||
|
||||
const int quoteDiffCount = max(0, getQuoteCount(word, outputLength)
|
||||
|
@ -911,7 +908,7 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex
|
|||
/* static */
|
||||
int Correction::RankingAlgorithm::calcFreqForSplitMultipleWords(
|
||||
const int *freqArray, const int *wordLengthArray, const int wordCount,
|
||||
const Correction *correction, const bool isSpaceProximity, const unsigned short *word) {
|
||||
const Correction *correction, const bool isSpaceProximity, const int *word) {
|
||||
const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER;
|
||||
|
||||
bool firstCapitalizedWordDemotion = false;
|
||||
|
@ -1040,9 +1037,8 @@ int Correction::RankingAlgorithm::calcFreqForSplitMultipleWords(
|
|||
}
|
||||
|
||||
/* Damerau-Levenshtein distance */
|
||||
inline static int editDistanceInternal(
|
||||
int *editDistanceTable, const unsigned short *before,
|
||||
const int beforeLength, const unsigned short *after, const int afterLength) {
|
||||
inline static int editDistanceInternal(int *editDistanceTable, const int *before,
|
||||
const int beforeLength, const int *after, const int afterLength) {
|
||||
// dp[li][lo] dp[a][b] = dp[ a * lo + b]
|
||||
int *dp = editDistanceTable;
|
||||
const int li = beforeLength + 1;
|
||||
|
@ -1056,9 +1052,9 @@ inline static int editDistanceInternal(
|
|||
|
||||
for (int i = 0; i < li - 1; ++i) {
|
||||
for (int j = 0; j < lo - 1; ++j) {
|
||||
const uint32_t ci = toBaseLowerCase(before[i]);
|
||||
const uint32_t co = toBaseLowerCase(after[j]);
|
||||
const uint16_t cost = (ci == co) ? 0 : 1;
|
||||
const int ci = toBaseLowerCase(before[i]);
|
||||
const int co = toBaseLowerCase(after[j]);
|
||||
const int cost = (ci == co) ? 0 : 1;
|
||||
dp[(i + 1) * lo + (j + 1)] = min(dp[i * lo + (j + 1)] + 1,
|
||||
min(dp[(i + 1) * lo + j] + 1, dp[i * lo + j] + cost));
|
||||
if (i > 0 && j > 0 && ci == toBaseLowerCase(after[j - 1])
|
||||
|
@ -1080,8 +1076,8 @@ inline static int editDistanceInternal(
|
|||
return dp[li * lo - 1];
|
||||
}
|
||||
|
||||
int Correction::RankingAlgorithm::editDistance(const unsigned short *before,
|
||||
const int beforeLength, const unsigned short *after, const int afterLength) {
|
||||
int Correction::RankingAlgorithm::editDistance(const int *before, const int beforeLength,
|
||||
const int *after, const int afterLength) {
|
||||
int table[(beforeLength + 1) * (afterLength + 1)];
|
||||
return editDistanceInternal(table, before, beforeLength, after, afterLength);
|
||||
}
|
||||
|
@ -1109,9 +1105,8 @@ int Correction::RankingAlgorithm::editDistance(const unsigned short *before,
|
|||
// So, we can normalize original score by dividing powf(2, min(b.l(),a.l())) * 255 * 2.
|
||||
|
||||
/* static */
|
||||
float Correction::RankingAlgorithm::calcNormalizedScore(const unsigned short *before,
|
||||
const int beforeLength, const unsigned short *after, const int afterLength,
|
||||
const int score) {
|
||||
float Correction::RankingAlgorithm::calcNormalizedScore(const int *before, const int beforeLength,
|
||||
const int *after, const int afterLength, const int score) {
|
||||
if (0 == beforeLength || 0 == afterLength) {
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -78,14 +78,13 @@ class Correction {
|
|||
return ++mTotalTraverseCount;
|
||||
}
|
||||
|
||||
int getFreqForSplitMultipleWords(
|
||||
const int *freqArray, const int *wordLengthArray, const int wordCount,
|
||||
const bool isSpaceProximity, const unsigned short *word);
|
||||
int getFinalProbability(const int probability, unsigned short **word, int *wordLength);
|
||||
int getFinalProbabilityForSubQueue(const int probability, unsigned short **word,
|
||||
int *wordLength, const int inputSize);
|
||||
int getFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray,
|
||||
const int wordCount, const bool isSpaceProximity, const int *word);
|
||||
int getFinalProbability(const int probability, int **word, int *wordLength);
|
||||
int getFinalProbabilityForSubQueue(const int probability, int **word, int *wordLength,
|
||||
const int inputSize);
|
||||
|
||||
CorrectionType processCharAndCalcState(const int32_t c, const bool isTerminal);
|
||||
CorrectionType processCharAndCalcState(const int c, const bool isTerminal);
|
||||
|
||||
/////////////////////////
|
||||
// Tree helper methods
|
||||
|
@ -110,28 +109,28 @@ class Correction {
|
|||
const int inputSize);
|
||||
static int calcFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray,
|
||||
const int wordCount, const Correction *correction, const bool isSpaceProximity,
|
||||
const unsigned short *word);
|
||||
static float calcNormalizedScore(const unsigned short *before, const int beforeLength,
|
||||
const unsigned short *after, const int afterLength, const int score);
|
||||
static int editDistance(const unsigned short *before,
|
||||
const int beforeLength, const unsigned short *after, const int afterLength);
|
||||
const int *word);
|
||||
static float calcNormalizedScore(const int *before, const int beforeLength,
|
||||
const int *after, const int afterLength, const int score);
|
||||
static int editDistance(const int *before, const int beforeLength, const int *after,
|
||||
const int afterLength);
|
||||
private:
|
||||
static const int MAX_INITIAL_SCORE = 255;
|
||||
};
|
||||
|
||||
// proximity info state
|
||||
void initInputParams(const ProximityInfo *proximityInfo, const int32_t *inputCodes,
|
||||
void initInputParams(const ProximityInfo *proximityInfo, const int *inputCodes,
|
||||
const int inputSize, const int *xCoordinates, const int *yCoordinates) {
|
||||
mProximityInfoState.initInputParams(0, MAX_POINT_TO_KEY_LENGTH,
|
||||
proximityInfo, inputCodes, inputSize, xCoordinates, yCoordinates, 0, 0, false);
|
||||
}
|
||||
|
||||
const unsigned short *getPrimaryInputWord() const {
|
||||
const int *getPrimaryInputWord() const {
|
||||
return mProximityInfoState.getPrimaryInputWord();
|
||||
}
|
||||
|
||||
unsigned short getPrimaryCharAt(const int index) const {
|
||||
return mProximityInfoState.getPrimaryCharAt(index);
|
||||
int getPrimaryCodePointAt(const int index) const {
|
||||
return mProximityInfoState.getPrimaryCodePointAt(index);
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -214,13 +213,13 @@ class Correction {
|
|||
inline void incrementInputIndex();
|
||||
inline void incrementOutputIndex();
|
||||
inline void startToTraverseAllNodes();
|
||||
inline bool isSingleQuote(const unsigned short c);
|
||||
inline CorrectionType processSkipChar(
|
||||
const int32_t c, const bool isTerminal, const bool inputIndexIncremented);
|
||||
inline bool isSingleQuote(const int c);
|
||||
inline CorrectionType processSkipChar(const int c, const bool isTerminal,
|
||||
const bool inputIndexIncremented);
|
||||
inline CorrectionType processUnrelatedCorrectionType();
|
||||
inline void addCharToCurrentWord(const int32_t c);
|
||||
inline int getFinalProbabilityInternal(const int probability, unsigned short **word,
|
||||
int *wordLength, const int inputSize);
|
||||
inline void addCharToCurrentWord(const int c);
|
||||
inline int getFinalProbabilityInternal(const int probability, int **word, int *wordLength,
|
||||
const int inputSize);
|
||||
|
||||
static const int TYPED_LETTER_MULTIPLIER = 2;
|
||||
static const int FULL_WORD_MULTIPLIER = 2;
|
||||
|
@ -240,7 +239,7 @@ class Correction {
|
|||
uint8_t mTotalTraverseCount;
|
||||
|
||||
// The following arrays are state buffer.
|
||||
unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
|
||||
int mWord[MAX_WORD_LENGTH_INTERNAL];
|
||||
int mDistances[MAX_WORD_LENGTH_INTERNAL];
|
||||
|
||||
// Edit distance calculation requires a buffer with (N+1)^2 length for the input length N.
|
||||
|
|
|
@ -30,17 +30,15 @@
|
|||
#define DUMP_RESULT(words, frequencies, maxWordCount, maxWordLength) do { \
|
||||
dumpResult(words, frequencies, maxWordCount, maxWordLength); } while (0)
|
||||
#define DUMP_WORD(word, length) do { dumpWord(word, length); } while (0)
|
||||
#define DUMP_WORD_INT(word, length) do { dumpWordInt(word, length); } while (0)
|
||||
// TODO: INTS_TO_CHARS
|
||||
#define SHORTS_TO_CHARS(input, length, output) do { \
|
||||
shortArrayToCharArray(input, length, output); } while (0)
|
||||
#define INTS_TO_CHARS(input, length, output) do { \
|
||||
intArrayToCharArray(input, length, output); } while (0)
|
||||
|
||||
static inline void dumpWordInfo(const unsigned short *word, const int length,
|
||||
const int rank, const int frequency) {
|
||||
static inline void dumpWordInfo(const int *word, const int length, const int rank,
|
||||
const int frequency) {
|
||||
static char charBuf[50];
|
||||
int i = 0;
|
||||
for (; i < length; ++i) {
|
||||
const unsigned short c = word[i];
|
||||
const int c = word[i];
|
||||
if (c == 0) {
|
||||
break;
|
||||
}
|
||||
|
@ -53,8 +51,7 @@ static inline void dumpWordInfo(const unsigned short *word, const int length,
|
|||
}
|
||||
}
|
||||
|
||||
static inline void dumpResult(
|
||||
const unsigned short *outWords, const int *frequencies, const int maxWordCounts,
|
||||
static inline void dumpResult(const int *outWords, const int *frequencies, const int maxWordCounts,
|
||||
const int maxWordLength) {
|
||||
AKLOGI("--- DUMP RESULT ---------");
|
||||
for (int i = 0; i < maxWordCounts; ++i) {
|
||||
|
@ -63,11 +60,11 @@ static inline void dumpResult(
|
|||
AKLOGI("-------------------------");
|
||||
}
|
||||
|
||||
static inline void dumpWord(const unsigned short *word, const int length) {
|
||||
static inline void dumpWord(const int *word, const int length) {
|
||||
static char charBuf[50];
|
||||
int i = 0;
|
||||
for (; i < length; ++i) {
|
||||
const unsigned short c = word[i];
|
||||
const int c = word[i];
|
||||
if (c == 0) {
|
||||
break;
|
||||
}
|
||||
|
@ -80,22 +77,10 @@ static inline void dumpWord(const unsigned short *word, const int length) {
|
|||
}
|
||||
}
|
||||
|
||||
static inline void dumpWordInt(const int *word, const int length) {
|
||||
static char charBuf[50];
|
||||
|
||||
for (int i = 0; i < length; ++i) {
|
||||
charBuf[i] = word[i];
|
||||
}
|
||||
charBuf[length] = 0;
|
||||
AKLOGI("i[ %s ]", charBuf);
|
||||
}
|
||||
|
||||
// TODO: Change this to intArrayToCharArray
|
||||
static inline void shortArrayToCharArray(
|
||||
const unsigned short *input, const int length, char *output) {
|
||||
static inline void intArrayToCharArray(const int *input, const int length, char *output) {
|
||||
int i = 0;
|
||||
for (;i < length; ++i) {
|
||||
const unsigned short c = input[i];
|
||||
for (; i < length; ++i) {
|
||||
const int c = input[i];
|
||||
if (c == 0) {
|
||||
break;
|
||||
}
|
||||
|
@ -137,11 +122,9 @@ static inline void showStackTrace() {
|
|||
#define AKLOGI(fmt, ...)
|
||||
#define DUMP_RESULT(words, frequencies, maxWordCount, maxWordLength)
|
||||
#define DUMP_WORD(word, length)
|
||||
#define DUMP_WORD_INT(word, length)
|
||||
#define ASSERT(success)
|
||||
#define SHOW_STACK_TRACE
|
||||
// TODO: INTS_TO_CHARS
|
||||
#define SHORTS_TO_CHARS(input, length, output)
|
||||
#define INTS_TO_CHARS(input, length, output)
|
||||
#endif
|
||||
|
||||
#ifdef FLAG_DO_PROFILE
|
||||
|
@ -286,6 +269,8 @@ static inline void prof_out(void) {
|
|||
#define NOT_A_PROBABILITY (-1)
|
||||
|
||||
#define KEYCODE_SPACE ' '
|
||||
#define KEYCODE_SINGLE_QUOTE '\''
|
||||
#define KEYCODE_HYPHEN_MINUS '-'
|
||||
|
||||
#define CALIBRATE_SCORE_BY_TOUCH_COORDINATES true
|
||||
|
||||
|
|
|
@ -54,11 +54,10 @@ Dictionary::~Dictionary() {
|
|||
}
|
||||
|
||||
int Dictionary::getSuggestions(ProximityInfo *proximityInfo, void *traverseSession,
|
||||
int *xcoordinates, int *ycoordinates, int *times, int *pointerIds,
|
||||
int *codes, int codesSize, int *prevWordChars,
|
||||
int prevWordLength, int commitPoint, bool isGesture,
|
||||
bool useFullEditDistance, unsigned short *outWords,
|
||||
int *frequencies, int *spaceIndices, int *outputTypes) const {
|
||||
int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *codes,
|
||||
int codesSize, int *prevWordChars, int prevWordLength, int commitPoint, bool isGesture,
|
||||
bool useFullEditDistance, int *outWords, int *frequencies, int *spaceIndices,
|
||||
int *outputTypes) const {
|
||||
int result = 0;
|
||||
if (isGesture) {
|
||||
DicTraverseWrapper::initDicTraverseSession(
|
||||
|
@ -83,7 +82,7 @@ int Dictionary::getSuggestions(ProximityInfo *proximityInfo, void *traverseSessi
|
|||
}
|
||||
|
||||
int Dictionary::getBigrams(const int32_t *word, int length, int *codes, int codesSize,
|
||||
unsigned short *outWords, int *frequencies, int *outputTypes) const {
|
||||
int *outWords, int *frequencies, int *outputTypes) const {
|
||||
if (length <= 0) return 0;
|
||||
return mBigramDictionary->getBigrams(word, length, codes, codesSize, outWords, frequencies,
|
||||
outputTypes);
|
||||
|
|
|
@ -47,11 +47,11 @@ class Dictionary {
|
|||
int getSuggestions(ProximityInfo *proximityInfo, void *traverseSession, int *xcoordinates,
|
||||
int *ycoordinates, int *times, int *pointerIds, int *codes, int codesSize,
|
||||
int *prevWordChars, int prevWordLength, int commitPoint, bool isGesture,
|
||||
bool useFullEditDistance, unsigned short *outWords,
|
||||
int *frequencies, int *spaceIndices, int *outputTypes) const;
|
||||
bool useFullEditDistance, int *outWords, int *frequencies, int *spaceIndices,
|
||||
int *outputTypes) const;
|
||||
|
||||
int getBigrams(const int32_t *word, int length, int *codes, int codesSize,
|
||||
unsigned short *outWords, int *frequencies, int *outputTypes) const;
|
||||
int getBigrams(const int32_t *word, int length, int *codes, int codesSize, int *outWords,
|
||||
int *frequencies, int *outputTypes) const;
|
||||
|
||||
int getFrequency(const int32_t *word, int length) const;
|
||||
bool isValidBigram(const int32_t *word1, int length1, const int32_t *word2, int length2) const;
|
||||
|
@ -68,7 +68,7 @@ class Dictionary {
|
|||
|
||||
// public static utility methods
|
||||
// static inline methods should be defined in the header file
|
||||
static int wideStrLen(unsigned short *str);
|
||||
static int wideStrLen(int *str);
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(Dictionary);
|
||||
|
@ -88,7 +88,7 @@ class Dictionary {
|
|||
|
||||
// public static utility methods
|
||||
// static inline methods should be defined in the header file
|
||||
inline int Dictionary::wideStrLen(unsigned short *str) {
|
||||
inline int Dictionary::wideStrLen(int *str) {
|
||||
if (!str) return 0;
|
||||
int length = 0;
|
||||
while (*str) {
|
||||
|
|
|
@ -38,15 +38,14 @@ class GestureDecoderWrapper : public IncrementalDecoderInterface {
|
|||
}
|
||||
|
||||
int getSuggestions(ProximityInfo *pInfo, void *traverseSession, int *inputXs, int *inputYs,
|
||||
int *times, int *pointerIds, int *codes, int inputSize, int commitPoint,
|
||||
unsigned short *outWords, int *frequencies, int *outputIndices,
|
||||
int *outputTypes) const {
|
||||
int *times, int *pointerIds, int *codes, int inputSize, int commitPoint, int *outWords,
|
||||
int *frequencies, int *outputIndices, int *outputTypes) const {
|
||||
if (!mIncrementalDecoderInterface) {
|
||||
return 0;
|
||||
}
|
||||
return mIncrementalDecoderInterface->getSuggestions(
|
||||
pInfo, traverseSession, inputXs, inputYs, times, pointerIds, codes,
|
||||
inputSize, commitPoint, outWords, frequencies, outputIndices, outputTypes);
|
||||
return mIncrementalDecoderInterface->getSuggestions(pInfo, traverseSession, inputXs,
|
||||
inputYs, times, pointerIds, codes, inputSize, commitPoint, outWords, frequencies,
|
||||
outputIndices, outputTypes);
|
||||
}
|
||||
|
||||
static void setGestureDecoderFactoryMethod(
|
||||
|
|
|
@ -28,10 +28,9 @@ class ProximityInfo;
|
|||
|
||||
class IncrementalDecoderInterface {
|
||||
public:
|
||||
virtual int getSuggestions(ProximityInfo *pInfo, void *traverseSession,
|
||||
int *inputXs, int *inputYs, int *times, int *pointerIds, int *codes,
|
||||
int inputSize, int commitPoint, unsigned short *outWords, int *frequencies,
|
||||
int *outputIndices, int *outputTypes) const = 0;
|
||||
virtual int getSuggestions(ProximityInfo *pInfo, void *traverseSession, int *inputXs,
|
||||
int *inputYs, int *times, int *pointerIds, int *codes, int inputSize, int commitPoint,
|
||||
int *outWords, int *frequencies, int *outputIndices, int *outputTypes) const = 0;
|
||||
IncrementalDecoderInterface() { };
|
||||
virtual ~IncrementalDecoderInterface() { };
|
||||
private:
|
||||
|
|
|
@ -38,15 +38,14 @@ class IncrementalDecoderWrapper : public IncrementalDecoderInterface {
|
|||
}
|
||||
|
||||
int getSuggestions(ProximityInfo *pInfo, void *traverseSession, int *inputXs, int *inputYs,
|
||||
int *times, int *pointerIds, int *codes, int inputSize, int commitPoint,
|
||||
unsigned short *outWords, int *frequencies, int *outputIndices,
|
||||
int *outputTypes) const {
|
||||
int *times, int *pointerIds, int *codes, int inputSize, int commitPoint, int *outWords,
|
||||
int *frequencies, int *outputIndices, int *outputTypes) const {
|
||||
if (!mIncrementalDecoderInterface) {
|
||||
return 0;
|
||||
}
|
||||
return mIncrementalDecoderInterface->getSuggestions(
|
||||
pInfo, traverseSession, inputXs, inputYs, times, pointerIds, codes,
|
||||
inputSize, commitPoint, outWords, frequencies, outputIndices, outputTypes);
|
||||
return mIncrementalDecoderInterface->getSuggestions(pInfo, traverseSession, inputXs,
|
||||
inputYs, times, pointerIds, codes, inputSize, commitPoint, outWords, frequencies,
|
||||
outputIndices, outputTypes);
|
||||
}
|
||||
|
||||
static void setIncrementalDecoderFactoryMethod(
|
||||
|
|
|
@ -34,7 +34,7 @@ const float ProximityInfoState::NOT_A_DISTANCE_FLOAT = -1.0f;
|
|||
const int ProximityInfoState::NOT_A_CODE = -1;
|
||||
|
||||
void ProximityInfoState::initInputParams(const int pointerId, const float maxPointToKeyLength,
|
||||
const ProximityInfo *proximityInfo, const int32_t *const inputCodes, const int inputSize,
|
||||
const ProximityInfo *proximityInfo, const int *const inputCodes, const int inputSize,
|
||||
const int *const xCoordinates, const int *const yCoordinates, const int *const times,
|
||||
const int *const pointerIds, const bool isGeometric) {
|
||||
|
||||
|
@ -63,7 +63,7 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi
|
|||
// - mNormalizedSquaredDistances
|
||||
// TODO: Merge
|
||||
for (int i = 0; i < inputSize; ++i) {
|
||||
const int32_t primaryKey = inputCodes[i];
|
||||
const int primaryKey = inputCodes[i];
|
||||
const int x = xCoordinates[i];
|
||||
const int y = yCoordinates[i];
|
||||
int *proximities = &mInputCodes[i * MAX_PROXIMITY_CHARS_SIZE_INTERNAL];
|
||||
|
@ -146,7 +146,7 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi
|
|||
AKLOGI("Init ProximityInfoState: (%d)PID = %d", i, pid);
|
||||
}
|
||||
if (pointerId == pid) {
|
||||
const int c = isGeometric ? NOT_A_COORDINATE : getPrimaryCharAt(i);
|
||||
const int c = isGeometric ? NOT_A_COORDINATE : getPrimaryCodePointAt(i);
|
||||
const int x = proximityOnly ? NOT_A_COORDINATE : xCoordinates[i];
|
||||
const int y = proximityOnly ? NOT_A_COORDINATE : yCoordinates[i];
|
||||
const int time = times ? times[i] : -1;
|
||||
|
@ -306,12 +306,12 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi
|
|||
&& xCoordinates && yCoordinates;
|
||||
if (!isGeometric && pointerId == 0) {
|
||||
for (int i = 0; i < inputSize; ++i) {
|
||||
mPrimaryInputWord[i] = getPrimaryCharAt(i);
|
||||
mPrimaryInputWord[i] = getPrimaryCodePointAt(i);
|
||||
}
|
||||
|
||||
for (int i = 0; i < mInputSize && mTouchPositionCorrectionEnabled; ++i) {
|
||||
const int *proximityChars = getProximityCharsAt(i);
|
||||
const int primaryKey = proximityChars[0];
|
||||
const int *proximityCodePoints = getProximityCodePointsAt(i);
|
||||
const int primaryKey = proximityCodePoints[0];
|
||||
const int x = xCoordinates[i];
|
||||
const int y = yCoordinates[i];
|
||||
if (DEBUG_PROXIMITY_CHARS) {
|
||||
|
@ -319,11 +319,12 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi
|
|||
a += 0;
|
||||
AKLOGI("--- Primary = %c, x = %d, y = %d", primaryKey, x, y);
|
||||
}
|
||||
for (int j = 0; j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL && proximityChars[j] > 0; ++j) {
|
||||
const int currentChar = proximityChars[j];
|
||||
for (int j = 0; j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL && proximityCodePoints[j] > 0;
|
||||
++j) {
|
||||
const int currentCodePoint = proximityCodePoints[j];
|
||||
const float squaredDistance =
|
||||
hasInputCoordinates() ? calculateNormalizedSquaredDistance(
|
||||
mProximityInfo->getKeyIndexOf(currentChar), i) :
|
||||
mProximityInfo->getKeyIndexOf(currentCodePoint), i) :
|
||||
NOT_A_DISTANCE_FLOAT;
|
||||
if (squaredDistance >= 0.0f) {
|
||||
mNormalizedSquaredDistances[i * MAX_PROXIMITY_CHARS_SIZE_INTERNAL + j] =
|
||||
|
@ -334,7 +335,7 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi
|
|||
PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO;
|
||||
}
|
||||
if (DEBUG_PROXIMITY_CHARS) {
|
||||
AKLOGI("--- Proximity (%d) = %c", j, currentChar);
|
||||
AKLOGI("--- Proximity (%d) = %c", j, currentCodePoint);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -449,7 +450,7 @@ float ProximityInfoState::getPointScore(
|
|||
|
||||
// Sampling touch point and pushing information to vectors.
|
||||
// Returning if previous point is popped or not.
|
||||
bool ProximityInfoState::pushTouchPoint(const int inputIndex, const int nodeChar, int x, int y,
|
||||
bool ProximityInfoState::pushTouchPoint(const int inputIndex, const int nodeCodePoint, int x, int y,
|
||||
const int time, const bool sample, const bool isLastPoint, const float sumAngle,
|
||||
NearKeysDistanceMap *const currentNearKeysDistances,
|
||||
const NearKeysDistanceMap *const prevNearKeysDistances,
|
||||
|
@ -458,7 +459,7 @@ bool ProximityInfoState::pushTouchPoint(const int inputIndex, const int nodeChar
|
|||
|
||||
size_t size = mInputXs.size();
|
||||
bool popped = false;
|
||||
if (nodeChar < 0 && sample) {
|
||||
if (nodeCodePoint < 0 && sample) {
|
||||
const float nearest = updateNearKeysDistances(x, y, currentNearKeysDistances);
|
||||
const float score = getPointScore(x, y, time, isLastPoint, nearest, sumAngle,
|
||||
currentNearKeysDistances, prevNearKeysDistances, prevPrevNearKeysDistances);
|
||||
|
@ -487,8 +488,8 @@ bool ProximityInfoState::pushTouchPoint(const int inputIndex, const int nodeChar
|
|||
}
|
||||
}
|
||||
|
||||
if (nodeChar >= 0 && (x < 0 || y < 0)) {
|
||||
const int keyId = mProximityInfo->getKeyIndexOf(nodeChar);
|
||||
if (nodeCodePoint >= 0 && (x < 0 || y < 0)) {
|
||||
const int keyId = mProximityInfo->getKeyIndexOf(nodeCodePoint);
|
||||
if (keyId >= 0) {
|
||||
x = mProximityInfo->getKeyCenterXOfKeyIdG(keyId);
|
||||
y = mProximityInfo->getKeyCenterYOfKeyIdG(keyId);
|
||||
|
@ -543,7 +544,7 @@ float ProximityInfoState::getPointToKeyLength(const int inputIndex, const int co
|
|||
const int index = inputIndex * mProximityInfo->getKeyCount() + keyId;
|
||||
return min(mDistanceCache[index], mMaxPointToKeyLength);
|
||||
}
|
||||
if (isSkippableChar(codePoint)) {
|
||||
if (isSkippableCodePoint(codePoint)) {
|
||||
return 0.0f;
|
||||
}
|
||||
// If the char is not a key on the keyboard then return the max length.
|
||||
|
@ -960,9 +961,9 @@ bool ProximityInfoState::suppressCharProbabilities(const int index0, const int i
|
|||
return true;
|
||||
}
|
||||
|
||||
// Get a word that is detected by tracing highest probability sequence into charBuf and returns
|
||||
// probability of generating the word.
|
||||
float ProximityInfoState::getHighestProbabilitySequence(uint16_t *const charBuf) const {
|
||||
// Get a word that is detected by tracing highest probability sequence into codePointBuf and
|
||||
// returns probability of generating the word.
|
||||
float ProximityInfoState::getHighestProbabilitySequence(int *const codePointBuf) const {
|
||||
static const float DEMOTION_LOG_PROBABILITY = 0.3f;
|
||||
int index = 0;
|
||||
float sumLogProbability = 0.0f;
|
||||
|
@ -980,12 +981,12 @@ float ProximityInfoState::getHighestProbabilitySequence(uint16_t *const charBuf)
|
|||
}
|
||||
}
|
||||
if (character != NOT_AN_INDEX) {
|
||||
charBuf[index] = mProximityInfo->getCodePointOf(character);
|
||||
codePointBuf[index] = mProximityInfo->getCodePointOf(character);
|
||||
index++;
|
||||
}
|
||||
sumLogProbability += minLogProbability;
|
||||
}
|
||||
charBuf[index] = '\0';
|
||||
codePointBuf[index] = '\0';
|
||||
return sumLogProbability;
|
||||
}
|
||||
|
||||
|
|
|
@ -43,7 +43,7 @@ class ProximityInfoState {
|
|||
// Defined in proximity_info_state.cpp //
|
||||
/////////////////////////////////////////
|
||||
void initInputParams(const int pointerId, const float maxPointToKeyLength,
|
||||
const ProximityInfo *proximityInfo, const int32_t *const inputCodes,
|
||||
const ProximityInfo *proximityInfo, const int *const inputCodes,
|
||||
const int inputSize, const int *xCoordinates, const int *yCoordinates,
|
||||
const int *const times, const int *const pointerIds, const bool isGeometric);
|
||||
|
||||
|
@ -65,15 +65,15 @@ class ProximityInfoState {
|
|||
|
||||
virtual ~ProximityInfoState() {}
|
||||
|
||||
inline unsigned short getPrimaryCharAt(const int index) const {
|
||||
return getProximityCharsAt(index)[0];
|
||||
inline int getPrimaryCodePointAt(const int index) const {
|
||||
return getProximityCodePointsAt(index)[0];
|
||||
}
|
||||
|
||||
inline bool existsCharInProximityAt(const int index, const int c) const {
|
||||
const int *chars = getProximityCharsAt(index);
|
||||
inline bool existsCodePointInProximityAt(const int index, const int c) const {
|
||||
const int *codePoints = getProximityCodePointsAt(index);
|
||||
int i = 0;
|
||||
while (chars[i] > 0 && i < MAX_PROXIMITY_CHARS_SIZE_INTERNAL) {
|
||||
if (chars[i++] == c) {
|
||||
while (codePoints[i] > 0 && i < MAX_PROXIMITY_CHARS_SIZE_INTERNAL) {
|
||||
if (codePoints[i++] == c) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
@ -82,13 +82,13 @@ class ProximityInfoState {
|
|||
|
||||
inline bool existsAdjacentProximityChars(const int index) const {
|
||||
if (index < 0 || index >= mInputSize) return false;
|
||||
const int currentChar = getPrimaryCharAt(index);
|
||||
const int currentCodePoint = getPrimaryCodePointAt(index);
|
||||
const int leftIndex = index - 1;
|
||||
if (leftIndex >= 0 && existsCharInProximityAt(leftIndex, currentChar)) {
|
||||
if (leftIndex >= 0 && existsCodePointInProximityAt(leftIndex, currentCodePoint)) {
|
||||
return true;
|
||||
}
|
||||
const int rightIndex = index + 1;
|
||||
if (rightIndex < mInputSize && existsCharInProximityAt(rightIndex, currentChar)) {
|
||||
if (rightIndex < mInputSize && existsCodePointInProximityAt(rightIndex, currentCodePoint)) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
@ -106,15 +106,15 @@ class ProximityInfoState {
|
|||
// Notice : accented characters do not have a proximity list, so they are alone
|
||||
// in their list. The non-accented version of the character should be considered
|
||||
// "close", but not the other keys close to the non-accented version.
|
||||
inline ProximityType getMatchedProximityId(const int index,
|
||||
const unsigned short c, const bool checkProximityChars, int *proximityIndex = 0) const {
|
||||
const int *currentChars = getProximityCharsAt(index);
|
||||
const int firstChar = currentChars[0];
|
||||
const unsigned short baseLowerC = toBaseLowerCase(c);
|
||||
inline ProximityType getMatchedProximityId(const int index, const int c,
|
||||
const bool checkProximityChars, int *proximityIndex = 0) const {
|
||||
const int *currentCodePoints = getProximityCodePointsAt(index);
|
||||
const int firstCodePoint = currentCodePoints[0];
|
||||
const int baseLowerC = toBaseLowerCase(c);
|
||||
|
||||
// The first char in the array is what user typed. If it matches right away,
|
||||
// that means the user typed that same char for this pos.
|
||||
if (firstChar == baseLowerC || firstChar == c) {
|
||||
if (firstCodePoint == baseLowerC || firstCodePoint == c) {
|
||||
return EQUIVALENT_CHAR;
|
||||
}
|
||||
|
||||
|
@ -123,14 +123,14 @@ class ProximityInfoState {
|
|||
// If the non-accented, lowercased version of that first character matches c,
|
||||
// then we have a non-accented version of the accented character the user
|
||||
// typed. Treat it as a close char.
|
||||
if (toBaseLowerCase(firstChar) == baseLowerC)
|
||||
if (toBaseLowerCase(firstCodePoint) == baseLowerC)
|
||||
return NEAR_PROXIMITY_CHAR;
|
||||
|
||||
// Not an exact nor an accent-alike match: search the list of close keys
|
||||
int j = 1;
|
||||
while (j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL
|
||||
&& currentChars[j] > ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) {
|
||||
const bool matched = (currentChars[j] == baseLowerC || currentChars[j] == c);
|
||||
&& currentCodePoints[j] > ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) {
|
||||
const bool matched = (currentCodePoints[j] == baseLowerC || currentCodePoints[j] == c);
|
||||
if (matched) {
|
||||
if (proximityIndex) {
|
||||
*proximityIndex = j;
|
||||
|
@ -140,11 +140,12 @@ class ProximityInfoState {
|
|||
++j;
|
||||
}
|
||||
if (j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL
|
||||
&& currentChars[j] == ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) {
|
||||
&& currentCodePoints[j] == ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) {
|
||||
++j;
|
||||
while (j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL
|
||||
&& currentChars[j] > ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) {
|
||||
const bool matched = (currentChars[j] == baseLowerC || currentChars[j] == c);
|
||||
&& currentCodePoints[j] > ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) {
|
||||
const bool matched =
|
||||
(currentCodePoints[j] == baseLowerC || currentCodePoints[j] == c);
|
||||
if (matched) {
|
||||
if (proximityIndex) {
|
||||
*proximityIndex = j;
|
||||
|
@ -165,7 +166,7 @@ class ProximityInfoState {
|
|||
inputIndex * MAX_PROXIMITY_CHARS_SIZE_INTERNAL + proximityIndex];
|
||||
}
|
||||
|
||||
inline const unsigned short *getPrimaryInputWord() const {
|
||||
inline const int *getPrimaryInputWord() const {
|
||||
return mPrimaryInputWord;
|
||||
}
|
||||
|
||||
|
@ -173,13 +174,13 @@ class ProximityInfoState {
|
|||
return mTouchPositionCorrectionEnabled;
|
||||
}
|
||||
|
||||
inline bool sameAsTyped(const unsigned short *word, int length) const {
|
||||
inline bool sameAsTyped(const int *word, int length) const {
|
||||
if (length != mInputSize) {
|
||||
return false;
|
||||
}
|
||||
const int *inputCodes = mInputCodes;
|
||||
while (length--) {
|
||||
if (static_cast<unsigned int>(*inputCodes) != static_cast<unsigned int>(*word)) {
|
||||
if (*inputCodes != *word) {
|
||||
return false;
|
||||
}
|
||||
inputCodes += MAX_PROXIMITY_CHARS_SIZE_INTERNAL;
|
||||
|
@ -236,7 +237,7 @@ class ProximityInfoState {
|
|||
// Returns angle of three points. x, y, and z are indices.
|
||||
float getPointsAngle(const int index0, const int index1, const int index2) const;
|
||||
|
||||
float getHighestProbabilitySequence(uint16_t *const charBuf) const;
|
||||
float getHighestProbabilitySequence(int *const codePointBuf) const;
|
||||
|
||||
float getProbability(const int index, const int charCode) const;
|
||||
|
||||
|
@ -255,7 +256,7 @@ class ProximityInfoState {
|
|||
float calculateSquaredDistanceFromSweetSpotCenter(
|
||||
const int keyIndex, const int inputIndex) const;
|
||||
|
||||
bool pushTouchPoint(const int inputIndex, const int nodeChar, int x, int y, const int time,
|
||||
bool pushTouchPoint(const int inputIndex, const int nodeCodePoint, int x, int y, const int time,
|
||||
const bool sample, const bool isLastPoint, const float sumAngle,
|
||||
NearKeysDistanceMap *const currentNearKeysDistances,
|
||||
const NearKeysDistanceMap *const prevNearKeysDistances,
|
||||
|
@ -269,7 +270,7 @@ class ProximityInfoState {
|
|||
return mInputXs.size() > 0 && mInputYs.size() > 0;
|
||||
}
|
||||
|
||||
inline const int *getProximityCharsAt(const int index) const {
|
||||
inline const int *getProximityCodePointsAt(const int index) const {
|
||||
return mInputCodes + (index * MAX_PROXIMITY_CHARS_SIZE_INTERNAL);
|
||||
}
|
||||
|
||||
|
@ -322,10 +323,10 @@ class ProximityInfoState {
|
|||
// inputs including the current input point.
|
||||
std::vector<NearKeycodesSet> mSearchKeysVector;
|
||||
bool mTouchPositionCorrectionEnabled;
|
||||
int32_t mInputCodes[MAX_PROXIMITY_CHARS_SIZE_INTERNAL * MAX_WORD_LENGTH_INTERNAL];
|
||||
int mInputCodes[MAX_PROXIMITY_CHARS_SIZE_INTERNAL * MAX_WORD_LENGTH_INTERNAL];
|
||||
int mNormalizedSquaredDistances[MAX_PROXIMITY_CHARS_SIZE_INTERNAL * MAX_WORD_LENGTH_INTERNAL];
|
||||
int mInputSize;
|
||||
unsigned short mPrimaryInputWord[MAX_WORD_LENGTH_INTERNAL];
|
||||
int mPrimaryInputWord[MAX_WORD_LENGTH_INTERNAL];
|
||||
};
|
||||
} // namespace latinime
|
||||
#endif // LATINIME_PROXIMITY_INFO_STATE_H
|
||||
|
|
|
@ -43,18 +43,16 @@ class TerminalAttributes {
|
|||
return mHasNextShortcutTarget;
|
||||
}
|
||||
|
||||
// Gets the shortcut target itself as a uint16_t string. For parameters and return value
|
||||
// Gets the shortcut target itself as an int string. For parameters and return value
|
||||
// see BinaryFormat::getWordAtAddress.
|
||||
// TODO: make the output an uint32_t* to handle the whole unicode range.
|
||||
inline int getNextShortcutTarget(const int maxDepth, uint16_t *outWord, int *outFreq) {
|
||||
inline int getNextShortcutTarget(const int maxDepth, int *outWord, int *outFreq) {
|
||||
const int shortcutFlags = BinaryFormat::getFlagsAndForwardPointer(mDict, &mPos);
|
||||
mHasNextShortcutTarget =
|
||||
0 != (shortcutFlags & BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT);
|
||||
mHasNextShortcutTarget = 0 != (shortcutFlags & BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT);
|
||||
unsigned int i;
|
||||
for (i = 0; i < MAX_WORD_LENGTH_INTERNAL; ++i) {
|
||||
const int codePoint = BinaryFormat::getCodePointAndForwardPointer(mDict, &mPos);
|
||||
if (NOT_A_CODE_POINT == codePoint) break;
|
||||
outWord[i] = (uint16_t)codePoint;
|
||||
outWord[i] = codePoint;
|
||||
}
|
||||
*outFreq = BinaryFormat::getAttributeFrequencyFromFlags(shortcutFlags);
|
||||
return i;
|
||||
|
|
|
@ -55,13 +55,13 @@ UnigramDictionary::UnigramDictionary(const uint8_t *const streamStart, int fullW
|
|||
UnigramDictionary::~UnigramDictionary() {
|
||||
}
|
||||
|
||||
static inline unsigned int getCodesBufferSize(const int *codes, const int codesSize) {
|
||||
return static_cast<unsigned int>(sizeof(*codes)) * codesSize;
|
||||
static inline int getCodesBufferSize(const int *codes, const int codesSize) {
|
||||
return sizeof(*codes) * codesSize;
|
||||
}
|
||||
|
||||
// TODO: This needs to take a const unsigned short* and not tinker with its contents
|
||||
static inline void addWord(unsigned short *word, int length, int frequency,
|
||||
WordsPriorityQueue *queue, int type) {
|
||||
// TODO: This needs to take a const int* and not tinker with its contents
|
||||
static inline void addWord(int *word, int length, int frequency, WordsPriorityQueue *queue,
|
||||
int type) {
|
||||
queue->push(frequency, word, length, type);
|
||||
}
|
||||
|
||||
|
@ -171,9 +171,7 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit
|
|||
int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
|
||||
const int *ycoordinates, const int *codes, const int codesSize,
|
||||
const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
|
||||
const bool useFullEditDistance, unsigned short *outWords, int *frequencies,
|
||||
int *outputTypes) const {
|
||||
|
||||
const bool useFullEditDistance, int *outWords, int *frequencies, int *outputTypes) const {
|
||||
WordsPriorityQueuePool queuePool(MAX_WORDS, SUB_QUEUE_MAX_WORDS, MAX_WORD_LENGTH);
|
||||
queuePool.clearAll();
|
||||
Correction masterCorrection;
|
||||
|
@ -218,7 +216,7 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *x
|
|||
AKLOGI("Returning %d words", suggestedWordsCount);
|
||||
/// Print the returned words
|
||||
for (int j = 0; j < suggestedWordsCount; ++j) {
|
||||
short unsigned int *w = outWords + j * MAX_WORD_LENGTH;
|
||||
int *w = outWords + j * MAX_WORD_LENGTH;
|
||||
char s[MAX_WORD_LENGTH];
|
||||
for (int i = 0; i <= MAX_WORD_LENGTH; i++) s[i] = w[i];
|
||||
(void)s; // To suppress compiler warning
|
||||
|
@ -230,12 +228,11 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *x
|
|||
return suggestedWordsCount;
|
||||
}
|
||||
|
||||
void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
|
||||
const int *xcoordinates, const int *ycoordinates, const int *codes,
|
||||
const int inputSize, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
|
||||
const bool useFullEditDistance, Correction *correction,
|
||||
WordsPriorityQueuePool *queuePool) const {
|
||||
|
||||
void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
|
||||
const int *ycoordinates, const int *codes, const int inputSize,
|
||||
const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
|
||||
const bool useFullEditDistance, Correction *correction, WordsPriorityQueuePool *queuePool)
|
||||
const {
|
||||
PROF_OPEN;
|
||||
PROF_START(0);
|
||||
PROF_END(0);
|
||||
|
@ -284,7 +281,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
|
|||
if (queue->size() > 0) {
|
||||
WordsPriorityQueue::SuggestedWord *sw = queue->top();
|
||||
const int score = sw->mScore;
|
||||
const unsigned short *word = sw->mWord;
|
||||
const int *word = sw->mWord;
|
||||
const int wordLength = sw->mWordLength;
|
||||
float ns = Correction::RankingAlgorithm::calcNormalizedScore(
|
||||
correction->getPrimaryInputWord(), i, word, wordLength, score);
|
||||
|
@ -303,7 +300,7 @@ void UnigramDictionary::initSuggestions(ProximityInfo *proximityInfo, const int
|
|||
Correction *correction) const {
|
||||
if (DEBUG_DICT) {
|
||||
AKLOGI("initSuggest");
|
||||
DUMP_WORD_INT(codes, inputSize);
|
||||
DUMP_WORD(codes, inputSize);
|
||||
}
|
||||
correction->initInputParams(proximityInfo, codes, inputSize, xCoordinates, yCoordinates);
|
||||
const int maxDepth = min(inputSize * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);
|
||||
|
@ -376,7 +373,7 @@ inline void UnigramDictionary::onTerminal(const int probability,
|
|||
const bool addToSubQueue = inputIndex < SUB_QUEUE_MAX_COUNT;
|
||||
|
||||
int wordLength;
|
||||
unsigned short *wordPointer;
|
||||
int *wordPointer;
|
||||
|
||||
if ((currentWordIndex == FIRST_WORD_INDEX) && addToMasterQueue) {
|
||||
WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
|
||||
|
@ -404,7 +401,7 @@ inline void UnigramDictionary::onTerminal(const int probability,
|
|||
// so that the insert order is protected inside the queue for words
|
||||
// with the same score. For the moment we use -1 to make sure the shortcut will
|
||||
// never be in front of the word.
|
||||
uint16_t shortcutTarget[MAX_WORD_LENGTH_INTERNAL];
|
||||
int shortcutTarget[MAX_WORD_LENGTH_INTERNAL];
|
||||
int shortcutFrequency;
|
||||
const int shortcutTargetStringLength = iterator.getNextShortcutTarget(
|
||||
MAX_WORD_LENGTH_INTERNAL, shortcutTarget, &shortcutFrequency);
|
||||
|
@ -444,7 +441,7 @@ int UnigramDictionary::getSubStringSuggestion(
|
|||
const bool hasAutoCorrectionCandidate, const int currentWordIndex,
|
||||
const int inputWordStartPos, const int inputWordLength,
|
||||
const int outputWordStartPos, const bool isSpaceProximity, int *freqArray,
|
||||
int *wordLengthArray, unsigned short *outputWord, int *outputWordLength) const {
|
||||
int *wordLengthArray, int *outputWord, int *outputWordLength) const {
|
||||
if (inputWordLength > MULTIPLE_WORDS_SUGGESTION_MAX_WORD_LENGTH) {
|
||||
return FLAG_MULTIPLE_SUGGEST_ABORT;
|
||||
}
|
||||
|
@ -487,13 +484,13 @@ int UnigramDictionary::getSubStringSuggestion(
|
|||
// TODO: Remove the safety net above //
|
||||
//////////////////////////////////////////////
|
||||
|
||||
unsigned short *tempOutputWord = 0;
|
||||
int *tempOutputWord = 0;
|
||||
int nextWordLength = 0;
|
||||
// TODO: Optimize init suggestion
|
||||
initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
|
||||
inputSize, correction);
|
||||
|
||||
unsigned short word[MAX_WORD_LENGTH_INTERNAL];
|
||||
int word[MAX_WORD_LENGTH_INTERNAL];
|
||||
int freq = getMostFrequentWordLike(
|
||||
inputWordStartPos, inputWordLength, correction, word);
|
||||
if (freq > 0) {
|
||||
|
@ -592,7 +589,7 @@ void UnigramDictionary::getMultiWordsSuggestionRec(ProximityInfo *proximityInfo,
|
|||
const bool useFullEditDistance, const int inputSize, Correction *correction,
|
||||
WordsPriorityQueuePool *queuePool, const bool hasAutoCorrectionCandidate,
|
||||
const int startInputPos, const int startWordIndex, const int outputWordLength,
|
||||
int *freqArray, int *wordLengthArray, unsigned short *outputWord) const {
|
||||
int *freqArray, int *wordLengthArray, int *outputWord) const {
|
||||
if (startWordIndex >= (MULTIPLE_WORDS_SUGGESTION_MAX_WORDS - 1)) {
|
||||
// Return if the last word index
|
||||
return;
|
||||
|
@ -678,7 +675,7 @@ void UnigramDictionary::getSplitMultipleWordsSuggestions(ProximityInfo *proximit
|
|||
}
|
||||
|
||||
// Allocating fixed length array on stack
|
||||
unsigned short outputWord[MAX_WORD_LENGTH];
|
||||
int outputWord[MAX_WORD_LENGTH];
|
||||
int freqArray[MULTIPLE_WORDS_SUGGESTION_MAX_WORDS];
|
||||
int wordLengthArray[MULTIPLE_WORDS_SUGGESTION_MAX_WORDS];
|
||||
const int outputWordLength = 0;
|
||||
|
@ -693,11 +690,11 @@ void UnigramDictionary::getSplitMultipleWordsSuggestions(ProximityInfo *proximit
|
|||
// Wrapper for getMostFrequentWordLikeInner, which matches it to the previous
|
||||
// interface.
|
||||
inline int UnigramDictionary::getMostFrequentWordLike(const int startInputIndex,
|
||||
const int inputSize, Correction *correction, unsigned short *word) const {
|
||||
uint16_t inWord[inputSize];
|
||||
const int inputSize, Correction *correction, int *word) const {
|
||||
int inWord[inputSize];
|
||||
|
||||
for (int i = 0; i < inputSize; ++i) {
|
||||
inWord[i] = (uint16_t)correction->getPrimaryCharAt(startInputIndex + i);
|
||||
inWord[i] = correction->getPrimaryCodePointAt(startInputIndex + i);
|
||||
}
|
||||
return getMostFrequentWordLikeInner(inWord, inputSize, word);
|
||||
}
|
||||
|
@ -715,14 +712,14 @@ inline int UnigramDictionary::getMostFrequentWordLike(const int startInputIndex,
|
|||
// In and out parameters may point to the same location. This function takes care
|
||||
// not to use any input parameters after it wrote into its outputs.
|
||||
static inline bool testCharGroupForContinuedLikeness(const uint8_t flags,
|
||||
const uint8_t *const root, const int startPos, const uint16_t *const inWord,
|
||||
const int startInputIndex, const int inputSize, int32_t *outNewWord, int *outInputIndex,
|
||||
const uint8_t *const root, const int startPos, const int *const inWord,
|
||||
const int startInputIndex, const int inputSize, int *outNewWord, int *outInputIndex,
|
||||
int *outPos) {
|
||||
const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags));
|
||||
int pos = startPos;
|
||||
int32_t codePoint = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
|
||||
int32_t baseChar = toBaseLowerCase(codePoint);
|
||||
const uint16_t wChar = toBaseLowerCase(inWord[startInputIndex]);
|
||||
int codePoint = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
|
||||
int baseChar = toBaseLowerCase(codePoint);
|
||||
const int wChar = toBaseLowerCase(inWord[startInputIndex]);
|
||||
|
||||
if (baseChar != wChar) {
|
||||
*outPos = hasMultipleChars ? BinaryFormat::skipOtherCharacters(root, pos) : pos;
|
||||
|
@ -753,8 +750,8 @@ static inline bool testCharGroupForContinuedLikeness(const uint8_t flags,
|
|||
// It will compare the frequency to the max frequency, and if greater, will
|
||||
// copy the word into the output buffer. In output value maxFreq, it will
|
||||
// write the new maximum frequency if it changed.
|
||||
static inline void onTerminalWordLike(const int freq, int32_t *newWord, const int length,
|
||||
short unsigned int *outWord, int *maxFreq) {
|
||||
static inline void onTerminalWordLike(const int freq, int *newWord, const int length, int *outWord,
|
||||
int *maxFreq) {
|
||||
if (freq > *maxFreq) {
|
||||
for (int q = 0; q < length; ++q) {
|
||||
outWord[q] = newWord[q];
|
||||
|
@ -766,9 +763,9 @@ static inline void onTerminalWordLike(const int freq, int32_t *newWord, const in
|
|||
|
||||
// Will find the highest frequency of the words like the one passed as an argument,
|
||||
// that is, everything that only differs by case/accents.
|
||||
int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t *const inWord,
|
||||
const int inputSize, short unsigned int *outWord) const {
|
||||
int32_t newWord[MAX_WORD_LENGTH_INTERNAL];
|
||||
int UnigramDictionary::getMostFrequentWordLikeInner(const int *const inWord, const int inputSize,
|
||||
int *outWord) const {
|
||||
int newWord[MAX_WORD_LENGTH_INTERNAL];
|
||||
int depth = 0;
|
||||
int maxFreq = -1;
|
||||
const uint8_t *const root = DICT_ROOT;
|
||||
|
@ -828,7 +825,7 @@ int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t *const inWord
|
|||
return maxFreq;
|
||||
}
|
||||
|
||||
int UnigramDictionary::getFrequency(const int32_t *const inWord, const int length) const {
|
||||
int UnigramDictionary::getFrequency(const int *const inWord, const int length) const {
|
||||
const uint8_t *const root = DICT_ROOT;
|
||||
int pos = BinaryFormat::getTerminalPosition(root, inWord, length,
|
||||
false /* forceLowerCaseSearch */);
|
||||
|
@ -853,8 +850,7 @@ int UnigramDictionary::getFrequency(const int32_t *const inWord, const int lengt
|
|||
}
|
||||
|
||||
// TODO: remove this function.
|
||||
int UnigramDictionary::getBigramPosition(int pos, unsigned short *word, int offset,
|
||||
int length) const {
|
||||
int UnigramDictionary::getBigramPosition(int pos, int *word, int offset, int length) const {
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
@ -900,7 +896,7 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos,
|
|||
// else if FLAG_IS_TERMINAL: the frequency
|
||||
// else if MASK_GROUP_ADDRESS_TYPE is not NONE: the children address
|
||||
// Note that you can't have a node that both is not a terminal and has no children.
|
||||
int32_t c = BinaryFormat::getCodePointAndForwardPointer(DICT_ROOT, &pos);
|
||||
int c = BinaryFormat::getCodePointAndForwardPointer(DICT_ROOT, &pos);
|
||||
assert(NOT_A_CODE_POINT != c);
|
||||
|
||||
// We are going to loop through each character and make it look like it's a different
|
||||
|
@ -914,7 +910,7 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos,
|
|||
// We prefetch the next char. If 'c' is the last char of this node, we will have
|
||||
// NOT_A_CODE_POINT in the next char. From this we can decide whether this virtual node
|
||||
// should behave as a terminal or not and whether we have children.
|
||||
const int32_t nextc = hasMultipleChars
|
||||
const int nextc = hasMultipleChars
|
||||
? BinaryFormat::getCodePointAndForwardPointer(DICT_ROOT, &pos) : NOT_A_CODE_POINT;
|
||||
const bool isLastChar = (NOT_A_CODE_POINT == nextc);
|
||||
// If there are more chars in this nodes, then this virtual node is not a terminal.
|
||||
|
|
|
@ -41,12 +41,12 @@ class UnigramDictionary {
|
|||
static const int FLAG_MULTIPLE_SUGGEST_CONTINUE = 2;
|
||||
UnigramDictionary(const uint8_t *const streamStart, int fullWordMultiplier, int maxWordLength,
|
||||
int maxWords, const unsigned int flags);
|
||||
int getFrequency(const int32_t *const inWord, const int length) const;
|
||||
int getBigramPosition(int pos, unsigned short *word, int offset, int length) const;
|
||||
int getFrequency(const int *const inWord, const int length) const;
|
||||
int getBigramPosition(int pos, int *word, int offset, int length) const;
|
||||
int getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
|
||||
const int *ycoordinates, const int *codes, const int codesSize,
|
||||
const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
|
||||
const bool useFullEditDistance, unsigned short *outWords, int *frequencies,
|
||||
const bool useFullEditDistance, int *outWords, int *frequencies,
|
||||
int *outputTypes) const;
|
||||
virtual ~UnigramDictionary();
|
||||
|
||||
|
@ -93,9 +93,9 @@ class UnigramDictionary {
|
|||
int *newChildPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool,
|
||||
const int currentWordIndex) const;
|
||||
int getMostFrequentWordLike(const int startInputIndex, const int inputSize,
|
||||
Correction *correction, unsigned short *word) const;
|
||||
int getMostFrequentWordLikeInner(const uint16_t *const inWord, const int inputSize,
|
||||
short unsigned int *outWord) const;
|
||||
Correction *correction, int *word) const;
|
||||
int getMostFrequentWordLikeInner(const int *const inWord, const int inputSize,
|
||||
int *outWord) const;
|
||||
int getSubStringSuggestion(
|
||||
ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates,
|
||||
const int *codes, const bool useFullEditDistance, Correction *correction,
|
||||
|
@ -103,14 +103,13 @@ class UnigramDictionary {
|
|||
const bool hasAutoCorrectionCandidate, const int currentWordIndex,
|
||||
const int inputWordStartPos, const int inputWordLength,
|
||||
const int outputWordStartPos, const bool isSpaceProximity, int *freqArray,
|
||||
int *wordLengthArray, unsigned short *outputWord, int *outputWordLength) const;
|
||||
void getMultiWordsSuggestionRec(ProximityInfo *proximityInfo,
|
||||
const int *xcoordinates, const int *ycoordinates, const int *codes,
|
||||
const bool useFullEditDistance, const int inputSize,
|
||||
Correction *correction, WordsPriorityQueuePool *queuePool,
|
||||
int *wordLengthArray, int *outputWord, int *outputWordLength) const;
|
||||
void getMultiWordsSuggestionRec(ProximityInfo *proximityInfo, const int *xcoordinates,
|
||||
const int *ycoordinates, const int *codes, const bool useFullEditDistance,
|
||||
const int inputSize, Correction *correction, WordsPriorityQueuePool *queuePool,
|
||||
const bool hasAutoCorrectionCandidate, const int startPos, const int startWordIndex,
|
||||
const int outputWordLength, int *freqArray, int *wordLengthArray,
|
||||
unsigned short *outputWord) const;
|
||||
int *outputWord) const;
|
||||
|
||||
const uint8_t *const DICT_ROOT;
|
||||
const int MAX_WORD_LENGTH;
|
||||
|
|
|
@ -30,15 +30,15 @@ class WordsPriorityQueue {
|
|||
class SuggestedWord {
|
||||
public:
|
||||
int mScore;
|
||||
unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
|
||||
int mWord[MAX_WORD_LENGTH_INTERNAL];
|
||||
int mWordLength;
|
||||
bool mUsed;
|
||||
int mType;
|
||||
|
||||
void setParams(int score, unsigned short *word, int wordLength, int type) {
|
||||
void setParams(int score, int *word, int wordLength, int type) {
|
||||
mScore = score;
|
||||
mWordLength = wordLength;
|
||||
memcpy(mWord, word, sizeof(unsigned short) * wordLength);
|
||||
memcpy(mWord, word, sizeof(int) * wordLength);
|
||||
mUsed = true;
|
||||
mType = type;
|
||||
}
|
||||
|
@ -57,9 +57,9 @@ class WordsPriorityQueue {
|
|||
delete[] mSuggestedWords;
|
||||
}
|
||||
|
||||
void push(int score, unsigned short *word, int wordLength, int type) {
|
||||
void push(int score, int *word, int wordLength, int type) {
|
||||
SuggestedWord *sw = 0;
|
||||
if (mSuggestions.size() >= MAX_WORDS) {
|
||||
if (size() >= MAX_WORDS) {
|
||||
sw = mSuggestions.top();
|
||||
const int minScore = sw->mScore;
|
||||
if (minScore >= score) {
|
||||
|
@ -94,11 +94,10 @@ class WordsPriorityQueue {
|
|||
return sw;
|
||||
}
|
||||
|
||||
int outputSuggestions(const unsigned short *before, const int beforeLength,
|
||||
int *frequencies, unsigned short *outputChars, int* outputTypes) {
|
||||
int outputSuggestions(const int *before, const int beforeLength, int *frequencies,
|
||||
int *outputCodePoints, int* outputTypes) {
|
||||
mHighestSuggestedWord = 0;
|
||||
const unsigned int size = min(
|
||||
MAX_WORDS, static_cast<unsigned int>(mSuggestions.size()));
|
||||
const int size = min(MAX_WORDS, static_cast<int>(mSuggestions.size()));
|
||||
SuggestedWord *swBuffer[size];
|
||||
int index = size - 1;
|
||||
while (!mSuggestions.empty() && index >= 0) {
|
||||
|
@ -113,9 +112,9 @@ class WordsPriorityQueue {
|
|||
}
|
||||
if (size >= 2) {
|
||||
SuggestedWord *nsMaxSw = 0;
|
||||
unsigned int maxIndex = 0;
|
||||
int maxIndex = 0;
|
||||
float maxNs = 0;
|
||||
for (unsigned int i = 0; i < size; ++i) {
|
||||
for (int i = 0; i < size; ++i) {
|
||||
SuggestedWord *tempSw = swBuffer[i];
|
||||
if (!tempSw) {
|
||||
continue;
|
||||
|
@ -132,17 +131,17 @@ class WordsPriorityQueue {
|
|||
swBuffer[0] = nsMaxSw;
|
||||
}
|
||||
}
|
||||
for (unsigned int i = 0; i < size; ++i) {
|
||||
for (int i = 0; i < size; ++i) {
|
||||
SuggestedWord *sw = swBuffer[i];
|
||||
if (!sw) {
|
||||
AKLOGE("SuggestedWord is null %d", i);
|
||||
continue;
|
||||
}
|
||||
const unsigned int wordLength = sw->mWordLength;
|
||||
unsigned short *targetAddress = outputChars + i * MAX_WORD_LENGTH;
|
||||
const int wordLength = sw->mWordLength;
|
||||
int *targetAddress = outputCodePoints + i * MAX_WORD_LENGTH;
|
||||
frequencies[i] = sw->mScore;
|
||||
outputTypes[i] = sw->mType;
|
||||
memcpy(targetAddress, sw->mWord, wordLength * sizeof(unsigned short));
|
||||
memcpy(targetAddress, sw->mWord, wordLength * sizeof(int));
|
||||
if (wordLength < MAX_WORD_LENGTH) {
|
||||
targetAddress[wordLength] = 0;
|
||||
}
|
||||
|
@ -152,7 +151,7 @@ class WordsPriorityQueue {
|
|||
}
|
||||
|
||||
int size() const {
|
||||
return mSuggestions.size();
|
||||
return static_cast<int>(mSuggestions.size());
|
||||
}
|
||||
|
||||
void clear() {
|
||||
|
@ -175,13 +174,13 @@ class WordsPriorityQueue {
|
|||
DUMP_WORD(mHighestSuggestedWord->mWord, mHighestSuggestedWord->mWordLength);
|
||||
}
|
||||
|
||||
float getHighestNormalizedScore(const unsigned short *before, const int beforeLength,
|
||||
unsigned short **outWord, int *outScore, int *outLength) {
|
||||
float getHighestNormalizedScore(const int *before, const int beforeLength, int **outWord,
|
||||
int *outScore, int *outLength) {
|
||||
if (!mHighestSuggestedWord) {
|
||||
return 0.0;
|
||||
}
|
||||
return getNormalizedScore(
|
||||
mHighestSuggestedWord, before, beforeLength, outWord, outScore, outLength);
|
||||
return getNormalizedScore(mHighestSuggestedWord, before, beforeLength, outWord, outScore,
|
||||
outLength);
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -192,9 +191,8 @@ class WordsPriorityQueue {
|
|||
}
|
||||
};
|
||||
|
||||
SuggestedWord *getFreeSuggestedWord(int score, unsigned short *word,
|
||||
int wordLength, int type) {
|
||||
for (unsigned int i = 0; i < MAX_WORD_LENGTH; ++i) {
|
||||
SuggestedWord *getFreeSuggestedWord(int score, int *word, int wordLength, int type) {
|
||||
for (int i = 0; i < MAX_WORD_LENGTH; ++i) {
|
||||
if (!mSuggestedWords[i].mUsed) {
|
||||
mSuggestedWords[i].setParams(score, word, wordLength, type);
|
||||
return &mSuggestedWords[i];
|
||||
|
@ -203,10 +201,10 @@ class WordsPriorityQueue {
|
|||
return 0;
|
||||
}
|
||||
|
||||
static float getNormalizedScore(SuggestedWord *sw, const unsigned short *before,
|
||||
const int beforeLength, unsigned short **outWord, int *outScore, int *outLength) {
|
||||
static float getNormalizedScore(SuggestedWord *sw, const int *before, const int beforeLength,
|
||||
int **outWord, int *outScore, int *outLength) {
|
||||
const int score = sw->mScore;
|
||||
unsigned short *word = sw->mWord;
|
||||
int *word = sw->mWord;
|
||||
const int wordLength = sw->mWordLength;
|
||||
if (outScore) {
|
||||
*outScore = score;
|
||||
|
@ -217,15 +215,15 @@ class WordsPriorityQueue {
|
|||
if (outLength) {
|
||||
*outLength = wordLength;
|
||||
}
|
||||
return Correction::RankingAlgorithm::calcNormalizedScore(
|
||||
before, beforeLength, word, wordLength, score);
|
||||
return Correction::RankingAlgorithm::calcNormalizedScore(before, beforeLength, word,
|
||||
wordLength, score);
|
||||
}
|
||||
|
||||
typedef std::priority_queue<SuggestedWord *, std::vector<SuggestedWord *>,
|
||||
wordComparator> Suggestions;
|
||||
Suggestions mSuggestions;
|
||||
const unsigned int MAX_WORDS;
|
||||
const unsigned int MAX_WORD_LENGTH;
|
||||
const int MAX_WORDS;
|
||||
const int MAX_WORD_LENGTH;
|
||||
SuggestedWord *mSuggestedWords;
|
||||
SuggestedWord *mHighestSuggestedWord;
|
||||
};
|
||||
|
|
Loading…
Reference in a new issue