Merge "Use 32-bit code points for suggestions output"

This commit is contained in:
Ken Wakasa 2012-10-31 09:36:00 -07:00 committed by Android (Google) Code Review
commit 01f6a61e51
20 changed files with 329 additions and 363 deletions

View file

@ -51,8 +51,7 @@ public final class BinaryDictionary extends Dictionary {
private long mNativeDict;
private final Locale mLocale;
private final int[] mInputCodePoints = new int[MAX_WORD_LENGTH];
// TODO: The below should be int[] mOutputCodePoints
private final char[] mOutputChars = new char[MAX_WORD_LENGTH * MAX_RESULTS];
private final int[] mOutputCodePoints = new int[MAX_WORD_LENGTH * MAX_RESULTS];
private final int[] mSpaceIndices = new int[MAX_SPACES];
private final int[] mOutputScores = new int[MAX_RESULTS];
private final int[] mOutputTypes = new int[MAX_RESULTS];
@ -88,9 +87,9 @@ public final class BinaryDictionary extends Dictionary {
* @param useFullEditDistance whether to use the full edit distance in suggestions
* @param dictType the dictionary type, as a human-readable string
*/
public BinaryDictionary(final Context context,
final String filename, final long offset, final long length,
final boolean useFullEditDistance, final Locale locale, final String dictType) {
public BinaryDictionary(final Context context, final String filename, final long offset,
final long length, final boolean useFullEditDistance, final Locale locale,
final String dictType) {
super(dictType);
mLocale = locale;
mUseFullEditDistance = useFullEditDistance;
@ -109,10 +108,10 @@ public final class BinaryDictionary extends Dictionary {
private native int getSuggestionsNative(long dict, long proximityInfo, long traverseSession,
int[] xCoordinates, int[] yCoordinates, int[] times, int[] pointerIds,
int[] inputCodePoints, int codesSize, int commitPoint, boolean isGesture,
int[] prevWordCodePointArray, boolean useFullEditDistance, char[] outputChars,
int[] prevWordCodePointArray, boolean useFullEditDistance, int[] outputCodePoints,
int[] outputScores, int[] outputIndices, int[] outputTypes);
private static native float calcNormalizedScoreNative(char[] before, char[] after, int score);
private static native int editDistanceNative(char[] before, char[] after);
private static native float calcNormalizedScoreNative(int[] before, int[] after, int score);
private static native int editDistanceNative(int[] before, int[] after);
// TODO: Move native dict into session
private final void loadDictionary(final String path, final long startOffset,
@ -153,7 +152,8 @@ public final class BinaryDictionary extends Dictionary {
proximityInfo.getNativeProximityInfo(), getTraverseSession(sessionId).getSession(),
ips.getXCoordinates(), ips.getYCoordinates(), ips.getTimes(), ips.getPointerIds(),
mInputCodePoints, codesSize, 0 /* commitPoint */, isGesture, prevWordCodePointArray,
mUseFullEditDistance, mOutputChars, mOutputScores, mSpaceIndices, mOutputTypes);
mUseFullEditDistance, mOutputCodePoints, mOutputScores, mSpaceIndices,
mOutputTypes);
final int count = Math.min(tmpCount, MAX_PREDICTIONS);
final ArrayList<SuggestedWordInfo> suggestions = CollectionUtils.newArrayList();
@ -161,14 +161,14 @@ public final class BinaryDictionary extends Dictionary {
if (composerSize > 0 && mOutputScores[j] < 1) break;
final int start = j * MAX_WORD_LENGTH;
int len = 0;
while (len < MAX_WORD_LENGTH && mOutputChars[start + len] != 0) {
while (len < MAX_WORD_LENGTH && mOutputCodePoints[start + len] != 0) {
++len;
}
if (len > 0) {
final int score = SuggestedWordInfo.KIND_WHITELIST == mOutputTypes[j]
? SuggestedWordInfo.MAX_SCORE : mOutputScores[j];
suggestions.add(new SuggestedWordInfo(
new String(mOutputChars, start, len), score, mOutputTypes[j], mDictType));
suggestions.add(new SuggestedWordInfo(new String(mOutputCodePoints, start, len),
score, mOutputTypes[j], mDictType));
}
}
return suggestions;
@ -180,14 +180,16 @@ public final class BinaryDictionary extends Dictionary {
/**
 * Forwards a score-normalization request to the native implementation.
 *
 * Both strings are converted to code point arrays with
 * {@link StringUtils#toCodePointArray} before the native call.
 *
 * @param before the first word handed to the native scorer
 * @param after the second word handed to the native scorer
 * @param score the score passed through to the native scorer
 * @return the value returned by {@code calcNormalizedScoreNative}
 */
public static float calcNormalizedScore(final String before, final String after,
        final int score) {
    final int[] beforeCodePoints = StringUtils.toCodePointArray(before);
    final int[] afterCodePoints = StringUtils.toCodePointArray(after);
    return calcNormalizedScoreNative(beforeCodePoints, afterCodePoints, score);
}
/**
 * Forwards an edit-distance request to the native implementation.
 *
 * Both strings are converted to code point arrays with
 * {@link StringUtils#toCodePointArray} before the native call.
 *
 * @param before the first word
 * @param after the second word
 * @return the value returned by {@code editDistanceNative}
 * @throws IllegalArgumentException if either argument is {@code null}
 */
public static int editDistance(final String before, final String after) {
    // Reject null operands before any conversion takes place.
    if (null == before || null == after) {
        throw new IllegalArgumentException();
    }
    final int[] beforeCodePoints = StringUtils.toCodePointArray(before);
    final int[] afterCodePoints = StringUtils.toCodePointArray(after);
    return editDistanceNative(beforeCodePoints, afterCodePoints);
}
@Override
@ -206,9 +208,9 @@ public final class BinaryDictionary extends Dictionary {
// calls when checking for changes in an entire dictionary.
public boolean isValidBigram(final String word1, final String word2) {
    // Both words must be non-empty according to TextUtils.isEmpty(); otherwise the native
    // lookup is skipped and the pair is reported as not being a valid bigram.
    final boolean hasBothWords = !TextUtils.isEmpty(word1) && !TextUtils.isEmpty(word2);
    if (!hasBothWords) {
        return false;
    }
    // The native lookup works on code point arrays rather than on Java strings.
    return isValidBigramNative(mNativeDict, StringUtils.toCodePointArray(word1),
            StringUtils.toCodePointArray(word2));
}
@Override

View file

@ -132,7 +132,7 @@ static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jobject object,
jintArray yCoordinatesArray, jintArray timesArray, jintArray pointerIdsArray,
jintArray inputCodePointsArray, jint arraySize, jint commitPoint, jboolean isGesture,
jintArray prevWordCodePointsForBigrams, jboolean useFullEditDistance,
jcharArray outputCharsArray, jintArray scoresArray, jintArray spaceIndicesArray,
jintArray outputCodePointsArray, jintArray scoresArray, jintArray spaceIndicesArray,
jintArray outputTypesArray) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return 0;
@ -162,16 +162,15 @@ static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jobject object,
}
// Output values
// TODO: Should be "outputCodePointsLength" and "int outputCodePoints[]"
const jsize outputCharsLength = env->GetArrayLength(outputCharsArray);
unsigned short outputChars[outputCharsLength];
const jsize outputCodePointsLength = env->GetArrayLength(outputCodePointsArray);
int outputCodePoints[outputCodePointsLength];
const jsize scoresLength = env->GetArrayLength(scoresArray);
int scores[scoresLength];
const jsize spaceIndicesLength = env->GetArrayLength(spaceIndicesArray);
int spaceIndices[spaceIndicesLength];
const jsize outputTypesLength = env->GetArrayLength(outputTypesArray);
int outputTypes[outputTypesLength];
memset(outputChars, 0, sizeof(outputChars));
memset(outputCodePoints, 0, sizeof(outputCodePoints));
memset(scores, 0, sizeof(scores));
memset(spaceIndices, 0, sizeof(spaceIndices));
memset(outputTypes, 0, sizeof(outputTypes));
@ -180,16 +179,15 @@ static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jobject object,
if (isGesture || arraySize > 0) {
count = dictionary->getSuggestions(pInfo, traverseSession, xCoordinates, yCoordinates,
times, pointerIds, inputCodePoints, arraySize, prevWordCodePoints,
prevWordCodePointsLength, commitPoint, isGesture, useFullEditDistance, outputChars,
scores, spaceIndices, outputTypes);
prevWordCodePointsLength, commitPoint, isGesture, useFullEditDistance,
outputCodePoints, scores, spaceIndices, outputTypes);
} else {
count = dictionary->getBigrams(prevWordCodePoints, prevWordCodePointsLength,
inputCodePoints, arraySize, outputChars, scores, outputTypes);
inputCodePoints, arraySize, outputCodePoints, scores, outputTypes);
}
// Copy back the output values
// TODO: Should be SetIntArrayRegion()
env->SetCharArrayRegion(outputCharsArray, 0, outputCharsLength, outputChars);
env->SetIntArrayRegion(outputCodePointsArray, 0, outputCodePointsLength, outputCodePoints);
env->SetIntArrayRegion(scoresArray, 0, scoresLength, scores);
env->SetIntArrayRegion(spaceIndicesArray, 0, spaceIndicesLength, spaceIndices);
env->SetIntArrayRegion(outputTypesArray, 0, outputTypesLength, outputTypes);
@ -221,29 +219,27 @@ static jboolean latinime_BinaryDictionary_isValidBigram(JNIEnv *env, jobject obj
}
// Native side of calcNormalizedScoreNative(int[], int[], int).
// Copies each Java int array into a local buffer of the same length and forwards the buffers,
// their lengths and the score to Correction::RankingAlgorithm::calcNormalizedScore().
static jfloat latinime_BinaryDictionary_calcNormalizedScore(JNIEnv *env, jobject object,
        jintArray before, jintArray after, jint score) {
    // Local copy of the "before" code points.
    const jsize beforeLength = env->GetArrayLength(before);
    int beforeCodePoints[beforeLength];
    env->GetIntArrayRegion(before, 0, beforeLength, beforeCodePoints);

    // Local copy of the "after" code points.
    const jsize afterLength = env->GetArrayLength(after);
    int afterCodePoints[afterLength];
    env->GetIntArrayRegion(after, 0, afterLength, afterCodePoints);

    return Correction::RankingAlgorithm::calcNormalizedScore(beforeCodePoints, beforeLength,
            afterCodePoints, afterLength, score);
}
// Native side of editDistanceNative(int[], int[]).
// Copies each Java int array into a local buffer of the same length and forwards the buffers
// and their lengths to Correction::RankingAlgorithm::editDistance().
static jint latinime_BinaryDictionary_editDistance(JNIEnv *env, jobject object, jintArray before,
        jintArray after) {
    // Local copy of the "before" code points.
    const jsize beforeLength = env->GetArrayLength(before);
    int beforeCodePoints[beforeLength];
    env->GetIntArrayRegion(before, 0, beforeLength, beforeCodePoints);

    // Local copy of the "after" code points.
    const jsize afterLength = env->GetArrayLength(after);
    int afterCodePoints[afterLength];
    env->GetIntArrayRegion(after, 0, afterLength, afterCodePoints);

    return Correction::RankingAlgorithm::editDistance(beforeCodePoints, beforeLength,
            afterCodePoints, afterLength);
}
static void latinime_BinaryDictionary_close(JNIEnv *env, jobject object, jlong dict) {
@ -279,15 +275,15 @@ static JNINativeMethod sMethods[] = {
{"openNative", "(Ljava/lang/String;JJIIII)J",
reinterpret_cast<void *>(latinime_BinaryDictionary_open)},
{"closeNative", "(J)V", reinterpret_cast<void *>(latinime_BinaryDictionary_close)},
{"getSuggestionsNative", "(JJJ[I[I[I[I[IIIZ[IZ[C[I[I[I)I",
{"getSuggestionsNative", "(JJJ[I[I[I[I[IIIZ[IZ[I[I[I[I)I",
reinterpret_cast<void *>(latinime_BinaryDictionary_getSuggestions)},
{"getFrequencyNative", "(J[I)I",
reinterpret_cast<void *>(latinime_BinaryDictionary_getFrequency)},
{"isValidBigramNative", "(J[I[I)Z",
reinterpret_cast<void *>(latinime_BinaryDictionary_isValidBigram)},
{"calcNormalizedScoreNative", "([C[CI)F",
{"calcNormalizedScoreNative", "([I[II)F",
reinterpret_cast<void *>(latinime_BinaryDictionary_calcNormalizedScore)},
{"editDistanceNative", "([C[C)I",
{"editDistanceNative", "([I[I)I",
reinterpret_cast<void *>(latinime_BinaryDictionary_editDistance)}
};

View file

@ -36,13 +36,13 @@ BigramDictionary::BigramDictionary(const unsigned char *dict, int maxWordLength,
// Destructor; the body is empty.
BigramDictionary::~BigramDictionary() {}
bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequency,
int *bigramFreq, unsigned short *bigramChars, int *outputTypes) const {
bool BigramDictionary::addWordBigram(int *word, int length, int frequency, int *bigramFreq,
int *bigramCodePoints, int *outputTypes) const {
word[length] = 0;
if (DEBUG_DICT) {
#ifdef FLAG_DBG
char s[length + 1];
for (int i = 0; i <= length; i++) s[i] = word[i];
for (int i = 0; i <= length; i++) s[i] = static_cast<char>(word[i]);
AKLOGI("Bigram: Found word = %s, freq = %d :", s, frequency);
#endif
}
@ -51,7 +51,8 @@ bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequ
int insertAt = 0;
while (insertAt < MAX_PREDICTIONS) {
if (frequency > bigramFreq[insertAt] || (bigramFreq[insertAt] == frequency
&& length < Dictionary::wideStrLen(bigramChars + insertAt * MAX_WORD_LENGTH))) {
&& length < Dictionary::wideStrLen(
bigramCodePoints + insertAt * MAX_WORD_LENGTH))) {
break;
}
insertAt++;
@ -65,10 +66,10 @@ bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequ
(MAX_PREDICTIONS - insertAt - 1) * sizeof(bigramFreq[0]));
bigramFreq[insertAt] = frequency;
outputTypes[insertAt] = Dictionary::KIND_PREDICTION;
memmove(bigramChars + (insertAt + 1) * MAX_WORD_LENGTH,
bigramChars + insertAt * MAX_WORD_LENGTH,
(MAX_PREDICTIONS - insertAt - 1) * sizeof(bigramChars[0]) * MAX_WORD_LENGTH);
unsigned short *dest = bigramChars + insertAt * MAX_WORD_LENGTH;
memmove(bigramCodePoints + (insertAt + 1) * MAX_WORD_LENGTH,
bigramCodePoints + insertAt * MAX_WORD_LENGTH,
(MAX_PREDICTIONS - insertAt - 1) * sizeof(bigramCodePoints[0]) * MAX_WORD_LENGTH);
int *dest = bigramCodePoints + insertAt * MAX_WORD_LENGTH;
while (length--) {
*dest++ = *word++;
}
@ -86,7 +87,7 @@ bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequ
* prevWordLength: its length.
* inputCodes: what user typed, in the same format as for UnigramDictionary::getSuggestions.
* codesSize: the size of the codes array.
* bigramChars: an array for output, in the same format as outwords for getSuggestions.
* bigramCodePoints: an array for output, in the same format as outwords for getSuggestions.
* bigramFreq: an array to output frequencies.
* outputTypes: an array to output types.
* This method returns the number of bigrams this word has, for backward compatibility.
@ -97,8 +98,8 @@ bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequ
* and the bigrams are used to boost unigram result scores, it makes little sense to
* reduce their scope to the ones that match the first letter.
*/
int BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, int *inputCodes,
int codesSize, unsigned short *bigramChars, int *bigramFreq, int *outputTypes) const {
int BigramDictionary::getBigrams(const int *prevWord, int prevWordLength, int *inputCodes,
int codesSize, int *bigramCodePoints, int *bigramFreq, int *outputTypes) const {
// TODO: remove unused arguments, and refrain from storing stuff in members of this class
// TODO: have "in" arguments before "out" ones, and make out args explicit in the name
@ -117,7 +118,7 @@ int BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, in
int bigramCount = 0;
do {
bigramFlags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
uint16_t bigramBuffer[MAX_WORD_LENGTH];
int bigramBuffer[MAX_WORD_LENGTH];
int unigramFreq = 0;
const int bigramPos = BinaryFormat::getAttributeAddressAndForwardPointer(root, bigramFlags,
&pos);
@ -134,7 +135,7 @@ int BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, in
// here, but it can't get too bad.
const int frequency =
BinaryFormat::computeFrequencyForBigram(unigramFreq, bigramFreqTemp);
if (addWordBigram(bigramBuffer, length, frequency, bigramFreq, bigramChars,
if (addWordBigram(bigramBuffer, length, frequency, bigramFreq, bigramCodePoints,
outputTypes)) {
++bigramCount;
}
@ -190,12 +191,12 @@ void BigramDictionary::fillBigramAddressToFrequencyMapAndFilter(const int32_t *p
} while (0 != (BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags));
}
bool BigramDictionary::checkFirstCharacter(unsigned short *word, int *inputCodes) const {
bool BigramDictionary::checkFirstCharacter(int *word, int *inputCodes) const {
// Checks whether this word starts with same character or neighboring characters of
// what user typed.
int maxAlt = MAX_ALTERNATIVES;
const unsigned short firstBaseChar = toBaseLowerCase(*word);
const int firstBaseChar = toBaseLowerCase(*word);
while (maxAlt > 0) {
if (toBaseLowerCase(*inputCodes) == firstBaseChar) {
return true;

View file

@ -27,23 +27,23 @@ namespace latinime {
class BigramDictionary {
public:
BigramDictionary(const unsigned char *dict, int maxWordLength, int maxPredictions);
int getBigrams(const int32_t *word, int length, int *inputCodes, int codesSize,
unsigned short *outWords, int *frequencies, int *outputTypes) const;
void fillBigramAddressToFrequencyMapAndFilter(const int32_t *prevWord, const int prevWordLength,
int getBigrams(const int *word, int length, int *inputCodes, int codesSize, int *outWords,
int *frequencies, int *outputTypes) const;
void fillBigramAddressToFrequencyMapAndFilter(const int *prevWord, const int prevWordLength,
std::map<int, int> *map, uint8_t *filter) const;
bool isValidBigram(const int32_t *word1, int length1, const int32_t *word2, int length2) const;
bool isValidBigram(const int *word1, int length1, const int *word2, int length2) const;
~BigramDictionary();
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(BigramDictionary);
bool addWordBigram(unsigned short *word, int length, int frequency,
int *bigramFreq, unsigned short *bigramChars, int *outputTypes) const;
bool addWordBigram(int *word, int length, int frequency, int *bigramFreq, int *bigramCodePoints,
int *outputTypes) const;
int getBigramAddress(int *pos, bool advance);
int getBigramFreq(int *pos);
void searchForTerminalNode(int addressLookingFor, int frequency);
bool getFirstBitOfByte(int *pos) { return (DICT[*pos] & 0x80) > 0; }
bool getSecondBitOfByte(int *pos) { return (DICT[*pos] & 0x40) > 0; }
bool checkFirstCharacter(unsigned short *word, int *inputCodes) const;
int getBigramListPositionForWord(const int32_t *prevWord, const int prevWordLength,
bool checkFirstCharacter(int *word, int *inputCodes) const;
int getBigramListPositionForWord(const int *prevWord, const int prevWordLength,
const bool forceLowerCaseSearch) const;
const unsigned char *DICT;

View file

@ -84,7 +84,7 @@ class BinaryFormat {
static unsigned int getFlags(const uint8_t *const dict);
static int getGroupCountAndForwardPointer(const uint8_t *const dict, int *pos);
static uint8_t getFlagsAndForwardPointer(const uint8_t *const dict, int *pos);
static int32_t getCodePointAndForwardPointer(const uint8_t *const dict, int *pos);
static int getCodePointAndForwardPointer(const uint8_t *const dict, int *pos);
static int readFrequencyWithoutMovingPointer(const uint8_t *const dict, const int pos);
static int skipOtherCharacters(const uint8_t *const dict, const int pos);
static int skipChildrenPosition(const uint8_t flags, const int pos);
@ -98,10 +98,10 @@ class BinaryFormat {
static int getAttributeAddressAndForwardPointer(const uint8_t *const dict, const uint8_t flags,
int *pos);
static int getAttributeFrequencyFromFlags(const int flags);
static int getTerminalPosition(const uint8_t *const root, const int32_t *const inWord,
static int getTerminalPosition(const uint8_t *const root, const int *const inWord,
const int length, const bool forceLowerCaseSearch);
static int getWordAtAddress(const uint8_t *const root, const int address, const int maxDepth,
uint16_t *outWord, int *outUnigramFrequency);
int *outWord, int *outUnigramFrequency);
static int computeFrequencyForBigram(const int unigramFreq, const int bigramFreq);
static int getProbability(const int position, const std::map<int, int> *bigramMap,
const uint8_t *bigramFilter, const int unigramFreq);
@ -176,17 +176,17 @@ inline uint8_t BinaryFormat::getFlagsAndForwardPointer(const uint8_t *const dict
return dict[(*pos)++];
}
inline int32_t BinaryFormat::getCodePointAndForwardPointer(const uint8_t *const dict, int *pos) {
inline int BinaryFormat::getCodePointAndForwardPointer(const uint8_t *const dict, int *pos) {
const int origin = *pos;
const int32_t codePoint = dict[origin];
const int codePoint = dict[origin];
if (codePoint < MINIMAL_ONE_BYTE_CHARACTER_VALUE) {
if (codePoint == CHARACTER_ARRAY_TERMINATOR) {
*pos = origin + 1;
return NOT_A_CODE_POINT;
} else {
*pos = origin + 3;
const int32_t char_1 = codePoint << 16;
const int32_t char_2 = char_1 + (dict[origin + 1] << 8);
const int char_1 = codePoint << 16;
const int char_2 = char_1 + (dict[origin + 1] << 8);
return char_2 + dict[origin + 2];
}
} else {
@ -202,7 +202,7 @@ inline int BinaryFormat::readFrequencyWithoutMovingPointer(const uint8_t *const
inline int BinaryFormat::skipOtherCharacters(const uint8_t *const dict, const int pos) {
int currentPos = pos;
int32_t character = dict[currentPos++];
int character = dict[currentPos++];
while (CHARACTER_ARRAY_TERMINATOR != character) {
if (character < MINIMAL_ONE_BYTE_CHARACTER_VALUE) {
currentPos += MULTIPLE_BYTE_CHARACTER_ADDITIONAL_SIZE;
@ -352,8 +352,8 @@ inline int BinaryFormat::getAttributeFrequencyFromFlags(const int flags) {
// This function gets the byte position of the last chargroup of the exact matching word in the
// dictionary. If no match is found, it returns NOT_VALID_WORD.
inline int BinaryFormat::getTerminalPosition(const uint8_t *const root,
const int32_t *const inWord, const int length, const bool forceLowerCaseSearch) {
inline int BinaryFormat::getTerminalPosition(const uint8_t *const root, const int *const inWord,
const int length, const bool forceLowerCaseSearch) {
int pos = 0;
int wordPos = 0;
@ -362,14 +362,14 @@ inline int BinaryFormat::getTerminalPosition(const uint8_t *const root,
// there was no match (or we would have found it).
if (wordPos >= length) return NOT_VALID_WORD;
int charGroupCount = BinaryFormat::getGroupCountAndForwardPointer(root, &pos);
const int32_t wChar = forceLowerCaseSearch ? toLowerCase(inWord[wordPos]) : inWord[wordPos];
const int wChar = forceLowerCaseSearch ? toLowerCase(inWord[wordPos]) : inWord[wordPos];
while (true) {
// If there are no more character groups in this node, it means we could not
// find a matching character for this depth, therefore there is no match.
if (0 >= charGroupCount) return NOT_VALID_WORD;
const int charGroupPos = pos;
const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
int32_t character = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
int character = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
if (character == wChar) {
// This is the correct node. Only one character group may start with the same
// char within a node, so either we found our match in this node, or there is
@ -439,7 +439,7 @@ inline int BinaryFormat::getTerminalPosition(const uint8_t *const root,
* Return value : the length of the word, or 0 if the word was not found.
*/
inline int BinaryFormat::getWordAtAddress(const uint8_t *const root, const int address,
const int maxDepth, uint16_t *outWord, int *outUnigramFrequency) {
const int maxDepth, int *outWord, int *outUnigramFrequency) {
int pos = 0;
int wordPos = 0;
@ -457,13 +457,13 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t *const root, const int a
--charGroupCount) {
const int startPos = pos;
const uint8_t flags = getFlagsAndForwardPointer(root, &pos);
const int32_t character = getCodePointAndForwardPointer(root, &pos);
const int character = getCodePointAndForwardPointer(root, &pos);
if (address == startPos) {
// We found the address. Copy the rest of the word in the buffer and return
// the length.
outWord[wordPos] = character;
if (FLAG_HAS_MULTIPLE_CHARS & flags) {
int32_t nextChar = getCodePointAndForwardPointer(root, &pos);
int nextChar = getCodePointAndForwardPointer(root, &pos);
// We count chars in order to avoid infinite loops if the file is broken or
// if there is some other bug
int charCount = maxDepth;
@ -522,13 +522,12 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t *const root, const int a
if (0 != lastCandidateGroupPos) {
const uint8_t lastFlags =
getFlagsAndForwardPointer(root, &lastCandidateGroupPos);
const int32_t lastChar =
const int lastChar =
getCodePointAndForwardPointer(root, &lastCandidateGroupPos);
// We copy all the characters in this group to the buffer
outWord[wordPos] = lastChar;
if (FLAG_HAS_MULTIPLE_CHARS & lastFlags) {
int32_t nextChar =
getCodePointAndForwardPointer(root, &lastCandidateGroupPos);
int nextChar = getCodePointAndForwardPointer(root, &lastCandidateGroupPos);
int charCount = maxDepth;
while (-1 != nextChar && --charCount > 0) {
outWord[++wordPos] = nextChar;

View file

@ -18,22 +18,23 @@
#define LATINIME_CHAR_UTILS_H
#include <cctype>
#include <stdint.h>
#include "defines.h"
namespace latinime {
// Returns true only for the ASCII capital letters 'A' through 'Z'.
inline static bool isAsciiUpper(int c) {
    // Note: isupper(...) reports false positives for some Cyrillic characters, which would
    // cause them to be lower-cased with toAsciiLower(...) instead of latin_tolower(...),
    // so the range is checked by hand.
    if (c < 'A') {
        return false;
    }
    return c <= 'Z';
}
// Shifts c by the distance between 'A' and 'a'. No range check is done here; callers in this
// file test isAsciiUpper(c) first.
inline static int toAsciiLower(int c) {
    const int offsetFromUpperA = c - 'A';
    return offsetFromUpperA + 'a';
}
// Returns whether isascii() reports c as an ASCII value.
inline static bool isAscii(int c) {
    const int asciiFlag = isascii(c);
    return 0 != asciiFlag;
}
unsigned short latin_tolower(const unsigned short c);
@ -44,33 +45,32 @@ unsigned short latin_tolower(const unsigned short c);
* if c is not a combined character, or the base character if it
* is combined.
*/
static const int BASE_CHARS_SIZE = 0x0500;
extern const uint16_t BASE_CHARS[BASE_CHARS_SIZE];
extern const unsigned short BASE_CHARS[BASE_CHARS_SIZE];
inline static unsigned short toBaseChar(unsigned short c) {
inline static int toBaseCodePoint(int c) {
if (c < BASE_CHARS_SIZE) {
return BASE_CHARS[c];
return static_cast<int>(BASE_CHARS[c]);
}
return c;
}
inline static unsigned short toLowerCase(const unsigned short c) {
inline static int toLowerCase(const int c) {
if (isAsciiUpper(c)) {
return toAsciiLower(c);
} else if (isAscii(c)) {
return c;
}
return latin_tolower(c);
return static_cast<int>(latin_tolower(static_cast<unsigned short>(c)));
}
inline static unsigned short toBaseLowerCase(const unsigned short c) {
return toLowerCase(toBaseChar(c));
inline static int toBaseLowerCase(const int c) {
return toLowerCase(toBaseCodePoint(c));
}
inline static bool isSkippableChar(const uint16_t character) {
inline static bool isSkippableCodePoint(const int codePoint) {
// TODO: Do not hardcode here
return character == '\'' || character == '-';
return codePoint == KEYCODE_SINGLE_QUOTE || codePoint == KEYCODE_HYPHEN_MINUS;
}
} // namespace latinime

View file

@ -60,8 +60,8 @@ inline static void dumpEditDistance10ForDebug(int *editDistanceTable,
}
}
inline static void calcEditDistanceOneStep(int *editDistanceTable, const unsigned short *input,
const int inputSize, const unsigned short *output, const int outputLength) {
inline static void calcEditDistanceOneStep(int *editDistanceTable, const int *input,
const int inputSize, const int *output, const int outputLength) {
// TODO: Make sure that editDistance[0 ~ MAX_WORD_LENGTH_INTERNAL] is not touched.
// Let dp[i][j] be editDistanceTable[i * (inputSize + 1) + j].
// Assuming that dp[0][0] ... dp[outputLength - 1][inputSize] are already calculated,
@ -71,10 +71,10 @@ inline static void calcEditDistanceOneStep(int *editDistanceTable, const unsigne
const int *const prevprev =
outputLength >= 2 ? editDistanceTable + (outputLength - 2) * (inputSize + 1) : 0;
current[0] = outputLength;
const uint32_t co = toBaseLowerCase(output[outputLength - 1]);
const uint32_t prevCO = outputLength >= 2 ? toBaseLowerCase(output[outputLength - 2]) : 0;
const int co = toBaseLowerCase(output[outputLength - 1]);
const int prevCO = outputLength >= 2 ? toBaseLowerCase(output[outputLength - 2]) : 0;
for (int i = 1; i <= inputSize; ++i) {
const uint32_t ci = toBaseLowerCase(input[i - 1]);
const int ci = toBaseLowerCase(input[i - 1]);
const uint16_t cost = (ci == co) ? 0 : 1;
current[i] = min(current[i - 1] + 1, min(prev[i] + 1, prev[i - 1] + cost));
if (i >= 2 && prevprev && ci == prevCO && co == toBaseLowerCase(input[i - 2])) {
@ -94,11 +94,9 @@ inline static int getCurrentEditDistance(int *editDistanceTable, const int editD
//////////////////////
// inline functions //
//////////////////////
static const char SINGLE_QUOTE = '\'';
inline bool Correction::isSingleQuote(const unsigned short c) {
const unsigned short userTypedChar = mProximityInfoState.getPrimaryCharAt(mInputIndex);
return (c == SINGLE_QUOTE && userTypedChar != SINGLE_QUOTE);
inline bool Correction::isSingleQuote(const int c) {
const int userTypedChar = mProximityInfoState.getPrimaryCodePointAt(mInputIndex);
return (c == KEYCODE_SINGLE_QUOTE && userTypedChar != KEYCODE_SINGLE_QUOTE);
}
////////////////
@ -162,22 +160,22 @@ bool Correction::sameAsTyped() {
}
int Correction::getFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray,
const int wordCount, const bool isSpaceProximity, const unsigned short *word) {
const int wordCount, const bool isSpaceProximity, const int *word) {
return Correction::RankingAlgorithm::calcFreqForSplitMultipleWords(freqArray, wordLengthArray,
wordCount, this, isSpaceProximity, word);
}
int Correction::getFinalProbability(const int probability, unsigned short **word, int *wordLength) {
int Correction::getFinalProbability(const int probability, int **word, int *wordLength) {
return getFinalProbabilityInternal(probability, word, wordLength, mInputSize);
}
int Correction::getFinalProbabilityForSubQueue(const int probability, unsigned short **word,
int *wordLength, const int inputSize) {
int Correction::getFinalProbabilityForSubQueue(const int probability, int **word, int *wordLength,
const int inputSize) {
return getFinalProbabilityInternal(probability, word, wordLength, inputSize);
}
int Correction::getFinalProbabilityInternal(const int probability, unsigned short **word,
int *wordLength, const int inputSize) {
int Correction::getFinalProbabilityInternal(const int probability, int **word, int *wordLength,
const int inputSize) {
const int outputIndex = mTerminalOutputIndex;
const int inputIndex = mTerminalInputIndex;
*wordLength = outputIndex + 1;
@ -273,15 +271,15 @@ bool Correction::needsToPrune() const {
|| (!mDoAutoCompletion && (mOutputIndex > mInputSize));
}
void Correction::addCharToCurrentWord(const int32_t c) {
void Correction::addCharToCurrentWord(const int c) {
mWord[mOutputIndex] = c;
const unsigned short *primaryInputWord = mProximityInfoState.getPrimaryInputWord();
calcEditDistanceOneStep(mEditDistanceTable, primaryInputWord, mInputSize,
mWord, mOutputIndex + 1);
const int *primaryInputWord = mProximityInfoState.getPrimaryInputWord();
calcEditDistanceOneStep(mEditDistanceTable, primaryInputWord, mInputSize, mWord,
mOutputIndex + 1);
}
Correction::CorrectionType Correction::processSkipChar(
const int32_t c, const bool isTerminal, const bool inputIndexIncremented) {
Correction::CorrectionType Correction::processSkipChar(const int c, const bool isTerminal,
const bool inputIndexIncremented) {
addCharToCurrentWord(c);
mTerminalInputIndex = mInputIndex - (inputIndexIncremented ? 1 : 0);
mTerminalOutputIndex = mOutputIndex;
@ -309,8 +307,7 @@ inline bool isProximityCharOrEquivalentChar(ProximityType type) {
return type == EQUIVALENT_CHAR || type == NEAR_PROXIMITY_CHAR;
}
Correction::CorrectionType Correction::processCharAndCalcState(
const int32_t c, const bool isTerminal) {
Correction::CorrectionType Correction::processCharAndCalcState(const int c, const bool isTerminal) {
const int correctionCount = (mSkippedCount + mExcessiveCount + mTransposedCount);
if (correctionCount > mMaxErrors) {
return processUnrelatedCorrectionType();
@ -628,10 +625,10 @@ Correction::CorrectionType Correction::processCharAndCalcState(
}
}
inline static int getQuoteCount(const unsigned short *word, const int length) {
inline static int getQuoteCount(const int *word, const int length) {
int quoteCount = 0;
for (int i = 0; i < length; ++i) {
if (word[i] == SINGLE_QUOTE) {
if (word[i] == KEYCODE_SINGLE_QUOTE) {
++quoteCount;
}
}
@ -639,7 +636,7 @@ inline static int getQuoteCount(const unsigned short *word, const int length) {
}
// Returns whether the base code point of c is an ASCII capital letter.
// The parameter is an int like the rest of the code point helpers: with an unsigned short
// parameter, a code point above 0xFFFF would be truncated on the way in (e.g. 0x10041 would
// be seen as 'A').
// Negative values are rejected here, before the base code point lookup.
inline static bool isUpperCase(const int c) {
    if (c < 0) {
        return false;
    }
    return isAsciiUpper(toBaseCodePoint(c));
}
//////////////////////
@ -672,7 +669,7 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex
// TODO: use mExcessiveCount
const int matchCount = inputSize - correction->mProximityCount - excessiveCount;
const unsigned short *word = correction->mWord;
const int *word = correction->mWord;
const bool skipped = skippedCount > 0;
const int quoteDiffCount = max(0, getQuoteCount(word, outputLength)
@ -911,7 +908,7 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex
/* static */
int Correction::RankingAlgorithm::calcFreqForSplitMultipleWords(
const int *freqArray, const int *wordLengthArray, const int wordCount,
const Correction *correction, const bool isSpaceProximity, const unsigned short *word) {
const Correction *correction, const bool isSpaceProximity, const int *word) {
const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER;
bool firstCapitalizedWordDemotion = false;
@ -1040,9 +1037,8 @@ int Correction::RankingAlgorithm::calcFreqForSplitMultipleWords(
}
/* Damerau-Levenshtein distance */
inline static int editDistanceInternal(
int *editDistanceTable, const unsigned short *before,
const int beforeLength, const unsigned short *after, const int afterLength) {
inline static int editDistanceInternal(int *editDistanceTable, const int *before,
const int beforeLength, const int *after, const int afterLength) {
// dp[li][lo] dp[a][b] = dp[ a * lo + b]
int *dp = editDistanceTable;
const int li = beforeLength + 1;
@ -1056,9 +1052,9 @@ inline static int editDistanceInternal(
for (int i = 0; i < li - 1; ++i) {
for (int j = 0; j < lo - 1; ++j) {
const uint32_t ci = toBaseLowerCase(before[i]);
const uint32_t co = toBaseLowerCase(after[j]);
const uint16_t cost = (ci == co) ? 0 : 1;
const int ci = toBaseLowerCase(before[i]);
const int co = toBaseLowerCase(after[j]);
const int cost = (ci == co) ? 0 : 1;
dp[(i + 1) * lo + (j + 1)] = min(dp[i * lo + (j + 1)] + 1,
min(dp[(i + 1) * lo + j] + 1, dp[i * lo + j] + cost));
if (i > 0 && j > 0 && ci == toBaseLowerCase(after[j - 1])
@ -1080,8 +1076,8 @@ inline static int editDistanceInternal(
return dp[li * lo - 1];
}
// Computes the edit distance between `before` and `after` by delegating to
// editDistanceInternal() with a scratch table of
// (beforeLength + 1) * (afterLength + 1) ints.
// The table is a local variable-length array (a compiler extension in C++), so
// its size grows with the product of the two lengths.
// NOTE(review): negative lengths are not checked here -- presumably callers
// guarantee non-negative values; confirm.
int Correction::RankingAlgorithm::editDistance(const unsigned short *before,
const int beforeLength, const unsigned short *after, const int afterLength) {
int Correction::RankingAlgorithm::editDistance(const int *before, const int beforeLength,
const int *after, const int afterLength) {
int table[(beforeLength + 1) * (afterLength + 1)];
return editDistanceInternal(table, before, beforeLength, after, afterLength);
}
@ -1109,9 +1105,8 @@ int Correction::RankingAlgorithm::editDistance(const unsigned short *before,
// So, we can normalize original score by dividing powf(2, min(b.l(),a.l())) * 255 * 2.
/* static */
float Correction::RankingAlgorithm::calcNormalizedScore(const unsigned short *before,
const int beforeLength, const unsigned short *after, const int afterLength,
const int score) {
float Correction::RankingAlgorithm::calcNormalizedScore(const int *before, const int beforeLength,
const int *after, const int afterLength, const int score) {
if (0 == beforeLength || 0 == afterLength) {
return 0;
}

View file

@ -78,14 +78,13 @@ class Correction {
return ++mTotalTraverseCount;
}
int getFreqForSplitMultipleWords(
const int *freqArray, const int *wordLengthArray, const int wordCount,
const bool isSpaceProximity, const unsigned short *word);
int getFinalProbability(const int probability, unsigned short **word, int *wordLength);
int getFinalProbabilityForSubQueue(const int probability, unsigned short **word,
int *wordLength, const int inputSize);
int getFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray,
const int wordCount, const bool isSpaceProximity, const int *word);
int getFinalProbability(const int probability, int **word, int *wordLength);
int getFinalProbabilityForSubQueue(const int probability, int **word, int *wordLength,
const int inputSize);
CorrectionType processCharAndCalcState(const int32_t c, const bool isTerminal);
CorrectionType processCharAndCalcState(const int c, const bool isTerminal);
/////////////////////////
// Tree helper methods
@ -110,28 +109,28 @@ class Correction {
const int inputSize);
static int calcFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray,
const int wordCount, const Correction *correction, const bool isSpaceProximity,
const unsigned short *word);
static float calcNormalizedScore(const unsigned short *before, const int beforeLength,
const unsigned short *after, const int afterLength, const int score);
static int editDistance(const unsigned short *before,
const int beforeLength, const unsigned short *after, const int afterLength);
const int *word);
static float calcNormalizedScore(const int *before, const int beforeLength,
const int *after, const int afterLength, const int score);
static int editDistance(const int *before, const int beforeLength, const int *after,
const int afterLength);
private:
static const int MAX_INITIAL_SCORE = 255;
};
// proximity info state
// Initializes mProximityInfoState for non-geometric input. Forwards the caller's
// arguments and fills in the remaining parameters with fixed values: pointer id 0,
// MAX_POINT_TO_KEY_LENGTH as the maximum point-to-key length, 0 for both the
// `times` and `pointerIds` arrays, and isGeometric == false.
void initInputParams(const ProximityInfo *proximityInfo, const int32_t *inputCodes,
void initInputParams(const ProximityInfo *proximityInfo, const int *inputCodes,
const int inputSize, const int *xCoordinates, const int *yCoordinates) {
mProximityInfoState.initInputParams(0, MAX_POINT_TO_KEY_LENGTH,
proximityInfo, inputCodes, inputSize, xCoordinates, yCoordinates, 0, 0, false);
}
// Returns the primary input word held by mProximityInfoState (thin forwarder).
// The returned pointer refers to storage owned by that state object, not a copy.
const unsigned short *getPrimaryInputWord() const {
const int *getPrimaryInputWord() const {
return mProximityInfoState.getPrimaryInputWord();
}
// Returns the primary code point at input position `index`, as reported by
// mProximityInfoState (thin forwarder; no range check is performed here).
// Both the old name (getPrimaryCharAt) and the new one (getPrimaryCodePointAt)
// appear below as the before/after sides of the change.
unsigned short getPrimaryCharAt(const int index) const {
return mProximityInfoState.getPrimaryCharAt(index);
int getPrimaryCodePointAt(const int index) const {
return mProximityInfoState.getPrimaryCodePointAt(index);
}
private:
@ -214,13 +213,13 @@ class Correction {
inline void incrementInputIndex();
inline void incrementOutputIndex();
inline void startToTraverseAllNodes();
inline bool isSingleQuote(const unsigned short c);
inline CorrectionType processSkipChar(
const int32_t c, const bool isTerminal, const bool inputIndexIncremented);
inline bool isSingleQuote(const int c);
inline CorrectionType processSkipChar(const int c, const bool isTerminal,
const bool inputIndexIncremented);
inline CorrectionType processUnrelatedCorrectionType();
inline void addCharToCurrentWord(const int32_t c);
inline int getFinalProbabilityInternal(const int probability, unsigned short **word,
int *wordLength, const int inputSize);
inline void addCharToCurrentWord(const int c);
inline int getFinalProbabilityInternal(const int probability, int **word, int *wordLength,
const int inputSize);
static const int TYPED_LETTER_MULTIPLIER = 2;
static const int FULL_WORD_MULTIPLIER = 2;
@ -240,7 +239,7 @@ class Correction {
uint8_t mTotalTraverseCount;
// The following arrays are state buffer.
unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
int mWord[MAX_WORD_LENGTH_INTERNAL];
int mDistances[MAX_WORD_LENGTH_INTERNAL];
// Edit distance calculation requires a buffer with (N+1)^2 length for the input length N.

View file

@ -30,17 +30,15 @@
#define DUMP_RESULT(words, frequencies, maxWordCount, maxWordLength) do { \
dumpResult(words, frequencies, maxWordCount, maxWordLength); } while (0)
#define DUMP_WORD(word, length) do { dumpWord(word, length); } while (0)
#define DUMP_WORD_INT(word, length) do { dumpWordInt(word, length); } while (0)
// TODO: INTS_TO_CHARS
#define SHORTS_TO_CHARS(input, length, output) do { \
shortArrayToCharArray(input, length, output); } while (0)
#define INTS_TO_CHARS(input, length, output) do { \
intArrayToCharArray(input, length, output); } while (0)
static inline void dumpWordInfo(const unsigned short *word, const int length,
const int rank, const int frequency) {
static inline void dumpWordInfo(const int *word, const int length, const int rank,
const int frequency) {
static char charBuf[50];
int i = 0;
for (; i < length; ++i) {
const unsigned short c = word[i];
const int c = word[i];
if (c == 0) {
break;
}
@ -53,8 +51,7 @@ static inline void dumpWordInfo(const unsigned short *word, const int length,
}
}
static inline void dumpResult(
const unsigned short *outWords, const int *frequencies, const int maxWordCounts,
static inline void dumpResult(const int *outWords, const int *frequencies, const int maxWordCounts,
const int maxWordLength) {
AKLOGI("--- DUMP RESULT ---------");
for (int i = 0; i < maxWordCounts; ++i) {
@ -63,11 +60,11 @@ static inline void dumpResult(
AKLOGI("-------------------------");
}
static inline void dumpWord(const unsigned short *word, const int length) {
static inline void dumpWord(const int *word, const int length) {
static char charBuf[50];
int i = 0;
for (; i < length; ++i) {
const unsigned short c = word[i];
const int c = word[i];
if (c == 0) {
break;
}
@ -80,22 +77,10 @@ static inline void dumpWord(const unsigned short *word, const int length) {
}
}
// Logs `word` (an array of `length` code points) as a C string via AKLOGI.
//
// Each code point is narrowed to char, so values outside the char range are
// truncated; this helper is only meaningful for debugging mostly-ASCII input.
//
// The scratch buffer is a fixed-size static array. The number of copied
// elements is clamped to the buffer capacity (leaving room for the
// terminator), and a negative `length` is treated as 0, so neither an
// oversized nor a negative length can write outside the buffer.
// Because the buffer is static it is shared between calls.
static inline void dumpWordInt(const int *word, const int length) {
    static char charBuf[50];
    const int capacity = static_cast<int>(sizeof(charBuf)) - 1;
    int count = length;
    if (count < 0) {
        count = 0;
    } else if (count > capacity) {
        count = capacity;
    }
    for (int i = 0; i < count; ++i) {
        charBuf[i] = static_cast<char>(word[i]);
    }
    charBuf[count] = 0;
    AKLOGI("i[ %s ]", charBuf);
}
// TODO: Change this to intArrayToCharArray
static inline void shortArrayToCharArray(
const unsigned short *input, const int length, char *output) {
static inline void intArrayToCharArray(const int *input, const int length, char *output) {
int i = 0;
for (;i < length; ++i) {
const unsigned short c = input[i];
for (; i < length; ++i) {
const int c = input[i];
if (c == 0) {
break;
}
@ -137,11 +122,9 @@ static inline void showStackTrace() {
#define AKLOGI(fmt, ...)
#define DUMP_RESULT(words, frequencies, maxWordCount, maxWordLength)
#define DUMP_WORD(word, length)
#define DUMP_WORD_INT(word, length)
#define ASSERT(success)
#define SHOW_STACK_TRACE
// TODO: INTS_TO_CHARS
#define SHORTS_TO_CHARS(input, length, output)
#define INTS_TO_CHARS(input, length, output)
#endif
#ifdef FLAG_DO_PROFILE
@ -286,6 +269,8 @@ static inline void prof_out(void) {
#define NOT_A_PROBABILITY (-1)
#define KEYCODE_SPACE ' '
#define KEYCODE_SINGLE_QUOTE '\''
#define KEYCODE_HYPHEN_MINUS '-'
#define CALIBRATE_SCORE_BY_TOUCH_COORDINATES true

View file

@ -54,11 +54,10 @@ Dictionary::~Dictionary() {
}
int Dictionary::getSuggestions(ProximityInfo *proximityInfo, void *traverseSession,
int *xcoordinates, int *ycoordinates, int *times, int *pointerIds,
int *codes, int codesSize, int *prevWordChars,
int prevWordLength, int commitPoint, bool isGesture,
bool useFullEditDistance, unsigned short *outWords,
int *frequencies, int *spaceIndices, int *outputTypes) const {
int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *codes,
int codesSize, int *prevWordChars, int prevWordLength, int commitPoint, bool isGesture,
bool useFullEditDistance, int *outWords, int *frequencies, int *spaceIndices,
int *outputTypes) const {
int result = 0;
if (isGesture) {
DicTraverseWrapper::initDicTraverseSession(
@ -83,7 +82,7 @@ int Dictionary::getSuggestions(ProximityInfo *proximityInfo, void *traverseSessi
}
int Dictionary::getBigrams(const int32_t *word, int length, int *codes, int codesSize,
unsigned short *outWords, int *frequencies, int *outputTypes) const {
int *outWords, int *frequencies, int *outputTypes) const {
if (length <= 0) return 0;
return mBigramDictionary->getBigrams(word, length, codes, codesSize, outWords, frequencies,
outputTypes);

View file

@ -47,11 +47,11 @@ class Dictionary {
int getSuggestions(ProximityInfo *proximityInfo, void *traverseSession, int *xcoordinates,
int *ycoordinates, int *times, int *pointerIds, int *codes, int codesSize,
int *prevWordChars, int prevWordLength, int commitPoint, bool isGesture,
bool useFullEditDistance, unsigned short *outWords,
int *frequencies, int *spaceIndices, int *outputTypes) const;
bool useFullEditDistance, int *outWords, int *frequencies, int *spaceIndices,
int *outputTypes) const;
int getBigrams(const int32_t *word, int length, int *codes, int codesSize,
unsigned short *outWords, int *frequencies, int *outputTypes) const;
int getBigrams(const int32_t *word, int length, int *codes, int codesSize, int *outWords,
int *frequencies, int *outputTypes) const;
int getFrequency(const int32_t *word, int length) const;
bool isValidBigram(const int32_t *word1, int length1, const int32_t *word2, int length2) const;
@ -68,7 +68,7 @@ class Dictionary {
// public static utility methods
// static inline methods should be defined in the header file
static int wideStrLen(unsigned short *str);
static int wideStrLen(int *str);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(Dictionary);
@ -88,7 +88,7 @@ class Dictionary {
// public static utility methods
// static inline methods should be defined in the header file
inline int Dictionary::wideStrLen(unsigned short *str) {
inline int Dictionary::wideStrLen(int *str) {
if (!str) return 0;
int length = 0;
while (*str) {

View file

@ -38,15 +38,14 @@ class GestureDecoderWrapper : public IncrementalDecoderInterface {
}
// Forwards a suggestion request to the wrapped decoder.
// Returns 0 without touching any output array when mIncrementalDecoderInterface
// is null; otherwise passes every argument through unchanged and returns the
// wrapped decoder's result.
// The duplicated parameter lists / return statements below are the before/after
// sides of the change (outWords: unsigned short * -> int *).
int getSuggestions(ProximityInfo *pInfo, void *traverseSession, int *inputXs, int *inputYs,
int *times, int *pointerIds, int *codes, int inputSize, int commitPoint,
unsigned short *outWords, int *frequencies, int *outputIndices,
int *outputTypes) const {
int *times, int *pointerIds, int *codes, int inputSize, int commitPoint, int *outWords,
int *frequencies, int *outputIndices, int *outputTypes) const {
if (!mIncrementalDecoderInterface) {
return 0;
}
return mIncrementalDecoderInterface->getSuggestions(
pInfo, traverseSession, inputXs, inputYs, times, pointerIds, codes,
inputSize, commitPoint, outWords, frequencies, outputIndices, outputTypes);
return mIncrementalDecoderInterface->getSuggestions(pInfo, traverseSession, inputXs,
inputYs, times, pointerIds, codes, inputSize, commitPoint, outWords, frequencies,
outputIndices, outputTypes);
}
static void setGestureDecoderFactoryMethod(

View file

@ -28,10 +28,9 @@ class ProximityInfo;
class IncrementalDecoderInterface {
public:
// Pure virtual entry point that concrete decoders implement.
// Takes the input arrays (inputXs, inputYs, times, pointerIds, codes) with their
// element count `inputSize`, and writes results through outWords, frequencies,
// outputIndices and outputTypes.
// NOTE(review): the int return value is presumably the number of suggestions
// produced (the wrappers in this change return 0 when no decoder is set) -- confirm.
// The two declarations below are the before/after sides of the change
// (outWords: unsigned short * -> int *).
virtual int getSuggestions(ProximityInfo *pInfo, void *traverseSession,
int *inputXs, int *inputYs, int *times, int *pointerIds, int *codes,
int inputSize, int commitPoint, unsigned short *outWords, int *frequencies,
int *outputIndices, int *outputTypes) const = 0;
virtual int getSuggestions(ProximityInfo *pInfo, void *traverseSession, int *inputXs,
int *inputYs, int *times, int *pointerIds, int *codes, int inputSize, int commitPoint,
int *outWords, int *frequencies, int *outputIndices, int *outputTypes) const = 0;
IncrementalDecoderInterface() { };
virtual ~IncrementalDecoderInterface() { };
private:

View file

@ -38,15 +38,14 @@ class IncrementalDecoderWrapper : public IncrementalDecoderInterface {
}
// Forwards a suggestion request to the wrapped incremental decoder.
// Returns 0 without touching any output array when mIncrementalDecoderInterface
// is null; otherwise passes every argument through unchanged and returns the
// wrapped decoder's result.
// The duplicated parameter lists / return statements below are the before/after
// sides of the change (outWords: unsigned short * -> int *).
int getSuggestions(ProximityInfo *pInfo, void *traverseSession, int *inputXs, int *inputYs,
int *times, int *pointerIds, int *codes, int inputSize, int commitPoint,
unsigned short *outWords, int *frequencies, int *outputIndices,
int *outputTypes) const {
int *times, int *pointerIds, int *codes, int inputSize, int commitPoint, int *outWords,
int *frequencies, int *outputIndices, int *outputTypes) const {
if (!mIncrementalDecoderInterface) {
return 0;
}
return mIncrementalDecoderInterface->getSuggestions(
pInfo, traverseSession, inputXs, inputYs, times, pointerIds, codes,
inputSize, commitPoint, outWords, frequencies, outputIndices, outputTypes);
return mIncrementalDecoderInterface->getSuggestions(pInfo, traverseSession, inputXs,
inputYs, times, pointerIds, codes, inputSize, commitPoint, outWords, frequencies,
outputIndices, outputTypes);
}
static void setIncrementalDecoderFactoryMethod(

View file

@ -34,7 +34,7 @@ const float ProximityInfoState::NOT_A_DISTANCE_FLOAT = -1.0f;
const int ProximityInfoState::NOT_A_CODE = -1;
void ProximityInfoState::initInputParams(const int pointerId, const float maxPointToKeyLength,
const ProximityInfo *proximityInfo, const int32_t *const inputCodes, const int inputSize,
const ProximityInfo *proximityInfo, const int *const inputCodes, const int inputSize,
const int *const xCoordinates, const int *const yCoordinates, const int *const times,
const int *const pointerIds, const bool isGeometric) {
@ -63,7 +63,7 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi
// - mNormalizedSquaredDistances
// TODO: Merge
for (int i = 0; i < inputSize; ++i) {
const int32_t primaryKey = inputCodes[i];
const int primaryKey = inputCodes[i];
const int x = xCoordinates[i];
const int y = yCoordinates[i];
int *proximities = &mInputCodes[i * MAX_PROXIMITY_CHARS_SIZE_INTERNAL];
@ -146,7 +146,7 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi
AKLOGI("Init ProximityInfoState: (%d)PID = %d", i, pid);
}
if (pointerId == pid) {
const int c = isGeometric ? NOT_A_COORDINATE : getPrimaryCharAt(i);
const int c = isGeometric ? NOT_A_COORDINATE : getPrimaryCodePointAt(i);
const int x = proximityOnly ? NOT_A_COORDINATE : xCoordinates[i];
const int y = proximityOnly ? NOT_A_COORDINATE : yCoordinates[i];
const int time = times ? times[i] : -1;
@ -306,12 +306,12 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi
&& xCoordinates && yCoordinates;
if (!isGeometric && pointerId == 0) {
for (int i = 0; i < inputSize; ++i) {
mPrimaryInputWord[i] = getPrimaryCharAt(i);
mPrimaryInputWord[i] = getPrimaryCodePointAt(i);
}
for (int i = 0; i < mInputSize && mTouchPositionCorrectionEnabled; ++i) {
const int *proximityChars = getProximityCharsAt(i);
const int primaryKey = proximityChars[0];
const int *proximityCodePoints = getProximityCodePointsAt(i);
const int primaryKey = proximityCodePoints[0];
const int x = xCoordinates[i];
const int y = yCoordinates[i];
if (DEBUG_PROXIMITY_CHARS) {
@ -319,11 +319,12 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi
a += 0;
AKLOGI("--- Primary = %c, x = %d, y = %d", primaryKey, x, y);
}
for (int j = 0; j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL && proximityChars[j] > 0; ++j) {
const int currentChar = proximityChars[j];
for (int j = 0; j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL && proximityCodePoints[j] > 0;
++j) {
const int currentCodePoint = proximityCodePoints[j];
const float squaredDistance =
hasInputCoordinates() ? calculateNormalizedSquaredDistance(
mProximityInfo->getKeyIndexOf(currentChar), i) :
mProximityInfo->getKeyIndexOf(currentCodePoint), i) :
NOT_A_DISTANCE_FLOAT;
if (squaredDistance >= 0.0f) {
mNormalizedSquaredDistances[i * MAX_PROXIMITY_CHARS_SIZE_INTERNAL + j] =
@ -334,7 +335,7 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi
PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO;
}
if (DEBUG_PROXIMITY_CHARS) {
AKLOGI("--- Proximity (%d) = %c", j, currentChar);
AKLOGI("--- Proximity (%d) = %c", j, currentCodePoint);
}
}
}
@ -449,7 +450,7 @@ float ProximityInfoState::getPointScore(
// Sampling touch point and pushing information to vectors.
// Returning if previous point is popped or not.
bool ProximityInfoState::pushTouchPoint(const int inputIndex, const int nodeChar, int x, int y,
bool ProximityInfoState::pushTouchPoint(const int inputIndex, const int nodeCodePoint, int x, int y,
const int time, const bool sample, const bool isLastPoint, const float sumAngle,
NearKeysDistanceMap *const currentNearKeysDistances,
const NearKeysDistanceMap *const prevNearKeysDistances,
@ -458,7 +459,7 @@ bool ProximityInfoState::pushTouchPoint(const int inputIndex, const int nodeChar
size_t size = mInputXs.size();
bool popped = false;
if (nodeChar < 0 && sample) {
if (nodeCodePoint < 0 && sample) {
const float nearest = updateNearKeysDistances(x, y, currentNearKeysDistances);
const float score = getPointScore(x, y, time, isLastPoint, nearest, sumAngle,
currentNearKeysDistances, prevNearKeysDistances, prevPrevNearKeysDistances);
@ -487,8 +488,8 @@ bool ProximityInfoState::pushTouchPoint(const int inputIndex, const int nodeChar
}
}
if (nodeChar >= 0 && (x < 0 || y < 0)) {
const int keyId = mProximityInfo->getKeyIndexOf(nodeChar);
if (nodeCodePoint >= 0 && (x < 0 || y < 0)) {
const int keyId = mProximityInfo->getKeyIndexOf(nodeCodePoint);
if (keyId >= 0) {
x = mProximityInfo->getKeyCenterXOfKeyIdG(keyId);
y = mProximityInfo->getKeyCenterYOfKeyIdG(keyId);
@ -543,7 +544,7 @@ float ProximityInfoState::getPointToKeyLength(const int inputIndex, const int co
const int index = inputIndex * mProximityInfo->getKeyCount() + keyId;
return min(mDistanceCache[index], mMaxPointToKeyLength);
}
if (isSkippableChar(codePoint)) {
if (isSkippableCodePoint(codePoint)) {
return 0.0f;
}
// If the char is not a key on the keyboard then return the max length.
@ -960,9 +961,9 @@ bool ProximityInfoState::suppressCharProbabilities(const int index0, const int i
return true;
}
// Get a word that is detected by tracing highest probability sequence into charBuf and returns
// probability of generating the word.
float ProximityInfoState::getHighestProbabilitySequence(uint16_t *const charBuf) const {
// Get a word that is detected by tracing highest probability sequence into codePointBuf and
// returns probability of generating the word.
float ProximityInfoState::getHighestProbabilitySequence(int *const codePointBuf) const {
static const float DEMOTION_LOG_PROBABILITY = 0.3f;
int index = 0;
float sumLogProbability = 0.0f;
@ -980,12 +981,12 @@ float ProximityInfoState::getHighestProbabilitySequence(uint16_t *const charBuf)
}
}
if (character != NOT_AN_INDEX) {
charBuf[index] = mProximityInfo->getCodePointOf(character);
codePointBuf[index] = mProximityInfo->getCodePointOf(character);
index++;
}
sumLogProbability += minLogProbability;
}
charBuf[index] = '\0';
codePointBuf[index] = '\0';
return sumLogProbability;
}

View file

@ -43,7 +43,7 @@ class ProximityInfoState {
// Defined in proximity_info_state.cpp //
/////////////////////////////////////////
void initInputParams(const int pointerId, const float maxPointToKeyLength,
const ProximityInfo *proximityInfo, const int32_t *const inputCodes,
const ProximityInfo *proximityInfo, const int *const inputCodes,
const int inputSize, const int *xCoordinates, const int *yCoordinates,
const int *const times, const int *const pointerIds, const bool isGeometric);
@ -65,15 +65,15 @@ class ProximityInfoState {
virtual ~ProximityInfoState() {}
// Returns the primary code point for input position `index`: element 0 of that
// position's proximity list. `index` is not range-checked here.
// The old (getPrimaryCharAt, unsigned short) and new (getPrimaryCodePointAt, int)
// forms both appear below as the before/after sides of the change.
inline unsigned short getPrimaryCharAt(const int index) const {
return getProximityCharsAt(index)[0];
inline int getPrimaryCodePointAt(const int index) const {
return getProximityCodePointsAt(index)[0];
}
inline bool existsCharInProximityAt(const int index, const int c) const {
const int *chars = getProximityCharsAt(index);
inline bool existsCodePointInProximityAt(const int index, const int c) const {
const int *codePoints = getProximityCodePointsAt(index);
int i = 0;
while (chars[i] > 0 && i < MAX_PROXIMITY_CHARS_SIZE_INTERNAL) {
if (chars[i++] == c) {
while (codePoints[i] > 0 && i < MAX_PROXIMITY_CHARS_SIZE_INTERNAL) {
if (codePoints[i++] == c) {
return true;
}
}
@ -82,13 +82,13 @@ class ProximityInfoState {
inline bool existsAdjacentProximityChars(const int index) const {
if (index < 0 || index >= mInputSize) return false;
const int currentChar = getPrimaryCharAt(index);
const int currentCodePoint = getPrimaryCodePointAt(index);
const int leftIndex = index - 1;
if (leftIndex >= 0 && existsCharInProximityAt(leftIndex, currentChar)) {
if (leftIndex >= 0 && existsCodePointInProximityAt(leftIndex, currentCodePoint)) {
return true;
}
const int rightIndex = index + 1;
if (rightIndex < mInputSize && existsCharInProximityAt(rightIndex, currentChar)) {
if (rightIndex < mInputSize && existsCodePointInProximityAt(rightIndex, currentCodePoint)) {
return true;
}
return false;
@ -106,15 +106,15 @@ class ProximityInfoState {
// Notice : accented characters do not have a proximity list, so they are alone
// in their list. The non-accented version of the character should be considered
// "close", but not the other keys close to the non-accented version.
inline ProximityType getMatchedProximityId(const int index,
const unsigned short c, const bool checkProximityChars, int *proximityIndex = 0) const {
const int *currentChars = getProximityCharsAt(index);
const int firstChar = currentChars[0];
const unsigned short baseLowerC = toBaseLowerCase(c);
inline ProximityType getMatchedProximityId(const int index, const int c,
const bool checkProximityChars, int *proximityIndex = 0) const {
const int *currentCodePoints = getProximityCodePointsAt(index);
const int firstCodePoint = currentCodePoints[0];
const int baseLowerC = toBaseLowerCase(c);
// The first char in the array is what user typed. If it matches right away,
// that means the user typed that same char for this pos.
if (firstChar == baseLowerC || firstChar == c) {
if (firstCodePoint == baseLowerC || firstCodePoint == c) {
return EQUIVALENT_CHAR;
}
@ -123,14 +123,14 @@ class ProximityInfoState {
// If the non-accented, lowercased version of that first character matches c,
// then we have a non-accented version of the accented character the user
// typed. Treat it as a close char.
if (toBaseLowerCase(firstChar) == baseLowerC)
if (toBaseLowerCase(firstCodePoint) == baseLowerC)
return NEAR_PROXIMITY_CHAR;
// Not an exact nor an accent-alike match: search the list of close keys
int j = 1;
while (j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL
&& currentChars[j] > ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) {
const bool matched = (currentChars[j] == baseLowerC || currentChars[j] == c);
&& currentCodePoints[j] > ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) {
const bool matched = (currentCodePoints[j] == baseLowerC || currentCodePoints[j] == c);
if (matched) {
if (proximityIndex) {
*proximityIndex = j;
@ -140,11 +140,12 @@ class ProximityInfoState {
++j;
}
if (j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL
&& currentChars[j] == ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) {
&& currentCodePoints[j] == ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) {
++j;
while (j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL
&& currentChars[j] > ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) {
const bool matched = (currentChars[j] == baseLowerC || currentChars[j] == c);
&& currentCodePoints[j] > ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) {
const bool matched =
(currentCodePoints[j] == baseLowerC || currentCodePoints[j] == c);
if (matched) {
if (proximityIndex) {
*proximityIndex = j;
@ -165,7 +166,7 @@ class ProximityInfoState {
inputIndex * MAX_PROXIMITY_CHARS_SIZE_INTERNAL + proximityIndex];
}
// Returns a read-only pointer to the member array mPrimaryInputWord (no copy is made).
inline const unsigned short *getPrimaryInputWord() const {
inline const int *getPrimaryInputWord() const {
return mPrimaryInputWord;
}
@ -173,13 +174,13 @@ class ProximityInfoState {
return mTouchPositionCorrectionEnabled;
}
inline bool sameAsTyped(const unsigned short *word, int length) const {
inline bool sameAsTyped(const int *word, int length) const {
if (length != mInputSize) {
return false;
}
const int *inputCodes = mInputCodes;
while (length--) {
if (static_cast<unsigned int>(*inputCodes) != static_cast<unsigned int>(*word)) {
if (*inputCodes != *word) {
return false;
}
inputCodes += MAX_PROXIMITY_CHARS_SIZE_INTERNAL;
@ -236,7 +237,7 @@ class ProximityInfoState {
// Returns angle of three points. x, y, and z are indices.
float getPointsAngle(const int index0, const int index1, const int index2) const;
float getHighestProbabilitySequence(uint16_t *const charBuf) const;
float getHighestProbabilitySequence(int *const codePointBuf) const;
float getProbability(const int index, const int charCode) const;
@ -255,7 +256,7 @@ class ProximityInfoState {
float calculateSquaredDistanceFromSweetSpotCenter(
const int keyIndex, const int inputIndex) const;
bool pushTouchPoint(const int inputIndex, const int nodeChar, int x, int y, const int time,
bool pushTouchPoint(const int inputIndex, const int nodeCodePoint, int x, int y, const int time,
const bool sample, const bool isLastPoint, const float sumAngle,
NearKeysDistanceMap *const currentNearKeysDistances,
const NearKeysDistanceMap *const prevNearKeysDistances,
@ -269,7 +270,7 @@ class ProximityInfoState {
return mInputXs.size() > 0 && mInputYs.size() > 0;
}
// Returns a pointer into mInputCodes at offset
// index * MAX_PROXIMITY_CHARS_SIZE_INTERNAL, i.e. the start of the proximity
// list for input position `index`. `index` is not range-checked here.
inline const int *getProximityCharsAt(const int index) const {
inline const int *getProximityCodePointsAt(const int index) const {
return mInputCodes + (index * MAX_PROXIMITY_CHARS_SIZE_INTERNAL);
}
@ -322,10 +323,10 @@ class ProximityInfoState {
// inputs including the current input point.
std::vector<NearKeycodesSet> mSearchKeysVector;
bool mTouchPositionCorrectionEnabled;
int32_t mInputCodes[MAX_PROXIMITY_CHARS_SIZE_INTERNAL * MAX_WORD_LENGTH_INTERNAL];
int mInputCodes[MAX_PROXIMITY_CHARS_SIZE_INTERNAL * MAX_WORD_LENGTH_INTERNAL];
int mNormalizedSquaredDistances[MAX_PROXIMITY_CHARS_SIZE_INTERNAL * MAX_WORD_LENGTH_INTERNAL];
int mInputSize;
unsigned short mPrimaryInputWord[MAX_WORD_LENGTH_INTERNAL];
int mPrimaryInputWord[MAX_WORD_LENGTH_INTERNAL];
};
} // namespace latinime
#endif // LATINIME_PROXIMITY_INFO_STATE_H

View file

@ -43,18 +43,16 @@ class TerminalAttributes {
return mHasNextShortcutTarget;
}
// Gets the shortcut target itself as a uint16_t string. For parameters and return value
// Gets the shortcut target itself as an int string. For parameters and return value
// see BinaryFormat::getWordAtAddress.
// TODO: make the output an uint32_t* to handle the whole unicode range.
// Reads the next shortcut target from mDict at mPos, advancing mPos as it goes.
//   maxDepth - not referenced anywhere in the body shown here.
//   outWord  - receives the code points; no terminator is written, and at most
//              MAX_WORD_LENGTH_INTERNAL elements are stored.
//   outFreq  - receives the frequency decoded from the shortcut flags.
// Returns the number of code points written to outWord.
// Also updates mHasNextShortcutTarget from the FLAG_ATTRIBUTE_HAS_NEXT bit.
// Duplicated lines below are the before/after sides of the change
// (outWord: uint16_t * -> int *, which removes the truncating cast).
inline int getNextShortcutTarget(const int maxDepth, uint16_t *outWord, int *outFreq) {
inline int getNextShortcutTarget(const int maxDepth, int *outWord, int *outFreq) {
const int shortcutFlags = BinaryFormat::getFlagsAndForwardPointer(mDict, &mPos);
mHasNextShortcutTarget =
0 != (shortcutFlags & BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT);
mHasNextShortcutTarget = 0 != (shortcutFlags & BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT);
// i is declared outside the loop because it doubles as the return value.
unsigned int i;
for (i = 0; i < MAX_WORD_LENGTH_INTERNAL; ++i) {
const int codePoint = BinaryFormat::getCodePointAndForwardPointer(mDict, &mPos);
// NOT_A_CODE_POINT ends the target; it is consumed but not stored.
if (NOT_A_CODE_POINT == codePoint) break;
outWord[i] = (uint16_t)codePoint;
outWord[i] = codePoint;
}
*outFreq = BinaryFormat::getAttributeFrequencyFromFlags(shortcutFlags);
return i;
View file

@ -55,13 +55,13 @@ UnigramDictionary::UnigramDictionary(const uint8_t *const streamStart, int fullW
UnigramDictionary::~UnigramDictionary() {
}
// Returns the size in bytes of a buffer holding `codesSize` elements of the
// type `codes` points to (sizeof(*codes) * codesSize). `codes` is used only
// inside sizeof and is never dereferenced.
// The two signature/return pairs below are the before/after sides of the change;
// the newer form returns int, so the size_t product is converted implicitly.
static inline unsigned int getCodesBufferSize(const int *codes, const int codesSize) {
return static_cast<unsigned int>(sizeof(*codes)) * codesSize;
static inline int getCodesBufferSize(const int *codes, const int codesSize) {
return sizeof(*codes) * codesSize;
}
// TODO: This needs to take a const unsigned short* and not tinker with its contents
static inline void addWord(unsigned short *word, int length, int frequency,
WordsPriorityQueue *queue, int type) {
// TODO: This needs to take a const int* and not tinker with its contents
static inline void addWord(int *word, int length, int frequency, WordsPriorityQueue *queue,
int type) {
queue->push(frequency, word, length, type);
}
@ -171,9 +171,7 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit
int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
const int *ycoordinates, const int *codes, const int codesSize,
const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
const bool useFullEditDistance, unsigned short *outWords, int *frequencies,
int *outputTypes) const {
const bool useFullEditDistance, int *outWords, int *frequencies, int *outputTypes) const {
WordsPriorityQueuePool queuePool(MAX_WORDS, SUB_QUEUE_MAX_WORDS, MAX_WORD_LENGTH);
queuePool.clearAll();
Correction masterCorrection;
@ -218,7 +216,7 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *x
AKLOGI("Returning %d words", suggestedWordsCount);
/// Print the returned words
for (int j = 0; j < suggestedWordsCount; ++j) {
short unsigned int *w = outWords + j * MAX_WORD_LENGTH;
int *w = outWords + j * MAX_WORD_LENGTH;
char s[MAX_WORD_LENGTH];
for (int i = 0; i <= MAX_WORD_LENGTH; i++) s[i] = w[i];
(void)s; // To suppress compiler warning
@ -230,12 +228,11 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *x
return suggestedWordsCount;
}
void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
const int *xcoordinates, const int *ycoordinates, const int *codes,
const int inputSize, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
const bool useFullEditDistance, Correction *correction,
WordsPriorityQueuePool *queuePool) const {
void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
const int *ycoordinates, const int *codes, const int inputSize,
const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
const bool useFullEditDistance, Correction *correction, WordsPriorityQueuePool *queuePool)
const {
PROF_OPEN;
PROF_START(0);
PROF_END(0);
@ -284,7 +281,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
if (queue->size() > 0) {
WordsPriorityQueue::SuggestedWord *sw = queue->top();
const int score = sw->mScore;
const unsigned short *word = sw->mWord;
const int *word = sw->mWord;
const int wordLength = sw->mWordLength;
float ns = Correction::RankingAlgorithm::calcNormalizedScore(
correction->getPrimaryInputWord(), i, word, wordLength, score);
@ -303,7 +300,7 @@ void UnigramDictionary::initSuggestions(ProximityInfo *proximityInfo, const int
Correction *correction) const {
if (DEBUG_DICT) {
AKLOGI("initSuggest");
DUMP_WORD_INT(codes, inputSize);
DUMP_WORD(codes, inputSize);
}
correction->initInputParams(proximityInfo, codes, inputSize, xCoordinates, yCoordinates);
const int maxDepth = min(inputSize * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);
@ -376,7 +373,7 @@ inline void UnigramDictionary::onTerminal(const int probability,
const bool addToSubQueue = inputIndex < SUB_QUEUE_MAX_COUNT;
int wordLength;
unsigned short *wordPointer;
int *wordPointer;
if ((currentWordIndex == FIRST_WORD_INDEX) && addToMasterQueue) {
WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
@ -404,7 +401,7 @@ inline void UnigramDictionary::onTerminal(const int probability,
// so that the insert order is protected inside the queue for words
// with the same score. For the moment we use -1 to make sure the shortcut will
// never be in front of the word.
uint16_t shortcutTarget[MAX_WORD_LENGTH_INTERNAL];
int shortcutTarget[MAX_WORD_LENGTH_INTERNAL];
int shortcutFrequency;
const int shortcutTargetStringLength = iterator.getNextShortcutTarget(
MAX_WORD_LENGTH_INTERNAL, shortcutTarget, &shortcutFrequency);
@ -444,7 +441,7 @@ int UnigramDictionary::getSubStringSuggestion(
const bool hasAutoCorrectionCandidate, const int currentWordIndex,
const int inputWordStartPos, const int inputWordLength,
const int outputWordStartPos, const bool isSpaceProximity, int *freqArray,
int *wordLengthArray, unsigned short *outputWord, int *outputWordLength) const {
int *wordLengthArray, int *outputWord, int *outputWordLength) const {
if (inputWordLength > MULTIPLE_WORDS_SUGGESTION_MAX_WORD_LENGTH) {
return FLAG_MULTIPLE_SUGGEST_ABORT;
}
@ -487,13 +484,13 @@ int UnigramDictionary::getSubStringSuggestion(
// TODO: Remove the safety net above //
//////////////////////////////////////////////
unsigned short *tempOutputWord = 0;
int *tempOutputWord = 0;
int nextWordLength = 0;
// TODO: Optimize init suggestion
initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
inputSize, correction);
unsigned short word[MAX_WORD_LENGTH_INTERNAL];
int word[MAX_WORD_LENGTH_INTERNAL];
int freq = getMostFrequentWordLike(
inputWordStartPos, inputWordLength, correction, word);
if (freq > 0) {
@ -592,7 +589,7 @@ void UnigramDictionary::getMultiWordsSuggestionRec(ProximityInfo *proximityInfo,
const bool useFullEditDistance, const int inputSize, Correction *correction,
WordsPriorityQueuePool *queuePool, const bool hasAutoCorrectionCandidate,
const int startInputPos, const int startWordIndex, const int outputWordLength,
int *freqArray, int *wordLengthArray, unsigned short *outputWord) const {
int *freqArray, int *wordLengthArray, int *outputWord) const {
if (startWordIndex >= (MULTIPLE_WORDS_SUGGESTION_MAX_WORDS - 1)) {
// Return if the last word index
return;
@ -678,7 +675,7 @@ void UnigramDictionary::getSplitMultipleWordsSuggestions(ProximityInfo *proximit
}
// Allocating fixed length array on stack
unsigned short outputWord[MAX_WORD_LENGTH];
int outputWord[MAX_WORD_LENGTH];
int freqArray[MULTIPLE_WORDS_SUGGESTION_MAX_WORDS];
int wordLengthArray[MULTIPLE_WORDS_SUGGESTION_MAX_WORDS];
const int outputWordLength = 0;
@ -693,11 +690,11 @@ void UnigramDictionary::getSplitMultipleWordsSuggestions(ProximityInfo *proximit
// Wrapper for getMostFrequentWordLikeInner, which matches it to the previous
// interface.
// Reads inputSize elements from the correction object, starting at startInputIndex,
// into a local array, then returns whatever getMostFrequentWordLikeInner returns for
// that array. `word` is forwarded untouched as the inner function's output argument.
// NOTE(review): this span shows both sides of the change -- the uint16_t/unsigned short
// lines are the previous version, the int lines are the current one.
inline int UnigramDictionary::getMostFrequentWordLike(const int startInputIndex,
const int inputSize, Correction *correction, unsigned short *word) const {
uint16_t inWord[inputSize];
const int inputSize, Correction *correction, int *word) const {
// Local array whose length is the runtime value of inputSize.
int inWord[inputSize];
for (int i = 0; i < inputSize; ++i) {
inWord[i] = (uint16_t)correction->getPrimaryCharAt(startInputIndex + i);
// Current version stores the value returned by getPrimaryCodePointAt without a cast.
inWord[i] = correction->getPrimaryCodePointAt(startInputIndex + i);
}
return getMostFrequentWordLikeInner(inWord, inputSize, word);
}
@ -715,14 +712,14 @@ inline int UnigramDictionary::getMostFrequentWordLike(const int startInputIndex,
// In and out parameters may point to the same location. This function takes care
// not to use any input parameters after it wrote into its outputs.
static inline bool testCharGroupForContinuedLikeness(const uint8_t flags,
const uint8_t *const root, const int startPos, const uint16_t *const inWord,
const int startInputIndex, const int inputSize, int32_t *outNewWord, int *outInputIndex,
const uint8_t *const root, const int startPos, const int *const inWord,
const int startInputIndex, const int inputSize, int *outNewWord, int *outInputIndex,
int *outPos) {
const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags));
int pos = startPos;
int32_t codePoint = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
int32_t baseChar = toBaseLowerCase(codePoint);
const uint16_t wChar = toBaseLowerCase(inWord[startInputIndex]);
int codePoint = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
int baseChar = toBaseLowerCase(codePoint);
const int wChar = toBaseLowerCase(inWord[startInputIndex]);
if (baseChar != wChar) {
*outPos = hasMultipleChars ? BinaryFormat::skipOtherCharacters(root, pos) : pos;
@ -753,8 +750,8 @@ static inline bool testCharGroupForContinuedLikeness(const uint8_t flags,
// It will compare the frequency to the max frequency, and if greater, will
// copy the word into the output buffer. In output value maxFreq, it will
// write the new maximum frequency if it changed.
static inline void onTerminalWordLike(const int freq, int32_t *newWord, const int length,
short unsigned int *outWord, int *maxFreq) {
static inline void onTerminalWordLike(const int freq, int *newWord, const int length, int *outWord,
int *maxFreq) {
if (freq > *maxFreq) {
for (int q = 0; q < length; ++q) {
outWord[q] = newWord[q];
@ -766,9 +763,9 @@ static inline void onTerminalWordLike(const int freq, int32_t *newWord, const in
// Will find the highest frequency of the words like the one passed as an argument,
// that is, everything that only differs by case/accents.
int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t *const inWord,
const int inputSize, short unsigned int *outWord) const {
int32_t newWord[MAX_WORD_LENGTH_INTERNAL];
int UnigramDictionary::getMostFrequentWordLikeInner(const int *const inWord, const int inputSize,
int *outWord) const {
int newWord[MAX_WORD_LENGTH_INTERNAL];
int depth = 0;
int maxFreq = -1;
const uint8_t *const root = DICT_ROOT;
@ -828,7 +825,7 @@ int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t *const inWord
return maxFreq;
}
int UnigramDictionary::getFrequency(const int32_t *const inWord, const int length) const {
int UnigramDictionary::getFrequency(const int *const inWord, const int length) const {
const uint8_t *const root = DICT_ROOT;
int pos = BinaryFormat::getTerminalPosition(root, inWord, length,
false /* forceLowerCaseSearch */);
@ -853,8 +850,7 @@ int UnigramDictionary::getFrequency(const int32_t *const inWord, const int lengt
}
// TODO: remove this function.
// Stub: none of the arguments are read and the result is always -1.
// NOTE(review): this span shows both sides of the change -- the two-line
// `unsigned short *word` signature is the previous version, the one-line `int *word`
// signature is the current one.
int UnigramDictionary::getBigramPosition(int pos, unsigned short *word, int offset,
int length) const {
int UnigramDictionary::getBigramPosition(int pos, int *word, int offset, int length) const {
return -1;
}
@ -900,7 +896,7 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos,
// else if FLAG_IS_TERMINAL: the frequency
// else if MASK_GROUP_ADDRESS_TYPE is not NONE: the children address
// Note that you can't have a node that both is not a terminal and has no children.
int32_t c = BinaryFormat::getCodePointAndForwardPointer(DICT_ROOT, &pos);
int c = BinaryFormat::getCodePointAndForwardPointer(DICT_ROOT, &pos);
assert(NOT_A_CODE_POINT != c);
// We are going to loop through each character and make it look like it's a different
@ -914,7 +910,7 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos,
// We prefetch the next char. If 'c' is the last char of this node, we will have
// NOT_A_CODE_POINT in the next char. From this we can decide whether this virtual node
// should behave as a terminal or not and whether we have children.
const int32_t nextc = hasMultipleChars
const int nextc = hasMultipleChars
? BinaryFormat::getCodePointAndForwardPointer(DICT_ROOT, &pos) : NOT_A_CODE_POINT;
const bool isLastChar = (NOT_A_CODE_POINT == nextc);
// If there are more chars in this nodes, then this virtual node is not a terminal.

View file

@ -41,12 +41,12 @@ class UnigramDictionary {
static const int FLAG_MULTIPLE_SUGGEST_CONTINUE = 2;
UnigramDictionary(const uint8_t *const streamStart, int fullWordMultiplier, int maxWordLength,
int maxWords, const unsigned int flags);
int getFrequency(const int32_t *const inWord, const int length) const;
int getBigramPosition(int pos, unsigned short *word, int offset, int length) const;
int getFrequency(const int *const inWord, const int length) const;
int getBigramPosition(int pos, int *word, int offset, int length) const;
int getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
const int *ycoordinates, const int *codes, const int codesSize,
const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
const bool useFullEditDistance, unsigned short *outWords, int *frequencies,
const bool useFullEditDistance, int *outWords, int *frequencies,
int *outputTypes) const;
virtual ~UnigramDictionary();
@ -93,9 +93,9 @@ class UnigramDictionary {
int *newChildPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool,
const int currentWordIndex) const;
int getMostFrequentWordLike(const int startInputIndex, const int inputSize,
Correction *correction, unsigned short *word) const;
int getMostFrequentWordLikeInner(const uint16_t *const inWord, const int inputSize,
short unsigned int *outWord) const;
Correction *correction, int *word) const;
int getMostFrequentWordLikeInner(const int *const inWord, const int inputSize,
int *outWord) const;
int getSubStringSuggestion(
ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates,
const int *codes, const bool useFullEditDistance, Correction *correction,
@ -103,14 +103,13 @@ class UnigramDictionary {
const bool hasAutoCorrectionCandidate, const int currentWordIndex,
const int inputWordStartPos, const int inputWordLength,
const int outputWordStartPos, const bool isSpaceProximity, int *freqArray,
int *wordLengthArray, unsigned short *outputWord, int *outputWordLength) const;
void getMultiWordsSuggestionRec(ProximityInfo *proximityInfo,
const int *xcoordinates, const int *ycoordinates, const int *codes,
const bool useFullEditDistance, const int inputSize,
Correction *correction, WordsPriorityQueuePool *queuePool,
int *wordLengthArray, int *outputWord, int *outputWordLength) const;
void getMultiWordsSuggestionRec(ProximityInfo *proximityInfo, const int *xcoordinates,
const int *ycoordinates, const int *codes, const bool useFullEditDistance,
const int inputSize, Correction *correction, WordsPriorityQueuePool *queuePool,
const bool hasAutoCorrectionCandidate, const int startPos, const int startWordIndex,
const int outputWordLength, int *freqArray, int *wordLengthArray,
unsigned short *outputWord) const;
int *outputWord) const;
const uint8_t *const DICT_ROOT;
const int MAX_WORD_LENGTH;

View file

@ -30,15 +30,15 @@ class WordsPriorityQueue {
class SuggestedWord {
public:
int mScore;
unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
int mWord[MAX_WORD_LENGTH_INTERNAL];
int mWordLength;
bool mUsed;
int mType;
// Fills this entry: stores score, wordLength and type, copies wordLength elements
// from word into mWord, and marks the entry as used.
// NOTE(review): no check of wordLength against MAX_WORD_LENGTH_INTERNAL (the declared
// size of mWord) is visible here -- presumably callers guarantee it fits; confirm.
// NOTE(review): this span shows both sides of the change -- the `unsigned short` lines
// are the previous version, the `int` lines are the current one.
void setParams(int score, unsigned short *word, int wordLength, int type) {
void setParams(int score, int *word, int wordLength, int type) {
mScore = score;
mWordLength = wordLength;
memcpy(mWord, word, sizeof(unsigned short) * wordLength);
// The byte count uses the element size of the current mWord declaration (int).
memcpy(mWord, word, sizeof(int) * wordLength);
mUsed = true;
mType = type;
}
@ -57,9 +57,9 @@ class WordsPriorityQueue {
delete[] mSuggestedWords;
}
void push(int score, unsigned short *word, int wordLength, int type) {
void push(int score, int *word, int wordLength, int type) {
SuggestedWord *sw = 0;
if (mSuggestions.size() >= MAX_WORDS) {
if (size() >= MAX_WORDS) {
sw = mSuggestions.top();
const int minScore = sw->mScore;
if (minScore >= score) {
@ -94,11 +94,10 @@ class WordsPriorityQueue {
return sw;
}
int outputSuggestions(const unsigned short *before, const int beforeLength,
int *frequencies, unsigned short *outputChars, int* outputTypes) {
int outputSuggestions(const int *before, const int beforeLength, int *frequencies,
int *outputCodePoints, int* outputTypes) {
mHighestSuggestedWord = 0;
const unsigned int size = min(
MAX_WORDS, static_cast<unsigned int>(mSuggestions.size()));
const int size = min(MAX_WORDS, static_cast<int>(mSuggestions.size()));
SuggestedWord *swBuffer[size];
int index = size - 1;
while (!mSuggestions.empty() && index >= 0) {
@ -113,9 +112,9 @@ class WordsPriorityQueue {
}
if (size >= 2) {
SuggestedWord *nsMaxSw = 0;
unsigned int maxIndex = 0;
int maxIndex = 0;
float maxNs = 0;
for (unsigned int i = 0; i < size; ++i) {
for (int i = 0; i < size; ++i) {
SuggestedWord *tempSw = swBuffer[i];
if (!tempSw) {
continue;
@ -132,17 +131,17 @@ class WordsPriorityQueue {
swBuffer[0] = nsMaxSw;
}
}
for (unsigned int i = 0; i < size; ++i) {
for (int i = 0; i < size; ++i) {
SuggestedWord *sw = swBuffer[i];
if (!sw) {
AKLOGE("SuggestedWord is null %d", i);
continue;
}
const unsigned int wordLength = sw->mWordLength;
unsigned short *targetAddress = outputChars + i * MAX_WORD_LENGTH;
const int wordLength = sw->mWordLength;
int *targetAddress = outputCodePoints + i * MAX_WORD_LENGTH;
frequencies[i] = sw->mScore;
outputTypes[i] = sw->mType;
memcpy(targetAddress, sw->mWord, wordLength * sizeof(unsigned short));
memcpy(targetAddress, sw->mWord, wordLength * sizeof(int));
if (wordLength < MAX_WORD_LENGTH) {
targetAddress[wordLength] = 0;
}
@ -152,7 +151,7 @@ class WordsPriorityQueue {
}
// Returns the number of entries currently held in mSuggestions, as an int.
// NOTE(review): this span shows both sides of the change -- the first return is the
// previous version, the second is the current one.
int size() const {
return mSuggestions.size();
// std::priority_queue::size() yields an unsigned size_type; the cast makes the
// conversion to the int return type explicit.
return static_cast<int>(mSuggestions.size());
}
void clear() {
@ -175,13 +174,13 @@ class WordsPriorityQueue {
DUMP_WORD(mHighestSuggestedWord->mWord, mHighestSuggestedWord->mWordLength);
}
// Returns the result of getNormalizedScore() for mHighestSuggestedWord, or 0.0 when
// mHighestSuggestedWord is null. before/beforeLength and the three out-pointers are
// forwarded to getNormalizedScore() unchanged.
// NOTE(review): this span shows both sides of the change -- the `unsigned short`
// signature and first return statement are the previous version, the `int` signature
// and second return statement are the current one.
float getHighestNormalizedScore(const unsigned short *before, const int beforeLength,
unsigned short **outWord, int *outScore, int *outLength) {
float getHighestNormalizedScore(const int *before, const int beforeLength, int **outWord,
int *outScore, int *outLength) {
// Nothing has been recorded as the highest suggestion: report a zero score.
if (!mHighestSuggestedWord) {
return 0.0;
}
return getNormalizedScore(
mHighestSuggestedWord, before, beforeLength, outWord, outScore, outLength);
return getNormalizedScore(mHighestSuggestedWord, before, beforeLength, outWord, outScore,
outLength);
}
private:
@ -192,9 +191,8 @@ class WordsPriorityQueue {
}
};
SuggestedWord *getFreeSuggestedWord(int score, unsigned short *word,
int wordLength, int type) {
for (unsigned int i = 0; i < MAX_WORD_LENGTH; ++i) {
SuggestedWord *getFreeSuggestedWord(int score, int *word, int wordLength, int type) {
for (int i = 0; i < MAX_WORD_LENGTH; ++i) {
if (!mSuggestedWords[i].mUsed) {
mSuggestedWords[i].setParams(score, word, wordLength, type);
return &mSuggestedWords[i];
@ -203,10 +201,10 @@ class WordsPriorityQueue {
return 0;
}
static float getNormalizedScore(SuggestedWord *sw, const unsigned short *before,
const int beforeLength, unsigned short **outWord, int *outScore, int *outLength) {
static float getNormalizedScore(SuggestedWord *sw, const int *before, const int beforeLength,
int **outWord, int *outScore, int *outLength) {
const int score = sw->mScore;
unsigned short *word = sw->mWord;
int *word = sw->mWord;
const int wordLength = sw->mWordLength;
if (outScore) {
*outScore = score;
@ -217,15 +215,15 @@ class WordsPriorityQueue {
if (outLength) {
*outLength = wordLength;
}
return Correction::RankingAlgorithm::calcNormalizedScore(
before, beforeLength, word, wordLength, score);
return Correction::RankingAlgorithm::calcNormalizedScore(before, beforeLength, word,
wordLength, score);
}
typedef std::priority_queue<SuggestedWord *, std::vector<SuggestedWord *>,
wordComparator> Suggestions;
Suggestions mSuggestions;
const unsigned int MAX_WORDS;
const unsigned int MAX_WORD_LENGTH;
const int MAX_WORDS;
const int MAX_WORD_LENGTH;
SuggestedWord *mSuggestedWords;
SuggestedWord *mHighestSuggestedWord;
};