Merge "Cosmetic fixes and a bug fix in UnigramDictionary::testCharGroupForContinuedLikeness()." into jb-mr1-dev

main
Ken Wakasa 2012-09-03 22:26:42 -07:00 committed by Android (Google) Code Review
commit d34dd5bb6b
19 changed files with 190 additions and 179 deletions

View File

@ -68,9 +68,9 @@ static jlong latinime_BinaryDictionary_open(JNIEnv *env, jobject object,
return 0; return 0;
} }
int pagesize = getpagesize(); int pagesize = getpagesize();
adjust = dictOffset % pagesize; adjust = static_cast<int>(dictOffset) % pagesize;
int adjDictOffset = dictOffset - adjust; int adjDictOffset = static_cast<int>(dictOffset) - adjust;
int adjDictSize = dictSize + adjust; int adjDictSize = static_cast<int>(dictSize) + adjust;
dictBuf = mmap(0, sizeof(char) * adjDictSize, PROT_READ, MAP_PRIVATE, fd, adjDictOffset); dictBuf = mmap(0, sizeof(char) * adjDictSize, PROT_READ, MAP_PRIVATE, fd, adjDictOffset);
if (dictBuf == MAP_FAILED) { if (dictBuf == MAP_FAILED) {
AKLOGE("DICT: Can't mmap dictionary. errno=%d", errno); AKLOGE("DICT: Can't mmap dictionary. errno=%d", errno);
@ -120,8 +120,8 @@ static jlong latinime_BinaryDictionary_open(JNIEnv *env, jobject object,
releaseDictBuf(dictBuf, 0, 0); releaseDictBuf(dictBuf, 0, 0);
#endif // USE_MMAP_FOR_DICTIONARY #endif // USE_MMAP_FOR_DICTIONARY
} else { } else {
dictionary = new Dictionary(dictBuf, dictSize, fd, adjust, typedLetterMultiplier, dictionary = new Dictionary(dictBuf, static_cast<int>(dictSize), fd, adjust,
fullWordMultiplier, maxWordLength, maxWords, maxPredictions); typedLetterMultiplier, fullWordMultiplier, maxWordLength, maxWords, maxPredictions);
} }
PROF_END(66); PROF_END(66);
PROF_CLOSE; PROF_CLOSE;

View File

@ -14,6 +14,8 @@
* limitations under the License. * limitations under the License.
*/ */
#include <stdint.h>
#include "char_utils.h" #include "char_utils.h"
namespace latinime { namespace latinime {
@ -24,7 +26,7 @@ namespace latinime {
* if c is not a combined character, or the base character if it * if c is not a combined character, or the base character if it
* is combined. * is combined.
*/ */
const unsigned short BASE_CHARS[BASE_CHARS_SIZE] = { const uint16_t BASE_CHARS[BASE_CHARS_SIZE] = {
0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,

View File

@ -156,7 +156,7 @@ int BigramDictionary::getBigramListPositionForWord(const int32_t *prevWord,
const int flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos); const int flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
if (0 == (flags & BinaryFormat::FLAG_HAS_BIGRAMS)) return 0; if (0 == (flags & BinaryFormat::FLAG_HAS_BIGRAMS)) return 0;
if (0 == (flags & BinaryFormat::FLAG_HAS_MULTIPLE_CHARS)) { if (0 == (flags & BinaryFormat::FLAG_HAS_MULTIPLE_CHARS)) {
BinaryFormat::getCharCodeAndForwardPointer(root, &pos); BinaryFormat::getCodePointAndForwardPointer(root, &pos);
} else { } else {
pos = BinaryFormat::skipOtherCharacters(root, pos); pos = BinaryFormat::skipOtherCharacters(root, pos);
} }

View File

@ -84,7 +84,7 @@ class BinaryFormat {
static unsigned int getFlags(const uint8_t *const dict); static unsigned int getFlags(const uint8_t *const dict);
static int getGroupCountAndForwardPointer(const uint8_t *const dict, int *pos); static int getGroupCountAndForwardPointer(const uint8_t *const dict, int *pos);
static uint8_t getFlagsAndForwardPointer(const uint8_t *const dict, int *pos); static uint8_t getFlagsAndForwardPointer(const uint8_t *const dict, int *pos);
static int32_t getCharCodeAndForwardPointer(const uint8_t *const dict, int *pos); static int32_t getCodePointAndForwardPointer(const uint8_t *const dict, int *pos);
static int readFrequencyWithoutMovingPointer(const uint8_t *const dict, const int pos); static int readFrequencyWithoutMovingPointer(const uint8_t *const dict, const int pos);
static int skipOtherCharacters(const uint8_t *const dict, const int pos); static int skipOtherCharacters(const uint8_t *const dict, const int pos);
static int skipChildrenPosition(const uint8_t flags, const int pos); static int skipChildrenPosition(const uint8_t flags, const int pos);
@ -176,22 +176,22 @@ inline uint8_t BinaryFormat::getFlagsAndForwardPointer(const uint8_t *const dict
return dict[(*pos)++]; return dict[(*pos)++];
} }
inline int32_t BinaryFormat::getCharCodeAndForwardPointer(const uint8_t *const dict, int *pos) { inline int32_t BinaryFormat::getCodePointAndForwardPointer(const uint8_t *const dict, int *pos) {
const int origin = *pos; const int origin = *pos;
const int32_t character = dict[origin]; const int32_t codePoint = dict[origin];
if (character < MINIMAL_ONE_BYTE_CHARACTER_VALUE) { if (codePoint < MINIMAL_ONE_BYTE_CHARACTER_VALUE) {
if (character == CHARACTER_ARRAY_TERMINATOR) { if (codePoint == CHARACTER_ARRAY_TERMINATOR) {
*pos = origin + 1; *pos = origin + 1;
return NOT_A_CHARACTER; return NOT_A_CODE_POINT;
} else { } else {
*pos = origin + 3; *pos = origin + 3;
const int32_t char_1 = character << 16; const int32_t char_1 = codePoint << 16;
const int32_t char_2 = char_1 + (dict[origin + 1] << 8); const int32_t char_2 = char_1 + (dict[origin + 1] << 8);
return char_2 + dict[origin + 2]; return char_2 + dict[origin + 2];
} }
} else { } else {
*pos = origin + 1; *pos = origin + 1;
return character; return codePoint;
} }
} }
@ -369,15 +369,15 @@ inline int BinaryFormat::getTerminalPosition(const uint8_t *const root,
if (0 >= charGroupCount) return NOT_VALID_WORD; if (0 >= charGroupCount) return NOT_VALID_WORD;
const int charGroupPos = pos; const int charGroupPos = pos;
const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos); const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
int32_t character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos); int32_t character = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
if (character == wChar) { if (character == wChar) {
// This is the correct node. Only one character group may start with the same // This is the correct node. Only one character group may start with the same
// char within a node, so either we found our match in this node, or there is // char within a node, so either we found our match in this node, or there is
// no match and we can return NOT_VALID_WORD. So we will check all the characters // no match and we can return NOT_VALID_WORD. So we will check all the characters
// in this character group indeed does match. // in this character group indeed does match.
if (FLAG_HAS_MULTIPLE_CHARS & flags) { if (FLAG_HAS_MULTIPLE_CHARS & flags) {
character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos); character = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
while (NOT_A_CHARACTER != character) { while (NOT_A_CODE_POINT != character) {
++wordPos; ++wordPos;
// If we shoot the length of the word we search for, or if we find a single // If we shoot the length of the word we search for, or if we find a single
// character that does not match, as explained above, it means the word is // character that does not match, as explained above, it means the word is
@ -385,7 +385,7 @@ inline int BinaryFormat::getTerminalPosition(const uint8_t *const root,
// match the word on the first character, but not matching the whole word). // match the word on the first character, but not matching the whole word).
if (wordPos > length) return NOT_VALID_WORD; if (wordPos > length) return NOT_VALID_WORD;
if (inWord[wordPos] != character) return NOT_VALID_WORD; if (inWord[wordPos] != character) return NOT_VALID_WORD;
character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos); character = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
} }
} }
// If we come here we know that so far, we do match. Either we are on a terminal // If we come here we know that so far, we do match. Either we are on a terminal
@ -457,19 +457,19 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t *const root, const int a
--charGroupCount) { --charGroupCount) {
const int startPos = pos; const int startPos = pos;
const uint8_t flags = getFlagsAndForwardPointer(root, &pos); const uint8_t flags = getFlagsAndForwardPointer(root, &pos);
const int32_t character = getCharCodeAndForwardPointer(root, &pos); const int32_t character = getCodePointAndForwardPointer(root, &pos);
if (address == startPos) { if (address == startPos) {
// We found the address. Copy the rest of the word in the buffer and return // We found the address. Copy the rest of the word in the buffer and return
// the length. // the length.
outWord[wordPos] = character; outWord[wordPos] = character;
if (FLAG_HAS_MULTIPLE_CHARS & flags) { if (FLAG_HAS_MULTIPLE_CHARS & flags) {
int32_t nextChar = getCharCodeAndForwardPointer(root, &pos); int32_t nextChar = getCodePointAndForwardPointer(root, &pos);
// We count chars in order to avoid infinite loops if the file is broken or // We count chars in order to avoid infinite loops if the file is broken or
// if there is some other bug // if there is some other bug
int charCount = maxDepth; int charCount = maxDepth;
while (NOT_A_CHARACTER != nextChar && --charCount > 0) { while (NOT_A_CODE_POINT != nextChar && --charCount > 0) {
outWord[++wordPos] = nextChar; outWord[++wordPos] = nextChar;
nextChar = getCharCodeAndForwardPointer(root, &pos); nextChar = getCodePointAndForwardPointer(root, &pos);
} }
} }
*outUnigramFrequency = readFrequencyWithoutMovingPointer(root, pos); *outUnigramFrequency = readFrequencyWithoutMovingPointer(root, pos);
@ -523,16 +523,16 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t *const root, const int a
const uint8_t lastFlags = const uint8_t lastFlags =
getFlagsAndForwardPointer(root, &lastCandidateGroupPos); getFlagsAndForwardPointer(root, &lastCandidateGroupPos);
const int32_t lastChar = const int32_t lastChar =
getCharCodeAndForwardPointer(root, &lastCandidateGroupPos); getCodePointAndForwardPointer(root, &lastCandidateGroupPos);
// We copy all the characters in this group to the buffer // We copy all the characters in this group to the buffer
outWord[wordPos] = lastChar; outWord[wordPos] = lastChar;
if (FLAG_HAS_MULTIPLE_CHARS & lastFlags) { if (FLAG_HAS_MULTIPLE_CHARS & lastFlags) {
int32_t nextChar = int32_t nextChar =
getCharCodeAndForwardPointer(root, &lastCandidateGroupPos); getCodePointAndForwardPointer(root, &lastCandidateGroupPos);
int charCount = maxDepth; int charCount = maxDepth;
while (-1 != nextChar && --charCount > 0) { while (-1 != nextChar && --charCount > 0) {
outWord[++wordPos] = nextChar; outWord[++wordPos] = nextChar;
nextChar = getCharCodeAndForwardPointer(root, &lastCandidateGroupPos); nextChar = getCodePointAndForwardPointer(root, &lastCandidateGroupPos);
} }
} }
++wordPos; ++wordPos;
@ -582,8 +582,8 @@ inline int BinaryFormat::computeFrequencyForBigram(const int unigramFreq, const
// 0 for the bigram frequency represents the middle of the 16th step from the top, // 0 for the bigram frequency represents the middle of the 16th step from the top,
// while a value of 15 represents the middle of the top step. // while a value of 15 represents the middle of the top step.
// See makedict.BinaryDictInputOutput for details. // See makedict.BinaryDictInputOutput for details.
const float stepSize = (static_cast<float>(MAX_FREQ) - unigramFreq) / (1.5f + MAX_BIGRAM_FREQ); const float stepSize = static_cast<float>(MAX_FREQ - unigramFreq) / (1.5f + MAX_BIGRAM_FREQ);
return static_cast<int>(unigramFreq + (bigramFreq + 1) * stepSize); return unigramFreq + static_cast<int>(static_cast<float>(bigramFreq + 1) * stepSize);
} }
// This returns a probability in log space. // This returns a probability in log space.

View File

@ -23,14 +23,16 @@
namespace latinime { namespace latinime {
static inline void setInFilter(uint8_t *filter, const int position) { // TODO: uint32_t position
const unsigned int bucket = position % BIGRAM_FILTER_MODULO; static inline void setInFilter(uint8_t *filter, const int32_t position) {
filter[bucket >> 3] |= (1 << (bucket & 0x7)); const uint32_t bucket = static_cast<uint32_t>(position % BIGRAM_FILTER_MODULO);
filter[bucket >> 3] |= static_cast<uint8_t>(1 << (bucket & 0x7));
} }
static inline bool isInFilter(const uint8_t *filter, const int position) { // TODO: uint32_t position
const unsigned int bucket = position % BIGRAM_FILTER_MODULO; static inline bool isInFilter(const uint8_t *filter, const int32_t position) {
return filter[bucket >> 3] & (1 << (bucket & 0x7)); const uint32_t bucket = static_cast<uint32_t>(position % BIGRAM_FILTER_MODULO);
return filter[bucket >> 3] & static_cast<uint8_t>(1 << (bucket & 0x7));
} }
} // namespace latinime } // namespace latinime
#endif // LATINIME_BLOOM_FILTER_H #endif // LATINIME_BLOOM_FILTER_H

View File

@ -18,6 +18,7 @@
#define LATINIME_CHAR_UTILS_H #define LATINIME_CHAR_UTILS_H
#include <cctype> #include <cctype>
#include <stdint.h>
namespace latinime { namespace latinime {
@ -43,7 +44,7 @@ unsigned short latin_tolower(const unsigned short c);
*/ */
static const int BASE_CHARS_SIZE = 0x0500; static const int BASE_CHARS_SIZE = 0x0500;
extern const unsigned short BASE_CHARS[BASE_CHARS_SIZE]; extern const uint16_t BASE_CHARS[BASE_CHARS_SIZE];
inline static unsigned short toBaseChar(unsigned short c) { inline static unsigned short toBaseChar(unsigned short c) {
if (c < BASE_CHARS_SIZE) { if (c < BASE_CHARS_SIZE) {

View File

@ -362,7 +362,8 @@ Correction::CorrectionType Correction::processCharAndCalcState(
if (mSkipPos >= 0) { if (mSkipPos >= 0) {
if (mSkippedCount == 0 && mSkipPos < mOutputIndex) { if (mSkippedCount == 0 && mSkipPos < mOutputIndex) {
if (DEBUG_DICT) { if (DEBUG_DICT) {
assert(mSkipPos == mOutputIndex - 1); // TODO: Enable this assertion.
//assert(mSkipPos == mOutputIndex - 1);
} }
mSkipPos = mOutputIndex; mSkipPos = mOutputIndex;
} }
@ -1126,15 +1127,16 @@ float Correction::RankingAlgorithm::calcNormalizedScore(const unsigned short *be
return 0; return 0;
} }
const float maxScore = score >= S_INT_MAX ? S_INT_MAX : MAX_INITIAL_SCORE const float maxScore = score >= S_INT_MAX ? static_cast<float>(S_INT_MAX)
: static_cast<float>(MAX_INITIAL_SCORE)
* powf(static_cast<float>(TYPED_LETTER_MULTIPLIER), * powf(static_cast<float>(TYPED_LETTER_MULTIPLIER),
static_cast<float>(min(beforeLength, afterLength - spaceCount))) static_cast<float>(min(beforeLength, afterLength - spaceCount)))
* FULL_WORD_MULTIPLIER; * static_cast<float>(FULL_WORD_MULTIPLIER);
// add a weight based on edit distance. // add a weight based on edit distance.
// distance <= max(afterLength, beforeLength) == afterLength, // distance <= max(afterLength, beforeLength) == afterLength,
// so, 0 <= distance / afterLength <= 1 // so, 0 <= distance / afterLength <= 1
const float weight = 1.0f - static_cast<float>(distance) / static_cast<float>(afterLength); const float weight = 1.0f - static_cast<float>(distance) / static_cast<float>(afterLength);
return (score / maxScore) * weight; return (static_cast<float>(score) / maxScore) * weight;
} }
} // namespace latinime } // namespace latinime

View File

@ -17,6 +17,8 @@
#ifndef LATINIME_DEFINES_H #ifndef LATINIME_DEFINES_H
#define LATINIME_DEFINES_H #define LATINIME_DEFINES_H
#include <stdint.h>
#if defined(FLAG_DO_PROFILE) || defined(FLAG_DBG) #if defined(FLAG_DO_PROFILE) || defined(FLAG_DBG)
#include <android/log.h> #include <android/log.h>
#ifndef LOG_TAG #ifndef LOG_TAG
@ -39,7 +41,8 @@ static inline void dumpWordInfo(const unsigned short *word, const int length,
if (c == 0) { if (c == 0) {
break; break;
} }
charBuf[i] = c; // static_cast only for debugging
charBuf[i] = static_cast<char>(c);
} }
charBuf[i] = 0; charBuf[i] = 0;
if (i > 1) { if (i > 1) {
@ -65,7 +68,8 @@ static inline void dumpWord(const unsigned short *word, const int length) {
if (c == 0) { if (c == 0) {
break; break;
} }
charBuf[i] = c; // static_cast only for debugging
charBuf[i] = static_cast<char>(c);
} }
charBuf[i] = 0; charBuf[i] = 0;
if (i > 1) { if (i > 1) {
@ -236,15 +240,15 @@ static inline void prof_out(void) {
#define FLAG_BIGRAM_FREQ 0x7F #define FLAG_BIGRAM_FREQ 0x7F
#define DICTIONARY_VERSION_MIN 200 #define DICTIONARY_VERSION_MIN 200
#define NOT_VALID_WORD -99 #define NOT_VALID_WORD (-99)
#define NOT_A_CHARACTER -1 #define NOT_A_CODE_POINT (-1)
#define NOT_A_DISTANCE -1 #define NOT_A_DISTANCE (-1)
#define NOT_A_COORDINATE -1 #define NOT_A_COORDINATE (-1)
#define EQUIVALENT_CHAR_WITHOUT_DISTANCE_INFO -2 #define EQUIVALENT_CHAR_WITHOUT_DISTANCE_INFO (-2)
#define PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO -3 #define PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO (-3)
#define ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO -4 #define ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO (-4)
#define NOT_AN_INDEX -1 #define NOT_AN_INDEX (-1)
#define NOT_A_PROBABILITY -1 #define NOT_A_PROBABILITY (-1)
#define KEYCODE_SPACE ' ' #define KEYCODE_SPACE ' '
@ -355,8 +359,8 @@ template<typename T> inline T max(T a, T b) { return a > b ? a : b; }
#define NEUTRAL_AREA_RADIUS_RATIO 1.3f #define NEUTRAL_AREA_RADIUS_RATIO 1.3f
// DEBUG // DEBUG
#define INPUTLENGTH_FOR_DEBUG -1 #define INPUTLENGTH_FOR_DEBUG (-1)
#define MIN_OUTPUT_INDEX_FOR_DEBUG -1 #define MIN_OUTPUT_INDEX_FOR_DEBUG (-1)
#define DISALLOW_COPY_AND_ASSIGN(TypeName) \ #define DISALLOW_COPY_AND_ASSIGN(TypeName) \
TypeName(const TypeName&); \ TypeName(const TypeName&); \

View File

@ -90,11 +90,12 @@ class Dictionary {
// static inline methods should be defined in the header file // static inline methods should be defined in the header file
inline int Dictionary::wideStrLen(unsigned short *str) { inline int Dictionary::wideStrLen(unsigned short *str) {
if (!str) return 0; if (!str) return 0;
unsigned short *end = str; int length = 0;
while (*end) { while (*str) {
end++; str++;
length++;
} }
return end - str; return length;
} }
} // namespace latinime } // namespace latinime
#endif // LATINIME_DICTIONARY_H #endif // LATINIME_DICTIONARY_H

View File

@ -29,7 +29,6 @@
namespace latinime { namespace latinime {
/* static */ const int ProximityInfo::NOT_A_CODE = -1;
/* static */ const float ProximityInfo::NOT_A_DISTANCE_FLOAT = -1.0f; /* static */ const float ProximityInfo::NOT_A_DISTANCE_FLOAT = -1.0f;
static inline void safeGetOrFillZeroIntArrayRegion(JNIEnv *env, jintArray jArray, jsize len, static inline void safeGetOrFillZeroIntArrayRegion(JNIEnv *env, jintArray jArray, jsize len,
@ -84,22 +83,22 @@ ProximityInfo::ProximityInfo(JNIEnv *env, const jstring localeJStr, const int ma
safeGetOrFillZeroIntArrayRegion(env, keyYCoordinates, KEY_COUNT, mKeyYCoordinates); safeGetOrFillZeroIntArrayRegion(env, keyYCoordinates, KEY_COUNT, mKeyYCoordinates);
safeGetOrFillZeroIntArrayRegion(env, keyWidths, KEY_COUNT, mKeyWidths); safeGetOrFillZeroIntArrayRegion(env, keyWidths, KEY_COUNT, mKeyWidths);
safeGetOrFillZeroIntArrayRegion(env, keyHeights, KEY_COUNT, mKeyHeights); safeGetOrFillZeroIntArrayRegion(env, keyHeights, KEY_COUNT, mKeyHeights);
safeGetOrFillZeroIntArrayRegion(env, keyCharCodes, KEY_COUNT, mKeyCharCodes); safeGetOrFillZeroIntArrayRegion(env, keyCharCodes, KEY_COUNT, mKeyCodePoints);
safeGetOrFillZeroFloatArrayRegion(env, sweetSpotCenterXs, KEY_COUNT, mSweetSpotCenterXs); safeGetOrFillZeroFloatArrayRegion(env, sweetSpotCenterXs, KEY_COUNT, mSweetSpotCenterXs);
safeGetOrFillZeroFloatArrayRegion(env, sweetSpotCenterYs, KEY_COUNT, mSweetSpotCenterYs); safeGetOrFillZeroFloatArrayRegion(env, sweetSpotCenterYs, KEY_COUNT, mSweetSpotCenterYs);
safeGetOrFillZeroFloatArrayRegion(env, sweetSpotRadii, KEY_COUNT, mSweetSpotRadii); safeGetOrFillZeroFloatArrayRegion(env, sweetSpotRadii, KEY_COUNT, mSweetSpotRadii);
initializeCodeToKeyIndex(); initializeCodePointToKeyIndex();
initializeG(); initializeG();
} }
// Build the reversed look up table from the char code to the index in mKeyXCoordinates, // Build the reversed look up table from the char code to the index in mKeyXCoordinates,
// mKeyYCoordinates, mKeyWidths, mKeyHeights, mKeyCharCodes. // mKeyYCoordinates, mKeyWidths, mKeyHeights, mKeyCharCodes.
void ProximityInfo::initializeCodeToKeyIndex() { void ProximityInfo::initializeCodePointToKeyIndex() {
memset(mCodeToKeyIndex, -1, (MAX_CHAR_CODE + 1) * sizeof(mCodeToKeyIndex[0])); memset(mCodePointToKeyIndex, -1, sizeof(mCodePointToKeyIndex));
for (int i = 0; i < KEY_COUNT; ++i) { for (int i = 0; i < KEY_COUNT; ++i) {
const int code = mKeyCharCodes[i]; const int code = mKeyCodePoints[i];
if (0 <= code && code <= MAX_CHAR_CODE) { if (0 <= code && code <= MAX_CHAR_CODE) {
mCodeToKeyIndex[code] = i; mCodePointToKeyIndex[code] = i;
} }
} }
} }
@ -117,7 +116,8 @@ bool ProximityInfo::hasSpaceProximity(const int x, const int y) const {
if (x < 0 || y < 0) { if (x < 0 || y < 0) {
if (DEBUG_DICT) { if (DEBUG_DICT) {
AKLOGI("HasSpaceProximity: Illegal coordinates (%d, %d)", x, y); AKLOGI("HasSpaceProximity: Illegal coordinates (%d, %d)", x, y);
assert(false); // TODO: Enable this assertion.
//assert(false);
} }
return false; return false;
} }
@ -145,8 +145,8 @@ static inline float getNormalizedSquaredDistanceFloat(float x1, float y1, float
float ProximityInfo::getNormalizedSquaredDistanceFromCenterFloat( float ProximityInfo::getNormalizedSquaredDistanceFromCenterFloat(
const int keyId, const int x, const int y) const { const int keyId, const int x, const int y) const {
const float centerX = static_cast<float>(getKeyCenterXOfIdG(keyId)); const float centerX = static_cast<float>(getKeyCenterXOfKeyIdG(keyId));
const float centerY = static_cast<float>(getKeyCenterYOfIdG(keyId)); const float centerY = static_cast<float>(getKeyCenterYOfKeyIdG(keyId));
const float touchX = static_cast<float>(x); const float touchX = static_cast<float>(x);
const float touchY = static_cast<float>(y); const float touchY = static_cast<float>(y);
const float keyWidth = static_cast<float>(getMostCommonKeyWidth()); const float keyWidth = static_cast<float>(getMostCommonKeyWidth());
@ -178,7 +178,7 @@ void ProximityInfo::calculateNearbyKeyCodes(
if (c < KEYCODE_SPACE || c == primaryKey) { if (c < KEYCODE_SPACE || c == primaryKey) {
continue; continue;
} }
const int keyIndex = getKeyIndex(c); const int keyIndex = getKeyIndexOf(c);
const bool onKey = isOnKey(keyIndex, x, y); const bool onKey = isOnKey(keyIndex, x, y);
const int distance = squaredDistanceToEdge(keyIndex, x, y); const int distance = squaredDistanceToEdge(keyIndex, x, y);
if (onKey || distance < MOST_COMMON_KEY_WIDTH_SQUARE) { if (onKey || distance < MOST_COMMON_KEY_WIDTH_SQUARE) {
@ -208,7 +208,7 @@ void ProximityInfo::calculateNearbyKeyCodes(
const int32_t ac = additionalProximityChars[j]; const int32_t ac = additionalProximityChars[j];
int k = 0; int k = 0;
for (; k < insertPos; ++k) { for (; k < insertPos; ++k) {
if ((int)ac == inputCodes[k]) { if (static_cast<int>(ac) == inputCodes[k]) {
break; break;
} }
} }
@ -227,11 +227,11 @@ void ProximityInfo::calculateNearbyKeyCodes(
} }
// Add a delimiter for the proximity characters // Add a delimiter for the proximity characters
for (int i = insertPos; i < MAX_PROXIMITY_CHARS_SIZE; ++i) { for (int i = insertPos; i < MAX_PROXIMITY_CHARS_SIZE; ++i) {
inputCodes[i] = NOT_A_CODE; inputCodes[i] = NOT_A_CODE_POINT;
} }
} }
int ProximityInfo::getKeyIndex(const int c) const { int ProximityInfo::getKeyIndexOf(const int c) const {
if (KEY_COUNT == 0) { if (KEY_COUNT == 0) {
// We do not have the coordinate data // We do not have the coordinate data
return NOT_AN_INDEX; return NOT_AN_INDEX;
@ -240,28 +240,28 @@ int ProximityInfo::getKeyIndex(const int c) const {
if (baseLowerC > MAX_CHAR_CODE) { if (baseLowerC > MAX_CHAR_CODE) {
return NOT_AN_INDEX; return NOT_AN_INDEX;
} }
return mCodeToKeyIndex[baseLowerC]; return mCodePointToKeyIndex[baseLowerC];
} }
int ProximityInfo::getKeyCode(const int keyIndex) const { int ProximityInfo::getCodePointOf(const int keyIndex) const {
if (keyIndex < 0 || keyIndex >= KEY_COUNT) { if (keyIndex < 0 || keyIndex >= KEY_COUNT) {
return NOT_AN_INDEX; return NOT_A_CODE_POINT;
} }
return mKeyToCodeIndexG[keyIndex]; return mKeyIndexToCodePointG[keyIndex];
} }
void ProximityInfo::initializeG() { void ProximityInfo::initializeG() {
// TODO: Optimize // TODO: Optimize
for (int i = 0; i < KEY_COUNT; ++i) { for (int i = 0; i < KEY_COUNT; ++i) {
const int code = mKeyCharCodes[i]; const int code = mKeyCodePoints[i];
const int lowerCode = toBaseLowerCase(code); const int lowerCode = toBaseLowerCase(code);
mCenterXsG[i] = mKeyXCoordinates[i] + mKeyWidths[i] / 2; mCenterXsG[i] = mKeyXCoordinates[i] + mKeyWidths[i] / 2;
mCenterYsG[i] = mKeyYCoordinates[i] + mKeyHeights[i] / 2; mCenterYsG[i] = mKeyYCoordinates[i] + mKeyHeights[i] / 2;
if (code != lowerCode && lowerCode >= 0 && lowerCode <= MAX_CHAR_CODE) { if (code != lowerCode && lowerCode >= 0 && lowerCode <= MAX_CHAR_CODE) {
mCodeToKeyIndex[lowerCode] = i; mCodePointToKeyIndex[lowerCode] = i;
mKeyToCodeIndexG[i] = lowerCode; mKeyIndexToCodePointG[i] = lowerCode;
} else { } else {
mKeyToCodeIndexG[i] = code; mKeyIndexToCodePointG[i] = code;
} }
} }
for (int i = 0; i < KEY_COUNT; i++) { for (int i = 0; i < KEY_COUNT; i++) {
@ -274,22 +274,22 @@ void ProximityInfo::initializeG() {
} }
} }
float ProximityInfo::getKeyCenterXOfCharG(int charCode) const { float ProximityInfo::getKeyCenterXOfCodePointG(int charCode) const {
return getKeyCenterXOfIdG(getKeyIndex(charCode)); return getKeyCenterXOfKeyIdG(getKeyIndexOf(charCode));
} }
float ProximityInfo::getKeyCenterYOfCharG(int charCode) const { float ProximityInfo::getKeyCenterYOfCodePointG(int charCode) const {
return getKeyCenterYOfIdG(getKeyIndex(charCode)); return getKeyCenterYOfKeyIdG(getKeyIndexOf(charCode));
} }
float ProximityInfo::getKeyCenterXOfIdG(int keyId) const { float ProximityInfo::getKeyCenterXOfKeyIdG(int keyId) const {
if (keyId >= 0) { if (keyId >= 0) {
return mCenterXsG[keyId]; return mCenterXsG[keyId];
} }
return 0; return 0;
} }
float ProximityInfo::getKeyCenterYOfIdG(int keyId) const { float ProximityInfo::getKeyCenterYOfKeyIdG(int keyId) const {
if (keyId >= 0) { if (keyId >= 0) {
return mCenterYsG[keyId]; return mCenterYsG[keyId];
} }
@ -297,8 +297,8 @@ float ProximityInfo::getKeyCenterYOfIdG(int keyId) const {
} }
int ProximityInfo::getKeyKeyDistanceG(int key0, int key1) const { int ProximityInfo::getKeyKeyDistanceG(int key0, int key1) const {
const int keyId0 = getKeyIndex(key0); const int keyId0 = getKeyIndexOf(key0);
const int keyId1 = getKeyIndex(key1); const int keyId1 = getKeyIndexOf(key1);
if (keyId0 >= 0 && keyId1 >= 0) { if (keyId0 >= 0 && keyId1 >= 0) {
return mKeyKeyDistancesG[keyId0][keyId1]; return mKeyKeyDistancesG[keyId0][keyId1];
} }

View File

@ -41,8 +41,8 @@ class ProximityInfo {
float getNormalizedSquaredDistanceFromCenterFloat( float getNormalizedSquaredDistanceFromCenterFloat(
const int keyId, const int x, const int y) const; const int keyId, const int x, const int y) const;
bool sameAsTyped(const unsigned short *word, int length) const; bool sameAsTyped(const unsigned short *word, int length) const;
int getKeyIndex(const int c) const; int getKeyIndexOf(const int c) const;
int getKeyCode(const int keyIndex) const; int getCodePointOf(const int keyIndex) const;
bool hasSweetSpotData(const int keyIndex) const { bool hasSweetSpotData(const int keyIndex) const {
// When there are no calibration data for a key, // When there are no calibration data for a key,
// the radius of the key is assigned to zero. // the radius of the key is assigned to zero.
@ -96,23 +96,23 @@ class ProximityInfo {
return GRID_HEIGHT; return GRID_HEIGHT;
} }
float getKeyCenterXOfCharG(int charCode) const; // TODO: These should return int.
float getKeyCenterYOfCharG(int charCode) const; float getKeyCenterXOfCodePointG(int charCode) const;
float getKeyCenterXOfIdG(int keyId) const; float getKeyCenterYOfCodePointG(int charCode) const;
float getKeyCenterYOfIdG(int keyId) const; float getKeyCenterXOfKeyIdG(int keyId) const;
float getKeyCenterYOfKeyIdG(int keyId) const;
int getKeyKeyDistanceG(int key0, int key1) const; int getKeyKeyDistanceG(int key0, int key1) const;
private: private:
DISALLOW_IMPLICIT_CONSTRUCTORS(ProximityInfo); DISALLOW_IMPLICIT_CONSTRUCTORS(ProximityInfo);
// The max number of the keys in one keyboard layout // The max number of the keys in one keyboard layout
static const int MAX_KEY_COUNT_IN_A_KEYBOARD = 64; static const int MAX_KEY_COUNT_IN_A_KEYBOARD = 64;
// The upper limit of the char code in mCodeToKeyIndex // The upper limit of the char code in mCodePointToKeyIndex
static const int MAX_CHAR_CODE = 127; static const int MAX_CHAR_CODE = 127;
static const int NOT_A_CODE;
static const float NOT_A_DISTANCE_FLOAT; static const float NOT_A_DISTANCE_FLOAT;
int getStartIndexFromCoordinates(const int x, const int y) const; int getStartIndexFromCoordinates(const int x, const int y) const;
void initializeCodeToKeyIndex(); void initializeCodePointToKeyIndex();
void initializeG(); void initializeG();
float calculateNormalizedSquaredDistance(const int keyIndex, const int inputIndex) const; float calculateNormalizedSquaredDistance(const int keyIndex, const int inputIndex) const;
float calculateSquaredDistanceFromSweetSpotCenter( float calculateSquaredDistanceFromSweetSpotCenter(
@ -143,13 +143,13 @@ class ProximityInfo {
int32_t mKeyYCoordinates[MAX_KEY_COUNT_IN_A_KEYBOARD]; int32_t mKeyYCoordinates[MAX_KEY_COUNT_IN_A_KEYBOARD];
int32_t mKeyWidths[MAX_KEY_COUNT_IN_A_KEYBOARD]; int32_t mKeyWidths[MAX_KEY_COUNT_IN_A_KEYBOARD];
int32_t mKeyHeights[MAX_KEY_COUNT_IN_A_KEYBOARD]; int32_t mKeyHeights[MAX_KEY_COUNT_IN_A_KEYBOARD];
int32_t mKeyCharCodes[MAX_KEY_COUNT_IN_A_KEYBOARD]; int32_t mKeyCodePoints[MAX_KEY_COUNT_IN_A_KEYBOARD];
float mSweetSpotCenterXs[MAX_KEY_COUNT_IN_A_KEYBOARD]; float mSweetSpotCenterXs[MAX_KEY_COUNT_IN_A_KEYBOARD];
float mSweetSpotCenterYs[MAX_KEY_COUNT_IN_A_KEYBOARD]; float mSweetSpotCenterYs[MAX_KEY_COUNT_IN_A_KEYBOARD];
float mSweetSpotRadii[MAX_KEY_COUNT_IN_A_KEYBOARD]; float mSweetSpotRadii[MAX_KEY_COUNT_IN_A_KEYBOARD];
int mCodeToKeyIndex[MAX_CHAR_CODE + 1]; int mCodePointToKeyIndex[MAX_CHAR_CODE + 1];
int mKeyToCodeIndexG[MAX_KEY_COUNT_IN_A_KEYBOARD]; int mKeyIndexToCodePointG[MAX_KEY_COUNT_IN_A_KEYBOARD];
int mCenterXsG[MAX_KEY_COUNT_IN_A_KEYBOARD]; int mCenterXsG[MAX_KEY_COUNT_IN_A_KEYBOARD];
int mCenterYsG[MAX_KEY_COUNT_IN_A_KEYBOARD]; int mCenterYsG[MAX_KEY_COUNT_IN_A_KEYBOARD];
int mKeyKeyDistancesG[MAX_KEY_COUNT_IN_A_KEYBOARD][MAX_KEY_COUNT_IN_A_KEYBOARD]; int mKeyKeyDistancesG[MAX_KEY_COUNT_IN_A_KEYBOARD][MAX_KEY_COUNT_IN_A_KEYBOARD];

View File

@ -160,7 +160,7 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi
const int currentChar = proximityChars[j]; const int currentChar = proximityChars[j];
const float squaredDistance = const float squaredDistance =
hasInputCoordinates() ? calculateNormalizedSquaredDistance( hasInputCoordinates() ? calculateNormalizedSquaredDistance(
mProximityInfo->getKeyIndex(currentChar), i) : mProximityInfo->getKeyIndexOf(currentChar), i) :
NOT_A_DISTANCE_FLOAT; NOT_A_DISTANCE_FLOAT;
if (squaredDistance >= 0.0f) { if (squaredDistance >= 0.0f) {
mNormalizedSquaredDistances[i * MAX_PROXIMITY_CHARS_SIZE_INTERNAL + j] = mNormalizedSquaredDistances[i * MAX_PROXIMITY_CHARS_SIZE_INTERNAL + j] =
@ -282,7 +282,7 @@ bool ProximityInfoState::pushTouchPoint(const int nodeChar, int x, int y, const
const NearKeysDistanceMap *const prevPrevNearKeysDistances) { const NearKeysDistanceMap *const prevPrevNearKeysDistances) {
static const float LAST_POINT_SKIP_DISTANCE_SCALE = 0.25f; static const float LAST_POINT_SKIP_DISTANCE_SCALE = 0.25f;
uint32_t size = mInputXs.size(); size_t size = mInputXs.size();
bool popped = false; bool popped = false;
if (nodeChar < 0 && sample) { if (nodeChar < 0 && sample) {
const float nearest = updateNearKeysDistances(x, y, currentNearKeysDistances); const float nearest = updateNearKeysDistances(x, y, currentNearKeysDistances);
@ -324,10 +324,10 @@ bool ProximityInfoState::pushTouchPoint(const int nodeChar, int x, int y, const
} }
if (nodeChar >= 0 && (x < 0 || y < 0)) { if (nodeChar >= 0 && (x < 0 || y < 0)) {
const int keyId = mProximityInfo->getKeyIndex(nodeChar); const int keyId = mProximityInfo->getKeyIndexOf(nodeChar);
if (keyId >= 0) { if (keyId >= 0) {
x = mProximityInfo->getKeyCenterXOfIdG(keyId); x = mProximityInfo->getKeyCenterXOfKeyIdG(keyId);
y = mProximityInfo->getKeyCenterYOfIdG(keyId); y = mProximityInfo->getKeyCenterYOfKeyIdG(keyId);
} }
} }
@ -368,8 +368,8 @@ int ProximityInfoState::getDuration(const int index) const {
return 0; return 0;
} }
float ProximityInfoState::getPointToKeyLength(int inputIndex, int charCode, float scale) { float ProximityInfoState::getPointToKeyLength(int inputIndex, int codePoint, float scale) {
const int keyId = mProximityInfo->getKeyIndex(charCode); const int keyId = mProximityInfo->getKeyIndexOf(codePoint);
if (keyId >= 0) { if (keyId >= 0) {
const int index = inputIndex * mProximityInfo->getKeyCount() + keyId; const int index = inputIndex * mProximityInfo->getKeyCount() + keyId;
return min(mDistanceCache[index] * scale, mMaxPointToKeyLength); return min(mDistanceCache[index] * scale, mMaxPointToKeyLength);
@ -382,8 +382,8 @@ int ProximityInfoState::getKeyKeyDistance(int key0, int key1) {
} }
int ProximityInfoState::getSpaceY() { int ProximityInfoState::getSpaceY() {
const int keyId = mProximityInfo->getKeyIndex(' '); const int keyId = mProximityInfo->getKeyIndexOf(' ');
return mProximityInfo->getKeyCenterYOfIdG(keyId); return mProximityInfo->getKeyCenterYOfKeyIdG(keyId);
} }
float ProximityInfoState::calculateSquaredDistanceFromSweetSpotCenter( float ProximityInfoState::calculateSquaredDistanceFromSweetSpotCenter(

View File

@ -52,9 +52,9 @@ class TerminalAttributes {
0 != (shortcutFlags & BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT); 0 != (shortcutFlags & BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT);
unsigned int i; unsigned int i;
for (i = 0; i < MAX_WORD_LENGTH_INTERNAL; ++i) { for (i = 0; i < MAX_WORD_LENGTH_INTERNAL; ++i) {
const int charCode = BinaryFormat::getCharCodeAndForwardPointer(mDict, &mPos); const int codePoint = BinaryFormat::getCodePointAndForwardPointer(mDict, &mPos);
if (NOT_A_CHARACTER == charCode) break; if (NOT_A_CODE_POINT == codePoint) break;
outWord[i] = (uint16_t)charCode; outWord[i] = (uint16_t)codePoint;
} }
*outFreq = BinaryFormat::getAttributeFrequencyFromFlags(shortcutFlags); *outFreq = BinaryFormat::getAttributeFrequencyFromFlags(shortcutFlags);
mPos += BinaryFormat::CHARACTER_ARRAY_TERMINATOR_SIZE; mPos += BinaryFormat::CHARACTER_ARRAY_TERMINATOR_SIZE;
@ -62,8 +62,8 @@ class TerminalAttributes {
} }
}; };
TerminalAttributes(const uint8_t *const dict, const uint8_t flags, const int pos) : TerminalAttributes(const uint8_t *const dict, const uint8_t flags, const int pos)
mDict(dict), mFlags(flags), mStartPos(pos) { : mDict(dict), mFlags(flags), mStartPos(pos) {
} }
inline ShortcutIterator getShortcutIterator() const { inline ShortcutIterator getShortcutIterator() const {

View File

@ -58,12 +58,12 @@ UnigramDictionary::~UnigramDictionary() {
} }
static inline unsigned int getCodesBufferSize(const int *codes, const int codesSize) { static inline unsigned int getCodesBufferSize(const int *codes, const int codesSize) {
return sizeof(*codes) * codesSize; return static_cast<unsigned int>(sizeof(*codes)) * codesSize;
} }
// TODO: This needs to take a const unsigned short* and not tinker with its contents // TODO: This needs to take a const unsigned short* and not tinker with its contents
static inline void addWord( static inline void addWord(unsigned short *word, int length, int frequency,
unsigned short *word, int length, int frequency, WordsPriorityQueue *queue, int type) { WordsPriorityQueue *queue, int type) {
queue->push(frequency, word, length, type); queue->push(frequency, word, length, type);
} }
@ -106,7 +106,7 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit
WordsPriorityQueuePool *queuePool, WordsPriorityQueuePool *queuePool,
const digraph_t *const digraphs, const unsigned int digraphsSize) const { const digraph_t *const digraphs, const unsigned int digraphsSize) const {
const int startIndex = codesDest - codesBuffer; const int startIndex = static_cast<int>(codesDest - codesBuffer);
if (currentDepth < MAX_DIGRAPH_SEARCH_DEPTH) { if (currentDepth < MAX_DIGRAPH_SEARCH_DEPTH) {
for (int i = 0; i < codesRemain; ++i) { for (int i = 0; i < codesRemain; ++i) {
xCoordinatesBuffer[startIndex + i] = xcoordinates[codesBufferSize - codesRemain + i]; xCoordinatesBuffer[startIndex + i] = xcoordinates[codesBufferSize - codesRemain + i];
@ -170,8 +170,7 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit
// bigramMap contains the association <bigram address> -> <bigram frequency> // bigramMap contains the association <bigram address> -> <bigram frequency>
// bigramFilter is a bloom filter for fast rejection: see functions setInFilter and isInFilter // bigramFilter is a bloom filter for fast rejection: see functions setInFilter and isInFilter
// in bigram_dictionary.cpp // in bigram_dictionary.cpp
int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
const int *xcoordinates,
const int *ycoordinates, const int *codes, const int codesSize, const int *ycoordinates, const int *codes, const int codesSize,
const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
const bool useFullEditDistance, unsigned short *outWords, int *frequencies, const bool useFullEditDistance, unsigned short *outWords, int *frequencies,
@ -597,11 +596,10 @@ int UnigramDictionary::getSubStringSuggestion(
void UnigramDictionary::getMultiWordsSuggestionRec(ProximityInfo *proximityInfo, void UnigramDictionary::getMultiWordsSuggestionRec(ProximityInfo *proximityInfo,
const int *xcoordinates, const int *ycoordinates, const int *codes, const int *xcoordinates, const int *ycoordinates, const int *codes,
const bool useFullEditDistance, const int inputSize, const bool useFullEditDistance, const int inputSize, Correction *correction,
Correction *correction, WordsPriorityQueuePool *queuePool, WordsPriorityQueuePool *queuePool, const bool hasAutoCorrectionCandidate,
const bool hasAutoCorrectionCandidate, const int startInputPos, const int startWordIndex, const int startInputPos, const int startWordIndex, const int outputWordLength,
const int outputWordLength, int *freqArray, int *wordLengthArray, int *freqArray, int *wordLengthArray, unsigned short *outputWord) const {
unsigned short *outputWord) const {
if (startWordIndex >= (MULTIPLE_WORDS_SUGGESTION_MAX_WORDS - 1)) { if (startWordIndex >= (MULTIPLE_WORDS_SUGGESTION_MAX_WORDS - 1)) {
// Return if this is the last word index // Return if this is the last word index
return; return;
@ -724,13 +722,13 @@ inline int UnigramDictionary::getMostFrequentWordLike(const int startInputIndex,
// In and out parameters may point to the same location. This function takes care // In and out parameters may point to the same location. This function takes care
// not to use any input parameters after it wrote into its outputs. // not to use any input parameters after it wrote into its outputs.
static inline bool testCharGroupForContinuedLikeness(const uint8_t flags, static inline bool testCharGroupForContinuedLikeness(const uint8_t flags,
const uint8_t *const root, const int startPos, const uint8_t *const root, const int startPos, const uint16_t *const inWord,
const uint16_t *const inWord, const int startInputIndex, const int startInputIndex, const int inputSize, int32_t *outNewWord, int *outInputIndex,
int32_t *outNewWord, int *outInputIndex, int *outPos) { int *outPos) {
const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags)); const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags));
int pos = startPos; int pos = startPos;
int32_t character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos); int32_t codePoint = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
int32_t baseChar = toBaseLowerCase(character); int32_t baseChar = toBaseLowerCase(codePoint);
const uint16_t wChar = toBaseLowerCase(inWord[startInputIndex]); const uint16_t wChar = toBaseLowerCase(inWord[startInputIndex]);
if (baseChar != wChar) { if (baseChar != wChar) {
@ -739,18 +737,18 @@ static inline bool testCharGroupForContinuedLikeness(const uint8_t flags,
return false; return false;
} }
int inputIndex = startInputIndex; int inputIndex = startInputIndex;
outNewWord[inputIndex] = character; outNewWord[inputIndex] = codePoint;
if (hasMultipleChars) { if (hasMultipleChars) {
character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos); codePoint = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
while (NOT_A_CHARACTER != character) { while (NOT_A_CODE_POINT != codePoint) {
baseChar = toBaseLowerCase(character); baseChar = toBaseLowerCase(codePoint);
if (toBaseLowerCase(inWord[++inputIndex]) != baseChar) { if (inputIndex + 1 >= inputSize || toBaseLowerCase(inWord[++inputIndex]) != baseChar) {
*outPos = BinaryFormat::skipOtherCharacters(root, pos); *outPos = BinaryFormat::skipOtherCharacters(root, pos);
*outInputIndex = startInputIndex; *outInputIndex = startInputIndex;
return false; return false;
} }
outNewWord[inputIndex] = character; outNewWord[inputIndex] = codePoint;
character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos); codePoint = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
} }
} }
*outInputIndex = inputIndex + 1; *outInputIndex = inputIndex + 1;
@ -765,8 +763,9 @@ static inline bool testCharGroupForContinuedLikeness(const uint8_t flags,
static inline void onTerminalWordLike(const int freq, int32_t *newWord, const int length, static inline void onTerminalWordLike(const int freq, int32_t *newWord, const int length,
short unsigned int *outWord, int *maxFreq) { short unsigned int *outWord, int *maxFreq) {
if (freq > *maxFreq) { if (freq > *maxFreq) {
for (int q = 0; q < length; ++q) for (int q = 0; q < length; ++q) {
outWord[q] = newWord[q]; outWord[q] = newWord[q];
}
outWord[length] = 0; outWord[length] = 0;
*maxFreq = freq; *maxFreq = freq;
} }
@ -775,7 +774,7 @@ static inline void onTerminalWordLike(const int freq, int32_t *newWord, const in
// Will find the highest frequency of the words like the one passed as an argument, // Will find the highest frequency of the words like the one passed as an argument,
// that is, everything that only differs by case/accents. // that is, everything that only differs by case/accents.
int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t *const inWord, int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t *const inWord,
const int length, short unsigned int *outWord) const { const int inputSize, short unsigned int *outWord) const {
int32_t newWord[MAX_WORD_LENGTH_INTERNAL]; int32_t newWord[MAX_WORD_LENGTH_INTERNAL];
int depth = 0; int depth = 0;
int maxFreq = -1; int maxFreq = -1;
@ -795,12 +794,12 @@ int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t *const inWord
int inputIndex = stackInputIndex[depth]; int inputIndex = stackInputIndex[depth];
const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos); const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
// Test whether all chars in this group match with the word we are searching for. If so, // Test whether all chars in this group match with the word we are searching for. If so,
// we want to traverse its children (or if the length matches, evaluate its frequency). // we want to traverse its children (or if the inputSize matches, evaluate its frequency).
// Note that this function will output the position regardless, but will only write // Note that this function will output the position regardless, but will only write
// into inputIndex if there is a match. // into inputIndex if there is a match.
const bool isAlike = testCharGroupForContinuedLikeness(flags, root, pos, inWord, const bool isAlike = testCharGroupForContinuedLikeness(flags, root, pos, inWord,
inputIndex, newWord, &inputIndex, &pos); inputIndex, inputSize, newWord, &inputIndex, &pos);
if (isAlike && (BinaryFormat::FLAG_IS_TERMINAL & flags) && (inputIndex == length)) { if (isAlike && (BinaryFormat::FLAG_IS_TERMINAL & flags) && (inputIndex == inputSize)) {
const int frequency = BinaryFormat::readFrequencyWithoutMovingPointer(root, pos); const int frequency = BinaryFormat::readFrequencyWithoutMovingPointer(root, pos);
onTerminalWordLike(frequency, newWord, inputIndex, outWord, &maxFreq); onTerminalWordLike(frequency, newWord, inputIndex, outWord, &maxFreq);
} }
@ -809,8 +808,8 @@ int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t *const inWord
const int childrenNodePos = BinaryFormat::readChildrenPosition(root, flags, pos); const int childrenNodePos = BinaryFormat::readChildrenPosition(root, flags, pos);
// If we had a match and the word has children, we want to traverse them. We don't have // If we had a match and the word has children, we want to traverse them. We don't have
// to traverse words longer than the one we are searching for, since they will not match // to traverse words longer than the one we are searching for, since they will not match
// anyway, so don't traverse unless inputIndex < length. // anyway, so don't traverse unless inputIndex < inputSize.
if (isAlike && (-1 != childrenNodePos) && (inputIndex < length)) { if (isAlike && (-1 != childrenNodePos) && (inputIndex < inputSize)) {
// Save position for this depth, to get back to this once children are done // Save position for this depth, to get back to this once children are done
stackChildCount[depth] = charGroupIndex; stackChildCount[depth] = charGroupIndex;
stackSiblingPos[depth] = siblingPos; stackSiblingPos[depth] = siblingPos;
@ -853,7 +852,7 @@ int UnigramDictionary::getFrequency(const int32_t *const inWord, const int lengt
if (hasMultipleChars) { if (hasMultipleChars) {
pos = BinaryFormat::skipOtherCharacters(root, pos); pos = BinaryFormat::skipOtherCharacters(root, pos);
} else { } else {
BinaryFormat::getCharCodeAndForwardPointer(DICT_ROOT, &pos); BinaryFormat::getCodePointAndForwardPointer(DICT_ROOT, &pos);
} }
const int unigramFreq = BinaryFormat::readFrequencyWithoutMovingPointer(root, pos); const int unigramFreq = BinaryFormat::readFrequencyWithoutMovingPointer(root, pos);
return unigramFreq; return unigramFreq;
@ -907,23 +906,23 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos,
// else if FLAG_IS_TERMINAL: the frequency // else if FLAG_IS_TERMINAL: the frequency
// else if MASK_GROUP_ADDRESS_TYPE is not NONE: the children address // else if MASK_GROUP_ADDRESS_TYPE is not NONE: the children address
// Note that you can't have a node that both is not a terminal and has no children. // Note that you can't have a node that both is not a terminal and has no children.
int32_t c = BinaryFormat::getCharCodeAndForwardPointer(DICT_ROOT, &pos); int32_t c = BinaryFormat::getCodePointAndForwardPointer(DICT_ROOT, &pos);
assert(NOT_A_CHARACTER != c); assert(NOT_A_CODE_POINT != c);
// We are going to loop through each character and make it look like it's a different // We are going to loop through each character and make it look like it's a different
// node each time. To do that, we will process characters in this node in order until // node each time. To do that, we will process characters in this node in order until
// we find the character terminator. This is signalled by getCharCode* returning // we find the character terminator. This is signalled by getCodePoint* returning
// NOT_A_CHARACTER. // NOT_A_CODE_POINT.
// As a special case, if there is only one character in this node, we must not read the // As a special case, if there is only one character in this node, we must not read the
// next bytes so we will simulate the NOT_A_CHARACTER return by testing the flags. // next bytes so we will simulate the NOT_A_CODE_POINT return by testing the flags.
// This way, each loop run will look like a "virtual node". // This way, each loop run will look like a "virtual node".
do { do {
// We prefetch the next char. If 'c' is the last char of this node, we will have // We prefetch the next char. If 'c' is the last char of this node, we will have
// NOT_A_CHARACTER in the next char. From this we can decide whether this virtual node // NOT_A_CODE_POINT in the next char. From this we can decide whether this virtual node
// should behave as a terminal or not and whether we have children. // should behave as a terminal or not and whether we have children.
const int32_t nextc = hasMultipleChars const int32_t nextc = hasMultipleChars
? BinaryFormat::getCharCodeAndForwardPointer(DICT_ROOT, &pos) : NOT_A_CHARACTER; ? BinaryFormat::getCodePointAndForwardPointer(DICT_ROOT, &pos) : NOT_A_CODE_POINT;
const bool isLastChar = (NOT_A_CHARACTER == nextc); const bool isLastChar = (NOT_A_CODE_POINT == nextc);
// If there are more chars in this node, then this virtual node is not a terminal. // If there are more chars in this node, then this virtual node is not a terminal.
// If we are on the last char, this virtual node is a terminal if this node is. // If we are on the last char, this virtual node is a terminal if this node is.
const bool isTerminal = isLastChar && isTerminalNode; const bool isTerminal = isLastChar && isTerminalNode;
@ -952,9 +951,9 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos,
// Prepare for the next character. Promote the prefetched char to current char - the loop // Prepare for the next character. Promote the prefetched char to current char - the loop
// will take care of prefetching the next. If we finally found our last char, nextc will // will take care of prefetching the next. If we finally found our last char, nextc will
// contain NOT_A_CHARACTER. // contain NOT_A_CODE_POINT.
c = nextc; c = nextc;
} while (NOT_A_CHARACTER != c); } while (NOT_A_CODE_POINT != c);
if (isTerminalNode) { if (isTerminalNode) {
// The frequency should be here, because we come here only if this is actually // The frequency should be here, because we come here only if this is actually

View File

@ -43,11 +43,11 @@ class UnigramDictionary {
int fullWordMultiplier, int maxWordLength, int maxWords, const unsigned int flags); int fullWordMultiplier, int maxWordLength, int maxWords, const unsigned int flags);
int getFrequency(const int32_t *const inWord, const int length) const; int getFrequency(const int32_t *const inWord, const int length) const;
int getBigramPosition(int pos, unsigned short *word, int offset, int length) const; int getBigramPosition(int pos, unsigned short *word, int offset, int length) const;
int getSuggestions( int getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *ycoordinates, const int *codes, const int codesSize,
const int *codes, const int codesSize, const std::map<int, int> *bigramMap, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
const uint8_t *bigramFilter, const bool useFullEditDistance, unsigned short *outWords, const bool useFullEditDistance, unsigned short *outWords, int *frequencies,
int *frequencies, int *outputTypes) const; int *outputTypes) const;
virtual ~UnigramDictionary(); virtual ~UnigramDictionary();
private: private:
@ -94,7 +94,7 @@ class UnigramDictionary {
const int currentWordIndex) const; const int currentWordIndex) const;
int getMostFrequentWordLike(const int startInputIndex, const int inputSize, int getMostFrequentWordLike(const int startInputIndex, const int inputSize,
Correction *correction, unsigned short *word) const; Correction *correction, unsigned short *word) const;
int getMostFrequentWordLikeInner(const uint16_t *const inWord, const int length, int getMostFrequentWordLikeInner(const uint16_t *const inWord, const int inputSize,
short unsigned int *outWord) const; short unsigned int *outWord) const;
int getSubStringSuggestion( int getSubStringSuggestion(
ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates,

View File

@ -30,7 +30,7 @@ class WordsPriorityQueuePool {
mainQueueMaxWords, maxWordLength)) { mainQueueMaxWords, maxWordLength)) {
for (int i = 0, subQueueBufOffset = 0; for (int i = 0, subQueueBufOffset = 0;
i < MULTIPLE_WORDS_SUGGESTION_MAX_WORDS * SUB_QUEUE_MAX_COUNT; i < MULTIPLE_WORDS_SUGGESTION_MAX_WORDS * SUB_QUEUE_MAX_COUNT;
++i, subQueueBufOffset += sizeof(WordsPriorityQueue)) { ++i, subQueueBufOffset += static_cast<int>(sizeof(WordsPriorityQueue))) {
mSubQueues[i] = new(mSubQueueBuf + subQueueBufOffset) mSubQueues[i] = new(mSubQueueBuf + subQueueBufOffset)
WordsPriorityQueue(subQueueMaxWords, maxWordLength); WordsPriorityQueue(subQueueMaxWords, maxWordLength);
} }