Cosmetic fixes and a bug fix in UnigramDictionary::testCharGroupForContinuedLikeness().

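This change has been extracted from the work-in-progress change I4fe423834b8131fb122251892c98228a6e08ba25.
The bug fix passes inputSize to testCharGroupForContinuedLikeness() so that it stops comparing before reading past the end of inWord.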

Change-Id: I52568fa09da2ea22be7f8bfe9676b7cd73c31fa4
main
Ken Wakasa 2012-09-04 12:49:46 +09:00
parent 1a397ececf
commit f2789819bd
19 changed files with 190 additions and 179 deletions

View File

@ -68,9 +68,9 @@ static jlong latinime_BinaryDictionary_open(JNIEnv *env, jobject object,
return 0;
}
int pagesize = getpagesize();
adjust = dictOffset % pagesize;
int adjDictOffset = dictOffset - adjust;
int adjDictSize = dictSize + adjust;
adjust = static_cast<int>(dictOffset) % pagesize;
int adjDictOffset = static_cast<int>(dictOffset) - adjust;
int adjDictSize = static_cast<int>(dictSize) + adjust;
dictBuf = mmap(0, sizeof(char) * adjDictSize, PROT_READ, MAP_PRIVATE, fd, adjDictOffset);
if (dictBuf == MAP_FAILED) {
AKLOGE("DICT: Can't mmap dictionary. errno=%d", errno);
@ -120,8 +120,8 @@ static jlong latinime_BinaryDictionary_open(JNIEnv *env, jobject object,
releaseDictBuf(dictBuf, 0, 0);
#endif // USE_MMAP_FOR_DICTIONARY
} else {
dictionary = new Dictionary(dictBuf, dictSize, fd, adjust, typedLetterMultiplier,
fullWordMultiplier, maxWordLength, maxWords, maxPredictions);
dictionary = new Dictionary(dictBuf, static_cast<int>(dictSize), fd, adjust,
typedLetterMultiplier, fullWordMultiplier, maxWordLength, maxWords, maxPredictions);
}
PROF_END(66);
PROF_CLOSE;

View File

@ -14,6 +14,8 @@
* limitations under the License.
*/
#include <stdint.h>
#include "char_utils.h"
namespace latinime {
@ -24,7 +26,7 @@ namespace latinime {
* if c is not a combined character, or the base character if it
* is combined.
*/
const unsigned short BASE_CHARS[BASE_CHARS_SIZE] = {
const uint16_t BASE_CHARS[BASE_CHARS_SIZE] = {
0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,

View File

@ -156,7 +156,7 @@ int BigramDictionary::getBigramListPositionForWord(const int32_t *prevWord,
const int flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
if (0 == (flags & BinaryFormat::FLAG_HAS_BIGRAMS)) return 0;
if (0 == (flags & BinaryFormat::FLAG_HAS_MULTIPLE_CHARS)) {
BinaryFormat::getCharCodeAndForwardPointer(root, &pos);
BinaryFormat::getCodePointAndForwardPointer(root, &pos);
} else {
pos = BinaryFormat::skipOtherCharacters(root, pos);
}

View File

@ -84,7 +84,7 @@ class BinaryFormat {
static unsigned int getFlags(const uint8_t *const dict);
static int getGroupCountAndForwardPointer(const uint8_t *const dict, int *pos);
static uint8_t getFlagsAndForwardPointer(const uint8_t *const dict, int *pos);
static int32_t getCharCodeAndForwardPointer(const uint8_t *const dict, int *pos);
static int32_t getCodePointAndForwardPointer(const uint8_t *const dict, int *pos);
static int readFrequencyWithoutMovingPointer(const uint8_t *const dict, const int pos);
static int skipOtherCharacters(const uint8_t *const dict, const int pos);
static int skipChildrenPosition(const uint8_t flags, const int pos);
@ -176,22 +176,22 @@ inline uint8_t BinaryFormat::getFlagsAndForwardPointer(const uint8_t *const dict
return dict[(*pos)++];
}
inline int32_t BinaryFormat::getCharCodeAndForwardPointer(const uint8_t *const dict, int *pos) {
inline int32_t BinaryFormat::getCodePointAndForwardPointer(const uint8_t *const dict, int *pos) {
const int origin = *pos;
const int32_t character = dict[origin];
if (character < MINIMAL_ONE_BYTE_CHARACTER_VALUE) {
if (character == CHARACTER_ARRAY_TERMINATOR) {
const int32_t codePoint = dict[origin];
if (codePoint < MINIMAL_ONE_BYTE_CHARACTER_VALUE) {
if (codePoint == CHARACTER_ARRAY_TERMINATOR) {
*pos = origin + 1;
return NOT_A_CHARACTER;
return NOT_A_CODE_POINT;
} else {
*pos = origin + 3;
const int32_t char_1 = character << 16;
const int32_t char_1 = codePoint << 16;
const int32_t char_2 = char_1 + (dict[origin + 1] << 8);
return char_2 + dict[origin + 2];
}
} else {
*pos = origin + 1;
return character;
return codePoint;
}
}
@ -369,15 +369,15 @@ inline int BinaryFormat::getTerminalPosition(const uint8_t *const root,
if (0 >= charGroupCount) return NOT_VALID_WORD;
const int charGroupPos = pos;
const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
int32_t character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos);
int32_t character = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
if (character == wChar) {
// This is the correct node. Only one character group may start with the same
// char within a node, so either we found our match in this node, or there is
// no match and we can return NOT_VALID_WORD. So we will check that all the characters
// in this character group do indeed match.
if (FLAG_HAS_MULTIPLE_CHARS & flags) {
character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos);
while (NOT_A_CHARACTER != character) {
character = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
while (NOT_A_CODE_POINT != character) {
++wordPos;
// If we overshoot the length of the word we search for, or if we find a single
// character that does not match, as explained above, it means the word is
@ -385,7 +385,7 @@ inline int BinaryFormat::getTerminalPosition(const uint8_t *const root,
// match the word on the first character, but not matching the whole word).
if (wordPos > length) return NOT_VALID_WORD;
if (inWord[wordPos] != character) return NOT_VALID_WORD;
character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos);
character = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
}
}
// If we come here we know that so far, we do match. Either we are on a terminal
@ -457,19 +457,19 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t *const root, const int a
--charGroupCount) {
const int startPos = pos;
const uint8_t flags = getFlagsAndForwardPointer(root, &pos);
const int32_t character = getCharCodeAndForwardPointer(root, &pos);
const int32_t character = getCodePointAndForwardPointer(root, &pos);
if (address == startPos) {
// We found the address. Copy the rest of the word in the buffer and return
// the length.
outWord[wordPos] = character;
if (FLAG_HAS_MULTIPLE_CHARS & flags) {
int32_t nextChar = getCharCodeAndForwardPointer(root, &pos);
int32_t nextChar = getCodePointAndForwardPointer(root, &pos);
// We count chars in order to avoid infinite loops if the file is broken or
// if there is some other bug
int charCount = maxDepth;
while (NOT_A_CHARACTER != nextChar && --charCount > 0) {
while (NOT_A_CODE_POINT != nextChar && --charCount > 0) {
outWord[++wordPos] = nextChar;
nextChar = getCharCodeAndForwardPointer(root, &pos);
nextChar = getCodePointAndForwardPointer(root, &pos);
}
}
*outUnigramFrequency = readFrequencyWithoutMovingPointer(root, pos);
@ -523,16 +523,16 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t *const root, const int a
const uint8_t lastFlags =
getFlagsAndForwardPointer(root, &lastCandidateGroupPos);
const int32_t lastChar =
getCharCodeAndForwardPointer(root, &lastCandidateGroupPos);
getCodePointAndForwardPointer(root, &lastCandidateGroupPos);
// We copy all the characters in this group to the buffer
outWord[wordPos] = lastChar;
if (FLAG_HAS_MULTIPLE_CHARS & lastFlags) {
int32_t nextChar =
getCharCodeAndForwardPointer(root, &lastCandidateGroupPos);
getCodePointAndForwardPointer(root, &lastCandidateGroupPos);
int charCount = maxDepth;
while (-1 != nextChar && --charCount > 0) {
outWord[++wordPos] = nextChar;
nextChar = getCharCodeAndForwardPointer(root, &lastCandidateGroupPos);
nextChar = getCodePointAndForwardPointer(root, &lastCandidateGroupPos);
}
}
++wordPos;
@ -582,8 +582,8 @@ inline int BinaryFormat::computeFrequencyForBigram(const int unigramFreq, const
// 0 for the bigram frequency represents the middle of the 16th step from the top,
// while a value of 15 represents the middle of the top step.
// See makedict.BinaryDictInputOutput for details.
const float stepSize = (static_cast<float>(MAX_FREQ) - unigramFreq) / (1.5f + MAX_BIGRAM_FREQ);
return static_cast<int>(unigramFreq + (bigramFreq + 1) * stepSize);
const float stepSize = static_cast<float>(MAX_FREQ - unigramFreq) / (1.5f + MAX_BIGRAM_FREQ);
return unigramFreq + static_cast<int>(static_cast<float>(bigramFreq + 1) * stepSize);
}
// This returns a probability in log space.

View File

@ -23,14 +23,16 @@
namespace latinime {
static inline void setInFilter(uint8_t *filter, const int position) {
const unsigned int bucket = position % BIGRAM_FILTER_MODULO;
filter[bucket >> 3] |= (1 << (bucket & 0x7));
// TODO: uint32_t position
static inline void setInFilter(uint8_t *filter, const int32_t position) {
const uint32_t bucket = static_cast<uint32_t>(position % BIGRAM_FILTER_MODULO);
filter[bucket >> 3] |= static_cast<uint8_t>(1 << (bucket & 0x7));
}
static inline bool isInFilter(const uint8_t *filter, const int position) {
const unsigned int bucket = position % BIGRAM_FILTER_MODULO;
return filter[bucket >> 3] & (1 << (bucket & 0x7));
// TODO: uint32_t position
static inline bool isInFilter(const uint8_t *filter, const int32_t position) {
const uint32_t bucket = static_cast<uint32_t>(position % BIGRAM_FILTER_MODULO);
return filter[bucket >> 3] & static_cast<uint8_t>(1 << (bucket & 0x7));
}
} // namespace latinime
#endif // LATINIME_BLOOM_FILTER_H

View File

@ -18,6 +18,7 @@
#define LATINIME_CHAR_UTILS_H
#include <cctype>
#include <stdint.h>
namespace latinime {
@ -43,7 +44,7 @@ unsigned short latin_tolower(const unsigned short c);
*/
static const int BASE_CHARS_SIZE = 0x0500;
extern const unsigned short BASE_CHARS[BASE_CHARS_SIZE];
extern const uint16_t BASE_CHARS[BASE_CHARS_SIZE];
inline static unsigned short toBaseChar(unsigned short c) {
if (c < BASE_CHARS_SIZE) {

View File

@ -362,7 +362,8 @@ Correction::CorrectionType Correction::processCharAndCalcState(
if (mSkipPos >= 0) {
if (mSkippedCount == 0 && mSkipPos < mOutputIndex) {
if (DEBUG_DICT) {
assert(mSkipPos == mOutputIndex - 1);
// TODO: Enable this assertion.
//assert(mSkipPos == mOutputIndex - 1);
}
mSkipPos = mOutputIndex;
}
@ -1126,15 +1127,16 @@ float Correction::RankingAlgorithm::calcNormalizedScore(const unsigned short *be
return 0;
}
const float maxScore = score >= S_INT_MAX ? S_INT_MAX : MAX_INITIAL_SCORE
const float maxScore = score >= S_INT_MAX ? static_cast<float>(S_INT_MAX)
: static_cast<float>(MAX_INITIAL_SCORE)
* powf(static_cast<float>(TYPED_LETTER_MULTIPLIER),
static_cast<float>(min(beforeLength, afterLength - spaceCount)))
* FULL_WORD_MULTIPLIER;
* static_cast<float>(FULL_WORD_MULTIPLIER);
// add a weight based on edit distance.
// distance <= max(afterLength, beforeLength) == afterLength,
// so, 0 <= distance / afterLength <= 1
const float weight = 1.0f - static_cast<float>(distance) / static_cast<float>(afterLength);
return (score / maxScore) * weight;
return (static_cast<float>(score) / maxScore) * weight;
}
} // namespace latinime

View File

@ -17,6 +17,8 @@
#ifndef LATINIME_DEFINES_H
#define LATINIME_DEFINES_H
#include <stdint.h>
#if defined(FLAG_DO_PROFILE) || defined(FLAG_DBG)
#include <android/log.h>
#ifndef LOG_TAG
@ -39,7 +41,8 @@ static inline void dumpWordInfo(const unsigned short *word, const int length,
if (c == 0) {
break;
}
charBuf[i] = c;
// static_cast only for debugging
charBuf[i] = static_cast<char>(c);
}
charBuf[i] = 0;
if (i > 1) {
@ -65,7 +68,8 @@ static inline void dumpWord(const unsigned short *word, const int length) {
if (c == 0) {
break;
}
charBuf[i] = c;
// static_cast only for debugging
charBuf[i] = static_cast<char>(c);
}
charBuf[i] = 0;
if (i > 1) {
@ -236,15 +240,15 @@ static inline void prof_out(void) {
#define FLAG_BIGRAM_FREQ 0x7F
#define DICTIONARY_VERSION_MIN 200
#define NOT_VALID_WORD -99
#define NOT_A_CHARACTER -1
#define NOT_A_DISTANCE -1
#define NOT_A_COORDINATE -1
#define EQUIVALENT_CHAR_WITHOUT_DISTANCE_INFO -2
#define PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO -3
#define ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO -4
#define NOT_AN_INDEX -1
#define NOT_A_PROBABILITY -1
#define NOT_VALID_WORD (-99)
#define NOT_A_CODE_POINT (-1)
#define NOT_A_DISTANCE (-1)
#define NOT_A_COORDINATE (-1)
#define EQUIVALENT_CHAR_WITHOUT_DISTANCE_INFO (-2)
#define PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO (-3)
#define ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO (-4)
#define NOT_AN_INDEX (-1)
#define NOT_A_PROBABILITY (-1)
#define KEYCODE_SPACE ' '
@ -355,8 +359,8 @@ template<typename T> inline T max(T a, T b) { return a > b ? a : b; }
#define NEUTRAL_AREA_RADIUS_RATIO 1.3f
// DEBUG
#define INPUTLENGTH_FOR_DEBUG -1
#define MIN_OUTPUT_INDEX_FOR_DEBUG -1
#define INPUTLENGTH_FOR_DEBUG (-1)
#define MIN_OUTPUT_INDEX_FOR_DEBUG (-1)
#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
TypeName(const TypeName&); \

View File

@ -90,11 +90,12 @@ class Dictionary {
// static inline methods should be defined in the header file
inline int Dictionary::wideStrLen(unsigned short *str) {
if (!str) return 0;
unsigned short *end = str;
while (*end) {
end++;
int length = 0;
while (*str) {
str++;
length++;
}
return end - str;
return length;
}
} // namespace latinime
#endif // LATINIME_DICTIONARY_H

View File

@ -29,7 +29,6 @@
namespace latinime {
/* static */ const int ProximityInfo::NOT_A_CODE = -1;
/* static */ const float ProximityInfo::NOT_A_DISTANCE_FLOAT = -1.0f;
static inline void safeGetOrFillZeroIntArrayRegion(JNIEnv *env, jintArray jArray, jsize len,
@ -84,22 +83,22 @@ ProximityInfo::ProximityInfo(JNIEnv *env, const jstring localeJStr, const int ma
safeGetOrFillZeroIntArrayRegion(env, keyYCoordinates, KEY_COUNT, mKeyYCoordinates);
safeGetOrFillZeroIntArrayRegion(env, keyWidths, KEY_COUNT, mKeyWidths);
safeGetOrFillZeroIntArrayRegion(env, keyHeights, KEY_COUNT, mKeyHeights);
safeGetOrFillZeroIntArrayRegion(env, keyCharCodes, KEY_COUNT, mKeyCharCodes);
safeGetOrFillZeroIntArrayRegion(env, keyCharCodes, KEY_COUNT, mKeyCodePoints);
safeGetOrFillZeroFloatArrayRegion(env, sweetSpotCenterXs, KEY_COUNT, mSweetSpotCenterXs);
safeGetOrFillZeroFloatArrayRegion(env, sweetSpotCenterYs, KEY_COUNT, mSweetSpotCenterYs);
safeGetOrFillZeroFloatArrayRegion(env, sweetSpotRadii, KEY_COUNT, mSweetSpotRadii);
initializeCodeToKeyIndex();
initializeCodePointToKeyIndex();
initializeG();
}
// Build the reverse lookup table from the code point to the index in mKeyXCoordinates,
// mKeyYCoordinates, mKeyWidths, mKeyHeights, mKeyCodePoints.
void ProximityInfo::initializeCodeToKeyIndex() {
memset(mCodeToKeyIndex, -1, (MAX_CHAR_CODE + 1) * sizeof(mCodeToKeyIndex[0]));
void ProximityInfo::initializeCodePointToKeyIndex() {
memset(mCodePointToKeyIndex, -1, sizeof(mCodePointToKeyIndex));
for (int i = 0; i < KEY_COUNT; ++i) {
const int code = mKeyCharCodes[i];
const int code = mKeyCodePoints[i];
if (0 <= code && code <= MAX_CHAR_CODE) {
mCodeToKeyIndex[code] = i;
mCodePointToKeyIndex[code] = i;
}
}
}
@ -117,7 +116,8 @@ bool ProximityInfo::hasSpaceProximity(const int x, const int y) const {
if (x < 0 || y < 0) {
if (DEBUG_DICT) {
AKLOGI("HasSpaceProximity: Illegal coordinates (%d, %d)", x, y);
assert(false);
// TODO: Enable this assertion.
//assert(false);
}
return false;
}
@ -145,8 +145,8 @@ static inline float getNormalizedSquaredDistanceFloat(float x1, float y1, float
float ProximityInfo::getNormalizedSquaredDistanceFromCenterFloat(
const int keyId, const int x, const int y) const {
const float centerX = static_cast<float>(getKeyCenterXOfIdG(keyId));
const float centerY = static_cast<float>(getKeyCenterYOfIdG(keyId));
const float centerX = static_cast<float>(getKeyCenterXOfKeyIdG(keyId));
const float centerY = static_cast<float>(getKeyCenterYOfKeyIdG(keyId));
const float touchX = static_cast<float>(x);
const float touchY = static_cast<float>(y);
const float keyWidth = static_cast<float>(getMostCommonKeyWidth());
@ -178,7 +178,7 @@ void ProximityInfo::calculateNearbyKeyCodes(
if (c < KEYCODE_SPACE || c == primaryKey) {
continue;
}
const int keyIndex = getKeyIndex(c);
const int keyIndex = getKeyIndexOf(c);
const bool onKey = isOnKey(keyIndex, x, y);
const int distance = squaredDistanceToEdge(keyIndex, x, y);
if (onKey || distance < MOST_COMMON_KEY_WIDTH_SQUARE) {
@ -208,7 +208,7 @@ void ProximityInfo::calculateNearbyKeyCodes(
const int32_t ac = additionalProximityChars[j];
int k = 0;
for (; k < insertPos; ++k) {
if ((int)ac == inputCodes[k]) {
if (static_cast<int>(ac) == inputCodes[k]) {
break;
}
}
@ -227,11 +227,11 @@ void ProximityInfo::calculateNearbyKeyCodes(
}
// Add a delimiter for the proximity characters
for (int i = insertPos; i < MAX_PROXIMITY_CHARS_SIZE; ++i) {
inputCodes[i] = NOT_A_CODE;
inputCodes[i] = NOT_A_CODE_POINT;
}
}
int ProximityInfo::getKeyIndex(const int c) const {
int ProximityInfo::getKeyIndexOf(const int c) const {
if (KEY_COUNT == 0) {
// We do not have the coordinate data
return NOT_AN_INDEX;
@ -240,28 +240,28 @@ int ProximityInfo::getKeyIndex(const int c) const {
if (baseLowerC > MAX_CHAR_CODE) {
return NOT_AN_INDEX;
}
return mCodeToKeyIndex[baseLowerC];
return mCodePointToKeyIndex[baseLowerC];
}
int ProximityInfo::getKeyCode(const int keyIndex) const {
int ProximityInfo::getCodePointOf(const int keyIndex) const {
if (keyIndex < 0 || keyIndex >= KEY_COUNT) {
return NOT_AN_INDEX;
return NOT_A_CODE_POINT;
}
return mKeyToCodeIndexG[keyIndex];
return mKeyIndexToCodePointG[keyIndex];
}
void ProximityInfo::initializeG() {
// TODO: Optimize
for (int i = 0; i < KEY_COUNT; ++i) {
const int code = mKeyCharCodes[i];
const int code = mKeyCodePoints[i];
const int lowerCode = toBaseLowerCase(code);
mCenterXsG[i] = mKeyXCoordinates[i] + mKeyWidths[i] / 2;
mCenterYsG[i] = mKeyYCoordinates[i] + mKeyHeights[i] / 2;
if (code != lowerCode && lowerCode >= 0 && lowerCode <= MAX_CHAR_CODE) {
mCodeToKeyIndex[lowerCode] = i;
mKeyToCodeIndexG[i] = lowerCode;
mCodePointToKeyIndex[lowerCode] = i;
mKeyIndexToCodePointG[i] = lowerCode;
} else {
mKeyToCodeIndexG[i] = code;
mKeyIndexToCodePointG[i] = code;
}
}
for (int i = 0; i < KEY_COUNT; i++) {
@ -274,22 +274,22 @@ void ProximityInfo::initializeG() {
}
}
float ProximityInfo::getKeyCenterXOfCharG(int charCode) const {
return getKeyCenterXOfIdG(getKeyIndex(charCode));
float ProximityInfo::getKeyCenterXOfCodePointG(int charCode) const {
return getKeyCenterXOfKeyIdG(getKeyIndexOf(charCode));
}
float ProximityInfo::getKeyCenterYOfCharG(int charCode) const {
return getKeyCenterYOfIdG(getKeyIndex(charCode));
float ProximityInfo::getKeyCenterYOfCodePointG(int charCode) const {
return getKeyCenterYOfKeyIdG(getKeyIndexOf(charCode));
}
float ProximityInfo::getKeyCenterXOfIdG(int keyId) const {
float ProximityInfo::getKeyCenterXOfKeyIdG(int keyId) const {
if (keyId >= 0) {
return mCenterXsG[keyId];
}
return 0;
}
float ProximityInfo::getKeyCenterYOfIdG(int keyId) const {
float ProximityInfo::getKeyCenterYOfKeyIdG(int keyId) const {
if (keyId >= 0) {
return mCenterYsG[keyId];
}
@ -297,8 +297,8 @@ float ProximityInfo::getKeyCenterYOfIdG(int keyId) const {
}
int ProximityInfo::getKeyKeyDistanceG(int key0, int key1) const {
const int keyId0 = getKeyIndex(key0);
const int keyId1 = getKeyIndex(key1);
const int keyId0 = getKeyIndexOf(key0);
const int keyId1 = getKeyIndexOf(key1);
if (keyId0 >= 0 && keyId1 >= 0) {
return mKeyKeyDistancesG[keyId0][keyId1];
}

View File

@ -41,8 +41,8 @@ class ProximityInfo {
float getNormalizedSquaredDistanceFromCenterFloat(
const int keyId, const int x, const int y) const;
bool sameAsTyped(const unsigned short *word, int length) const;
int getKeyIndex(const int c) const;
int getKeyCode(const int keyIndex) const;
int getKeyIndexOf(const int c) const;
int getCodePointOf(const int keyIndex) const;
bool hasSweetSpotData(const int keyIndex) const {
// When there are no calibration data for a key,
// the radius of the key is assigned to zero.
@ -96,23 +96,23 @@ class ProximityInfo {
return GRID_HEIGHT;
}
float getKeyCenterXOfCharG(int charCode) const;
float getKeyCenterYOfCharG(int charCode) const;
float getKeyCenterXOfIdG(int keyId) const;
float getKeyCenterYOfIdG(int keyId) const;
// TODO: These should return int.
float getKeyCenterXOfCodePointG(int charCode) const;
float getKeyCenterYOfCodePointG(int charCode) const;
float getKeyCenterXOfKeyIdG(int keyId) const;
float getKeyCenterYOfKeyIdG(int keyId) const;
int getKeyKeyDistanceG(int key0, int key1) const;
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(ProximityInfo);
// The max number of the keys in one keyboard layout
static const int MAX_KEY_COUNT_IN_A_KEYBOARD = 64;
// The upper limit of the char code in mCodeToKeyIndex
// The upper limit of the char code in mCodePointToKeyIndex
static const int MAX_CHAR_CODE = 127;
static const int NOT_A_CODE;
static const float NOT_A_DISTANCE_FLOAT;
int getStartIndexFromCoordinates(const int x, const int y) const;
void initializeCodeToKeyIndex();
void initializeCodePointToKeyIndex();
void initializeG();
float calculateNormalizedSquaredDistance(const int keyIndex, const int inputIndex) const;
float calculateSquaredDistanceFromSweetSpotCenter(
@ -143,13 +143,13 @@ class ProximityInfo {
int32_t mKeyYCoordinates[MAX_KEY_COUNT_IN_A_KEYBOARD];
int32_t mKeyWidths[MAX_KEY_COUNT_IN_A_KEYBOARD];
int32_t mKeyHeights[MAX_KEY_COUNT_IN_A_KEYBOARD];
int32_t mKeyCharCodes[MAX_KEY_COUNT_IN_A_KEYBOARD];
int32_t mKeyCodePoints[MAX_KEY_COUNT_IN_A_KEYBOARD];
float mSweetSpotCenterXs[MAX_KEY_COUNT_IN_A_KEYBOARD];
float mSweetSpotCenterYs[MAX_KEY_COUNT_IN_A_KEYBOARD];
float mSweetSpotRadii[MAX_KEY_COUNT_IN_A_KEYBOARD];
int mCodeToKeyIndex[MAX_CHAR_CODE + 1];
int mCodePointToKeyIndex[MAX_CHAR_CODE + 1];
int mKeyToCodeIndexG[MAX_KEY_COUNT_IN_A_KEYBOARD];
int mKeyIndexToCodePointG[MAX_KEY_COUNT_IN_A_KEYBOARD];
int mCenterXsG[MAX_KEY_COUNT_IN_A_KEYBOARD];
int mCenterYsG[MAX_KEY_COUNT_IN_A_KEYBOARD];
int mKeyKeyDistancesG[MAX_KEY_COUNT_IN_A_KEYBOARD][MAX_KEY_COUNT_IN_A_KEYBOARD];

View File

@ -160,7 +160,7 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi
const int currentChar = proximityChars[j];
const float squaredDistance =
hasInputCoordinates() ? calculateNormalizedSquaredDistance(
mProximityInfo->getKeyIndex(currentChar), i) :
mProximityInfo->getKeyIndexOf(currentChar), i) :
NOT_A_DISTANCE_FLOAT;
if (squaredDistance >= 0.0f) {
mNormalizedSquaredDistances[i * MAX_PROXIMITY_CHARS_SIZE_INTERNAL + j] =
@ -282,7 +282,7 @@ bool ProximityInfoState::pushTouchPoint(const int nodeChar, int x, int y, const
const NearKeysDistanceMap *const prevPrevNearKeysDistances) {
static const float LAST_POINT_SKIP_DISTANCE_SCALE = 0.25f;
uint32_t size = mInputXs.size();
size_t size = mInputXs.size();
bool popped = false;
if (nodeChar < 0 && sample) {
const float nearest = updateNearKeysDistances(x, y, currentNearKeysDistances);
@ -324,10 +324,10 @@ bool ProximityInfoState::pushTouchPoint(const int nodeChar, int x, int y, const
}
if (nodeChar >= 0 && (x < 0 || y < 0)) {
const int keyId = mProximityInfo->getKeyIndex(nodeChar);
const int keyId = mProximityInfo->getKeyIndexOf(nodeChar);
if (keyId >= 0) {
x = mProximityInfo->getKeyCenterXOfIdG(keyId);
y = mProximityInfo->getKeyCenterYOfIdG(keyId);
x = mProximityInfo->getKeyCenterXOfKeyIdG(keyId);
y = mProximityInfo->getKeyCenterYOfKeyIdG(keyId);
}
}
@ -368,8 +368,8 @@ int ProximityInfoState::getDuration(const int index) const {
return 0;
}
float ProximityInfoState::getPointToKeyLength(int inputIndex, int charCode, float scale) {
const int keyId = mProximityInfo->getKeyIndex(charCode);
float ProximityInfoState::getPointToKeyLength(int inputIndex, int codePoint, float scale) {
const int keyId = mProximityInfo->getKeyIndexOf(codePoint);
if (keyId >= 0) {
const int index = inputIndex * mProximityInfo->getKeyCount() + keyId;
return min(mDistanceCache[index] * scale, mMaxPointToKeyLength);
@ -382,8 +382,8 @@ int ProximityInfoState::getKeyKeyDistance(int key0, int key1) {
}
int ProximityInfoState::getSpaceY() {
const int keyId = mProximityInfo->getKeyIndex(' ');
return mProximityInfo->getKeyCenterYOfIdG(keyId);
const int keyId = mProximityInfo->getKeyIndexOf(' ');
return mProximityInfo->getKeyCenterYOfKeyIdG(keyId);
}
float ProximityInfoState::calculateSquaredDistanceFromSweetSpotCenter(

View File

@ -52,9 +52,9 @@ class TerminalAttributes {
0 != (shortcutFlags & BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT);
unsigned int i;
for (i = 0; i < MAX_WORD_LENGTH_INTERNAL; ++i) {
const int charCode = BinaryFormat::getCharCodeAndForwardPointer(mDict, &mPos);
if (NOT_A_CHARACTER == charCode) break;
outWord[i] = (uint16_t)charCode;
const int codePoint = BinaryFormat::getCodePointAndForwardPointer(mDict, &mPos);
if (NOT_A_CODE_POINT == codePoint) break;
outWord[i] = (uint16_t)codePoint;
}
*outFreq = BinaryFormat::getAttributeFrequencyFromFlags(shortcutFlags);
mPos += BinaryFormat::CHARACTER_ARRAY_TERMINATOR_SIZE;
@ -62,8 +62,8 @@ class TerminalAttributes {
}
};
TerminalAttributes(const uint8_t *const dict, const uint8_t flags, const int pos) :
mDict(dict), mFlags(flags), mStartPos(pos) {
TerminalAttributes(const uint8_t *const dict, const uint8_t flags, const int pos)
: mDict(dict), mFlags(flags), mStartPos(pos) {
}
inline ShortcutIterator getShortcutIterator() const {

View File

@ -58,12 +58,12 @@ UnigramDictionary::~UnigramDictionary() {
}
static inline unsigned int getCodesBufferSize(const int *codes, const int codesSize) {
return sizeof(*codes) * codesSize;
return static_cast<unsigned int>(sizeof(*codes)) * codesSize;
}
// TODO: This needs to take a const unsigned short* and not tinker with its contents
static inline void addWord(
unsigned short *word, int length, int frequency, WordsPriorityQueue *queue, int type) {
static inline void addWord(unsigned short *word, int length, int frequency,
WordsPriorityQueue *queue, int type) {
queue->push(frequency, word, length, type);
}
@ -106,7 +106,7 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit
WordsPriorityQueuePool *queuePool,
const digraph_t *const digraphs, const unsigned int digraphsSize) const {
const int startIndex = codesDest - codesBuffer;
const int startIndex = static_cast<int>(codesDest - codesBuffer);
if (currentDepth < MAX_DIGRAPH_SEARCH_DEPTH) {
for (int i = 0; i < codesRemain; ++i) {
xCoordinatesBuffer[startIndex + i] = xcoordinates[codesBufferSize - codesRemain + i];
@ -170,8 +170,7 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit
// bigramMap contains the association <bigram address> -> <bigram frequency>
// bigramFilter is a bloom filter for fast rejection: see functions setInFilter and isInFilter
// in the bloom filter header (guarded by LATINIME_BLOOM_FILTER_H)
int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo,
const int *xcoordinates,
int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
const int *ycoordinates, const int *codes, const int codesSize,
const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
const bool useFullEditDistance, unsigned short *outWords, int *frequencies,
@ -597,11 +596,10 @@ int UnigramDictionary::getSubStringSuggestion(
void UnigramDictionary::getMultiWordsSuggestionRec(ProximityInfo *proximityInfo,
const int *xcoordinates, const int *ycoordinates, const int *codes,
const bool useFullEditDistance, const int inputSize,
Correction *correction, WordsPriorityQueuePool *queuePool,
const bool hasAutoCorrectionCandidate, const int startInputPos, const int startWordIndex,
const int outputWordLength, int *freqArray, int *wordLengthArray,
unsigned short *outputWord) const {
const bool useFullEditDistance, const int inputSize, Correction *correction,
WordsPriorityQueuePool *queuePool, const bool hasAutoCorrectionCandidate,
const int startInputPos, const int startWordIndex, const int outputWordLength,
int *freqArray, int *wordLengthArray, unsigned short *outputWord) const {
if (startWordIndex >= (MULTIPLE_WORDS_SUGGESTION_MAX_WORDS - 1)) {
// Return if the last word index
return;
@ -724,13 +722,13 @@ inline int UnigramDictionary::getMostFrequentWordLike(const int startInputIndex,
// In and out parameters may point to the same location. This function takes care
// not to use any input parameters after it wrote into its outputs.
static inline bool testCharGroupForContinuedLikeness(const uint8_t flags,
const uint8_t *const root, const int startPos,
const uint16_t *const inWord, const int startInputIndex,
int32_t *outNewWord, int *outInputIndex, int *outPos) {
const uint8_t *const root, const int startPos, const uint16_t *const inWord,
const int startInputIndex, const int inputSize, int32_t *outNewWord, int *outInputIndex,
int *outPos) {
const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags));
int pos = startPos;
int32_t character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos);
int32_t baseChar = toBaseLowerCase(character);
int32_t codePoint = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
int32_t baseChar = toBaseLowerCase(codePoint);
const uint16_t wChar = toBaseLowerCase(inWord[startInputIndex]);
if (baseChar != wChar) {
@ -739,18 +737,18 @@ static inline bool testCharGroupForContinuedLikeness(const uint8_t flags,
return false;
}
int inputIndex = startInputIndex;
outNewWord[inputIndex] = character;
outNewWord[inputIndex] = codePoint;
if (hasMultipleChars) {
character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos);
while (NOT_A_CHARACTER != character) {
baseChar = toBaseLowerCase(character);
if (toBaseLowerCase(inWord[++inputIndex]) != baseChar) {
codePoint = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
while (NOT_A_CODE_POINT != codePoint) {
baseChar = toBaseLowerCase(codePoint);
if (inputIndex + 1 >= inputSize || toBaseLowerCase(inWord[++inputIndex]) != baseChar) {
*outPos = BinaryFormat::skipOtherCharacters(root, pos);
*outInputIndex = startInputIndex;
return false;
}
outNewWord[inputIndex] = character;
character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos);
outNewWord[inputIndex] = codePoint;
codePoint = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
}
}
*outInputIndex = inputIndex + 1;
@ -765,8 +763,9 @@ static inline bool testCharGroupForContinuedLikeness(const uint8_t flags,
static inline void onTerminalWordLike(const int freq, int32_t *newWord, const int length,
short unsigned int *outWord, int *maxFreq) {
if (freq > *maxFreq) {
for (int q = 0; q < length; ++q)
for (int q = 0; q < length; ++q) {
outWord[q] = newWord[q];
}
outWord[length] = 0;
*maxFreq = freq;
}
@ -775,7 +774,7 @@ static inline void onTerminalWordLike(const int freq, int32_t *newWord, const in
// Will find the highest frequency of the words like the one passed as an argument,
// that is, everything that only differs by case/accents.
int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t *const inWord,
const int length, short unsigned int *outWord) const {
const int inputSize, short unsigned int *outWord) const {
int32_t newWord[MAX_WORD_LENGTH_INTERNAL];
int depth = 0;
int maxFreq = -1;
@ -795,12 +794,12 @@ int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t *const inWord
int inputIndex = stackInputIndex[depth];
const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
// Test whether all chars in this group match with the word we are searching for. If so,
// we want to traverse its children (or if the length match, evaluate its frequency).
// we want to traverse its children (or if the inputSize matches, evaluate its frequency).
// Note that this function will output the position regardless, but will only write
// into inputIndex if there is a match.
const bool isAlike = testCharGroupForContinuedLikeness(flags, root, pos, inWord,
inputIndex, newWord, &inputIndex, &pos);
if (isAlike && (BinaryFormat::FLAG_IS_TERMINAL & flags) && (inputIndex == length)) {
inputIndex, inputSize, newWord, &inputIndex, &pos);
if (isAlike && (BinaryFormat::FLAG_IS_TERMINAL & flags) && (inputIndex == inputSize)) {
const int frequency = BinaryFormat::readFrequencyWithoutMovingPointer(root, pos);
onTerminalWordLike(frequency, newWord, inputIndex, outWord, &maxFreq);
}
@ -809,8 +808,8 @@ int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t *const inWord
const int childrenNodePos = BinaryFormat::readChildrenPosition(root, flags, pos);
// If we had a match and the word has children, we want to traverse them. We don't have
// to traverse words longer than the one we are searching for, since they will not match
// anyway, so don't traverse unless inputIndex < length.
if (isAlike && (-1 != childrenNodePos) && (inputIndex < length)) {
// anyway, so don't traverse unless inputIndex < inputSize.
if (isAlike && (-1 != childrenNodePos) && (inputIndex < inputSize)) {
// Save position for this depth, to get back to this once children are done
stackChildCount[depth] = charGroupIndex;
stackSiblingPos[depth] = siblingPos;
@ -853,7 +852,7 @@ int UnigramDictionary::getFrequency(const int32_t *const inWord, const int lengt
if (hasMultipleChars) {
pos = BinaryFormat::skipOtherCharacters(root, pos);
} else {
BinaryFormat::getCharCodeAndForwardPointer(DICT_ROOT, &pos);
BinaryFormat::getCodePointAndForwardPointer(DICT_ROOT, &pos);
}
const int unigramFreq = BinaryFormat::readFrequencyWithoutMovingPointer(root, pos);
return unigramFreq;
@ -907,23 +906,23 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos,
// else if FLAG_IS_TERMINAL: the frequency
// else if MASK_GROUP_ADDRESS_TYPE is not NONE: the children address
// Note that you can't have a node that both is not a terminal and has no children.
int32_t c = BinaryFormat::getCharCodeAndForwardPointer(DICT_ROOT, &pos);
assert(NOT_A_CHARACTER != c);
int32_t c = BinaryFormat::getCodePointAndForwardPointer(DICT_ROOT, &pos);
assert(NOT_A_CODE_POINT != c);
// We are going to loop through each character and make it look like it's a different
// node each time. To do that, we will process characters in this node in order until
// we find the character terminator. This is signalled by getCharCode* returning
// NOT_A_CHARACTER.
// we find the character terminator. This is signalled by getCodePoint* returning
// NOT_A_CODE_POINT.
// As a special case, if there is only one character in this node, we must not read the
// next bytes so we will simulate the NOT_A_CHARACTER return by testing the flags.
// next bytes so we will simulate the NOT_A_CODE_POINT return by testing the flags.
// This way, each loop run will look like a "virtual node".
do {
// We prefetch the next char. If 'c' is the last char of this node, we will have
// NOT_A_CHARACTER in the next char. From this we can decide whether this virtual node
// NOT_A_CODE_POINT in the next char. From this we can decide whether this virtual node
// should behave as a terminal or not and whether we have children.
const int32_t nextc = hasMultipleChars
? BinaryFormat::getCharCodeAndForwardPointer(DICT_ROOT, &pos) : NOT_A_CHARACTER;
const bool isLastChar = (NOT_A_CHARACTER == nextc);
? BinaryFormat::getCodePointAndForwardPointer(DICT_ROOT, &pos) : NOT_A_CODE_POINT;
const bool isLastChar = (NOT_A_CODE_POINT == nextc);
// If there are more chars in this node, then this virtual node is not a terminal.
// If we are on the last char, this virtual node is a terminal if this node is.
const bool isTerminal = isLastChar && isTerminalNode;
@ -952,9 +951,9 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos,
// Prepare for the next character. Promote the prefetched char to current char - the loop
// will take care of prefetching the next. If we finally found our last char, nextc will
// contain NOT_A_CHARACTER.
// contain NOT_A_CODE_POINT.
c = nextc;
} while (NOT_A_CHARACTER != c);
} while (NOT_A_CODE_POINT != c);
if (isTerminalNode) {
// The frequency should be here, because we come here only if this is actually

View File

@ -43,11 +43,11 @@ class UnigramDictionary {
int fullWordMultiplier, int maxWordLength, int maxWords, const unsigned int flags);
int getFrequency(const int32_t *const inWord, const int length) const;
int getBigramPosition(int pos, unsigned short *word, int offset, int length) const;
int getSuggestions(
ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates,
const int *codes, const int codesSize, const std::map<int, int> *bigramMap,
const uint8_t *bigramFilter, const bool useFullEditDistance, unsigned short *outWords,
int *frequencies, int *outputTypes) const;
int getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
const int *ycoordinates, const int *codes, const int codesSize,
const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
const bool useFullEditDistance, unsigned short *outWords, int *frequencies,
int *outputTypes) const;
virtual ~UnigramDictionary();
private:
@ -94,7 +94,7 @@ class UnigramDictionary {
const int currentWordIndex) const;
int getMostFrequentWordLike(const int startInputIndex, const int inputSize,
Correction *correction, unsigned short *word) const;
int getMostFrequentWordLikeInner(const uint16_t *const inWord, const int length,
int getMostFrequentWordLikeInner(const uint16_t *const inWord, const int inputSize,
short unsigned int *outWord) const;
int getSubStringSuggestion(
ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates,

View File

@ -30,7 +30,7 @@ class WordsPriorityQueuePool {
mainQueueMaxWords, maxWordLength)) {
for (int i = 0, subQueueBufOffset = 0;
i < MULTIPLE_WORDS_SUGGESTION_MAX_WORDS * SUB_QUEUE_MAX_COUNT;
++i, subQueueBufOffset += sizeof(WordsPriorityQueue)) {
++i, subQueueBufOffset += static_cast<int>(sizeof(WordsPriorityQueue))) {
mSubQueues[i] = new(mSubQueueBuf + subQueueBufOffset)
WordsPriorityQueue(subQueueMaxWords, maxWordLength);
}