Cosmetic fixes and a bug fix in UnigramDictionary::testCharGroupForContinuedLikeness().
This change has been extracted from a work-in-progress change, I4fe423834b8131fb122251892c98228a6e08ba25.
Change-Id: I52568fa09da2ea22be7f8bfe9676b7cd73c31fa4
This commit is contained in:
parent
1a397ececf
commit
f2789819bd
19 changed files with 190 additions and 179 deletions
|
@ -177,7 +177,7 @@ public class SuggestedWords {
|
|||
return;
|
||||
}
|
||||
int i = 1;
|
||||
while(i < candidates.size()) {
|
||||
while (i < candidates.size()) {
|
||||
final SuggestedWordInfo cur = candidates.get(i);
|
||||
for (int j = 0; j < i; ++j) {
|
||||
final SuggestedWordInfo previous = candidates.get(j);
|
||||
|
|
|
@ -728,7 +728,7 @@ public class FusionDictionary implements Iterable<Word> {
|
|||
// StringBuilder s = new StringBuilder();
|
||||
// for (CharGroup g : node.data) {
|
||||
// s.append(g.frequency);
|
||||
// for (int ch : g.chars){
|
||||
// for (int ch : g.chars) {
|
||||
// s.append(Character.toChars(ch));
|
||||
// }
|
||||
// }
|
||||
|
@ -794,7 +794,7 @@ public class FusionDictionary implements Iterable<Word> {
|
|||
currentPos = mPositions.getLast();
|
||||
mCurrentString.setLength(mCurrentString.length() - mPositions.getLast().length);
|
||||
}
|
||||
} while(true);
|
||||
} while (true);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -68,9 +68,9 @@ static jlong latinime_BinaryDictionary_open(JNIEnv *env, jobject object,
|
|||
return 0;
|
||||
}
|
||||
int pagesize = getpagesize();
|
||||
adjust = dictOffset % pagesize;
|
||||
int adjDictOffset = dictOffset - adjust;
|
||||
int adjDictSize = dictSize + adjust;
|
||||
adjust = static_cast<int>(dictOffset) % pagesize;
|
||||
int adjDictOffset = static_cast<int>(dictOffset) - adjust;
|
||||
int adjDictSize = static_cast<int>(dictSize) + adjust;
|
||||
dictBuf = mmap(0, sizeof(char) * adjDictSize, PROT_READ, MAP_PRIVATE, fd, adjDictOffset);
|
||||
if (dictBuf == MAP_FAILED) {
|
||||
AKLOGE("DICT: Can't mmap dictionary. errno=%d", errno);
|
||||
|
@ -120,8 +120,8 @@ static jlong latinime_BinaryDictionary_open(JNIEnv *env, jobject object,
|
|||
releaseDictBuf(dictBuf, 0, 0);
|
||||
#endif // USE_MMAP_FOR_DICTIONARY
|
||||
} else {
|
||||
dictionary = new Dictionary(dictBuf, dictSize, fd, adjust, typedLetterMultiplier,
|
||||
fullWordMultiplier, maxWordLength, maxWords, maxPredictions);
|
||||
dictionary = new Dictionary(dictBuf, static_cast<int>(dictSize), fd, adjust,
|
||||
typedLetterMultiplier, fullWordMultiplier, maxWordLength, maxWords, maxPredictions);
|
||||
}
|
||||
PROF_END(66);
|
||||
PROF_CLOSE;
|
||||
|
|
|
@ -50,7 +50,7 @@ class AdditionalProximityChars {
|
|||
if (!isEnLocale(localeStr)) {
|
||||
return 0;
|
||||
}
|
||||
switch(c) {
|
||||
switch (c) {
|
||||
case 'a':
|
||||
return EN_US_ADDITIONAL_A_SIZE;
|
||||
case 'e':
|
||||
|
@ -70,7 +70,7 @@ class AdditionalProximityChars {
|
|||
if (!isEnLocale(localeStr)) {
|
||||
return 0;
|
||||
}
|
||||
switch(c) {
|
||||
switch (c) {
|
||||
case 'a':
|
||||
return EN_US_ADDITIONAL_A;
|
||||
case 'e':
|
||||
|
|
|
@ -14,6 +14,8 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "char_utils.h"
|
||||
|
||||
namespace latinime {
|
||||
|
@ -24,7 +26,7 @@ namespace latinime {
|
|||
* if c is not a combined character, or the base character if it
|
||||
* is combined.
|
||||
*/
|
||||
const unsigned short BASE_CHARS[BASE_CHARS_SIZE] = {
|
||||
const uint16_t BASE_CHARS[BASE_CHARS_SIZE] = {
|
||||
0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
|
||||
0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
|
||||
0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
|
||||
|
|
|
@ -156,7 +156,7 @@ int BigramDictionary::getBigramListPositionForWord(const int32_t *prevWord,
|
|||
const int flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
|
||||
if (0 == (flags & BinaryFormat::FLAG_HAS_BIGRAMS)) return 0;
|
||||
if (0 == (flags & BinaryFormat::FLAG_HAS_MULTIPLE_CHARS)) {
|
||||
BinaryFormat::getCharCodeAndForwardPointer(root, &pos);
|
||||
BinaryFormat::getCodePointAndForwardPointer(root, &pos);
|
||||
} else {
|
||||
pos = BinaryFormat::skipOtherCharacters(root, pos);
|
||||
}
|
||||
|
|
|
@ -84,7 +84,7 @@ class BinaryFormat {
|
|||
static unsigned int getFlags(const uint8_t *const dict);
|
||||
static int getGroupCountAndForwardPointer(const uint8_t *const dict, int *pos);
|
||||
static uint8_t getFlagsAndForwardPointer(const uint8_t *const dict, int *pos);
|
||||
static int32_t getCharCodeAndForwardPointer(const uint8_t *const dict, int *pos);
|
||||
static int32_t getCodePointAndForwardPointer(const uint8_t *const dict, int *pos);
|
||||
static int readFrequencyWithoutMovingPointer(const uint8_t *const dict, const int pos);
|
||||
static int skipOtherCharacters(const uint8_t *const dict, const int pos);
|
||||
static int skipChildrenPosition(const uint8_t flags, const int pos);
|
||||
|
@ -176,22 +176,22 @@ inline uint8_t BinaryFormat::getFlagsAndForwardPointer(const uint8_t *const dict
|
|||
return dict[(*pos)++];
|
||||
}
|
||||
|
||||
inline int32_t BinaryFormat::getCharCodeAndForwardPointer(const uint8_t *const dict, int *pos) {
|
||||
inline int32_t BinaryFormat::getCodePointAndForwardPointer(const uint8_t *const dict, int *pos) {
|
||||
const int origin = *pos;
|
||||
const int32_t character = dict[origin];
|
||||
if (character < MINIMAL_ONE_BYTE_CHARACTER_VALUE) {
|
||||
if (character == CHARACTER_ARRAY_TERMINATOR) {
|
||||
const int32_t codePoint = dict[origin];
|
||||
if (codePoint < MINIMAL_ONE_BYTE_CHARACTER_VALUE) {
|
||||
if (codePoint == CHARACTER_ARRAY_TERMINATOR) {
|
||||
*pos = origin + 1;
|
||||
return NOT_A_CHARACTER;
|
||||
return NOT_A_CODE_POINT;
|
||||
} else {
|
||||
*pos = origin + 3;
|
||||
const int32_t char_1 = character << 16;
|
||||
const int32_t char_1 = codePoint << 16;
|
||||
const int32_t char_2 = char_1 + (dict[origin + 1] << 8);
|
||||
return char_2 + dict[origin + 2];
|
||||
}
|
||||
} else {
|
||||
*pos = origin + 1;
|
||||
return character;
|
||||
return codePoint;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -369,15 +369,15 @@ inline int BinaryFormat::getTerminalPosition(const uint8_t *const root,
|
|||
if (0 >= charGroupCount) return NOT_VALID_WORD;
|
||||
const int charGroupPos = pos;
|
||||
const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
|
||||
int32_t character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos);
|
||||
int32_t character = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
|
||||
if (character == wChar) {
|
||||
// This is the correct node. Only one character group may start with the same
|
||||
// char within a node, so either we found our match in this node, or there is
|
||||
// no match and we can return NOT_VALID_WORD. So we will check all the characters
|
||||
// in this character group indeed does match.
|
||||
if (FLAG_HAS_MULTIPLE_CHARS & flags) {
|
||||
character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos);
|
||||
while (NOT_A_CHARACTER != character) {
|
||||
character = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
|
||||
while (NOT_A_CODE_POINT != character) {
|
||||
++wordPos;
|
||||
// If we shoot the length of the word we search for, or if we find a single
|
||||
// character that does not match, as explained above, it means the word is
|
||||
|
@ -385,7 +385,7 @@ inline int BinaryFormat::getTerminalPosition(const uint8_t *const root,
|
|||
// match the word on the first character, but not matching the whole word).
|
||||
if (wordPos > length) return NOT_VALID_WORD;
|
||||
if (inWord[wordPos] != character) return NOT_VALID_WORD;
|
||||
character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos);
|
||||
character = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
|
||||
}
|
||||
}
|
||||
// If we come here we know that so far, we do match. Either we are on a terminal
|
||||
|
@ -457,19 +457,19 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t *const root, const int a
|
|||
--charGroupCount) {
|
||||
const int startPos = pos;
|
||||
const uint8_t flags = getFlagsAndForwardPointer(root, &pos);
|
||||
const int32_t character = getCharCodeAndForwardPointer(root, &pos);
|
||||
const int32_t character = getCodePointAndForwardPointer(root, &pos);
|
||||
if (address == startPos) {
|
||||
// We found the address. Copy the rest of the word in the buffer and return
|
||||
// the length.
|
||||
outWord[wordPos] = character;
|
||||
if (FLAG_HAS_MULTIPLE_CHARS & flags) {
|
||||
int32_t nextChar = getCharCodeAndForwardPointer(root, &pos);
|
||||
int32_t nextChar = getCodePointAndForwardPointer(root, &pos);
|
||||
// We count chars in order to avoid infinite loops if the file is broken or
|
||||
// if there is some other bug
|
||||
int charCount = maxDepth;
|
||||
while (NOT_A_CHARACTER != nextChar && --charCount > 0) {
|
||||
while (NOT_A_CODE_POINT != nextChar && --charCount > 0) {
|
||||
outWord[++wordPos] = nextChar;
|
||||
nextChar = getCharCodeAndForwardPointer(root, &pos);
|
||||
nextChar = getCodePointAndForwardPointer(root, &pos);
|
||||
}
|
||||
}
|
||||
*outUnigramFrequency = readFrequencyWithoutMovingPointer(root, pos);
|
||||
|
@ -523,16 +523,16 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t *const root, const int a
|
|||
const uint8_t lastFlags =
|
||||
getFlagsAndForwardPointer(root, &lastCandidateGroupPos);
|
||||
const int32_t lastChar =
|
||||
getCharCodeAndForwardPointer(root, &lastCandidateGroupPos);
|
||||
getCodePointAndForwardPointer(root, &lastCandidateGroupPos);
|
||||
// We copy all the characters in this group to the buffer
|
||||
outWord[wordPos] = lastChar;
|
||||
if (FLAG_HAS_MULTIPLE_CHARS & lastFlags) {
|
||||
int32_t nextChar =
|
||||
getCharCodeAndForwardPointer(root, &lastCandidateGroupPos);
|
||||
getCodePointAndForwardPointer(root, &lastCandidateGroupPos);
|
||||
int charCount = maxDepth;
|
||||
while (-1 != nextChar && --charCount > 0) {
|
||||
outWord[++wordPos] = nextChar;
|
||||
nextChar = getCharCodeAndForwardPointer(root, &lastCandidateGroupPos);
|
||||
nextChar = getCodePointAndForwardPointer(root, &lastCandidateGroupPos);
|
||||
}
|
||||
}
|
||||
++wordPos;
|
||||
|
@ -582,8 +582,8 @@ inline int BinaryFormat::computeFrequencyForBigram(const int unigramFreq, const
|
|||
// 0 for the bigram frequency represents the middle of the 16th step from the top,
|
||||
// while a value of 15 represents the middle of the top step.
|
||||
// See makedict.BinaryDictInputOutput for details.
|
||||
const float stepSize = (static_cast<float>(MAX_FREQ) - unigramFreq) / (1.5f + MAX_BIGRAM_FREQ);
|
||||
return static_cast<int>(unigramFreq + (bigramFreq + 1) * stepSize);
|
||||
const float stepSize = static_cast<float>(MAX_FREQ - unigramFreq) / (1.5f + MAX_BIGRAM_FREQ);
|
||||
return unigramFreq + static_cast<int>(static_cast<float>(bigramFreq + 1) * stepSize);
|
||||
}
|
||||
|
||||
// This returns a probability in log space.
|
||||
|
|
|
@ -23,14 +23,16 @@
|
|||
|
||||
namespace latinime {
|
||||
|
||||
static inline void setInFilter(uint8_t *filter, const int position) {
|
||||
const unsigned int bucket = position % BIGRAM_FILTER_MODULO;
|
||||
filter[bucket >> 3] |= (1 << (bucket & 0x7));
|
||||
// TODO: uint32_t position
|
||||
static inline void setInFilter(uint8_t *filter, const int32_t position) {
|
||||
const uint32_t bucket = static_cast<uint32_t>(position % BIGRAM_FILTER_MODULO);
|
||||
filter[bucket >> 3] |= static_cast<uint8_t>(1 << (bucket & 0x7));
|
||||
}
|
||||
|
||||
static inline bool isInFilter(const uint8_t *filter, const int position) {
|
||||
const unsigned int bucket = position % BIGRAM_FILTER_MODULO;
|
||||
return filter[bucket >> 3] & (1 << (bucket & 0x7));
|
||||
// TODO: uint32_t position
|
||||
static inline bool isInFilter(const uint8_t *filter, const int32_t position) {
|
||||
const uint32_t bucket = static_cast<uint32_t>(position % BIGRAM_FILTER_MODULO);
|
||||
return filter[bucket >> 3] & static_cast<uint8_t>(1 << (bucket & 0x7));
|
||||
}
|
||||
} // namespace latinime
|
||||
#endif // LATINIME_BLOOM_FILTER_H
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#define LATINIME_CHAR_UTILS_H
|
||||
|
||||
#include <cctype>
|
||||
#include <stdint.h>
|
||||
|
||||
namespace latinime {
|
||||
|
||||
|
@ -43,7 +44,7 @@ unsigned short latin_tolower(const unsigned short c);
|
|||
*/
|
||||
|
||||
static const int BASE_CHARS_SIZE = 0x0500;
|
||||
extern const unsigned short BASE_CHARS[BASE_CHARS_SIZE];
|
||||
extern const uint16_t BASE_CHARS[BASE_CHARS_SIZE];
|
||||
|
||||
inline static unsigned short toBaseChar(unsigned short c) {
|
||||
if (c < BASE_CHARS_SIZE) {
|
||||
|
|
|
@ -362,7 +362,8 @@ Correction::CorrectionType Correction::processCharAndCalcState(
|
|||
if (mSkipPos >= 0) {
|
||||
if (mSkippedCount == 0 && mSkipPos < mOutputIndex) {
|
||||
if (DEBUG_DICT) {
|
||||
assert(mSkipPos == mOutputIndex - 1);
|
||||
// TODO: Enable this assertion.
|
||||
//assert(mSkipPos == mOutputIndex - 1);
|
||||
}
|
||||
mSkipPos = mOutputIndex;
|
||||
}
|
||||
|
@ -630,7 +631,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
|
|||
inline static int getQuoteCount(const unsigned short *word, const int length) {
|
||||
int quoteCount = 0;
|
||||
for (int i = 0; i < length; ++i) {
|
||||
if(word[i] == '\'') {
|
||||
if (word[i] == '\'') {
|
||||
++quoteCount;
|
||||
}
|
||||
}
|
||||
|
@ -936,7 +937,7 @@ int Correction::RankingAlgorithm::calcFreqForSplitMultipleWords(
|
|||
|
||||
int totalLength = 0;
|
||||
int totalFreq = 0;
|
||||
for (int i = 0; i < wordCount; ++i){
|
||||
for (int i = 0; i < wordCount; ++i) {
|
||||
const int wordLength = wordLengthArray[i];
|
||||
if (wordLength <= 0) {
|
||||
return 0;
|
||||
|
@ -1126,15 +1127,16 @@ float Correction::RankingAlgorithm::calcNormalizedScore(const unsigned short *be
|
|||
return 0;
|
||||
}
|
||||
|
||||
const float maxScore = score >= S_INT_MAX ? S_INT_MAX : MAX_INITIAL_SCORE
|
||||
* powf(static_cast<float>(TYPED_LETTER_MULTIPLIER),
|
||||
static_cast<float>(min(beforeLength, afterLength - spaceCount)))
|
||||
* FULL_WORD_MULTIPLIER;
|
||||
const float maxScore = score >= S_INT_MAX ? static_cast<float>(S_INT_MAX)
|
||||
: static_cast<float>(MAX_INITIAL_SCORE)
|
||||
* powf(static_cast<float>(TYPED_LETTER_MULTIPLIER),
|
||||
static_cast<float>(min(beforeLength, afterLength - spaceCount)))
|
||||
* static_cast<float>(FULL_WORD_MULTIPLIER);
|
||||
|
||||
// add a weight based on edit distance.
|
||||
// distance <= max(afterLength, beforeLength) == afterLength,
|
||||
// so, 0 <= distance / afterLength <= 1
|
||||
const float weight = 1.0f - static_cast<float>(distance) / static_cast<float>(afterLength);
|
||||
return (score / maxScore) * weight;
|
||||
return (static_cast<float>(score) / maxScore) * weight;
|
||||
}
|
||||
} // namespace latinime
|
||||
|
|
|
@ -17,6 +17,8 @@
|
|||
#ifndef LATINIME_DEFINES_H
|
||||
#define LATINIME_DEFINES_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(FLAG_DO_PROFILE) || defined(FLAG_DBG)
|
||||
#include <android/log.h>
|
||||
#ifndef LOG_TAG
|
||||
|
@ -26,9 +28,9 @@
|
|||
#define AKLOGI(fmt, ...) __android_log_print(ANDROID_LOG_INFO, LOG_TAG, fmt, ##__VA_ARGS__)
|
||||
|
||||
#define DUMP_RESULT(words, frequencies, maxWordCount, maxWordLength) do { \
|
||||
dumpResult(words, frequencies, maxWordCount, maxWordLength); } while(0)
|
||||
#define DUMP_WORD(word, length) do { dumpWord(word, length); } while(0)
|
||||
#define DUMP_WORD_INT(word, length) do { dumpWordInt(word, length); } while(0)
|
||||
dumpResult(words, frequencies, maxWordCount, maxWordLength); } while (0)
|
||||
#define DUMP_WORD(word, length) do { dumpWord(word, length); } while (0)
|
||||
#define DUMP_WORD_INT(word, length) do { dumpWordInt(word, length); } while (0)
|
||||
|
||||
static inline void dumpWordInfo(const unsigned short *word, const int length,
|
||||
const int rank, const int frequency) {
|
||||
|
@ -39,7 +41,8 @@ static inline void dumpWordInfo(const unsigned short *word, const int length,
|
|||
if (c == 0) {
|
||||
break;
|
||||
}
|
||||
charBuf[i] = c;
|
||||
// static_cast only for debugging
|
||||
charBuf[i] = static_cast<char>(c);
|
||||
}
|
||||
charBuf[i] = 0;
|
||||
if (i > 1) {
|
||||
|
@ -65,7 +68,8 @@ static inline void dumpWord(const unsigned short *word, const int length) {
|
|||
if (c == 0) {
|
||||
break;
|
||||
}
|
||||
charBuf[i] = c;
|
||||
// static_cast only for debugging
|
||||
charBuf[i] = static_cast<char>(c);
|
||||
}
|
||||
charBuf[i] = 0;
|
||||
if (i > 1) {
|
||||
|
@ -84,7 +88,7 @@ static inline void dumpWordInt(const int *word, const int length) {
|
|||
}
|
||||
|
||||
#ifndef __ANDROID__
|
||||
#define ASSERT(success) do { if(!success) { showStackTrace(); assert(success);};} while (0)
|
||||
#define ASSERT(success) do { if (!success) { showStackTrace(); assert(success);};} while (0)
|
||||
#define SHOW_STACK_TRACE do { showStackTrace(); } while (0)
|
||||
|
||||
#include <execinfo.h>
|
||||
|
@ -128,14 +132,14 @@ static unsigned int profile_counter[PROF_BUF_SIZE];
|
|||
|
||||
#define PROF_RESET prof_reset()
|
||||
#define PROF_COUNT(prof_buf_id) ++profile_counter[prof_buf_id]
|
||||
#define PROF_OPEN do { PROF_RESET; PROF_START(PROF_BUF_SIZE - 1); } while(0)
|
||||
#define PROF_OPEN do { PROF_RESET; PROF_START(PROF_BUF_SIZE - 1); } while (0)
|
||||
#define PROF_START(prof_buf_id) do { \
|
||||
PROF_COUNT(prof_buf_id); profile_old[prof_buf_id] = (clock()); } while(0)
|
||||
#define PROF_CLOSE do { PROF_END(PROF_BUF_SIZE - 1); PROF_OUTALL; } while(0)
|
||||
PROF_COUNT(prof_buf_id); profile_old[prof_buf_id] = (clock()); } while (0)
|
||||
#define PROF_CLOSE do { PROF_END(PROF_BUF_SIZE - 1); PROF_OUTALL; } while (0)
|
||||
#define PROF_END(prof_buf_id) profile_buf[prof_buf_id] += ((clock()) - profile_old[prof_buf_id])
|
||||
#define PROF_CLOCKOUT(prof_buf_id) \
|
||||
AKLOGI("%s : clock is %f", __FUNCTION__, (clock() - profile_old[prof_buf_id]))
|
||||
#define PROF_OUTALL do { AKLOGI("--- %s ---", __FUNCTION__); prof_out(); } while(0)
|
||||
#define PROF_OUTALL do { AKLOGI("--- %s ---", __FUNCTION__); prof_out(); } while (0)
|
||||
|
||||
static inline void prof_reset(void) {
|
||||
for (int i = 0; i < PROF_BUF_SIZE; ++i) {
|
||||
|
@ -236,15 +240,15 @@ static inline void prof_out(void) {
|
|||
#define FLAG_BIGRAM_FREQ 0x7F
|
||||
|
||||
#define DICTIONARY_VERSION_MIN 200
|
||||
#define NOT_VALID_WORD -99
|
||||
#define NOT_A_CHARACTER -1
|
||||
#define NOT_A_DISTANCE -1
|
||||
#define NOT_A_COORDINATE -1
|
||||
#define EQUIVALENT_CHAR_WITHOUT_DISTANCE_INFO -2
|
||||
#define PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO -3
|
||||
#define ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO -4
|
||||
#define NOT_AN_INDEX -1
|
||||
#define NOT_A_PROBABILITY -1
|
||||
#define NOT_VALID_WORD (-99)
|
||||
#define NOT_A_CODE_POINT (-1)
|
||||
#define NOT_A_DISTANCE (-1)
|
||||
#define NOT_A_COORDINATE (-1)
|
||||
#define EQUIVALENT_CHAR_WITHOUT_DISTANCE_INFO (-2)
|
||||
#define PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO (-3)
|
||||
#define ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO (-4)
|
||||
#define NOT_AN_INDEX (-1)
|
||||
#define NOT_A_PROBABILITY (-1)
|
||||
|
||||
#define KEYCODE_SPACE ' '
|
||||
|
||||
|
@ -355,8 +359,8 @@ template<typename T> inline T max(T a, T b) { return a > b ? a : b; }
|
|||
#define NEUTRAL_AREA_RADIUS_RATIO 1.3f
|
||||
|
||||
// DEBUG
|
||||
#define INPUTLENGTH_FOR_DEBUG -1
|
||||
#define MIN_OUTPUT_INDEX_FOR_DEBUG -1
|
||||
#define INPUTLENGTH_FOR_DEBUG (-1)
|
||||
#define MIN_OUTPUT_INDEX_FOR_DEBUG (-1)
|
||||
|
||||
#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
|
||||
TypeName(const TypeName&); \
|
||||
|
|
|
@ -90,11 +90,12 @@ class Dictionary {
|
|||
// static inline methods should be defined in the header file
|
||||
inline int Dictionary::wideStrLen(unsigned short *str) {
|
||||
if (!str) return 0;
|
||||
unsigned short *end = str;
|
||||
while (*end) {
|
||||
end++;
|
||||
int length = 0;
|
||||
while (*str) {
|
||||
str++;
|
||||
length++;
|
||||
}
|
||||
return end - str;
|
||||
return length;
|
||||
}
|
||||
} // namespace latinime
|
||||
#endif // LATINIME_DICTIONARY_H
|
||||
|
|
|
@ -29,7 +29,6 @@
|
|||
|
||||
namespace latinime {
|
||||
|
||||
/* static */ const int ProximityInfo::NOT_A_CODE = -1;
|
||||
/* static */ const float ProximityInfo::NOT_A_DISTANCE_FLOAT = -1.0f;
|
||||
|
||||
static inline void safeGetOrFillZeroIntArrayRegion(JNIEnv *env, jintArray jArray, jsize len,
|
||||
|
@ -84,22 +83,22 @@ ProximityInfo::ProximityInfo(JNIEnv *env, const jstring localeJStr, const int ma
|
|||
safeGetOrFillZeroIntArrayRegion(env, keyYCoordinates, KEY_COUNT, mKeyYCoordinates);
|
||||
safeGetOrFillZeroIntArrayRegion(env, keyWidths, KEY_COUNT, mKeyWidths);
|
||||
safeGetOrFillZeroIntArrayRegion(env, keyHeights, KEY_COUNT, mKeyHeights);
|
||||
safeGetOrFillZeroIntArrayRegion(env, keyCharCodes, KEY_COUNT, mKeyCharCodes);
|
||||
safeGetOrFillZeroIntArrayRegion(env, keyCharCodes, KEY_COUNT, mKeyCodePoints);
|
||||
safeGetOrFillZeroFloatArrayRegion(env, sweetSpotCenterXs, KEY_COUNT, mSweetSpotCenterXs);
|
||||
safeGetOrFillZeroFloatArrayRegion(env, sweetSpotCenterYs, KEY_COUNT, mSweetSpotCenterYs);
|
||||
safeGetOrFillZeroFloatArrayRegion(env, sweetSpotRadii, KEY_COUNT, mSweetSpotRadii);
|
||||
initializeCodeToKeyIndex();
|
||||
initializeCodePointToKeyIndex();
|
||||
initializeG();
|
||||
}
|
||||
|
||||
// Build the reversed look up table from the char code to the index in mKeyXCoordinates,
|
||||
// mKeyYCoordinates, mKeyWidths, mKeyHeights, mKeyCharCodes.
|
||||
void ProximityInfo::initializeCodeToKeyIndex() {
|
||||
memset(mCodeToKeyIndex, -1, (MAX_CHAR_CODE + 1) * sizeof(mCodeToKeyIndex[0]));
|
||||
void ProximityInfo::initializeCodePointToKeyIndex() {
|
||||
memset(mCodePointToKeyIndex, -1, sizeof(mCodePointToKeyIndex));
|
||||
for (int i = 0; i < KEY_COUNT; ++i) {
|
||||
const int code = mKeyCharCodes[i];
|
||||
const int code = mKeyCodePoints[i];
|
||||
if (0 <= code && code <= MAX_CHAR_CODE) {
|
||||
mCodeToKeyIndex[code] = i;
|
||||
mCodePointToKeyIndex[code] = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -117,7 +116,8 @@ bool ProximityInfo::hasSpaceProximity(const int x, const int y) const {
|
|||
if (x < 0 || y < 0) {
|
||||
if (DEBUG_DICT) {
|
||||
AKLOGI("HasSpaceProximity: Illegal coordinates (%d, %d)", x, y);
|
||||
assert(false);
|
||||
// TODO: Enable this assertion.
|
||||
//assert(false);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
@ -145,8 +145,8 @@ static inline float getNormalizedSquaredDistanceFloat(float x1, float y1, float
|
|||
|
||||
float ProximityInfo::getNormalizedSquaredDistanceFromCenterFloat(
|
||||
const int keyId, const int x, const int y) const {
|
||||
const float centerX = static_cast<float>(getKeyCenterXOfIdG(keyId));
|
||||
const float centerY = static_cast<float>(getKeyCenterYOfIdG(keyId));
|
||||
const float centerX = static_cast<float>(getKeyCenterXOfKeyIdG(keyId));
|
||||
const float centerY = static_cast<float>(getKeyCenterYOfKeyIdG(keyId));
|
||||
const float touchX = static_cast<float>(x);
|
||||
const float touchY = static_cast<float>(y);
|
||||
const float keyWidth = static_cast<float>(getMostCommonKeyWidth());
|
||||
|
@ -178,7 +178,7 @@ void ProximityInfo::calculateNearbyKeyCodes(
|
|||
if (c < KEYCODE_SPACE || c == primaryKey) {
|
||||
continue;
|
||||
}
|
||||
const int keyIndex = getKeyIndex(c);
|
||||
const int keyIndex = getKeyIndexOf(c);
|
||||
const bool onKey = isOnKey(keyIndex, x, y);
|
||||
const int distance = squaredDistanceToEdge(keyIndex, x, y);
|
||||
if (onKey || distance < MOST_COMMON_KEY_WIDTH_SQUARE) {
|
||||
|
@ -208,7 +208,7 @@ void ProximityInfo::calculateNearbyKeyCodes(
|
|||
const int32_t ac = additionalProximityChars[j];
|
||||
int k = 0;
|
||||
for (; k < insertPos; ++k) {
|
||||
if ((int)ac == inputCodes[k]) {
|
||||
if (static_cast<int>(ac) == inputCodes[k]) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -227,11 +227,11 @@ void ProximityInfo::calculateNearbyKeyCodes(
|
|||
}
|
||||
// Add a delimiter for the proximity characters
|
||||
for (int i = insertPos; i < MAX_PROXIMITY_CHARS_SIZE; ++i) {
|
||||
inputCodes[i] = NOT_A_CODE;
|
||||
inputCodes[i] = NOT_A_CODE_POINT;
|
||||
}
|
||||
}
|
||||
|
||||
int ProximityInfo::getKeyIndex(const int c) const {
|
||||
int ProximityInfo::getKeyIndexOf(const int c) const {
|
||||
if (KEY_COUNT == 0) {
|
||||
// We do not have the coordinate data
|
||||
return NOT_AN_INDEX;
|
||||
|
@ -240,28 +240,28 @@ int ProximityInfo::getKeyIndex(const int c) const {
|
|||
if (baseLowerC > MAX_CHAR_CODE) {
|
||||
return NOT_AN_INDEX;
|
||||
}
|
||||
return mCodeToKeyIndex[baseLowerC];
|
||||
return mCodePointToKeyIndex[baseLowerC];
|
||||
}
|
||||
|
||||
int ProximityInfo::getKeyCode(const int keyIndex) const {
|
||||
int ProximityInfo::getCodePointOf(const int keyIndex) const {
|
||||
if (keyIndex < 0 || keyIndex >= KEY_COUNT) {
|
||||
return NOT_AN_INDEX;
|
||||
return NOT_A_CODE_POINT;
|
||||
}
|
||||
return mKeyToCodeIndexG[keyIndex];
|
||||
return mKeyIndexToCodePointG[keyIndex];
|
||||
}
|
||||
|
||||
void ProximityInfo::initializeG() {
|
||||
// TODO: Optimize
|
||||
for (int i = 0; i < KEY_COUNT; ++i) {
|
||||
const int code = mKeyCharCodes[i];
|
||||
const int code = mKeyCodePoints[i];
|
||||
const int lowerCode = toBaseLowerCase(code);
|
||||
mCenterXsG[i] = mKeyXCoordinates[i] + mKeyWidths[i] / 2;
|
||||
mCenterYsG[i] = mKeyYCoordinates[i] + mKeyHeights[i] / 2;
|
||||
if (code != lowerCode && lowerCode >= 0 && lowerCode <= MAX_CHAR_CODE) {
|
||||
mCodeToKeyIndex[lowerCode] = i;
|
||||
mKeyToCodeIndexG[i] = lowerCode;
|
||||
mCodePointToKeyIndex[lowerCode] = i;
|
||||
mKeyIndexToCodePointG[i] = lowerCode;
|
||||
} else {
|
||||
mKeyToCodeIndexG[i] = code;
|
||||
mKeyIndexToCodePointG[i] = code;
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < KEY_COUNT; i++) {
|
||||
|
@ -274,22 +274,22 @@ void ProximityInfo::initializeG() {
|
|||
}
|
||||
}
|
||||
|
||||
float ProximityInfo::getKeyCenterXOfCharG(int charCode) const {
|
||||
return getKeyCenterXOfIdG(getKeyIndex(charCode));
|
||||
float ProximityInfo::getKeyCenterXOfCodePointG(int charCode) const {
|
||||
return getKeyCenterXOfKeyIdG(getKeyIndexOf(charCode));
|
||||
}
|
||||
|
||||
float ProximityInfo::getKeyCenterYOfCharG(int charCode) const {
|
||||
return getKeyCenterYOfIdG(getKeyIndex(charCode));
|
||||
float ProximityInfo::getKeyCenterYOfCodePointG(int charCode) const {
|
||||
return getKeyCenterYOfKeyIdG(getKeyIndexOf(charCode));
|
||||
}
|
||||
|
||||
float ProximityInfo::getKeyCenterXOfIdG(int keyId) const {
|
||||
float ProximityInfo::getKeyCenterXOfKeyIdG(int keyId) const {
|
||||
if (keyId >= 0) {
|
||||
return mCenterXsG[keyId];
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
float ProximityInfo::getKeyCenterYOfIdG(int keyId) const {
|
||||
float ProximityInfo::getKeyCenterYOfKeyIdG(int keyId) const {
|
||||
if (keyId >= 0) {
|
||||
return mCenterYsG[keyId];
|
||||
}
|
||||
|
@ -297,8 +297,8 @@ float ProximityInfo::getKeyCenterYOfIdG(int keyId) const {
|
|||
}
|
||||
|
||||
int ProximityInfo::getKeyKeyDistanceG(int key0, int key1) const {
|
||||
const int keyId0 = getKeyIndex(key0);
|
||||
const int keyId1 = getKeyIndex(key1);
|
||||
const int keyId0 = getKeyIndexOf(key0);
|
||||
const int keyId1 = getKeyIndexOf(key1);
|
||||
if (keyId0 >= 0 && keyId1 >= 0) {
|
||||
return mKeyKeyDistancesG[keyId0][keyId1];
|
||||
}
|
||||
|
|
|
@ -41,8 +41,8 @@ class ProximityInfo {
|
|||
float getNormalizedSquaredDistanceFromCenterFloat(
|
||||
const int keyId, const int x, const int y) const;
|
||||
bool sameAsTyped(const unsigned short *word, int length) const;
|
||||
int getKeyIndex(const int c) const;
|
||||
int getKeyCode(const int keyIndex) const;
|
||||
int getKeyIndexOf(const int c) const;
|
||||
int getCodePointOf(const int keyIndex) const;
|
||||
bool hasSweetSpotData(const int keyIndex) const {
|
||||
// When there are no calibration data for a key,
|
||||
// the radius of the key is assigned to zero.
|
||||
|
@ -96,23 +96,23 @@ class ProximityInfo {
|
|||
return GRID_HEIGHT;
|
||||
}
|
||||
|
||||
float getKeyCenterXOfCharG(int charCode) const;
|
||||
float getKeyCenterYOfCharG(int charCode) const;
|
||||
float getKeyCenterXOfIdG(int keyId) const;
|
||||
float getKeyCenterYOfIdG(int keyId) const;
|
||||
// TODO: These should return int.
|
||||
float getKeyCenterXOfCodePointG(int charCode) const;
|
||||
float getKeyCenterYOfCodePointG(int charCode) const;
|
||||
float getKeyCenterXOfKeyIdG(int keyId) const;
|
||||
float getKeyCenterYOfKeyIdG(int keyId) const;
|
||||
int getKeyKeyDistanceG(int key0, int key1) const;
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(ProximityInfo);
|
||||
// The max number of the keys in one keyboard layout
|
||||
static const int MAX_KEY_COUNT_IN_A_KEYBOARD = 64;
|
||||
// The upper limit of the char code in mCodeToKeyIndex
|
||||
// The upper limit of the char code in mCodePointToKeyIndex
|
||||
static const int MAX_CHAR_CODE = 127;
|
||||
static const int NOT_A_CODE;
|
||||
static const float NOT_A_DISTANCE_FLOAT;
|
||||
|
||||
int getStartIndexFromCoordinates(const int x, const int y) const;
|
||||
void initializeCodeToKeyIndex();
|
||||
void initializeCodePointToKeyIndex();
|
||||
void initializeG();
|
||||
float calculateNormalizedSquaredDistance(const int keyIndex, const int inputIndex) const;
|
||||
float calculateSquaredDistanceFromSweetSpotCenter(
|
||||
|
@ -143,13 +143,13 @@ class ProximityInfo {
|
|||
int32_t mKeyYCoordinates[MAX_KEY_COUNT_IN_A_KEYBOARD];
|
||||
int32_t mKeyWidths[MAX_KEY_COUNT_IN_A_KEYBOARD];
|
||||
int32_t mKeyHeights[MAX_KEY_COUNT_IN_A_KEYBOARD];
|
||||
int32_t mKeyCharCodes[MAX_KEY_COUNT_IN_A_KEYBOARD];
|
||||
int32_t mKeyCodePoints[MAX_KEY_COUNT_IN_A_KEYBOARD];
|
||||
float mSweetSpotCenterXs[MAX_KEY_COUNT_IN_A_KEYBOARD];
|
||||
float mSweetSpotCenterYs[MAX_KEY_COUNT_IN_A_KEYBOARD];
|
||||
float mSweetSpotRadii[MAX_KEY_COUNT_IN_A_KEYBOARD];
|
||||
int mCodeToKeyIndex[MAX_CHAR_CODE + 1];
|
||||
int mCodePointToKeyIndex[MAX_CHAR_CODE + 1];
|
||||
|
||||
int mKeyToCodeIndexG[MAX_KEY_COUNT_IN_A_KEYBOARD];
|
||||
int mKeyIndexToCodePointG[MAX_KEY_COUNT_IN_A_KEYBOARD];
|
||||
int mCenterXsG[MAX_KEY_COUNT_IN_A_KEYBOARD];
|
||||
int mCenterYsG[MAX_KEY_COUNT_IN_A_KEYBOARD];
|
||||
int mKeyKeyDistancesG[MAX_KEY_COUNT_IN_A_KEYBOARD][MAX_KEY_COUNT_IN_A_KEYBOARD];
|
||||
|
|
|
@ -160,7 +160,7 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi
|
|||
const int currentChar = proximityChars[j];
|
||||
const float squaredDistance =
|
||||
hasInputCoordinates() ? calculateNormalizedSquaredDistance(
|
||||
mProximityInfo->getKeyIndex(currentChar), i) :
|
||||
mProximityInfo->getKeyIndexOf(currentChar), i) :
|
||||
NOT_A_DISTANCE_FLOAT;
|
||||
if (squaredDistance >= 0.0f) {
|
||||
mNormalizedSquaredDistances[i * MAX_PROXIMITY_CHARS_SIZE_INTERNAL + j] =
|
||||
|
@ -282,7 +282,7 @@ bool ProximityInfoState::pushTouchPoint(const int nodeChar, int x, int y, const
|
|||
const NearKeysDistanceMap *const prevPrevNearKeysDistances) {
|
||||
static const float LAST_POINT_SKIP_DISTANCE_SCALE = 0.25f;
|
||||
|
||||
uint32_t size = mInputXs.size();
|
||||
size_t size = mInputXs.size();
|
||||
bool popped = false;
|
||||
if (nodeChar < 0 && sample) {
|
||||
const float nearest = updateNearKeysDistances(x, y, currentNearKeysDistances);
|
||||
|
@ -309,7 +309,7 @@ bool ProximityInfoState::pushTouchPoint(const int nodeChar, int x, int y, const
|
|||
float minDist = mMaxPointToKeyLength;
|
||||
for (NearKeysDistanceMap::const_iterator it = currentNearKeysDistances->begin();
|
||||
it != currentNearKeysDistances->end(); ++it) {
|
||||
if(minDist > it->second){
|
||||
if (minDist > it->second) {
|
||||
minChar = it->first;
|
||||
minDist = it->second;
|
||||
}
|
||||
|
@ -324,10 +324,10 @@ bool ProximityInfoState::pushTouchPoint(const int nodeChar, int x, int y, const
|
|||
}
|
||||
|
||||
if (nodeChar >= 0 && (x < 0 || y < 0)) {
|
||||
const int keyId = mProximityInfo->getKeyIndex(nodeChar);
|
||||
const int keyId = mProximityInfo->getKeyIndexOf(nodeChar);
|
||||
if (keyId >= 0) {
|
||||
x = mProximityInfo->getKeyCenterXOfIdG(keyId);
|
||||
y = mProximityInfo->getKeyCenterYOfIdG(keyId);
|
||||
x = mProximityInfo->getKeyCenterXOfKeyIdG(keyId);
|
||||
y = mProximityInfo->getKeyCenterYOfKeyIdG(keyId);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -368,8 +368,8 @@ int ProximityInfoState::getDuration(const int index) const {
|
|||
return 0;
|
||||
}
|
||||
|
||||
float ProximityInfoState::getPointToKeyLength(int inputIndex, int charCode, float scale) {
|
||||
const int keyId = mProximityInfo->getKeyIndex(charCode);
|
||||
float ProximityInfoState::getPointToKeyLength(int inputIndex, int codePoint, float scale) {
|
||||
const int keyId = mProximityInfo->getKeyIndexOf(codePoint);
|
||||
if (keyId >= 0) {
|
||||
const int index = inputIndex * mProximityInfo->getKeyCount() + keyId;
|
||||
return min(mDistanceCache[index] * scale, mMaxPointToKeyLength);
|
||||
|
@ -382,8 +382,8 @@ int ProximityInfoState::getKeyKeyDistance(int key0, int key1) {
|
|||
}
|
||||
|
||||
int ProximityInfoState::getSpaceY() {
|
||||
const int keyId = mProximityInfo->getKeyIndex(' ');
|
||||
return mProximityInfo->getKeyCenterYOfIdG(keyId);
|
||||
const int keyId = mProximityInfo->getKeyIndexOf(' ');
|
||||
return mProximityInfo->getKeyCenterYOfKeyIdG(keyId);
|
||||
}
|
||||
|
||||
float ProximityInfoState::calculateSquaredDistanceFromSweetSpotCenter(
|
||||
|
|
|
@ -52,9 +52,9 @@ class TerminalAttributes {
|
|||
0 != (shortcutFlags & BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT);
|
||||
unsigned int i;
|
||||
for (i = 0; i < MAX_WORD_LENGTH_INTERNAL; ++i) {
|
||||
const int charCode = BinaryFormat::getCharCodeAndForwardPointer(mDict, &mPos);
|
||||
if (NOT_A_CHARACTER == charCode) break;
|
||||
outWord[i] = (uint16_t)charCode;
|
||||
const int codePoint = BinaryFormat::getCodePointAndForwardPointer(mDict, &mPos);
|
||||
if (NOT_A_CODE_POINT == codePoint) break;
|
||||
outWord[i] = (uint16_t)codePoint;
|
||||
}
|
||||
*outFreq = BinaryFormat::getAttributeFrequencyFromFlags(shortcutFlags);
|
||||
mPos += BinaryFormat::CHARACTER_ARRAY_TERMINATOR_SIZE;
|
||||
|
@ -62,8 +62,8 @@ class TerminalAttributes {
|
|||
}
|
||||
};
|
||||
|
||||
TerminalAttributes(const uint8_t *const dict, const uint8_t flags, const int pos) :
|
||||
mDict(dict), mFlags(flags), mStartPos(pos) {
|
||||
TerminalAttributes(const uint8_t *const dict, const uint8_t flags, const int pos)
|
||||
: mDict(dict), mFlags(flags), mStartPos(pos) {
|
||||
}
|
||||
|
||||
inline ShortcutIterator getShortcutIterator() const {
|
||||
|
|
|
@ -58,12 +58,12 @@ UnigramDictionary::~UnigramDictionary() {
|
|||
}
|
||||
|
||||
static inline unsigned int getCodesBufferSize(const int *codes, const int codesSize) {
|
||||
return sizeof(*codes) * codesSize;
|
||||
return static_cast<unsigned int>(sizeof(*codes)) * codesSize;
|
||||
}
|
||||
|
||||
// TODO: This needs to take a const unsigned short* and not tinker with its contents
|
||||
static inline void addWord(
|
||||
unsigned short *word, int length, int frequency, WordsPriorityQueue *queue, int type) {
|
||||
static inline void addWord(unsigned short *word, int length, int frequency,
|
||||
WordsPriorityQueue *queue, int type) {
|
||||
queue->push(frequency, word, length, type);
|
||||
}
|
||||
|
||||
|
@ -106,7 +106,7 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit
|
|||
WordsPriorityQueuePool *queuePool,
|
||||
const digraph_t *const digraphs, const unsigned int digraphsSize) const {
|
||||
|
||||
const int startIndex = codesDest - codesBuffer;
|
||||
const int startIndex = static_cast<int>(codesDest - codesBuffer);
|
||||
if (currentDepth < MAX_DIGRAPH_SEARCH_DEPTH) {
|
||||
for (int i = 0; i < codesRemain; ++i) {
|
||||
xCoordinatesBuffer[startIndex + i] = xcoordinates[codesBufferSize - codesRemain + i];
|
||||
|
@ -170,8 +170,7 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit
|
|||
// bigramMap contains the association <bigram address> -> <bigram frequency>
|
||||
// bigramFilter is a bloom filter for fast rejection: see functions setInFilter and isInFilter
|
||||
// in bigram_dictionary.cpp
|
||||
int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo,
|
||||
const int *xcoordinates,
|
||||
int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
|
||||
const int *ycoordinates, const int *codes, const int codesSize,
|
||||
const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
|
||||
const bool useFullEditDistance, unsigned short *outWords, int *frequencies,
|
||||
|
@ -597,11 +596,10 @@ int UnigramDictionary::getSubStringSuggestion(
|
|||
|
||||
void UnigramDictionary::getMultiWordsSuggestionRec(ProximityInfo *proximityInfo,
|
||||
const int *xcoordinates, const int *ycoordinates, const int *codes,
|
||||
const bool useFullEditDistance, const int inputSize,
|
||||
Correction *correction, WordsPriorityQueuePool *queuePool,
|
||||
const bool hasAutoCorrectionCandidate, const int startInputPos, const int startWordIndex,
|
||||
const int outputWordLength, int *freqArray, int *wordLengthArray,
|
||||
unsigned short *outputWord) const {
|
||||
const bool useFullEditDistance, const int inputSize, Correction *correction,
|
||||
WordsPriorityQueuePool *queuePool, const bool hasAutoCorrectionCandidate,
|
||||
const int startInputPos, const int startWordIndex, const int outputWordLength,
|
||||
int *freqArray, int *wordLengthArray, unsigned short *outputWord) const {
|
||||
if (startWordIndex >= (MULTIPLE_WORDS_SUGGESTION_MAX_WORDS - 1)) {
|
||||
// Return if the last word index
|
||||
return;
|
||||
|
@ -641,7 +639,7 @@ void UnigramDictionary::getMultiWordsSuggestionRec(ProximityInfo *proximityInfo,
|
|||
// Missing space
|
||||
inputWordStartPos = i;
|
||||
inputWordLength = inputSize - i;
|
||||
if(getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
|
||||
if (getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
|
||||
useFullEditDistance, correction, queuePool, inputSize, hasAutoCorrectionCandidate,
|
||||
startWordIndex + 1, inputWordStartPos, inputWordLength, tempOutputWordLength,
|
||||
false /* missing space */, freqArray, wordLengthArray, outputWord, 0)
|
||||
|
@ -724,13 +722,13 @@ inline int UnigramDictionary::getMostFrequentWordLike(const int startInputIndex,
|
|||
// In and out parameters may point to the same location. This function takes care
|
||||
// not to use any input parameters after it wrote into its outputs.
|
||||
static inline bool testCharGroupForContinuedLikeness(const uint8_t flags,
|
||||
const uint8_t *const root, const int startPos,
|
||||
const uint16_t *const inWord, const int startInputIndex,
|
||||
int32_t *outNewWord, int *outInputIndex, int *outPos) {
|
||||
const uint8_t *const root, const int startPos, const uint16_t *const inWord,
|
||||
const int startInputIndex, const int inputSize, int32_t *outNewWord, int *outInputIndex,
|
||||
int *outPos) {
|
||||
const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags));
|
||||
int pos = startPos;
|
||||
int32_t character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos);
|
||||
int32_t baseChar = toBaseLowerCase(character);
|
||||
int32_t codePoint = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
|
||||
int32_t baseChar = toBaseLowerCase(codePoint);
|
||||
const uint16_t wChar = toBaseLowerCase(inWord[startInputIndex]);
|
||||
|
||||
if (baseChar != wChar) {
|
||||
|
@ -739,18 +737,18 @@ static inline bool testCharGroupForContinuedLikeness(const uint8_t flags,
|
|||
return false;
|
||||
}
|
||||
int inputIndex = startInputIndex;
|
||||
outNewWord[inputIndex] = character;
|
||||
outNewWord[inputIndex] = codePoint;
|
||||
if (hasMultipleChars) {
|
||||
character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos);
|
||||
while (NOT_A_CHARACTER != character) {
|
||||
baseChar = toBaseLowerCase(character);
|
||||
if (toBaseLowerCase(inWord[++inputIndex]) != baseChar) {
|
||||
codePoint = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
|
||||
while (NOT_A_CODE_POINT != codePoint) {
|
||||
baseChar = toBaseLowerCase(codePoint);
|
||||
if (inputIndex + 1 >= inputSize || toBaseLowerCase(inWord[++inputIndex]) != baseChar) {
|
||||
*outPos = BinaryFormat::skipOtherCharacters(root, pos);
|
||||
*outInputIndex = startInputIndex;
|
||||
return false;
|
||||
}
|
||||
outNewWord[inputIndex] = character;
|
||||
character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos);
|
||||
outNewWord[inputIndex] = codePoint;
|
||||
codePoint = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
|
||||
}
|
||||
}
|
||||
*outInputIndex = inputIndex + 1;
|
||||
|
@ -765,8 +763,9 @@ static inline bool testCharGroupForContinuedLikeness(const uint8_t flags,
|
|||
static inline void onTerminalWordLike(const int freq, int32_t *newWord, const int length,
|
||||
short unsigned int *outWord, int *maxFreq) {
|
||||
if (freq > *maxFreq) {
|
||||
for (int q = 0; q < length; ++q)
|
||||
for (int q = 0; q < length; ++q) {
|
||||
outWord[q] = newWord[q];
|
||||
}
|
||||
outWord[length] = 0;
|
||||
*maxFreq = freq;
|
||||
}
|
||||
|
@ -775,7 +774,7 @@ static inline void onTerminalWordLike(const int freq, int32_t *newWord, const in
|
|||
// Will find the highest frequency of the words like the one passed as an argument,
|
||||
// that is, everything that only differs by case/accents.
|
||||
int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t *const inWord,
|
||||
const int length, short unsigned int *outWord) const {
|
||||
const int inputSize, short unsigned int *outWord) const {
|
||||
int32_t newWord[MAX_WORD_LENGTH_INTERNAL];
|
||||
int depth = 0;
|
||||
int maxFreq = -1;
|
||||
|
@ -795,12 +794,12 @@ int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t *const inWord
|
|||
int inputIndex = stackInputIndex[depth];
|
||||
const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
|
||||
// Test whether all chars in this group match with the word we are searching for. If so,
|
||||
// we want to traverse its children (or if the length match, evaluate its frequency).
|
||||
// we want to traverse its children (or if the inputSize match, evaluate its frequency).
|
||||
// Note that this function will output the position regardless, but will only write
|
||||
// into inputIndex if there is a match.
|
||||
const bool isAlike = testCharGroupForContinuedLikeness(flags, root, pos, inWord,
|
||||
inputIndex, newWord, &inputIndex, &pos);
|
||||
if (isAlike && (BinaryFormat::FLAG_IS_TERMINAL & flags) && (inputIndex == length)) {
|
||||
inputIndex, inputSize, newWord, &inputIndex, &pos);
|
||||
if (isAlike && (BinaryFormat::FLAG_IS_TERMINAL & flags) && (inputIndex == inputSize)) {
|
||||
const int frequency = BinaryFormat::readFrequencyWithoutMovingPointer(root, pos);
|
||||
onTerminalWordLike(frequency, newWord, inputIndex, outWord, &maxFreq);
|
||||
}
|
||||
|
@ -809,8 +808,8 @@ int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t *const inWord
|
|||
const int childrenNodePos = BinaryFormat::readChildrenPosition(root, flags, pos);
|
||||
// If we had a match and the word has children, we want to traverse them. We don't have
|
||||
// to traverse words longer than the one we are searching for, since they will not match
|
||||
// anyway, so don't traverse unless inputIndex < length.
|
||||
if (isAlike && (-1 != childrenNodePos) && (inputIndex < length)) {
|
||||
// anyway, so don't traverse unless inputIndex < inputSize.
|
||||
if (isAlike && (-1 != childrenNodePos) && (inputIndex < inputSize)) {
|
||||
// Save position for this depth, to get back to this once children are done
|
||||
stackChildCount[depth] = charGroupIndex;
|
||||
stackSiblingPos[depth] = siblingPos;
|
||||
|
@ -853,7 +852,7 @@ int UnigramDictionary::getFrequency(const int32_t *const inWord, const int lengt
|
|||
if (hasMultipleChars) {
|
||||
pos = BinaryFormat::skipOtherCharacters(root, pos);
|
||||
} else {
|
||||
BinaryFormat::getCharCodeAndForwardPointer(DICT_ROOT, &pos);
|
||||
BinaryFormat::getCodePointAndForwardPointer(DICT_ROOT, &pos);
|
||||
}
|
||||
const int unigramFreq = BinaryFormat::readFrequencyWithoutMovingPointer(root, pos);
|
||||
return unigramFreq;
|
||||
|
@ -907,23 +906,23 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos,
|
|||
// else if FLAG_IS_TERMINAL: the frequency
|
||||
// else if MASK_GROUP_ADDRESS_TYPE is not NONE: the children address
|
||||
// Note that you can't have a node that both is not a terminal and has no children.
|
||||
int32_t c = BinaryFormat::getCharCodeAndForwardPointer(DICT_ROOT, &pos);
|
||||
assert(NOT_A_CHARACTER != c);
|
||||
int32_t c = BinaryFormat::getCodePointAndForwardPointer(DICT_ROOT, &pos);
|
||||
assert(NOT_A_CODE_POINT != c);
|
||||
|
||||
// We are going to loop through each character and make it look like it's a different
|
||||
// node each time. To do that, we will process characters in this node in order until
|
||||
// we find the character terminator. This is signalled by getCharCode* returning
|
||||
// NOT_A_CHARACTER.
|
||||
// we find the character terminator. This is signalled by getCodePoint* returning
|
||||
// NOT_A_CODE_POINT.
|
||||
// As a special case, if there is only one character in this node, we must not read the
|
||||
// next bytes so we will simulate the NOT_A_CHARACTER return by testing the flags.
|
||||
// next bytes so we will simulate the NOT_A_CODE_POINT return by testing the flags.
|
||||
// This way, each loop run will look like a "virtual node".
|
||||
do {
|
||||
// We prefetch the next char. If 'c' is the last char of this node, we will have
|
||||
// NOT_A_CHARACTER in the next char. From this we can decide whether this virtual node
|
||||
// NOT_A_CODE_POINT in the next char. From this we can decide whether this virtual node
|
||||
// should behave as a terminal or not and whether we have children.
|
||||
const int32_t nextc = hasMultipleChars
|
||||
? BinaryFormat::getCharCodeAndForwardPointer(DICT_ROOT, &pos) : NOT_A_CHARACTER;
|
||||
const bool isLastChar = (NOT_A_CHARACTER == nextc);
|
||||
? BinaryFormat::getCodePointAndForwardPointer(DICT_ROOT, &pos) : NOT_A_CODE_POINT;
|
||||
const bool isLastChar = (NOT_A_CODE_POINT == nextc);
|
||||
// If there are more chars in this nodes, then this virtual node is not a terminal.
|
||||
// If we are on the last char, this virtual node is a terminal if this node is.
|
||||
const bool isTerminal = isLastChar && isTerminalNode;
|
||||
|
@ -952,9 +951,9 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos,
|
|||
|
||||
// Prepare for the next character. Promote the prefetched char to current char - the loop
|
||||
// will take care of prefetching the next. If we finally found our last char, nextc will
|
||||
// contain NOT_A_CHARACTER.
|
||||
// contain NOT_A_CODE_POINT.
|
||||
c = nextc;
|
||||
} while (NOT_A_CHARACTER != c);
|
||||
} while (NOT_A_CODE_POINT != c);
|
||||
|
||||
if (isTerminalNode) {
|
||||
// The frequency should be here, because we come here only if this is actually
|
||||
|
|
|
@ -43,11 +43,11 @@ class UnigramDictionary {
|
|||
int fullWordMultiplier, int maxWordLength, int maxWords, const unsigned int flags);
|
||||
int getFrequency(const int32_t *const inWord, const int length) const;
|
||||
int getBigramPosition(int pos, unsigned short *word, int offset, int length) const;
|
||||
int getSuggestions(
|
||||
ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates,
|
||||
const int *codes, const int codesSize, const std::map<int, int> *bigramMap,
|
||||
const uint8_t *bigramFilter, const bool useFullEditDistance, unsigned short *outWords,
|
||||
int *frequencies, int *outputTypes) const;
|
||||
int getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
|
||||
const int *ycoordinates, const int *codes, const int codesSize,
|
||||
const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
|
||||
const bool useFullEditDistance, unsigned short *outWords, int *frequencies,
|
||||
int *outputTypes) const;
|
||||
virtual ~UnigramDictionary();
|
||||
|
||||
private:
|
||||
|
@ -94,7 +94,7 @@ class UnigramDictionary {
|
|||
const int currentWordIndex) const;
|
||||
int getMostFrequentWordLike(const int startInputIndex, const int inputSize,
|
||||
Correction *correction, unsigned short *word) const;
|
||||
int getMostFrequentWordLikeInner(const uint16_t *const inWord, const int length,
|
||||
int getMostFrequentWordLikeInner(const uint16_t *const inWord, const int inputSize,
|
||||
short unsigned int *outWord) const;
|
||||
int getSubStringSuggestion(
|
||||
ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates,
|
||||
|
|
|
@ -30,7 +30,7 @@ class WordsPriorityQueuePool {
|
|||
mainQueueMaxWords, maxWordLength)) {
|
||||
for (int i = 0, subQueueBufOffset = 0;
|
||||
i < MULTIPLE_WORDS_SUGGESTION_MAX_WORDS * SUB_QUEUE_MAX_COUNT;
|
||||
++i, subQueueBufOffset += sizeof(WordsPriorityQueue)) {
|
||||
++i, subQueueBufOffset += static_cast<int>(sizeof(WordsPriorityQueue))) {
|
||||
mSubQueues[i] = new(mSubQueueBuf + subQueueBufOffset)
|
||||
WordsPriorityQueue(subQueueMaxWords, maxWordLength);
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue