Merge "Suggest words with missing space"
This commit is contained in:
commit
71890a78b0
5 changed files with 164 additions and 84 deletions
|
@ -30,8 +30,8 @@ BigramDictionary::BigramDictionary(const unsigned char *dict, int maxWordLength,
|
|||
: DICT(dict), MAX_WORD_LENGTH(maxWordLength),
|
||||
MAX_ALTERNATIVES(maxAlternatives), IS_LATEST_DICT_VERSION(isLatestDictVersion),
|
||||
HAS_BIGRAM(hasBigram), mParentDictionary(parentDictionary) {
|
||||
LOGI("BigramDictionary - constructor");
|
||||
LOGI("Has Bigram : %d \n", hasBigram);
|
||||
if (DEBUG_DICT) LOGI("BigramDictionary - constructor");
|
||||
if (DEBUG_DICT) LOGI("Has Bigram : %d \n", hasBigram);
|
||||
}
|
||||
|
||||
BigramDictionary::~BigramDictionary() {
|
||||
|
@ -54,7 +54,7 @@ bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequ
|
|||
}
|
||||
insertAt++;
|
||||
}
|
||||
LOGI("Bigram: InsertAt -> %d maxBigrams: %d\n", insertAt, mMaxBigrams);
|
||||
if (DEBUG_DICT) LOGI("Bigram: InsertAt -> %d maxBigrams: %d\n", insertAt, mMaxBigrams);
|
||||
if (insertAt < mMaxBigrams) {
|
||||
memmove((char*) mBigramFreq + (insertAt + 1) * sizeof(mBigramFreq[0]),
|
||||
(char*) mBigramFreq + insertAt * sizeof(mBigramFreq[0]),
|
||||
|
@ -107,7 +107,7 @@ int BigramDictionary::getBigrams(unsigned short *prevWord, int prevWordLength, i
|
|||
if (HAS_BIGRAM && IS_LATEST_DICT_VERSION) {
|
||||
int pos = mParentDictionary->isValidWordRec(
|
||||
DICTIONARY_HEADER_SIZE, prevWord, 0, prevWordLength);
|
||||
LOGI("Pos -> %d\n", pos);
|
||||
if (DEBUG_DICT) LOGI("Pos -> %d\n", pos);
|
||||
if (pos < 0) {
|
||||
return 0;
|
||||
}
|
||||
|
@ -151,7 +151,7 @@ void BigramDictionary::searchForTerminalNode(int addressLookingFor, int frequenc
|
|||
}
|
||||
pos = followDownBranchAddress; // pos start at count
|
||||
int count = DICT[pos] & 0xFF;
|
||||
LOGI("count - %d\n",count);
|
||||
if (DEBUG_DICT) LOGI("count - %d\n",count);
|
||||
pos++;
|
||||
for (int i = 0; i < count; i++) {
|
||||
// pos at data
|
||||
|
@ -225,7 +225,7 @@ void BigramDictionary::searchForTerminalNode(int addressLookingFor, int frequenc
|
|||
}
|
||||
depth++;
|
||||
if (followDownBranchAddress == 0) {
|
||||
LOGI("ERROR!!! Cannot find bigram!!");
|
||||
if (DEBUG_DICT) LOGI("ERROR!!! Cannot find bigram!!");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -31,6 +31,10 @@
|
|||
#define DEBUG_SHOW_FOUND_WORD false
|
||||
#endif // FLAG_DBG
|
||||
|
||||
#ifndef U_SHORT_MAX
|
||||
#define U_SHORT_MAX 1 << 16
|
||||
#endif
|
||||
|
||||
// 22-bit address = ~4MB dictionary size limit, which on average would be about 200k-300k words
|
||||
#define ADDRESS_MASK 0x3FFFFF
|
||||
|
||||
|
@ -49,9 +53,9 @@
|
|||
#define DICTIONARY_HEADER_SIZE 2
|
||||
#define NOT_VALID_WORD -99
|
||||
|
||||
#define SUGGEST_MISSING_CHARACTERS true
|
||||
|
||||
#define SUGGEST_EXCESSIVE_CHARACTERS true
|
||||
#define SUGGEST_WORDS_WITH_MISSING_CHARACTER true
|
||||
#define SUGGEST_WORDS_WITH_MISSING_SPACE_CHARACTER true
|
||||
#define SUGGEST_WORDS_WITH_EXCESSIVE_CHARACTER true
|
||||
|
||||
// This should be greater than or equal to MAX_WORD_LENGTH defined in BinaryDictionary.java
|
||||
// This is only used for the size of array. Not to be used in c functions.
|
||||
|
@ -59,6 +63,8 @@
|
|||
|
||||
#define MAX_DEPTH_MULTIPLIER 3
|
||||
|
||||
#define MIN_SUGGEST_DEPTH 2
|
||||
|
||||
#define min(a,b) ((a)<(b)?(a):(b))
|
||||
|
||||
#endif // LATINIME_DEFINES_H
|
||||
|
|
|
@ -27,20 +27,21 @@ Dictionary::Dictionary(void *dict, int typedLetterMultiplier, int fullWordMultip
|
|||
int maxWordLength, int maxWords, int maxAlternatives)
|
||||
: DICT((unsigned char*) dict),
|
||||
// Checks whether it has the latest dictionary or the old dictionary
|
||||
IS_LATEST_DICT_VERSION((((unsigned char*) dict)[0] & 0xFF) >= DICTIONARY_VERSION_MIN)
|
||||
{
|
||||
if (MAX_WORD_LENGTH_INTERNAL < maxWordLength) {
|
||||
LOGI("Max word length (%d) is greater than %d", maxWordLength, MAX_WORD_LENGTH_INTERNAL);
|
||||
IS_LATEST_DICT_VERSION((((unsigned char*) dict)[0] & 0xFF) >= DICTIONARY_VERSION_MIN) {
|
||||
if (DEBUG_DICT) {
|
||||
if (MAX_WORD_LENGTH_INTERNAL < maxWordLength) {
|
||||
LOGI("Max word length (%d) is greater than %d",
|
||||
maxWordLength, MAX_WORD_LENGTH_INTERNAL);
|
||||
LOGI("IN NATIVE SUGGEST Version: %d \n", (DICT[0] & 0xFF));
|
||||
}
|
||||
}
|
||||
LOGI("IN NATIVE SUGGEST Version: %d \n", (DICT[0] & 0xFF));
|
||||
mUnigramDictionary = new UnigramDictionary(DICT, typedLetterMultiplier, fullWordMultiplier,
|
||||
maxWordLength, maxWords, maxAlternatives, IS_LATEST_DICT_VERSION);
|
||||
mBigramDictionary = new BigramDictionary(DICT, maxWordLength, maxAlternatives,
|
||||
IS_LATEST_DICT_VERSION, hasBigram(), this);
|
||||
}
|
||||
|
||||
Dictionary::~Dictionary()
|
||||
{
|
||||
Dictionary::~Dictionary() {
|
||||
delete mUnigramDictionary;
|
||||
delete mBigramDictionary;
|
||||
}
|
||||
|
|
|
@ -30,11 +30,12 @@
|
|||
namespace latinime {
|
||||
|
||||
UnigramDictionary::UnigramDictionary(const unsigned char *dict, int typedLetterMultiplier,
|
||||
int fullWordMultiplier, int maxWordLength, int maxWords, int maxAlternatives,
|
||||
int fullWordMultiplier, int maxWordLength, int maxWords, int maxProximityChars,
|
||||
const bool isLatestDictVersion)
|
||||
: DICT(dict), MAX_WORD_LENGTH(maxWordLength),MAX_WORDS(maxWords),
|
||||
MAX_ALTERNATIVES(maxAlternatives), IS_LATEST_DICT_VERSION(isLatestDictVersion),
|
||||
TYPED_LETTER_MULTIPLIER(typedLetterMultiplier), FULL_WORD_MULTIPLIER(fullWordMultiplier) {
|
||||
MAX_PROXIMITY_CHARS(maxProximityChars), IS_LATEST_DICT_VERSION(isLatestDictVersion),
|
||||
TYPED_LETTER_MULTIPLIER(typedLetterMultiplier), FULL_WORD_MULTIPLIER(fullWordMultiplier),
|
||||
ROOT_POS(isLatestDictVersion ? DICTIONARY_HEADER_SIZE : 0) {
|
||||
LOGI("UnigramDictionary - constructor");
|
||||
}
|
||||
|
||||
|
@ -43,37 +44,39 @@ UnigramDictionary::~UnigramDictionary() {}
|
|||
int UnigramDictionary::getSuggestions(int *codes, int codesSize, unsigned short *outWords,
|
||||
int *frequencies, int *nextLetters, int nextLettersSize)
|
||||
{
|
||||
|
||||
initSuggestions(codes, codesSize, outWords, frequencies);
|
||||
getSuggestionCandidates(codesSize, -1, -1, nextLetters, nextLettersSize);
|
||||
|
||||
int suggestedWordsCount = getSuggestionCandidates(codesSize, -1, -1, nextLetters,
|
||||
nextLettersSize);
|
||||
|
||||
// If there aren't sufficient suggestions, search for words by allowing wild cards at
|
||||
// the different character positions. This feature is not ready for prime-time as we need
|
||||
// to figure out the best ranking for such words compared to proximity corrections and
|
||||
// completions.
|
||||
if (SUGGEST_MISSING_CHARACTERS) {
|
||||
// Suggestion with missing character
|
||||
if (SUGGEST_WORDS_WITH_MISSING_CHARACTER) {
|
||||
for (int i = 0; i < codesSize; ++i) {
|
||||
if (DEBUG_DICT) LOGI("--- Suggest missing characters %d", i);
|
||||
const int tempCount = getSuggestionCandidates(codesSize, i, -1, NULL, 0);
|
||||
if (tempCount > suggestedWordsCount) {
|
||||
suggestedWordsCount = tempCount;
|
||||
}
|
||||
getSuggestionCandidates(codesSize, i, -1, NULL, 0);
|
||||
}
|
||||
}
|
||||
|
||||
// Suggest excessive characters
|
||||
if (SUGGEST_EXCESSIVE_CHARACTERS) {
|
||||
// Suggestion with excessive character
|
||||
if (SUGGEST_WORDS_WITH_EXCESSIVE_CHARACTER) {
|
||||
for (int i = 0; i < codesSize; ++i) {
|
||||
if (DEBUG_DICT) LOGI("--- Suggest excessive characters %d", i);
|
||||
const int tempCount = getSuggestionCandidates(codesSize, -1, i, NULL, 0);
|
||||
if (tempCount > suggestedWordsCount) {
|
||||
suggestedWordsCount = tempCount;
|
||||
}
|
||||
getSuggestionCandidates(codesSize, -1, i, NULL, 0);
|
||||
}
|
||||
}
|
||||
|
||||
// Suggestions with missing space
|
||||
if (SUGGEST_WORDS_WITH_MISSING_SPACE_CHARACTER) {
|
||||
for (int i = 1; i < codesSize; ++i) {
|
||||
if (DEBUG_DICT) LOGI("--- Suggest missing space characters %d", i);
|
||||
getMissingSpaceWords(mInputLength, i);
|
||||
}
|
||||
}
|
||||
|
||||
// Get the word count
|
||||
int suggestedWordsCount = 0;
|
||||
while (suggestedWordsCount < MAX_WORDS && mFrequencies[suggestedWordsCount] > 0) {
|
||||
suggestedWordsCount++;
|
||||
}
|
||||
|
||||
if (DEBUG_DICT) {
|
||||
LOGI("Returning %d words", suggestedWordsCount);
|
||||
LOGI("Next letters: ");
|
||||
|
@ -84,6 +87,7 @@ int UnigramDictionary::getSuggestions(int *codes, int codesSize, unsigned short
|
|||
}
|
||||
LOGI("\n");
|
||||
}
|
||||
|
||||
return suggestedWordsCount;
|
||||
}
|
||||
|
||||
|
@ -97,23 +101,6 @@ void UnigramDictionary::initSuggestions(int *codes, int codesSize, unsigned shor
|
|||
mMaxEditDistance = mInputLength < 5 ? 2 : mInputLength / 2;
|
||||
}
|
||||
|
||||
int UnigramDictionary::getSuggestionCandidates(int inputLength, int skipPos, int excessivePos,
|
||||
int *nextLetters, int nextLettersSize) {
|
||||
if (DEBUG_DICT) LOGI("getSuggestionCandidates");
|
||||
int initialPos = 0;
|
||||
if (IS_LATEST_DICT_VERSION) {
|
||||
initialPos = DICTIONARY_HEADER_SIZE;
|
||||
}
|
||||
getWords(initialPos, inputLength, skipPos, excessivePos, nextLetters, nextLettersSize);
|
||||
|
||||
// Get the word count
|
||||
int suggestedWordsCount = 0;
|
||||
while (suggestedWordsCount < MAX_WORDS && mFrequencies[suggestedWordsCount] > 0) {
|
||||
suggestedWordsCount++;
|
||||
}
|
||||
return suggestedWordsCount;
|
||||
}
|
||||
|
||||
void UnigramDictionary::registerNextLetter(
|
||||
unsigned short c, int *nextLetters, int nextLettersSize) {
|
||||
if (c < nextLettersSize) {
|
||||
|
@ -121,12 +108,13 @@ void UnigramDictionary::registerNextLetter(
|
|||
}
|
||||
}
|
||||
|
||||
// TODO: We need to optimize addWord by using STL or something
|
||||
bool UnigramDictionary::addWord(unsigned short *word, int length, int frequency) {
|
||||
word[length] = 0;
|
||||
if (DEBUG_DICT) {
|
||||
if (DEBUG_DICT && DEBUG_SHOW_FOUND_WORD) {
|
||||
char s[length + 1];
|
||||
for (int i = 0; i <= length; i++) s[i] = word[i];
|
||||
if (DEBUG_SHOW_FOUND_WORD) LOGI("Found word = %s, freq = %d : \n", s, frequency);
|
||||
LOGI("Found word = %s, freq = %d", s, frequency);
|
||||
}
|
||||
if (length > MAX_WORD_LENGTH) {
|
||||
if (DEBUG_DICT) LOGI("Exceeded max word length.");
|
||||
|
@ -146,7 +134,7 @@ bool UnigramDictionary::addWord(unsigned short *word, int length, int frequency)
|
|||
if (DEBUG_DICT) {
|
||||
char s[length + 1];
|
||||
for (int i = 0; i <= length; i++) s[i] = word[i];
|
||||
LOGI("Added word = %s, freq = %d : \n", s, frequency);
|
||||
LOGI("Added word = %s, freq = %d", s, frequency);
|
||||
}
|
||||
memmove((char*) mFrequencies + (insertAt + 1) * sizeof(mFrequencies[0]),
|
||||
(char*) mFrequencies + insertAt * sizeof(mFrequencies[0]),
|
||||
|
@ -160,7 +148,7 @@ bool UnigramDictionary::addWord(unsigned short *word, int length, int frequency)
|
|||
*dest++ = *word++;
|
||||
}
|
||||
*dest = 0; // NULL terminate
|
||||
if (DEBUG_DICT) LOGI("Added word at %d\n", insertAt);
|
||||
if (DEBUG_DICT) LOGI("Added word at %d", insertAt);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
@ -187,27 +175,19 @@ bool UnigramDictionary::sameAsTyped(unsigned short *word, int length) {
|
|||
if ((unsigned int) *inputCodes != (unsigned int) *word) {
|
||||
return false;
|
||||
}
|
||||
inputCodes += MAX_ALTERNATIVES;
|
||||
inputCodes += MAX_PROXIMITY_CHARS;
|
||||
word++;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static const char QUOTE = '\'';
|
||||
static const char SPACE = ' ';
|
||||
|
||||
// Keep this for comparing spec to new getWords
|
||||
void UnigramDictionary::getWordsOld(const int initialPos, const int inputLength, const int skipPos,
|
||||
void UnigramDictionary::getSuggestionCandidates(const int inputLength, const int skipPos,
|
||||
const int excessivePos, int *nextLetters, const int nextLettersSize) {
|
||||
int initialPosition = initialPos;
|
||||
const int count = Dictionary::getCount(DICT, &initialPosition);
|
||||
getWordsRec(count, initialPosition, 0,
|
||||
min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH),
|
||||
mInputLength <= 0, 1, 0, 0, skipPos, excessivePos, nextLetters, nextLettersSize);
|
||||
}
|
||||
|
||||
void UnigramDictionary::getWords(const int rootPos, const int inputLength, const int skipPos,
|
||||
const int excessivePos, int *nextLetters, const int nextLettersSize) {
|
||||
int rootPosition = rootPos;
|
||||
if (DEBUG_DICT) LOGI("getSuggestionCandidates");
|
||||
int rootPosition = ROOT_POS;
|
||||
const int MAX_DEPTH = min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);
|
||||
// Get the number of child of root, then increment the position
|
||||
int childCount = Dictionary::getCount(DICT, &rootPosition);
|
||||
|
@ -220,6 +200,7 @@ void UnigramDictionary::getWords(const int rootPos, const int inputLength, const
|
|||
mStackDiffs[0] = 0;
|
||||
mStackSiblingPos[0] = rootPosition;
|
||||
|
||||
// Depth first search
|
||||
while (depth >= 0) {
|
||||
if (mStackChildCount[depth] > 0) {
|
||||
--mStackChildCount[depth];
|
||||
|
@ -235,7 +216,7 @@ void UnigramDictionary::getWords(const int rootPos, const int inputLength, const
|
|||
MAX_DEPTH, traverseAllNodes, snr, inputIndex, diffs, skipPos, excessivePos,
|
||||
nextLetters, nextLettersSize, &childCount, &firstChildPos, &traverseAllNodes,
|
||||
&snr, &inputIndex, &diffs, &siblingPos);
|
||||
// Next sibling pos
|
||||
// Update next sibling pos
|
||||
mStackSiblingPos[depth] = siblingPos;
|
||||
if (needsToTraverseChildrenNodes) {
|
||||
// Goes to child node
|
||||
|
@ -254,7 +235,48 @@ void UnigramDictionary::getWords(const int rootPos, const int inputLength, const
|
|||
}
|
||||
}
|
||||
|
||||
// snr : frequency?
|
||||
bool UnigramDictionary::getMissingSpaceWords(const int inputLength, const int missingSpacePos) {
|
||||
if (missingSpacePos <= 0 || missingSpacePos >= inputLength) return false;
|
||||
const int firstFreq = getWordFreq(0, missingSpacePos);
|
||||
const int secondFreq = getWordFreq(missingSpacePos, inputLength - missingSpacePos);
|
||||
if (DEBUG_DICT) LOGI("First freq: %d, Second freq: %d", firstFreq, secondFreq);
|
||||
|
||||
if (firstFreq <= 0 || secondFreq <= 0) return false;
|
||||
int pairFreq = (firstFreq + secondFreq) / 2;
|
||||
for (int i = 0; i < inputLength; ++i) pairFreq *= TYPED_LETTER_MULTIPLIER;
|
||||
const int newWordLength = inputLength + 1;
|
||||
// Allocating variable length array on stack
|
||||
unsigned short word[newWordLength];
|
||||
int j = 0;
|
||||
for (int i = 0; i < missingSpacePos; ++i) {
|
||||
// Down-casting
|
||||
if (DEBUG_DICT) {
|
||||
assert((*(mInputCodes + i * MAX_PROXIMITY_CHARS)) <= U_SHORT_MAX);
|
||||
}
|
||||
word[i] = (unsigned short) *(mInputCodes + i * MAX_PROXIMITY_CHARS);
|
||||
}
|
||||
word[missingSpacePos] = SPACE;
|
||||
for (int i = (missingSpacePos + 1); i < newWordLength; ++i) {
|
||||
// Down-casting
|
||||
if (DEBUG_DICT) {
|
||||
assert((*(mInputCodes + (i - 1) * MAX_PROXIMITY_CHARS)) <= U_SHORT_MAX);
|
||||
}
|
||||
word[i] = (unsigned short) *(mInputCodes + (i - 1) * MAX_PROXIMITY_CHARS);
|
||||
}
|
||||
addWord(word, newWordLength, pairFreq);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Keep this for comparing spec to new getWords
|
||||
void UnigramDictionary::getWordsOld(const int initialPos, const int inputLength, const int skipPos,
|
||||
const int excessivePos, int *nextLetters, const int nextLettersSize) {
|
||||
int initialPosition = initialPos;
|
||||
const int count = Dictionary::getCount(DICT, &initialPosition);
|
||||
getWordsRec(count, initialPosition, 0,
|
||||
min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH),
|
||||
mInputLength <= 0, 1, 0, 0, skipPos, excessivePos, nextLetters, nextLettersSize);
|
||||
}
|
||||
|
||||
void UnigramDictionary::getWordsRec(const int childrenCount, const int pos, const int depth,
|
||||
const int maxDepth, const bool traverseAllNodes, const int snr, const int inputIndex,
|
||||
const int diffs, const int skipPos, const int excessivePos, int *nextLetters,
|
||||
|
@ -287,7 +309,7 @@ void UnigramDictionary::getWordsRec(const int childrenCount, const int pos, cons
|
|||
inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsGreaterThanInputLength(
|
||||
unsigned short *word, const int inputLength, const int depth, const int snr,
|
||||
int *nextLetters, const int nextLettersSize, const int skipPos, const int freq) {
|
||||
addWord(word, depth + 1, freq * snr);
|
||||
if (depth >= MIN_SUGGEST_DEPTH) addWord(word, depth + 1, freq * snr);
|
||||
if (depth >= inputLength && skipPos < 0) {
|
||||
registerNextLetter(mWord[mInputLength], nextLetters, nextLettersSize);
|
||||
}
|
||||
|
@ -301,13 +323,13 @@ inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsSameAsInputLength(
|
|||
// Proximity collection will promote a word of the same length as
|
||||
// what user typed.
|
||||
if (skipPos < 0) finalFreq *= FULL_WORD_MULTIPLIER;
|
||||
addWord(word, depth + 1, finalFreq);
|
||||
if (depth >= MIN_SUGGEST_DEPTH) addWord(word, depth + 1, finalFreq);
|
||||
}
|
||||
}
|
||||
|
||||
inline bool UnigramDictionary::needsToSkipCurrentNode(const unsigned short c,
|
||||
const int inputIndex, const int skipPos, const int depth) {
|
||||
const unsigned short userTypedChar = (mInputCodes + (inputIndex * MAX_ALTERNATIVES))[0];
|
||||
const unsigned short userTypedChar = (mInputCodes + (inputIndex * MAX_PROXIMITY_CHARS))[0];
|
||||
// Skip the ' or other letter and continue deeper
|
||||
return (c == QUOTE && userTypedChar != QUOTE) || skipPos == depth;
|
||||
}
|
||||
|
@ -361,7 +383,7 @@ inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth
|
|||
*newDiffs = diffs;
|
||||
*newInputIndex = inputIndex;
|
||||
} else {
|
||||
int *currentChars = mInputCodes + (inputIndex * MAX_ALTERNATIVES);
|
||||
int *currentChars = mInputCodes + (inputIndex * MAX_PROXIMITY_CHARS);
|
||||
int matchedProximityCharId = getMatchedProximityId(currentChars, c, skipPos);
|
||||
if (matchedProximityCharId < 0) return false;
|
||||
mWord[depth] = c;
|
||||
|
@ -396,4 +418,48 @@ inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth
|
|||
return needsToTraverseChildrenNodes;
|
||||
}
|
||||
|
||||
inline int UnigramDictionary::getWordFreq(const int startInputIndex, const int inputLength) {
|
||||
int pos = ROOT_POS;
|
||||
int count = Dictionary::getCount(DICT, &pos);
|
||||
int freq = 0;
|
||||
bool terminal = false;
|
||||
|
||||
for (int i = 0; i < inputLength; ++i) {
|
||||
bool needsToTraverseChildrenNodes = processCurrentNodeForExactMatch(pos, count,
|
||||
startInputIndex + i, &pos, &count, &terminal, &freq);
|
||||
if (!needsToTraverseChildrenNodes && (i < inputLength - 1)) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
if (terminal) {
|
||||
return freq;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
inline bool UnigramDictionary::processCurrentNodeForExactMatch(const int firstChildPos,
|
||||
const int count, const int inputIndex, int *newChildPosition, int *newCount,
|
||||
bool *newTerminal, int *newFreq) {
|
||||
const int *currentChars = mInputCodes + (inputIndex * MAX_PROXIMITY_CHARS);
|
||||
int pos = firstChildPos;
|
||||
unsigned short c;
|
||||
for (int i = 0; i < count; ++i) {
|
||||
pos = Dictionary::setDictionaryValues(DICT, IS_LATEST_DICT_VERSION, pos, &c,
|
||||
newChildPosition, newTerminal, newFreq);
|
||||
const unsigned int inputC = currentChars[0];
|
||||
const unsigned short lowerC = toLowerCase(c);
|
||||
const bool matched = (inputC == lowerC || inputC == c);
|
||||
const bool hasChild = *newChildPosition != 0;
|
||||
if (matched) {
|
||||
if (hasChild) {
|
||||
*newCount = Dictionary::getCount(DICT, newChildPosition);
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
} // namespace latinime
|
||||
|
|
|
@ -24,15 +24,15 @@ namespace latinime {
|
|||
class UnigramDictionary {
|
||||
public:
|
||||
UnigramDictionary(const unsigned char *dict, int typedLetterMultipler, int fullWordMultiplier,
|
||||
int maxWordLength, int maxWords, int maxAlternatives, const bool isLatestDictVersion);
|
||||
int maxWordLength, int maxWords, int maxProximityChars, const bool isLatestDictVersion);
|
||||
int getSuggestions(int *codes, int codesSize, unsigned short *outWords, int *frequencies,
|
||||
int *nextLetters, int nextLettersSize);
|
||||
~UnigramDictionary();
|
||||
|
||||
private:
|
||||
void initSuggestions(int *codes, int codesSize, unsigned short *outWords, int *frequencies);
|
||||
int getSuggestionCandidates(int inputLength, int skipPos, int excessivePos, int *nextLetters,
|
||||
int nextLettersSize);
|
||||
void getSuggestionCandidates(const int inputLength, const int skipPos, const int excessivePos,
|
||||
int *nextLetters, const int nextLettersSize);
|
||||
void getVersionNumber();
|
||||
bool checkIfDictVersionIsLatest();
|
||||
int getAddress(int *pos);
|
||||
|
@ -44,8 +44,7 @@ private:
|
|||
void getWordsRec(const int childrenCount, const int pos, const int depth, const int maxDepth,
|
||||
const bool traverseAllNodes, const int snr, const int inputIndex, const int diffs,
|
||||
const int skipPos, const int excessivePos, int *nextLetters, const int nextLettersSize);
|
||||
void getWords(const int rootPos, const int inputLength, const int skipPos,
|
||||
const int excessivePos, int *nextLetters, const int nextLettersSize);
|
||||
bool getMissingSpaceWords(const int inputLength, const int missingSpacePos);
|
||||
// Keep getWordsOld for comparing performance between getWords and getWordsOld
|
||||
void getWordsOld(const int initialPos, const int inputLength, const int skipPos,
|
||||
const int excessivePos, int *nextLetters, const int nextLettersSize);
|
||||
|
@ -58,17 +57,25 @@ private:
|
|||
bool needsToSkipCurrentNode(const unsigned short c,
|
||||
const int inputIndex, const int skipPos, const int depth);
|
||||
int getMatchedProximityId(const int *currentChars, const unsigned short c, const int skipPos);
|
||||
// Process a node by considering proximity, missing and excessive character
|
||||
bool processCurrentNode(const int pos, const int depth,
|
||||
const int maxDepth, const bool traverseAllNodes, const int snr, int inputIndex,
|
||||
const int diffs, const int skipPos, const int excessivePos, int *nextLetters,
|
||||
const int nextLettersSize, int *newCount, int *newChildPosition,
|
||||
bool *newTraverseAllNodes, int *newSnr, int*newInputIndex, int *newDiffs,
|
||||
int *nextSiblingPosition);
|
||||
int getWordFreq(const int startInputIndex, const int inputLength);
|
||||
// Process a node by considering missing space
|
||||
bool processCurrentNodeForExactMatch(const int firstChildPos, const int count,
|
||||
const int inputIndex, int *newChildPosition, int *newCount, bool *newTerminal,
|
||||
int *newFreq);
|
||||
|
||||
const unsigned char *DICT;
|
||||
const int MAX_WORDS;
|
||||
const int MAX_WORD_LENGTH;
|
||||
const int MAX_ALTERNATIVES;
|
||||
const int MAX_PROXIMITY_CHARS;
|
||||
const bool IS_LATEST_DICT_VERSION;
|
||||
const int ROOT_POS;
|
||||
const int TYPED_LETTER_MULTIPLIER;
|
||||
const int FULL_WORD_MULTIPLIER;
|
||||
|
||||
|
|
Loading…
Reference in a new issue