(Step2)Move functions related to proximity to proximity_info.cpp

Change-Id: Iae0eb2a5cd758bda820fa42b4bc3eb3d2665bf96
main
satok 2011-07-14 15:43:42 +09:00
parent f7f2e82e8b
commit d24df43eaf
6 changed files with 150 additions and 119 deletions

View File

@ -1,3 +1,22 @@
/*
* Copyright (C) 2009 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LATINIME_BASECHARS_H
#define LATINIME_BASECHARS_H
/** /**
* Table mapping most combined Latin, Greek, and Cyrillic characters * Table mapping most combined Latin, Greek, and Cyrillic characters
* to their base characters. If c is in range, BASE_CHARS[c] == c * to their base characters. If c is in range, BASE_CHARS[c] == c
@ -170,3 +189,4 @@ static unsigned short BASE_CHARS[] = {
// generated with: // generated with:
// cat UnicodeData.txt | perl -e 'while (<>) { @foo = split(/;/); $foo[5] =~ s/<.*> //; $base[hex($foo[0])] = hex($foo[5]);} for ($i = 0; $i < 0x500; $i += 8) { for ($j = $i; $j < $i + 8; $j++) { printf("0x%04x, ", $base[$j] ? $base[$j] : $j)}; print "\n"; }' // cat UnicodeData.txt | perl -e 'while (<>) { @foo = split(/;/); $foo[5] =~ s/<.*> //; $base[hex($foo[0])] = hex($foo[5]);} for ($i = 0; $i < 0x500; $i += 8) { for ($j = $i; $j < $i + 8; $j++) { printf("0x%04x, ", $base[$j] ? $base[$j] : $j)}; print "\n"; }'
#endif // LATINIME_BASECHARS_H

View File

@ -17,7 +17,9 @@
#ifndef LATINIME_DICTIONARY_H #ifndef LATINIME_DICTIONARY_H
#define LATINIME_DICTIONARY_H #define LATINIME_DICTIONARY_H
#include "basechars.h"
#include "bigram_dictionary.h" #include "bigram_dictionary.h"
#include "char_utils.h"
#include "defines.h" #include "defines.h"
#include "proximity_info.h" #include "proximity_info.h"
#include "unigram_dictionary.h" #include "unigram_dictionary.h"
@ -61,7 +63,7 @@ public:
static int setDictionaryValues(const unsigned char *dict, const bool isLatestDictVersion, static int setDictionaryValues(const unsigned char *dict, const bool isLatestDictVersion,
const int pos, unsigned short *c, int *childrenPosition, const int pos, unsigned short *c, int *childrenPosition,
bool *terminal, int *freq); bool *terminal, int *freq);
static inline unsigned short toBaseLowerCase(unsigned short c);
// TODO: delete this // TODO: delete this
int getBigramPosition(unsigned short *word, int length); int getBigramPosition(unsigned short *word, int length);
@ -156,6 +158,19 @@ inline int Dictionary::setDictionaryValues(const unsigned char *dict,
return position; return position;
} }
// Returns the lowercased base form of c.
// Characters that fall inside the BASE_CHARS table are first replaced by the
// table entry; the result is then lowercased, using the ASCII bit trick for
// 'A'..'Z' and latin_tolower() for anything above 127.
inline unsigned short Dictionary::toBaseLowerCase(unsigned short c) {
    unsigned short base = c;
    if (base < sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0])) {
        base = BASE_CHARS[base];
    }
    if (base >= 'A' && base <= 'Z') {
        // Setting bit 5 (value 32) turns an ASCII uppercase letter into lowercase.
        return static_cast<unsigned short>(base | 32);
    }
    if (base > 127) {
        return latin_tolower(base);
    }
    return base;
}
} // namespace latinime } // namespace latinime
#endif // LATINIME_DICTIONARY_H #endif // LATINIME_DICTIONARY_H

View File

@ -19,6 +19,7 @@
#define LOG_TAG "LatinIME: proximity_info.cpp" #define LOG_TAG "LatinIME: proximity_info.cpp"
#include "dictionary.h"
#include "proximity_info.h" #include "proximity_info.h"
namespace latinime { namespace latinime {
@ -69,10 +70,82 @@ void ProximityInfo::setInputParams(const int* inputCodes, const int inputLength)
mInputLength = inputLength; mInputLength = inputLength;
} }
const int* ProximityInfo::getProximityCharsAt(const int index) const { inline const int* ProximityInfo::getProximityCharsAt(const int index) const {
return mInputCodes + (index * MAX_PROXIMITY_CHARS_SIZE); return mInputCodes + (index * MAX_PROXIMITY_CHARS_SIZE);
} }
// Returns the first entry of the proximity list stored for the given input
// position.
unsigned short ProximityInfo::getPrimaryCharAt(const int index) const {
    const int *proximityChars = getProximityCharsAt(index);
    return *proximityChars;
}
// Returns true if c appears in the proximity list stored for input position
// index. The list is scanned from its first entry and ends at the first
// non-positive entry or after MAX_PROXIMITY_CHARS_SIZE entries, whichever
// comes first.
bool ProximityInfo::existsCharInProximityAt(const int index, const int c) const {
    const int *chars = getProximityCharsAt(index);
    // The bound is tested before chars[i] is read so that a completely filled
    // list never causes a read past the end of this position's slot.
    for (int i = 0; i < MAX_PROXIMITY_CHARS_SIZE && chars[i] > 0; ++i) {
        if (chars[i] == c) {
            return true;
        }
    }
    return false;
}
bool ProximityInfo::existsAdjacentProximityChars(const int index) const {
if (index < 0 || index >= mInputLength) return false;
const int currentChar = getPrimaryCharAt(index);
const int leftIndex = index - 1;
if (leftIndex >= 0 && existsCharInProximityAt(leftIndex, currentChar)) {
return true;
}
const int rightIndex = index + 1;
if (rightIndex < mInputLength && existsCharInProximityAt(rightIndex, currentChar)) {
return true;
}
return false;
}
// In the following function, c is the current character of the dictionary word
// currently examined.
// currentChars is an array containing the keys close to the character the
// user actually typed at the same position. We want to see if c is in it: if so,
// then the word contains at that position a character close to what the user
// typed.
// What the user typed is actually the first character of the array.
// Notice : accented characters do not have a proximity list, so they are alone
// in their list. The non-accented version of the character should be considered
// "close", but not the other keys close to the non-accented version.
//
// Returns SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR, NEAR_PROXIMITY_CHAR or
// UNRELATED_CHAR. When any of skipPos, excessivePos or transposedPos is >= 0,
// only the exact / base-lowercase match against the typed character is tried.
ProximityInfo::ProximityType ProximityInfo::getMatchedProximityId(
        const int index, const unsigned short c, const int skipPos,
        const int excessivePos, const int transposedPos) const {
    const int *currentChars = getProximityCharsAt(index);
    const unsigned short baseLowerC = Dictionary::toBaseLowerCase(c);

    // The first char in the array is what user typed. If it matches right away,
    // that means the user typed that same char for this pos.
    if (currentChars[0] == baseLowerC || currentChars[0] == c) {
        return SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR;
    }

    // If one of those is true, we should not check for close characters at all.
    if (skipPos >= 0 || excessivePos >= 0 || transposedPos >= 0) {
        return UNRELATED_CHAR;
    }

    // If the non-accented, lowercased version of that first character matches
    // the non-accented, lowercased version of c, then we have a non-accented
    // version of the accented character the user typed. Treat it as a close char.
    if (Dictionary::toBaseLowerCase(currentChars[0]) == baseLowerC) {
        return NEAR_PROXIMITY_CHAR;
    }

    // Not an exact nor an accent-alike match: search the list of close keys.
    // The bound is tested before currentChars[j] is read so that a completely
    // filled list never causes a read past the end of this position's slot.
    for (int j = 1; j < MAX_PROXIMITY_CHARS_SIZE && currentChars[j] > 0; ++j) {
        const bool matched = (currentChars[j] == baseLowerC || currentChars[j] == c);
        if (matched) return NEAR_PROXIMITY_CHAR;
    }

    // Was not included, signal this as an unrelated character.
    return UNRELATED_CHAR;
}
bool ProximityInfo::sameAsTyped(const unsigned short *word, int length) const { bool ProximityInfo::sameAsTyped(const unsigned short *word, int length) const {
if (length != mInputLength) { if (length != mInputLength) {
return false; return false;

View File

@ -25,6 +25,12 @@ namespace latinime {
class ProximityInfo { class ProximityInfo {
public: public:
// Used as a return value for character comparison.
enum ProximityType {
    // Same char, possibly with different case or accent
    SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR,
    // It is a char located nearby on the keyboard
    NEAR_PROXIMITY_CHAR,
    // It is an unrelated char
    UNRELATED_CHAR
};
ProximityInfo(const int maxProximityCharsSize, const int keyboardWidth, ProximityInfo(const int maxProximityCharsSize, const int keyboardWidth,
const int keybaordHeight, const int gridWidth, const int gridHeight, const int keybaordHeight, const int gridWidth, const int gridHeight,
const uint32_t *proximityCharsArray); const uint32_t *proximityCharsArray);
@ -32,6 +38,12 @@ public:
bool hasSpaceProximity(const int x, const int y) const; bool hasSpaceProximity(const int x, const int y) const;
void setInputParams(const int* inputCodes, const int inputLength); void setInputParams(const int* inputCodes, const int inputLength);
const int* getProximityCharsAt(const int index) const; const int* getProximityCharsAt(const int index) const;
// Returns the first entry of the proximity list at the given input index.
unsigned short getPrimaryCharAt(const int index) const;
// Returns true if c is found in the proximity list at the given input index.
bool existsCharInProximityAt(const int index, const int c) const;
// Returns true if the primary char at index is found in the proximity list of
// the previous or the next input position; false for an out-of-range index.
bool existsAdjacentProximityChars(const int index) const;
// Classifies dictionary character c against the proximity list at index and
// returns the matching ProximityType. Proximity (non-primary) entries are only
// consulted when skipPos, excessivePos and transposedPos are all negative.
ProximityType getMatchedProximityId(
const int index, const unsigned short c, const int skipPos,
const int excessivePos, const int transposedPos) const;
bool sameAsTyped(const unsigned short *word, int length) const; bool sameAsTyped(const unsigned short *word, int length) const;
private: private:
int getStartIndexFromCoordinates(const int x, const int y) const; int getStartIndexFromCoordinates(const int x, const int y) const;

View File

@ -20,7 +20,6 @@
#define LOG_TAG "LatinIME: unigram_dictionary.cpp" #define LOG_TAG "LatinIME: unigram_dictionary.cpp"
#include "basechars.h"
#include "char_utils.h" #include "char_utils.h"
#include "dictionary.h" #include "dictionary.h"
#include "unigram_dictionary.h" #include "unigram_dictionary.h"
@ -351,18 +350,6 @@ bool UnigramDictionary::addWord(unsigned short *word, int length, int frequency)
return false; return false;
} }
// Returns the lowercased base form of c: the BASE_CHARS entry is substituted
// when c lies inside the table, then 'A'..'Z' are lowercased with the ASCII
// bit trick and anything above 127 is passed through latin_tolower().
static inline unsigned short toBaseLowerCase(unsigned short c) {
    unsigned short base = c;
    if (base < sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0])) {
        base = BASE_CHARS[base];
    }
    const bool isAsciiUpper = (base >= 'A' && base <= 'Z');
    if (isAsciiUpper) {
        // Setting bit 5 (value 32) turns an ASCII uppercase letter into lowercase.
        base = static_cast<unsigned short>(base | 32);
    } else if (base > 127) {
        base = latin_tolower(base);
    }
    return base;
}
static const char QUOTE = '\''; static const char QUOTE = '\'';
static const char SPACE = ' '; static const char SPACE = ' ';
@ -556,7 +543,7 @@ inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int
WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE, &finalFreq); WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE, &finalFreq);
if (excessivePos >= 0) { if (excessivePos >= 0) {
multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE, &finalFreq); multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE, &finalFreq);
if (!existsAdjacentProximityChars(inputIndex, mInputLength)) { if (!mProximityInfo->existsAdjacentProximityChars(inputIndex)) {
// If an excessive character is not adjacent to the left char or the right char, // If an excessive character is not adjacent to the left char or the right char,
// we will demote this word. // we will demote this word.
multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE, &finalFreq); multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE, &finalFreq);
@ -592,75 +579,11 @@ inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int
inline bool UnigramDictionary::needsToSkipCurrentNode(const unsigned short c, inline bool UnigramDictionary::needsToSkipCurrentNode(const unsigned short c,
const int inputIndex, const int skipPos, const int depth) { const int inputIndex, const int skipPos, const int depth) {
const unsigned short userTypedChar = getInputCharsAt(inputIndex)[0]; const unsigned short userTypedChar = mProximityInfo->getPrimaryCharAt(inputIndex);
// Skip the ' or other letter and continue deeper // Skip the ' or other letter and continue deeper
return (c == QUOTE && userTypedChar != QUOTE) || skipPos == depth; return (c == QUOTE && userTypedChar != QUOTE) || skipPos == depth;
} }
// Returns true if the character typed at inputIndex also appears in the
// proximity list of the input position immediately to its left or right.
// Returns false when inputIndex is outside [0, inputLength).
inline bool UnigramDictionary::existsAdjacentProximityChars(const int inputIndex,
        const int inputLength) const {
    if (inputIndex < 0 || inputIndex >= inputLength) return false;
    const int currentChar = *getInputCharsAt(inputIndex);
    const int leftIndex = inputIndex - 1;
    if (leftIndex >= 0) {
        const int *leftChars = getInputCharsAt(leftIndex);
        // Bound is tested before the element is read so that a completely
        // filled list never causes a read past the end of its slot.
        for (int i = 0; i < MAX_PROXIMITY_CHARS && leftChars[i] > 0; ++i) {
            if (leftChars[i] == currentChar) return true;
        }
    }
    const int rightIndex = inputIndex + 1;
    if (rightIndex < inputLength) {
        const int *rightChars = getInputCharsAt(rightIndex);
        for (int i = 0; i < MAX_PROXIMITY_CHARS && rightChars[i] > 0; ++i) {
            if (rightChars[i] == currentChar) return true;
        }
    }
    return false;
}
// In the following function, c is the current character of the dictionary word
// currently examined.
// currentChars is an array containing the keys close to the character the
// user actually typed at the same position. We want to see if c is in it: if so,
// then the word contains at that position a character close to what the user
// typed.
// What the user typed is actually the first character of the array.
// Notice : accented characters do not have a proximity list, so they are alone
// in their list. The non-accented version of the character should be considered
// "close", but not the other keys close to the non-accented version.
//
// Returns SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR, NEAR_PROXIMITY_CHAR or
// UNRELATED_CHAR. When any of skipPos, excessivePos or transposedPos is >= 0,
// only the exact / base-lowercase match against the typed character is tried.
inline UnigramDictionary::ProximityType UnigramDictionary::getMatchedProximityId(
        const int *currentChars, const unsigned short c, const int skipPos,
        const int excessivePos, const int transposedPos) {
    const unsigned short baseLowerC = toBaseLowerCase(c);

    // The first char in the array is what user typed. If it matches right away,
    // that means the user typed that same char for this pos.
    if (currentChars[0] == baseLowerC || currentChars[0] == c) {
        return SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR;
    }

    // If one of those is true, we should not check for close characters at all.
    if (skipPos >= 0 || excessivePos >= 0 || transposedPos >= 0) {
        return UNRELATED_CHAR;
    }

    // If the non-accented, lowercased version of that first character matches
    // the non-accented, lowercased version of c, then we have a non-accented
    // version of the accented character the user typed. Treat it as a close char.
    if (toBaseLowerCase(currentChars[0]) == baseLowerC) {
        return NEAR_PROXIMITY_CHAR;
    }

    // Not an exact nor an accent-alike match: search the list of close keys.
    // The bound is tested before currentChars[j] is read so that a completely
    // filled list never causes a read past the end of its slot.
    for (int j = 1; j < MAX_PROXIMITY_CHARS && currentChars[j] > 0; ++j) {
        const bool matched = (currentChars[j] == baseLowerC || currentChars[j] == c);
        if (matched) return NEAR_PROXIMITY_CHAR;
    }

    // Was not included, signal this as an unrelated character.
    return UNRELATED_CHAR;
}
inline void UnigramDictionary::onTerminal(unsigned short int* word, const int depth, inline void UnigramDictionary::onTerminal(unsigned short int* word, const int depth,
const uint8_t* const root, const uint8_t flags, const int pos, const uint8_t* const root, const uint8_t flags, const int pos,
@ -826,15 +749,14 @@ inline bool UnigramDictionary::processCurrentNodeForExactMatch(const int firstCh
const int startInputIndex, const int depth, unsigned short *word, int *newChildPosition, const int startInputIndex, const int depth, unsigned short *word, int *newChildPosition,
int *newCount, bool *newTerminal, int *newFreq, int *siblingPos) { int *newCount, bool *newTerminal, int *newFreq, int *siblingPos) {
const int inputIndex = startInputIndex + depth; const int inputIndex = startInputIndex + depth;
const int *currentChars = getInputCharsAt(inputIndex);
unsigned short c; unsigned short c;
*siblingPos = Dictionary::setDictionaryValues(DICT_ROOT, IS_LATEST_DICT_VERSION, firstChildPos, *siblingPos = Dictionary::setDictionaryValues(DICT_ROOT, IS_LATEST_DICT_VERSION, firstChildPos,
&c, newChildPosition, newTerminal, newFreq); &c, newChildPosition, newTerminal, newFreq);
const unsigned int inputC = currentChars[0]; const unsigned int inputC = mProximityInfo->getPrimaryCharAt(inputIndex);
if (DEBUG_DICT) { if (DEBUG_DICT) {
assert(inputC <= U_SHORT_MAX); assert(inputC <= U_SHORT_MAX);
} }
const unsigned short baseLowerC = toBaseLowerCase(c); const unsigned short baseLowerC = Dictionary::toBaseLowerCase(c);
const bool matched = (inputC == baseLowerC || inputC == c); const bool matched = (inputC == baseLowerC || inputC == c);
const bool hasChild = *newChildPosition != 0; const bool hasChild = *newChildPosition != 0;
if (matched) { if (matched) {
@ -952,20 +874,20 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos, const in
*newDiffs = diffs; *newDiffs = diffs;
*newInputIndex = inputIndex; *newInputIndex = inputIndex;
} else { } else {
const int *currentChars = getInputCharsAt(inputIndex); int inputIndexForProximity = inputIndex;
if (transposedPos >= 0) { if (transposedPos >= 0) {
if (inputIndex == transposedPos) currentChars += MAX_PROXIMITY_CHARS; if (inputIndex == transposedPos) ++inputIndexForProximity;
if (inputIndex == (transposedPos + 1)) currentChars -= MAX_PROXIMITY_CHARS; if (inputIndex == (transposedPos + 1)) --inputIndexForProximity;
} }
int matchedProximityCharId = getMatchedProximityId(currentChars, c, skipPos, excessivePos, ProximityInfo::ProximityType matchedProximityCharId = mProximityInfo->getMatchedProximityId(
transposedPos); inputIndexForProximity, c, skipPos, excessivePos, transposedPos);
if (UNRELATED_CHAR == matchedProximityCharId) return false; if (ProximityInfo::UNRELATED_CHAR == matchedProximityCharId) return false;
mWord[depth] = c; mWord[depth] = c;
// If inputIndex is greater than mInputLength, that means there is no // If inputIndex is greater than mInputLength, that means there is no
// proximity chars. So, we don't need to check proximity. // proximity chars. So, we don't need to check proximity.
if (SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR == matchedProximityCharId) { if (ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR == matchedProximityCharId) {
multiplyIntCapped(TYPED_LETTER_MULTIPLIER, &matchWeight); multiplyIntCapped(TYPED_LETTER_MULTIPLIER, &matchWeight);
} }
bool isSameAsUserTypedLength = mInputLength == inputIndex + 1 bool isSameAsUserTypedLength = mInputLength == inputIndex + 1
@ -978,7 +900,8 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos, const in
// Start traversing all nodes after the index exceeds the user typed length // Start traversing all nodes after the index exceeds the user typed length
*newTraverseAllNodes = isSameAsUserTypedLength; *newTraverseAllNodes = isSameAsUserTypedLength;
*newMatchRate = matchWeight; *newMatchRate = matchWeight;
*newDiffs = diffs + ((NEAR_PROXIMITY_CHAR == matchedProximityCharId) ? 1 : 0); *newDiffs = diffs
+ ((ProximityInfo::NEAR_PROXIMITY_CHAR == matchedProximityCharId) ? 1 : 0);
*newInputIndex = inputIndex + 1; *newInputIndex = inputIndex + 1;
} }
// Optimization: Prune out words that are too long compared to how much was typed. // Optimization: Prune out words that are too long compared to how much was typed.
@ -1007,7 +930,7 @@ inline int UnigramDictionary::getMostFrequentWordLike(const int startInputIndex,
uint16_t inWord[inputLength]; uint16_t inWord[inputLength];
for (int i = 0; i < inputLength; ++i) { for (int i = 0; i < inputLength; ++i) {
inWord[i] = *getInputCharsAt(startInputIndex + i); inWord[i] = (uint16_t)mProximityInfo->getPrimaryCharAt(startInputIndex + i);
} }
return getMostFrequentWordLikeInner(inWord, inputLength, word); return getMostFrequentWordLikeInner(inWord, inputLength, word);
} }
@ -1031,8 +954,8 @@ static inline bool testCharGroupForContinuedLikeness(const uint8_t flags,
const bool hasMultipleChars = (0 != (UnigramDictionary::FLAG_HAS_MULTIPLE_CHARS & flags)); const bool hasMultipleChars = (0 != (UnigramDictionary::FLAG_HAS_MULTIPLE_CHARS & flags));
int pos = startPos; int pos = startPos;
int32_t character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos); int32_t character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos);
int32_t baseChar = toBaseLowerCase(character); int32_t baseChar = Dictionary::toBaseLowerCase(character);
const uint16_t wChar = toBaseLowerCase(inWord[startInputIndex]); const uint16_t wChar = Dictionary::toBaseLowerCase(inWord[startInputIndex]);
if (baseChar != wChar) { if (baseChar != wChar) {
*outPos = hasMultipleChars ? BinaryFormat::skipOtherCharacters(root, pos) : pos; *outPos = hasMultipleChars ? BinaryFormat::skipOtherCharacters(root, pos) : pos;
@ -1044,8 +967,8 @@ static inline bool testCharGroupForContinuedLikeness(const uint8_t flags,
if (hasMultipleChars) { if (hasMultipleChars) {
character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos); character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos);
while (NOT_A_CHARACTER != character) { while (NOT_A_CHARACTER != character) {
baseChar = toBaseLowerCase(character); baseChar = Dictionary::toBaseLowerCase(character);
if (toBaseLowerCase(inWord[++inputIndex]) != baseChar) { if (Dictionary::toBaseLowerCase(inWord[++inputIndex]) != baseChar) {
*outPos = BinaryFormat::skipOtherCharacters(root, pos); *outPos = BinaryFormat::skipOtherCharacters(root, pos);
*outInputIndex = startInputIndex; *outInputIndex = startInputIndex;
return false; return false;
@ -1290,7 +1213,7 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos, const in
const bool hasChildren = (!isLastChar) || BinaryFormat::hasChildrenInFlags(flags); const bool hasChildren = (!isLastChar) || BinaryFormat::hasChildrenInFlags(flags);
// This has to be done for each virtual char (this forwards the "inputIndex" which // This has to be done for each virtual char (this forwards the "inputIndex" which
// is the index in the user-inputted chars, as read by getInputCharsAt. // is the index in the user-inputted chars, as read by proximity chars.
if (excessivePos == depth && inputIndex < mInputLength - 1) ++inputIndex; if (excessivePos == depth && inputIndex < mInputLength - 1) ++inputIndex;
if (traverseAllNodes || needsToSkipCurrentNode(c, inputIndex, skipPos, depth)) { if (traverseAllNodes || needsToSkipCurrentNode(c, inputIndex, skipPos, depth)) {
mWord[depth] = c; mWord[depth] = c;
@ -1314,16 +1237,16 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos, const in
return false; return false;
} }
} else { } else {
const int *currentChars = getInputCharsAt(inputIndex); int inputIndexForProximity = inputIndex;
if (transposedPos >= 0) { if (transposedPos >= 0) {
if (inputIndex == transposedPos) currentChars += MAX_PROXIMITY_CHARS; if (inputIndex == transposedPos) ++inputIndexForProximity;
if (inputIndex == (transposedPos + 1)) currentChars -= MAX_PROXIMITY_CHARS; if (inputIndex == (transposedPos + 1)) --inputIndexForProximity;
} }
const int matchedProximityCharId = getMatchedProximityId(currentChars, c, skipPos, int matchedProximityCharId = mProximityInfo->getMatchedProximityId(
excessivePos, transposedPos); inputIndexForProximity, c, skipPos, excessivePos, transposedPos);
if (UNRELATED_CHAR == matchedProximityCharId) { if (ProximityInfo::UNRELATED_CHAR == matchedProximityCharId) {
// We found that this is an unrelated character, so we should give up traversing // We found that this is an unrelated character, so we should give up traversing
// this node and its children entirely. // this node and its children entirely.
// However we may not be on the last virtual node yet so we skip the remaining // However we may not be on the last virtual node yet so we skip the remaining
@ -1342,7 +1265,7 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos, const in
mWord[depth] = c; mWord[depth] = c;
// If inputIndex is greater than mInputLength, that means there is no // If inputIndex is greater than mInputLength, that means there is no
// proximity chars. So, we don't need to check proximity. // proximity chars. So, we don't need to check proximity.
if (SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR == matchedProximityCharId) { if (ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR == matchedProximityCharId) {
multiplyIntCapped(TYPED_LETTER_MULTIPLIER, &matchWeight); multiplyIntCapped(TYPED_LETTER_MULTIPLIER, &matchWeight);
} }
const bool isSameAsUserTypedLength = mInputLength == inputIndex + 1 const bool isSameAsUserTypedLength = mInputLength == inputIndex + 1
@ -1366,7 +1289,8 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos, const in
} }
// Start traversing all nodes after the index exceeds the user typed length // Start traversing all nodes after the index exceeds the user typed length
traverseAllNodes = isSameAsUserTypedLength; traverseAllNodes = isSameAsUserTypedLength;
diffs = diffs + ((NEAR_PROXIMITY_CHAR == matchedProximityCharId) ? 1 : 0); diffs = diffs
+ ((ProximityInfo::NEAR_PROXIMITY_CHAR == matchedProximityCharId) ? 1 : 0);
// Finally, we are ready to go to the next character, the next "virtual node". // Finally, we are ready to go to the next character, the next "virtual node".
// We should advance the input index. // We should advance the input index.
// We do this in this branch of the 'if traverseAllNodes' because we are still matching // We do this in this branch of the 'if traverseAllNodes' because we are still matching

View File

@ -29,12 +29,6 @@ namespace latinime {
class UnigramDictionary { class UnigramDictionary {
// Used as a return value for character comparison.
enum ProximityType {
    // Same char, possibly with different case or accent
    SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR,
    // It is a char located nearby on the keyboard
    NEAR_PROXIMITY_CHAR,
    // It is an unrelated char
    UNRELATED_CHAR
};
public: public:
#ifdef NEW_DICTIONARY_FORMAT #ifdef NEW_DICTIONARY_FORMAT
@ -118,8 +112,6 @@ private:
int *nextLetters, const int nextLettersSize); int *nextLetters, const int nextLettersSize);
bool needsToSkipCurrentNode(const unsigned short c, bool needsToSkipCurrentNode(const unsigned short c,
const int inputIndex, const int skipPos, const int depth); const int inputIndex, const int skipPos, const int depth);
// Classifies dictionary character c against the proximity list currentChars,
// whose first entry is the character the user typed, and returns the matching
// ProximityType. Entries after the first are only consulted when skipPos,
// excessivePos and transposedPos are all negative.
ProximityType getMatchedProximityId(const int *currentChars, const unsigned short c,
const int skipPos, const int excessivePos, const int transposedPos);
// Process a node by considering proximity, missing and excessive character // Process a node by considering proximity, missing and excessive character
bool processCurrentNode(const int initialPos, const int initialDepth, bool processCurrentNode(const int initialPos, const int initialDepth,
const int maxDepth, const bool initialTraverseAllNodes, const int snr, int inputIndex, const int maxDepth, const bool initialTraverseAllNodes, const int snr, int inputIndex,
@ -127,10 +119,6 @@ private:
const int transposedPos, int *nextLetters, const int nextLettersSize, int *newCount, const int transposedPos, int *nextLetters, const int nextLettersSize, int *newCount,
int *newChildPosition, bool *newTraverseAllNodes, int *newSnr, int*newInputIndex, int *newChildPosition, bool *newTraverseAllNodes, int *newSnr, int*newInputIndex,
int *newDiffs, int *nextSiblingPosition, int *nextOutputIndex); int *newDiffs, int *nextSiblingPosition, int *nextOutputIndex);
// Returns true if the character typed at inputIndex is found in the proximity
// list of the previous or the next input position; false when inputIndex is
// outside [0, inputLength).
bool existsAdjacentProximityChars(const int inputIndex, const int inputLength) const;
// Forwards to mProximityInfo->getProximityCharsAt() for the given input index.
inline const int* getInputCharsAt(const int index) const {
    const int *proximityChars = mProximityInfo->getProximityCharsAt(index);
    return proximityChars;
}
int getMostFrequentWordLike(const int startInputIndex, const int inputLength, int getMostFrequentWordLike(const int startInputIndex, const int inputLength,
unsigned short *word); unsigned short *word);
#ifndef NEW_DICTIONARY_FORMAT #ifndef NEW_DICTIONARY_FORMAT
@ -189,7 +177,6 @@ private:
int mStackOutputIndex[MAX_WORD_LENGTH_INTERNAL]; int mStackOutputIndex[MAX_WORD_LENGTH_INTERNAL];
int mNextLettersFrequency[NEXT_LETTERS_SIZE]; int mNextLettersFrequency[NEXT_LETTERS_SIZE];
}; };
} // namespace latinime } // namespace latinime
#endif // LATINIME_UNIGRAM_DICTIONARY_H #endif // LATINIME_UNIGRAM_DICTIONARY_H