am d94567d9: Merge "Introduce MAX_PREV_WORD_COUNT_FOR_N_GRAM."
* commit 'd94567d9637aae33905fc7f292a615996df6d3c9': Introduce MAX_PREV_WORD_COUNT_FOR_N_GRAM.
This commit is contained in:
commit 77f993728c
4 changed files with 36 additions and 19 deletions
|
@ -336,6 +336,9 @@ static inline void prof_out(void) {
|
|||
#define MAX_POINTER_COUNT 1
|
||||
#define MAX_POINTER_COUNT_G 2
|
||||
|
||||
// (MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1)-gram is supported.
|
||||
#define MAX_PREV_WORD_COUNT_FOR_N_GRAM 1
|
||||
|
||||
#define DISALLOW_DEFAULT_CONSTRUCTOR(TypeName) \
|
||||
TypeName() = delete
|
||||
|
||||
|
|
|
@ -36,17 +36,17 @@ void DicTraverseSession::init(const Dictionary *const dictionary,
|
|||
->getMultiWordCostMultiplier();
|
||||
mSuggestOptions = suggestOptions;
|
||||
if (!prevWordsInfo->getPrevWordCodePoints()) {
|
||||
mPrevWordPtNodePos = NOT_A_DICT_POS;
|
||||
mPrevWordsPtNodePos[0] = NOT_A_DICT_POS;
|
||||
return;
|
||||
}
|
||||
// TODO: merge following similar calls to getTerminalPosition into one case-insensitive call.
|
||||
mPrevWordPtNodePos = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord(
|
||||
mPrevWordsPtNodePos[0] = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord(
|
||||
prevWordsInfo->getPrevWordCodePoints(), prevWordsInfo->getPrevWordCodePointCount(),
|
||||
false /* forceLowerCaseSearch */);
|
||||
if (mPrevWordPtNodePos == NOT_A_DICT_POS) {
|
||||
if (mPrevWordsPtNodePos[0] == NOT_A_DICT_POS) {
|
||||
// Check bigrams for lower-cased previous word if original was not found. Useful for
|
||||
// auto-capitalized words like "The [current_word]".
|
||||
mPrevWordPtNodePos = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord(
|
||||
mPrevWordsPtNodePos[0] = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord(
|
||||
prevWordsInfo->getPrevWordCodePoints(), prevWordsInfo->getPrevWordCodePointCount(),
|
||||
true /* forceLowerCaseSearch */);
|
||||
}
|
||||
|
|
|
@ -50,12 +50,14 @@ class DicTraverseSession {
|
|||
}
|
||||
|
||||
AK_FORCE_INLINE DicTraverseSession(JNIEnv *env, jstring localeStr, bool usesLargeCache)
|
||||
: mPrevWordPtNodePos(NOT_A_DICT_POS), mProximityInfo(nullptr),
|
||||
mDictionary(nullptr), mSuggestOptions(nullptr), mDicNodesCache(usesLargeCache),
|
||||
mMultiBigramMap(), mInputSize(0), mMaxPointerCount(1),
|
||||
: mProximityInfo(nullptr), mDictionary(nullptr), mSuggestOptions(nullptr),
|
||||
mDicNodesCache(usesLargeCache), mMultiBigramMap(), mInputSize(0), mMaxPointerCount(1),
|
||||
mMultiWordCostMultiplier(1.0f) {
|
||||
// NOTE: mProximityInfoStates is an array of instances.
|
||||
// No need to initialize it explicitly here.
|
||||
for (size_t i = 0; i < NELEMS(mPrevWordsPtNodePos); ++i) {
|
||||
mPrevWordsPtNodePos[i] = NOT_A_DICT_POS;
|
||||
}
|
||||
}
|
||||
|
||||
// Non virtual inline destructor -- never inherit this class
|
||||
|
@ -77,7 +79,7 @@ class DicTraverseSession {
|
|||
//--------------------
|
||||
const ProximityInfo *getProximityInfo() const { return mProximityInfo; }
|
||||
const SuggestOptions *getSuggestOptions() const { return mSuggestOptions; }
|
||||
int getPrevWordPtNodePos() const { return mPrevWordPtNodePos; }
|
||||
int getPrevWordPtNodePos() const { return mPrevWordsPtNodePos[0]; }
|
||||
DicNodesCache *getDicTraverseCache() { return &mDicNodesCache; }
|
||||
MultiBigramMap *getMultiBigramMap() { return &mMultiBigramMap; }
|
||||
const ProximityInfoState *getProximityInfoState(int id) const {
|
||||
|
@ -164,7 +166,7 @@ class DicTraverseSession {
|
|||
const int *const inputYs, const int *const times, const int *const pointerIds,
|
||||
const int inputSize, const float maxSpatialDistance, const int maxPointerCount);
|
||||
|
||||
int mPrevWordPtNodePos;
|
||||
int mPrevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
|
||||
const ProximityInfo *mProximityInfo;
|
||||
const Dictionary *mDictionary;
|
||||
const SuggestOptions *mSuggestOptions;
|
||||
|
|
|
@ -27,27 +27,39 @@ namespace latinime {
|
|||
class PrevWordsInfo {
|
||||
public:
|
||||
// No prev word information.
|
||||
PrevWordsInfo()
|
||||
: mPrevWordCodePoints(nullptr), mPrevWordCodePointCount(0) {}
|
||||
PrevWordsInfo() {
|
||||
clear();
|
||||
}
|
||||
|
||||
PrevWordsInfo(const int *const prevWordCodePoints, const int prevWordCodePointCount,
|
||||
const bool isBeginningOfSentence)
|
||||
: mPrevWordCodePoints(prevWordCodePoints),
|
||||
mPrevWordCodePointCount(prevWordCodePointCount) {}
|
||||
|
||||
const bool isBeginningOfSentence) {
|
||||
clear();
|
||||
mPrevWordCodePoints[0] = prevWordCodePoints;
|
||||
mPrevWordCodePointCount[0] = prevWordCodePointCount;
|
||||
mIsBeginningOfSentence[0] = isBeginningOfSentence;
|
||||
}
|
||||
const int *getPrevWordCodePoints() const {
|
||||
return mPrevWordCodePoints;
|
||||
return mPrevWordCodePoints[0];
|
||||
}
|
||||
|
||||
int getPrevWordCodePointCount() const {
|
||||
return mPrevWordCodePointCount;
|
||||
return mPrevWordCodePointCount[0];
|
||||
}
|
||||
|
||||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(PrevWordsInfo);
|
||||
|
||||
const int *const mPrevWordCodePoints;
|
||||
const int mPrevWordCodePointCount;
|
||||
void clear() {
|
||||
for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) {
|
||||
mPrevWordCodePoints[i] = nullptr;
|
||||
mPrevWordCodePointCount[i] = 0;
|
||||
mIsBeginningOfSentence[i] = false;
|
||||
}
|
||||
}
|
||||
|
||||
const int *mPrevWordCodePoints[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
|
||||
int mPrevWordCodePointCount[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
|
||||
bool mIsBeginningOfSentence[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
|
||||
};
|
||||
} // namespace latinime
|
||||
#endif // LATINIME_PREV_WORDS_INFO_H
|
||||
|
|
Loading…
Reference in a new issue