am d6f1c5c7: Merge "Add Beginning-of-Sentence info in UnigramProperty."
* commit 'd6f1c5c7fbb65965fe28ab568d1b2d44c1315a33': Add Beginning-of-Sentence info in UnigramProperty.
main
commit
74dea1a20c
|
@@ -341,8 +341,8 @@ static void latinime_BinaryDictionary_addUnigramWord(JNIEnv *env, jclass clazz,
|
||||||
shortcuts.emplace_back(&shortcutTargetCodePoints, shortcutProbability);
|
shortcuts.emplace_back(&shortcutTargetCodePoints, shortcutProbability);
|
||||||
}
|
}
|
||||||
// Use 1 for count to indicate the word has inputted.
|
// Use 1 for count to indicate the word has inputted.
|
||||||
const UnigramProperty unigramProperty(isNotAWord, isBlacklisted,
|
const UnigramProperty unigramProperty(false /* isBeginningOfSentence */, isNotAWord,
|
||||||
probability, timestamp, 0 /* level */, 1 /* count */, &shortcuts);
|
isBlacklisted, probability, timestamp, 0 /* level */, 1 /* count */, &shortcuts);
|
||||||
dictionary->addUnigramEntry(codePoints, codePointCount, &unigramProperty);
|
dictionary->addUnigramEntry(codePoints, codePointCount, &unigramProperty);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -450,8 +450,9 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
|
||||||
shortcuts.emplace_back(&shortcutTargetCodePoints, shortcutProbability);
|
shortcuts.emplace_back(&shortcutTargetCodePoints, shortcutProbability);
|
||||||
}
|
}
|
||||||
// Use 1 for count to indicate the word has inputted.
|
// Use 1 for count to indicate the word has inputted.
|
||||||
const UnigramProperty unigramProperty(isNotAWord, isBlacklisted,
|
const UnigramProperty unigramProperty(false /* isBeginningOfSentence */, isNotAWord,
|
||||||
unigramProbability, timestamp, 0 /* level */, 1 /* count */, &shortcuts);
|
isBlacklisted, unigramProbability, timestamp, 0 /* level */, 1 /* count */,
|
||||||
|
&shortcuts);
|
||||||
dictionary->addUnigramEntry(word1CodePoints, word1Length, &unigramProperty);
|
dictionary->addUnigramEntry(word1CodePoints, word1Length, &unigramProperty);
|
||||||
if (word0) {
|
if (word0) {
|
||||||
jint bigramProbability = env->GetIntField(languageModelParam, bigramProbabilityFieldId);
|
jint bigramProbability = env->GetIntField(languageModelParam, bigramProbabilityFieldId);
|
||||||
|
|
|
@@ -82,6 +82,12 @@ int Dictionary::getBigramProbability(const PrevWordsInfo *const prevWordsInfo, c
|
||||||
|
|
||||||
void Dictionary::addUnigramEntry(const int *const word, const int length,
|
void Dictionary::addUnigramEntry(const int *const word, const int length,
|
||||||
const UnigramProperty *const unigramProperty) {
|
const UnigramProperty *const unigramProperty) {
|
||||||
|
if (unigramProperty->representsBeginningOfSentence()
|
||||||
|
&& !mDictionaryStructureWithBufferPolicy->getHeaderStructurePolicy()
|
||||||
|
->supportsBeginningOfSentence()) {
|
||||||
|
AKLOGE("The dictionary doesn't support Beginning-of-Sentence.");
|
||||||
|
return;
|
||||||
|
}
|
||||||
TimeKeeper::setCurrentTime();
|
TimeKeeper::setCurrentTime();
|
||||||
mDictionaryStructureWithBufferPolicy->addUnigramEntry(word, length, unigramProperty);
|
mDictionaryStructureWithBufferPolicy->addUnigramEntry(word, length, unigramProperty);
|
||||||
}
|
}
|
||||||
|
|
|
@@ -48,15 +48,21 @@ class UnigramProperty {
|
||||||
};
|
};
|
||||||
|
|
||||||
UnigramProperty()
|
UnigramProperty()
|
||||||
: mIsNotAWord(false), mIsBlacklisted(false), mProbability(NOT_A_PROBABILITY),
|
: mRepresentsBeginningOfSentence(false), mIsNotAWord(false), mIsBlacklisted(false),
|
||||||
mTimestamp(NOT_A_TIMESTAMP), mLevel(0), mCount(0), mShortcuts() {}
|
mProbability(NOT_A_PROBABILITY), mTimestamp(NOT_A_TIMESTAMP), mLevel(0), mCount(0),
|
||||||
|
mShortcuts() {}
|
||||||
|
|
||||||
UnigramProperty(const bool isNotAWord, const bool isBlacklisted, const int probability,
|
UnigramProperty(const bool representsBeginningOfSentence, const bool isNotAWord,
|
||||||
const int timestamp, const int level, const int count,
|
const bool isBlacklisted, const int probability, const int timestamp, const int level,
|
||||||
const std::vector<ShortcutProperty> *const shortcuts)
|
const int count, const std::vector<ShortcutProperty> *const shortcuts)
|
||||||
: mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted), mProbability(probability),
|
: mRepresentsBeginningOfSentence(representsBeginningOfSentence),
|
||||||
|
mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted), mProbability(probability),
|
||||||
mTimestamp(timestamp), mLevel(level), mCount(count), mShortcuts(*shortcuts) {}
|
mTimestamp(timestamp), mLevel(level), mCount(count), mShortcuts(*shortcuts) {}
|
||||||
|
|
||||||
|
bool representsBeginningOfSentence() const {
|
||||||
|
return mRepresentsBeginningOfSentence;
|
||||||
|
}
|
||||||
|
|
||||||
bool isNotAWord() const {
|
bool isNotAWord() const {
|
||||||
return mIsNotAWord;
|
return mIsNotAWord;
|
||||||
}
|
}
|
||||||
|
@@ -94,6 +100,7 @@ class UnigramProperty {
|
||||||
DISALLOW_ASSIGNMENT_OPERATOR(UnigramProperty);
|
DISALLOW_ASSIGNMENT_OPERATOR(UnigramProperty);
|
||||||
|
|
||||||
// TODO: Make members const.
|
// TODO: Make members const.
|
||||||
|
bool mRepresentsBeginningOfSentence;
|
||||||
bool mIsNotAWord;
|
bool mIsNotAWord;
|
||||||
bool mIsBlacklisted;
|
bool mIsBlacklisted;
|
||||||
int mProbability;
|
int mProbability;
|
||||||
|
|
|
@@ -51,6 +51,8 @@ class DictionaryHeaderStructurePolicy {
|
||||||
|
|
||||||
virtual const std::vector<int> *getLocale() const = 0;
|
virtual const std::vector<int> *getLocale() const = 0;
|
||||||
|
|
||||||
|
virtual bool supportsBeginningOfSentence() const = 0;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
DictionaryHeaderStructurePolicy() {}
|
DictionaryHeaderStructurePolicy() {}
|
||||||
|
|
||||||
|
|
|
@@ -246,6 +246,10 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
|
||||||
return &mLocale;
|
return &mLocale;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool supportsBeginningOfSentence() const {
|
||||||
|
return mDictFormatVersion == FormatUtils::VERSION_4_DEV;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_COPY_AND_ASSIGN(HeaderPolicy);
|
DISALLOW_COPY_AND_ASSIGN(HeaderPolicy);
|
||||||
|
|
||||||
|
|
|
@@ -432,8 +432,8 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code
|
||||||
shortcuts.emplace_back(&target, shortcutProbability);
|
shortcuts.emplace_back(&target, shortcutProbability);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
const UnigramProperty unigramProperty(ptNodeParams.isNotAWord(),
|
const UnigramProperty unigramProperty(false /* representsBeginningOfSentence */,
|
||||||
ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
|
ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
|
||||||
historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
|
historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
|
||||||
historicalInfo->getCount(), &shortcuts);
|
historicalInfo->getCount(), &shortcuts);
|
||||||
return WordProperty(&codePointVector, &unigramProperty, &bigrams);
|
return WordProperty(&codePointVector, &unigramProperty, &bigrams);
|
||||||
|
|
|
@@ -164,6 +164,11 @@ class PtNodeParams {
|
||||||
&& isNotAWord();
|
&& isNotAWord();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE int representsBeginningOfSentence() const {
|
||||||
|
return getCodePointCount() > 0 && getCodePoints()[0] == CODE_POINT_BEGINNING_OF_SENTENCE
|
||||||
|
&& isNotAWord();
|
||||||
|
}
|
||||||
|
|
||||||
// Parent node position
|
// Parent node position
|
||||||
AK_FORCE_INLINE int getParentPos() const {
|
AK_FORCE_INLINE int getParentPos() const {
|
||||||
return mParentPos;
|
return mParentPos;
|
||||||
|
|
|
@@ -383,8 +383,8 @@ const WordProperty PatriciaTriePolicy::getWordProperty(const int *const codePoin
|
||||||
shortcuts.emplace_back(&shortcutTarget, shortcutProbability);
|
shortcuts.emplace_back(&shortcutTarget, shortcutProbability);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
const UnigramProperty unigramProperty(ptNodeParams.isNotAWord(),
|
const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(),
|
||||||
ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
|
ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
|
||||||
NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts);
|
NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts);
|
||||||
return WordProperty(&codePointVector, &unigramProperty, &bigrams);
|
return WordProperty(&codePointVector, &unigramProperty, &bigrams);
|
||||||
}
|
}
|
||||||
|
|
|
@@ -430,8 +430,8 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code
|
||||||
shortcuts.emplace_back(&target, shortcutProbability);
|
shortcuts.emplace_back(&target, shortcutProbability);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
const UnigramProperty unigramProperty(ptNodeParams.isNotAWord(),
|
const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(),
|
||||||
ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
|
ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
|
||||||
historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
|
historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
|
||||||
historicalInfo->getCount(), &shortcuts);
|
historicalInfo->getCount(), &shortcuts);
|
||||||
return WordProperty(&codePointVector, &unigramProperty, &bigrams);
|
return WordProperty(&codePointVector, &unigramProperty, &bigrams);
|
||||||
|
|
Loading…
Reference in New Issue