Add Beginning-of-Sentence info in UnigramProperty.

Bug: 14119293
Bug: 14425059
Change-Id: I8a894352568377d32468e5563f4e89af00d22944
main
Keisuke Kuroyanagi 2014-05-23 00:07:14 +09:00
parent 9f8c9a0161
commit 2a015dcb25
9 changed files with 41 additions and 16 deletions

View File

@ -341,8 +341,8 @@ static void latinime_BinaryDictionary_addUnigramWord(JNIEnv *env, jclass clazz,
shortcuts.emplace_back(&shortcutTargetCodePoints, shortcutProbability);
}
// Use 1 for count to indicate the word has been input.
const UnigramProperty unigramProperty(isNotAWord, isBlacklisted,
probability, timestamp, 0 /* level */, 1 /* count */, &shortcuts);
const UnigramProperty unigramProperty(false /* isBeginningOfSentence */, isNotAWord,
isBlacklisted, probability, timestamp, 0 /* level */, 1 /* count */, &shortcuts);
dictionary->addUnigramEntry(codePoints, codePointCount, &unigramProperty);
}
@ -450,8 +450,9 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
shortcuts.emplace_back(&shortcutTargetCodePoints, shortcutProbability);
}
// Use 1 for count to indicate the word has been input.
const UnigramProperty unigramProperty(isNotAWord, isBlacklisted,
unigramProbability, timestamp, 0 /* level */, 1 /* count */, &shortcuts);
const UnigramProperty unigramProperty(false /* isBeginningOfSentence */, isNotAWord,
isBlacklisted, unigramProbability, timestamp, 0 /* level */, 1 /* count */,
&shortcuts);
dictionary->addUnigramEntry(word1CodePoints, word1Length, &unigramProperty);
if (word0) {
jint bigramProbability = env->GetIntField(languageModelParam, bigramProbabilityFieldId);

View File

@ -82,6 +82,12 @@ int Dictionary::getBigramProbability(const PrevWordsInfo *const prevWordsInfo, c
// Adds the unigram |word| (|length| code points) with the attributes in |unigramProperty|.
// A property that represents Beginning-of-Sentence is rejected (error logged, nothing
// added) when the dictionary header reports that Beginning-of-Sentence is unsupported.
void Dictionary::addUnigramEntry(const int *const word, const int length,
        const UnigramProperty *const unigramProperty) {
    if (unigramProperty->representsBeginningOfSentence()) {
        // Only consult the header when the entry actually needs the feature.
        const bool headerSupportsBos = mDictionaryStructureWithBufferPolicy
                ->getHeaderStructurePolicy()->supportsBeginningOfSentence();
        if (!headerSupportsBos) {
            AKLOGE("The dictionary doesn't support Beginning-of-Sentence.");
            return;
        }
    }
    // Refresh the current time before handing the entry to the structure policy.
    TimeKeeper::setCurrentTime();
    mDictionaryStructureWithBufferPolicy->addUnigramEntry(word, length, unigramProperty);
}

View File

@ -48,15 +48,21 @@ class UnigramProperty {
};
UnigramProperty()
: mIsNotAWord(false), mIsBlacklisted(false), mProbability(NOT_A_PROBABILITY),
mTimestamp(NOT_A_TIMESTAMP), mLevel(0), mCount(0), mShortcuts() {}
: mRepresentsBeginningOfSentence(false), mIsNotAWord(false), mIsBlacklisted(false),
mProbability(NOT_A_PROBABILITY), mTimestamp(NOT_A_TIMESTAMP), mLevel(0), mCount(0),
mShortcuts() {}
UnigramProperty(const bool isNotAWord, const bool isBlacklisted, const int probability,
const int timestamp, const int level, const int count,
const std::vector<ShortcutProperty> *const shortcuts)
: mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted), mProbability(probability),
UnigramProperty(const bool representsBeginningOfSentence, const bool isNotAWord,
const bool isBlacklisted, const int probability, const int timestamp, const int level,
const int count, const std::vector<ShortcutProperty> *const shortcuts)
: mRepresentsBeginningOfSentence(representsBeginningOfSentence),
mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted), mProbability(probability),
mTimestamp(timestamp), mLevel(level), mCount(count), mShortcuts(*shortcuts) {}
// Returns the Beginning-of-Sentence flag supplied at construction
// (false for a default-constructed property).
bool representsBeginningOfSentence() const { return mRepresentsBeginningOfSentence; }
// Returns the not-a-word flag supplied at construction
// (false for a default-constructed property).
bool isNotAWord() const { return mIsNotAWord; }
@ -94,6 +100,7 @@ class UnigramProperty {
DISALLOW_ASSIGNMENT_OPERATOR(UnigramProperty);
// TODO: Make members const.
bool mRepresentsBeginningOfSentence;
bool mIsNotAWord;
bool mIsBlacklisted;
int mProbability;

View File

@ -51,6 +51,8 @@ class DictionaryHeaderStructurePolicy {
virtual const std::vector<int> *getLocale() const = 0;
// Whether the dictionary described by this header can hold a Beginning-of-Sentence entry.
virtual bool supportsBeginningOfSentence() const = 0;
protected:
// Empty default constructor; declared in the protected section, so it is not publicly callable.
DictionaryHeaderStructurePolicy() {}

View File

@ -246,6 +246,10 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
return &mLocale;
}
// Beginning-of-Sentence is supported only when the dictionary format version is
// FormatUtils::VERSION_4_DEV.
bool supportsBeginningOfSentence() const {
    const bool isVersion4Dev = (FormatUtils::VERSION_4_DEV == mDictFormatVersion);
    return isVersion4Dev;
}
private:
DISALLOW_COPY_AND_ASSIGN(HeaderPolicy);

View File

@ -432,8 +432,8 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code
shortcuts.emplace_back(&target, shortcutProbability);
}
}
const UnigramProperty unigramProperty(ptNodeParams.isNotAWord(),
ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
const UnigramProperty unigramProperty(false /* representsBeginningOfSentence */,
ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
historicalInfo->getCount(), &shortcuts);
return WordProperty(&codePointVector, &unigramProperty, &bigrams);

View File

@ -164,6 +164,11 @@ class PtNodeParams {
&& isNotAWord();
}
// Returns true when these params hold at least one code point, the first code point is
// CODE_POINT_BEGINNING_OF_SENTENCE, and the not-a-word flag is set.
// The return type is bool rather than int: the value is a pure predicate and callers
// pass it where a bool is expected.
AK_FORCE_INLINE bool representsBeginningOfSentence() const {
    // The count check comes first so that getCodePoints()[0] is only read when it exists.
    return getCodePointCount() > 0 && getCodePoints()[0] == CODE_POINT_BEGINNING_OF_SENTENCE
            && isNotAWord();
}
// Parent node position
AK_FORCE_INLINE int getParentPos() const {
return mParentPos;

View File

@ -383,8 +383,8 @@ const WordProperty PatriciaTriePolicy::getWordProperty(const int *const codePoin
shortcuts.emplace_back(&shortcutTarget, shortcutProbability);
}
}
const UnigramProperty unigramProperty(ptNodeParams.isNotAWord(),
ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(),
ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts);
return WordProperty(&codePointVector, &unigramProperty, &bigrams);
}

View File

@ -430,8 +430,8 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code
shortcuts.emplace_back(&target, shortcutProbability);
}
}
const UnigramProperty unigramProperty(ptNodeParams.isNotAWord(),
ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(),
ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
historicalInfo->getCount(), &shortcuts);
return WordProperty(&codePointVector, &unigramProperty, &bigrams);