Update input checking logic for adding/removing n-gram.

Bug: 14425059
Change-Id: Ifb4e3c3f06aaf9a16f632ff208ba76cf741cea83
main
Keisuke Kuroyanagi 2014-09-16 14:14:22 +09:00
parent d2230525bc
commit 7eb6e28b9e
3 changed files with 39 additions and 20 deletions

View File

@ -310,30 +310,32 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
if (prevWordIds.empty()) {
return false;
}
// TODO: Support N-gram.
if (prevWordIds[0] == NOT_A_WORD_ID) {
if (prevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)) {
const std::vector<UnigramProperty::ShortcutProperty> shortcuts;
const UnigramProperty beginningOfSentenceUnigramProperty(
true /* representsBeginningOfSentence */, true /* isNotAWord */,
false /* isBlacklisted */, MAX_PROBABILITY /* probability */,
NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts);
if (!addUnigramEntry(prevWordsInfo->getNthPrevWordCodePoints(1 /* n */),
&beginningOfSentenceUnigramProperty)) {
AKLOGE("Cannot add unigram entry for the beginning-of-sentence.");
return false;
}
// Refresh word ids.
prevWordsInfo->getPrevWordIds(this, &prevWordIdArray, false /* tryLowerCaseSearch */);
} else {
for (size_t i = 0; i < prevWordIds.size(); ++i) {
if (prevWordIds[i] != NOT_A_WORD_ID) {
continue;
}
if (!prevWordsInfo->isNthPrevWordBeginningOfSentence(i + 1 /* n */)) {
return false;
}
const std::vector<UnigramProperty::ShortcutProperty> shortcuts;
const UnigramProperty beginningOfSentenceUnigramProperty(
true /* representsBeginningOfSentence */, true /* isNotAWord */,
false /* isBlacklisted */, MAX_PROBABILITY /* probability */,
NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts);
if (!addUnigramEntry(prevWordsInfo->getNthPrevWordCodePoints(1 /* n */),
&beginningOfSentenceUnigramProperty)) {
AKLOGE("Cannot add unigram entry for the beginning-of-sentence.");
return false;
}
// Refresh word ids.
prevWordsInfo->getPrevWordIds(this, &prevWordIdArray, false /* tryLowerCaseSearch */);
}
const int wordId = getWordId(CodePointArrayView(*bigramProperty->getTargetCodePoints()),
false /* forceLowerCaseSearch */);
if (wordId == NOT_A_WORD_ID) {
return false;
}
// TODO: Support N-gram.
bool addedNewEntry = false;
WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordsPtNodePos;
for (size_t i = 0; i < prevWordsPtNodePos.size(); ++i) {
@ -375,8 +377,7 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray;
const WordIdArrayView prevWordIds = prevWordsInfo->getPrevWordIds(this, &prevWordIdArray,
false /* tryLowerCaseSearch */);
// TODO: Support N-gram.
if (prevWordIds.empty() || prevWordIds[0] == NOT_A_WORD_ID) {
if (prevWordIds.empty() || prevWordIds.contains(NOT_A_WORD_ID)) {
return false;
}
const int wordId = getWordId(wordCodePoints, false /* forceLowerCaseSearch */);

View File

@ -17,6 +17,7 @@
#ifndef LATINIME_INT_ARRAY_VIEW_H
#define LATINIME_INT_ARRAY_VIEW_H
#include <algorithm>
#include <array>
#include <cstdint>
#include <cstring>
@ -92,12 +93,16 @@ class IntArrayView {
return mPtr + mSize;
}
// Returns true when at least one element in [begin(), end()) compares equal to the given value.
AK_FORCE_INLINE bool contains(const int value) const {
    return std::any_of(begin(), end(),
            [value](const int element) { return element == value; });
}
// Returns the view whose size is smaller than or equal to the given count.
const IntArrayView limit(const size_t maxSize) const {
AK_FORCE_INLINE const IntArrayView limit(const size_t maxSize) const {
return IntArrayView(mPtr, std::min(maxSize, mSize));
}
const IntArrayView skip(const size_t n) const {
AK_FORCE_INLINE const IntArrayView skip(const size_t n) const {
if (mSize <= n) {
return IntArrayView();
}

View File

@ -58,6 +58,19 @@ TEST(IntArrayViewTest, TestConstructFromObject) {
EXPECT_EQ(object, intArrayView[0]);
}
// Exercises IntArrayView::contains() against a default-constructed view, a view over a
// vector, and a view shortened with limit(0).
TEST(IntArrayViewTest, TestContains) {
    // A default-constructed view is expected to report no matches.
    const IntArrayView emptyView = IntArrayView();
    EXPECT_FALSE(emptyView.contains(0));
    EXPECT_FALSE(emptyView.contains(1));

    const std::vector<int> values = {3, 2, 1, 0, -1, -2};
    const IntArrayView view(values);
    // Probe a value from the middle (0), the first element (3) and the last element (-2).
    EXPECT_TRUE(view.contains(0));
    EXPECT_TRUE(view.contains(3));
    EXPECT_TRUE(view.contains(-2));
    // -3 does not appear anywhere in the backing vector.
    EXPECT_FALSE(view.contains(-3));
    // 3 is present in the full view, but must not be found once the view is limited to 0.
    EXPECT_FALSE(view.limit(0).contains(3));
}
TEST(IntArrayViewTest, TestLimit) {
const std::vector<int> intVector = {3, 2, 1, 0, -1, -2};
IntArrayView intArrayView(intVector);