am 30023f4d: Merge "Update input checking logic for adding/removing n-gram."

* commit '30023f4d8daac3678d791a70a6e270f0f1c7156f':
  Update input checking logic for adding/removing n-gram.
This commit is contained in:
Keisuke Kuroyanagi 2014-09-16 12:24:55 +00:00 committed by Android Git Automerger
commit ab131309b7
3 changed files with 39 additions and 20 deletions

View file

@ -310,30 +310,32 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
if (prevWordIds.empty()) { if (prevWordIds.empty()) {
return false; return false;
} }
// TODO: Support N-gram. for (size_t i = 0; i < prevWordIds.size(); ++i) {
if (prevWordIds[0] == NOT_A_WORD_ID) { if (prevWordIds[i] != NOT_A_WORD_ID) {
if (prevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)) { continue;
const std::vector<UnigramProperty::ShortcutProperty> shortcuts; }
const UnigramProperty beginningOfSentenceUnigramProperty( if (!prevWordsInfo->isNthPrevWordBeginningOfSentence(i + 1 /* n */)) {
true /* representsBeginningOfSentence */, true /* isNotAWord */,
false /* isBlacklisted */, MAX_PROBABILITY /* probability */,
NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts);
if (!addUnigramEntry(prevWordsInfo->getNthPrevWordCodePoints(1 /* n */),
&beginningOfSentenceUnigramProperty)) {
AKLOGE("Cannot add unigram entry for the beginning-of-sentence.");
return false;
}
// Refresh word ids.
prevWordsInfo->getPrevWordIds(this, &prevWordIdArray, false /* tryLowerCaseSearch */);
} else {
return false; return false;
} }
const std::vector<UnigramProperty::ShortcutProperty> shortcuts;
const UnigramProperty beginningOfSentenceUnigramProperty(
true /* representsBeginningOfSentence */, true /* isNotAWord */,
false /* isBlacklisted */, MAX_PROBABILITY /* probability */,
NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts);
if (!addUnigramEntry(prevWordsInfo->getNthPrevWordCodePoints(1 /* n */),
&beginningOfSentenceUnigramProperty)) {
AKLOGE("Cannot add unigram entry for the beginning-of-sentence.");
return false;
}
// Refresh word ids.
prevWordsInfo->getPrevWordIds(this, &prevWordIdArray, false /* tryLowerCaseSearch */);
} }
const int wordId = getWordId(CodePointArrayView(*bigramProperty->getTargetCodePoints()), const int wordId = getWordId(CodePointArrayView(*bigramProperty->getTargetCodePoints()),
false /* forceLowerCaseSearch */); false /* forceLowerCaseSearch */);
if (wordId == NOT_A_WORD_ID) { if (wordId == NOT_A_WORD_ID) {
return false; return false;
} }
// TODO: Support N-gram.
bool addedNewEntry = false; bool addedNewEntry = false;
WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordsPtNodePos; WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordsPtNodePos;
for (size_t i = 0; i < prevWordsPtNodePos.size(); ++i) { for (size_t i = 0; i < prevWordsPtNodePos.size(); ++i) {
@ -375,8 +377,7 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray; WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray;
const WordIdArrayView prevWordIds = prevWordsInfo->getPrevWordIds(this, &prevWordIdArray, const WordIdArrayView prevWordIds = prevWordsInfo->getPrevWordIds(this, &prevWordIdArray,
false /* tryLowerCaseSerch */); false /* tryLowerCaseSerch */);
// TODO: Support N-gram. if (prevWordIds.empty() || prevWordIds.contains(NOT_A_WORD_ID)) {
if (prevWordIds.empty() || prevWordIds[0] == NOT_A_WORD_ID) {
return false; return false;
} }
const int wordId = getWordId(wordCodePoints, false /* forceLowerCaseSearch */); const int wordId = getWordId(wordCodePoints, false /* forceLowerCaseSearch */);

View file

@ -17,6 +17,7 @@
#ifndef LATINIME_INT_ARRAY_VIEW_H #ifndef LATINIME_INT_ARRAY_VIEW_H
#define LATINIME_INT_ARRAY_VIEW_H #define LATINIME_INT_ARRAY_VIEW_H
#include <algorithm>
#include <array> #include <array>
#include <cstdint> #include <cstdint>
#include <cstring> #include <cstring>
@ -92,12 +93,16 @@ class IntArrayView {
return mPtr + mSize; return mPtr + mSize;
} }
// Returns true when at least one element in [begin(), end()) equals |value|.
// A view with no elements never contains anything.
AK_FORCE_INLINE bool contains(const int value) const {
    return std::any_of(begin(), end(),
            [value](const int element) { return element == value; });
}
// Returns the view whose size is smaller than or equal to the given count. // Returns the view whose size is smaller than or equal to the given count.
const IntArrayView limit(const size_t maxSize) const { AK_FORCE_INLINE const IntArrayView limit(const size_t maxSize) const {
return IntArrayView(mPtr, std::min(maxSize, mSize)); return IntArrayView(mPtr, std::min(maxSize, mSize));
} }
const IntArrayView skip(const size_t n) const { AK_FORCE_INLINE const IntArrayView skip(const size_t n) const {
if (mSize <= n) { if (mSize <= n) {
return IntArrayView(); return IntArrayView();
} }

View file

@ -58,6 +58,19 @@ TEST(IntArrayViewTest, TestConstructFromObject) {
EXPECT_EQ(object, intArrayView[0]); EXPECT_EQ(object, intArrayView[0]);
} }
// Verifies IntArrayView::contains() on empty views, on present and absent values, and on a
// view truncated to zero elements.
TEST(IntArrayViewTest, TestContains) {
    // A default-constructed view holds no elements, so no lookup can succeed.
    EXPECT_FALSE(IntArrayView().contains(0));
    EXPECT_FALSE(IntArrayView().contains(1));

    const std::vector<int> values = {3, 2, 1, 0, -1, -2};
    const IntArrayView view(values);

    // Values from the middle, the front and the back of the backing vector are found.
    EXPECT_TRUE(view.contains(0));
    EXPECT_TRUE(view.contains(3));
    EXPECT_TRUE(view.contains(-2));

    // A value that is not stored in the vector is not found.
    EXPECT_FALSE(view.contains(-3));

    // Limiting the view to zero elements hides even the first stored value.
    EXPECT_FALSE(view.limit(0).contains(3));
}
TEST(IntArrayViewTest, TestLimit) { TEST(IntArrayViewTest, TestLimit) {
const std::vector<int> intVector = {3, 2, 1, 0, -1, -2}; const std::vector<int> intVector = {3, 2, 1, 0, -1, -2};
IntArrayView intArrayView(intVector); IntArrayView intArrayView(intVector);