am 30023f4d: Merge "Update input checking logic for adding/removing n-gram."
* commit '30023f4d8daac3678d791a70a6e270f0f1c7156f': Update input checking logic for adding/removing n-gram.
This commit is contained in:
commit
ab131309b7
3 changed files with 39 additions and 20 deletions
|
@ -310,30 +310,32 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
|
|||
if (prevWordIds.empty()) {
|
||||
return false;
|
||||
}
|
||||
// TODO: Support N-gram.
|
||||
if (prevWordIds[0] == NOT_A_WORD_ID) {
|
||||
if (prevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)) {
|
||||
const std::vector<UnigramProperty::ShortcutProperty> shortcuts;
|
||||
const UnigramProperty beginningOfSentenceUnigramProperty(
|
||||
true /* representsBeginningOfSentence */, true /* isNotAWord */,
|
||||
false /* isBlacklisted */, MAX_PROBABILITY /* probability */,
|
||||
NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts);
|
||||
if (!addUnigramEntry(prevWordsInfo->getNthPrevWordCodePoints(1 /* n */),
|
||||
&beginningOfSentenceUnigramProperty)) {
|
||||
AKLOGE("Cannot add unigram entry for the beginning-of-sentence.");
|
||||
return false;
|
||||
}
|
||||
// Refresh word ids.
|
||||
prevWordsInfo->getPrevWordIds(this, &prevWordIdArray, false /* tryLowerCaseSearch */);
|
||||
} else {
|
||||
for (size_t i = 0; i < prevWordIds.size(); ++i) {
|
||||
if (prevWordIds[i] != NOT_A_WORD_ID) {
|
||||
continue;
|
||||
}
|
||||
if (!prevWordsInfo->isNthPrevWordBeginningOfSentence(i + 1 /* n */)) {
|
||||
return false;
|
||||
}
|
||||
const std::vector<UnigramProperty::ShortcutProperty> shortcuts;
|
||||
const UnigramProperty beginningOfSentenceUnigramProperty(
|
||||
true /* representsBeginningOfSentence */, true /* isNotAWord */,
|
||||
false /* isBlacklisted */, MAX_PROBABILITY /* probability */,
|
||||
NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts);
|
||||
if (!addUnigramEntry(prevWordsInfo->getNthPrevWordCodePoints(1 /* n */),
|
||||
&beginningOfSentenceUnigramProperty)) {
|
||||
AKLOGE("Cannot add unigram entry for the beginning-of-sentence.");
|
||||
return false;
|
||||
}
|
||||
// Refresh word ids.
|
||||
prevWordsInfo->getPrevWordIds(this, &prevWordIdArray, false /* tryLowerCaseSearch */);
|
||||
}
|
||||
const int wordId = getWordId(CodePointArrayView(*bigramProperty->getTargetCodePoints()),
|
||||
false /* forceLowerCaseSearch */);
|
||||
if (wordId == NOT_A_WORD_ID) {
|
||||
return false;
|
||||
}
|
||||
// TODO: Support N-gram.
|
||||
bool addedNewEntry = false;
|
||||
WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordsPtNodePos;
|
||||
for (size_t i = 0; i < prevWordsPtNodePos.size(); ++i) {
|
||||
|
@ -375,8 +377,7 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
|
|||
WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray;
|
||||
const WordIdArrayView prevWordIds = prevWordsInfo->getPrevWordIds(this, &prevWordIdArray,
|
||||
false /* tryLowerCaseSearch */);
|
||||
// TODO: Support N-gram.
|
||||
if (prevWordIds.empty() || prevWordIds[0] == NOT_A_WORD_ID) {
|
||||
if (prevWordIds.empty() || prevWordIds.contains(NOT_A_WORD_ID)) {
|
||||
return false;
|
||||
}
|
||||
const int wordId = getWordId(wordCodePoints, false /* forceLowerCaseSearch */);
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
#ifndef LATINIME_INT_ARRAY_VIEW_H
|
||||
#define LATINIME_INT_ARRAY_VIEW_H
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
|
@ -92,12 +93,16 @@ class IntArrayView {
|
|||
return mPtr + mSize;
|
||||
}
|
||||
|
||||
// Reports whether any element of this view compares equal to the given value.
// Scans [begin(), end()) front to back and stops at the first match; an empty
// range therefore yields false.
AK_FORCE_INLINE bool contains(const int value) const {
    return std::any_of(begin(), end(),
            [value](const int element) { return element == value; });
}
|
||||
|
||||
// Returns the view whose size is smaller than or equal to the given count.
|
||||
const IntArrayView limit(const size_t maxSize) const {
|
||||
AK_FORCE_INLINE const IntArrayView limit(const size_t maxSize) const {
|
||||
return IntArrayView(mPtr, std::min(maxSize, mSize));
|
||||
}
|
||||
|
||||
const IntArrayView skip(const size_t n) const {
|
||||
AK_FORCE_INLINE const IntArrayView skip(const size_t n) const {
|
||||
if (mSize <= n) {
|
||||
return IntArrayView();
|
||||
}
|
||||
|
|
|
@ -58,6 +58,19 @@ TEST(IntArrayViewTest, TestConstructFromObject) {
|
|||
EXPECT_EQ(object, intArrayView[0]);
|
||||
}
|
||||
|
||||
// Exercises IntArrayView::contains() against default-constructed views, a
// populated view, and a view truncated to zero elements.
TEST(IntArrayViewTest, TestContains) {
    // A default-constructed view is expected to contain nothing.
    EXPECT_FALSE(IntArrayView().contains(0));
    EXPECT_FALSE(IntArrayView().contains(1));

    const std::vector<int> values = {3, 2, 1, 0, -1, -2};
    IntArrayView view(values);

    // Middle, first, and last elements of the backing vector are all found.
    EXPECT_TRUE(view.contains(0));
    EXPECT_TRUE(view.contains(3));
    EXPECT_TRUE(view.contains(-2));

    // A value absent from the backing vector is not found.
    EXPECT_FALSE(view.contains(-3));

    // Limiting the view to zero elements hides even the first element.
    EXPECT_FALSE(view.limit(0).contains(3));
}
|
||||
|
||||
TEST(IntArrayViewTest, TestLimit) {
|
||||
const std::vector<int> intVector = {3, 2, 1, 0, -1, -2};
|
||||
IntArrayView intArrayView(intVector);
|
||||
|
|
Loading…
Reference in a new issue