Merge "Update input checking logic for adding/removing n-gram."
This commit is contained in:
commit
30023f4d8d
3 changed files with 39 additions and 20 deletions
|
@ -310,30 +310,32 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
|
||||||
if (prevWordIds.empty()) {
|
if (prevWordIds.empty()) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
// TODO: Support N-gram.
|
for (size_t i = 0; i < prevWordIds.size(); ++i) {
|
||||||
if (prevWordIds[0] == NOT_A_WORD_ID) {
|
if (prevWordIds[i] != NOT_A_WORD_ID) {
|
||||||
if (prevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)) {
|
continue;
|
||||||
const std::vector<UnigramProperty::ShortcutProperty> shortcuts;
|
}
|
||||||
const UnigramProperty beginningOfSentenceUnigramProperty(
|
if (!prevWordsInfo->isNthPrevWordBeginningOfSentence(i + 1 /* n */)) {
|
||||||
true /* representsBeginningOfSentence */, true /* isNotAWord */,
|
|
||||||
false /* isBlacklisted */, MAX_PROBABILITY /* probability */,
|
|
||||||
NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts);
|
|
||||||
if (!addUnigramEntry(prevWordsInfo->getNthPrevWordCodePoints(1 /* n */),
|
|
||||||
&beginningOfSentenceUnigramProperty)) {
|
|
||||||
AKLOGE("Cannot add unigram entry for the beginning-of-sentence.");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
// Refresh word ids.
|
|
||||||
prevWordsInfo->getPrevWordIds(this, &prevWordIdArray, false /* tryLowerCaseSearch */);
|
|
||||||
} else {
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
const std::vector<UnigramProperty::ShortcutProperty> shortcuts;
|
||||||
|
const UnigramProperty beginningOfSentenceUnigramProperty(
|
||||||
|
true /* representsBeginningOfSentence */, true /* isNotAWord */,
|
||||||
|
false /* isBlacklisted */, MAX_PROBABILITY /* probability */,
|
||||||
|
NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts);
|
||||||
|
if (!addUnigramEntry(prevWordsInfo->getNthPrevWordCodePoints(1 /* n */),
|
||||||
|
&beginningOfSentenceUnigramProperty)) {
|
||||||
|
AKLOGE("Cannot add unigram entry for the beginning-of-sentence.");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Refresh word ids.
|
||||||
|
prevWordsInfo->getPrevWordIds(this, &prevWordIdArray, false /* tryLowerCaseSearch */);
|
||||||
}
|
}
|
||||||
const int wordId = getWordId(CodePointArrayView(*bigramProperty->getTargetCodePoints()),
|
const int wordId = getWordId(CodePointArrayView(*bigramProperty->getTargetCodePoints()),
|
||||||
false /* forceLowerCaseSearch */);
|
false /* forceLowerCaseSearch */);
|
||||||
if (wordId == NOT_A_WORD_ID) {
|
if (wordId == NOT_A_WORD_ID) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
// TODO: Support N-gram.
|
||||||
bool addedNewEntry = false;
|
bool addedNewEntry = false;
|
||||||
WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordsPtNodePos;
|
WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordsPtNodePos;
|
||||||
for (size_t i = 0; i < prevWordsPtNodePos.size(); ++i) {
|
for (size_t i = 0; i < prevWordsPtNodePos.size(); ++i) {
|
||||||
|
@ -375,8 +377,7 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
|
||||||
WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray;
|
WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray;
|
||||||
const WordIdArrayView prevWordIds = prevWordsInfo->getPrevWordIds(this, &prevWordIdArray,
|
const WordIdArrayView prevWordIds = prevWordsInfo->getPrevWordIds(this, &prevWordIdArray,
|
||||||
false /* tryLowerCaseSerch */);
|
false /* tryLowerCaseSerch */);
|
||||||
// TODO: Support N-gram.
|
if (prevWordIds.empty() || prevWordIds.contains(NOT_A_WORD_ID)) {
|
||||||
if (prevWordIds.empty() || prevWordIds[0] == NOT_A_WORD_ID) {
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
const int wordId = getWordId(wordCodePoints, false /* forceLowerCaseSearch */);
|
const int wordId = getWordId(wordCodePoints, false /* forceLowerCaseSearch */);
|
||||||
|
|
|
@ -17,6 +17,7 @@
|
||||||
#ifndef LATINIME_INT_ARRAY_VIEW_H
|
#ifndef LATINIME_INT_ARRAY_VIEW_H
|
||||||
#define LATINIME_INT_ARRAY_VIEW_H
|
#define LATINIME_INT_ARRAY_VIEW_H
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
#include <array>
|
#include <array>
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
@ -92,12 +93,16 @@ class IntArrayView {
|
||||||
return mPtr + mSize;
|
return mPtr + mSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE bool contains(const int value) const {
|
||||||
|
return std::find(begin(), end(), value) != end();
|
||||||
|
}
|
||||||
|
|
||||||
// Returns the view whose size is smaller than or equal to the given count.
|
// Returns the view whose size is smaller than or equal to the given count.
|
||||||
const IntArrayView limit(const size_t maxSize) const {
|
AK_FORCE_INLINE const IntArrayView limit(const size_t maxSize) const {
|
||||||
return IntArrayView(mPtr, std::min(maxSize, mSize));
|
return IntArrayView(mPtr, std::min(maxSize, mSize));
|
||||||
}
|
}
|
||||||
|
|
||||||
const IntArrayView skip(const size_t n) const {
|
AK_FORCE_INLINE const IntArrayView skip(const size_t n) const {
|
||||||
if (mSize <= n) {
|
if (mSize <= n) {
|
||||||
return IntArrayView();
|
return IntArrayView();
|
||||||
}
|
}
|
||||||
|
|
|
@ -58,6 +58,19 @@ TEST(IntArrayViewTest, TestConstructFromObject) {
|
||||||
EXPECT_EQ(object, intArrayView[0]);
|
EXPECT_EQ(object, intArrayView[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST(IntArrayViewTest, TestContains) {
|
||||||
|
EXPECT_FALSE(IntArrayView().contains(0));
|
||||||
|
EXPECT_FALSE(IntArrayView().contains(1));
|
||||||
|
|
||||||
|
const std::vector<int> intVector = {3, 2, 1, 0, -1, -2};
|
||||||
|
IntArrayView intArrayView(intVector);
|
||||||
|
EXPECT_TRUE(intArrayView.contains(0));
|
||||||
|
EXPECT_TRUE(intArrayView.contains(3));
|
||||||
|
EXPECT_TRUE(intArrayView.contains(-2));
|
||||||
|
EXPECT_FALSE(intArrayView.contains(-3));
|
||||||
|
EXPECT_FALSE(intArrayView.limit(0).contains(3));
|
||||||
|
}
|
||||||
|
|
||||||
TEST(IntArrayViewTest, TestLimit) {
|
TEST(IntArrayViewTest, TestLimit) {
|
||||||
const std::vector<int> intVector = {3, 2, 1, 0, -1, -2};
|
const std::vector<int> intVector = {3, 2, 1, 0, -1, -2};
|
||||||
IntArrayView intArrayView(intVector);
|
IntArrayView intArrayView(intVector);
|
||||||
|
|
Loading…
Reference in a new issue