am 4efa2c0c: Merge "Use IntArrayView to add/remove n-gram entry." into lmp-dev
* commit '4efa2c0cdbc56ba0a43dc34b42c512941678ae70': Use IntArrayView to add/remove n-gram entry.
This commit is contained in:
commit
aa9ec4a41b
11 changed files with 108 additions and 68 deletions
|
@ -231,30 +231,31 @@ bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition(
|
|||
&probabilityEntryToWrite);
|
||||
}
|
||||
|
||||
bool Ver4PatriciaTrieNodeWriter::addNewBigramEntry(
|
||||
const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam,
|
||||
const BigramProperty *const bigramProperty, bool *const outAddedNewBigram) {
|
||||
if (!mBigramPolicy->addNewEntry(sourcePtNodeParams->getTerminalId(),
|
||||
targetPtNodeParam->getTerminalId(), bigramProperty, outAddedNewBigram)) {
|
||||
bool Ver4PatriciaTrieNodeWriter::addNgramEntry(const WordIdArrayView prevWordIds, const int wordId,
|
||||
const BigramProperty *const bigramProperty, bool *const outAddedNewEntry) {
|
||||
if (!mBigramPolicy->addNewEntry(prevWordIds[0], wordId, bigramProperty, outAddedNewEntry)) {
|
||||
AKLOGE("Cannot add new bigram entry. terminalId: %d, targetTerminalId: %d",
|
||||
sourcePtNodeParams->getTerminalId(), targetPtNodeParam->getTerminalId());
|
||||
return false;
|
||||
}
|
||||
if (!sourcePtNodeParams->hasBigrams()) {
|
||||
const int ptNodePos =
|
||||
mBuffers->getTerminalPositionLookupTable()->getTerminalPtNodePosition(prevWordIds[0]);
|
||||
const PtNodeParams sourcePtNodeParams =
|
||||
mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
|
||||
if (!sourcePtNodeParams.hasBigrams()) {
|
||||
// Update has bigrams flag.
|
||||
return updatePtNodeFlags(sourcePtNodeParams->getHeadPos(),
|
||||
sourcePtNodeParams->isBlacklisted(), sourcePtNodeParams->isNotAWord(),
|
||||
sourcePtNodeParams->isTerminal(), sourcePtNodeParams->hasShortcutTargets(),
|
||||
return updatePtNodeFlags(sourcePtNodeParams.getHeadPos(),
|
||||
sourcePtNodeParams.isBlacklisted(), sourcePtNodeParams.isNotAWord(),
|
||||
sourcePtNodeParams.isTerminal(), sourcePtNodeParams.hasShortcutTargets(),
|
||||
true /* hasBigrams */,
|
||||
sourcePtNodeParams->getCodePointCount() > 1 /* hasMultipleChars */);
|
||||
sourcePtNodeParams.getCodePointCount() > 1 /* hasMultipleChars */);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Ver4PatriciaTrieNodeWriter::removeBigramEntry(
|
||||
const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam) {
|
||||
return mBigramPolicy->removeEntry(sourcePtNodeParams->getTerminalId(),
|
||||
targetPtNodeParam->getTerminalId());
|
||||
bool Ver4PatriciaTrieNodeWriter::removeNgramEntry(const WordIdArrayView prevWordIds,
|
||||
const int wordId) {
|
||||
return mBigramPolicy->removeEntry(prevWordIds[0], wordId);
|
||||
}
|
||||
|
||||
bool Ver4PatriciaTrieNodeWriter::updateAllBigramEntriesAndDeleteUselessEntries(
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v402/content/probability_entry.h"
|
||||
#include "utils/int_array_view.h"
|
||||
|
||||
namespace latinime {
|
||||
namespace backward {
|
||||
|
@ -61,8 +62,8 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
|
|||
const PtNodeArrayReader *const ptNodeArrayReader,
|
||||
Ver4BigramListPolicy *const bigramPolicy, Ver4ShortcutListPolicy *const shortcutPolicy)
|
||||
: mTrieBuffer(trieBuffer), mBuffers(buffers), mHeaderPolicy(headerPolicy),
|
||||
mReadingHelper(ptNodeReader, ptNodeArrayReader), mBigramPolicy(bigramPolicy),
|
||||
mShortcutPolicy(shortcutPolicy) {}
|
||||
mPtNodeReader(ptNodeReader), mReadingHelper(ptNodeReader, ptNodeArrayReader),
|
||||
mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy) {}
|
||||
|
||||
virtual ~Ver4PatriciaTrieNodeWriter() {}
|
||||
|
||||
|
@ -92,12 +93,10 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
|
|||
virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
|
||||
const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos);
|
||||
|
||||
virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
|
||||
const PtNodeParams *const targetPtNodeParam, const BigramProperty *const bigramProperty,
|
||||
bool *const outAddedNewBigram);
|
||||
virtual bool addNgramEntry(const WordIdArrayView prevWordIds, const int wordId,
|
||||
const BigramProperty *const bigramProperty, bool *const outAddedNewEntry);
|
||||
|
||||
virtual bool removeBigramEntry(const PtNodeParams *const sourcePtNodeParams,
|
||||
const PtNodeParams *const targetPtNodeParam);
|
||||
virtual bool removeNgramEntry(const WordIdArrayView prevWordIds, const int wordId);
|
||||
|
||||
virtual bool updateAllBigramEntriesAndDeleteUselessEntries(
|
||||
const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount);
|
||||
|
@ -135,6 +134,7 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
|
|||
BufferWithExtendableBuffer *const mTrieBuffer;
|
||||
Ver4DictBuffers *const mBuffers;
|
||||
const HeaderPolicy *const mHeaderPolicy;
|
||||
const PtNodeReader *const mPtNodeReader;
|
||||
DynamicPtReadingHelper mReadingHelper;
|
||||
Ver4BigramListPolicy *const mBigramPolicy;
|
||||
Ver4ShortcutListPolicy *const mShortcutPolicy;
|
||||
|
|
|
@ -309,8 +309,8 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
|
|||
return false;
|
||||
}
|
||||
bool addedNewBigram = false;
|
||||
if (mUpdatingHelper.addBigramWords(prevWordsPtNodePos[0], word1Pos, bigramProperty,
|
||||
&addedNewBigram)) {
|
||||
if (mUpdatingHelper.addNgramEntry(PtNodePosArrayView::fromObject(prevWordsPtNodePos),
|
||||
word1Pos, bigramProperty, &addedNewBigram)) {
|
||||
if (addedNewBigram) {
|
||||
mBigramCount++;
|
||||
}
|
||||
|
@ -350,7 +350,8 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
|
|||
if (wordPos == NOT_A_DICT_POS) {
|
||||
return false;
|
||||
}
|
||||
if (mUpdatingHelper.removeBigramWords(prevWordsPtNodePos[0], wordPos)) {
|
||||
if (mUpdatingHelper.removeNgramEntry(
|
||||
PtNodePosArrayView::fromObject(prevWordsPtNodePos), wordPos)) {
|
||||
mBigramCount--;
|
||||
return true;
|
||||
} else {
|
||||
|
|
|
@ -84,23 +84,39 @@ bool DynamicPtUpdatingHelper::addUnigramWord(
|
|||
unigramProperty, &pos);
|
||||
}
|
||||
|
||||
bool DynamicPtUpdatingHelper::addBigramWords(const int word0Pos, const int word1Pos,
|
||||
const BigramProperty *const bigramProperty, bool *const outAddedNewBigram) {
|
||||
const PtNodeParams sourcePtNodeParams(
|
||||
mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(word0Pos));
|
||||
const PtNodeParams targetPtNodeParams(
|
||||
mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(word1Pos));
|
||||
return mPtNodeWriter->addNewBigramEntry(&sourcePtNodeParams, &targetPtNodeParams,
|
||||
bigramProperty, outAddedNewBigram);
|
||||
bool DynamicPtUpdatingHelper::addNgramEntry(const PtNodePosArrayView prevWordsPtNodePos,
|
||||
const int wordPos, const BigramProperty *const bigramProperty,
|
||||
bool *const outAddedNewEntry) {
|
||||
if (prevWordsPtNodePos.empty()) {
|
||||
return false;
|
||||
}
|
||||
ASSERT(prevWordsPtNodePos.size() <= MAX_PREV_WORD_COUNT_FOR_N_GRAM);
|
||||
int prevWordTerminalIds[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
|
||||
for (size_t i = 0; i < prevWordsPtNodePos.size(); ++i) {
|
||||
prevWordTerminalIds[i] = mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(
|
||||
prevWordsPtNodePos[i]).getTerminalId();
|
||||
}
|
||||
const WordIdArrayView prevWordIds(prevWordTerminalIds, prevWordsPtNodePos.size());
|
||||
const int wordId =
|
||||
mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(wordPos).getTerminalId();
|
||||
return mPtNodeWriter->addNgramEntry(prevWordIds, wordId, bigramProperty, outAddedNewEntry);
|
||||
}
|
||||
|
||||
// Remove an n-gram entry for the given previous words and the word at wordPos.
|
||||
bool DynamicPtUpdatingHelper::removeBigramWords(const int word0Pos, const int word1Pos) {
|
||||
const PtNodeParams sourcePtNodeParams(
|
||||
mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(word0Pos));
|
||||
const PtNodeParams targetPtNodeParams(
|
||||
mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(word1Pos));
|
||||
return mPtNodeWriter->removeBigramEntry(&sourcePtNodeParams, &targetPtNodeParams);
|
||||
bool DynamicPtUpdatingHelper::removeNgramEntry(const PtNodePosArrayView prevWordsPtNodePos,
|
||||
const int wordPos) {
|
||||
if (prevWordsPtNodePos.empty()) {
|
||||
return false;
|
||||
}
|
||||
ASSERT(prevWordsPtNodePos.size() <= MAX_PREV_WORD_COUNT_FOR_N_GRAM);
|
||||
int prevWordTerminalIds[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
|
||||
for (size_t i = 0; i < prevWordsPtNodePos.size(); ++i) {
|
||||
prevWordTerminalIds[i] = mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(
|
||||
prevWordsPtNodePos[i]).getTerminalId();
|
||||
}
|
||||
const WordIdArrayView prevWordIds(prevWordTerminalIds, prevWordsPtNodePos.size());
|
||||
const int wordId =
|
||||
mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(wordPos).getTerminalId();
|
||||
return mPtNodeWriter->removeNgramEntry(prevWordIds, wordId);
|
||||
}
|
||||
|
||||
bool DynamicPtUpdatingHelper::addShortcutTarget(const int wordPos,
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
|
||||
#include "defines.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
|
||||
#include "utils/int_array_view.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
|
@ -42,12 +43,12 @@ class DynamicPtUpdatingHelper {
|
|||
const int *const wordCodePoints, const int codePointCount,
|
||||
const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram);
|
||||
|
||||
// Add a bigram relation from word0Pos to word1Pos.
|
||||
bool addBigramWords(const int word0Pos, const int word1Pos,
|
||||
const BigramProperty *const bigramProperty, bool *const outAddedNewBigram);
|
||||
// Add an n-gram entry.
|
||||
bool addNgramEntry(const PtNodePosArrayView prevWordsPtNodePos, const int wordPos,
|
||||
const BigramProperty *const bigramProperty, bool *const outAddedNewEntry);
|
||||
|
||||
// Remove a bigram relation from word0Pos to word1Pos.
|
||||
bool removeBigramWords(const int word0Pos, const int word1Pos);
|
||||
// Remove an n-gram entry.
|
||||
bool removeNgramEntry(const PtNodePosArrayView prevWordsPtNodePos, const int wordPos);
|
||||
|
||||
// Add a shortcut target.
|
||||
bool addShortcutTarget(const int wordPos, const int *const targetCodePoints,
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
|
||||
#include "defines.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
|
||||
#include "utils/int_array_view.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
|
@ -70,12 +71,10 @@ class PtNodeWriter {
|
|||
virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
|
||||
const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos) = 0;
|
||||
|
||||
virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
|
||||
const PtNodeParams *const targetPtNodeParam, const BigramProperty *const bigramProperty,
|
||||
bool *const outAddedNewBigram) = 0;
|
||||
virtual bool addNgramEntry(const WordIdArrayView prevWordIds, const int wordId,
|
||||
const BigramProperty *const bigramProperty, bool *const outAddedNewEntry) = 0;
|
||||
|
||||
virtual bool removeBigramEntry(const PtNodeParams *const sourcePtNodeParams,
|
||||
const PtNodeParams *const targetPtNodeParam) = 0;
|
||||
virtual bool removeNgramEntry(const WordIdArrayView prevWordIds, const int wordId) = 0;
|
||||
|
||||
virtual bool updateAllBigramEntriesAndDeleteUselessEntries(
|
||||
const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount) = 0;
|
||||
|
|
|
@ -222,22 +222,19 @@ bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition(
|
|||
terminalId, &probabilityEntryToWrite);
|
||||
}
|
||||
|
||||
bool Ver4PatriciaTrieNodeWriter::addNewBigramEntry(
|
||||
const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam,
|
||||
bool Ver4PatriciaTrieNodeWriter::addNgramEntry(const WordIdArrayView prevWordIds, const int wordId,
|
||||
const BigramProperty *const bigramProperty, bool *const outAddedNewBigram) {
|
||||
if (!mBigramPolicy->addNewEntry(sourcePtNodeParams->getTerminalId(),
|
||||
targetPtNodeParam->getTerminalId(), bigramProperty, outAddedNewBigram)) {
|
||||
if (!mBigramPolicy->addNewEntry(prevWordIds[0], wordId, bigramProperty, outAddedNewBigram)) {
|
||||
AKLOGE("Cannot add new bigram entry. terminalId: %d, targetTerminalId: %d",
|
||||
sourcePtNodeParams->getTerminalId(), targetPtNodeParam->getTerminalId());
|
||||
prevWordIds[0], wordId);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Ver4PatriciaTrieNodeWriter::removeBigramEntry(
|
||||
const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam) {
|
||||
return mBigramPolicy->removeEntry(sourcePtNodeParams->getTerminalId(),
|
||||
targetPtNodeParam->getTerminalId());
|
||||
bool Ver4PatriciaTrieNodeWriter::removeNgramEntry(const WordIdArrayView prevWordIds,
|
||||
const int wordId) {
|
||||
return mBigramPolicy->removeEntry(prevWordIds[0], wordId);
|
||||
}
|
||||
|
||||
bool Ver4PatriciaTrieNodeWriter::updateAllBigramEntriesAndDeleteUselessEntries(
|
||||
|
|
|
@ -75,12 +75,10 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
|
|||
virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
|
||||
const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos);
|
||||
|
||||
virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
|
||||
const PtNodeParams *const targetPtNodeParam, const BigramProperty *const bigramProperty,
|
||||
bool *const outAddedNewBigram);
|
||||
virtual bool addNgramEntry(const WordIdArrayView prevWordIds, const int wordId,
|
||||
const BigramProperty *const bigramProperty, bool *const outAddedNewEntry);
|
||||
|
||||
virtual bool removeBigramEntry(const PtNodeParams *const sourcePtNodeParams,
|
||||
const PtNodeParams *const targetPtNodeParam);
|
||||
virtual bool removeNgramEntry(const WordIdArrayView prevWordIds, const int wordId);
|
||||
|
||||
virtual bool updateAllBigramEntriesAndDeleteUselessEntries(
|
||||
const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount);
|
||||
|
|
|
@ -292,6 +292,7 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
|
|||
int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
|
||||
prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos,
|
||||
false /* tryLowerCaseSearch */);
|
||||
const auto prevWordsPtNodePosView = PtNodePosArrayView::fromFixedSizeArray(prevWordsPtNodePos);
|
||||
// TODO: Support N-gram.
|
||||
if (prevWordsPtNodePos[0] == NOT_A_DICT_POS) {
|
||||
if (prevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)) {
|
||||
|
@ -319,10 +320,10 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
|
|||
if (word1Pos == NOT_A_DICT_POS) {
|
||||
return false;
|
||||
}
|
||||
bool addedNewBigram = false;
|
||||
if (mUpdatingHelper.addBigramWords(prevWordsPtNodePos[0], word1Pos, bigramProperty,
|
||||
&addedNewBigram)) {
|
||||
if (addedNewBigram) {
|
||||
bool addedNewEntry = false;
|
||||
if (mUpdatingHelper.addNgramEntry(prevWordsPtNodePosView, word1Pos, bigramProperty,
|
||||
&addedNewEntry)) {
|
||||
if (addedNewEntry) {
|
||||
mBigramCount++;
|
||||
}
|
||||
return true;
|
||||
|
@ -352,6 +353,7 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
|
|||
int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
|
||||
prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos,
|
||||
false /* tryLowerCaseSerch */);
|
||||
const auto prevWordsPtNodePosView = PtNodePosArrayView::fromFixedSizeArray(prevWordsPtNodePos);
|
||||
// TODO: Support N-gram.
|
||||
if (prevWordsPtNodePos[0] == NOT_A_DICT_POS) {
|
||||
return false;
|
||||
|
@ -361,7 +363,7 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
|
|||
if (wordPos == NOT_A_DICT_POS) {
|
||||
return false;
|
||||
}
|
||||
if (mUpdatingHelper.removeBigramWords(prevWordsPtNodePos[0], wordPos)) {
|
||||
if (mUpdatingHelper.removeNgramEntry(prevWordsPtNodePosView, wordPos)) {
|
||||
mBigramCount--;
|
||||
return true;
|
||||
} else {
|
||||
|
|
|
@ -56,6 +56,16 @@ class IntArrayView {
|
|||
explicit IntArrayView(const std::vector<int> &vector)
|
||||
: mPtr(vector.data()), mSize(vector.size()) {}
|
||||
|
||||
template <int N>
|
||||
AK_FORCE_INLINE static IntArrayView fromFixedSizeArray(const int (&array)[N]) {
|
||||
return IntArrayView(array, N);
|
||||
}
|
||||
|
||||
// Returns a view that points one int object. Does not take ownership of the given object.
|
||||
AK_FORCE_INLINE static IntArrayView fromObject(const int *const object) {
|
||||
return IntArrayView(object, 1);
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE int operator[](const size_t index) const {
|
||||
ASSERT(index < mSize);
|
||||
return mPtr[index];
|
||||
|
@ -89,6 +99,7 @@ class IntArrayView {
|
|||
};
|
||||
|
||||
using WordIdArrayView = IntArrayView;
|
||||
using PtNodePosArrayView = IntArrayView;
|
||||
|
||||
} // namespace latinime
|
||||
#endif // LATINIME_MEMORY_VIEW_H
|
||||
|
|
|
@ -43,5 +43,19 @@ TEST(IntArrayViewTest, TestIteration) {
|
|||
EXPECT_EQ(expectedIndex, intArrayView.size());
|
||||
}
|
||||
|
||||
// A view built from a fixed-size array must report the array's element count as its size.
TEST(IntArrayViewTest, TestConstructFromArray) {
    const size_t ELEMENT_COUNT = 100;
    // Only the size is checked, so the contents are deliberately left uninitialized.
    int elements[ELEMENT_COUNT];
    const IntArrayView view = IntArrayView::fromFixedSizeArray(elements);
    EXPECT_EQ(ELEMENT_COUNT, view.size());
}
|
||||
|
||||
// A view built from a single object must have size one and expose that object's value.
TEST(IntArrayViewTest, TestConstructFromObject) {
    const int object = 10;
    const auto intArrayView = IntArrayView::fromObject(&object);
    // Use an unsigned literal: size() is compared against a size_t in the sibling test, and a
    // plain int here would make EXPECT_EQ perform a signed/unsigned comparison.
    EXPECT_EQ(1u, intArrayView.size());
    EXPECT_EQ(object, intArrayView[0]);
}
|
||||
|
||||
} // namespace
|
||||
} // namespace latinime
|
||||
|
|
Loading…
Reference in a new issue