Merge "Use IntArrayView to add/remove n-gram entry." into lmp-dev

main
Keisuke Kuroyanagi 2014-08-06 05:01:49 +00:00 committed by Android (Google) Code Review
commit 4efa2c0cdb
11 changed files with 108 additions and 68 deletions

View File

@ -231,30 +231,31 @@ bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition(
&probabilityEntryToWrite); &probabilityEntryToWrite);
} }
bool Ver4PatriciaTrieNodeWriter::addNewBigramEntry( bool Ver4PatriciaTrieNodeWriter::addNgramEntry(const WordIdArrayView prevWordIds, const int wordId,
const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam, const BigramProperty *const bigramProperty, bool *const outAddedNewEntry) {
const BigramProperty *const bigramProperty, bool *const outAddedNewBigram) { if (!mBigramPolicy->addNewEntry(prevWordIds[0], wordId, bigramProperty, outAddedNewEntry)) {
if (!mBigramPolicy->addNewEntry(sourcePtNodeParams->getTerminalId(),
targetPtNodeParam->getTerminalId(), bigramProperty, outAddedNewBigram)) {
AKLOGE("Cannot add new bigram entry. terminalId: %d, targetTerminalId: %d", AKLOGE("Cannot add new bigram entry. terminalId: %d, targetTerminalId: %d",
sourcePtNodeParams->getTerminalId(), targetPtNodeParam->getTerminalId()); sourcePtNodeParams->getTerminalId(), targetPtNodeParam->getTerminalId());
return false; return false;
} }
if (!sourcePtNodeParams->hasBigrams()) { const int ptNodePos =
mBuffers->getTerminalPositionLookupTable()->getTerminalPtNodePosition(prevWordIds[0]);
const PtNodeParams sourcePtNodeParams =
mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
if (!sourcePtNodeParams.hasBigrams()) {
// Update has bigrams flag. // Update has bigrams flag.
return updatePtNodeFlags(sourcePtNodeParams->getHeadPos(), return updatePtNodeFlags(sourcePtNodeParams.getHeadPos(),
sourcePtNodeParams->isBlacklisted(), sourcePtNodeParams->isNotAWord(), sourcePtNodeParams.isBlacklisted(), sourcePtNodeParams.isNotAWord(),
sourcePtNodeParams->isTerminal(), sourcePtNodeParams->hasShortcutTargets(), sourcePtNodeParams.isTerminal(), sourcePtNodeParams.hasShortcutTargets(),
true /* hasBigrams */, true /* hasBigrams */,
sourcePtNodeParams->getCodePointCount() > 1 /* hasMultipleChars */); sourcePtNodeParams.getCodePointCount() > 1 /* hasMultipleChars */);
} }
return true; return true;
} }
bool Ver4PatriciaTrieNodeWriter::removeBigramEntry( bool Ver4PatriciaTrieNodeWriter::removeNgramEntry(const WordIdArrayView prevWordIds,
const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam) { const int wordId) {
return mBigramPolicy->removeEntry(sourcePtNodeParams->getTerminalId(), return mBigramPolicy->removeEntry(prevWordIds[0], wordId);
targetPtNodeParam->getTerminalId());
} }
bool Ver4PatriciaTrieNodeWriter::updateAllBigramEntriesAndDeleteUselessEntries( bool Ver4PatriciaTrieNodeWriter::updateAllBigramEntriesAndDeleteUselessEntries(

View File

@ -29,6 +29,7 @@
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h" #include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h" #include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h"
#include "suggest/policyimpl/dictionary/structure/backward/v402/content/probability_entry.h" #include "suggest/policyimpl/dictionary/structure/backward/v402/content/probability_entry.h"
#include "utils/int_array_view.h"
namespace latinime { namespace latinime {
namespace backward { namespace backward {
@ -61,8 +62,8 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
const PtNodeArrayReader *const ptNodeArrayReader, const PtNodeArrayReader *const ptNodeArrayReader,
Ver4BigramListPolicy *const bigramPolicy, Ver4ShortcutListPolicy *const shortcutPolicy) Ver4BigramListPolicy *const bigramPolicy, Ver4ShortcutListPolicy *const shortcutPolicy)
: mTrieBuffer(trieBuffer), mBuffers(buffers), mHeaderPolicy(headerPolicy), : mTrieBuffer(trieBuffer), mBuffers(buffers), mHeaderPolicy(headerPolicy),
mReadingHelper(ptNodeReader, ptNodeArrayReader), mBigramPolicy(bigramPolicy), mPtNodeReader(ptNodeReader), mReadingHelper(ptNodeReader, ptNodeArrayReader),
mShortcutPolicy(shortcutPolicy) {} mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy) {}
virtual ~Ver4PatriciaTrieNodeWriter() {} virtual ~Ver4PatriciaTrieNodeWriter() {}
@ -92,12 +93,10 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams, virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos); const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos);
virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams, virtual bool addNgramEntry(const WordIdArrayView prevWordIds, const int wordId,
const PtNodeParams *const targetPtNodeParam, const BigramProperty *const bigramProperty, const BigramProperty *const bigramProperty, bool *const outAddedNewEntry);
bool *const outAddedNewBigram);
virtual bool removeBigramEntry(const PtNodeParams *const sourcePtNodeParams, virtual bool removeNgramEntry(const WordIdArrayView prevWordIds, const int wordId);
const PtNodeParams *const targetPtNodeParam);
virtual bool updateAllBigramEntriesAndDeleteUselessEntries( virtual bool updateAllBigramEntriesAndDeleteUselessEntries(
const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount); const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount);
@ -135,6 +134,7 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
BufferWithExtendableBuffer *const mTrieBuffer; BufferWithExtendableBuffer *const mTrieBuffer;
Ver4DictBuffers *const mBuffers; Ver4DictBuffers *const mBuffers;
const HeaderPolicy *const mHeaderPolicy; const HeaderPolicy *const mHeaderPolicy;
const PtNodeReader *const mPtNodeReader;
DynamicPtReadingHelper mReadingHelper; DynamicPtReadingHelper mReadingHelper;
Ver4BigramListPolicy *const mBigramPolicy; Ver4BigramListPolicy *const mBigramPolicy;
Ver4ShortcutListPolicy *const mShortcutPolicy; Ver4ShortcutListPolicy *const mShortcutPolicy;

View File

@ -309,8 +309,8 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
return false; return false;
} }
bool addedNewBigram = false; bool addedNewBigram = false;
if (mUpdatingHelper.addBigramWords(prevWordsPtNodePos[0], word1Pos, bigramProperty, if (mUpdatingHelper.addNgramEntry(PtNodePosArrayView::fromObject(prevWordsPtNodePos),
&addedNewBigram)) { word1Pos, bigramProperty, &addedNewBigram)) {
if (addedNewBigram) { if (addedNewBigram) {
mBigramCount++; mBigramCount++;
} }
@ -350,7 +350,8 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
if (wordPos == NOT_A_DICT_POS) { if (wordPos == NOT_A_DICT_POS) {
return false; return false;
} }
if (mUpdatingHelper.removeBigramWords(prevWordsPtNodePos[0], wordPos)) { if (mUpdatingHelper.removeNgramEntry(
PtNodePosArrayView::fromObject(prevWordsPtNodePos), wordPos)) {
mBigramCount--; mBigramCount--;
return true; return true;
} else { } else {

View File

@ -84,23 +84,39 @@ bool DynamicPtUpdatingHelper::addUnigramWord(
unigramProperty, &pos); unigramProperty, &pos);
} }
bool DynamicPtUpdatingHelper::addBigramWords(const int word0Pos, const int word1Pos, bool DynamicPtUpdatingHelper::addNgramEntry(const PtNodePosArrayView prevWordsPtNodePos,
const BigramProperty *const bigramProperty, bool *const outAddedNewBigram) { const int wordPos, const BigramProperty *const bigramProperty,
const PtNodeParams sourcePtNodeParams( bool *const outAddedNewEntry) {
mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(word0Pos)); if (prevWordsPtNodePos.empty()) {
const PtNodeParams targetPtNodeParams( return false;
mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(word1Pos)); }
return mPtNodeWriter->addNewBigramEntry(&sourcePtNodeParams, &targetPtNodeParams, ASSERT(prevWordsPtNodePos.size() <= MAX_PREV_WORD_COUNT_FOR_N_GRAM);
bigramProperty, outAddedNewBigram); int prevWordTerminalIds[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
for (size_t i = 0; i < prevWordsPtNodePos.size(); ++i) {
prevWordTerminalIds[i] = mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(
prevWordsPtNodePos[i]).getTerminalId();
}
const WordIdArrayView prevWordIds(prevWordTerminalIds, prevWordsPtNodePos.size());
const int wordId =
mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(wordPos).getTerminalId();
return mPtNodeWriter->addNgramEntry(prevWordIds, wordId, bigramProperty, outAddedNewEntry);
} }
// Remove a bigram relation from word0Pos to word1Pos. bool DynamicPtUpdatingHelper::removeNgramEntry(const PtNodePosArrayView prevWordsPtNodePos,
bool DynamicPtUpdatingHelper::removeBigramWords(const int word0Pos, const int word1Pos) { const int wordPos) {
const PtNodeParams sourcePtNodeParams( if (prevWordsPtNodePos.empty()) {
mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(word0Pos)); return false;
const PtNodeParams targetPtNodeParams( }
mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(word1Pos)); ASSERT(prevWordsPtNodePos.size() <= MAX_PREV_WORD_COUNT_FOR_N_GRAM);
return mPtNodeWriter->removeBigramEntry(&sourcePtNodeParams, &targetPtNodeParams); int prevWordTerminalIds[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
for (size_t i = 0; i < prevWordsPtNodePos.size(); ++i) {
prevWordTerminalIds[i] = mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(
prevWordsPtNodePos[i]).getTerminalId();
}
const WordIdArrayView prevWordIds(prevWordTerminalIds, prevWordsPtNodePos.size());
const int wordId =
mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(wordPos).getTerminalId();
return mPtNodeWriter->removeNgramEntry(prevWordIds, wordId);
} }
bool DynamicPtUpdatingHelper::addShortcutTarget(const int wordPos, bool DynamicPtUpdatingHelper::addShortcutTarget(const int wordPos,

View File

@ -19,6 +19,7 @@
#include "defines.h" #include "defines.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h" #include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
#include "utils/int_array_view.h"
namespace latinime { namespace latinime {
@ -42,12 +43,12 @@ class DynamicPtUpdatingHelper {
const int *const wordCodePoints, const int codePointCount, const int *const wordCodePoints, const int codePointCount,
const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram); const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram);
// Add a bigram relation from word0Pos to word1Pos. // Add an n-gram entry.
bool addBigramWords(const int word0Pos, const int word1Pos, bool addNgramEntry(const PtNodePosArrayView prevWordsPtNodePos, const int wordPos,
const BigramProperty *const bigramProperty, bool *const outAddedNewBigram); const BigramProperty *const bigramProperty, bool *const outAddedNewEntry);
// Remove a bigram relation from word0Pos to word1Pos. // Remove an n-gram entry.
bool removeBigramWords(const int word0Pos, const int word1Pos); bool removeNgramEntry(const PtNodePosArrayView prevWordsPtNodePos, const int wordPos);
// Add a shortcut target. // Add a shortcut target.
bool addShortcutTarget(const int wordPos, const int *const targetCodePoints, bool addShortcutTarget(const int wordPos, const int *const targetCodePoints,

View File

@ -21,6 +21,7 @@
#include "defines.h" #include "defines.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h" #include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
#include "utils/int_array_view.h"
namespace latinime { namespace latinime {
@ -70,12 +71,10 @@ class PtNodeWriter {
virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams, virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos) = 0; const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos) = 0;
virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams, virtual bool addNgramEntry(const WordIdArrayView prevWordIds, const int wordId,
const PtNodeParams *const targetPtNodeParam, const BigramProperty *const bigramProperty, const BigramProperty *const bigramProperty, bool *const outAddedNewEntry) = 0;
bool *const outAddedNewBigram) = 0;
virtual bool removeBigramEntry(const PtNodeParams *const sourcePtNodeParams, virtual bool removeNgramEntry(const WordIdArrayView prevWordIds, const int wordId) = 0;
const PtNodeParams *const targetPtNodeParam) = 0;
virtual bool updateAllBigramEntriesAndDeleteUselessEntries( virtual bool updateAllBigramEntriesAndDeleteUselessEntries(
const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount) = 0; const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount) = 0;

View File

@ -222,22 +222,19 @@ bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition(
terminalId, &probabilityEntryToWrite); terminalId, &probabilityEntryToWrite);
} }
bool Ver4PatriciaTrieNodeWriter::addNewBigramEntry( bool Ver4PatriciaTrieNodeWriter::addNgramEntry(const WordIdArrayView prevWordIds, const int wordId,
const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam,
const BigramProperty *const bigramProperty, bool *const outAddedNewBigram) { const BigramProperty *const bigramProperty, bool *const outAddedNewBigram) {
if (!mBigramPolicy->addNewEntry(sourcePtNodeParams->getTerminalId(), if (!mBigramPolicy->addNewEntry(prevWordIds[0], wordId, bigramProperty, outAddedNewBigram)) {
targetPtNodeParam->getTerminalId(), bigramProperty, outAddedNewBigram)) {
AKLOGE("Cannot add new bigram entry. terminalId: %d, targetTerminalId: %d", AKLOGE("Cannot add new bigram entry. terminalId: %d, targetTerminalId: %d",
sourcePtNodeParams->getTerminalId(), targetPtNodeParam->getTerminalId()); prevWordIds[0], wordId);
return false; return false;
} }
return true; return true;
} }
bool Ver4PatriciaTrieNodeWriter::removeBigramEntry( bool Ver4PatriciaTrieNodeWriter::removeNgramEntry(const WordIdArrayView prevWordIds,
const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam) { const int wordId) {
return mBigramPolicy->removeEntry(sourcePtNodeParams->getTerminalId(), return mBigramPolicy->removeEntry(prevWordIds[0], wordId);
targetPtNodeParam->getTerminalId());
} }
bool Ver4PatriciaTrieNodeWriter::updateAllBigramEntriesAndDeleteUselessEntries( bool Ver4PatriciaTrieNodeWriter::updateAllBigramEntriesAndDeleteUselessEntries(

View File

@ -75,12 +75,10 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams, virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos); const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos);
virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams, virtual bool addNgramEntry(const WordIdArrayView prevWordIds, const int wordId,
const PtNodeParams *const targetPtNodeParam, const BigramProperty *const bigramProperty, const BigramProperty *const bigramProperty, bool *const outAddedNewEntry);
bool *const outAddedNewBigram);
virtual bool removeBigramEntry(const PtNodeParams *const sourcePtNodeParams, virtual bool removeNgramEntry(const WordIdArrayView prevWordIds, const int wordId);
const PtNodeParams *const targetPtNodeParam);
virtual bool updateAllBigramEntriesAndDeleteUselessEntries( virtual bool updateAllBigramEntriesAndDeleteUselessEntries(
const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount); const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount);

View File

@ -292,6 +292,7 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos, prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos,
false /* tryLowerCaseSearch */); false /* tryLowerCaseSearch */);
const auto prevWordsPtNodePosView = PtNodePosArrayView::fromFixedSizeArray(prevWordsPtNodePos);
// TODO: Support N-gram. // TODO: Support N-gram.
if (prevWordsPtNodePos[0] == NOT_A_DICT_POS) { if (prevWordsPtNodePos[0] == NOT_A_DICT_POS) {
if (prevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)) { if (prevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)) {
@ -319,10 +320,10 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
if (word1Pos == NOT_A_DICT_POS) { if (word1Pos == NOT_A_DICT_POS) {
return false; return false;
} }
bool addedNewBigram = false; bool addedNewEntry = false;
if (mUpdatingHelper.addBigramWords(prevWordsPtNodePos[0], word1Pos, bigramProperty, if (mUpdatingHelper.addNgramEntry(prevWordsPtNodePosView, word1Pos, bigramProperty,
&addedNewBigram)) { &addedNewEntry)) {
if (addedNewBigram) { if (addedNewEntry) {
mBigramCount++; mBigramCount++;
} }
return true; return true;
@ -352,6 +353,7 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos, prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos,
false /* tryLowerCaseSerch */); false /* tryLowerCaseSerch */);
const auto prevWordsPtNodePosView = PtNodePosArrayView::fromFixedSizeArray(prevWordsPtNodePos);
// TODO: Support N-gram. // TODO: Support N-gram.
if (prevWordsPtNodePos[0] == NOT_A_DICT_POS) { if (prevWordsPtNodePos[0] == NOT_A_DICT_POS) {
return false; return false;
@ -361,7 +363,7 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
if (wordPos == NOT_A_DICT_POS) { if (wordPos == NOT_A_DICT_POS) {
return false; return false;
} }
if (mUpdatingHelper.removeBigramWords(prevWordsPtNodePos[0], wordPos)) { if (mUpdatingHelper.removeNgramEntry(prevWordsPtNodePosView, wordPos)) {
mBigramCount--; mBigramCount--;
return true; return true;
} else { } else {

View File

@ -56,6 +56,16 @@ class IntArrayView {
explicit IntArrayView(const std::vector<int> &vector) explicit IntArrayView(const std::vector<int> &vector)
: mPtr(vector.data()), mSize(vector.size()) {} : mPtr(vector.data()), mSize(vector.size()) {}
template <int N>
AK_FORCE_INLINE static IntArrayView fromFixedSizeArray(const int (&array)[N]) {
return IntArrayView(array, N);
}
// Returns a view that points one int object. Does not take ownership of the given object.
AK_FORCE_INLINE static IntArrayView fromObject(const int *const object) {
return IntArrayView(object, 1);
}
AK_FORCE_INLINE int operator[](const size_t index) const { AK_FORCE_INLINE int operator[](const size_t index) const {
ASSERT(index < mSize); ASSERT(index < mSize);
return mPtr[index]; return mPtr[index];
@ -89,6 +99,7 @@ class IntArrayView {
}; };
using WordIdArrayView = IntArrayView; using WordIdArrayView = IntArrayView;
using PtNodePosArrayView = IntArrayView;
} // namespace latinime } // namespace latinime
#endif // LATINIME_MEMORY_VIEW_H #endif // LATINIME_MEMORY_VIEW_H

View File

@ -43,5 +43,19 @@ TEST(IntArrayViewTest, TestIteration) {
EXPECT_EQ(expectedIndex, intArrayView.size()); EXPECT_EQ(expectedIndex, intArrayView.size());
} }
TEST(IntArrayViewTest, TestConstructFromArray) {
const size_t ARRAY_SIZE = 100;
int intArray[ARRAY_SIZE];
const auto intArrayView = IntArrayView::fromFixedSizeArray(intArray);
EXPECT_EQ(ARRAY_SIZE, intArrayView.size());
}
TEST(IntArrayViewTest, TestConstructFromObject) {
const int object = 10;
const auto intArrayView = IntArrayView::fromObject(&object);
EXPECT_EQ(1, intArrayView.size());
EXPECT_EQ(object, intArrayView[0]);
}
} // namespace } // namespace
} // namespace latinime } // namespace latinime