am 57c3c33c
: am a7be110d
: am 4fd1ffdb
: Merge "Add/Get n-gram probability entry in languageModelDictContent" into lmp-dev
* commit '57c3c33c394dad248bd3b199f52e52e7354a56cb': Add/Get n-gram probability entry in languageModelDictContent
This commit is contained in:
commit
289b84734d
5 changed files with 44 additions and 10 deletions
|
@ -32,11 +32,11 @@ bool LanguageModelDictContent::runGC(
|
||||||
|
|
||||||
ProbabilityEntry LanguageModelDictContent::getNgramProbabilityEntry(
|
ProbabilityEntry LanguageModelDictContent::getNgramProbabilityEntry(
|
||||||
const WordIdArrayView prevWordIds, const int wordId) const {
|
const WordIdArrayView prevWordIds, const int wordId) const {
|
||||||
if (!prevWordIds.empty()) {
|
const int bitmapEntryIndex = getBitmapEntryIndex(prevWordIds);
|
||||||
// TODO: Read n-gram entry.
|
if (bitmapEntryIndex == TrieMap::INVALID_INDEX) {
|
||||||
return ProbabilityEntry();
|
return ProbabilityEntry();
|
||||||
}
|
}
|
||||||
const TrieMap::Result result = mTrieMap.getRoot(wordId);
|
const TrieMap::Result result = mTrieMap.get(wordId, bitmapEntryIndex);
|
||||||
if (!result.mIsValid) {
|
if (!result.mIsValid) {
|
||||||
// Not found.
|
// Not found.
|
||||||
return ProbabilityEntry();
|
return ProbabilityEntry();
|
||||||
|
@ -46,14 +46,13 @@ ProbabilityEntry LanguageModelDictContent::getNgramProbabilityEntry(
|
||||||
|
|
||||||
bool LanguageModelDictContent::setNgramProbabilityEntry(const WordIdArrayView prevWordIds,
|
bool LanguageModelDictContent::setNgramProbabilityEntry(const WordIdArrayView prevWordIds,
|
||||||
const int terminalId, const ProbabilityEntry *const probabilityEntry) {
|
const int terminalId, const ProbabilityEntry *const probabilityEntry) {
|
||||||
if (!prevWordIds.empty()) {
|
const int bitmapEntryIndex = getBitmapEntryIndex(prevWordIds);
|
||||||
// TODO: Add n-gram entry.
|
if (bitmapEntryIndex == TrieMap::INVALID_INDEX) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return mTrieMap.putRoot(terminalId, probabilityEntry->encode(mHasHistoricalInfo));
|
return mTrieMap.put(terminalId, probabilityEntry->encode(mHasHistoricalInfo), bitmapEntryIndex);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
bool LanguageModelDictContent::runGCInner(
|
bool LanguageModelDictContent::runGCInner(
|
||||||
const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
|
const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
|
||||||
const TrieMap::TrieMapRange trieMapRange,
|
const TrieMap::TrieMapRange trieMapRange,
|
||||||
|
@ -81,4 +80,16 @@ bool LanguageModelDictContent::runGCInner(
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Resolves the bitmap entry index under which entries following prevWordIds are stored.
// Starts at the trie map's root bitmap entry and, for each word id in prevWordIds (in the
// view's iteration order), looks the word id up at the current level and moves to the index
// reported by that lookup. Returns TrieMap::INVALID_INDEX as soon as a lookup is not valid.
// When prevWordIds is empty the loop does not run and the root bitmap entry index is returned.
// NOTE(review): the final mNextLevelBitmapEntryIndex is returned as-is; callers visible in this
// change compare the result against TrieMap::INVALID_INDEX before using it.
int LanguageModelDictContent::getBitmapEntryIndex(const WordIdArrayView prevWordIds) const {
|
||||||
|
int bitmapEntryIndex = mTrieMap.getRootBitmapEntryIndex();
|
||||||
|
for (const int wordId : prevWordIds) {
|
||||||
|
// Look up this context word in the level reached so far.
const TrieMap::Result result = mTrieMap.get(wordId, bitmapEntryIndex);
|
||||||
|
if (!result.mIsValid) {
|
||||||
|
// The context word has no entry at this level, so the whole context is unknown.
return TrieMap::INVALID_INDEX;
|
||||||
|
}
|
||||||
|
// The next iteration (or the caller) uses the level referenced by this entry.
bitmapEntryIndex = result.mNextLevelBitmapEntryIndex;
|
||||||
|
}
|
||||||
|
return bitmapEntryIndex;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
|
|
|
@ -76,6 +76,8 @@ class LanguageModelDictContent {
|
||||||
bool runGCInner(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
|
bool runGCInner(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
|
||||||
const TrieMap::TrieMapRange trieMapRange, const int nextLevelBitmapEntryIndex,
|
const TrieMap::TrieMapRange trieMapRange, const int nextLevelBitmapEntryIndex,
|
||||||
int *const outNgramCount);
|
int *const outNgramCount);
|
||||||
|
|
||||||
|
int getBitmapEntryIndex(const WordIdArrayView prevWordIds) const;
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif /* LATINIME_LANGUAGE_MODEL_DICT_CONTENT_H */
|
#endif /* LATINIME_LANGUAGE_MODEL_DICT_CONTENT_H */
|
||||||
|
|
|
@ -169,6 +169,10 @@ class TrieMap {
|
||||||
return mBuffer.isNearSizeLimit();
|
return mBuffer.isNearSizeLimit();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns ROOT_BITMAP_ENTRY_INDEX, the same index that the single-argument
// getNextLevelBitmapEntryIndex(key) overload passes for the root level. Lets callers outside
// this class start a level-by-level walk from the root.
int getRootBitmapEntryIndex() const {
|
||||||
|
return ROOT_BITMAP_ENTRY_INDEX;
|
||||||
|
}
|
||||||
|
|
||||||
// Returns bitmapEntryIndex. Create the next level map if it doesn't exist.
|
// Returns bitmapEntryIndex. Create the next level map if it doesn't exist.
|
||||||
int getNextLevelBitmapEntryIndex(const int key) {
|
int getNextLevelBitmapEntryIndex(const int key) {
|
||||||
return getNextLevelBitmapEntryIndex(key, ROOT_BITMAP_ENTRY_INDEX);
|
return getNextLevelBitmapEntryIndex(key, ROOT_BITMAP_ENTRY_INDEX);
|
||||||
|
|
|
@ -73,6 +73,14 @@ class IntArrayView {
|
||||||
return mPtr;
|
return mPtr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns a pointer to the first element of the view (mPtr). Paired with end(), this allows
// the view to be used in a range-based for loop.
AK_FORCE_INLINE const int *begin() const {
|
||||||
|
return mPtr;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the one-past-the-last pointer of the view (mPtr + mSize); the end marker for
// iteration that starts at begin().
AK_FORCE_INLINE const int *end() const {
|
||||||
|
return mPtr + mSize;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_ASSIGNMENT_OPERATOR(IntArrayView);
|
DISALLOW_ASSIGNMENT_OPERATOR(IntArrayView);
|
||||||
|
|
||||||
|
|
|
@ -24,15 +24,24 @@ namespace latinime {
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
TEST(MemoryViewTest, TestAccess) {
|
TEST(MemoryViewTest, TestAccess) {
|
||||||
static const int DATA_SIZE = 10000;
|
|
||||||
|
|
||||||
std::vector<int> intVector = {3, 2, 1, 0, -1, -2};
|
std::vector<int> intVector = {3, 2, 1, 0, -1, -2};
|
||||||
IntArrayView intArrayView(intVector);
|
IntArrayView intArrayView(intVector);
|
||||||
EXPECT_EQ(intVector.size(), intArrayView.size());
|
EXPECT_EQ(intVector.size(), intArrayView.size());
|
||||||
for (int i = 0; i < DATA_SIZE; ++i) {
|
for (int i = 0; i < static_cast<int>(intVector.size()); ++i) {
|
||||||
EXPECT_EQ(intVector[i], intArrayView[i]);
|
EXPECT_EQ(intVector[i], intArrayView[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Verifies range-based for iteration over an IntArrayView built from a vector of distinct
// values: every visited value must still be present in a set copied from the vector and is
// then erased, and the set must be empty once iteration finishes. Together these checks mean
// each element of the vector was visited exactly once.
TEST(MemoryViewTest, TestIteration) {
|
||||||
|
std::vector<int> intVector = {3, 2, 1, 0, -1, -2};
|
||||||
|
IntArrayView intArrayView(intVector);
|
||||||
|
// Values not yet seen during iteration.
std::set<int> intSet(intVector.begin(), intVector.end());
|
||||||
|
for (const int i : intArrayView) {
|
||||||
|
EXPECT_TRUE(intSet.count(i) > 0);
|
||||||
|
// Erase so that a value yielded twice would fail the check above.
intSet.erase(i);
|
||||||
|
}
|
||||||
|
// Nothing left over: no element was skipped.
EXPECT_TRUE(intSet.empty());
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
|
|
Loading…
Reference in a new issue