am 4fd1ffdb: Merge "Add/Get n-gram probability entry in languageModelDictContent" into lmp-dev
* commit '4fd1ffdb8ee4e3b172c34f52c476f130e85edb49': Add/Get n-gram probability entry in languageModelDictContent
This commit is contained in:
commit
a7be110d12
5 changed files with 44 additions and 10 deletions
|
@ -32,11 +32,11 @@ bool LanguageModelDictContent::runGC(
|
|||
|
||||
ProbabilityEntry LanguageModelDictContent::getNgramProbabilityEntry(
|
||||
const WordIdArrayView prevWordIds, const int wordId) const {
|
||||
if (!prevWordIds.empty()) {
|
||||
// TODO: Read n-gram entry.
|
||||
const int bitmapEntryIndex = getBitmapEntryIndex(prevWordIds);
|
||||
if (bitmapEntryIndex == TrieMap::INVALID_INDEX) {
|
||||
return ProbabilityEntry();
|
||||
}
|
||||
const TrieMap::Result result = mTrieMap.getRoot(wordId);
|
||||
const TrieMap::Result result = mTrieMap.get(wordId, bitmapEntryIndex);
|
||||
if (!result.mIsValid) {
|
||||
// Not found.
|
||||
return ProbabilityEntry();
|
||||
|
@ -46,14 +46,13 @@ ProbabilityEntry LanguageModelDictContent::getNgramProbabilityEntry(
|
|||
|
||||
bool LanguageModelDictContent::setNgramProbabilityEntry(const WordIdArrayView prevWordIds,
|
||||
const int terminalId, const ProbabilityEntry *const probabilityEntry) {
|
||||
if (!prevWordIds.empty()) {
|
||||
// TODO: Add n-gram entry.
|
||||
const int bitmapEntryIndex = getBitmapEntryIndex(prevWordIds);
|
||||
if (bitmapEntryIndex == TrieMap::INVALID_INDEX) {
|
||||
return false;
|
||||
}
|
||||
return mTrieMap.putRoot(terminalId, probabilityEntry->encode(mHasHistoricalInfo));
|
||||
return mTrieMap.put(terminalId, probabilityEntry->encode(mHasHistoricalInfo), bitmapEntryIndex);
|
||||
}
|
||||
|
||||
|
||||
bool LanguageModelDictContent::runGCInner(
|
||||
const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
|
||||
const TrieMap::TrieMapRange trieMapRange,
|
||||
|
@ -81,4 +80,16 @@ bool LanguageModelDictContent::runGCInner(
|
|||
return true;
|
||||
}
|
||||
|
||||
// Resolves the bitmap entry index reached by looking up each id of prevWordIds in turn.
// The walk starts at mTrieMap.getRootBitmapEntryIndex(); if prevWordIds is empty the loop
// body never runs and that root index is returned as is.
// Returns TrieMap::INVALID_INDEX as soon as the lookup of any word id is not valid.
int LanguageModelDictContent::getBitmapEntryIndex(const WordIdArrayView prevWordIds) const {
|
||||
int bitmapEntryIndex = mTrieMap.getRootBitmapEntryIndex();
|
||||
for (const int wordId : prevWordIds) {
|
||||
// Look the word id up at the level identified by the current bitmap entry index.
const TrieMap::Result result = mTrieMap.get(wordId, bitmapEntryIndex);
|
||||
if (!result.mIsValid) {
|
||||
// Lookup failed for this word id; stop and report failure to the caller.
return TrieMap::INVALID_INDEX;
|
||||
}
|
||||
// The next word id is looked up starting from the index carried by this result.
// NOTE(review): no check here that mNextLevelBitmapEntryIndex itself is a valid index;
// confirm what TrieMap::get() stores there when an entry has no next level.
bitmapEntryIndex = result.mNextLevelBitmapEntryIndex;
|
||||
}
|
||||
return bitmapEntryIndex;
|
||||
}
|
||||
|
||||
} // namespace latinime
|
||||
|
|
|
@ -76,6 +76,8 @@ class LanguageModelDictContent {
|
|||
bool runGCInner(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
|
||||
const TrieMap::TrieMapRange trieMapRange, const int nextLevelBitmapEntryIndex,
|
||||
int *const outNgramCount);
|
||||
|
||||
int getBitmapEntryIndex(const WordIdArrayView prevWordIds) const;
|
||||
};
|
||||
} // namespace latinime
|
||||
#endif /* LATINIME_LANGUAGE_MODEL_DICT_CONTENT_H */
|
||||
|
|
|
@ -169,6 +169,10 @@ class TrieMap {
|
|||
return mBuffer.isNearSizeLimit();
|
||||
}
|
||||
|
||||
// Returns ROOT_BITMAP_ENTRY_INDEX so that callers outside this class can obtain the
// bitmap entry index of the root level.
int getRootBitmapEntryIndex() const {
|
||||
return ROOT_BITMAP_ENTRY_INDEX;
|
||||
}
|
||||
|
||||
// Returns bitmapEntryIndex. Create the next level map if it doesn't exist.
|
||||
int getNextLevelBitmapEntryIndex(const int key) {
|
||||
return getNextLevelBitmapEntryIndex(key, ROOT_BITMAP_ENTRY_INDEX);
|
||||
|
|
|
@ -73,6 +73,14 @@ class IntArrayView {
|
|||
return mPtr;
|
||||
}
|
||||
|
||||
// Returns mPtr as a read-only pointer marking the start of the iteration range.
// Together with end() this allows the view to be used in a range-based for loop.
AK_FORCE_INLINE const int *begin() const {
|
||||
return mPtr;
|
||||
}
|
||||
|
||||
// Returns mPtr + mSize, the pointer at which iteration started from begin() stops.
AK_FORCE_INLINE const int *end() const {
|
||||
return mPtr + mSize;
|
||||
}
|
||||
|
||||
private:
|
||||
DISALLOW_ASSIGNMENT_OPERATOR(IntArrayView);
|
||||
|
||||
|
|
|
@ -24,15 +24,24 @@ namespace latinime {
|
|||
namespace {
|
||||
|
||||
TEST(MemoryViewTest, TestAccess) {
|
||||
static const int DATA_SIZE = 10000;
|
||||
|
||||
std::vector<int> intVector = {3, 2, 1, 0, -1, -2};
|
||||
IntArrayView intArrayView(intVector);
|
||||
EXPECT_EQ(intVector.size(), intArrayView.size());
|
||||
for (int i = 0; i < DATA_SIZE; ++i) {
|
||||
for (int i = 0; i < static_cast<int>(intVector.size()); ++i) {
|
||||
EXPECT_EQ(intVector[i], intArrayView[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// Checks range-based iteration over an IntArrayView built from a std::vector<int>:
// every value produced by the loop must be present in a set built from the same vector,
// and after removing each visited value the set must be empty.
TEST(MemoryViewTest, TestIteration) {
|
||||
std::vector<int> intVector = {3, 2, 1, 0, -1, -2};
|
||||
IntArrayView intArrayView(intVector);
|
||||
// Set of the expected values; entries are erased as they are seen during iteration.
std::set<int> intSet(intVector.begin(), intVector.end());
|
||||
for (const int i : intArrayView) {
|
||||
// The value must still be in the set, i.e. it is expected and not seen before.
EXPECT_TRUE(intSet.count(i) > 0);
|
||||
intSet.erase(i);
|
||||
}
|
||||
// Nothing left over means every value of the vector was visited.
EXPECT_TRUE(intSet.empty());
|
||||
}
|
||||
|
||||
} // namespace
|
||||
} // namespace latinime
|
||||
|
|
Loading…
Reference in a new issue