Merge "Add/Get n-gram probability entry in languageModelDictContent" into lmp-dev

This commit is contained in:
Keisuke Kuroyanagi 2014-08-06 00:50:57 +00:00 committed by Android (Google) Code Review
commit 4fd1ffdb8e
5 changed files with 44 additions and 10 deletions

View file

@ -32,11 +32,11 @@ bool LanguageModelDictContent::runGC(
ProbabilityEntry LanguageModelDictContent::getNgramProbabilityEntry( ProbabilityEntry LanguageModelDictContent::getNgramProbabilityEntry(
const WordIdArrayView prevWordIds, const int wordId) const { const WordIdArrayView prevWordIds, const int wordId) const {
if (!prevWordIds.empty()) { const int bitmapEntryIndex = getBitmapEntryIndex(prevWordIds);
// TODO: Read n-gram entry. if (bitmapEntryIndex == TrieMap::INVALID_INDEX) {
return ProbabilityEntry(); return ProbabilityEntry();
} }
const TrieMap::Result result = mTrieMap.getRoot(wordId); const TrieMap::Result result = mTrieMap.get(wordId, bitmapEntryIndex);
if (!result.mIsValid) { if (!result.mIsValid) {
// Not found. // Not found.
return ProbabilityEntry(); return ProbabilityEntry();
@ -46,14 +46,13 @@ ProbabilityEntry LanguageModelDictContent::getNgramProbabilityEntry(
bool LanguageModelDictContent::setNgramProbabilityEntry(const WordIdArrayView prevWordIds, bool LanguageModelDictContent::setNgramProbabilityEntry(const WordIdArrayView prevWordIds,
const int terminalId, const ProbabilityEntry *const probabilityEntry) { const int terminalId, const ProbabilityEntry *const probabilityEntry) {
if (!prevWordIds.empty()) { const int bitmapEntryIndex = getBitmapEntryIndex(prevWordIds);
// TODO: Add n-gram entry. if (bitmapEntryIndex == TrieMap::INVALID_INDEX) {
return false; return false;
} }
return mTrieMap.putRoot(terminalId, probabilityEntry->encode(mHasHistoricalInfo)); return mTrieMap.put(terminalId, probabilityEntry->encode(mHasHistoricalInfo), bitmapEntryIndex);
} }
bool LanguageModelDictContent::runGCInner( bool LanguageModelDictContent::runGCInner(
const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap, const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
const TrieMap::TrieMapRange trieMapRange, const TrieMap::TrieMapRange trieMapRange,
@ -81,4 +80,16 @@ bool LanguageModelDictContent::runGCInner(
return true; return true;
} }
int LanguageModelDictContent::getBitmapEntryIndex(const WordIdArrayView prevWordIds) const {
int bitmapEntryIndex = mTrieMap.getRootBitmapEntryIndex();
for (const int wordId : prevWordIds) {
const TrieMap::Result result = mTrieMap.get(wordId, bitmapEntryIndex);
if (!result.mIsValid) {
return TrieMap::INVALID_INDEX;
}
bitmapEntryIndex = result.mNextLevelBitmapEntryIndex;
}
return bitmapEntryIndex;
}
} // namespace latinime } // namespace latinime

View file

@ -76,6 +76,8 @@ class LanguageModelDictContent {
bool runGCInner(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap, bool runGCInner(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
const TrieMap::TrieMapRange trieMapRange, const int nextLevelBitmapEntryIndex, const TrieMap::TrieMapRange trieMapRange, const int nextLevelBitmapEntryIndex,
int *const outNgramCount); int *const outNgramCount);
int getBitmapEntryIndex(const WordIdArrayView prevWordIds) const;
}; };
} // namespace latinime } // namespace latinime
#endif /* LATINIME_LANGUAGE_MODEL_DICT_CONTENT_H */ #endif /* LATINIME_LANGUAGE_MODEL_DICT_CONTENT_H */

View file

@ -169,6 +169,10 @@ class TrieMap {
return mBuffer.isNearSizeLimit(); return mBuffer.isNearSizeLimit();
} }
// Returns ROOT_BITMAP_ENTRY_INDEX, the bitmap entry index of the root level, so that callers
// can start a lookup from the root.
int getRootBitmapEntryIndex() const {
return ROOT_BITMAP_ENTRY_INDEX;
}
// Returns bitmapEntryIndex. Create the next level map if it doesn't exist. // Returns bitmapEntryIndex. Create the next level map if it doesn't exist.
int getNextLevelBitmapEntryIndex(const int key) { int getNextLevelBitmapEntryIndex(const int key) {
return getNextLevelBitmapEntryIndex(key, ROOT_BITMAP_ENTRY_INDEX); return getNextLevelBitmapEntryIndex(key, ROOT_BITMAP_ENTRY_INDEX);

View file

@ -73,6 +73,14 @@ class IntArrayView {
return mPtr; return mPtr;
} }
// Returns mPtr as the start iterator of the view. Together with end(), this allows the view to
// be traversed with a range-based for loop.
AK_FORCE_INLINE const int *begin() const {
return mPtr;
}
// Returns mPtr advanced by mSize elements as the end iterator of the view. The returned pointer
// only marks where iteration stops.
AK_FORCE_INLINE const int *end() const {
return mPtr + mSize;
}
private: private:
DISALLOW_ASSIGNMENT_OPERATOR(IntArrayView); DISALLOW_ASSIGNMENT_OPERATOR(IntArrayView);

View file

@ -24,15 +24,24 @@ namespace latinime {
namespace { namespace {
// Checks that an IntArrayView built from a vector reports the vector's size and yields the same
// value as the vector at every index.
TEST(MemoryViewTest, TestAccess) {
    std::vector<int> intVector = {3, 2, 1, 0, -1, -2};
    IntArrayView intArrayView(intVector);
    EXPECT_EQ(intVector.size(), intArrayView.size());
    // Bound the loop by the actual element count. A fixed bound larger than the vector would
    // index both the vector and the view out of range.
    for (int i = 0; i < static_cast<int>(intVector.size()); ++i) {
        EXPECT_EQ(intVector[i], intArrayView[i]);
    }
}
// Checks that a range-based for loop over an IntArrayView visits the values of the backing
// vector: every visited value must still be pending, and nothing may be left over afterwards.
TEST(MemoryViewTest, TestIteration) {
    std::vector<int> sourceValues = {3, 2, 1, 0, -1, -2};
    IntArrayView view(sourceValues);
    // Values that have not been seen yet; each one is removed when the loop reaches it.
    std::set<int> pendingValues(sourceValues.begin(), sourceValues.end());
    for (const int value : view) {
        EXPECT_TRUE(pendingValues.count(value) > 0);
        pendingValues.erase(value);
    }
    EXPECT_TRUE(pendingValues.empty());
}
} // namespace } // namespace
} // namespace latinime } // namespace latinime