Use LanguageModelDictContent in getWordProperty().

Bug: 14425059
Change-Id: Ic230f764ff5570f24ce6ce930023798718f326df
main
Keisuke Kuroyanagi 2014-08-25 21:07:10 +09:00
parent 1c14effab5
commit 8b4409f4b9
2 changed files with 25 additions and 48 deletions

View File

@ -188,18 +188,6 @@ int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) con
ptNodeParams.getTerminalId()); ptNodeParams.getTerminalId());
} }
int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
if (ptNodePos == NOT_A_DICT_POS) {
return NOT_A_DICT_POS;
}
const PtNodeParams ptNodeParams(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos));
if (ptNodeParams.isDeleted()) {
return NOT_A_DICT_POS;
}
return mBuffers->getBigramDictContent()->getBigramListHeadPos(
ptNodeParams.getTerminalId());
}
bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int length, bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int length,
const UnigramProperty *const unigramProperty) { const UnigramProperty *const unigramProperty) {
if (!mBuffers->isUpdatable()) { if (!mBuffers->isUpdatable()) {
@ -480,41 +468,32 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code
ptNodeParams.getTerminalId()); ptNodeParams.getTerminalId());
const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo(); const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo();
// Fetch bigram information. // Fetch bigram information.
// TODO: Support n-gram.
std::vector<BigramProperty> bigrams; std::vector<BigramProperty> bigrams;
const int bigramListPos = getBigramsPositionOfPtNode(ptNodePos); const int wordId = ptNodeParams.getTerminalId();
if (bigramListPos != NOT_A_DICT_POS) { const WordIdArrayView prevWordIds = WordIdArrayView::fromObject(&wordId);
int bigramWord1CodePoints[MAX_WORD_LENGTH]; const TerminalPositionLookupTable *const terminalPositionLookupTable =
const BigramDictContent *const bigramDictContent = mBuffers->getBigramDictContent(); mBuffers->getTerminalPositionLookupTable();
const TerminalPositionLookupTable *const terminalPositionLookupTable = int bigramWord1CodePoints[MAX_WORD_LENGTH];
mBuffers->getTerminalPositionLookupTable(); for (const auto entry : mBuffers->getLanguageModelDictContent()->getProbabilityEntries(
bool hasNext = true; prevWordIds)) {
int readingPos = bigramListPos; const int word1TerminalPtNodePos =
while (hasNext) { terminalPositionLookupTable->getTerminalPtNodePosition(entry.getWordId());
const BigramEntry bigramEntry = // Word (unigram) probability
bigramDictContent->getBigramEntryAndAdvancePosition(&readingPos); int word1Probability = NOT_A_PROBABILITY;
hasNext = bigramEntry.hasNext(); const int codePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(
const int word1TerminalId = bigramEntry.getTargetTerminalId(); word1TerminalPtNodePos, MAX_WORD_LENGTH, bigramWord1CodePoints,
const int word1TerminalPtNodePos = &word1Probability);
terminalPositionLookupTable->getTerminalPtNodePosition(word1TerminalId); const std::vector<int> word1(bigramWord1CodePoints,
if (word1TerminalPtNodePos == NOT_A_DICT_POS) { bigramWord1CodePoints + codePointCount);
continue; const ProbabilityEntry probabilityEntry = entry.getProbabilityEntry();
} const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo();
// Word (unigram) probability const int probability = probabilityEntry.hasHistoricalInfo() ?
int word1Probability = NOT_A_PROBABILITY; ForgettingCurveUtils::decodeProbability(historicalInfo, mHeaderPolicy) :
const int codePointCount = getCodePointsAndProbabilityAndReturnCodePointCount( probabilityEntry.getProbability();
word1TerminalPtNodePos, MAX_WORD_LENGTH, bigramWord1CodePoints, bigrams.emplace_back(&word1, probability,
&word1Probability); historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
const std::vector<int> word1(bigramWord1CodePoints, historicalInfo->getCount());
bigramWord1CodePoints + codePointCount);
const HistoricalInfo *const historicalInfo = bigramEntry.getHistoricalInfo();
const int probability = bigramEntry.hasHistoricalInfo() ?
ForgettingCurveUtils::decodeProbability(
bigramEntry.getHistoricalInfo(), mHeaderPolicy) :
bigramEntry.getProbability();
bigrams.emplace_back(&word1, probability,
historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
historicalInfo->getCount());
}
} }
// Fetch shortcut information. // Fetch shortcut information.
std::vector<UnigramProperty::ShortcutProperty> shortcuts; std::vector<UnigramProperty::ShortcutProperty> shortcuts;

View File

@ -143,8 +143,6 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
int mBigramCount; int mBigramCount;
std::vector<int> mTerminalPtNodePositionsForIteratingWords; std::vector<int> mTerminalPtNodePositionsForIteratingWords;
mutable bool mIsCorrupted; mutable bool mIsCorrupted;
int getBigramsPositionOfPtNode(const int ptNodePos) const;
}; };
} // namespace latinime } // namespace latinime
#endif // LATINIME_VER4_PATRICIA_TRIE_POLICY_H #endif // LATINIME_VER4_PATRICIA_TRIE_POLICY_H