Merge "Use LanguageModelDictContent in getWordProperty()."

This commit is contained in:
Keisuke Kuroyanagi 2014-08-27 10:58:28 +00:00 committed by Android (Google) Code Review
commit c7f1de826c
2 changed files with 25 additions and 48 deletions

View file

@@ -188,18 +188,6 @@ int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) con
ptNodeParams.getTerminalId()); ptNodeParams.getTerminalId());
} }
// Returns the head position of the bigram list associated with the PtNode at ptNodePos.
//
// The PtNode is read through mNodeReader, and its terminal id is used to look up the list head
// in the bigram dict content held by mBuffers.
//
// Returns NOT_A_DICT_POS when ptNodePos is NOT_A_DICT_POS or when the PtNode is marked deleted;
// otherwise returns whatever getBigramListHeadPos() reports for the terminal id.
int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
// An invalid position is passed straight through without reading the node.
if (ptNodePos == NOT_A_DICT_POS) {
return NOT_A_DICT_POS;
}
const PtNodeParams ptNodeParams(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos));
// A deleted PtNode is reported as having no bigram list.
if (ptNodeParams.isDeleted()) {
return NOT_A_DICT_POS;
}
// The bigram list is keyed by the node's terminal id, not by its position.
return mBuffers->getBigramDictContent()->getBigramListHeadPos(
ptNodeParams.getTerminalId());
}
bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int length, bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int length,
const UnigramProperty *const unigramProperty) { const UnigramProperty *const unigramProperty) {
if (!mBuffers->isUpdatable()) { if (!mBuffers->isUpdatable()) {
@@ -480,25 +468,17 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code
ptNodeParams.getTerminalId()); ptNodeParams.getTerminalId());
const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo(); const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo();
// Fetch bigram information. // Fetch bigram information.
// TODO: Support n-gram.
std::vector<BigramProperty> bigrams; std::vector<BigramProperty> bigrams;
const int bigramListPos = getBigramsPositionOfPtNode(ptNodePos); const int wordId = ptNodeParams.getTerminalId();
if (bigramListPos != NOT_A_DICT_POS) { const WordIdArrayView prevWordIds = WordIdArrayView::fromObject(&wordId);
int bigramWord1CodePoints[MAX_WORD_LENGTH];
const BigramDictContent *const bigramDictContent = mBuffers->getBigramDictContent();
const TerminalPositionLookupTable *const terminalPositionLookupTable = const TerminalPositionLookupTable *const terminalPositionLookupTable =
mBuffers->getTerminalPositionLookupTable(); mBuffers->getTerminalPositionLookupTable();
bool hasNext = true; int bigramWord1CodePoints[MAX_WORD_LENGTH];
int readingPos = bigramListPos; for (const auto entry : mBuffers->getLanguageModelDictContent()->getProbabilityEntries(
while (hasNext) { prevWordIds)) {
const BigramEntry bigramEntry =
bigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
hasNext = bigramEntry.hasNext();
const int word1TerminalId = bigramEntry.getTargetTerminalId();
const int word1TerminalPtNodePos = const int word1TerminalPtNodePos =
terminalPositionLookupTable->getTerminalPtNodePosition(word1TerminalId); terminalPositionLookupTable->getTerminalPtNodePosition(entry.getWordId());
if (word1TerminalPtNodePos == NOT_A_DICT_POS) {
continue;
}
// Word (unigram) probability // Word (unigram) probability
int word1Probability = NOT_A_PROBABILITY; int word1Probability = NOT_A_PROBABILITY;
const int codePointCount = getCodePointsAndProbabilityAndReturnCodePointCount( const int codePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(
@@ -506,16 +486,15 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code
&word1Probability); &word1Probability);
const std::vector<int> word1(bigramWord1CodePoints, const std::vector<int> word1(bigramWord1CodePoints,
bigramWord1CodePoints + codePointCount); bigramWord1CodePoints + codePointCount);
const HistoricalInfo *const historicalInfo = bigramEntry.getHistoricalInfo(); const ProbabilityEntry probabilityEntry = entry.getProbabilityEntry();
const int probability = bigramEntry.hasHistoricalInfo() ? const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo();
ForgettingCurveUtils::decodeProbability( const int probability = probabilityEntry.hasHistoricalInfo() ?
bigramEntry.getHistoricalInfo(), mHeaderPolicy) : ForgettingCurveUtils::decodeProbability(historicalInfo, mHeaderPolicy) :
bigramEntry.getProbability(); probabilityEntry.getProbability();
bigrams.emplace_back(&word1, probability, bigrams.emplace_back(&word1, probability,
historicalInfo->getTimeStamp(), historicalInfo->getLevel(), historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
historicalInfo->getCount()); historicalInfo->getCount());
} }
}
// Fetch shortcut information. // Fetch shortcut information.
std::vector<UnigramProperty::ShortcutProperty> shortcuts; std::vector<UnigramProperty::ShortcutProperty> shortcuts;
int shortcutPos = getShortcutPositionOfPtNode(ptNodePos); int shortcutPos = getShortcutPositionOfPtNode(ptNodePos);

View file

@@ -143,8 +143,6 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
int mBigramCount; int mBigramCount;
std::vector<int> mTerminalPtNodePositionsForIteratingWords; std::vector<int> mTerminalPtNodePositionsForIteratingWords;
mutable bool mIsCorrupted; mutable bool mIsCorrupted;
int getBigramsPositionOfPtNode(const int ptNodePos) const;
}; };
} // namespace latinime } // namespace latinime
#endif // LATINIME_VER4_PATRICIA_TRIE_POLICY_H #endif // LATINIME_VER4_PATRICIA_TRIE_POLICY_H