am c7f1de82: Merge "Use LanguageModelDictContent in getWordProperty()."
* commit 'c7f1de826c3982b8e44557316b85fe7b3e62eb10': Use LanguageModelDictContent in getWordProperty().
main
commit
445f5536b6
|
@ -188,18 +188,6 @@ int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) con
|
||||||
ptNodeParams.getTerminalId());
|
ptNodeParams.getTerminalId());
|
||||||
}
|
}
|
||||||
|
|
||||||
int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
|
|
||||||
if (ptNodePos == NOT_A_DICT_POS) {
|
|
||||||
return NOT_A_DICT_POS;
|
|
||||||
}
|
|
||||||
const PtNodeParams ptNodeParams(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos));
|
|
||||||
if (ptNodeParams.isDeleted()) {
|
|
||||||
return NOT_A_DICT_POS;
|
|
||||||
}
|
|
||||||
return mBuffers->getBigramDictContent()->getBigramListHeadPos(
|
|
||||||
ptNodeParams.getTerminalId());
|
|
||||||
}
|
|
||||||
|
|
||||||
bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int length,
|
bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int length,
|
||||||
const UnigramProperty *const unigramProperty) {
|
const UnigramProperty *const unigramProperty) {
|
||||||
if (!mBuffers->isUpdatable()) {
|
if (!mBuffers->isUpdatable()) {
|
||||||
|
@ -480,41 +468,32 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code
|
||||||
ptNodeParams.getTerminalId());
|
ptNodeParams.getTerminalId());
|
||||||
const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo();
|
const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo();
|
||||||
// Fetch bigram information.
|
// Fetch bigram information.
|
||||||
|
// TODO: Support n-gram.
|
||||||
std::vector<BigramProperty> bigrams;
|
std::vector<BigramProperty> bigrams;
|
||||||
const int bigramListPos = getBigramsPositionOfPtNode(ptNodePos);
|
const int wordId = ptNodeParams.getTerminalId();
|
||||||
if (bigramListPos != NOT_A_DICT_POS) {
|
const WordIdArrayView prevWordIds = WordIdArrayView::fromObject(&wordId);
|
||||||
int bigramWord1CodePoints[MAX_WORD_LENGTH];
|
const TerminalPositionLookupTable *const terminalPositionLookupTable =
|
||||||
const BigramDictContent *const bigramDictContent = mBuffers->getBigramDictContent();
|
mBuffers->getTerminalPositionLookupTable();
|
||||||
const TerminalPositionLookupTable *const terminalPositionLookupTable =
|
int bigramWord1CodePoints[MAX_WORD_LENGTH];
|
||||||
mBuffers->getTerminalPositionLookupTable();
|
for (const auto entry : mBuffers->getLanguageModelDictContent()->getProbabilityEntries(
|
||||||
bool hasNext = true;
|
prevWordIds)) {
|
||||||
int readingPos = bigramListPos;
|
const int word1TerminalPtNodePos =
|
||||||
while (hasNext) {
|
terminalPositionLookupTable->getTerminalPtNodePosition(entry.getWordId());
|
||||||
const BigramEntry bigramEntry =
|
// Word (unigram) probability
|
||||||
bigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
|
int word1Probability = NOT_A_PROBABILITY;
|
||||||
hasNext = bigramEntry.hasNext();
|
const int codePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(
|
||||||
const int word1TerminalId = bigramEntry.getTargetTerminalId();
|
word1TerminalPtNodePos, MAX_WORD_LENGTH, bigramWord1CodePoints,
|
||||||
const int word1TerminalPtNodePos =
|
&word1Probability);
|
||||||
terminalPositionLookupTable->getTerminalPtNodePosition(word1TerminalId);
|
const std::vector<int> word1(bigramWord1CodePoints,
|
||||||
if (word1TerminalPtNodePos == NOT_A_DICT_POS) {
|
bigramWord1CodePoints + codePointCount);
|
||||||
continue;
|
const ProbabilityEntry probabilityEntry = entry.getProbabilityEntry();
|
||||||
}
|
const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo();
|
||||||
// Word (unigram) probability
|
const int probability = probabilityEntry.hasHistoricalInfo() ?
|
||||||
int word1Probability = NOT_A_PROBABILITY;
|
ForgettingCurveUtils::decodeProbability(historicalInfo, mHeaderPolicy) :
|
||||||
const int codePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(
|
probabilityEntry.getProbability();
|
||||||
word1TerminalPtNodePos, MAX_WORD_LENGTH, bigramWord1CodePoints,
|
bigrams.emplace_back(&word1, probability,
|
||||||
&word1Probability);
|
historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
|
||||||
const std::vector<int> word1(bigramWord1CodePoints,
|
historicalInfo->getCount());
|
||||||
bigramWord1CodePoints + codePointCount);
|
|
||||||
const HistoricalInfo *const historicalInfo = bigramEntry.getHistoricalInfo();
|
|
||||||
const int probability = bigramEntry.hasHistoricalInfo() ?
|
|
||||||
ForgettingCurveUtils::decodeProbability(
|
|
||||||
bigramEntry.getHistoricalInfo(), mHeaderPolicy) :
|
|
||||||
bigramEntry.getProbability();
|
|
||||||
bigrams.emplace_back(&word1, probability,
|
|
||||||
historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
|
|
||||||
historicalInfo->getCount());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
// Fetch shortcut information.
|
// Fetch shortcut information.
|
||||||
std::vector<UnigramProperty::ShortcutProperty> shortcuts;
|
std::vector<UnigramProperty::ShortcutProperty> shortcuts;
|
||||||
|
|
|
@ -143,8 +143,6 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
int mBigramCount;
|
int mBigramCount;
|
||||||
std::vector<int> mTerminalPtNodePositionsForIteratingWords;
|
std::vector<int> mTerminalPtNodePositionsForIteratingWords;
|
||||||
mutable bool mIsCorrupted;
|
mutable bool mIsCorrupted;
|
||||||
|
|
||||||
int getBigramsPositionOfPtNode(const int ptNodePos) const;
|
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif // LATINIME_VER4_PATRICIA_TRIE_POLICY_H
|
#endif // LATINIME_VER4_PATRICIA_TRIE_POLICY_H
|
||||||
|
|
Loading…
Reference in New Issue