Merge "Quit reading unigram probability in Ver4PatriciaTrieNodeReader."
This commit is contained in:
commit
4cc9c8b587
6 changed files with 20 additions and 38 deletions
|
@ -63,9 +63,14 @@ const WordAttributes LanguageModelDictContent::getWordAttributes(const WordIdArr
|
|||
int probability = NOT_A_PROBABILITY;
|
||||
if (mHasHistoricalInfo) {
|
||||
const int rawProbability = ForgettingCurveUtils::decodeProbability(
|
||||
probabilityEntry.getHistoricalInfo(), headerPolicy)
|
||||
+ ForgettingCurveUtils::getProbabilityBiasForNgram(i + 1 /* n */);
|
||||
probability = std::min(rawProbability, MAX_PROBABILITY);
|
||||
probabilityEntry.getHistoricalInfo(), headerPolicy);
|
||||
if (rawProbability == NOT_A_PROBABILITY) {
|
||||
// The entry should not be treated as a valid entry.
|
||||
continue;
|
||||
}
|
||||
probability = std::min(rawProbability
|
||||
+ ForgettingCurveUtils::getProbabilityBiasForNgram(i + 1 /* n */),
|
||||
MAX_PROBABILITY);
|
||||
} else {
|
||||
probability = probabilityEntry.getProbability();
|
||||
}
|
||||
|
|
|
@ -51,26 +51,17 @@ const PtNodeParams Ver4PatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProce
|
|||
const int parentPos =
|
||||
DynamicPtReadingUtils::getParentPtNodePos(parentPosOffset, headPos);
|
||||
int codePoints[MAX_WORD_LENGTH];
|
||||
const int codePonitCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
|
||||
dictBuf, flags, MAX_WORD_LENGTH, mHeaderPolicy->getCodePointTable(), codePoints, &pos);
|
||||
// Code point table is not used for ver4 dictionaries.
|
||||
const int codePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
|
||||
dictBuf, flags, MAX_WORD_LENGTH, nullptr /* codePointTable */, codePoints, &pos);
|
||||
int terminalIdFieldPos = NOT_A_DICT_POS;
|
||||
int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
|
||||
int probability = NOT_A_PROBABILITY;
|
||||
if (PatriciaTrieReadingUtils::isTerminal(flags)) {
|
||||
terminalIdFieldPos = pos;
|
||||
if (usesAdditionalBuffer) {
|
||||
terminalIdFieldPos += mBuffer->getOriginalBufferSize();
|
||||
}
|
||||
terminalId = Ver4PatriciaTrieReadingUtils::getTerminalIdAndAdvancePosition(dictBuf, &pos);
|
||||
// TODO: Quit reading probability here.
|
||||
const ProbabilityEntry probabilityEntry =
|
||||
mLanguageModelDictContent->getProbabilityEntry(terminalId);
|
||||
if (probabilityEntry.hasHistoricalInfo()) {
|
||||
probability = ForgettingCurveUtils::decodeProbability(
|
||||
probabilityEntry.getHistoricalInfo(), mHeaderPolicy);
|
||||
} else {
|
||||
probability = probabilityEntry.getProbability();
|
||||
}
|
||||
}
|
||||
int childrenPosFieldPos = pos;
|
||||
if (usesAdditionalBuffer) {
|
||||
|
@ -91,8 +82,8 @@ const PtNodeParams Ver4PatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProce
|
|||
// The destination position is stored at the same place as the parent position.
|
||||
return fetchPtNodeInfoFromBufferAndProcessMovedPtNode(parentPos, newSiblingNodePos);
|
||||
} else {
|
||||
return PtNodeParams(headPos, flags, parentPos, codePonitCount, codePoints,
|
||||
terminalIdFieldPos, terminalId, probability, childrenPosFieldPos, childrenPos,
|
||||
return PtNodeParams(headPos, flags, parentPos, codePointCount, codePoints,
|
||||
terminalIdFieldPos, terminalId, NOT_A_PROBABILITY, childrenPosFieldPos, childrenPos,
|
||||
newSiblingNodePos);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -29,15 +29,12 @@ class LanguageModelDictContent;
|
|||
|
||||
/*
|
||||
* This class is used for helping to read nodes of ver4 patricia trie. This class handles moved
|
||||
* node and reads node attributes including probability form language model.
|
||||
* node and reads node attributes.
|
||||
*/
|
||||
class Ver4PatriciaTrieNodeReader : public PtNodeReader {
|
||||
public:
|
||||
Ver4PatriciaTrieNodeReader(const BufferWithExtendableBuffer *const buffer,
|
||||
const LanguageModelDictContent *const languageModelDictContent,
|
||||
const HeaderPolicy *const headerPolicy)
|
||||
: mBuffer(buffer), mLanguageModelDictContent(languageModelDictContent),
|
||||
mHeaderPolicy(headerPolicy) {}
|
||||
explicit Ver4PatriciaTrieNodeReader(const BufferWithExtendableBuffer *const buffer)
|
||||
: mBuffer(buffer) {}
|
||||
|
||||
~Ver4PatriciaTrieNodeReader() {}
|
||||
|
||||
|
@ -50,8 +47,6 @@ class Ver4PatriciaTrieNodeReader : public PtNodeReader {
|
|||
DISALLOW_COPY_AND_ASSIGN(Ver4PatriciaTrieNodeReader);
|
||||
|
||||
const BufferWithExtendableBuffer *const mBuffer;
|
||||
const LanguageModelDictContent *const mLanguageModelDictContent;
|
||||
const HeaderPolicy *const mHeaderPolicy;
|
||||
|
||||
const PtNodeParams fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos,
|
||||
const int siblingNodePos) const;
|
||||
|
|
|
@ -56,13 +56,7 @@ void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const d
|
|||
if (!ptNodeParams.isValid()) {
|
||||
break;
|
||||
}
|
||||
bool isTerminal = ptNodeParams.isTerminal() && !ptNodeParams.isDeleted();
|
||||
if (isTerminal && mHeaderPolicy->isDecayingDict()) {
|
||||
// A DecayingDict may have a terminal PtNode that has a terminal DicNode whose
|
||||
// probability is NOT_A_PROBABILITY. In such case, we don't want to treat it as a
|
||||
// valid terminal DicNode.
|
||||
isTerminal = ptNodeParams.getProbability() != NOT_A_PROBABILITY;
|
||||
}
|
||||
const bool isTerminal = ptNodeParams.isTerminal() && !ptNodeParams.isDeleted();
|
||||
const int wordId = isTerminal ? ptNodeParams.getTerminalId() : NOT_A_WORD_ID;
|
||||
childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getChildrenPos(),
|
||||
wordId, ptNodeParams.getCodePointArrayView());
|
||||
|
|
|
@ -45,8 +45,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
|||
mDictBuffer(mBuffers->getWritableTrieBuffer()),
|
||||
mShortcutPolicy(mBuffers->getMutableShortcutDictContent(),
|
||||
mBuffers->getTerminalPositionLookupTable()),
|
||||
mNodeReader(mDictBuffer, mBuffers->getLanguageModelDictContent(), mHeaderPolicy),
|
||||
mPtNodeArrayReader(mDictBuffer),
|
||||
mNodeReader(mDictBuffer), mPtNodeArrayReader(mDictBuffer),
|
||||
mNodeWriter(mDictBuffer, mBuffers.get(), mHeaderPolicy, &mNodeReader,
|
||||
&mPtNodeArrayReader, &mShortcutPolicy),
|
||||
mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter),
|
||||
|
|
|
@ -73,8 +73,7 @@ bool Ver4PatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeAr
|
|||
bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
||||
const HeaderPolicy *const headerPolicy, Ver4DictBuffers *const buffersToWrite,
|
||||
int *const outUnigramCount, int *const outBigramCount) {
|
||||
Ver4PatriciaTrieNodeReader ptNodeReader(mBuffers->getTrieBuffer(),
|
||||
mBuffers->getLanguageModelDictContent(), headerPolicy);
|
||||
Ver4PatriciaTrieNodeReader ptNodeReader(mBuffers->getTrieBuffer());
|
||||
Ver4PtNodeArrayReader ptNodeArrayReader(mBuffers->getTrieBuffer());
|
||||
Ver4ShortcutListPolicy shortcutPolicy(mBuffers->getMutableShortcutDictContent(),
|
||||
mBuffers->getTerminalPositionLookupTable());
|
||||
|
@ -137,8 +136,7 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
|||
}
|
||||
|
||||
// Create policy instances for the GCed dictionary.
|
||||
Ver4PatriciaTrieNodeReader newPtNodeReader(buffersToWrite->getTrieBuffer(),
|
||||
buffersToWrite->getLanguageModelDictContent(), headerPolicy);
|
||||
Ver4PatriciaTrieNodeReader newPtNodeReader(buffersToWrite->getTrieBuffer());
|
||||
Ver4PtNodeArrayReader newPtNodeArrayreader(buffersToWrite->getTrieBuffer());
|
||||
Ver4ShortcutListPolicy newShortcutPolicy(buffersToWrite->getMutableShortcutDictContent(),
|
||||
buffersToWrite->getTerminalPositionLookupTable());
|
||||
|
|
Loading…
Reference in a new issue