am 4cc9c8b5: Merge "Quit reading unigram probability in Ver4PatriciaTrieNodeReader."
* commit '4cc9c8b58774279c7af486f610f4ebcb54735ff2': Quit reading unigram probability in Ver4PatriciaTrieNodeReader.
This commit is contained in:
commit b497ff1c31
6 changed files with 20 additions and 38 deletions
|
@ -63,9 +63,14 @@ const WordAttributes LanguageModelDictContent::getWordAttributes(const WordIdArr
|
||||||
int probability = NOT_A_PROBABILITY;
|
int probability = NOT_A_PROBABILITY;
|
||||||
if (mHasHistoricalInfo) {
|
if (mHasHistoricalInfo) {
|
||||||
const int rawProbability = ForgettingCurveUtils::decodeProbability(
|
const int rawProbability = ForgettingCurveUtils::decodeProbability(
|
||||||
probabilityEntry.getHistoricalInfo(), headerPolicy)
|
probabilityEntry.getHistoricalInfo(), headerPolicy);
|
||||||
+ ForgettingCurveUtils::getProbabilityBiasForNgram(i + 1 /* n */);
|
if (rawProbability == NOT_A_PROBABILITY) {
|
||||||
probability = std::min(rawProbability, MAX_PROBABILITY);
|
// The entry should not be treated as a valid entry.
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
probability = std::min(rawProbability
|
||||||
|
+ ForgettingCurveUtils::getProbabilityBiasForNgram(i + 1 /* n */),
|
||||||
|
MAX_PROBABILITY);
|
||||||
} else {
|
} else {
|
||||||
probability = probabilityEntry.getProbability();
|
probability = probabilityEntry.getProbability();
|
||||||
}
|
}
|
||||||
|
|
|
@ -51,26 +51,17 @@ const PtNodeParams Ver4PatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProce
|
||||||
const int parentPos =
|
const int parentPos =
|
||||||
DynamicPtReadingUtils::getParentPtNodePos(parentPosOffset, headPos);
|
DynamicPtReadingUtils::getParentPtNodePos(parentPosOffset, headPos);
|
||||||
int codePoints[MAX_WORD_LENGTH];
|
int codePoints[MAX_WORD_LENGTH];
|
||||||
const int codePonitCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
|
// Code point table is not used for ver4 dictionaries.
|
||||||
dictBuf, flags, MAX_WORD_LENGTH, mHeaderPolicy->getCodePointTable(), codePoints, &pos);
|
const int codePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
|
||||||
|
dictBuf, flags, MAX_WORD_LENGTH, nullptr /* codePointTable */, codePoints, &pos);
|
||||||
int terminalIdFieldPos = NOT_A_DICT_POS;
|
int terminalIdFieldPos = NOT_A_DICT_POS;
|
||||||
int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
|
int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
|
||||||
int probability = NOT_A_PROBABILITY;
|
|
||||||
if (PatriciaTrieReadingUtils::isTerminal(flags)) {
|
if (PatriciaTrieReadingUtils::isTerminal(flags)) {
|
||||||
terminalIdFieldPos = pos;
|
terminalIdFieldPos = pos;
|
||||||
if (usesAdditionalBuffer) {
|
if (usesAdditionalBuffer) {
|
||||||
terminalIdFieldPos += mBuffer->getOriginalBufferSize();
|
terminalIdFieldPos += mBuffer->getOriginalBufferSize();
|
||||||
}
|
}
|
||||||
terminalId = Ver4PatriciaTrieReadingUtils::getTerminalIdAndAdvancePosition(dictBuf, &pos);
|
terminalId = Ver4PatriciaTrieReadingUtils::getTerminalIdAndAdvancePosition(dictBuf, &pos);
|
||||||
// TODO: Quit reading probability here.
|
|
||||||
const ProbabilityEntry probabilityEntry =
|
|
||||||
mLanguageModelDictContent->getProbabilityEntry(terminalId);
|
|
||||||
if (probabilityEntry.hasHistoricalInfo()) {
|
|
||||||
probability = ForgettingCurveUtils::decodeProbability(
|
|
||||||
probabilityEntry.getHistoricalInfo(), mHeaderPolicy);
|
|
||||||
} else {
|
|
||||||
probability = probabilityEntry.getProbability();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
int childrenPosFieldPos = pos;
|
int childrenPosFieldPos = pos;
|
||||||
if (usesAdditionalBuffer) {
|
if (usesAdditionalBuffer) {
|
||||||
|
@ -91,8 +82,8 @@ const PtNodeParams Ver4PatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProce
|
||||||
// The destination position is stored at the same place as the parent position.
|
// The destination position is stored at the same place as the parent position.
|
||||||
return fetchPtNodeInfoFromBufferAndProcessMovedPtNode(parentPos, newSiblingNodePos);
|
return fetchPtNodeInfoFromBufferAndProcessMovedPtNode(parentPos, newSiblingNodePos);
|
||||||
} else {
|
} else {
|
||||||
return PtNodeParams(headPos, flags, parentPos, codePonitCount, codePoints,
|
return PtNodeParams(headPos, flags, parentPos, codePointCount, codePoints,
|
||||||
terminalIdFieldPos, terminalId, probability, childrenPosFieldPos, childrenPos,
|
terminalIdFieldPos, terminalId, NOT_A_PROBABILITY, childrenPosFieldPos, childrenPos,
|
||||||
newSiblingNodePos);
|
newSiblingNodePos);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -29,15 +29,12 @@ class LanguageModelDictContent;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This class is used for helping to read nodes of ver4 patricia trie. This class handles moved
|
* This class is used for helping to read nodes of ver4 patricia trie. This class handles moved
|
||||||
* node and reads node attributes including probability form language model.
|
* node and reads node attributes.
|
||||||
*/
|
*/
|
||||||
class Ver4PatriciaTrieNodeReader : public PtNodeReader {
|
class Ver4PatriciaTrieNodeReader : public PtNodeReader {
|
||||||
public:
|
public:
|
||||||
Ver4PatriciaTrieNodeReader(const BufferWithExtendableBuffer *const buffer,
|
explicit Ver4PatriciaTrieNodeReader(const BufferWithExtendableBuffer *const buffer)
|
||||||
const LanguageModelDictContent *const languageModelDictContent,
|
: mBuffer(buffer) {}
|
||||||
const HeaderPolicy *const headerPolicy)
|
|
||||||
: mBuffer(buffer), mLanguageModelDictContent(languageModelDictContent),
|
|
||||||
mHeaderPolicy(headerPolicy) {}
|
|
||||||
|
|
||||||
~Ver4PatriciaTrieNodeReader() {}
|
~Ver4PatriciaTrieNodeReader() {}
|
||||||
|
|
||||||
|
@ -50,8 +47,6 @@ class Ver4PatriciaTrieNodeReader : public PtNodeReader {
|
||||||
DISALLOW_COPY_AND_ASSIGN(Ver4PatriciaTrieNodeReader);
|
DISALLOW_COPY_AND_ASSIGN(Ver4PatriciaTrieNodeReader);
|
||||||
|
|
||||||
const BufferWithExtendableBuffer *const mBuffer;
|
const BufferWithExtendableBuffer *const mBuffer;
|
||||||
const LanguageModelDictContent *const mLanguageModelDictContent;
|
|
||||||
const HeaderPolicy *const mHeaderPolicy;
|
|
||||||
|
|
||||||
const PtNodeParams fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos,
|
const PtNodeParams fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos,
|
||||||
const int siblingNodePos) const;
|
const int siblingNodePos) const;
|
||||||
|
|
|
@ -56,13 +56,7 @@ void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const d
|
||||||
if (!ptNodeParams.isValid()) {
|
if (!ptNodeParams.isValid()) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
bool isTerminal = ptNodeParams.isTerminal() && !ptNodeParams.isDeleted();
|
const bool isTerminal = ptNodeParams.isTerminal() && !ptNodeParams.isDeleted();
|
||||||
if (isTerminal && mHeaderPolicy->isDecayingDict()) {
|
|
||||||
// A DecayingDict may have a terminal PtNode that has a terminal DicNode whose
|
|
||||||
// probability is NOT_A_PROBABILITY. In such case, we don't want to treat it as a
|
|
||||||
// valid terminal DicNode.
|
|
||||||
isTerminal = ptNodeParams.getProbability() != NOT_A_PROBABILITY;
|
|
||||||
}
|
|
||||||
const int wordId = isTerminal ? ptNodeParams.getTerminalId() : NOT_A_WORD_ID;
|
const int wordId = isTerminal ? ptNodeParams.getTerminalId() : NOT_A_WORD_ID;
|
||||||
childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getChildrenPos(),
|
childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getChildrenPos(),
|
||||||
wordId, ptNodeParams.getCodePointArrayView());
|
wordId, ptNodeParams.getCodePointArrayView());
|
||||||
|
|
|
@ -45,8 +45,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
mDictBuffer(mBuffers->getWritableTrieBuffer()),
|
mDictBuffer(mBuffers->getWritableTrieBuffer()),
|
||||||
mShortcutPolicy(mBuffers->getMutableShortcutDictContent(),
|
mShortcutPolicy(mBuffers->getMutableShortcutDictContent(),
|
||||||
mBuffers->getTerminalPositionLookupTable()),
|
mBuffers->getTerminalPositionLookupTable()),
|
||||||
mNodeReader(mDictBuffer, mBuffers->getLanguageModelDictContent(), mHeaderPolicy),
|
mNodeReader(mDictBuffer), mPtNodeArrayReader(mDictBuffer),
|
||||||
mPtNodeArrayReader(mDictBuffer),
|
|
||||||
mNodeWriter(mDictBuffer, mBuffers.get(), mHeaderPolicy, &mNodeReader,
|
mNodeWriter(mDictBuffer, mBuffers.get(), mHeaderPolicy, &mNodeReader,
|
||||||
&mPtNodeArrayReader, &mShortcutPolicy),
|
&mPtNodeArrayReader, &mShortcutPolicy),
|
||||||
mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter),
|
mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter),
|
||||||
|
|
|
@ -73,8 +73,7 @@ bool Ver4PatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeAr
|
||||||
bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
||||||
const HeaderPolicy *const headerPolicy, Ver4DictBuffers *const buffersToWrite,
|
const HeaderPolicy *const headerPolicy, Ver4DictBuffers *const buffersToWrite,
|
||||||
int *const outUnigramCount, int *const outBigramCount) {
|
int *const outUnigramCount, int *const outBigramCount) {
|
||||||
Ver4PatriciaTrieNodeReader ptNodeReader(mBuffers->getTrieBuffer(),
|
Ver4PatriciaTrieNodeReader ptNodeReader(mBuffers->getTrieBuffer());
|
||||||
mBuffers->getLanguageModelDictContent(), headerPolicy);
|
|
||||||
Ver4PtNodeArrayReader ptNodeArrayReader(mBuffers->getTrieBuffer());
|
Ver4PtNodeArrayReader ptNodeArrayReader(mBuffers->getTrieBuffer());
|
||||||
Ver4ShortcutListPolicy shortcutPolicy(mBuffers->getMutableShortcutDictContent(),
|
Ver4ShortcutListPolicy shortcutPolicy(mBuffers->getMutableShortcutDictContent(),
|
||||||
mBuffers->getTerminalPositionLookupTable());
|
mBuffers->getTerminalPositionLookupTable());
|
||||||
|
@ -137,8 +136,7 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create policy instances for the GCed dictionary.
|
// Create policy instances for the GCed dictionary.
|
||||||
Ver4PatriciaTrieNodeReader newPtNodeReader(buffersToWrite->getTrieBuffer(),
|
Ver4PatriciaTrieNodeReader newPtNodeReader(buffersToWrite->getTrieBuffer());
|
||||||
buffersToWrite->getLanguageModelDictContent(), headerPolicy);
|
|
||||||
Ver4PtNodeArrayReader newPtNodeArrayreader(buffersToWrite->getTrieBuffer());
|
Ver4PtNodeArrayReader newPtNodeArrayreader(buffersToWrite->getTrieBuffer());
|
||||||
Ver4ShortcutListPolicy newShortcutPolicy(buffersToWrite->getMutableShortcutDictContent(),
|
Ver4ShortcutListPolicy newShortcutPolicy(buffersToWrite->getMutableShortcutDictContent(),
|
||||||
buffersToWrite->getTerminalPositionLookupTable());
|
buffersToWrite->getTerminalPositionLookupTable());
|
||||||
|
|
Loading…
Reference in a new issue