Quit reading unigram probability in Ver4PatriciaTrieNodeReader.

Bug: 14425059
Change-Id: I4fc7b0e236151a2c64e7131772264024c6597633
This commit is contained in:
Keisuke Kuroyanagi 2014-09-25 11:41:50 +09:00
parent 2842e50c4b
commit cb4f544198
6 changed files with 20 additions and 38 deletions

View file

@ -63,9 +63,14 @@ const WordAttributes LanguageModelDictContent::getWordAttributes(const WordIdArr
int probability = NOT_A_PROBABILITY;
if (mHasHistoricalInfo) {
const int rawProbability = ForgettingCurveUtils::decodeProbability(
probabilityEntry.getHistoricalInfo(), headerPolicy)
+ ForgettingCurveUtils::getProbabilityBiasForNgram(i + 1 /* n */);
probability = std::min(rawProbability, MAX_PROBABILITY);
probabilityEntry.getHistoricalInfo(), headerPolicy);
if (rawProbability == NOT_A_PROBABILITY) {
// The entry should not be treated as a valid entry.
continue;
}
probability = std::min(rawProbability
+ ForgettingCurveUtils::getProbabilityBiasForNgram(i + 1 /* n */),
MAX_PROBABILITY);
} else {
probability = probabilityEntry.getProbability();
}

View file

@ -51,26 +51,17 @@ const PtNodeParams Ver4PatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProce
const int parentPos =
DynamicPtReadingUtils::getParentPtNodePos(parentPosOffset, headPos);
int codePoints[MAX_WORD_LENGTH];
const int codePonitCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
dictBuf, flags, MAX_WORD_LENGTH, mHeaderPolicy->getCodePointTable(), codePoints, &pos);
// Code point table is not used for ver4 dictionaries.
const int codePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
dictBuf, flags, MAX_WORD_LENGTH, nullptr /* codePointTable */, codePoints, &pos);
int terminalIdFieldPos = NOT_A_DICT_POS;
int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
int probability = NOT_A_PROBABILITY;
if (PatriciaTrieReadingUtils::isTerminal(flags)) {
terminalIdFieldPos = pos;
if (usesAdditionalBuffer) {
terminalIdFieldPos += mBuffer->getOriginalBufferSize();
}
terminalId = Ver4PatriciaTrieReadingUtils::getTerminalIdAndAdvancePosition(dictBuf, &pos);
// TODO: Quit reading probability here.
const ProbabilityEntry probabilityEntry =
mLanguageModelDictContent->getProbabilityEntry(terminalId);
if (probabilityEntry.hasHistoricalInfo()) {
probability = ForgettingCurveUtils::decodeProbability(
probabilityEntry.getHistoricalInfo(), mHeaderPolicy);
} else {
probability = probabilityEntry.getProbability();
}
}
int childrenPosFieldPos = pos;
if (usesAdditionalBuffer) {
@ -91,8 +82,8 @@ const PtNodeParams Ver4PatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProce
// The destination position is stored at the same place as the parent position.
return fetchPtNodeInfoFromBufferAndProcessMovedPtNode(parentPos, newSiblingNodePos);
} else {
return PtNodeParams(headPos, flags, parentPos, codePonitCount, codePoints,
terminalIdFieldPos, terminalId, probability, childrenPosFieldPos, childrenPos,
return PtNodeParams(headPos, flags, parentPos, codePointCount, codePoints,
terminalIdFieldPos, terminalId, NOT_A_PROBABILITY, childrenPosFieldPos, childrenPos,
newSiblingNodePos);
}
}

View file

@ -29,15 +29,12 @@ class LanguageModelDictContent;
/*
* This class is used for helping to read nodes of ver4 patricia trie. This class handles moved
* node and reads node attributes including probability form language model.
* node and reads node attributes.
*/
class Ver4PatriciaTrieNodeReader : public PtNodeReader {
public:
Ver4PatriciaTrieNodeReader(const BufferWithExtendableBuffer *const buffer,
const LanguageModelDictContent *const languageModelDictContent,
const HeaderPolicy *const headerPolicy)
: mBuffer(buffer), mLanguageModelDictContent(languageModelDictContent),
mHeaderPolicy(headerPolicy) {}
explicit Ver4PatriciaTrieNodeReader(const BufferWithExtendableBuffer *const buffer)
: mBuffer(buffer) {}
~Ver4PatriciaTrieNodeReader() {}
@ -50,8 +47,6 @@ class Ver4PatriciaTrieNodeReader : public PtNodeReader {
DISALLOW_COPY_AND_ASSIGN(Ver4PatriciaTrieNodeReader);
const BufferWithExtendableBuffer *const mBuffer;
const LanguageModelDictContent *const mLanguageModelDictContent;
const HeaderPolicy *const mHeaderPolicy;
const PtNodeParams fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos,
const int siblingNodePos) const;

View file

@ -56,13 +56,7 @@ void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const d
if (!ptNodeParams.isValid()) {
break;
}
bool isTerminal = ptNodeParams.isTerminal() && !ptNodeParams.isDeleted();
if (isTerminal && mHeaderPolicy->isDecayingDict()) {
// A DecayingDict may have a terminal PtNode that has a terminal DicNode whose
// probability is NOT_A_PROBABILITY. In such case, we don't want to treat it as a
// valid terminal DicNode.
isTerminal = ptNodeParams.getProbability() != NOT_A_PROBABILITY;
}
const bool isTerminal = ptNodeParams.isTerminal() && !ptNodeParams.isDeleted();
const int wordId = isTerminal ? ptNodeParams.getTerminalId() : NOT_A_WORD_ID;
childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getChildrenPos(),
wordId, ptNodeParams.getCodePointArrayView());

View file

@ -45,8 +45,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
mDictBuffer(mBuffers->getWritableTrieBuffer()),
mShortcutPolicy(mBuffers->getMutableShortcutDictContent(),
mBuffers->getTerminalPositionLookupTable()),
mNodeReader(mDictBuffer, mBuffers->getLanguageModelDictContent(), mHeaderPolicy),
mPtNodeArrayReader(mDictBuffer),
mNodeReader(mDictBuffer), mPtNodeArrayReader(mDictBuffer),
mNodeWriter(mDictBuffer, mBuffers.get(), mHeaderPolicy, &mNodeReader,
&mPtNodeArrayReader, &mShortcutPolicy),
mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter),

View file

@ -73,8 +73,7 @@ bool Ver4PatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeAr
bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
const HeaderPolicy *const headerPolicy, Ver4DictBuffers *const buffersToWrite,
int *const outUnigramCount, int *const outBigramCount) {
Ver4PatriciaTrieNodeReader ptNodeReader(mBuffers->getTrieBuffer(),
mBuffers->getLanguageModelDictContent(), headerPolicy);
Ver4PatriciaTrieNodeReader ptNodeReader(mBuffers->getTrieBuffer());
Ver4PtNodeArrayReader ptNodeArrayReader(mBuffers->getTrieBuffer());
Ver4ShortcutListPolicy shortcutPolicy(mBuffers->getMutableShortcutDictContent(),
mBuffers->getTerminalPositionLookupTable());
@ -137,8 +136,7 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
}
// Create policy instances for the GCed dictionary.
Ver4PatriciaTrieNodeReader newPtNodeReader(buffersToWrite->getTrieBuffer(),
buffersToWrite->getLanguageModelDictContent(), headerPolicy);
Ver4PatriciaTrieNodeReader newPtNodeReader(buffersToWrite->getTrieBuffer());
Ver4PtNodeArrayReader newPtNodeArrayreader(buffersToWrite->getTrieBuffer());
Ver4ShortcutListPolicy newShortcutPolicy(buffersToWrite->getMutableShortcutDictContent(),
buffersToWrite->getTerminalPositionLookupTable());