Merge "Always keep PtNodes that represent non-word info during GC."
commit
39e5a15af0
|
@ -29,10 +29,10 @@ bool DynamicPtGcEventListeners
|
|||
// PtNode is useless when the PtNode is not a terminal and doesn't have any not useless
|
||||
// children.
|
||||
bool isUselessPtNode = !ptNodeParams->isTerminal();
|
||||
if (ptNodeParams->isTerminal()) {
|
||||
if (ptNodeParams->isTerminal() && !ptNodeParams->representsNonWordInfo()) {
|
||||
bool needsToKeepPtNode = true;
|
||||
if (!mPtNodeWriter->updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC(ptNodeParams,
|
||||
&needsToKeepPtNode)) {
|
||||
if (!mPtNodeWriter->updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC(
|
||||
ptNodeParams, &needsToKeepPtNode)) {
|
||||
AKLOGE("Cannot update PtNode probability or get needs to keep PtNode after GC.");
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -160,7 +160,8 @@ class PtNodeParams {
|
|||
}
|
||||
|
||||
// Tells whether this PtNode represents non-word information rather than a word.
//
// All three conditions must hold:
//   - the node has at least one code point (this also guards the [0] access below),
//   - CharUtils::isInUnicodeSpace() accepts the first code point,
//   - isNotAWord() is true for this node.
//
// Callers use this to exempt such nodes from GC and unigram truncation.
// NOTE(review): the exact semantics of isInUnicodeSpace() and isNotAWord() are defined
// elsewhere; confirm against their definitions.
AK_FORCE_INLINE bool representsNonWordInfo() const {
    return getCodePointCount() > 0 && CharUtils::isInUnicodeSpace(getCodePoints()[0])
            && isNotAWord();
}
|
||||
|
||||
// Parent node position
|
||||
|
|
|
@ -213,13 +213,16 @@ bool Ver4PatriciaTrieWritingHelper::truncateUnigrams(
|
|||
// Delete unigrams.
|
||||
while (static_cast<int>(priorityQueue.size()) > maxUnigramCount) {
|
||||
const int ptNodePos = priorityQueue.top().getDictPos();
|
||||
priorityQueue.pop();
|
||||
const PtNodeParams ptNodeParams =
|
||||
ptNodeReader->fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
|
||||
if (ptNodeParams.representsNonWordInfo()) {
|
||||
continue;
|
||||
}
|
||||
if (!ptNodeWriter->markPtNodeAsWillBecomeNonTerminal(&ptNodeParams)) {
|
||||
AKLOGE("Cannot mark PtNode as willBecomeNonterminal. PtNode pos: %d", ptNodePos);
|
||||
return false;
|
||||
}
|
||||
priorityQueue.pop();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue