2013-12-13 08:09:16 +00:00
|
|
|
/*
 * Copyright (C) 2013, The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
|
|
|
|
|
|
|
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h"
|
|
|
|
|
|
|
|
#include <cstring>
|
|
|
|
#include <queue>
|
|
|
|
|
|
|
|
#include "suggest/policyimpl/dictionary/header/header_policy.h"
|
2014-05-12 12:05:14 +00:00
|
|
|
#include "suggest/policyimpl/dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h"
|
2013-12-13 08:09:16 +00:00
|
|
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
|
|
|
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
|
|
|
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
|
|
|
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h"
|
2014-02-10 12:06:07 +00:00
|
|
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.h"
|
2013-12-13 08:09:16 +00:00
|
|
|
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
|
|
|
#include "suggest/policyimpl/dictionary/utils/file_utils.h"
|
|
|
|
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
|
2014-11-20 06:27:30 +00:00
|
|
|
#include "utils/ngram_utils.h"
|
2013-12-13 08:09:16 +00:00
|
|
|
|
|
|
|
namespace latinime {
|
|
|
|
|
2014-02-20 10:12:32 +00:00
|
|
|
bool Ver4PatriciaTrieWritingHelper::writeToDictFile(const char *const dictDirPath,
|
2014-10-21 06:46:14 +00:00
|
|
|
const EntryCounts &entryCounts) const {
|
2013-12-13 08:09:16 +00:00
|
|
|
const HeaderPolicy *const headerPolicy = mBuffers->getHeaderPolicy();
|
|
|
|
BufferWithExtendableBuffer headerBuffer(
|
|
|
|
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
|
|
|
|
const int extendedRegionSize = headerPolicy->getExtendedRegionSize()
|
|
|
|
+ mBuffers->getTrieBuffer()->getUsedAdditionalBufferSize();
|
2014-01-30 09:15:35 +00:00
|
|
|
if (!headerPolicy->fillInAndWriteHeaderToBuffer(false /* updatesLastDecayedTime */,
|
2014-10-21 06:46:14 +00:00
|
|
|
entryCounts, extendedRegionSize, &headerBuffer)) {
|
2014-01-30 09:15:35 +00:00
|
|
|
AKLOGE("Cannot write header structure to buffer. "
|
2014-10-21 06:46:14 +00:00
|
|
|
"updatesLastDecayedTime: %d, unigramCount: %d, bigramCount: %d, trigramCount: %d,"
|
2014-11-20 06:27:30 +00:00
|
|
|
"extendedRegionSize: %d", false, entryCounts.getNgramCount(NgramType::Unigram),
|
|
|
|
entryCounts.getNgramCount(NgramType::Bigram),
|
|
|
|
entryCounts.getNgramCount(NgramType::Trigram),
|
2013-12-13 08:09:16 +00:00
|
|
|
extendedRegionSize);
|
2014-02-20 10:12:32 +00:00
|
|
|
return false;
|
2013-12-13 08:09:16 +00:00
|
|
|
}
|
2014-02-20 10:12:32 +00:00
|
|
|
return mBuffers->flushHeaderAndDictBuffers(dictDirPath, &headerBuffer);
|
2013-12-13 08:09:16 +00:00
|
|
|
}
|
|
|
|
|
2014-02-20 10:12:32 +00:00
|
|
|
bool Ver4PatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeArrayPos,
|
2013-12-13 08:09:16 +00:00
|
|
|
const char *const dictDirPath) {
|
|
|
|
const HeaderPolicy *const headerPolicy = mBuffers->getHeaderPolicy();
|
|
|
|
Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers(
|
2014-04-19 00:57:28 +00:00
|
|
|
Ver4DictBuffers::createVer4DictBuffers(headerPolicy,
|
|
|
|
Ver4DictConstants::MAX_DICTIONARY_SIZE));
|
2014-10-21 07:36:03 +00:00
|
|
|
MutableEntryCounters entryCounters;
|
|
|
|
if (!runGC(rootPtNodeArrayPos, headerPolicy, dictBuffers.get(), &entryCounters)) {
|
2014-02-20 10:12:32 +00:00
|
|
|
return false;
|
2013-12-13 08:09:16 +00:00
|
|
|
}
|
|
|
|
BufferWithExtendableBuffer headerBuffer(
|
|
|
|
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
|
2014-01-30 09:15:35 +00:00
|
|
|
if (!headerPolicy->fillInAndWriteHeaderToBuffer(true /* updatesLastDecayedTime */,
|
2014-10-21 07:36:03 +00:00
|
|
|
entryCounters.getEntryCounts(), 0 /* extendedRegionSize */, &headerBuffer)) {
|
2014-02-20 10:12:32 +00:00
|
|
|
return false;
|
2013-12-13 08:09:16 +00:00
|
|
|
}
|
2014-03-06 14:36:38 +00:00
|
|
|
return dictBuffers->flushHeaderAndDictBuffers(dictDirPath, &headerBuffer);
|
2013-12-13 08:09:16 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
|
|
|
const HeaderPolicy *const headerPolicy, Ver4DictBuffers *const buffersToWrite,
|
2014-10-21 07:36:03 +00:00
|
|
|
MutableEntryCounters *const outEntryCounters) {
|
2014-09-25 02:41:50 +00:00
|
|
|
Ver4PatriciaTrieNodeReader ptNodeReader(mBuffers->getTrieBuffer());
|
2014-02-10 12:06:07 +00:00
|
|
|
Ver4PtNodeArrayReader ptNodeArrayReader(mBuffers->getTrieBuffer());
|
2013-12-13 08:09:16 +00:00
|
|
|
Ver4ShortcutListPolicy shortcutPolicy(mBuffers->getMutableShortcutDictContent(),
|
|
|
|
mBuffers->getTerminalPositionLookupTable());
|
|
|
|
Ver4PatriciaTrieNodeWriter ptNodeWriter(mBuffers->getWritableTrieBuffer(),
|
2014-10-15 03:29:31 +00:00
|
|
|
mBuffers, &ptNodeReader, &ptNodeArrayReader, &shortcutPolicy);
|
2013-12-13 08:09:16 +00:00
|
|
|
|
2014-10-15 03:29:31 +00:00
|
|
|
if (!mBuffers->getMutableLanguageModelDictContent()->updateAllProbabilityEntriesForGC(
|
2014-10-21 07:36:03 +00:00
|
|
|
headerPolicy, outEntryCounters)) {
|
2014-08-22 11:07:54 +00:00
|
|
|
AKLOGE("Failed to update probabilities in language model dict content.");
|
|
|
|
return false;
|
|
|
|
}
|
2014-08-21 03:48:24 +00:00
|
|
|
if (headerPolicy->isDecayingDict()) {
|
2014-11-20 06:27:30 +00:00
|
|
|
const EntryCounts &maxEntryCounts = headerPolicy->getMaxNgramCounts();
|
2014-10-21 07:36:03 +00:00
|
|
|
if (!mBuffers->getMutableLanguageModelDictContent()->truncateEntries(
|
|
|
|
outEntryCounters->getEntryCounts(), maxEntryCounts, headerPolicy,
|
|
|
|
outEntryCounters)) {
|
2014-08-21 03:48:24 +00:00
|
|
|
AKLOGE("Failed to truncate entries in language model dict content.");
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-02-10 10:34:34 +00:00
|
|
|
DynamicPtReadingHelper readingHelper(&ptNodeReader, &ptNodeArrayReader);
|
2013-12-13 08:09:16 +00:00
|
|
|
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
|
|
|
|
DynamicPtGcEventListeners
|
|
|
|
::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
|
|
|
|
traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
|
|
|
|
&ptNodeWriter);
|
|
|
|
if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
|
|
|
|
&traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Mapping from positions in mBuffer to positions in bufferToWrite.
|
|
|
|
PtNodeWriter::DictPositionRelocationMap dictPositionRelocationMap;
|
|
|
|
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
|
|
|
|
Ver4PatriciaTrieNodeWriter ptNodeWriterForNewBuffers(buffersToWrite->getWritableTrieBuffer(),
|
2014-10-15 03:29:31 +00:00
|
|
|
buffersToWrite, &ptNodeReader, &ptNodeArrayReader, &shortcutPolicy);
|
2013-12-13 08:09:16 +00:00
|
|
|
DynamicPtGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
|
|
|
|
traversePolicyToPlaceAndWriteValidPtNodesToBuffer(&ptNodeWriterForNewBuffers,
|
|
|
|
buffersToWrite->getWritableTrieBuffer(), &dictPositionRelocationMap);
|
|
|
|
if (!readingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
|
|
|
|
&traversePolicyToPlaceAndWriteValidPtNodesToBuffer)) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Create policy instances for the GCed dictionary.
|
2014-09-25 02:41:50 +00:00
|
|
|
Ver4PatriciaTrieNodeReader newPtNodeReader(buffersToWrite->getTrieBuffer());
|
2014-02-10 12:06:07 +00:00
|
|
|
Ver4PtNodeArrayReader newPtNodeArrayreader(buffersToWrite->getTrieBuffer());
|
2013-12-13 08:09:16 +00:00
|
|
|
Ver4ShortcutListPolicy newShortcutPolicy(buffersToWrite->getMutableShortcutDictContent(),
|
|
|
|
buffersToWrite->getTerminalPositionLookupTable());
|
|
|
|
Ver4PatriciaTrieNodeWriter newPtNodeWriter(buffersToWrite->getWritableTrieBuffer(),
|
2014-10-15 03:29:31 +00:00
|
|
|
buffersToWrite, &newPtNodeReader, &newPtNodeArrayreader,
|
2014-02-10 12:06:07 +00:00
|
|
|
&newShortcutPolicy);
|
2013-12-13 08:09:16 +00:00
|
|
|
// Re-assign terminal IDs for valid terminal PtNodes.
|
|
|
|
TerminalPositionLookupTable::TerminalIdMap terminalIdMap;
|
|
|
|
if(!buffersToWrite->getMutableTerminalPositionLookupTable()->runGCTerminalIds(
|
|
|
|
&terminalIdMap)) {
|
|
|
|
return false;
|
|
|
|
}
|
2014-10-21 07:36:03 +00:00
|
|
|
// Run GC for language model dict content.
|
2014-08-05 05:13:07 +00:00
|
|
|
if (!buffersToWrite->getMutableLanguageModelDictContent()->runGC(&terminalIdMap,
|
2014-10-21 07:36:03 +00:00
|
|
|
mBuffers->getLanguageModelDictContent())) {
|
2013-12-13 08:09:16 +00:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
// Run GC for shortcut dict content.
|
|
|
|
if(!buffersToWrite->getMutableShortcutDictContent()->runGC(&terminalIdMap,
|
|
|
|
mBuffers->getShortcutDictContent())) {
|
|
|
|
return false;
|
|
|
|
}
|
2014-02-10 10:34:34 +00:00
|
|
|
DynamicPtReadingHelper newDictReadingHelper(&newPtNodeReader, &newPtNodeArrayreader);
|
2013-12-13 08:09:16 +00:00
|
|
|
newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
|
|
|
|
DynamicPtGcEventListeners::TraversePolicyToUpdateAllPositionFields
|
|
|
|
traversePolicyToUpdateAllPositionFields(&newPtNodeWriter, &dictPositionRelocationMap);
|
|
|
|
if (!newDictReadingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
|
|
|
|
&traversePolicyToUpdateAllPositionFields)) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
|
|
|
|
TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds
|
|
|
|
traversePolicyToUpdateAllPtNodeFlagsAndTerminalIds(&newPtNodeWriter, &terminalIdMap);
|
|
|
|
if (!newDictReadingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
|
|
|
|
&traversePolicyToUpdateAllPtNodeFlagsAndTerminalIds)) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool Ver4PatriciaTrieWritingHelper::TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds
|
|
|
|
::onVisitingPtNode(const PtNodeParams *const ptNodeParams) {
|
|
|
|
if (!ptNodeParams->isTerminal()) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
TerminalPositionLookupTable::TerminalIdMap::const_iterator it =
|
|
|
|
mTerminalIdMap->find(ptNodeParams->getTerminalId());
|
|
|
|
if (it == mTerminalIdMap->end()) {
|
|
|
|
AKLOGE("terminal Id %d is not in the terminal position map. map size: %zd",
|
|
|
|
ptNodeParams->getTerminalId(), mTerminalIdMap->size());
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
if (!mPtNodeWriter->updateTerminalId(ptNodeParams, it->second)) {
|
|
|
|
AKLOGE("Cannot update terminal id. %d -> %d", it->first, it->second);
|
2014-06-23 07:49:49 +00:00
|
|
|
return false;
|
2013-12-13 08:09:16 +00:00
|
|
|
}
|
2014-06-23 07:49:49 +00:00
|
|
|
return true;
|
2013-12-13 08:09:16 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
} // namespace latinime
|