// LatinIME/native/jni/src/dictionary/structure/v2/patricia_trie_policy.h (181 lines, 7.4 KiB, C++)

/*
* Copyright (C) 2013, The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LATINIME_PATRICIA_TRIE_POLICY_H
#define LATINIME_PATRICIA_TRIE_POLICY_H
#include <cstdint>
#include <utility>
#include <vector>

#include "defines.h"
#include "dictionary/header/header_policy.h"
#include "dictionary/interface/dictionary_structure_with_buffer_policy.h"
#include "dictionary/structure/v2/bigram/bigram_list_policy.h"
#include "dictionary/structure/v2/shortcut/shortcut_list_policy.h"
#include "dictionary/structure/v2/ver2_patricia_trie_node_reader.h"
#include "dictionary/structure/v2/ver2_pt_node_array_reader.h"
#include "dictionary/utils/format_utils.h"
#include "dictionary/utils/mmapped_buffer.h"
#include "utils/byte_array_view.h"
#include "utils/int_array_view.h"
namespace latinime {
class DicNode;
class DicNodeVector;
// Word id = Position of a PtNode that represents the word.
// Max supported n-gram is bigram.
// Structure policy backed by a memory-mapped buffer. Every mutating entry point below
// (add/remove/update/flush/GC) only logs a warning and returns false, i.e. this policy
// treats the dictionary as non-updatable.
class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
 public:
    // Builds the policy from a memory-mapped buffer, which is moved into mMmappedBuffer.
    //
    // The constructor is explicit so that a MmappedBufferPtr is never implicitly converted
    // into a PatriciaTriePolicy.
    //
    // The initializer list relies on member declaration order: the header policy is parsed
    // from the start of the mapped bytes, mBuffer is the view that remains after skipping
    // mHeaderPolicy.getSize() bytes, and the list policies / readers are all constructed on
    // top of mBuffer.
    explicit PatriciaTriePolicy(MmappedBuffer::MmappedBufferPtr mmappedBuffer)
            : mMmappedBuffer(std::move(mmappedBuffer)),
              mHeaderPolicy(mMmappedBuffer->getReadOnlyByteArrayView().data(),
                      FormatUtils::detectFormatVersion(mMmappedBuffer->getReadOnlyByteArrayView())),
              mBuffer(mMmappedBuffer->getReadOnlyByteArrayView().skip(mHeaderPolicy.getSize())),
              mBigramListPolicy(mBuffer), mShortcutListPolicy(mBuffer),
              mPtNodeReader(mBuffer, &mBigramListPolicy, &mShortcutListPolicy,
                      mHeaderPolicy.getCodePointTable()),
              mPtNodeArrayReader(mBuffer), mTerminalPtNodePositionsForIteratingWords(),
              mIsCorrupted(false) {}

    // The root position is always 0.
    AK_FORCE_INLINE int getRootPosition() const {
        return 0;
    }

    // The following lookup / traversal methods are only declared here; their definitions
    // live outside this header.
    void createAndGetAllChildDicNodes(const DicNode *const dicNode,
            DicNodeVector *const childDicNodes) const;

    int getCodePointsAndReturnCodePointCount(const int wordId, const int maxCodePointCount,
            int *const outCodePoints) const;

    int getWordId(const CodePointArrayView wordCodePoints, const bool forceLowerCaseSearch) const;

    const WordAttributes getWordAttributesInContext(const WordIdArrayView prevWordIds,
            const int wordId, MultiBigramMap *const multiBigramMap) const;

    int getProbability(const int unigramProbability, const int bigramProbability) const;

    int getProbabilityOfWord(const WordIdArrayView prevWordIds, const int wordId) const;

    void iterateNgramEntries(const WordIdArrayView prevWordIds,
            NgramListener *const listener) const;

    BinaryDictionaryShortcutIterator getShortcutIterator(const int wordId) const;

    // Returns a pointer to the header policy owned by this object.
    const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
        return &mHeaderPolicy;
    }

    // Unsupported: logs a warning and returns false.
    bool addUnigramEntry(const CodePointArrayView wordCodePoints,
            const UnigramProperty *const unigramProperty) {
        // This method should not be called for non-updatable dictionary.
        AKLOGI("Warning: addUnigramEntry() is called for non-updatable dictionary.");
        return false;
    }

    // Unsupported: logs a warning and returns false.
    bool removeUnigramEntry(const CodePointArrayView wordCodePoints) {
        // This method should not be called for non-updatable dictionary.
        AKLOGI("Warning: removeUnigramEntry() is called for non-updatable dictionary.");
        return false;
    }

    // Unsupported: logs a warning and returns false.
    bool addNgramEntry(const NgramProperty *const ngramProperty) {
        // This method should not be called for non-updatable dictionary.
        AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary.");
        return false;
    }

    // Unsupported: logs a warning and returns false.
    bool removeNgramEntry(const NgramContext *const ngramContext,
            const CodePointArrayView wordCodePoints) {
        // This method should not be called for non-updatable dictionary.
        AKLOGI("Warning: removeNgramEntry() is called for non-updatable dictionary.");
        return false;
    }

    // Unsupported: logs a warning and returns false.
    bool updateEntriesForWordWithNgramContext(const NgramContext *const ngramContext,
            const CodePointArrayView wordCodePoints, const bool isValidWord,
            const HistoricalInfo historicalInfo) {
        // This method should not be called for non-updatable dictionary.
        AKLOGI("Warning: updateEntriesForWordWithNgramContext() is called for non-updatable "
                "dictionary.");
        return false;
    }

    // Unsupported: logs a warning and returns false.
    bool flush(const char *const filePath) {
        // This method should not be called for non-updatable dictionary.
        AKLOGI("Warning: flush() is called for non-updatable dictionary.");
        return false;
    }

    // Unsupported: logs a warning and returns false.
    bool flushWithGC(const char *const filePath) {
        // This method should not be called for non-updatable dictionary.
        AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
        return false;
    }

    // Unsupported: logs a warning and returns false.
    bool needsToRunGC(const bool mindsBlockByGC) const {
        // This method should not be called for non-updatable dictionary.
        AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
        return false;
    }

    // Property queries are not supported. The query is ignored and, when the output buffer
    // has room for at least one char, an empty string is written to outResult.
    void getProperty(const char *const query, const int queryLength, char *const outResult,
            const int maxResultLength) {
        // getProperty is not supported for this class.
        if (maxResultLength > 0) {
            outResult[0] = '\0';
        }
    }

    const WordProperty getWordProperty(const CodePointArrayView wordCodePoints) const;

    // Non-const; declared only (defined outside this header).
    int getNextWordAndNextToken(const int token, int *const outCodePoints,
            int *const outCodePointCount);

    // Returns the current value of the corruption flag (initialized to false).
    bool isCorrupted() const {
        return mIsCorrupted;
    }

 private:
    DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy);

    // NOTE: do not reorder these members. The constructor's initializer list reads earlier
    // members while initializing later ones (mHeaderPolicy reads mMmappedBuffer, mBuffer
    // reads both, and the list policies / readers read mBuffer), and members are
    // initialized in declaration order.
    const MmappedBuffer::MmappedBufferPtr mMmappedBuffer;
    const HeaderPolicy mHeaderPolicy;
    // View of the mapped bytes that follow the header.
    const ReadOnlyByteArrayView mBuffer;
    const BigramListPolicy mBigramListPolicy;
    const ShortcutListPolicy mShortcutListPolicy;
    const Ver2ParticiaTrieNodeReader mPtNodeReader;
    const Ver2PtNodeArrayReader mPtNodeArrayReader;
    std::vector<int> mTerminalPtNodePositionsForIteratingWords;
    // mutable so that const member functions are able to modify the flag.
    mutable bool mIsCorrupted;

    // Private helpers; declared only (defined outside this header).
    int getCodePointsAndProbabilityAndReturnCodePointCount(const int wordId,
            const int maxCodePointCount, int *const outCodePoints,
            int *const outUnigramProbability) const;
    int getShortcutPositionOfPtNode(const int ptNodePos) const;
    int getBigramsPositionOfPtNode(const int ptNodePos) const;
    int createAndGetLeavingChildNode(const DicNode *const dicNode, const int ptNodePos,
            DicNodeVector *const childDicNodes) const;
    int getWordIdFromTerminalPtNodePos(const int ptNodePos) const;
    int getTerminalPtNodePosFromWordId(const int wordId) const;
    const WordAttributes getWordAttributes(const int probability,
            const PtNodeParams &ptNodeParams) const;
    bool isValidPos(const int pos) const;
};
} // namespace latinime
#endif // LATINIME_PATRICIA_TRIE_POLICY_H