Use IntArrayView for dictionary structure policy.
Change-Id: I0dc94908259d70d5085ff22abf422d90affb1452
This commit is contained in:
parent
b85bf4ebb8
commit
6ae4d79d81
11 changed files with 117 additions and 113 deletions
|
@ -32,6 +32,7 @@
|
||||||
#include "suggest/core/suggest_options.h"
|
#include "suggest/core/suggest_options.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h"
|
#include "suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h"
|
||||||
#include "utils/char_utils.h"
|
#include "utils/char_utils.h"
|
||||||
|
#include "utils/int_array_view.h"
|
||||||
#include "utils/jni_data_utils.h"
|
#include "utils/jni_data_utils.h"
|
||||||
#include "utils/log_utils.h"
|
#include "utils/log_utils.h"
|
||||||
#include "utils/time_keeper.h"
|
#include "utils/time_keeper.h"
|
||||||
|
@ -581,8 +582,9 @@ static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, j
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!dictionaryStructureWithBufferPolicy->addUnigramEntry(wordCodePoints,
|
if (!dictionaryStructureWithBufferPolicy->addUnigramEntry(
|
||||||
wordCodePointCount, wordProperty.getUnigramProperty())) {
|
CodePointArrayView(wordCodePoints, wordCodePointCount),
|
||||||
|
wordProperty.getUnigramProperty())) {
|
||||||
LogUtils::logToJava(env, "Cannot add unigram to the new dict.");
|
LogUtils::logToJava(env, "Cannot add unigram to the new dict.");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
|
@ -28,6 +28,7 @@
|
||||||
#include "suggest/core/suggest_options.h"
|
#include "suggest/core/suggest_options.h"
|
||||||
#include "suggest/policyimpl/gesture/gesture_suggest_policy_factory.h"
|
#include "suggest/policyimpl/gesture/gesture_suggest_policy_factory.h"
|
||||||
#include "suggest/policyimpl/typing/typing_suggest_policy_factory.h"
|
#include "suggest/policyimpl/typing/typing_suggest_policy_factory.h"
|
||||||
|
#include "utils/int_array_view.h"
|
||||||
#include "utils/log_utils.h"
|
#include "utils/log_utils.h"
|
||||||
#include "utils/time_keeper.h"
|
#include "utils/time_keeper.h"
|
||||||
|
|
||||||
|
@ -112,8 +113,8 @@ int Dictionary::getMaxProbabilityOfExactMatches(const int *word, int length) con
|
||||||
int Dictionary::getNgramProbability(const PrevWordsInfo *const prevWordsInfo, const int *word,
|
int Dictionary::getNgramProbability(const PrevWordsInfo *const prevWordsInfo, const int *word,
|
||||||
int length) const {
|
int length) const {
|
||||||
TimeKeeper::setCurrentTime();
|
TimeKeeper::setCurrentTime();
|
||||||
int nextWordPos = mDictionaryStructureWithBufferPolicy->getTerminalPtNodePositionOfWord(word,
|
int nextWordPos = mDictionaryStructureWithBufferPolicy->getTerminalPtNodePositionOfWord(
|
||||||
length, false /* forceLowerCaseSearch */);
|
CodePointArrayView(word, length), false /* forceLowerCaseSearch */);
|
||||||
if (NOT_A_DICT_POS == nextWordPos) return NOT_A_PROBABILITY;
|
if (NOT_A_DICT_POS == nextWordPos) return NOT_A_PROBABILITY;
|
||||||
if (!prevWordsInfo) {
|
if (!prevWordsInfo) {
|
||||||
return getDictionaryStructurePolicy()->getProbabilityOfPtNode(
|
return getDictionaryStructurePolicy()->getProbabilityOfPtNode(
|
||||||
|
@ -135,12 +136,14 @@ bool Dictionary::addUnigramEntry(const int *const word, const int length,
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
TimeKeeper::setCurrentTime();
|
TimeKeeper::setCurrentTime();
|
||||||
return mDictionaryStructureWithBufferPolicy->addUnigramEntry(word, length, unigramProperty);
|
return mDictionaryStructureWithBufferPolicy->addUnigramEntry(CodePointArrayView(word, length),
|
||||||
|
unigramProperty);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Dictionary::removeUnigramEntry(const int *const codePoints, const int codePointCount) {
|
bool Dictionary::removeUnigramEntry(const int *const codePoints, const int codePointCount) {
|
||||||
TimeKeeper::setCurrentTime();
|
TimeKeeper::setCurrentTime();
|
||||||
return mDictionaryStructureWithBufferPolicy->removeUnigramEntry(codePoints, codePointCount);
|
return mDictionaryStructureWithBufferPolicy->removeUnigramEntry(
|
||||||
|
CodePointArrayView(codePoints, codePointCount));
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Dictionary::addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
|
bool Dictionary::addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
|
||||||
|
@ -152,7 +155,8 @@ bool Dictionary::addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
|
||||||
bool Dictionary::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
|
bool Dictionary::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
|
||||||
const int *const word, const int length) {
|
const int *const word, const int length) {
|
||||||
TimeKeeper::setCurrentTime();
|
TimeKeeper::setCurrentTime();
|
||||||
return mDictionaryStructureWithBufferPolicy->removeNgramEntry(prevWordsInfo, word, length);
|
return mDictionaryStructureWithBufferPolicy->removeNgramEntry(prevWordsInfo,
|
||||||
|
CodePointArrayView(word, length));
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Dictionary::flush(const char *const filePath) {
|
bool Dictionary::flush(const char *const filePath) {
|
||||||
|
@ -181,7 +185,7 @@ const WordProperty Dictionary::getWordProperty(const int *const codePoints,
|
||||||
const int codePointCount) {
|
const int codePointCount) {
|
||||||
TimeKeeper::setCurrentTime();
|
TimeKeeper::setCurrentTime();
|
||||||
return mDictionaryStructureWithBufferPolicy->getWordProperty(
|
return mDictionaryStructureWithBufferPolicy->getWordProperty(
|
||||||
codePoints, codePointCount);
|
CodePointArrayView(codePoints, codePointCount));
|
||||||
}
|
}
|
||||||
|
|
||||||
int Dictionary::getNextWordAndNextToken(const int token, int *const outCodePoints,
|
int Dictionary::getNextWordAndNextToken(const int token, int *const outCodePoints,
|
||||||
|
|
|
@ -21,6 +21,7 @@
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "suggest/core/dictionary/property/word_property.h"
|
#include "suggest/core/dictionary/property/word_property.h"
|
||||||
|
#include "utils/int_array_view.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
|
@ -49,33 +50,32 @@ class DictionaryStructureWithBufferPolicy {
|
||||||
DicNodeVector *const childDicNodes) const = 0;
|
DicNodeVector *const childDicNodes) const = 0;
|
||||||
|
|
||||||
virtual int getCodePointsAndProbabilityAndReturnCodePointCount(
|
virtual int getCodePointsAndProbabilityAndReturnCodePointCount(
|
||||||
const int nodePos, const int maxCodePointCount, int *const outCodePoints,
|
const int ptNodePos, const int maxCodePointCount, int *const outCodePoints,
|
||||||
int *const outUnigramProbability) const = 0;
|
int *const outUnigramProbability) const = 0;
|
||||||
|
|
||||||
virtual int getTerminalPtNodePositionOfWord(const int *const inWord,
|
virtual int getTerminalPtNodePositionOfWord(const CodePointArrayView wordCodePoints,
|
||||||
const int length, const bool forceLowerCaseSearch) const = 0;
|
const bool forceLowerCaseSearch) const = 0;
|
||||||
|
|
||||||
virtual int getProbability(const int unigramProbability,
|
virtual int getProbability(const int unigramProbability, const int bigramProbability) const = 0;
|
||||||
const int bigramProbability) const = 0;
|
|
||||||
|
|
||||||
virtual int getProbabilityOfPtNode(const int *const prevWordsPtNodePos,
|
virtual int getProbabilityOfPtNode(const int *const prevWordsPtNodePos,
|
||||||
const int nodePos) const = 0;
|
const int ptNodePos) const = 0;
|
||||||
|
|
||||||
virtual void iterateNgramEntries(const int *const prevWordsPtNodePos,
|
virtual void iterateNgramEntries(const int *const prevWordsPtNodePos,
|
||||||
NgramListener *const listener) const = 0;
|
NgramListener *const listener) const = 0;
|
||||||
|
|
||||||
virtual int getShortcutPositionOfPtNode(const int nodePos) const = 0;
|
virtual int getShortcutPositionOfPtNode(const int ptNodePos) const = 0;
|
||||||
|
|
||||||
virtual const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const = 0;
|
virtual const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const = 0;
|
||||||
|
|
||||||
virtual const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const = 0;
|
virtual const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const = 0;
|
||||||
|
|
||||||
// Returns whether the update was successful or not.
|
// Returns whether the update was successful or not.
|
||||||
virtual bool addUnigramEntry(const int *const word, const int length,
|
virtual bool addUnigramEntry(const CodePointArrayView wordCodePoints,
|
||||||
const UnigramProperty *const unigramProperty) = 0;
|
const UnigramProperty *const unigramProperty) = 0;
|
||||||
|
|
||||||
// Returns whether the update was successful or not.
|
// Returns whether the update was successful or not.
|
||||||
virtual bool removeUnigramEntry(const int *const word, const int length) = 0;
|
virtual bool removeUnigramEntry(const CodePointArrayView wordCodePoints) = 0;
|
||||||
|
|
||||||
// Returns whether the update was successful or not.
|
// Returns whether the update was successful or not.
|
||||||
virtual bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
|
virtual bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
|
||||||
|
@ -83,7 +83,7 @@ class DictionaryStructureWithBufferPolicy {
|
||||||
|
|
||||||
// Returns whether the update was successful or not.
|
// Returns whether the update was successful or not.
|
||||||
virtual bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
|
virtual bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
|
||||||
const int *const word, const int length) = 0;
|
const CodePointArrayView wordCodePoints) = 0;
|
||||||
|
|
||||||
// Returns whether the flush was successful or not.
|
// Returns whether the flush was successful or not.
|
||||||
virtual bool flush(const char *const filePath) = 0;
|
virtual bool flush(const char *const filePath) = 0;
|
||||||
|
@ -99,8 +99,7 @@ class DictionaryStructureWithBufferPolicy {
|
||||||
const int maxResultLength) = 0;
|
const int maxResultLength) = 0;
|
||||||
|
|
||||||
// Used for testing.
|
// Used for testing.
|
||||||
virtual const WordProperty getWordProperty(const int *const codePonts,
|
virtual const WordProperty getWordProperty(const CodePointArrayView wordCodePoints) const = 0;
|
||||||
const int codePointCount) const = 0;
|
|
||||||
|
|
||||||
// Method to iterate all words in the dictionary.
|
// Method to iterate all words in the dictionary.
|
||||||
// The returned token has to be used to get the next word. If token is 0, this method newly
|
// The returned token has to be used to get the next word. If token is 0, this method newly
|
||||||
|
|
|
@ -21,6 +21,7 @@
|
||||||
#include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
|
#include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
|
||||||
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
|
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
|
||||||
#include "utils/char_utils.h"
|
#include "utils/char_utils.h"
|
||||||
|
#include "utils/int_array_view.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
|
@ -91,19 +92,11 @@ class PrevWordsInfo {
|
||||||
}
|
}
|
||||||
|
|
||||||
// n is 1-indexed.
|
// n is 1-indexed.
|
||||||
const int *getNthPrevWordCodePoints(const int n) const {
|
const CodePointArrayView getNthPrevWordCodePoints(const int n) const {
|
||||||
if (n <= 0 || n > MAX_PREV_WORD_COUNT_FOR_N_GRAM) {
|
if (n <= 0 || n > MAX_PREV_WORD_COUNT_FOR_N_GRAM) {
|
||||||
return nullptr;
|
return CodePointArrayView();
|
||||||
}
|
}
|
||||||
return mPrevWordCodePoints[n - 1];
|
return CodePointArrayView(mPrevWordCodePoints[n - 1], mPrevWordCodePointCount[n - 1]);
|
||||||
}
|
|
||||||
|
|
||||||
// n is 1-indexed.
|
|
||||||
int getNthPrevWordCodePointCount(const int n) const {
|
|
||||||
if (n <= 0 || n > MAX_PREV_WORD_COUNT_FOR_N_GRAM) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
return mPrevWordCodePointCount[n - 1];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// n is 1-indexed.
|
// n is 1-indexed.
|
||||||
|
@ -134,8 +127,9 @@ class PrevWordsInfo {
|
||||||
return NOT_A_DICT_POS;
|
return NOT_A_DICT_POS;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
const CodePointArrayView codePointArrayView(codePoints, codePointCount);
|
||||||
const int wordPtNodePos = dictStructurePolicy->getTerminalPtNodePositionOfWord(
|
const int wordPtNodePos = dictStructurePolicy->getTerminalPtNodePositionOfWord(
|
||||||
codePoints, codePointCount, false /* forceLowerCaseSearch */);
|
codePointArrayView, false /* forceLowerCaseSearch */);
|
||||||
if (wordPtNodePos != NOT_A_DICT_POS || !tryLowerCaseSearch) {
|
if (wordPtNodePos != NOT_A_DICT_POS || !tryLowerCaseSearch) {
|
||||||
// Return the position when the word was found or doesn't try lower case
|
// Return the position when the word was found or doesn't try lower case
|
||||||
// search.
|
// search.
|
||||||
|
@ -144,7 +138,7 @@ class PrevWordsInfo {
|
||||||
// Check bigrams for lower-cased previous word if original was not found. Useful for
|
// Check bigrams for lower-cased previous word if original was not found. Useful for
|
||||||
// auto-capitalized words like "The [current_word]".
|
// auto-capitalized words like "The [current_word]".
|
||||||
return dictStructurePolicy->getTerminalPtNodePositionOfWord(
|
return dictStructurePolicy->getTerminalPtNodePositionOfWord(
|
||||||
codePoints, codePointCount, true /* forceLowerCaseSearch */);
|
codePointArrayView, true /* forceLowerCaseSearch */);
|
||||||
}
|
}
|
||||||
|
|
||||||
void clear() {
|
void clear() {
|
||||||
|
|
|
@ -104,12 +104,12 @@ int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
|
||||||
return codePointCount;
|
return codePointCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
int Ver4PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
|
int Ver4PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const CodePointArrayView wordCodePoints,
|
||||||
const int length, const bool forceLowerCaseSearch) const {
|
const bool forceLowerCaseSearch) const {
|
||||||
DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
|
DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
|
||||||
readingHelper.initWithPtNodeArrayPos(getRootPosition());
|
readingHelper.initWithPtNodeArrayPos(getRootPosition());
|
||||||
const int ptNodePos =
|
const int ptNodePos = readingHelper.getTerminalPtNodePositionOfWord(wordCodePoints.data(),
|
||||||
readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch);
|
wordCodePoints.size(), forceLowerCaseSearch);
|
||||||
if (readingHelper.isError()) {
|
if (readingHelper.isError()) {
|
||||||
mIsCorrupted = true;
|
mIsCorrupted = true;
|
||||||
AKLOGE("Dictionary reading error in createAndGetAllChildDicNodes().");
|
AKLOGE("Dictionary reading error in createAndGetAllChildDicNodes().");
|
||||||
|
@ -194,7 +194,7 @@ int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) cons
|
||||||
ptNodeParams.getTerminalId());
|
ptNodeParams.getTerminalId());
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int length,
|
bool Ver4PatriciaTriePolicy::addUnigramEntry(const CodePointArrayView wordCodePoints,
|
||||||
const UnigramProperty *const unigramProperty) {
|
const UnigramProperty *const unigramProperty) {
|
||||||
if (!mBuffers->isUpdatable()) {
|
if (!mBuffers->isUpdatable()) {
|
||||||
AKLOGI("Warning: addUnigramEntry() is called for non-updatable dictionary.");
|
AKLOGI("Warning: addUnigramEntry() is called for non-updatable dictionary.");
|
||||||
|
@ -205,8 +205,9 @@ bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int le
|
||||||
mDictBuffer->getTailPosition());
|
mDictBuffer->getTailPosition());
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (length > MAX_WORD_LENGTH) {
|
if (wordCodePoints.size() > MAX_WORD_LENGTH) {
|
||||||
AKLOGE("The word is too long to insert to the dictionary, length: %d", length);
|
AKLOGE("The word is too long to insert to the dictionary, length: %zd",
|
||||||
|
wordCodePoints.size());
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
for (const auto &shortcut : unigramProperty->getShortcuts()) {
|
for (const auto &shortcut : unigramProperty->getShortcuts()) {
|
||||||
|
@ -220,8 +221,8 @@ bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int le
|
||||||
readingHelper.initWithPtNodeArrayPos(getRootPosition());
|
readingHelper.initWithPtNodeArrayPos(getRootPosition());
|
||||||
bool addedNewUnigram = false;
|
bool addedNewUnigram = false;
|
||||||
int codePointsToAdd[MAX_WORD_LENGTH];
|
int codePointsToAdd[MAX_WORD_LENGTH];
|
||||||
int codePointCountToAdd = length;
|
int codePointCountToAdd = wordCodePoints.size();
|
||||||
memmove(codePointsToAdd, word, sizeof(int) * length);
|
memmove(codePointsToAdd, wordCodePoints.data(), sizeof(int) * codePointCountToAdd);
|
||||||
if (unigramProperty->representsBeginningOfSentence()) {
|
if (unigramProperty->representsBeginningOfSentence()) {
|
||||||
codePointCountToAdd = CharUtils::attachBeginningOfSentenceMarker(codePointsToAdd,
|
codePointCountToAdd = CharUtils::attachBeginningOfSentenceMarker(codePointsToAdd,
|
||||||
codePointCountToAdd, MAX_WORD_LENGTH);
|
codePointCountToAdd, MAX_WORD_LENGTH);
|
||||||
|
@ -229,14 +230,15 @@ bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int le
|
||||||
if (codePointCountToAdd <= 0) {
|
if (codePointCountToAdd <= 0) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (mUpdatingHelper.addUnigramWord(&readingHelper, codePointsToAdd, codePointCountToAdd,
|
const CodePointArrayView codePointArrayView(codePointsToAdd, codePointCountToAdd);
|
||||||
unigramProperty, &addedNewUnigram)) {
|
if (mUpdatingHelper.addUnigramWord(&readingHelper, codePointArrayView.data(),
|
||||||
|
codePointArrayView.size(), unigramProperty, &addedNewUnigram)) {
|
||||||
if (addedNewUnigram && !unigramProperty->representsBeginningOfSentence()) {
|
if (addedNewUnigram && !unigramProperty->representsBeginningOfSentence()) {
|
||||||
mUnigramCount++;
|
mUnigramCount++;
|
||||||
}
|
}
|
||||||
if (unigramProperty->getShortcuts().size() > 0) {
|
if (unigramProperty->getShortcuts().size() > 0) {
|
||||||
// Add shortcut target.
|
// Add shortcut target.
|
||||||
const int wordPos = getTerminalPtNodePositionOfWord(word, length,
|
const int wordPos = getTerminalPtNodePositionOfWord(codePointArrayView,
|
||||||
false /* forceLowerCaseSearch */);
|
false /* forceLowerCaseSearch */);
|
||||||
if (wordPos == NOT_A_DICT_POS) {
|
if (wordPos == NOT_A_DICT_POS) {
|
||||||
AKLOGE("Cannot find terminal PtNode position to add shortcut target.");
|
AKLOGE("Cannot find terminal PtNode position to add shortcut target.");
|
||||||
|
@ -259,12 +261,12 @@ bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int le
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Ver4PatriciaTriePolicy::removeUnigramEntry(const int *const word, const int length) {
|
bool Ver4PatriciaTriePolicy::removeUnigramEntry(const CodePointArrayView wordCodePoints) {
|
||||||
if (!mBuffers->isUpdatable()) {
|
if (!mBuffers->isUpdatable()) {
|
||||||
AKLOGI("Warning: removeUnigramEntry() is called for non-updatable dictionary.");
|
AKLOGI("Warning: removeUnigramEntry() is called for non-updatable dictionary.");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
const int ptNodePos = getTerminalPtNodePositionOfWord(word, length,
|
const int ptNodePos = getTerminalPtNodePositionOfWord(wordCodePoints,
|
||||||
false /* forceLowerCaseSearch */);
|
false /* forceLowerCaseSearch */);
|
||||||
if (ptNodePos == NOT_A_DICT_POS) {
|
if (ptNodePos == NOT_A_DICT_POS) {
|
||||||
return false;
|
return false;
|
||||||
|
@ -305,7 +307,6 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
|
||||||
false /* isBlacklisted */, MAX_PROBABILITY /* probability */,
|
false /* isBlacklisted */, MAX_PROBABILITY /* probability */,
|
||||||
NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts);
|
NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts);
|
||||||
if (!addUnigramEntry(prevWordsInfo->getNthPrevWordCodePoints(1 /* n */),
|
if (!addUnigramEntry(prevWordsInfo->getNthPrevWordCodePoints(1 /* n */),
|
||||||
prevWordsInfo->getNthPrevWordCodePointCount(1 /* n */),
|
|
||||||
&beginningOfSentenceUnigramProperty)) {
|
&beginningOfSentenceUnigramProperty)) {
|
||||||
AKLOGE("Cannot add unigram entry for the beginning-of-sentence.");
|
AKLOGE("Cannot add unigram entry for the beginning-of-sentence.");
|
||||||
return false;
|
return false;
|
||||||
|
@ -318,8 +319,8 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
const int word1Pos = getTerminalPtNodePositionOfWord(
|
const int word1Pos = getTerminalPtNodePositionOfWord(
|
||||||
bigramProperty->getTargetCodePoints()->data(),
|
CodePointArrayView(*bigramProperty->getTargetCodePoints()),
|
||||||
bigramProperty->getTargetCodePoints()->size(), false /* forceLowerCaseSearch */);
|
false /* forceLowerCaseSearch */);
|
||||||
if (word1Pos == NOT_A_DICT_POS) {
|
if (word1Pos == NOT_A_DICT_POS) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -336,7 +337,7 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
|
bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
|
||||||
const int *const word, const int length) {
|
const CodePointArrayView wordCodePoints) {
|
||||||
if (!mBuffers->isUpdatable()) {
|
if (!mBuffers->isUpdatable()) {
|
||||||
AKLOGI("Warning: removeNgramEntry() is called for non-updatable dictionary.");
|
AKLOGI("Warning: removeNgramEntry() is called for non-updatable dictionary.");
|
||||||
return false;
|
return false;
|
||||||
|
@ -350,8 +351,9 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
|
||||||
AKLOGE("prev words info is not valid for removing n-gram entry form the dictionary.");
|
AKLOGE("prev words info is not valid for removing n-gram entry form the dictionary.");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (length > MAX_WORD_LENGTH) {
|
if (wordCodePoints.size() > MAX_WORD_LENGTH) {
|
||||||
AKLOGE("word is too long to remove n-gram entry form the dictionary. length: %d", length);
|
AKLOGE("word is too long to remove n-gram entry form the dictionary. length: %zd",
|
||||||
|
wordCodePoints.size());
|
||||||
}
|
}
|
||||||
int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
|
int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
|
||||||
prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos,
|
prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos,
|
||||||
|
@ -360,7 +362,7 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
|
||||||
if (prevWordsPtNodePos[0] == NOT_A_DICT_POS) {
|
if (prevWordsPtNodePos[0] == NOT_A_DICT_POS) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
const int wordPos = getTerminalPtNodePositionOfWord(word, length,
|
const int wordPos = getTerminalPtNodePositionOfWord(wordCodePoints,
|
||||||
false /* forceLowerCaseSearch */);
|
false /* forceLowerCaseSearch */);
|
||||||
if (wordPos == NOT_A_DICT_POS) {
|
if (wordPos == NOT_A_DICT_POS) {
|
||||||
return false;
|
return false;
|
||||||
|
@ -445,9 +447,9 @@ void Ver4PatriciaTriePolicy::getProperty(const char *const query, const int quer
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const codePoints,
|
const WordProperty Ver4PatriciaTriePolicy::getWordProperty(
|
||||||
const int codePointCount) const {
|
const CodePointArrayView wordCodePoints) const {
|
||||||
const int ptNodePos = getTerminalPtNodePositionOfWord(codePoints, codePointCount,
|
const int ptNodePos = getTerminalPtNodePositionOfWord(wordCodePoints,
|
||||||
false /* forceLowerCaseSearch */);
|
false /* forceLowerCaseSearch */);
|
||||||
if (ptNodePos == NOT_A_DICT_POS) {
|
if (ptNodePos == NOT_A_DICT_POS) {
|
||||||
AKLOGE("getWordProperty is called for invalid word.");
|
AKLOGE("getWordProperty is called for invalid word.");
|
||||||
|
|
|
@ -39,6 +39,7 @@
|
||||||
#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.h"
|
#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_pt_node_array_reader.h"
|
#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_pt_node_array_reader.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||||
|
#include "utils/int_array_view.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
namespace backward {
|
namespace backward {
|
||||||
|
@ -75,7 +76,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
mBigramCount(mHeaderPolicy->getBigramCount()),
|
mBigramCount(mHeaderPolicy->getBigramCount()),
|
||||||
mTerminalPtNodePositionsForIteratingWords(), mIsCorrupted(false) {};
|
mTerminalPtNodePositionsForIteratingWords(), mIsCorrupted(false) {};
|
||||||
|
|
||||||
AK_FORCE_INLINE int getRootPosition() const {
|
virtual int getRootPosition() const {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -86,8 +87,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
const int terminalPtNodePos, const int maxCodePointCount, int *const outCodePoints,
|
const int terminalPtNodePos, const int maxCodePointCount, int *const outCodePoints,
|
||||||
int *const outUnigramProbability) const;
|
int *const outUnigramProbability) const;
|
||||||
|
|
||||||
int getTerminalPtNodePositionOfWord(const int *const inWord,
|
int getTerminalPtNodePositionOfWord(const CodePointArrayView wordCodePoints,
|
||||||
const int length, const bool forceLowerCaseSearch) const;
|
const bool forceLowerCaseSearch) const;
|
||||||
|
|
||||||
int getProbability(const int unigramProbability, const int bigramProbability) const;
|
int getProbability(const int unigramProbability, const int bigramProbability) const;
|
||||||
|
|
||||||
|
@ -106,16 +107,16 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
return &mShortcutPolicy;
|
return &mShortcutPolicy;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool addUnigramEntry(const int *const word, const int length,
|
bool addUnigramEntry(const CodePointArrayView wordCodePoints,
|
||||||
const UnigramProperty *const unigramProperty);
|
const UnigramProperty *const unigramProperty);
|
||||||
|
|
||||||
bool removeUnigramEntry(const int *const word, const int length);
|
bool removeUnigramEntry(const CodePointArrayView wordCodePoints);
|
||||||
|
|
||||||
bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
|
bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
|
||||||
const BigramProperty *const bigramProperty);
|
const BigramProperty *const bigramProperty);
|
||||||
|
|
||||||
bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const int *const word1,
|
bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
|
||||||
const int length1);
|
const CodePointArrayView wordCodePoints);
|
||||||
|
|
||||||
bool flush(const char *const filePath);
|
bool flush(const char *const filePath);
|
||||||
|
|
||||||
|
@ -126,8 +127,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
void getProperty(const char *const query, const int queryLength, char *const outResult,
|
void getProperty(const char *const query, const int queryLength, char *const outResult,
|
||||||
const int maxResultLength);
|
const int maxResultLength);
|
||||||
|
|
||||||
const WordProperty getWordProperty(const int *const codePoints,
|
const WordProperty getWordProperty(const CodePointArrayView wordCodePoints) const;
|
||||||
const int codePointCount) const;
|
|
||||||
|
|
||||||
int getNextWordAndNextToken(const int token, int *const outCodePoints,
|
int getNextWordAndNextToken(const int token, int *const outCodePoints,
|
||||||
int *const outCodePointCount);
|
int *const outCodePointCount);
|
||||||
|
|
|
@ -268,12 +268,12 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
|
||||||
|
|
||||||
// This function gets the position of the terminal PtNode of the exact matching word in the
|
// This function gets the position of the terminal PtNode of the exact matching word in the
|
||||||
// dictionary. If no match is found, it returns NOT_A_DICT_POS.
|
// dictionary. If no match is found, it returns NOT_A_DICT_POS.
|
||||||
int PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
|
int PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const CodePointArrayView wordCodePoints,
|
||||||
const int length, const bool forceLowerCaseSearch) const {
|
const bool forceLowerCaseSearch) const {
|
||||||
DynamicPtReadingHelper readingHelper(&mPtNodeReader, &mPtNodeArrayReader);
|
DynamicPtReadingHelper readingHelper(&mPtNodeReader, &mPtNodeArrayReader);
|
||||||
readingHelper.initWithPtNodeArrayPos(getRootPosition());
|
readingHelper.initWithPtNodeArrayPos(getRootPosition());
|
||||||
const int ptNodePos =
|
const int ptNodePos = readingHelper.getTerminalPtNodePositionOfWord(wordCodePoints.data(),
|
||||||
readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch);
|
wordCodePoints.size(), forceLowerCaseSearch);
|
||||||
if (readingHelper.isError()) {
|
if (readingHelper.isError()) {
|
||||||
mIsCorrupted = true;
|
mIsCorrupted = true;
|
||||||
AKLOGE("Dictionary reading error in createAndGetAllChildDicNodes().");
|
AKLOGE("Dictionary reading error in createAndGetAllChildDicNodes().");
|
||||||
|
@ -377,9 +377,9 @@ int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNod
|
||||||
return siblingPos;
|
return siblingPos;
|
||||||
}
|
}
|
||||||
|
|
||||||
const WordProperty PatriciaTriePolicy::getWordProperty(const int *const codePoints,
|
const WordProperty PatriciaTriePolicy::getWordProperty(
|
||||||
const int codePointCount) const {
|
const CodePointArrayView wordCodePoints) const {
|
||||||
const int ptNodePos = getTerminalPtNodePositionOfWord(codePoints, codePointCount,
|
const int ptNodePos = getTerminalPtNodePositionOfWord(wordCodePoints,
|
||||||
false /* forceLowerCaseSearch */);
|
false /* forceLowerCaseSearch */);
|
||||||
if (ptNodePos == NOT_A_DICT_POS) {
|
if (ptNodePos == NOT_A_DICT_POS) {
|
||||||
AKLOGE("getWordProperty was called for invalid word.");
|
AKLOGE("getWordProperty was called for invalid word.");
|
||||||
|
|
|
@ -30,6 +30,7 @@
|
||||||
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
|
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
|
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
|
||||||
#include "utils/byte_array_view.h"
|
#include "utils/byte_array_view.h"
|
||||||
|
#include "utils/int_array_view.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
|
@ -63,8 +64,8 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
const int terminalNodePos, const int maxCodePointCount, int *const outCodePoints,
|
const int terminalNodePos, const int maxCodePointCount, int *const outCodePoints,
|
||||||
int *const outUnigramProbability) const;
|
int *const outUnigramProbability) const;
|
||||||
|
|
||||||
int getTerminalPtNodePositionOfWord(const int *const inWord,
|
int getTerminalPtNodePositionOfWord(const CodePointArrayView wordCodePoints,
|
||||||
const int length, const bool forceLowerCaseSearch) const;
|
const bool forceLowerCaseSearch) const;
|
||||||
|
|
||||||
int getProbability(const int unigramProbability, const int bigramProbability) const;
|
int getProbability(const int unigramProbability, const int bigramProbability) const;
|
||||||
|
|
||||||
|
@ -83,14 +84,14 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
return &mShortcutListPolicy;
|
return &mShortcutListPolicy;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool addUnigramEntry(const int *const word, const int length,
|
bool addUnigramEntry(const CodePointArrayView wordCodePoints,
|
||||||
const UnigramProperty *const unigramProperty) {
|
const UnigramProperty *const unigramProperty) {
|
||||||
// This method should not be called for non-updatable dictionary.
|
// This method should not be called for non-updatable dictionary.
|
||||||
AKLOGI("Warning: addUnigramEntry() is called for non-updatable dictionary.");
|
AKLOGI("Warning: addUnigramEntry() is called for non-updatable dictionary.");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool removeUnigramEntry(const int *const word, const int length) {
|
bool removeUnigramEntry(const CodePointArrayView wordCodePoints) {
|
||||||
// This method should not be called for non-updatable dictionary.
|
// This method should not be called for non-updatable dictionary.
|
||||||
AKLOGI("Warning: removeUnigramEntry() is called for non-updatable dictionary.");
|
AKLOGI("Warning: removeUnigramEntry() is called for non-updatable dictionary.");
|
||||||
return false;
|
return false;
|
||||||
|
@ -103,8 +104,8 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const int *const word,
|
bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
|
||||||
const int length) {
|
const CodePointArrayView wordCodePoints) {
|
||||||
// This method should not be called for non-updatable dictionary.
|
// This method should not be called for non-updatable dictionary.
|
||||||
AKLOGI("Warning: removeNgramEntry() is called for non-updatable dictionary.");
|
AKLOGI("Warning: removeNgramEntry() is called for non-updatable dictionary.");
|
||||||
return false;
|
return false;
|
||||||
|
@ -136,8 +137,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const WordProperty getWordProperty(const int *const codePoints,
|
const WordProperty getWordProperty(const CodePointArrayView wordCodePoints) const;
|
||||||
const int codePointCount) const;
|
|
||||||
|
|
||||||
int getNextWordAndNextToken(const int token, int *const outCodePoints,
|
int getNextWordAndNextToken(const int token, int *const outCodePoints,
|
||||||
int *const outCodePointCount);
|
int *const outCodePointCount);
|
||||||
|
|
|
@ -94,12 +94,12 @@ int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
|
||||||
return codePointCount;
|
return codePointCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
int Ver4PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
|
int Ver4PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const CodePointArrayView wordCodePoints,
|
||||||
const int length, const bool forceLowerCaseSearch) const {
|
const bool forceLowerCaseSearch) const {
|
||||||
DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
|
DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
|
||||||
readingHelper.initWithPtNodeArrayPos(getRootPosition());
|
readingHelper.initWithPtNodeArrayPos(getRootPosition());
|
||||||
const int ptNodePos =
|
const int ptNodePos = readingHelper.getTerminalPtNodePositionOfWord(wordCodePoints.data(),
|
||||||
readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch);
|
wordCodePoints.size(), forceLowerCaseSearch);
|
||||||
if (readingHelper.isError()) {
|
if (readingHelper.isError()) {
|
||||||
mIsCorrupted = true;
|
mIsCorrupted = true;
|
||||||
AKLOGE("Dictionary reading error in createAndGetAllChildDicNodes().");
|
AKLOGE("Dictionary reading error in createAndGetAllChildDicNodes().");
|
||||||
|
@ -189,7 +189,7 @@ int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) con
|
||||||
ptNodeParams.getTerminalId());
|
ptNodeParams.getTerminalId());
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int length,
|
bool Ver4PatriciaTriePolicy::addUnigramEntry(const CodePointArrayView wordCodePoints,
|
||||||
const UnigramProperty *const unigramProperty) {
|
const UnigramProperty *const unigramProperty) {
|
||||||
if (!mBuffers->isUpdatable()) {
|
if (!mBuffers->isUpdatable()) {
|
||||||
AKLOGI("Warning: addUnigramEntry() is called for non-updatable dictionary.");
|
AKLOGI("Warning: addUnigramEntry() is called for non-updatable dictionary.");
|
||||||
|
@ -200,8 +200,9 @@ bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int le
|
||||||
mDictBuffer->getTailPosition());
|
mDictBuffer->getTailPosition());
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (length > MAX_WORD_LENGTH) {
|
if (wordCodePoints.size() > MAX_WORD_LENGTH) {
|
||||||
AKLOGE("The word is too long to insert to the dictionary, length: %d", length);
|
AKLOGE("The word is too long to insert to the dictionary, length: %zd",
|
||||||
|
wordCodePoints.size());
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
for (const auto &shortcut : unigramProperty->getShortcuts()) {
|
for (const auto &shortcut : unigramProperty->getShortcuts()) {
|
||||||
|
@ -215,8 +216,8 @@ bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int le
|
||||||
readingHelper.initWithPtNodeArrayPos(getRootPosition());
|
readingHelper.initWithPtNodeArrayPos(getRootPosition());
|
||||||
bool addedNewUnigram = false;
|
bool addedNewUnigram = false;
|
||||||
int codePointsToAdd[MAX_WORD_LENGTH];
|
int codePointsToAdd[MAX_WORD_LENGTH];
|
||||||
int codePointCountToAdd = length;
|
int codePointCountToAdd = wordCodePoints.size();
|
||||||
memmove(codePointsToAdd, word, sizeof(int) * length);
|
memmove(codePointsToAdd, wordCodePoints.data(), sizeof(int) * codePointCountToAdd);
|
||||||
if (unigramProperty->representsBeginningOfSentence()) {
|
if (unigramProperty->representsBeginningOfSentence()) {
|
||||||
codePointCountToAdd = CharUtils::attachBeginningOfSentenceMarker(codePointsToAdd,
|
codePointCountToAdd = CharUtils::attachBeginningOfSentenceMarker(codePointsToAdd,
|
||||||
codePointCountToAdd, MAX_WORD_LENGTH);
|
codePointCountToAdd, MAX_WORD_LENGTH);
|
||||||
|
@ -224,14 +225,15 @@ bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int le
|
||||||
if (codePointCountToAdd <= 0) {
|
if (codePointCountToAdd <= 0) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (mUpdatingHelper.addUnigramWord(&readingHelper, codePointsToAdd, codePointCountToAdd,
|
const CodePointArrayView codePointArrayView(codePointsToAdd, codePointCountToAdd);
|
||||||
unigramProperty, &addedNewUnigram)) {
|
if (mUpdatingHelper.addUnigramWord(&readingHelper, codePointArrayView.data(),
|
||||||
|
codePointArrayView.size(), unigramProperty, &addedNewUnigram)) {
|
||||||
if (addedNewUnigram && !unigramProperty->representsBeginningOfSentence()) {
|
if (addedNewUnigram && !unigramProperty->representsBeginningOfSentence()) {
|
||||||
mUnigramCount++;
|
mUnigramCount++;
|
||||||
}
|
}
|
||||||
if (unigramProperty->getShortcuts().size() > 0) {
|
if (unigramProperty->getShortcuts().size() > 0) {
|
||||||
// Add shortcut target.
|
// Add shortcut target.
|
||||||
const int wordPos = getTerminalPtNodePositionOfWord(word, length,
|
const int wordPos = getTerminalPtNodePositionOfWord(codePointArrayView,
|
||||||
false /* forceLowerCaseSearch */);
|
false /* forceLowerCaseSearch */);
|
||||||
if (wordPos == NOT_A_DICT_POS) {
|
if (wordPos == NOT_A_DICT_POS) {
|
||||||
AKLOGE("Cannot find terminal PtNode position to add shortcut target.");
|
AKLOGE("Cannot find terminal PtNode position to add shortcut target.");
|
||||||
|
@ -254,12 +256,12 @@ bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int le
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Ver4PatriciaTriePolicy::removeUnigramEntry(const int *const word, const int length) {
|
bool Ver4PatriciaTriePolicy::removeUnigramEntry(const CodePointArrayView wordCodePoints) {
|
||||||
if (!mBuffers->isUpdatable()) {
|
if (!mBuffers->isUpdatable()) {
|
||||||
AKLOGI("Warning: removeUnigramEntry() is called for non-updatable dictionary.");
|
AKLOGI("Warning: removeUnigramEntry() is called for non-updatable dictionary.");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
const int ptNodePos = getTerminalPtNodePositionOfWord(word, length,
|
const int ptNodePos = getTerminalPtNodePositionOfWord(wordCodePoints,
|
||||||
false /* forceLowerCaseSearch */);
|
false /* forceLowerCaseSearch */);
|
||||||
if (ptNodePos == NOT_A_DICT_POS) {
|
if (ptNodePos == NOT_A_DICT_POS) {
|
||||||
return false;
|
return false;
|
||||||
|
@ -313,7 +315,6 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
|
||||||
false /* isBlacklisted */, MAX_PROBABILITY /* probability */,
|
false /* isBlacklisted */, MAX_PROBABILITY /* probability */,
|
||||||
NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts);
|
NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts);
|
||||||
if (!addUnigramEntry(prevWordsInfo->getNthPrevWordCodePoints(1 /* n */),
|
if (!addUnigramEntry(prevWordsInfo->getNthPrevWordCodePoints(1 /* n */),
|
||||||
prevWordsInfo->getNthPrevWordCodePointCount(1 /* n */),
|
|
||||||
&beginningOfSentenceUnigramProperty)) {
|
&beginningOfSentenceUnigramProperty)) {
|
||||||
AKLOGE("Cannot add unigram entry for the beginning-of-sentence.");
|
AKLOGE("Cannot add unigram entry for the beginning-of-sentence.");
|
||||||
return false;
|
return false;
|
||||||
|
@ -326,8 +327,8 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
const int word1Pos = getTerminalPtNodePositionOfWord(
|
const int word1Pos = getTerminalPtNodePositionOfWord(
|
||||||
bigramProperty->getTargetCodePoints()->data(),
|
CodePointArrayView(*bigramProperty->getTargetCodePoints()),
|
||||||
bigramProperty->getTargetCodePoints()->size(), false /* forceLowerCaseSearch */);
|
false /* forceLowerCaseSearch */);
|
||||||
if (word1Pos == NOT_A_DICT_POS) {
|
if (word1Pos == NOT_A_DICT_POS) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -344,7 +345,7 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
|
bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
|
||||||
const int *const word, const int length) {
|
const CodePointArrayView wordCodePoints) {
|
||||||
if (!mBuffers->isUpdatable()) {
|
if (!mBuffers->isUpdatable()) {
|
||||||
AKLOGI("Warning: removeNgramEntry() is called for non-updatable dictionary.");
|
AKLOGI("Warning: removeNgramEntry() is called for non-updatable dictionary.");
|
||||||
return false;
|
return false;
|
||||||
|
@ -358,8 +359,9 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
|
||||||
AKLOGE("prev words info is not valid for removing n-gram entry form the dictionary.");
|
AKLOGE("prev words info is not valid for removing n-gram entry form the dictionary.");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (length > MAX_WORD_LENGTH) {
|
if (wordCodePoints.size() > MAX_WORD_LENGTH) {
|
||||||
AKLOGE("word is too long to remove n-gram entry form the dictionary. length: %d", length);
|
AKLOGE("word is too long to remove n-gram entry form the dictionary. length: %zd",
|
||||||
|
wordCodePoints.size());
|
||||||
}
|
}
|
||||||
int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
|
int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
|
||||||
prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos,
|
prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos,
|
||||||
|
@ -369,7 +371,7 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
|
||||||
if (prevWordsPtNodePos[0] == NOT_A_DICT_POS) {
|
if (prevWordsPtNodePos[0] == NOT_A_DICT_POS) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
const int wordPos = getTerminalPtNodePositionOfWord(word, length,
|
const int wordPos = getTerminalPtNodePositionOfWord(wordCodePoints,
|
||||||
false /* forceLowerCaseSearch */);
|
false /* forceLowerCaseSearch */);
|
||||||
if (wordPos == NOT_A_DICT_POS) {
|
if (wordPos == NOT_A_DICT_POS) {
|
||||||
return false;
|
return false;
|
||||||
|
@ -453,9 +455,9 @@ void Ver4PatriciaTriePolicy::getProperty(const char *const query, const int quer
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const codePoints,
|
const WordProperty Ver4PatriciaTriePolicy::getWordProperty(
|
||||||
const int codePointCount) const {
|
const CodePointArrayView wordCodePoints) const {
|
||||||
const int ptNodePos = getTerminalPtNodePositionOfWord(codePoints, codePointCount,
|
const int ptNodePos = getTerminalPtNodePositionOfWord(wordCodePoints,
|
||||||
false /* forceLowerCaseSearch */);
|
false /* forceLowerCaseSearch */);
|
||||||
if (ptNodePos == NOT_A_DICT_POS) {
|
if (ptNodePos == NOT_A_DICT_POS) {
|
||||||
AKLOGE("getWordProperty is called for invalid word.");
|
AKLOGE("getWordProperty is called for invalid word.");
|
||||||
|
|
|
@ -30,6 +30,7 @@
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||||
|
#include "utils/int_array_view.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
|
@ -65,8 +66,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
const int terminalPtNodePos, const int maxCodePointCount, int *const outCodePoints,
|
const int terminalPtNodePos, const int maxCodePointCount, int *const outCodePoints,
|
||||||
int *const outUnigramProbability) const;
|
int *const outUnigramProbability) const;
|
||||||
|
|
||||||
int getTerminalPtNodePositionOfWord(const int *const inWord,
|
int getTerminalPtNodePositionOfWord(const CodePointArrayView wordCodePoints,
|
||||||
const int length, const bool forceLowerCaseSearch) const;
|
const bool forceLowerCaseSearch) const;
|
||||||
|
|
||||||
int getProbability(const int unigramProbability, const int bigramProbability) const;
|
int getProbability(const int unigramProbability, const int bigramProbability) const;
|
||||||
|
|
||||||
|
@ -85,16 +86,16 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
return &mShortcutPolicy;
|
return &mShortcutPolicy;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool addUnigramEntry(const int *const word, const int length,
|
bool addUnigramEntry(const CodePointArrayView wordCodePoints,
|
||||||
const UnigramProperty *const unigramProperty);
|
const UnigramProperty *const unigramProperty);
|
||||||
|
|
||||||
bool removeUnigramEntry(const int *const word, const int length);
|
bool removeUnigramEntry(const CodePointArrayView wordCodePoints);
|
||||||
|
|
||||||
bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
|
bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
|
||||||
const BigramProperty *const bigramProperty);
|
const BigramProperty *const bigramProperty);
|
||||||
|
|
||||||
bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const int *const word1,
|
bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
|
||||||
const int length1);
|
const CodePointArrayView wordCodePoints);
|
||||||
|
|
||||||
bool flush(const char *const filePath);
|
bool flush(const char *const filePath);
|
||||||
|
|
||||||
|
@ -105,8 +106,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
void getProperty(const char *const query, const int queryLength, char *const outResult,
|
void getProperty(const char *const query, const int queryLength, char *const outResult,
|
||||||
const int maxResultLength);
|
const int maxResultLength);
|
||||||
|
|
||||||
const WordProperty getWordProperty(const int *const codePoints,
|
const WordProperty getWordProperty(const CodePointArrayView wordCodePoints) const;
|
||||||
const int codePointCount) const;
|
|
||||||
|
|
||||||
int getNextWordAndNextToken(const int token, int *const outCodePoints,
|
int getNextWordAndNextToken(const int token, int *const outCodePoints,
|
||||||
int *const outCodePointCount);
|
int *const outCodePointCount);
|
||||||
|
|
|
@ -105,6 +105,7 @@ class IntArrayView {
|
||||||
|
|
||||||
using WordIdArrayView = IntArrayView;
|
using WordIdArrayView = IntArrayView;
|
||||||
using PtNodePosArrayView = IntArrayView;
|
using PtNodePosArrayView = IntArrayView;
|
||||||
|
using CodePointArrayView = IntArrayView;
|
||||||
|
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif // LATINIME_MEMORY_VIEW_H
|
#endif // LATINIME_MEMORY_VIEW_H
|
||||||
|
|
Loading…
Reference in a new issue