Merge "Implement reading side of updating methods of dpt."
This commit is contained in:
commit
c6cfa6c657
5 changed files with 192 additions and 9 deletions
|
@ -76,6 +76,7 @@ LATIN_IME_CORE_SRC_FILES := \
|
||||||
dynamic_patricia_trie_policy.cpp \
|
dynamic_patricia_trie_policy.cpp \
|
||||||
dynamic_patricia_trie_reading_helper.cpp \
|
dynamic_patricia_trie_reading_helper.cpp \
|
||||||
dynamic_patricia_trie_reading_utils.cpp \
|
dynamic_patricia_trie_reading_utils.cpp \
|
||||||
|
dynamic_patricia_trie_writing_helper.cpp \
|
||||||
patricia_trie_policy.cpp \
|
patricia_trie_policy.cpp \
|
||||||
patricia_trie_reading_utils.cpp) \
|
patricia_trie_reading_utils.cpp) \
|
||||||
$(addprefix suggest/policyimpl/dictionary/utils/, \
|
$(addprefix suggest/policyimpl/dictionary/utils/, \
|
||||||
|
|
|
@ -22,6 +22,7 @@
|
||||||
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
|
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
|
||||||
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
|
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
|
||||||
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
|
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
|
||||||
#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
|
#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
@ -178,8 +179,12 @@ bool DynamicPatriciaTriePolicy::addUnigramWord(const int *const word, const int
|
||||||
AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
|
AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
// TODO: Implement.
|
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
|
||||||
return false;
|
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
||||||
|
readingHelper.initWithNodeArrayPos(getRootPosition());
|
||||||
|
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
|
||||||
|
&mBigramListPolicy, &mShortcutListPolicy);
|
||||||
|
return writingHelper.addUnigramWord(&readingHelper, word, length, probability);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DynamicPatriciaTriePolicy::addBigramWords(const int *const word0, const int length0,
|
bool DynamicPatriciaTriePolicy::addBigramWords(const int *const word0, const int length0,
|
||||||
|
@ -188,9 +193,20 @@ bool DynamicPatriciaTriePolicy::addBigramWords(const int *const word0, const int
|
||||||
AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary.");
|
AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary.");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
// TODO: Implement.
|
const int word0Pos = getTerminalNodePositionOfWord(word0, length0,
|
||||||
|
false /* forceLowerCaseSearch */);
|
||||||
|
if (word0Pos == NOT_A_VALID_WORD_POS) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
const int word1Pos = getTerminalNodePositionOfWord(word1, length1,
|
||||||
|
false /* forceLowerCaseSearch */);
|
||||||
|
if (word1Pos == NOT_A_VALID_WORD_POS) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
|
||||||
|
&mBigramListPolicy, &mShortcutListPolicy);
|
||||||
|
return writingHelper.addBigramWords(word0Pos, word1Pos, probability);
|
||||||
|
}
|
||||||
|
|
||||||
bool DynamicPatriciaTriePolicy::removeBigramWords(const int *const word0, const int length0,
|
bool DynamicPatriciaTriePolicy::removeBigramWords(const int *const word0, const int length0,
|
||||||
const int *const word1, const int length1) {
|
const int *const word1, const int length1) {
|
||||||
|
@ -198,8 +214,19 @@ bool DynamicPatriciaTriePolicy::removeBigramWords(const int *const word0, const
|
||||||
AKLOGI("Warning: removeBigramWords() is called for non-updatable dictionary.");
|
AKLOGI("Warning: removeBigramWords() is called for non-updatable dictionary.");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
// TODO: Implement.
|
const int word0Pos = getTerminalNodePositionOfWord(word0, length0,
|
||||||
|
false /* forceLowerCaseSearch */);
|
||||||
|
if (word0Pos == NOT_A_VALID_WORD_POS) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
const int word1Pos = getTerminalNodePositionOfWord(word1, length1,
|
||||||
|
false /* forceLowerCaseSearch */);
|
||||||
|
if (word1Pos == NOT_A_VALID_WORD_POS) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
|
||||||
|
&mBigramListPolicy, &mShortcutListPolicy);
|
||||||
|
return writingHelper.removeBigramWords(word0Pos, word1Pos);
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
|
|
|
@ -88,9 +88,9 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
|
|
||||||
const MmappedBuffer *const mBuffer;
|
const MmappedBuffer *const mBuffer;
|
||||||
const HeaderPolicy mHeaderPolicy;
|
const HeaderPolicy mHeaderPolicy;
|
||||||
const BufferWithExtendableBuffer mBufferWithExtendableBuffer;
|
BufferWithExtendableBuffer mBufferWithExtendableBuffer;
|
||||||
const DynamicBigramListPolicy mBigramListPolicy;
|
DynamicBigramListPolicy mBigramListPolicy;
|
||||||
const DynamicShortcutListPolicy mShortcutListPolicy;
|
DynamicShortcutListPolicy mShortcutListPolicy;
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H
|
#endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H
|
||||||
|
|
|
@ -0,0 +1,99 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2013, The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
|
||||||
|
|
||||||
|
#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
// Walks the trie through readingHelper, looking for the node that corresponds to the given word.
//
// readingHelper   reader used to traverse nodes; advanced as siblings and children are visited.
// wordCodePoints  code points of the word to add.
// codePointCount  number of entries in wordCodePoints.
// probability     probability for the word (not used yet; see the TODOs below).
//
// Only the reading side is implemented: every path currently returns false because the writing
// steps are still TODOs.
bool DynamicPatriciaTrieWritingHelper::addUnigramWord(
        DynamicPatriciaTrieReadingHelper *const readingHelper,
        const int *const wordCodePoints, const int codePointCount, const int probability) {
    if (!wordCodePoints || codePointCount <= 0) {
        // An empty word has no code point to compare against; bail out before the first
        // wordCodePoints[...] access below would read past the end of the array.
        return false;
    }
    while (!readingHelper->isEnd()) {
        // Number of word code points that were consumed by the ancestors of the current node.
        const int matchedCodePointCount = readingHelper->getPrevTotalCodePointCount();
        if (!readingHelper->isMatchedCodePoint(0 /* index */,
                wordCodePoints[matchedCodePointCount])) {
            // The first code point is different from target code point. Skip this node and read
            // the next sibling node.
            readingHelper->readNextSiblingNode();
            continue;
        }
        // Check following merged node code points.
        const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper->getNodeReader();
        const int nodeCodePointCount = nodeReader->getCodePointCount();
        for (int j = 1; j < nodeCodePointCount; ++j) {
            const int nextIndex = matchedCodePointCount + j;
            if (nextIndex >= codePointCount) {
                // The word ends in the middle of this node.
                // TODO: split current node after j - 1, create child and make this terminal.
                return false;
            }
            if (!readingHelper->isMatchedCodePoint(j, wordCodePoints[nextIndex])) {
                // The word diverges from this node at position j.
                // TODO: split current node after j - 1 and create two children.
                return false;
            }
        }
        // All characters are matched.
        if (codePointCount == readingHelper->getTotalCodePointCount()) {
            if (nodeReader->isTerminal()) {
                // TODO: Update probability.
            } else {
                // TODO: Make it terminal and update probability.
            }
            return false;
        }
        if (!nodeReader->hasChildren()) {
            // TODO: Create children node array and add new node as a child.
            return false;
        }
        // Advance to the children nodes.
        readingHelper->readChildNode();
    }
    if (readingHelper->isError()) {
        // The dictionary is invalid.
        return false;
    }
    // No sibling in the current node array matched the next code point.
    // TODO: add at the last position of the node array.
    return false;
}
|
||||||
|
|
||||||
|
bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const int word1Pos,
|
||||||
|
const int probability) {
|
||||||
|
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
|
||||||
|
nodeReader.fetchNodeInfoFromBuffer(word0Pos);
|
||||||
|
if (nodeReader.isDeleted()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// TODO: Implement.
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Remove a bigram relation from word0Pos to word1Pos.
|
||||||
|
bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, const int word1Pos) {
|
||||||
|
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
|
||||||
|
nodeReader.fetchNodeInfoFromBuffer(word0Pos);
|
||||||
|
if (nodeReader.isDeleted() || nodeReader.getBigramsPos() == NOT_A_DICT_POS) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// TODO: Implement.
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace latinime
|
|
@ -0,0 +1,56 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2013, The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H
|
||||||
|
#define LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H
|
||||||
|
|
||||||
|
#include "defines.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
class BufferWithExtendableBuffer;
|
||||||
|
class DynamicBigramListPolicy;
|
||||||
|
class DynamicPatriciaTrieReadingHelper;
|
||||||
|
class DynamicShortcutListPolicy;
|
||||||
|
|
||||||
|
class DynamicPatriciaTrieWritingHelper {
|
||||||
|
public:
|
||||||
|
DynamicPatriciaTrieWritingHelper(BufferWithExtendableBuffer *const buffer,
|
||||||
|
DynamicBigramListPolicy *const bigramPolicy,
|
||||||
|
DynamicShortcutListPolicy *const shortcutPolicy)
|
||||||
|
: mBuffer(buffer), mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy) {}
|
||||||
|
|
||||||
|
~DynamicPatriciaTrieWritingHelper() {}
|
||||||
|
|
||||||
|
// Add a word to the dictionary. If the word already exists, update the probability.
|
||||||
|
bool addUnigramWord(DynamicPatriciaTrieReadingHelper *const readingHelper,
|
||||||
|
const int *const wordCodePoints, const int codePointCount, const int probability);
|
||||||
|
|
||||||
|
// Add a bigram relation from word0Pos to word1Pos.
|
||||||
|
bool addBigramWords(const int word0Pos, const int word1Pos, const int probability);
|
||||||
|
|
||||||
|
// Remove a bigram relation from word0Pos to word1Pos.
|
||||||
|
bool removeBigramWords(const int word0Pos, const int word1Pos);
|
||||||
|
|
||||||
|
private:
|
||||||
|
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper);
|
||||||
|
|
||||||
|
BufferWithExtendableBuffer *const mBuffer;
|
||||||
|
DynamicBigramListPolicy *const mBigramPolicy;
|
||||||
|
DynamicShortcutListPolicy *const mShortcutPolicy;
|
||||||
|
};
|
||||||
|
} // namespace latinime
|
||||||
|
#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H */
|
Loading…
Reference in a new issue