am da99cfc2
: Merge "Introduce OffdeviceIntermediateDict for dicttolkit."
* commit 'da99cfc29d71f7aa417b4646234d532ed5dcb7de': Introduce OffdeviceIntermediateDict for dicttolkit.
This commit is contained in:
commit
bdc9221e43
6 changed files with 391 additions and 1 deletions
|
@ -22,8 +22,13 @@ LATIN_IME_DICT_TOOLKIT_SRC_FILES := \
|
||||||
help_executor.cpp \
|
help_executor.cpp \
|
||||||
info_executor.cpp \
|
info_executor.cpp \
|
||||||
makedict_executor.cpp) \
|
makedict_executor.cpp) \
|
||||||
|
$(addprefix offdevice_intermediate_dict/, \
|
||||||
|
offdevice_intermediate_dict.cpp) \
|
||||||
utils/command_utils.cpp
|
utils/command_utils.cpp
|
||||||
|
|
||||||
LATIN_IME_DICT_TOOLKIT_TEST_FILES := \
|
LATIN_IME_DICT_TOOLKIT_TEST_FILES := \
|
||||||
dict_toolkit_defines_test.cpp \
|
dict_toolkit_defines_test.cpp \
|
||||||
utils/command_utils_test.cpp
|
$(addprefix offdevice_intermediate_dict/, \
|
||||||
|
offdevice_intermediate_dict_test.cpp) \
|
||||||
|
$(addprefix utils/, \
|
||||||
|
command_utils_test.cpp)
|
||||||
|
|
|
@ -0,0 +1,126 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2014 The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "offdevice_intermediate_dict/offdevice_intermediate_dict.h"
|
||||||
|
|
||||||
|
#include "offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
namespace dicttoolkit {
|
||||||
|
|
||||||
|
bool OffdeviceIntermediateDict::addWord(const WordProperty &wordProperty) {
|
||||||
|
const CodePointArrayView codePoints = wordProperty.getCodePoints();
|
||||||
|
if (codePoints.empty() || codePoints.size() > MAX_WORD_LENGTH) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return addWordInner(codePoints, wordProperty, mRootPtNodeArray);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool OffdeviceIntermediateDict::addWordInner(const CodePointArrayView codePoints,
|
||||||
|
const WordProperty &wordProperty, OffdeviceIntermediateDictPtNodeArray &ptNodeArray) {
|
||||||
|
auto ptNodeList = ptNodeArray.getMutablePtNodeList();
|
||||||
|
auto ptNodeIt = ptNodeList->begin();
|
||||||
|
for (; ptNodeIt != ptNodeList->end(); ++ptNodeIt) {
|
||||||
|
const auto &ptNode = *ptNodeIt;
|
||||||
|
const CodePointArrayView ptNodeCodePoints = ptNode->getPtNodeCodePoints();
|
||||||
|
if (codePoints[0] < ptNodeCodePoints[0]) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (codePoints[0] > ptNodeCodePoints[0]) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
size_t i = 1;
|
||||||
|
for (; i < codePoints.size(); ++i) {
|
||||||
|
if (i >= ptNodeCodePoints.size()) {
|
||||||
|
// Add new child.
|
||||||
|
return addWordInner(codePoints.skip(i), wordProperty,
|
||||||
|
ptNode->getChildrenPtNodeArray());
|
||||||
|
}
|
||||||
|
if (codePoints[i] != ptNodeCodePoints[i]) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (codePoints.size() == i && codePoints.size() == ptNodeCodePoints.size()) {
|
||||||
|
// All code points matched.
|
||||||
|
if (ptNode->getWordProperty()) {
|
||||||
|
// Adding the same word multiple times is not supported.
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
ptNodeList->insert(ptNodeIt,
|
||||||
|
std::make_shared<OffdeviceIntermediateDictPtNode>(wordProperty, *ptNode));
|
||||||
|
ptNodeList->erase(ptNodeIt);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
// The (i+1)-th elements are different.
|
||||||
|
// Create and Add new parent ptNode for the common part.
|
||||||
|
auto newPtNode = codePoints.size() == i
|
||||||
|
? std::make_shared<OffdeviceIntermediateDictPtNode>(codePoints, wordProperty)
|
||||||
|
: std::make_shared<OffdeviceIntermediateDictPtNode>(codePoints.limit(i));
|
||||||
|
ptNodeList->insert(ptNodeIt, newPtNode);
|
||||||
|
OffdeviceIntermediateDictPtNodeArray &childrenPtNodeArray =
|
||||||
|
newPtNode->getChildrenPtNodeArray();
|
||||||
|
// Add new child for the existing ptNode.
|
||||||
|
childrenPtNodeArray.getMutablePtNodeList()->push_back(
|
||||||
|
std::make_shared<OffdeviceIntermediateDictPtNode>(
|
||||||
|
ptNodeCodePoints.skip(i), *ptNode));
|
||||||
|
ptNodeList->erase(ptNodeIt);
|
||||||
|
if (codePoints.size() != i) {
|
||||||
|
// Add a child for the new word.
|
||||||
|
return addWordInner(codePoints.skip(i), wordProperty, childrenPtNodeArray);
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
ptNodeList->insert(ptNodeIt,
|
||||||
|
std::make_shared<OffdeviceIntermediateDictPtNode>(codePoints, wordProperty));
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
const WordProperty *OffdeviceIntermediateDict::getWordProperty(
|
||||||
|
const CodePointArrayView codePoints) const {
|
||||||
|
const OffdeviceIntermediateDictPtNodeArray *ptNodeArray = &mRootPtNodeArray;
|
||||||
|
for (size_t i = 0; i < codePoints.size();) {
|
||||||
|
bool foundNext = false;
|
||||||
|
for (const auto ptNode : ptNodeArray->getPtNodeList()) {
|
||||||
|
const CodePointArrayView ptNodeCodePoints = ptNode->getPtNodeCodePoints();
|
||||||
|
if (codePoints[i] < ptNodeCodePoints[0]) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (codePoints[i] > ptNodeCodePoints[0]
|
||||||
|
|| codePoints.size() < ptNodeCodePoints.size()) {
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
for (size_t j = 1; j < ptNodeCodePoints.size(); ++j) {
|
||||||
|
if (codePoints[i + j] != ptNodeCodePoints[j]) {
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
i += ptNodeCodePoints.size();
|
||||||
|
if (i == codePoints.size()) {
|
||||||
|
return ptNode->getWordProperty();
|
||||||
|
}
|
||||||
|
ptNodeArray = &ptNode->getChildrenPtNodeArray();
|
||||||
|
foundNext = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (!foundNext) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace dicttoolkit
|
||||||
|
} // namespace latinime
|
|
@ -0,0 +1,48 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2014 The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_H
|
||||||
|
#define LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_H
|
||||||
|
|
||||||
|
#include "dict_toolkit_defines.h"
|
||||||
|
#include "offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h"
|
||||||
|
#include "suggest/core/dictionary/property/word_property.h"
|
||||||
|
#include "utils/int_array_view.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
namespace dicttoolkit {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* On memory patricia trie to represent a dictionary.
|
||||||
|
*/
|
||||||
|
class OffdeviceIntermediateDict final {
|
||||||
|
public:
|
||||||
|
bool addWord(const WordProperty &wordProperty);
|
||||||
|
// The returned value will be invalid after modifying the dictionary. e.g. calling addWord().
|
||||||
|
const WordProperty *getWordProperty(const CodePointArrayView codePoints) const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
DISALLOW_ASSIGNMENT_OPERATOR(OffdeviceIntermediateDict);
|
||||||
|
|
||||||
|
OffdeviceIntermediateDictPtNodeArray mRootPtNodeArray;
|
||||||
|
|
||||||
|
bool addWordInner(const CodePointArrayView codePoints, const WordProperty &wordProperty,
|
||||||
|
OffdeviceIntermediateDictPtNodeArray &ptNodeArray);
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace dicttoolkit
|
||||||
|
} // namespace latinime
|
||||||
|
#endif // LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_H
|
|
@ -0,0 +1,79 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2014 The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_H
|
||||||
|
#define LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_H
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
|
#include "dict_toolkit_defines.h"
|
||||||
|
#include "offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h"
|
||||||
|
#include "suggest/core/dictionary/property/word_property.h"
|
||||||
|
#include "utils/int_array_view.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
namespace dicttoolkit {
|
||||||
|
|
||||||
|
class OffdeviceIntermediateDictPtNode final {
|
||||||
|
public:
|
||||||
|
// Non-terminal
|
||||||
|
OffdeviceIntermediateDictPtNode(const CodePointArrayView ptNodeCodePoints)
|
||||||
|
: mPtNodeCodePoints(ptNodeCodePoints.toVector()), mChildrenPtNodeArray(),
|
||||||
|
mWortProperty(nullptr) {}
|
||||||
|
|
||||||
|
// Terminal
|
||||||
|
OffdeviceIntermediateDictPtNode(const CodePointArrayView ptNodeCodePoints,
|
||||||
|
const WordProperty &wordProperty)
|
||||||
|
: mPtNodeCodePoints(ptNodeCodePoints.toVector()), mChildrenPtNodeArray(),
|
||||||
|
mWortProperty(new WordProperty(wordProperty)) {}
|
||||||
|
|
||||||
|
// Replacing PtNodeCodePoints.
|
||||||
|
OffdeviceIntermediateDictPtNode(const CodePointArrayView ptNodeCodePoints,
|
||||||
|
const OffdeviceIntermediateDictPtNode &ptNode)
|
||||||
|
: mPtNodeCodePoints(ptNodeCodePoints.toVector()),
|
||||||
|
mChildrenPtNodeArray(ptNode.mChildrenPtNodeArray),
|
||||||
|
mWortProperty(new WordProperty(*ptNode.mWortProperty)) {}
|
||||||
|
|
||||||
|
// Replacing WordProperty.
|
||||||
|
OffdeviceIntermediateDictPtNode(const WordProperty &wordProperty,
|
||||||
|
const OffdeviceIntermediateDictPtNode &ptNode)
|
||||||
|
: mPtNodeCodePoints(ptNode.mPtNodeCodePoints),
|
||||||
|
mChildrenPtNodeArray(ptNode.mChildrenPtNodeArray),
|
||||||
|
mWortProperty(new WordProperty(wordProperty)) {}
|
||||||
|
|
||||||
|
const WordProperty *getWordProperty() const {
|
||||||
|
return mWortProperty.get();
|
||||||
|
}
|
||||||
|
|
||||||
|
const CodePointArrayView getPtNodeCodePoints() const {
|
||||||
|
return CodePointArrayView(mPtNodeCodePoints);
|
||||||
|
}
|
||||||
|
|
||||||
|
OffdeviceIntermediateDictPtNodeArray &getChildrenPtNodeArray() {
|
||||||
|
return mChildrenPtNodeArray;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
DISALLOW_COPY_AND_ASSIGN(OffdeviceIntermediateDictPtNode);
|
||||||
|
|
||||||
|
const std::vector<int> mPtNodeCodePoints;
|
||||||
|
OffdeviceIntermediateDictPtNodeArray mChildrenPtNodeArray;
|
||||||
|
const std::unique_ptr<WordProperty> mWortProperty;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace dicttoolkit
|
||||||
|
} // namespace latinime
|
||||||
|
#endif // LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_H
|
|
@ -0,0 +1,48 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2014 The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_ARRAY_H
|
||||||
|
#define LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_ARRAY_H
|
||||||
|
|
||||||
|
#include <list>
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
|
#include "dict_toolkit_defines.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
namespace dicttoolkit {
|
||||||
|
|
||||||
|
class OffdeviceIntermediateDictPtNode;
|
||||||
|
|
||||||
|
class OffdeviceIntermediateDictPtNodeArray final {
|
||||||
|
public:
|
||||||
|
const std::list<std::shared_ptr<OffdeviceIntermediateDictPtNode>> &getPtNodeList() const {
|
||||||
|
return mPtNodes;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::list<std::shared_ptr<OffdeviceIntermediateDictPtNode>> *getMutablePtNodeList() {
|
||||||
|
return &mPtNodes;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
DISALLOW_ASSIGNMENT_OPERATOR(OffdeviceIntermediateDictPtNodeArray);
|
||||||
|
|
||||||
|
std::list<std::shared_ptr<OffdeviceIntermediateDictPtNode>> mPtNodes;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace dicttoolkit
|
||||||
|
} // namespace latinime
|
||||||
|
#endif // LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_ARRAY_H
|
|
@ -0,0 +1,84 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2014 The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "offdevice_intermediate_dict/offdevice_intermediate_dict.h"
|
||||||
|
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "suggest/core/dictionary/property/word_property.h"
|
||||||
|
#include "utils/int_array_view.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
namespace dicttoolkit {
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
const std::vector<int> getCodePointVector(const char *str) {
|
||||||
|
std::vector<int> codePoints;
|
||||||
|
while (*str) {
|
||||||
|
codePoints.push_back(*str);
|
||||||
|
++str;
|
||||||
|
}
|
||||||
|
return codePoints;
|
||||||
|
}
|
||||||
|
|
||||||
|
const WordProperty getDummpWordProperty(const std::vector<int> &&codePoints) {
|
||||||
|
return WordProperty(std::move(codePoints), UnigramProperty(), std::vector<NgramProperty>());
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(OffdeviceIntermediateDictTest, TestAddWordProperties) {
|
||||||
|
OffdeviceIntermediateDict dict;
|
||||||
|
EXPECT_EQ(nullptr, dict.getWordProperty(CodePointArrayView()));
|
||||||
|
|
||||||
|
const WordProperty wordProperty0 = getDummpWordProperty(getCodePointVector("abcd"));
|
||||||
|
EXPECT_TRUE(dict.addWord(wordProperty0));
|
||||||
|
EXPECT_NE(nullptr, dict.getWordProperty(wordProperty0.getCodePoints()));
|
||||||
|
|
||||||
|
const WordProperty wordProperty1 = getDummpWordProperty(getCodePointVector("efgh"));
|
||||||
|
EXPECT_TRUE(dict.addWord(wordProperty1));
|
||||||
|
EXPECT_NE(nullptr, dict.getWordProperty(wordProperty1.getCodePoints()));
|
||||||
|
|
||||||
|
const WordProperty wordProperty2 = getDummpWordProperty(getCodePointVector("ab"));
|
||||||
|
EXPECT_TRUE(dict.addWord(wordProperty2));
|
||||||
|
EXPECT_NE(nullptr, dict.getWordProperty(wordProperty2.getCodePoints()));
|
||||||
|
|
||||||
|
const WordProperty wordProperty3 = getDummpWordProperty(getCodePointVector("abcdefg"));
|
||||||
|
EXPECT_TRUE(dict.addWord(wordProperty3));
|
||||||
|
EXPECT_NE(nullptr, dict.getWordProperty(wordProperty3.getCodePoints()));
|
||||||
|
|
||||||
|
const WordProperty wordProperty4 = getDummpWordProperty(getCodePointVector("efef"));
|
||||||
|
EXPECT_TRUE(dict.addWord(wordProperty4));
|
||||||
|
EXPECT_NE(nullptr, dict.getWordProperty(wordProperty4.getCodePoints()));
|
||||||
|
|
||||||
|
const WordProperty wordProperty5 = getDummpWordProperty(getCodePointVector("ef"));
|
||||||
|
EXPECT_TRUE(dict.addWord(wordProperty5));
|
||||||
|
EXPECT_NE(nullptr, dict.getWordProperty(wordProperty5.getCodePoints()));
|
||||||
|
|
||||||
|
const WordProperty wordProperty6 = getDummpWordProperty(getCodePointVector("abcd"));
|
||||||
|
EXPECT_FALSE(dict.addWord(wordProperty6)) << "Adding the same word multiple times should fail.";
|
||||||
|
|
||||||
|
EXPECT_NE(nullptr, dict.getWordProperty(wordProperty0.getCodePoints()));
|
||||||
|
EXPECT_NE(nullptr, dict.getWordProperty(wordProperty1.getCodePoints()));
|
||||||
|
EXPECT_NE(nullptr, dict.getWordProperty(wordProperty2.getCodePoints()));
|
||||||
|
EXPECT_NE(nullptr, dict.getWordProperty(wordProperty3.getCodePoints()));
|
||||||
|
EXPECT_NE(nullptr, dict.getWordProperty(wordProperty4.getCodePoints()));
|
||||||
|
EXPECT_NE(nullptr, dict.getWordProperty(wordProperty5.getCodePoints()));
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
} // namespace dicttoolkit
|
||||||
|
} // namespace latinime
|
Loading…
Reference in a new issue