From cd105409739db85f5f325e7cca322dadb12cded7 Mon Sep 17 00:00:00 2001 From: Keisuke Kuroyanagi Date: Mon, 10 Nov 2014 08:57:07 +0900 Subject: [PATCH] Introduce OffdeviceIntermediateDict for dicttolkit. Bug: 10059681 Change-Id: Ib6e9019502b59dd959c04c8f4996ca932c2b1ba8 --- native/dicttoolkit/NativeFileList.mk | 7 +- .../offdevice_intermediate_dict.cpp | 126 ++++++++++++++++++ .../offdevice_intermediate_dict.h | 48 +++++++ .../offdevice_intermediate_dict_pt_node.h | 79 +++++++++++ ...ffdevice_intermediate_dict_pt_node_array.h | 48 +++++++ .../offdevice_intermediate_dict_test.cpp | 84 ++++++++++++ 6 files changed, 391 insertions(+), 1 deletion(-) create mode 100644 native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.cpp create mode 100644 native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.h create mode 100644 native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node.h create mode 100644 native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h create mode 100644 native/dicttoolkit/tests/offdevice_intermediate_dict/offdevice_intermediate_dict_test.cpp diff --git a/native/dicttoolkit/NativeFileList.mk b/native/dicttoolkit/NativeFileList.mk index 925ec458b..b39a24890 100644 --- a/native/dicttoolkit/NativeFileList.mk +++ b/native/dicttoolkit/NativeFileList.mk @@ -22,8 +22,13 @@ LATIN_IME_DICT_TOOLKIT_SRC_FILES := \ help_executor.cpp \ info_executor.cpp \ makedict_executor.cpp) \ + $(addprefix offdevice_intermediate_dict/, \ + offdevice_intermediate_dict.cpp) \ utils/command_utils.cpp LATIN_IME_DICT_TOOLKIT_TEST_FILES := \ dict_toolkit_defines_test.cpp \ - utils/command_utils_test.cpp + $(addprefix offdevice_intermediate_dict/, \ + offdevice_intermediate_dict_test.cpp) \ + $(addprefix utils/, \ + command_utils_test.cpp) diff --git a/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.cpp b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.cpp new file mode 100644 index 000000000..af28131cf --- /dev/null +++ b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.cpp @@ -0,0 +1,126 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "offdevice_intermediate_dict/offdevice_intermediate_dict.h" + +#include "offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node.h" + +namespace latinime { +namespace dicttoolkit { + +bool OffdeviceIntermediateDict::addWord(const WordProperty &wordProperty) { + const CodePointArrayView codePoints = wordProperty.getCodePoints(); + if (codePoints.empty() || codePoints.size() > MAX_WORD_LENGTH) { + return false; + } + return addWordInner(codePoints, wordProperty, mRootPtNodeArray); +} + +bool OffdeviceIntermediateDict::addWordInner(const CodePointArrayView codePoints, + const WordProperty &wordProperty, OffdeviceIntermediateDictPtNodeArray &ptNodeArray) { + auto ptNodeList = ptNodeArray.getMutablePtNodeList(); + auto ptNodeIt = ptNodeList->begin(); + for (; ptNodeIt != ptNodeList->end(); ++ptNodeIt) { + const auto &ptNode = *ptNodeIt; + const CodePointArrayView ptNodeCodePoints = ptNode->getPtNodeCodePoints(); + if (codePoints[0] < ptNodeCodePoints[0]) { + continue; + } + if (codePoints[0] > ptNodeCodePoints[0]) { + break; + } + size_t i = 1; + for (; i < codePoints.size(); ++i) { + if (i >= ptNodeCodePoints.size()) { + // Add new child. + return addWordInner(codePoints.skip(i), wordProperty, + ptNode->getChildrenPtNodeArray()); + } + if (codePoints[i] != ptNodeCodePoints[i]) { + break; + } + } + if (codePoints.size() == i && codePoints.size() == ptNodeCodePoints.size()) { + // All code points matched. + if (ptNode->getWordProperty()) { + // Adding the same word multiple times is not supported. + return false; + } + ptNodeList->insert(ptNodeIt, + std::make_shared(wordProperty, *ptNode)); + ptNodeList->erase(ptNodeIt); + return true; + } + // The (i+1)-th elements are different. + // Create and Add new parent ptNode for the common part. + auto newPtNode = codePoints.size() == i + ? std::make_shared(codePoints, wordProperty) + : std::make_shared(codePoints.limit(i)); + ptNodeList->insert(ptNodeIt, newPtNode); + OffdeviceIntermediateDictPtNodeArray &childrenPtNodeArray = + newPtNode->getChildrenPtNodeArray(); + // Add new child for the existing ptNode. + childrenPtNodeArray.getMutablePtNodeList()->push_back( + std::make_shared( + ptNodeCodePoints.skip(i), *ptNode)); + ptNodeList->erase(ptNodeIt); + if (codePoints.size() != i) { + // Add a child for the new word. + return addWordInner(codePoints.skip(i), wordProperty, childrenPtNodeArray); + } + return true; + } + ptNodeList->insert(ptNodeIt, + std::make_shared(codePoints, wordProperty)); + return true; +} + +const WordProperty *OffdeviceIntermediateDict::getWordProperty( + const CodePointArrayView codePoints) const { + const OffdeviceIntermediateDictPtNodeArray *ptNodeArray = &mRootPtNodeArray; + for (size_t i = 0; i < codePoints.size();) { + bool foundNext = false; + for (const auto ptNode : ptNodeArray->getPtNodeList()) { + const CodePointArrayView ptNodeCodePoints = ptNode->getPtNodeCodePoints(); + if (codePoints[i] < ptNodeCodePoints[0]) { + continue; + } + if (codePoints[i] > ptNodeCodePoints[0] + || codePoints.size() < ptNodeCodePoints.size()) { + return nullptr; + } + for (size_t j = 1; j < ptNodeCodePoints.size(); ++j) { + if (codePoints[i + j] != ptNodeCodePoints[j]) { + return nullptr; + } + } + i += ptNodeCodePoints.size(); + if (i == codePoints.size()) { + return ptNode->getWordProperty(); + } + ptNodeArray = &ptNode->getChildrenPtNodeArray(); + foundNext = true; + break; + } + if (!foundNext) { + break; + } + } + return nullptr; +} + +} // namespace dicttoolkit +} // namespace latinime diff --git a/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.h b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.h new file mode 100644 index 000000000..dfdb6a897 --- /dev/null +++ b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.h @@ -0,0 +1,48 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_H +#define LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_H + +#include "dict_toolkit_defines.h" +#include "offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h" +#include "suggest/core/dictionary/property/word_property.h" +#include "utils/int_array_view.h" + +namespace latinime { +namespace dicttoolkit { + +/** + * On memory patricia trie to represent a dictionary. + */ +class OffdeviceIntermediateDict final { + public: + bool addWord(const WordProperty &wordProperty); + // The returned value will be invalid after modifying the dictionary. e.g. calling addWord(). + const WordProperty *getWordProperty(const CodePointArrayView codePoints) const; + + private: + DISALLOW_ASSIGNMENT_OPERATOR(OffdeviceIntermediateDict); + + OffdeviceIntermediateDictPtNodeArray mRootPtNodeArray; + + bool addWordInner(const CodePointArrayView codePoints, const WordProperty &wordProperty, + OffdeviceIntermediateDictPtNodeArray &ptNodeArray); +}; + +} // namespace dicttoolkit +} // namespace latinime +#endif // LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_H diff --git a/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node.h b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node.h new file mode 100644 index 000000000..721ccd778 --- /dev/null +++ b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node.h @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_H +#define LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_H + +#include + +#include "dict_toolkit_defines.h" +#include "offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h" +#include "suggest/core/dictionary/property/word_property.h" +#include "utils/int_array_view.h" + +namespace latinime { +namespace dicttoolkit { + +class OffdeviceIntermediateDictPtNode final { + public: + // Non-terminal + OffdeviceIntermediateDictPtNode(const CodePointArrayView ptNodeCodePoints) + : mPtNodeCodePoints(ptNodeCodePoints.toVector()), mChildrenPtNodeArray(), + mWortProperty(nullptr) {} + + // Terminal + OffdeviceIntermediateDictPtNode(const CodePointArrayView ptNodeCodePoints, + const WordProperty &wordProperty) + : mPtNodeCodePoints(ptNodeCodePoints.toVector()), mChildrenPtNodeArray(), + mWortProperty(new WordProperty(wordProperty)) {} + + // Replacing PtNodeCodePoints. + OffdeviceIntermediateDictPtNode(const CodePointArrayView ptNodeCodePoints, + const OffdeviceIntermediateDictPtNode &ptNode) + : mPtNodeCodePoints(ptNodeCodePoints.toVector()), + mChildrenPtNodeArray(ptNode.mChildrenPtNodeArray), + mWortProperty(new WordProperty(*ptNode.mWortProperty)) {} + + // Replacing WordProperty. + OffdeviceIntermediateDictPtNode(const WordProperty &wordProperty, + const OffdeviceIntermediateDictPtNode &ptNode) + : mPtNodeCodePoints(ptNode.mPtNodeCodePoints), + mChildrenPtNodeArray(ptNode.mChildrenPtNodeArray), + mWortProperty(new WordProperty(wordProperty)) {} + + const WordProperty *getWordProperty() const { + return mWortProperty.get(); + } + + const CodePointArrayView getPtNodeCodePoints() const { + return CodePointArrayView(mPtNodeCodePoints); + } + + OffdeviceIntermediateDictPtNodeArray &getChildrenPtNodeArray() { + return mChildrenPtNodeArray; + } + + private: + DISALLOW_COPY_AND_ASSIGN(OffdeviceIntermediateDictPtNode); + + const std::vector mPtNodeCodePoints; + OffdeviceIntermediateDictPtNodeArray mChildrenPtNodeArray; + const std::unique_ptr mWortProperty; +}; + +} // namespace dicttoolkit +} // namespace latinime +#endif // LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_H diff --git a/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h new file mode 100644 index 000000000..f87456ce0 --- /dev/null +++ b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h @@ -0,0 +1,48 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_ARRAY_H +#define LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_ARRAY_H + +#include +#include + +#include "dict_toolkit_defines.h" + +namespace latinime { +namespace dicttoolkit { + +class OffdeviceIntermediateDictPtNode; + +class OffdeviceIntermediateDictPtNodeArray final { + public: + const std::list> &getPtNodeList() const { + return mPtNodes; + } + + std::list> *getMutablePtNodeList() { + return &mPtNodes; + } + + private: + DISALLOW_ASSIGNMENT_OPERATOR(OffdeviceIntermediateDictPtNodeArray); + + std::list> mPtNodes; +}; + +} // namespace dicttoolkit +} // namespace latinime +#endif // LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_ARRAY_H diff --git a/native/dicttoolkit/tests/offdevice_intermediate_dict/offdevice_intermediate_dict_test.cpp b/native/dicttoolkit/tests/offdevice_intermediate_dict/offdevice_intermediate_dict_test.cpp new file mode 100644 index 000000000..3bcb89e46 --- /dev/null +++ b/native/dicttoolkit/tests/offdevice_intermediate_dict/offdevice_intermediate_dict_test.cpp @@ -0,0 +1,84 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "offdevice_intermediate_dict/offdevice_intermediate_dict.h" + +#include + +#include + +#include "suggest/core/dictionary/property/word_property.h" +#include "utils/int_array_view.h" + +namespace latinime { +namespace dicttoolkit { +namespace { + +const std::vector getCodePointVector(const char *str) { + std::vector codePoints; + while (*str) { + codePoints.push_back(*str); + ++str; + } + return codePoints; +} + +const WordProperty getDummpWordProperty(const std::vector &&codePoints) { + return WordProperty(std::move(codePoints), UnigramProperty(), std::vector()); +} + +TEST(OffdeviceIntermediateDictTest, TestAddWordProperties) { + OffdeviceIntermediateDict dict; + EXPECT_EQ(nullptr, dict.getWordProperty(CodePointArrayView())); + + const WordProperty wordProperty0 = getDummpWordProperty(getCodePointVector("abcd")); + EXPECT_TRUE(dict.addWord(wordProperty0)); + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty0.getCodePoints())); + + const WordProperty wordProperty1 = getDummpWordProperty(getCodePointVector("efgh")); + EXPECT_TRUE(dict.addWord(wordProperty1)); + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty1.getCodePoints())); + + const WordProperty wordProperty2 = getDummpWordProperty(getCodePointVector("ab")); + EXPECT_TRUE(dict.addWord(wordProperty2)); + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty2.getCodePoints())); + + const WordProperty wordProperty3 = getDummpWordProperty(getCodePointVector("abcdefg")); + EXPECT_TRUE(dict.addWord(wordProperty3)); + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty3.getCodePoints())); + + const WordProperty wordProperty4 = getDummpWordProperty(getCodePointVector("efef")); + EXPECT_TRUE(dict.addWord(wordProperty4)); + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty4.getCodePoints())); + + const WordProperty wordProperty5 = getDummpWordProperty(getCodePointVector("ef")); + EXPECT_TRUE(dict.addWord(wordProperty5)); + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty5.getCodePoints())); + + const WordProperty wordProperty6 = getDummpWordProperty(getCodePointVector("abcd")); + EXPECT_FALSE(dict.addWord(wordProperty6)) << "Adding the same word multiple times should fail."; + + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty0.getCodePoints())); + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty1.getCodePoints())); + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty2.getCodePoints())); + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty3.getCodePoints())); + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty4.getCodePoints())); + EXPECT_NE(nullptr, dict.getWordProperty(wordProperty5.getCodePoints())); +} + +} // namespace +} // namespace dicttoolkit +} // namespace latinime