From de3121dead395d32760379c03938faef6eac2f98 Mon Sep 17 00:00:00 2001 From: Keisuke Kuroyanagi Date: Tue, 29 Jul 2014 17:00:29 +0900 Subject: [PATCH] Add TrieMap. Bug: 14425059 Change-Id: Ief36f808ed4f3e35bfab07837ea314d2564973ab --- native/jni/NativeFileList.mk | 4 +- .../policyimpl/dictionary/utils/trie_map.cpp | 340 ++++++++++++++++++ .../policyimpl/dictionary/utils/trie_map.h | 253 +++++++++++++ .../dictionary/utils/trie_map_test.cpp | 169 +++++++++ 4 files changed, 765 insertions(+), 1 deletion(-) create mode 100644 native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.cpp create mode 100644 native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.h create mode 100644 native/jni/tests/suggest/policyimpl/dictionary/utils/trie_map_test.cpp diff --git a/native/jni/NativeFileList.mk b/native/jni/NativeFileList.mk index 1cb61c45f..ef6f3d69e 100644 --- a/native/jni/NativeFileList.mk +++ b/native/jni/NativeFileList.mk @@ -84,7 +84,8 @@ LATIN_IME_CORE_SRC_FILES := \ forgetting_curve_utils.cpp \ format_utils.cpp \ mmapped_buffer.cpp \ - sparse_table.cpp) \ + sparse_table.cpp \ + trie_map.cpp ) \ suggest/policyimpl/gesture/gesture_suggest_policy_factory.cpp \ $(addprefix suggest/policyimpl/typing/, \ scoring_params.cpp \ @@ -125,4 +126,5 @@ LATIN_IME_CORE_TEST_FILES := \ suggest/core/layout/normal_distribution_2d_test.cpp \ suggest/core/dictionary/bloom_filter_test.cpp \ suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer_test.cpp \ + suggest/policyimpl/dictionary/utils/trie_map_test.cpp \ utils/autocorrection_threshold_utils_test.cpp diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.cpp new file mode 100644 index 000000000..a7d86f9ae --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.cpp @@ -0,0 +1,340 @@ +/* + * Copyright (C) 2014, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/policyimpl/dictionary/utils/trie_map.h" + +namespace latinime { + +const int TrieMap::INVALID_INDEX = -1; +const int TrieMap::FIELD0_SIZE = 4; +const int TrieMap::FIELD1_SIZE = 3; +const int TrieMap::ENTRY_SIZE = FIELD0_SIZE + FIELD1_SIZE; +const uint32_t TrieMap::VALUE_FLAG = 0x400000; +const uint32_t TrieMap::VALUE_MASK = 0x3FFFFF; +const uint32_t TrieMap::TERMINAL_LINK_FLAG = 0x800000; +const uint32_t TrieMap::TERMINAL_LINK_MASK = 0x7FFFFF; +const int TrieMap::NUM_OF_BITS_USED_FOR_ONE_LEVEL = 5; +const uint32_t TrieMap::LABEL_MASK = 0x1F; +const int TrieMap::MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL = 1 << NUM_OF_BITS_USED_FOR_ONE_LEVEL; +const int TrieMap::ROOT_BITMAP_ENTRY_INDEX = 0; +const int TrieMap::ROOT_BITMAP_ENTRY_POS = MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL * FIELD0_SIZE; +const TrieMap::Entry TrieMap::EMPTY_BITMAP_ENTRY = TrieMap::Entry(0, 0); +const uint64_t TrieMap::MAX_VALUE = + (static_cast(1) << ((FIELD0_SIZE + FIELD1_SIZE) * CHAR_BIT)) - 1; +const int TrieMap::MAX_BUFFER_SIZE = TERMINAL_LINK_MASK * ENTRY_SIZE; + +TrieMap::TrieMap() : mBuffer(MAX_BUFFER_SIZE) { + mBuffer.extend(ROOT_BITMAP_ENTRY_POS); + writeEntry(EMPTY_BITMAP_ENTRY, ROOT_BITMAP_ENTRY_INDEX); +} + +void TrieMap::dump(const int from, const int to) const { + AKLOGI("BufSize: %d", mBuffer.getTailPosition()); + for (int i = from; i < to; ++i) { + AKLOGI("Entry[%d]: %x, %x", i, readField0(i), readField1(i)); + } + int unusedRegionSize = 0; + for (int i = 1; i <= MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL; ++i) { + int index = readEmptyTableLink(i); + while (index != ROOT_BITMAP_ENTRY_INDEX) { + index = readField0(index); + unusedRegionSize += i; + } + } + AKLOGI("Unused Size: %d", unusedRegionSize); +} + +int TrieMap::getNextLevelBitmapEntryIndex(const int key, const int bitmapEntryIndex) { + const Entry bitmapEntry = readEntry(bitmapEntryIndex); + const uint32_t unsignedKey = static_cast(key); + const int terminalEntryIndex = getTerminalEntryIndex( + unsignedKey, getBitShuffledKey(unsignedKey), bitmapEntry, 0 /* level */); + if (terminalEntryIndex == INVALID_INDEX) { + // Not found. + return INVALID_INDEX; + } + const Entry terminalEntry = readEntry(terminalEntryIndex); + if (terminalEntry.hasTerminalLink()) { + return terminalEntry.getValueEntryIndex() + 1; + } + // Create a value entry and a bitmap entry. + const int valueEntryIndex = allocateTable(2 /* entryCount */); + if (!writeEntry(Entry(0, terminalEntry.getValue()), valueEntryIndex)) { + return INVALID_INDEX; + } + if (!writeEntry(EMPTY_BITMAP_ENTRY, valueEntryIndex + 1)) { + return INVALID_INDEX; + } + if (!writeField1(valueEntryIndex | TERMINAL_LINK_FLAG, valueEntryIndex)) { + return INVALID_INDEX; + } + return valueEntryIndex + 1; +} + +const TrieMap::Result TrieMap::get(const int key, const int bitmapEntryIndex) const { + const uint32_t unsignedKey = static_cast(key); + return getInternal(unsignedKey, getBitShuffledKey(unsignedKey), bitmapEntryIndex, + 0 /* level */); +} + +bool TrieMap::put(const int key, const uint64_t value, const int bitmapEntryIndex) { + if (value > MAX_VALUE) { + return false; + } + const uint32_t unsignedKey = static_cast(key); + return putInternal(unsignedKey, value, getBitShuffledKey(unsignedKey), bitmapEntryIndex, + readEntry(bitmapEntryIndex), 0 /* level */); +} + +/** + * Shuffle bits of the key in the fixed order. + * + * This method is used as a hash function. This returns different values for different inputs. + */ +uint32_t TrieMap::getBitShuffledKey(const uint32_t key) const { + uint32_t shuffledKey = 0; + for (int i = 0; i < 4; ++i) { + const uint32_t keyPiece = (key >> (i * 8)) & 0xFF; + shuffledKey ^= ((keyPiece ^ (keyPiece << 7) ^ (keyPiece << 14) ^ (keyPiece << 21)) + & 0x11111111) << i; + } + return shuffledKey; +} + +bool TrieMap::writeValue(const uint64_t value, const int terminalEntryIndex) { + if (value <= VALUE_MASK) { + // Write value into the terminal entry. + return writeField1(value | VALUE_FLAG, terminalEntryIndex); + } + // Create value entry and write value. + const int valueEntryIndex = allocateTable(2 /* entryCount */); + if (!writeEntry(Entry(value >> (FIELD1_SIZE * CHAR_BIT), value), valueEntryIndex)) { + return false; + } + if (!writeEntry(EMPTY_BITMAP_ENTRY, valueEntryIndex + 1)) { + return false; + } + return writeField1(valueEntryIndex | TERMINAL_LINK_FLAG, terminalEntryIndex); +} + +bool TrieMap::updateValue(const Entry &terminalEntry, const uint64_t value, + const int terminalEntryIndex) { + if (!terminalEntry.hasTerminalLink()) { + return writeValue(value, terminalEntryIndex); + } + const int valueEntryIndex = terminalEntry.getValueEntryIndex(); + return writeEntry(Entry(value >> (FIELD1_SIZE * CHAR_BIT), value), valueEntryIndex); +} + +bool TrieMap::freeTable(const int tableIndex, const int entryCount) { + if (!writeField0(readEmptyTableLink(entryCount), tableIndex)) { + return false; + } + return writeEmptyTableLink(tableIndex, entryCount); +} + +/** + * Allocate table with entryCount-entries. Reuse freed table if possible. + */ +int TrieMap::allocateTable(const int entryCount) { + if (entryCount > 0 && entryCount <= MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL) { + const int tableIndex = readEmptyTableLink(entryCount); + if (tableIndex > 0) { + if (!writeEmptyTableLink(readField0(tableIndex), entryCount)) { + return INVALID_INDEX; + } + // Reuse the table. + return tableIndex; + } + } + // Allocate memory space at tail position of the buffer. + const int mapIndex = getTailEntryIndex(); + if (!mBuffer.extend(entryCount * ENTRY_SIZE)) { + return INVALID_INDEX; + } + return mapIndex; +} + +int TrieMap::getTerminalEntryIndex(const uint32_t key, const uint32_t hashedKey, + const Entry &bitmapEntry, const int level) const { + const int label = getLabel(hashedKey, level); + if (!exists(bitmapEntry.getBitmap(), label)) { + return INVALID_INDEX; + } + const int entryIndex = bitmapEntry.getTableIndex() + popCount(bitmapEntry.getBitmap(), label); + const Entry entry = readEntry(entryIndex); + if (entry.isBitmapEntry()) { + // Move to the next level. + return getTerminalEntryIndex(key, hashedKey, entry, level + 1); + } + if (entry.getKey() == key) { + // Terminal entry is found. + return entryIndex; + } + return INVALID_INDEX; +} + +/** + * Get Result corresponding to the key. + * + * @param key the key. + * @param hashedKey the hashed key. + * @param bitmapEntryIndex the index of bitmap entry + * @param level current level + * @return Result instance corresponding to the key. mIsValid indicates whether the key is in the + * map. + */ +const TrieMap::Result TrieMap::getInternal(const uint32_t key, const uint32_t hashedKey, + const int bitmapEntryIndex, const int level) const { + const int terminalEntryIndex = getTerminalEntryIndex(key, hashedKey, + readEntry(bitmapEntryIndex), level); + if (terminalEntryIndex == INVALID_INDEX) { + // Not found. + return Result(0, false, INVALID_INDEX); + } + const Entry terminalEntry = readEntry(terminalEntryIndex); + if (!terminalEntry.hasTerminalLink()) { + return Result(terminalEntry.getValue(), true, INVALID_INDEX); + } + const int valueEntryIndex = terminalEntry.getValueEntryIndex(); + const Entry valueEntry = readEntry(valueEntryIndex); + return Result(valueEntry.getValueOfValueEntry(), true, valueEntryIndex + 1); +} + +/** + * Put key to value mapping to the map. + * + * @param key the key. + * @param value the value + * @param hashedKey the hashed key. + * @param bitmapEntryIndex the index of bitmap entry + * @param bitmapEntry the bitmap entry + * @param level current level + * @return whether the key-value has been correctly inserted to the map or not. + */ +bool TrieMap::putInternal(const uint32_t key, const uint64_t value, const uint32_t hashedKey, + const int bitmapEntryIndex, const Entry &bitmapEntry, const int level) { + const int label = getLabel(hashedKey, level); + const uint32_t bitmap = bitmapEntry.getBitmap(); + const int mapIndex = bitmapEntry.getTableIndex(); + if (!exists(bitmap, label)) { + // Current map doesn't contain the label. + return addNewEntryByExpandingTable(key, value, mapIndex, bitmap, bitmapEntryIndex, label); + } + const int entryIndex = mapIndex + popCount(bitmap, label); + const Entry entry = readEntry(entryIndex); + if (entry.isBitmapEntry()) { + // Bitmap entry is found. Go to the next level. + return putInternal(key, value, hashedKey, entryIndex, entry, level + 1); + } + if (entry.getKey() == key) { + // Terminal entry for the key is found. Update the value. + return updateValue(entry, value, entryIndex); + } + // Conflict with the existing key. + return addNewEntryByResolvingConflict(key, value, hashedKey, entry, entryIndex, level); +} + +/** + * Resolve a conflict in the current level and add new entry. + * + * @param key the key + * @param value the value + * @param hashedKey the hashed key + * @param conflictedEntry the existing conflicted entry + * @param conflictedEntryIndex the index of existing conflicted entry + * @param level current level + * @return whether the key-value has been correctly inserted to the map or not. + */ +bool TrieMap::addNewEntryByResolvingConflict(const uint32_t key, const uint64_t value, + const uint32_t hashedKey, const Entry &conflictedEntry, const int conflictedEntryIndex, + const int level) { + const int conflictedKeyNextLabel = + getLabel(getBitShuffledKey(conflictedEntry.getKey()), level + 1); + const int nextLabel = getLabel(hashedKey, level + 1); + if (conflictedKeyNextLabel == nextLabel) { + // Conflicted again in the next level. + const int newTableIndex = allocateTable(1 /* entryCount */); + if (newTableIndex == INVALID_INDEX) { + return false; + } + if (!writeEntry(conflictedEntry, newTableIndex)) { + return false; + } + const Entry newBitmapEntry(setExist(0 /* bitmap */, nextLabel), newTableIndex); + if (!writeEntry(newBitmapEntry, conflictedEntryIndex)) { + return false; + } + return putInternal(key, value, hashedKey, conflictedEntryIndex, newBitmapEntry, level + 1); + } + // The conflict has been resolved. Create a table that contains 2 entries. + const int newTableIndex = allocateTable(2 /* entryCount */); + if (newTableIndex == INVALID_INDEX) { + return false; + } + if (nextLabel < conflictedKeyNextLabel) { + if (!writeTerminalEntry(key, value, newTableIndex)) { + return false; + } + if (!writeEntry(conflictedEntry, newTableIndex + 1)) { + return false; + } + } else { // nextLabel > conflictedKeyNextLabel + if (!writeEntry(conflictedEntry, newTableIndex)) { + return false; + } + if (!writeTerminalEntry(key, value, newTableIndex + 1)) { + return false; + } + } + const uint32_t updatedBitmap = + setExist(setExist(0 /* bitmap */, nextLabel), conflictedKeyNextLabel); + return writeEntry(Entry(updatedBitmap, newTableIndex), conflictedEntryIndex); +} + +/** + * Add new entry to the existing table. + */ +bool TrieMap::addNewEntryByExpandingTable(const uint32_t key, const uint64_t value, + const int tableIndex, const uint32_t bitmap, const int bitmapEntryIndex, const int label) { + // Current map doesn't contain the label. + const int entryCount = popCount(bitmap); + const int newTableIndex = allocateTable(entryCount + 1); + if (newTableIndex == INVALID_INDEX) { + return false; + } + const int newEntryIndexInTable = popCount(bitmap, label); + // Copy from existing table to the new table. + for (int i = 0; i < entryCount; ++i) { + if (!copyEntry(tableIndex + i, newTableIndex + i + (i >= newEntryIndexInTable ? 1 : 0))) { + return false; + } + } + // Write new terminal entry. + if (!writeTerminalEntry(key, value, newTableIndex + newEntryIndexInTable)) { + return false; + } + // Update bitmap. + if (!writeEntry(Entry(setExist(bitmap, label), newTableIndex), bitmapEntryIndex)) { + return false; + } + if (entryCount > 0) { + return freeTable(tableIndex, entryCount); + } + return true; +} + +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.h b/native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.h new file mode 100644 index 000000000..2a9051f98 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.h @@ -0,0 +1,253 @@ +/* + * Copyright (C) 2014, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_TRIE_MAP_H +#define LATINIME_TRIE_MAP_H + +#include +#include +#include + +#include "defines.h" +#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" + +namespace latinime { + +/** + * Trie map derived from Phil Bagwell's Hash Array Mapped Trie. + * key is int and value is uint64_t. + * This supports multiple level map. Terminal entries can have a bitmap for the next level map. + * This doesn't support root map resizing. + */ +class TrieMap { + public: + struct Result { + const uint64_t mValue; + const bool mIsValid; + const int mNextLevelBitmapEntryIndex; + + Result(const uint64_t value, const bool isValid, const int nextLevelBitmapEntryIndex) + : mValue(value), mIsValid(isValid), + mNextLevelBitmapEntryIndex(nextLevelBitmapEntryIndex) {} + }; + + static const int INVALID_INDEX; + static const uint64_t MAX_VALUE; + + TrieMap(); + void dump(const int from = 0, const int to = 0) const; + + bool isNearSizeLimit() const { + return mBuffer.isNearSizeLimit(); + } + + // Returns bitmapEntryIndex. Create the next level map if it doesn't exist. + int getNextLevelBitmapEntryIndex(const int key) { + return getNextLevelBitmapEntryIndex(key, ROOT_BITMAP_ENTRY_INDEX); + } + + int getNextLevelBitmapEntryIndex(const int key, const int bitmapEntryIndex); + + const Result getRoot(const int key) const { + return get(key, ROOT_BITMAP_ENTRY_INDEX); + } + + const Result get(const int key, const int bitmapEntryIndex) const; + + bool putRoot(const int key, const uint64_t value) { + return put(key, value, ROOT_BITMAP_ENTRY_INDEX); + } + + bool put(const int key, const uint64_t value, const int bitmapEntryIndex); + + private: + DISALLOW_COPY_AND_ASSIGN(TrieMap); + + /** + * Struct represents an entry. + * + * Entry is one of these entry types. All entries are fixed size and have 2 fields FIELD_0 and + * FIELD_1. + * 1. bitmap entry. bitmap entry contains bitmap and the link to hash table. + * FIELD_0(bitmap) FIELD_1(LINK_TO_HASH_TABLE) + * 2. terminal entry. terminal entry contains hashed key and value or terminal link. terminal + * entry have terminal link when the value is not fit to FIELD_1 or there is a next level map + * for the key. + * FIELD_0(hashed key) (FIELD_1(VALUE_FLAG VALUE) | FIELD_1(TERMINAL_LINK_FLAG TERMINAL_LINK)) + * 3. value entry. value entry represents a value. Upper order bytes are stored in FIELD_0 and + * lower order bytes are stored in FIELD_1. + * FIELD_0(value (upper order bytes)) FIELD_1(value (lower order bytes)) + */ + struct Entry { + const uint32_t mData0; + const uint32_t mData1; + + Entry(const uint32_t data0, const uint32_t data1) : mData0(data0), mData1(data1) {} + + AK_FORCE_INLINE bool isBitmapEntry() const { + return (mData1 & VALUE_FLAG) == 0 && (mData1 & TERMINAL_LINK_FLAG) == 0; + } + + AK_FORCE_INLINE bool hasTerminalLink() const { + return (mData1 & TERMINAL_LINK_FLAG) != 0; + } + + // For terminal entry. + AK_FORCE_INLINE uint32_t getKey() const { + return mData0; + } + + // For terminal entry. + AK_FORCE_INLINE uint32_t getValue() const { + return mData1 & VALUE_MASK; + } + + // For terminal entry. + AK_FORCE_INLINE uint32_t getValueEntryIndex() const { + return mData1 & TERMINAL_LINK_MASK; + } + + // For bitmap entry. + AK_FORCE_INLINE uint32_t getBitmap() const { + return mData0; + } + + // For bitmap entry. + AK_FORCE_INLINE int getTableIndex() const { + return static_cast(mData1); + } + + // For value entry. + AK_FORCE_INLINE uint64_t getValueOfValueEntry() const { + return ((static_cast(mData0) << (FIELD1_SIZE * CHAR_BIT)) ^ mData1); + } + }; + + BufferWithExtendableBuffer mBuffer; + + static const int FIELD0_SIZE; + static const int FIELD1_SIZE; + static const int ENTRY_SIZE; + static const uint32_t VALUE_FLAG; + static const uint32_t VALUE_MASK; + static const uint32_t TERMINAL_LINK_FLAG; + static const uint32_t TERMINAL_LINK_MASK; + static const int NUM_OF_BITS_USED_FOR_ONE_LEVEL; + static const uint32_t LABEL_MASK; + static const int MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL; + static const int ROOT_BITMAP_ENTRY_INDEX; + static const int ROOT_BITMAP_ENTRY_POS; + static const Entry EMPTY_BITMAP_ENTRY; + static const int MAX_BUFFER_SIZE; + + uint32_t getBitShuffledKey(const uint32_t key) const; + bool writeValue(const uint64_t value, const int terminalEntryIndex); + bool updateValue(const Entry &terminalEntry, const uint64_t value, + const int terminalEntryIndex); + bool freeTable(const int tableIndex, const int entryCount); + int allocateTable(const int entryCount); + int getTerminalEntryIndex(const uint32_t key, const uint32_t hashedKey, + const Entry &bitmapEntry, const int level) const; + const Result getInternal(const uint32_t key, const uint32_t hashedKey, + const int bitmapEntryIndex, const int level) const; + bool putInternal(const uint32_t key, const uint64_t value, const uint32_t hashedKey, + const int bitmapEntryIndex, const Entry &bitmapEntry, const int level); + bool addNewEntryByResolvingConflict(const uint32_t key, const uint64_t value, + const uint32_t hashedKey, const Entry &conflictedEntry, const int conflictedEntryIndex, + const int level); + bool addNewEntryByExpandingTable(const uint32_t key, const uint64_t value, + const int tableIndex, const uint32_t bitmap, const int bitmapEntryIndex, + const int label); + + AK_FORCE_INLINE const Entry readEntry(const int entryIndex) const { + return Entry(readField0(entryIndex), readField1(entryIndex)); + } + + // Returns whether an entry for the index is existing by testing if the index-th bit in the + // bitmap is set or not. + AK_FORCE_INLINE bool exists(const uint32_t bitmap, const int index) const { + return (bitmap & (1 << index)) != 0; + } + + // Set index-th bit in the bitmap. + AK_FORCE_INLINE uint32_t setExist(const uint32_t bitmap, const int index) const { + return bitmap | (1 << index); + } + + // Count set bits before index in the bitmap. + AK_FORCE_INLINE int popCount(const uint32_t bitmap, const int index) const { + return popCount(bitmap & ((1 << index) - 1)); + } + + // Count set bits in the bitmap. + AK_FORCE_INLINE int popCount(const uint32_t bitmap) const { + return __builtin_popcount(bitmap); + // int v = bitmap - ((bitmap >> 1) & 0x55555555); + // v = (v & 0x33333333) + ((v >> 2) & 0x33333333); + // return (((v + (v >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24; + } + + AK_FORCE_INLINE int getLabel(const uint32_t hashedKey, const int level) const { + return (hashedKey >> (level * NUM_OF_BITS_USED_FOR_ONE_LEVEL)) & LABEL_MASK; + } + + AK_FORCE_INLINE uint32_t readField0(const int entryIndex) const { + return mBuffer.readUint(FIELD0_SIZE, ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE); + } + + AK_FORCE_INLINE uint32_t readField1(const int entryIndex) const { + return mBuffer.readUint(FIELD1_SIZE, + ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE + FIELD0_SIZE); + } + + AK_FORCE_INLINE int readEmptyTableLink(const int entryCount) const { + return mBuffer.readUint(FIELD1_SIZE, (entryCount - 1) * FIELD1_SIZE); + } + + AK_FORCE_INLINE bool writeEmptyTableLink(const int tableIndex, const int entryCount) { + return mBuffer.writeUint(tableIndex, FIELD1_SIZE, (entryCount - 1) * FIELD1_SIZE); + } + + AK_FORCE_INLINE bool writeField0(const uint32_t data, const int entryIndex) { + return mBuffer.writeUint(data, FIELD0_SIZE, + ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE); + } + + AK_FORCE_INLINE bool writeField1(const uint32_t data, const int entryIndex) { + return mBuffer.writeUint(data, FIELD1_SIZE, + ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE + FIELD0_SIZE); + } + + AK_FORCE_INLINE bool writeEntry(const Entry &entry, const int entryIndex) { + return writeField0(entry.mData0, entryIndex) && writeField1(entry.mData1, entryIndex); + } + + AK_FORCE_INLINE bool writeTerminalEntry(const uint32_t key, const uint64_t value, + const int entryIndex) { + return writeField0(key, entryIndex) && writeValue(value, entryIndex); + } + + AK_FORCE_INLINE bool copyEntry(const int originalEntryIndex, const int newEntryIndex) { + return writeEntry(readEntry(originalEntryIndex), newEntryIndex); + } + + AK_FORCE_INLINE int getTailEntryIndex() const { + return (mBuffer.getTailPosition() - ROOT_BITMAP_ENTRY_POS) / ENTRY_SIZE; + } +}; + +} // namespace latinime +#endif /* LATINIME_TRIE_MAP_H */ diff --git a/native/jni/tests/suggest/policyimpl/dictionary/utils/trie_map_test.cpp b/native/jni/tests/suggest/policyimpl/dictionary/utils/trie_map_test.cpp new file mode 100644 index 000000000..5dd782277 --- /dev/null +++ b/native/jni/tests/suggest/policyimpl/dictionary/utils/trie_map_test.cpp @@ -0,0 +1,169 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/policyimpl/dictionary/utils/trie_map.h" + +#include + +#include +#include +#include +#include +#include +#include + +namespace latinime { +namespace { + +TEST(TrieMapTest, TestSetAndGet) { + TrieMap trieMap; + trieMap.putRoot(10, 10); + EXPECT_EQ(10ull, trieMap.getRoot(10).mValue); + trieMap.putRoot(0x10A, 10); + EXPECT_EQ(10ull, trieMap.getRoot(10).mValue); + EXPECT_EQ(10ull, trieMap.getRoot(0x10A).mValue); + trieMap.putRoot(10, 1000); + EXPECT_EQ(1000ull, trieMap.getRoot(10).mValue); + trieMap.putRoot(11, 1000); + EXPECT_EQ(1000ull, trieMap.getRoot(11).mValue); + const int next = trieMap.getNextLevelBitmapEntryIndex(10); + trieMap.put(9, 9, next); + EXPECT_EQ(9ull, trieMap.get(9, next).mValue); + EXPECT_FALSE(trieMap.get(11, next).mIsValid); + trieMap.putRoot(0, 0xFFFFFFFFFull); + EXPECT_EQ(0xFFFFFFFFFull, trieMap.getRoot(0).mValue); +} + +TEST(TrieMapTest, TestSetAndGetLarge) { + static const int ELEMENT_COUNT = 200000; + TrieMap trieMap; + for (int i = 0; i < ELEMENT_COUNT; ++i) { + EXPECT_TRUE(trieMap.putRoot(i, i)); + } + for (int i = 0; i < ELEMENT_COUNT; ++i) { + EXPECT_EQ(trieMap.getRoot(i).mValue, static_cast(i)); + } +} + +TEST(TrieMapTest, TestRandSetAndGetLarge) { + static const int ELEMENT_COUNT = 100000; + TrieMap trieMap; + std::unordered_map testKeyValuePairs; + + // Use the uniform integer distribution [S_INT_MIN, S_INT_MAX]. + std::uniform_int_distribution keyDistribution(S_INT_MIN, S_INT_MAX); + auto keyRandomNumberGenerator = std::bind(keyDistribution, std::mt19937()); + + // Use the uniform distribution [0, TrieMap::MAX_VALUE]. + std::uniform_int_distribution valueDistribution(0, TrieMap::MAX_VALUE); + auto valueRandomNumberGenerator = std::bind(valueDistribution, std::mt19937()); + + for (int i = 0; i < ELEMENT_COUNT; ++i) { + const int key = keyRandomNumberGenerator(); + const uint64_t value = valueRandomNumberGenerator(); + EXPECT_TRUE(trieMap.putRoot(key, value)) << key << " " << value; + testKeyValuePairs[key] = value; + } + for (const auto &v : testKeyValuePairs) { + EXPECT_EQ(trieMap.getRoot(v.first).mValue, v.second); + } +} + +TEST(TrieMapTest, TestMultiLevel) { + static const int FIRST_LEVEL_ENTRY_COUNT = 10000; + static const int SECOND_LEVEL_ENTRY_COUNT = 20000; + static const int THIRD_LEVEL_ENTRY_COUNT = 40000; + + TrieMap trieMap; + std::vector firstLevelKeys; + std::map firstLevelEntries; + std::vector> secondLevelKeys; + std::map> twoLevelMap; + std::map>> threeLevelMap; + + // Use the uniform integer distribution [0, S_INT_MAX]. + std::uniform_int_distribution distribution(0, S_INT_MAX); + auto keyRandomNumberGenerator = std::bind(distribution, std::mt19937()); + auto randomNumberGeneratorForKeySelection = std::bind(distribution, std::mt19937()); + + // Use the uniform distribution [0, TrieMap::MAX_VALUE]. + std::uniform_int_distribution valueDistribution(0, TrieMap::MAX_VALUE); + auto valueRandomNumberGenerator = std::bind(valueDistribution, std::mt19937()); + + for (int i = 0; i < FIRST_LEVEL_ENTRY_COUNT; ++i) { + const int key = keyRandomNumberGenerator(); + const uint64_t value = valueRandomNumberGenerator(); + EXPECT_TRUE(trieMap.putRoot(key, value)); + firstLevelKeys.push_back(key); + firstLevelEntries[key] = value; + } + + for (int i = 0; i < SECOND_LEVEL_ENTRY_COUNT; ++i) { + const int key = keyRandomNumberGenerator(); + const uint64_t value = valueRandomNumberGenerator(); + const int firstLevelKey = + firstLevelKeys[randomNumberGeneratorForKeySelection() % FIRST_LEVEL_ENTRY_COUNT]; + const int nextLevelBitmapEntryIndex = trieMap.getNextLevelBitmapEntryIndex(firstLevelKey); + EXPECT_NE(TrieMap::INVALID_INDEX, nextLevelBitmapEntryIndex); + EXPECT_TRUE(trieMap.put(key, value, nextLevelBitmapEntryIndex)); + secondLevelKeys.push_back(std::make_pair(firstLevelKey, key)); + twoLevelMap[firstLevelKey][key] = value; + } + + for (int i = 0; i < THIRD_LEVEL_ENTRY_COUNT; ++i) { + const int key = keyRandomNumberGenerator(); + const uint64_t value = valueRandomNumberGenerator(); + const std::pair secondLevelKey = + secondLevelKeys[randomNumberGeneratorForKeySelection() % SECOND_LEVEL_ENTRY_COUNT]; + const int secondLevel = trieMap.getNextLevelBitmapEntryIndex(secondLevelKey.first); + EXPECT_NE(TrieMap::INVALID_INDEX, secondLevel); + const int thirdLevel = trieMap.getNextLevelBitmapEntryIndex( + secondLevelKey.second, secondLevel); + EXPECT_NE(TrieMap::INVALID_INDEX, thirdLevel); + EXPECT_TRUE(trieMap.put(key, value, thirdLevel)); + threeLevelMap[secondLevelKey.first][secondLevelKey.second][key] = value; + } + + for (const auto &firstLevelEntry : firstLevelEntries) { + EXPECT_EQ(firstLevelEntry.second, trieMap.getRoot(firstLevelEntry.first).mValue); + } + + for (const auto &firstLevelEntry : twoLevelMap) { + const int secondLevel = trieMap.getNextLevelBitmapEntryIndex(firstLevelEntry.first); + EXPECT_NE(TrieMap::INVALID_INDEX, secondLevel); + for (const auto &secondLevelEntry : firstLevelEntry.second) { + EXPECT_EQ(secondLevelEntry.second, + trieMap.get(secondLevelEntry.first, secondLevel).mValue); + } + } + + for (const auto &firstLevelEntry : threeLevelMap) { + const int secondLevel = trieMap.getNextLevelBitmapEntryIndex(firstLevelEntry.first); + EXPECT_NE(TrieMap::INVALID_INDEX, secondLevel); + for (const auto &secondLevelEntry : firstLevelEntry.second) { + const int thirdLevel = + trieMap.getNextLevelBitmapEntryIndex(secondLevelEntry.first, secondLevel); + EXPECT_NE(TrieMap::INVALID_INDEX, thirdLevel); + for (const auto &thirdLevelEntry : secondLevelEntry.second) { + EXPECT_EQ(thirdLevelEntry.second, + trieMap.get(thirdLevelEntry.first, thirdLevel).mValue); + } + } + } +} + +} // namespace +} // namespace latinime