am 6c3b0d3c
: Merge "Add TrieMap." into lmp-dev
* commit '6c3b0d3caa8af9b513573486c12bddacd5f03d03': Add TrieMap.
This commit is contained in:
commit
db72a4f686
4 changed files with 765 additions and 1 deletions
|
@ -84,7 +84,8 @@ LATIN_IME_CORE_SRC_FILES := \
|
||||||
forgetting_curve_utils.cpp \
|
forgetting_curve_utils.cpp \
|
||||||
format_utils.cpp \
|
format_utils.cpp \
|
||||||
mmapped_buffer.cpp \
|
mmapped_buffer.cpp \
|
||||||
sparse_table.cpp) \
|
sparse_table.cpp \
|
||||||
|
trie_map.cpp ) \
|
||||||
suggest/policyimpl/gesture/gesture_suggest_policy_factory.cpp \
|
suggest/policyimpl/gesture/gesture_suggest_policy_factory.cpp \
|
||||||
$(addprefix suggest/policyimpl/typing/, \
|
$(addprefix suggest/policyimpl/typing/, \
|
||||||
scoring_params.cpp \
|
scoring_params.cpp \
|
||||||
|
@ -125,4 +126,5 @@ LATIN_IME_CORE_TEST_FILES := \
|
||||||
suggest/core/layout/normal_distribution_2d_test.cpp \
|
suggest/core/layout/normal_distribution_2d_test.cpp \
|
||||||
suggest/core/dictionary/bloom_filter_test.cpp \
|
suggest/core/dictionary/bloom_filter_test.cpp \
|
||||||
suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer_test.cpp \
|
suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer_test.cpp \
|
||||||
|
suggest/policyimpl/dictionary/utils/trie_map_test.cpp \
|
||||||
utils/autocorrection_threshold_utils_test.cpp
|
utils/autocorrection_threshold_utils_test.cpp
|
||||||
|
|
340
native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.cpp
Normal file
340
native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.cpp
Normal file
|
@ -0,0 +1,340 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2014, The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "suggest/policyimpl/dictionary/utils/trie_map.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
const int TrieMap::INVALID_INDEX = -1;
|
||||||
|
const int TrieMap::FIELD0_SIZE = 4;
|
||||||
|
const int TrieMap::FIELD1_SIZE = 3;
|
||||||
|
const int TrieMap::ENTRY_SIZE = FIELD0_SIZE + FIELD1_SIZE;
|
||||||
|
const uint32_t TrieMap::VALUE_FLAG = 0x400000;
|
||||||
|
const uint32_t TrieMap::VALUE_MASK = 0x3FFFFF;
|
||||||
|
const uint32_t TrieMap::TERMINAL_LINK_FLAG = 0x800000;
|
||||||
|
const uint32_t TrieMap::TERMINAL_LINK_MASK = 0x7FFFFF;
|
||||||
|
const int TrieMap::NUM_OF_BITS_USED_FOR_ONE_LEVEL = 5;
|
||||||
|
const uint32_t TrieMap::LABEL_MASK = 0x1F;
|
||||||
|
const int TrieMap::MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL = 1 << NUM_OF_BITS_USED_FOR_ONE_LEVEL;
|
||||||
|
const int TrieMap::ROOT_BITMAP_ENTRY_INDEX = 0;
|
||||||
|
const int TrieMap::ROOT_BITMAP_ENTRY_POS = MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL * FIELD0_SIZE;
|
||||||
|
const TrieMap::Entry TrieMap::EMPTY_BITMAP_ENTRY = TrieMap::Entry(0, 0);
|
||||||
|
const uint64_t TrieMap::MAX_VALUE =
|
||||||
|
(static_cast<uint64_t>(1) << ((FIELD0_SIZE + FIELD1_SIZE) * CHAR_BIT)) - 1;
|
||||||
|
const int TrieMap::MAX_BUFFER_SIZE = TERMINAL_LINK_MASK * ENTRY_SIZE;
|
||||||
|
|
||||||
|
/**
 * Creates an empty map: reserves the region that precedes the entries and stores an empty
 * bitmap entry as the root.
 */
TrieMap::TrieMap() : mBuffer(MAX_BUFFER_SIZE) {
    // Bytes [0, ROOT_BITMAP_ENTRY_POS) come before entry 0; the empty table links live there.
    mBuffer.extend(ROOT_BITMAP_ENTRY_POS);
    // The root map starts without any label set in its bitmap.
    writeEntry(EMPTY_BITMAP_ENTRY, ROOT_BITMAP_ENTRY_INDEX);
}
|
||||||
|
|
||||||
|
void TrieMap::dump(const int from, const int to) const {
|
||||||
|
AKLOGI("BufSize: %d", mBuffer.getTailPosition());
|
||||||
|
for (int i = from; i < to; ++i) {
|
||||||
|
AKLOGI("Entry[%d]: %x, %x", i, readField0(i), readField1(i));
|
||||||
|
}
|
||||||
|
int unusedRegionSize = 0;
|
||||||
|
for (int i = 1; i <= MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL; ++i) {
|
||||||
|
int index = readEmptyTableLink(i);
|
||||||
|
while (index != ROOT_BITMAP_ENTRY_INDEX) {
|
||||||
|
index = readField0(index);
|
||||||
|
unusedRegionSize += i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
AKLOGI("Unused Size: %d", unusedRegionSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
int TrieMap::getNextLevelBitmapEntryIndex(const int key, const int bitmapEntryIndex) {
|
||||||
|
const Entry bitmapEntry = readEntry(bitmapEntryIndex);
|
||||||
|
const uint32_t unsignedKey = static_cast<uint32_t>(key);
|
||||||
|
const int terminalEntryIndex = getTerminalEntryIndex(
|
||||||
|
unsignedKey, getBitShuffledKey(unsignedKey), bitmapEntry, 0 /* level */);
|
||||||
|
if (terminalEntryIndex == INVALID_INDEX) {
|
||||||
|
// Not found.
|
||||||
|
return INVALID_INDEX;
|
||||||
|
}
|
||||||
|
const Entry terminalEntry = readEntry(terminalEntryIndex);
|
||||||
|
if (terminalEntry.hasTerminalLink()) {
|
||||||
|
return terminalEntry.getValueEntryIndex() + 1;
|
||||||
|
}
|
||||||
|
// Create a value entry and a bitmap entry.
|
||||||
|
const int valueEntryIndex = allocateTable(2 /* entryCount */);
|
||||||
|
if (!writeEntry(Entry(0, terminalEntry.getValue()), valueEntryIndex)) {
|
||||||
|
return INVALID_INDEX;
|
||||||
|
}
|
||||||
|
if (!writeEntry(EMPTY_BITMAP_ENTRY, valueEntryIndex + 1)) {
|
||||||
|
return INVALID_INDEX;
|
||||||
|
}
|
||||||
|
if (!writeField1(valueEntryIndex | TERMINAL_LINK_FLAG, valueEntryIndex)) {
|
||||||
|
return INVALID_INDEX;
|
||||||
|
}
|
||||||
|
return valueEntryIndex + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Look up the key in the map that starts at the given bitmap entry.
 *
 * @param key the key.
 * @param bitmapEntryIndex the index of the bitmap entry of the map to search.
 * @return Result for the key. mIsValid is false when the key is not in the map.
 */
const TrieMap::Result TrieMap::get(const int key, const int bitmapEntryIndex) const {
    const uint32_t rawKey = static_cast<uint32_t>(key);
    const uint32_t hashedKey = getBitShuffledKey(rawKey);
    return getInternal(rawKey, hashedKey, bitmapEntryIndex, 0 /* level */);
}
|
||||||
|
|
||||||
|
bool TrieMap::put(const int key, const uint64_t value, const int bitmapEntryIndex) {
|
||||||
|
if (value > MAX_VALUE) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
const uint32_t unsignedKey = static_cast<uint32_t>(key);
|
||||||
|
return putInternal(unsignedKey, value, getBitShuffledKey(unsignedKey), bitmapEntryIndex,
|
||||||
|
readEntry(bitmapEntryIndex), 0 /* level */);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Shuffle bits of the key in the fixed order.
|
||||||
|
*
|
||||||
|
* This method is used as a hash function. This returns different values for different inputs.
|
||||||
|
*/
|
||||||
|
uint32_t TrieMap::getBitShuffledKey(const uint32_t key) const {
|
||||||
|
uint32_t shuffledKey = 0;
|
||||||
|
for (int i = 0; i < 4; ++i) {
|
||||||
|
const uint32_t keyPiece = (key >> (i * 8)) & 0xFF;
|
||||||
|
shuffledKey ^= ((keyPiece ^ (keyPiece << 7) ^ (keyPiece << 14) ^ (keyPiece << 21))
|
||||||
|
& 0x11111111) << i;
|
||||||
|
}
|
||||||
|
return shuffledKey;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool TrieMap::writeValue(const uint64_t value, const int terminalEntryIndex) {
|
||||||
|
if (value <= VALUE_MASK) {
|
||||||
|
// Write value into the terminal entry.
|
||||||
|
return writeField1(value | VALUE_FLAG, terminalEntryIndex);
|
||||||
|
}
|
||||||
|
// Create value entry and write value.
|
||||||
|
const int valueEntryIndex = allocateTable(2 /* entryCount */);
|
||||||
|
if (!writeEntry(Entry(value >> (FIELD1_SIZE * CHAR_BIT), value), valueEntryIndex)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (!writeEntry(EMPTY_BITMAP_ENTRY, valueEntryIndex + 1)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return writeField1(valueEntryIndex | TERMINAL_LINK_FLAG, terminalEntryIndex);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool TrieMap::updateValue(const Entry &terminalEntry, const uint64_t value,
|
||||||
|
const int terminalEntryIndex) {
|
||||||
|
if (!terminalEntry.hasTerminalLink()) {
|
||||||
|
return writeValue(value, terminalEntryIndex);
|
||||||
|
}
|
||||||
|
const int valueEntryIndex = terminalEntry.getValueEntryIndex();
|
||||||
|
return writeEntry(Entry(value >> (FIELD1_SIZE * CHAR_BIT), value), valueEntryIndex);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool TrieMap::freeTable(const int tableIndex, const int entryCount) {
|
||||||
|
if (!writeField0(readEmptyTableLink(entryCount), tableIndex)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return writeEmptyTableLink(tableIndex, entryCount);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Allocate table with entryCount-entries. Reuse freed table if possible.
|
||||||
|
*/
|
||||||
|
int TrieMap::allocateTable(const int entryCount) {
|
||||||
|
if (entryCount > 0 && entryCount <= MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL) {
|
||||||
|
const int tableIndex = readEmptyTableLink(entryCount);
|
||||||
|
if (tableIndex > 0) {
|
||||||
|
if (!writeEmptyTableLink(readField0(tableIndex), entryCount)) {
|
||||||
|
return INVALID_INDEX;
|
||||||
|
}
|
||||||
|
// Reuse the table.
|
||||||
|
return tableIndex;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Allocate memory space at tail position of the buffer.
|
||||||
|
const int mapIndex = getTailEntryIndex();
|
||||||
|
if (!mBuffer.extend(entryCount * ENTRY_SIZE)) {
|
||||||
|
return INVALID_INDEX;
|
||||||
|
}
|
||||||
|
return mapIndex;
|
||||||
|
}
|
||||||
|
|
||||||
|
int TrieMap::getTerminalEntryIndex(const uint32_t key, const uint32_t hashedKey,
|
||||||
|
const Entry &bitmapEntry, const int level) const {
|
||||||
|
const int label = getLabel(hashedKey, level);
|
||||||
|
if (!exists(bitmapEntry.getBitmap(), label)) {
|
||||||
|
return INVALID_INDEX;
|
||||||
|
}
|
||||||
|
const int entryIndex = bitmapEntry.getTableIndex() + popCount(bitmapEntry.getBitmap(), label);
|
||||||
|
const Entry entry = readEntry(entryIndex);
|
||||||
|
if (entry.isBitmapEntry()) {
|
||||||
|
// Move to the next level.
|
||||||
|
return getTerminalEntryIndex(key, hashedKey, entry, level + 1);
|
||||||
|
}
|
||||||
|
if (entry.getKey() == key) {
|
||||||
|
// Terminal entry is found.
|
||||||
|
return entryIndex;
|
||||||
|
}
|
||||||
|
return INVALID_INDEX;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get Result corresponding to the key.
|
||||||
|
*
|
||||||
|
* @param key the key.
|
||||||
|
* @param hashedKey the hashed key.
|
||||||
|
* @param bitmapEntryIndex the index of bitmap entry
|
||||||
|
* @param level current level
|
||||||
|
* @return Result instance corresponding to the key. mIsValid indicates whether the key is in the
|
||||||
|
* map.
|
||||||
|
*/
|
||||||
|
const TrieMap::Result TrieMap::getInternal(const uint32_t key, const uint32_t hashedKey,
|
||||||
|
const int bitmapEntryIndex, const int level) const {
|
||||||
|
const int terminalEntryIndex = getTerminalEntryIndex(key, hashedKey,
|
||||||
|
readEntry(bitmapEntryIndex), level);
|
||||||
|
if (terminalEntryIndex == INVALID_INDEX) {
|
||||||
|
// Not found.
|
||||||
|
return Result(0, false, INVALID_INDEX);
|
||||||
|
}
|
||||||
|
const Entry terminalEntry = readEntry(terminalEntryIndex);
|
||||||
|
if (!terminalEntry.hasTerminalLink()) {
|
||||||
|
return Result(terminalEntry.getValue(), true, INVALID_INDEX);
|
||||||
|
}
|
||||||
|
const int valueEntryIndex = terminalEntry.getValueEntryIndex();
|
||||||
|
const Entry valueEntry = readEntry(valueEntryIndex);
|
||||||
|
return Result(valueEntry.getValueOfValueEntry(), true, valueEntryIndex + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Put key to value mapping to the map.
|
||||||
|
*
|
||||||
|
* @param key the key.
|
||||||
|
* @param value the value
|
||||||
|
* @param hashedKey the hashed key.
|
||||||
|
* @param bitmapEntryIndex the index of bitmap entry
|
||||||
|
* @param bitmapEntry the bitmap entry
|
||||||
|
* @param level current level
|
||||||
|
* @return whether the key-value has been correctly inserted to the map or not.
|
||||||
|
*/
|
||||||
|
bool TrieMap::putInternal(const uint32_t key, const uint64_t value, const uint32_t hashedKey,
|
||||||
|
const int bitmapEntryIndex, const Entry &bitmapEntry, const int level) {
|
||||||
|
const int label = getLabel(hashedKey, level);
|
||||||
|
const uint32_t bitmap = bitmapEntry.getBitmap();
|
||||||
|
const int mapIndex = bitmapEntry.getTableIndex();
|
||||||
|
if (!exists(bitmap, label)) {
|
||||||
|
// Current map doesn't contain the label.
|
||||||
|
return addNewEntryByExpandingTable(key, value, mapIndex, bitmap, bitmapEntryIndex, label);
|
||||||
|
}
|
||||||
|
const int entryIndex = mapIndex + popCount(bitmap, label);
|
||||||
|
const Entry entry = readEntry(entryIndex);
|
||||||
|
if (entry.isBitmapEntry()) {
|
||||||
|
// Bitmap entry is found. Go to the next level.
|
||||||
|
return putInternal(key, value, hashedKey, entryIndex, entry, level + 1);
|
||||||
|
}
|
||||||
|
if (entry.getKey() == key) {
|
||||||
|
// Terminal entry for the key is found. Update the value.
|
||||||
|
return updateValue(entry, value, entryIndex);
|
||||||
|
}
|
||||||
|
// Conflict with the existing key.
|
||||||
|
return addNewEntryByResolvingConflict(key, value, hashedKey, entry, entryIndex, level);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Resolve a conflict in the current level and add new entry.
|
||||||
|
*
|
||||||
|
* @param key the key
|
||||||
|
* @param value the value
|
||||||
|
* @param hashedKey the hashed key
|
||||||
|
* @param conflictedEntry the existing conflicted entry
|
||||||
|
* @param conflictedEntryIndex the index of existing conflicted entry
|
||||||
|
* @param level current level
|
||||||
|
* @return whether the key-value has been correctly inserted to the map or not.
|
||||||
|
*/
|
||||||
|
bool TrieMap::addNewEntryByResolvingConflict(const uint32_t key, const uint64_t value,
|
||||||
|
const uint32_t hashedKey, const Entry &conflictedEntry, const int conflictedEntryIndex,
|
||||||
|
const int level) {
|
||||||
|
const int conflictedKeyNextLabel =
|
||||||
|
getLabel(getBitShuffledKey(conflictedEntry.getKey()), level + 1);
|
||||||
|
const int nextLabel = getLabel(hashedKey, level + 1);
|
||||||
|
if (conflictedKeyNextLabel == nextLabel) {
|
||||||
|
// Conflicted again in the next level.
|
||||||
|
const int newTableIndex = allocateTable(1 /* entryCount */);
|
||||||
|
if (newTableIndex == INVALID_INDEX) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (!writeEntry(conflictedEntry, newTableIndex)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
const Entry newBitmapEntry(setExist(0 /* bitmap */, nextLabel), newTableIndex);
|
||||||
|
if (!writeEntry(newBitmapEntry, conflictedEntryIndex)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return putInternal(key, value, hashedKey, conflictedEntryIndex, newBitmapEntry, level + 1);
|
||||||
|
}
|
||||||
|
// The conflict has been resolved. Create a table that contains 2 entries.
|
||||||
|
const int newTableIndex = allocateTable(2 /* entryCount */);
|
||||||
|
if (newTableIndex == INVALID_INDEX) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (nextLabel < conflictedKeyNextLabel) {
|
||||||
|
if (!writeTerminalEntry(key, value, newTableIndex)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (!writeEntry(conflictedEntry, newTableIndex + 1)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
} else { // nextLabel > conflictedKeyNextLabel
|
||||||
|
if (!writeEntry(conflictedEntry, newTableIndex)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (!writeTerminalEntry(key, value, newTableIndex + 1)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
const uint32_t updatedBitmap =
|
||||||
|
setExist(setExist(0 /* bitmap */, nextLabel), conflictedKeyNextLabel);
|
||||||
|
return writeEntry(Entry(updatedBitmap, newTableIndex), conflictedEntryIndex);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Add new entry to the existing table.
|
||||||
|
*/
|
||||||
|
bool TrieMap::addNewEntryByExpandingTable(const uint32_t key, const uint64_t value,
|
||||||
|
const int tableIndex, const uint32_t bitmap, const int bitmapEntryIndex, const int label) {
|
||||||
|
// Current map doesn't contain the label.
|
||||||
|
const int entryCount = popCount(bitmap);
|
||||||
|
const int newTableIndex = allocateTable(entryCount + 1);
|
||||||
|
if (newTableIndex == INVALID_INDEX) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
const int newEntryIndexInTable = popCount(bitmap, label);
|
||||||
|
// Copy from existing table to the new table.
|
||||||
|
for (int i = 0; i < entryCount; ++i) {
|
||||||
|
if (!copyEntry(tableIndex + i, newTableIndex + i + (i >= newEntryIndexInTable ? 1 : 0))) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Write new terminal entry.
|
||||||
|
if (!writeTerminalEntry(key, value, newTableIndex + newEntryIndexInTable)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Update bitmap.
|
||||||
|
if (!writeEntry(Entry(setExist(bitmap, label), newTableIndex), bitmapEntryIndex)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (entryCount > 0) {
|
||||||
|
return freeTable(tableIndex, entryCount);
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace latinime
|
253
native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.h
Normal file
253
native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.h
Normal file
|
@ -0,0 +1,253 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2014, The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LATINIME_TRIE_MAP_H
|
||||||
|
#define LATINIME_TRIE_MAP_H
|
||||||
|
|
||||||
|
#include <climits>
|
||||||
|
#include <cstdint>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "defines.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Trie map derived from Phil Bagwell's Hash Array Mapped Trie.
|
||||||
|
* key is int and value is uint64_t.
|
||||||
|
* This supports multiple level map. Terminal entries can have a bitmap for the next level map.
|
||||||
|
* This doesn't support root map resizing.
|
||||||
|
*/
|
||||||
|
class TrieMap {
|
||||||
|
public:
|
||||||
|
struct Result {
|
||||||
|
const uint64_t mValue;
|
||||||
|
const bool mIsValid;
|
||||||
|
const int mNextLevelBitmapEntryIndex;
|
||||||
|
|
||||||
|
Result(const uint64_t value, const bool isValid, const int nextLevelBitmapEntryIndex)
|
||||||
|
: mValue(value), mIsValid(isValid),
|
||||||
|
mNextLevelBitmapEntryIndex(nextLevelBitmapEntryIndex) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
static const int INVALID_INDEX;
|
||||||
|
static const uint64_t MAX_VALUE;
|
||||||
|
|
||||||
|
TrieMap();
|
||||||
|
void dump(const int from = 0, const int to = 0) const;
|
||||||
|
|
||||||
|
bool isNearSizeLimit() const {
|
||||||
|
return mBuffer.isNearSizeLimit();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns bitmapEntryIndex. Create the next level map if it doesn't exist.
|
||||||
|
int getNextLevelBitmapEntryIndex(const int key) {
|
||||||
|
return getNextLevelBitmapEntryIndex(key, ROOT_BITMAP_ENTRY_INDEX);
|
||||||
|
}
|
||||||
|
|
||||||
|
int getNextLevelBitmapEntryIndex(const int key, const int bitmapEntryIndex);
|
||||||
|
|
||||||
|
const Result getRoot(const int key) const {
|
||||||
|
return get(key, ROOT_BITMAP_ENTRY_INDEX);
|
||||||
|
}
|
||||||
|
|
||||||
|
const Result get(const int key, const int bitmapEntryIndex) const;
|
||||||
|
|
||||||
|
bool putRoot(const int key, const uint64_t value) {
|
||||||
|
return put(key, value, ROOT_BITMAP_ENTRY_INDEX);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool put(const int key, const uint64_t value, const int bitmapEntryIndex);
|
||||||
|
|
||||||
|
private:
|
||||||
|
DISALLOW_COPY_AND_ASSIGN(TrieMap);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Struct represents an entry.
|
||||||
|
*
|
||||||
|
* Entry is one of these entry types. All entries are fixed size and have 2 fields FIELD_0 and
|
||||||
|
* FIELD_1.
|
||||||
|
* 1. bitmap entry. bitmap entry contains bitmap and the link to hash table.
|
||||||
|
* FIELD_0(bitmap) FIELD_1(LINK_TO_HASH_TABLE)
|
||||||
|
* 2. terminal entry. terminal entry contains hashed key and value or terminal link. terminal
|
||||||
|
* entry have terminal link when the value is not fit to FIELD_1 or there is a next level map
|
||||||
|
* for the key.
|
||||||
|
* FIELD_0(hashed key) (FIELD_1(VALUE_FLAG VALUE) | FIELD_1(TERMINAL_LINK_FLAG TERMINAL_LINK))
|
||||||
|
* 3. value entry. value entry represents a value. Upper order bytes are stored in FIELD_0 and
|
||||||
|
* lower order bytes are stored in FIELD_1.
|
||||||
|
* FIELD_0(value (upper order bytes)) FIELD_1(value (lower order bytes))
|
||||||
|
*/
|
||||||
|
struct Entry {
|
||||||
|
const uint32_t mData0;
|
||||||
|
const uint32_t mData1;
|
||||||
|
|
||||||
|
Entry(const uint32_t data0, const uint32_t data1) : mData0(data0), mData1(data1) {}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE bool isBitmapEntry() const {
|
||||||
|
return (mData1 & VALUE_FLAG) == 0 && (mData1 & TERMINAL_LINK_FLAG) == 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE bool hasTerminalLink() const {
|
||||||
|
return (mData1 & TERMINAL_LINK_FLAG) != 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// For terminal entry.
|
||||||
|
AK_FORCE_INLINE uint32_t getKey() const {
|
||||||
|
return mData0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// For terminal entry.
|
||||||
|
AK_FORCE_INLINE uint32_t getValue() const {
|
||||||
|
return mData1 & VALUE_MASK;
|
||||||
|
}
|
||||||
|
|
||||||
|
// For terminal entry.
|
||||||
|
AK_FORCE_INLINE uint32_t getValueEntryIndex() const {
|
||||||
|
return mData1 & TERMINAL_LINK_MASK;
|
||||||
|
}
|
||||||
|
|
||||||
|
// For bitmap entry.
|
||||||
|
AK_FORCE_INLINE uint32_t getBitmap() const {
|
||||||
|
return mData0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// For bitmap entry.
|
||||||
|
AK_FORCE_INLINE int getTableIndex() const {
|
||||||
|
return static_cast<int>(mData1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// For value entry.
|
||||||
|
AK_FORCE_INLINE uint64_t getValueOfValueEntry() const {
|
||||||
|
return ((static_cast<uint64_t>(mData0) << (FIELD1_SIZE * CHAR_BIT)) ^ mData1);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
BufferWithExtendableBuffer mBuffer;
|
||||||
|
|
||||||
|
static const int FIELD0_SIZE;
|
||||||
|
static const int FIELD1_SIZE;
|
||||||
|
static const int ENTRY_SIZE;
|
||||||
|
static const uint32_t VALUE_FLAG;
|
||||||
|
static const uint32_t VALUE_MASK;
|
||||||
|
static const uint32_t TERMINAL_LINK_FLAG;
|
||||||
|
static const uint32_t TERMINAL_LINK_MASK;
|
||||||
|
static const int NUM_OF_BITS_USED_FOR_ONE_LEVEL;
|
||||||
|
static const uint32_t LABEL_MASK;
|
||||||
|
static const int MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL;
|
||||||
|
static const int ROOT_BITMAP_ENTRY_INDEX;
|
||||||
|
static const int ROOT_BITMAP_ENTRY_POS;
|
||||||
|
static const Entry EMPTY_BITMAP_ENTRY;
|
||||||
|
static const int MAX_BUFFER_SIZE;
|
||||||
|
|
||||||
|
uint32_t getBitShuffledKey(const uint32_t key) const;
|
||||||
|
bool writeValue(const uint64_t value, const int terminalEntryIndex);
|
||||||
|
bool updateValue(const Entry &terminalEntry, const uint64_t value,
|
||||||
|
const int terminalEntryIndex);
|
||||||
|
bool freeTable(const int tableIndex, const int entryCount);
|
||||||
|
int allocateTable(const int entryCount);
|
||||||
|
int getTerminalEntryIndex(const uint32_t key, const uint32_t hashedKey,
|
||||||
|
const Entry &bitmapEntry, const int level) const;
|
||||||
|
const Result getInternal(const uint32_t key, const uint32_t hashedKey,
|
||||||
|
const int bitmapEntryIndex, const int level) const;
|
||||||
|
bool putInternal(const uint32_t key, const uint64_t value, const uint32_t hashedKey,
|
||||||
|
const int bitmapEntryIndex, const Entry &bitmapEntry, const int level);
|
||||||
|
bool addNewEntryByResolvingConflict(const uint32_t key, const uint64_t value,
|
||||||
|
const uint32_t hashedKey, const Entry &conflictedEntry, const int conflictedEntryIndex,
|
||||||
|
const int level);
|
||||||
|
bool addNewEntryByExpandingTable(const uint32_t key, const uint64_t value,
|
||||||
|
const int tableIndex, const uint32_t bitmap, const int bitmapEntryIndex,
|
||||||
|
const int label);
|
||||||
|
|
||||||
|
AK_FORCE_INLINE const Entry readEntry(const int entryIndex) const {
|
||||||
|
return Entry(readField0(entryIndex), readField1(entryIndex));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns whether an entry for the index is existing by testing if the index-th bit in the
|
||||||
|
// bitmap is set or not.
|
||||||
|
AK_FORCE_INLINE bool exists(const uint32_t bitmap, const int index) const {
|
||||||
|
return (bitmap & (1 << index)) != 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set index-th bit in the bitmap.
|
||||||
|
AK_FORCE_INLINE uint32_t setExist(const uint32_t bitmap, const int index) const {
|
||||||
|
return bitmap | (1 << index);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Count set bits before index in the bitmap.
|
||||||
|
AK_FORCE_INLINE int popCount(const uint32_t bitmap, const int index) const {
|
||||||
|
return popCount(bitmap & ((1 << index) - 1));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Count set bits in the bitmap.
|
||||||
|
AK_FORCE_INLINE int popCount(const uint32_t bitmap) const {
|
||||||
|
return __builtin_popcount(bitmap);
|
||||||
|
// int v = bitmap - ((bitmap >> 1) & 0x55555555);
|
||||||
|
// v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
|
||||||
|
// return (((v + (v >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24;
|
||||||
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE int getLabel(const uint32_t hashedKey, const int level) const {
|
||||||
|
return (hashedKey >> (level * NUM_OF_BITS_USED_FOR_ONE_LEVEL)) & LABEL_MASK;
|
||||||
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE uint32_t readField0(const int entryIndex) const {
|
||||||
|
return mBuffer.readUint(FIELD0_SIZE, ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE);
|
||||||
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE uint32_t readField1(const int entryIndex) const {
|
||||||
|
return mBuffer.readUint(FIELD1_SIZE,
|
||||||
|
ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE + FIELD0_SIZE);
|
||||||
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE int readEmptyTableLink(const int entryCount) const {
|
||||||
|
return mBuffer.readUint(FIELD1_SIZE, (entryCount - 1) * FIELD1_SIZE);
|
||||||
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE bool writeEmptyTableLink(const int tableIndex, const int entryCount) {
|
||||||
|
return mBuffer.writeUint(tableIndex, FIELD1_SIZE, (entryCount - 1) * FIELD1_SIZE);
|
||||||
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE bool writeField0(const uint32_t data, const int entryIndex) {
|
||||||
|
return mBuffer.writeUint(data, FIELD0_SIZE,
|
||||||
|
ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE);
|
||||||
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE bool writeField1(const uint32_t data, const int entryIndex) {
|
||||||
|
return mBuffer.writeUint(data, FIELD1_SIZE,
|
||||||
|
ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE + FIELD0_SIZE);
|
||||||
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE bool writeEntry(const Entry &entry, const int entryIndex) {
|
||||||
|
return writeField0(entry.mData0, entryIndex) && writeField1(entry.mData1, entryIndex);
|
||||||
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE bool writeTerminalEntry(const uint32_t key, const uint64_t value,
|
||||||
|
const int entryIndex) {
|
||||||
|
return writeField0(key, entryIndex) && writeValue(value, entryIndex);
|
||||||
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE bool copyEntry(const int originalEntryIndex, const int newEntryIndex) {
|
||||||
|
return writeEntry(readEntry(originalEntryIndex), newEntryIndex);
|
||||||
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE int getTailEntryIndex() const {
|
||||||
|
return (mBuffer.getTailPosition() - ROOT_BITMAP_ENTRY_POS) / ENTRY_SIZE;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace latinime
|
||||||
|
#endif /* LATINIME_TRIE_MAP_H */
|
|
@ -0,0 +1,169 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2014 The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "suggest/policyimpl/dictionary/utils/trie_map.h"
|
||||||
|
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <functional>
|
||||||
|
#include <map>
|
||||||
|
#include <random>
|
||||||
|
#include <unordered_map>
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
// Basic put/get behavior: insertion, overwriting, a next level map and a large value.
TEST(TrieMapTest, TestSetAndGet) {
    const int firstKey = 10;
    const int secondKey = 0x10A;
    const int thirdKey = 11;
    TrieMap trieMap;

    // A single key can be read back.
    trieMap.putRoot(firstKey, 10);
    EXPECT_EQ(10ull, trieMap.getRoot(firstKey).mValue);

    // Adding a second key keeps the first one intact.
    trieMap.putRoot(secondKey, 10);
    EXPECT_EQ(10ull, trieMap.getRoot(firstKey).mValue);
    EXPECT_EQ(10ull, trieMap.getRoot(secondKey).mValue);

    // Putting an existing key again replaces its value.
    trieMap.putRoot(firstKey, 1000);
    EXPECT_EQ(1000ull, trieMap.getRoot(firstKey).mValue);
    trieMap.putRoot(thirdKey, 1000);
    EXPECT_EQ(1000ull, trieMap.getRoot(thirdKey).mValue);

    // The next level map of a key has its own key space.
    const int nextLevelBitmapEntryIndex = trieMap.getNextLevelBitmapEntryIndex(firstKey);
    trieMap.put(9, 9, nextLevelBitmapEntryIndex);
    EXPECT_EQ(9ull, trieMap.get(9, nextLevelBitmapEntryIndex).mValue);
    EXPECT_FALSE(trieMap.get(thirdKey, nextLevelBitmapEntryIndex).mIsValid);

    // A value that needs more than 32 bits can be stored as well.
    trieMap.putRoot(0, 0xFFFFFFFFFull);
    EXPECT_EQ(0xFFFFFFFFFull, trieMap.getRoot(0).mValue);
}
|
||||||
|
|
||||||
|
// Stores many sequential keys, each mapped to itself, and reads all of them back.
TEST(TrieMapTest, TestSetAndGetLarge) {
    static const int ELEMENT_COUNT = 200000;
    TrieMap trieMap;
    for (int key = 0; key < ELEMENT_COUNT; ++key) {
        EXPECT_TRUE(trieMap.putRoot(key, key));
    }
    for (int key = 0; key < ELEMENT_COUNT; ++key) {
        EXPECT_EQ(trieMap.getRoot(key).mValue, static_cast<uint64_t>(key));
    }
}
|
||||||
|
|
||||||
|
// Stores pseudo random key-value pairs and compares the map against a std::unordered_map that
// received the same pairs. Both engines are default seeded, so the run is reproducible.
TEST(TrieMapTest, TestRandSetAndGetLarge) {
    static const int ELEMENT_COUNT = 100000;
    TrieMap trieMap;
    std::unordered_map<int, uint64_t> expectedEntries;

    // Keys: the uniform integer distribution [S_INT_MIN, S_INT_MAX].
    std::mt19937 keyEngine;
    std::uniform_int_distribution<int> keyDistribution(S_INT_MIN, S_INT_MAX);

    // Values: the uniform distribution [0, TrieMap::MAX_VALUE].
    std::mt19937 valueEngine;
    std::uniform_int_distribution<uint64_t> valueDistribution(0, TrieMap::MAX_VALUE);

    for (int i = 0; i < ELEMENT_COUNT; ++i) {
        const int key = keyDistribution(keyEngine);
        const uint64_t value = valueDistribution(valueEngine);
        EXPECT_TRUE(trieMap.putRoot(key, value)) << key << " " << value;
        // A key that is generated twice keeps the value that was put last.
        expectedEntries[key] = value;
    }
    for (const auto &expectedEntry : expectedEntries) {
        EXPECT_EQ(trieMap.getRoot(expectedEntry.first).mValue, expectedEntry.second);
    }
}
|
||||||
|
|
||||||
|
// Fills a TrieMap with random entries on three nested levels and verifies each level against
// std::map mirrors that were populated with the same keys and values.
TEST(TrieMapTest, TestMultiLevel) {
    static const int FIRST_LEVEL_ENTRY_COUNT = 10000;
    static const int SECOND_LEVEL_ENTRY_COUNT = 20000;
    static const int THIRD_LEVEL_ENTRY_COUNT = 40000;

    TrieMap trieMap;

    // Expected contents of the trie map, one mirror per level.
    std::map<int, uint64_t> expectedRootEntries;
    std::map<int, std::map<int, uint64_t>> expectedSecondLevel;
    std::map<int, std::map<int, std::map<int, uint64_t>>> expectedThirdLevel;

    // Inserted keys in insertion order; used to pick a random parent for deeper entries.
    std::vector<int> rootKeys;
    std::vector<std::pair<int, int>> secondLevelPaths;

    // Use the uniform integer distribution [0, S_INT_MAX].
    std::uniform_int_distribution<int> keyDistribution(0, S_INT_MAX);
    auto nextKey = std::bind(keyDistribution, std::mt19937());
    auto nextParentSelector = std::bind(keyDistribution, std::mt19937());

    // Use the uniform distribution [0, TrieMap::MAX_VALUE].
    std::uniform_int_distribution<uint64_t> valueDistribution(0, TrieMap::MAX_VALUE);
    auto nextValue = std::bind(valueDistribution, std::mt19937());

    // First level: entries stored directly under the root.
    for (int count = 0; count < FIRST_LEVEL_ENTRY_COUNT; ++count) {
        const int rootKey = nextKey();
        const uint64_t rootValue = nextValue();
        EXPECT_TRUE(trieMap.putRoot(rootKey, rootValue));
        rootKeys.push_back(rootKey);
        expectedRootEntries[rootKey] = rootValue;
    }

    // Second level: each entry is stored below a randomly selected first level key.
    for (int count = 0; count < SECOND_LEVEL_ENTRY_COUNT; ++count) {
        const int childKey = nextKey();
        const uint64_t childValue = nextValue();
        const int parentKey = rootKeys[nextParentSelector() % FIRST_LEVEL_ENTRY_COUNT];
        const int childLevelIndex = trieMap.getNextLevelBitmapEntryIndex(parentKey);
        EXPECT_NE(TrieMap::INVALID_INDEX, childLevelIndex);
        EXPECT_TRUE(trieMap.put(childKey, childValue, childLevelIndex));
        secondLevelPaths.emplace_back(parentKey, childKey);
        expectedSecondLevel[parentKey][childKey] = childValue;
    }

    // Third level: each entry is stored below a randomly selected (first, second) key pair.
    for (int count = 0; count < THIRD_LEVEL_ENTRY_COUNT; ++count) {
        const int leafKey = nextKey();
        const uint64_t leafValue = nextValue();
        const std::pair<int, int> &parentPath =
                secondLevelPaths[nextParentSelector() % SECOND_LEVEL_ENTRY_COUNT];
        const int secondLevelIndex = trieMap.getNextLevelBitmapEntryIndex(parentPath.first);
        EXPECT_NE(TrieMap::INVALID_INDEX, secondLevelIndex);
        const int thirdLevelIndex =
                trieMap.getNextLevelBitmapEntryIndex(parentPath.second, secondLevelIndex);
        EXPECT_NE(TrieMap::INVALID_INDEX, thirdLevelIndex);
        EXPECT_TRUE(trieMap.put(leafKey, leafValue, thirdLevelIndex));
        expectedThirdLevel[parentPath.first][parentPath.second][leafKey] = leafValue;
    }

    // Verify the first level.
    for (const auto &rootEntry : expectedRootEntries) {
        EXPECT_EQ(rootEntry.second, trieMap.getRoot(rootEntry.first).mValue);
    }

    // Verify the second level.
    for (const auto &rootEntry : expectedSecondLevel) {
        const int secondLevelIndex = trieMap.getNextLevelBitmapEntryIndex(rootEntry.first);
        EXPECT_NE(TrieMap::INVALID_INDEX, secondLevelIndex);
        for (const auto &childEntry : rootEntry.second) {
            EXPECT_EQ(childEntry.second, trieMap.get(childEntry.first, secondLevelIndex).mValue);
        }
    }

    // Verify the third level.
    for (const auto &rootEntry : expectedThirdLevel) {
        const int secondLevelIndex = trieMap.getNextLevelBitmapEntryIndex(rootEntry.first);
        EXPECT_NE(TrieMap::INVALID_INDEX, secondLevelIndex);
        for (const auto &childEntry : rootEntry.second) {
            const int thirdLevelIndex =
                    trieMap.getNextLevelBitmapEntryIndex(childEntry.first, secondLevelIndex);
            EXPECT_NE(TrieMap::INVALID_INDEX, thirdLevelIndex);
            for (const auto &leafEntry : childEntry.second) {
                EXPECT_EQ(leafEntry.second, trieMap.get(leafEntry.first, thirdLevelIndex).mValue);
            }
        }
    }
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
} // namespace latinime
|
Loading…
Reference in a new issue