Merge "Implement SparseTable in native code."
This commit is contained in:
commit
2d3374bb58
10 changed files with 134 additions and 20 deletions
|
@ -96,7 +96,8 @@ LATIN_IME_CORE_SRC_FILES := \
|
||||||
dict_file_writing_utils.cpp \
|
dict_file_writing_utils.cpp \
|
||||||
forgetting_curve_utils.cpp \
|
forgetting_curve_utils.cpp \
|
||||||
format_utils.cpp \
|
format_utils.cpp \
|
||||||
mmapped_buffer.cpp) \
|
mmapped_buffer.cpp \
|
||||||
|
sparse_table.cpp) \
|
||||||
suggest/policyimpl/gesture/gesture_suggest_policy_factory.cpp \
|
suggest/policyimpl/gesture/gesture_suggest_policy_factory.cpp \
|
||||||
$(addprefix suggest/policyimpl/typing/, \
|
$(addprefix suggest/policyimpl/typing/, \
|
||||||
scoring_params.cpp \
|
scoring_params.cpp \
|
||||||
|
|
|
@ -21,6 +21,7 @@
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/content/dict_content.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/content/dict_content.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
|
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/utils/sparse_table.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
|
@ -29,7 +30,8 @@ class SparseTableDictContent : public DictContent {
|
||||||
public:
|
public:
|
||||||
AK_FORCE_INLINE SparseTableDictContent(const char *const dictDirPath,
|
AK_FORCE_INLINE SparseTableDictContent(const char *const dictDirPath,
|
||||||
const char *const lookupTableFileName, const char *const addressTableFileName,
|
const char *const lookupTableFileName, const char *const addressTableFileName,
|
||||||
const char *const contentFileName, const bool isUpdatable)
|
const char *const contentFileName, const bool isUpdatable,
|
||||||
|
const int sparseTableBlockSize, const int sparseTableDataSize)
|
||||||
: mLookupTableBuffer(
|
: mLookupTableBuffer(
|
||||||
MmappedBuffer::openBuffer(dictDirPath, lookupTableFileName, isUpdatable)),
|
MmappedBuffer::openBuffer(dictDirPath, lookupTableFileName, isUpdatable)),
|
||||||
mAddressTableBuffer(
|
mAddressTableBuffer(
|
||||||
|
@ -45,7 +47,9 @@ class SparseTableDictContent : public DictContent {
|
||||||
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
|
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
|
||||||
mExpandableContentBuffer(mContentBuffer.get() ? mContentBuffer.get()->getBuffer() : 0,
|
mExpandableContentBuffer(mContentBuffer.get() ? mContentBuffer.get()->getBuffer() : 0,
|
||||||
mContentBuffer.get() ? mContentBuffer.get()->getBufferSize() : 0,
|
mContentBuffer.get() ? mContentBuffer.get()->getBufferSize() : 0,
|
||||||
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE) {}
|
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
|
||||||
|
mAddressLookupTable(&mExpandableLookupTableBuffer, &mExpandableAddressTableBuffer,
|
||||||
|
sparseTableBlockSize, sparseTableDataSize) {}
|
||||||
|
|
||||||
virtual ~SparseTableDictContent() {}
|
virtual ~SparseTableDictContent() {}
|
||||||
|
|
||||||
|
@ -64,6 +68,7 @@ class SparseTableDictContent : public DictContent {
|
||||||
BufferWithExtendableBuffer mExpandableLookupTableBuffer;
|
BufferWithExtendableBuffer mExpandableLookupTableBuffer;
|
||||||
BufferWithExtendableBuffer mExpandableAddressTableBuffer;
|
BufferWithExtendableBuffer mExpandableAddressTableBuffer;
|
||||||
BufferWithExtendableBuffer mExpandableContentBuffer;
|
BufferWithExtendableBuffer mExpandableContentBuffer;
|
||||||
|
SparseTable mAddressLookupTable;
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif /* LATINIME_SPARSE_TABLE_DICT_CONTENT_H */
|
#endif /* LATINIME_SPARSE_TABLE_DICT_CONTENT_H */
|
||||||
|
|
|
@ -67,11 +67,15 @@ class Ver4DictBuffers {
|
||||||
mBigramDictContent(dictDirPath,
|
mBigramDictContent(dictDirPath,
|
||||||
Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION,
|
Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION,
|
||||||
Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION,
|
Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION,
|
||||||
Ver4DictConstants::BIGRAM_FILE_EXTENSION, isUpdatable),
|
Ver4DictConstants::BIGRAM_FILE_EXTENSION, isUpdatable,
|
||||||
|
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
|
||||||
|
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE),
|
||||||
mShortcutDictContent(dictDirPath,
|
mShortcutDictContent(dictDirPath,
|
||||||
Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION,
|
Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION,
|
||||||
Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION,
|
Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION,
|
||||||
Ver4DictConstants::SHORTCUT_FILE_EXTENSION, isUpdatable) {}
|
Ver4DictConstants::SHORTCUT_FILE_EXTENSION, isUpdatable,
|
||||||
|
Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE,
|
||||||
|
Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE) {}
|
||||||
|
|
||||||
const MmappedBuffer::MmappedBufferPtr mDictBuffer;
|
const MmappedBuffer::MmappedBufferPtr mDictBuffer;
|
||||||
SingleDictContent mTerminalAddressTable;
|
SingleDictContent mTerminalAddressTable;
|
||||||
|
|
|
@ -34,4 +34,9 @@ const int Ver4DictConstants::NOT_A_TERMINAL_ID = -1;
|
||||||
const int Ver4DictConstants::PROBABILITY_SIZE = 1;
|
const int Ver4DictConstants::PROBABILITY_SIZE = 1;
|
||||||
const int Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE = 1;
|
const int Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE = 1;
|
||||||
|
|
||||||
|
// Parameters of the bigram and shortcut address tables. They are handed to the
// SparseTableDictContent constructors as the sparse table block size (*_BLOCK_SIZE) and
// the sparse table data size (*_DATA_SIZE), which SparseTable uses as the read size of
// each table entry.
const int Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE = 4;
|
||||||
|
const int Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE = 4;
|
||||||
|
const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE = 16;
|
||||||
|
const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE = 4;
|
||||||
|
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
|
|
|
@ -38,6 +38,10 @@ class Ver4DictConstants {
|
||||||
static const int PROBABILITY_SIZE;
|
static const int PROBABILITY_SIZE;
|
||||||
static const int FLAGS_IN_PROBABILITY_FILE_SIZE;
|
static const int FLAGS_IN_PROBABILITY_FILE_SIZE;
|
||||||
|
|
||||||
|
static const int BIGRAM_ADDRESS_TABLE_BLOCK_SIZE;
|
||||||
|
static const int BIGRAM_ADDRESS_TABLE_DATA_SIZE;
|
||||||
|
static const int SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE;
|
||||||
|
static const int SHORTCUT_ADDRESS_TABLE_DATA_SIZE;
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictConstants);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictConstants);
|
||||||
};
|
};
|
||||||
|
|
|
@ -23,17 +23,16 @@ const int BufferWithExtendableBuffer::NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE = 9
|
||||||
// TODO: Needs to allocate larger memory corresponding to the current vector size.
|
// TODO: Needs to allocate larger memory corresponding to the current vector size.
|
||||||
const size_t BufferWithExtendableBuffer::EXTEND_ADDITIONAL_BUFFER_SIZE_STEP = 128 * 1024;
|
const size_t BufferWithExtendableBuffer::EXTEND_ADDITIONAL_BUFFER_SIZE_STEP = 128 * 1024;
|
||||||
|
|
||||||
|
uint32_t BufferWithExtendableBuffer::readUint(const int size, const int pos) const {
|
||||||
|
const bool readingPosIsInAdditionalBuffer = isInAdditionalBuffer(pos);
|
||||||
|
const int posInBuffer = readingPosIsInAdditionalBuffer ? pos - mOriginalBufferSize : pos;
|
||||||
|
return ByteArrayUtils::readUint(getBuffer(readingPosIsInAdditionalBuffer), size, posInBuffer);
|
||||||
|
}
|
||||||
|
|
||||||
uint32_t BufferWithExtendableBuffer::readUintAndAdvancePosition(const int size,
|
uint32_t BufferWithExtendableBuffer::readUintAndAdvancePosition(const int size,
|
||||||
int *const pos) const {
|
int *const pos) const {
|
||||||
const bool readingPosIsInAdditionalBuffer = isInAdditionalBuffer(*pos);
|
const int value = readUint(size, *pos);
|
||||||
if (readingPosIsInAdditionalBuffer) {
|
*pos += size;
|
||||||
*pos -= mOriginalBufferSize;
|
|
||||||
}
|
|
||||||
const int value = ByteArrayUtils::readUintAndAdvancePosition(
|
|
||||||
getBuffer(readingPosIsInAdditionalBuffer), size, pos);
|
|
||||||
if (readingPosIsInAdditionalBuffer) {
|
|
||||||
*pos += mOriginalBufferSize;
|
|
||||||
}
|
|
||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -71,6 +71,8 @@ class BufferWithExtendableBuffer {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
uint32_t readUint(const int size, const int pos) const;
|
||||||
|
|
||||||
uint32_t readUintAndAdvancePosition(const int size, int *const pos) const;
|
uint32_t readUintAndAdvancePosition(const int size, int *const pos) const;
|
||||||
|
|
||||||
AK_FORCE_INLINE int getOriginalBufferSize() const {
|
AK_FORCE_INLINE int getOriginalBufferSize() const {
|
||||||
|
|
|
@ -114,19 +114,19 @@ class ByteArrayUtils {
|
||||||
return buffer[(*pos)++];
|
return buffer[(*pos)++];
|
||||||
}
|
}
|
||||||
|
|
||||||
static AK_FORCE_INLINE int readUintAndAdvancePosition(const uint8_t *const buffer,
|
static AK_FORCE_INLINE int readUint(const uint8_t *const buffer,
|
||||||
const int size, int *const pos) {
|
const int size, const int pos) {
|
||||||
// size must be in 1 to 4.
|
// size must be in 1 to 4.
|
||||||
ASSERT(size >= 1 && size <= 4);
|
ASSERT(size >= 1 && size <= 4);
|
||||||
switch (size) {
|
switch (size) {
|
||||||
case 1:
|
case 1:
|
||||||
return ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos);
|
return ByteArrayUtils::readUint8(buffer, pos);
|
||||||
case 2:
|
case 2:
|
||||||
return ByteArrayUtils::readUint16AndAdvancePosition(buffer, pos);
|
return ByteArrayUtils::readUint16(buffer, pos);
|
||||||
case 3:
|
case 3:
|
||||||
return ByteArrayUtils::readUint24AndAdvancePosition(buffer, pos);
|
return ByteArrayUtils::readUint24(buffer, pos);
|
||||||
case 4:
|
case 4:
|
||||||
return ByteArrayUtils::readUint32AndAdvancePosition(buffer, pos);
|
return ByteArrayUtils::readUint32(buffer, pos);
|
||||||
default:
|
default:
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,42 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2013, The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "suggest/policyimpl/dictionary/utils/sparse_table.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
const int SparseTable::NOT_EXIST = -1;
|
||||||
|
|
||||||
|
bool SparseTable::contains(const int id) const {
|
||||||
|
if (id < 0 || mIndexTableBuffer->getTailPosition() <= id * mDataSize) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
const int indexTableIndex = id / mBlockSize;
|
||||||
|
const int readingPos = indexTableIndex * mDataSize;
|
||||||
|
const int index = mIndexTableBuffer->readUint(mDataSize, readingPos);
|
||||||
|
return index != NOT_EXIST;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t SparseTable::get(const int id) const {
|
||||||
|
const int indexTableIndex = id / mBlockSize;
|
||||||
|
int readingPos = indexTableIndex * mDataSize;
|
||||||
|
const int index = mIndexTableBuffer->readUint(mDataSize, readingPos);
|
||||||
|
const int offset = id % mBlockSize;
|
||||||
|
readingPos = (index * mDataSize + offset) * mBlockSize;
|
||||||
|
return mContentTableBuffer->readUint(mDataSize, readingPos);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace latinime
|
|
@ -0,0 +1,52 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2013, The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LATINIME_SPARSE_TABLE_H
|
||||||
|
#define LATINIME_SPARSE_TABLE_H
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#include "defines.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
// Note that there is a corresponding implementation in SparseTable.java.
|
||||||
|
// TODO: Support multiple content buffers.
|
||||||
|
class SparseTable {
|
||||||
|
public:
|
||||||
|
SparseTable(BufferWithExtendableBuffer *const indexTableBuffer,
|
||||||
|
BufferWithExtendableBuffer *const contentTableBuffer, const int blockSize,
|
||||||
|
const int dataSize)
|
||||||
|
: mIndexTableBuffer(indexTableBuffer), mContentTableBuffer(contentTableBuffer),
|
||||||
|
mBlockSize(blockSize), mDataSize(dataSize) {}
|
||||||
|
|
||||||
|
bool contains(const int id) const;
|
||||||
|
|
||||||
|
uint32_t get(const int id) const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
DISALLOW_IMPLICIT_CONSTRUCTORS(SparseTable);
|
||||||
|
|
||||||
|
static const int NOT_EXIST;
|
||||||
|
|
||||||
|
BufferWithExtendableBuffer *const mIndexTableBuffer;
|
||||||
|
BufferWithExtendableBuffer *const mContentTableBuffer;
|
||||||
|
const int mBlockSize;
|
||||||
|
const int mDataSize;
|
||||||
|
};
|
||||||
|
} // namespace latinime
|
||||||
|
#endif /* LATINIME_SPARSE_TABLE_H */
|
Loading…
Reference in a new issue