Merge "Implement ver4 shortcut reading method."
This commit is contained in:
commit
acf515ee6a
10 changed files with 173 additions and 15 deletions
|
@ -0,0 +1,71 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2013 The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LATINIME_VER4_SHORTCUT_LIST_POLICY_H
|
||||||
|
#define LATINIME_VER4_SHORTCUT_LIST_POLICY_H
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#include "defines.h"
|
||||||
|
#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
class Ver4ShortcutListPolicy : public DictionaryShortcutsStructurePolicy {
|
||||||
|
public:
|
||||||
|
Ver4ShortcutListPolicy(const ShortcutDictContent *const shortcutDictContent,
|
||||||
|
const TerminalPositionLookupTable *const terminalPositionLookupTable)
|
||||||
|
: mShortcutDictContent(shortcutDictContent),
|
||||||
|
mTerminalPositionLookupTable(terminalPositionLookupTable) {}
|
||||||
|
|
||||||
|
~Ver4ShortcutListPolicy() {}
|
||||||
|
|
||||||
|
int getStartPos(const int pos) const {
|
||||||
|
// The first shortcut entry is located at the head position of the shortcut list.
|
||||||
|
return pos;
|
||||||
|
}
|
||||||
|
|
||||||
|
void getNextShortcut(const int maxCodePointCount, int *const outCodePoint,
|
||||||
|
int *const outCodePointCount, bool *const outIsWhitelist, bool *const outHasNext,
|
||||||
|
int *const pos) const {
|
||||||
|
int shortcutFlags = 0;
|
||||||
|
if (outCodePoint && outCodePointCount) {
|
||||||
|
mShortcutDictContent->getShortcutEntryAndAdvancePosition(maxCodePointCount,
|
||||||
|
outCodePoint, outCodePointCount, &shortcutFlags, pos);
|
||||||
|
}
|
||||||
|
if (outHasNext) {
|
||||||
|
*outHasNext = ShortcutListReadingUtils::hasNext(shortcutFlags);
|
||||||
|
}
|
||||||
|
if (outIsWhitelist) {
|
||||||
|
*outIsWhitelist = ShortcutListReadingUtils::isWhitelist(shortcutFlags);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void skipAllShortcuts(int *const pos) const {
|
||||||
|
// Do nothing because we don't need to skip shortcut lists in ver4 dictionaries.
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4ShortcutListPolicy);
|
||||||
|
|
||||||
|
const ShortcutDictContent *const mShortcutDictContent;
|
||||||
|
const TerminalPositionLookupTable *const mTerminalPositionLookupTable;
|
||||||
|
};
|
||||||
|
} // namespace latinime
|
||||||
|
#endif // LATINIME_VER4_SHORTCUT_LIST_POLICY_H
|
|
@ -38,7 +38,7 @@ class BigramDictContent : public SparseTableDictContent {
|
||||||
const BufferWithExtendableBuffer *const bigramListBuffer = getContentBuffer();
|
const BufferWithExtendableBuffer *const bigramListBuffer = getContentBuffer();
|
||||||
if (outBigramFlags) {
|
if (outBigramFlags) {
|
||||||
*outBigramFlags = bigramListBuffer->readUintAndAdvancePosition(
|
*outBigramFlags = bigramListBuffer->readUintAndAdvancePosition(
|
||||||
Ver4DictConstants::BIGRAM_FRAGS_FIELD_SIZE, bigramEntryPos);
|
Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, bigramEntryPos);
|
||||||
}
|
}
|
||||||
if (outTargetTerminalId) {
|
if (outTargetTerminalId) {
|
||||||
*outTargetTerminalId = bigramListBuffer->readUintAndAdvancePosition(
|
*outTargetTerminalId = bigramListBuffer->readUintAndAdvancePosition(
|
||||||
|
|
|
@ -0,0 +1,63 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2013, The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LATINIME_SHORTCUT_DICT_CONTENT_H
|
||||||
|
#define LATINIME_SHORTCUT_DICT_CONTENT_H
|
||||||
|
|
||||||
|
#include "defines.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
class ShortcutDictContent : public SparseTableDictContent {
|
||||||
|
public:
|
||||||
|
ShortcutDictContent(const char *const dictDirPath, const bool isUpdatable)
|
||||||
|
: SparseTableDictContent(dictDirPath,
|
||||||
|
Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION,
|
||||||
|
Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION,
|
||||||
|
Ver4DictConstants::SHORTCUT_FILE_EXTENSION, isUpdatable,
|
||||||
|
Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE,
|
||||||
|
Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE) {}
|
||||||
|
|
||||||
|
void getShortcutEntryAndAdvancePosition(const int maxCodePointCount,
|
||||||
|
int *const outCodePoint, int *const outCodePointCount, int *const outShortcutFlags,
|
||||||
|
int *const shortcutEntryPos) const {
|
||||||
|
const BufferWithExtendableBuffer *const shortcutListBuffer = getContentBuffer();
|
||||||
|
if (outShortcutFlags) {
|
||||||
|
*outShortcutFlags = shortcutListBuffer->readUintAndAdvancePosition(
|
||||||
|
Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE, shortcutEntryPos);
|
||||||
|
}
|
||||||
|
if (outCodePoint && outCodePointCount) {
|
||||||
|
shortcutListBuffer->readCodePointsAndAdvancePosition(
|
||||||
|
maxCodePointCount, outCodePoint, outCodePointCount, shortcutEntryPos);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns head position of shortcut list for a PtNode specified by terminalId.
|
||||||
|
int getShortcutListHeadPos(const int terminalId) const {
|
||||||
|
const SparseTable *const addressLookupTable = getAddressLookupTable();
|
||||||
|
if (!addressLookupTable->contains(terminalId)) {
|
||||||
|
return NOT_A_DICT_POS;
|
||||||
|
}
|
||||||
|
return addressLookupTable->get(terminalId);
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
DISALLOW_IMPLICIT_CONSTRUCTORS(ShortcutDictContent);
|
||||||
|
};
|
||||||
|
} // namespace latinime
|
||||||
|
#endif /* LATINIME_SHORTCUT_DICT_CONTENT_H */
|
|
@ -21,8 +21,7 @@
|
||||||
#include "suggest/policyimpl/dictionary/header/header_read_write_utils.h"
|
#include "suggest/policyimpl/dictionary/header/header_read_write_utils.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||||
|
@ -66,6 +65,10 @@ class Ver4DictBuffers {
|
||||||
return &mBigramDictContent;
|
return &mBigramDictContent;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Gives read-only access to the shortcut content held by this object. The returned pointer
// refers to the mShortcutDictContent member.
AK_FORCE_INLINE const ShortcutDictContent *getShortcutDictContent() const {
    const ShortcutDictContent *const shortcutContent = &mShortcutDictContent;
    return shortcutContent;
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictBuffers);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictBuffers);
|
||||||
|
|
||||||
|
@ -77,18 +80,13 @@ class Ver4DictBuffers {
|
||||||
HeaderReadWriteUtils::getHeaderSize(mDictBuffer.get()->getBuffer())),
|
HeaderReadWriteUtils::getHeaderSize(mDictBuffer.get()->getBuffer())),
|
||||||
mProbabilityDictContent(dictDirPath, isUpdatable),
|
mProbabilityDictContent(dictDirPath, isUpdatable),
|
||||||
mBigramDictContent(dictDirPath, isUpdatable),
|
mBigramDictContent(dictDirPath, isUpdatable),
|
||||||
mShortcutDictContent(dictDirPath,
|
mShortcutDictContent(dictDirPath, isUpdatable) {}
|
||||||
Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION,
|
|
||||||
Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION,
|
|
||||||
Ver4DictConstants::SHORTCUT_FILE_EXTENSION, isUpdatable,
|
|
||||||
Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE,
|
|
||||||
Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE) {}
|
|
||||||
|
|
||||||
const MmappedBuffer::MmappedBufferPtr mDictBuffer;
|
const MmappedBuffer::MmappedBufferPtr mDictBuffer;
|
||||||
TerminalPositionLookupTable mTerminalPositionLookupTable;
|
TerminalPositionLookupTable mTerminalPositionLookupTable;
|
||||||
ProbabilityDictContent mProbabilityDictContent;
|
ProbabilityDictContent mProbabilityDictContent;
|
||||||
BigramDictContent mBigramDictContent;
|
BigramDictContent mBigramDictContent;
|
||||||
SparseTableDictContent mShortcutDictContent;
|
ShortcutDictContent mShortcutDictContent;
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif /* LATINIME_VER4_DICT_BUFFER_H */
|
#endif /* LATINIME_VER4_DICT_BUFFER_H */
|
||||||
|
|
|
@ -41,6 +41,8 @@ const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE = 16;
|
||||||
const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE = 4;
|
const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE = 4;
|
||||||
|
|
||||||
const int Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE = 3;
|
const int Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE = 3;
|
||||||
const int Ver4DictConstants::BIGRAM_FRAGS_FIELD_SIZE = 1;
|
const int Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE = 1;
|
||||||
|
|
||||||
|
const int Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE = 1;
|
||||||
|
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
|
|
|
@ -44,8 +44,11 @@ class Ver4DictConstants {
|
||||||
static const int SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE;
|
static const int SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE;
|
||||||
static const int SHORTCUT_ADDRESS_TABLE_DATA_SIZE;
|
static const int SHORTCUT_ADDRESS_TABLE_DATA_SIZE;
|
||||||
|
|
||||||
static const int BIGRAM_FRAGS_FIELD_SIZE;
|
static const int BIGRAM_FLAGS_FIELD_SIZE;
|
||||||
static const int BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE;
|
static const int BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE;
|
||||||
|
|
||||||
|
static const int SHORTCUT_FLAGS_FIELD_SIZE;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictConstants);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictConstants);
|
||||||
};
|
};
|
||||||
|
|
|
@ -108,7 +108,8 @@ int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) con
|
||||||
if (ptNodeParams.isDeleted()) {
|
if (ptNodeParams.isDeleted()) {
|
||||||
return NOT_A_DICT_POS;
|
return NOT_A_DICT_POS;
|
||||||
}
|
}
|
||||||
return ptNodeParams.getTerminalId();
|
return mBuffers.get()->getShortcutDictContent()->getShortcutListHeadPos(
|
||||||
|
ptNodeParams.getTerminalId());
|
||||||
}
|
}
|
||||||
|
|
||||||
int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
|
int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
|
||||||
|
|
|
@ -21,6 +21,7 @@
|
||||||
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
|
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h"
|
#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/header/header_policy.h"
|
#include "suggest/policyimpl/dictionary/header/header_policy.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||||
|
@ -41,6 +42,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
|
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
|
||||||
mBigramPolicy(mBuffers.get()->getBigramDictContent(),
|
mBigramPolicy(mBuffers.get()->getBigramDictContent(),
|
||||||
mBuffers.get()->getTerminalPositionLookupTable()),
|
mBuffers.get()->getTerminalPositionLookupTable()),
|
||||||
|
mShortcutPolicy(mBuffers.get()->getShortcutDictContent(),
|
||||||
|
mBuffers.get()->getTerminalPositionLookupTable()),
|
||||||
mNodeReader(&mDictBuffer, mBuffers.get()->getProbabilityDictContent()) {};
|
mNodeReader(&mDictBuffer, mBuffers.get()->getProbabilityDictContent()) {};
|
||||||
|
|
||||||
AK_FORCE_INLINE int getRootPosition() const {
|
AK_FORCE_INLINE int getRootPosition() const {
|
||||||
|
@ -74,7 +77,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
}
|
}
|
||||||
|
|
||||||
const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const {
|
const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const {
|
||||||
return 0;
|
return &mShortcutPolicy;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool addUnigramWord(const int *const word, const int length, const int probability);
|
bool addUnigramWord(const int *const word, const int length, const int probability);
|
||||||
|
@ -101,6 +104,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
const HeaderPolicy mHeaderPolicy;
|
const HeaderPolicy mHeaderPolicy;
|
||||||
BufferWithExtendableBuffer mDictBuffer;
|
BufferWithExtendableBuffer mDictBuffer;
|
||||||
const Ver4BigramListPolicy mBigramPolicy;
|
const Ver4BigramListPolicy mBigramPolicy;
|
||||||
|
const Ver4ShortcutListPolicy mShortcutPolicy;
|
||||||
Ver4PatriciaTrieNodeReader mNodeReader;
|
Ver4PatriciaTrieNodeReader mNodeReader;
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
|
|
|
@ -36,6 +36,19 @@ uint32_t BufferWithExtendableBuffer::readUintAndAdvancePosition(const int size,
|
||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void BufferWithExtendableBuffer::readCodePointsAndAdvancePosition(const int maxCodePointCount,
|
||||||
|
int *const outCodePoints, int *outCodePointCount, int *const pos) const {
|
||||||
|
const bool readingPosIsInAdditionalBuffer = isInAdditionalBuffer(*pos);
|
||||||
|
if (readingPosIsInAdditionalBuffer) {
|
||||||
|
*pos -= mOriginalBufferSize;
|
||||||
|
}
|
||||||
|
*outCodePointCount = ByteArrayUtils::readStringAndAdvancePosition(
|
||||||
|
getBuffer(readingPosIsInAdditionalBuffer), maxCodePointCount, outCodePointCount, pos);
|
||||||
|
if (readingPosIsInAdditionalBuffer) {
|
||||||
|
*pos += mOriginalBufferSize;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
bool BufferWithExtendableBuffer::writeUintAndAdvancePosition(const uint32_t data, const int size,
|
bool BufferWithExtendableBuffer::writeUintAndAdvancePosition(const uint32_t data, const int size,
|
||||||
int *const pos) {
|
int *const pos) {
|
||||||
if (!(size >= 1 && size <= 4)) {
|
if (!(size >= 1 && size <= 4)) {
|
||||||
|
@ -59,7 +72,7 @@ bool BufferWithExtendableBuffer::writeUintAndAdvancePosition(const uint32_t data
|
||||||
}
|
}
|
||||||
|
|
||||||
bool BufferWithExtendableBuffer::writeCodePointsAndAdvancePosition(const int *const codePoints,
|
bool BufferWithExtendableBuffer::writeCodePointsAndAdvancePosition(const int *const codePoints,
|
||||||
const int codePointCount, const bool writesTerminator ,int *const pos) {
|
const int codePointCount, const bool writesTerminator, int *const pos) {
|
||||||
const size_t size = ByteArrayUtils::calculateRequiredByteCountToStoreCodePoints(
|
const size_t size = ByteArrayUtils::calculateRequiredByteCountToStoreCodePoints(
|
||||||
codePoints, codePointCount, writesTerminator);
|
codePoints, codePointCount, writesTerminator);
|
||||||
if (!checkAndPrepareWriting(*pos, size)) {
|
if (!checkAndPrepareWriting(*pos, size)) {
|
||||||
|
|
|
@ -75,6 +75,9 @@ class BufferWithExtendableBuffer {
|
||||||
|
|
||||||
uint32_t readUintAndAdvancePosition(const int size, int *const pos) const;
|
uint32_t readUintAndAdvancePosition(const int size, int *const pos) const;
|
||||||
|
|
||||||
|
void readCodePointsAndAdvancePosition(const int maxCodePointCount,
|
||||||
|
int *const outCodePoints, int *outCodePointCount, int *const pos) const;
|
||||||
|
|
||||||
AK_FORCE_INLINE int getOriginalBufferSize() const {
|
AK_FORCE_INLINE int getOriginalBufferSize() const {
|
||||||
return mOriginalBufferSize;
|
return mOriginalBufferSize;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue