Merge "Implement ver4 shortcut reading method."
This commit is contained in:
commit
acf515ee6a
10 changed files with 173 additions and 15 deletions
|
@ -0,0 +1,71 @@
|
|||
/*
|
||||
* Copyright (C) 2013 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef LATINIME_VER4_SHORTCUT_LIST_POLICY_H
|
||||
#define LATINIME_VER4_SHORTCUT_LIST_POLICY_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "defines.h"
|
||||
#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
class Ver4ShortcutListPolicy : public DictionaryShortcutsStructurePolicy {
|
||||
public:
|
||||
Ver4ShortcutListPolicy(const ShortcutDictContent *const shortcutDictContent,
|
||||
const TerminalPositionLookupTable *const terminalPositionLookupTable)
|
||||
: mShortcutDictContent(shortcutDictContent),
|
||||
mTerminalPositionLookupTable(terminalPositionLookupTable) {}
|
||||
|
||||
~Ver4ShortcutListPolicy() {}
|
||||
|
||||
int getStartPos(const int pos) const {
|
||||
// The first shortcut entry is located at the head position of the shortcut list.
|
||||
return pos;
|
||||
}
|
||||
|
||||
void getNextShortcut(const int maxCodePointCount, int *const outCodePoint,
|
||||
int *const outCodePointCount, bool *const outIsWhitelist, bool *const outHasNext,
|
||||
int *const pos) const {
|
||||
int shortcutFlags = 0;
|
||||
if (outCodePoint && outCodePointCount) {
|
||||
mShortcutDictContent->getShortcutEntryAndAdvancePosition(maxCodePointCount,
|
||||
outCodePoint, outCodePointCount, &shortcutFlags, pos);
|
||||
}
|
||||
if (outHasNext) {
|
||||
*outHasNext = ShortcutListReadingUtils::hasNext(shortcutFlags);
|
||||
}
|
||||
if (outIsWhitelist) {
|
||||
*outIsWhitelist = ShortcutListReadingUtils::isWhitelist(shortcutFlags);
|
||||
}
|
||||
}
|
||||
|
||||
void skipAllShortcuts(int *const pos) const {
|
||||
// Do nothing because we don't need to skip shortcut lists in ver4 dictionaries.
|
||||
}
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4ShortcutListPolicy);
|
||||
|
||||
const ShortcutDictContent *const mShortcutDictContent;
|
||||
const TerminalPositionLookupTable *const mTerminalPositionLookupTable;
|
||||
};
|
||||
} // namespace latinime
|
||||
#endif // LATINIME_VER4_SHORTCUT_LIST_POLICY_H
|
|
@ -38,7 +38,7 @@ class BigramDictContent : public SparseTableDictContent {
|
|||
const BufferWithExtendableBuffer *const bigramListBuffer = getContentBuffer();
|
||||
if (outBigramFlags) {
|
||||
*outBigramFlags = bigramListBuffer->readUintAndAdvancePosition(
|
||||
Ver4DictConstants::BIGRAM_FRAGS_FIELD_SIZE, bigramEntryPos);
|
||||
Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, bigramEntryPos);
|
||||
}
|
||||
if (outTargetTerminalId) {
|
||||
*outTargetTerminalId = bigramListBuffer->readUintAndAdvancePosition(
|
||||
|
|
|
@ -0,0 +1,63 @@
|
|||
/*
|
||||
* Copyright (C) 2013, The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef LATINIME_SHORTCUT_DICT_CONTENT_H
|
||||
#define LATINIME_SHORTCUT_DICT_CONTENT_H
|
||||
|
||||
#include "defines.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
class ShortcutDictContent : public SparseTableDictContent {
|
||||
public:
|
||||
ShortcutDictContent(const char *const dictDirPath, const bool isUpdatable)
|
||||
: SparseTableDictContent(dictDirPath,
|
||||
Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION,
|
||||
Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION,
|
||||
Ver4DictConstants::SHORTCUT_FILE_EXTENSION, isUpdatable,
|
||||
Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE,
|
||||
Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE) {}
|
||||
|
||||
void getShortcutEntryAndAdvancePosition(const int maxCodePointCount,
|
||||
int *const outCodePoint, int *const outCodePointCount, int *const outShortcutFlags,
|
||||
int *const shortcutEntryPos) const {
|
||||
const BufferWithExtendableBuffer *const shortcutListBuffer = getContentBuffer();
|
||||
if (outShortcutFlags) {
|
||||
*outShortcutFlags = shortcutListBuffer->readUintAndAdvancePosition(
|
||||
Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE, shortcutEntryPos);
|
||||
}
|
||||
if (outCodePoint && outCodePointCount) {
|
||||
shortcutListBuffer->readCodePointsAndAdvancePosition(
|
||||
maxCodePointCount, outCodePoint, outCodePointCount, shortcutEntryPos);
|
||||
}
|
||||
}
|
||||
|
||||
// Returns head position of shortcut list for a PtNode specified by terminalId.
|
||||
int getShortcutListHeadPos(const int terminalId) const {
|
||||
const SparseTable *const addressLookupTable = getAddressLookupTable();
|
||||
if (!addressLookupTable->contains(terminalId)) {
|
||||
return NOT_A_DICT_POS;
|
||||
}
|
||||
return addressLookupTable->get(terminalId);
|
||||
}
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(ShortcutDictContent);
|
||||
};
|
||||
} // namespace latinime
|
||||
#endif /* LATINIME_SHORTCUT_DICT_CONTENT_H */
|
|
@ -21,8 +21,7 @@
|
|||
#include "suggest/policyimpl/dictionary/header/header_read_write_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||
|
@ -66,6 +65,10 @@ class Ver4DictBuffers {
|
|||
return &mBigramDictContent;
|
||||
}
|
||||
|
||||
// Returns a pointer to the shortcut content member (mShortcutDictContent) held by this object.
AK_FORCE_INLINE const ShortcutDictContent *getShortcutDictContent() const {
|
||||
return &mShortcutDictContent;
|
||||
}
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictBuffers);
|
||||
|
||||
|
@ -77,18 +80,13 @@ class Ver4DictBuffers {
|
|||
HeaderReadWriteUtils::getHeaderSize(mDictBuffer.get()->getBuffer())),
|
||||
mProbabilityDictContent(dictDirPath, isUpdatable),
|
||||
mBigramDictContent(dictDirPath, isUpdatable),
|
||||
mShortcutDictContent(dictDirPath,
|
||||
Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION,
|
||||
Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION,
|
||||
Ver4DictConstants::SHORTCUT_FILE_EXTENSION, isUpdatable,
|
||||
Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE,
|
||||
Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE) {}
|
||||
mShortcutDictContent(dictDirPath, isUpdatable) {}
|
||||
|
||||
const MmappedBuffer::MmappedBufferPtr mDictBuffer;
|
||||
TerminalPositionLookupTable mTerminalPositionLookupTable;
|
||||
ProbabilityDictContent mProbabilityDictContent;
|
||||
BigramDictContent mBigramDictContent;
|
||||
SparseTableDictContent mShortcutDictContent;
|
||||
ShortcutDictContent mShortcutDictContent;
|
||||
};
|
||||
} // namespace latinime
|
||||
#endif /* LATINIME_VER4_DICT_BUFFER_H */
|
||||
|
|
|
@ -41,6 +41,8 @@ const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE = 16;
|
|||
const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE = 4;
|
||||
|
||||
const int Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE = 3;
|
||||
const int Ver4DictConstants::BIGRAM_FRAGS_FIELD_SIZE = 1;
|
||||
const int Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE = 1;
|
||||
|
||||
const int Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE = 1;
|
||||
|
||||
} // namespace latinime
|
||||
|
|
|
@ -44,8 +44,11 @@ class Ver4DictConstants {
|
|||
static const int SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE;
|
||||
static const int SHORTCUT_ADDRESS_TABLE_DATA_SIZE;
|
||||
|
||||
static const int BIGRAM_FRAGS_FIELD_SIZE;
|
||||
static const int BIGRAM_FLAGS_FIELD_SIZE;
|
||||
static const int BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE;
|
||||
|
||||
static const int SHORTCUT_FLAGS_FIELD_SIZE;
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictConstants);
|
||||
};
|
||||
|
|
|
@ -108,7 +108,8 @@ int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) con
|
|||
if (ptNodeParams.isDeleted()) {
|
||||
return NOT_A_DICT_POS;
|
||||
}
|
||||
return ptNodeParams.getTerminalId();
|
||||
return mBuffers.get()->getShortcutDictContent()->getShortcutListHeadPos(
|
||||
ptNodeParams.getTerminalId());
|
||||
}
|
||||
|
||||
int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/header/header_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||
|
@ -41,6 +42,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
|||
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
|
||||
mBigramPolicy(mBuffers.get()->getBigramDictContent(),
|
||||
mBuffers.get()->getTerminalPositionLookupTable()),
|
||||
mShortcutPolicy(mBuffers.get()->getShortcutDictContent(),
|
||||
mBuffers.get()->getTerminalPositionLookupTable()),
|
||||
mNodeReader(&mDictBuffer, mBuffers.get()->getProbabilityDictContent()) {};
|
||||
|
||||
AK_FORCE_INLINE int getRootPosition() const {
|
||||
|
@ -74,7 +77,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
|||
}
|
||||
|
||||
const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const {
|
||||
return 0;
|
||||
return &mShortcutPolicy;
|
||||
}
|
||||
|
||||
bool addUnigramWord(const int *const word, const int length, const int probability);
|
||||
|
@ -101,6 +104,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
|||
const HeaderPolicy mHeaderPolicy;
|
||||
BufferWithExtendableBuffer mDictBuffer;
|
||||
const Ver4BigramListPolicy mBigramPolicy;
|
||||
const Ver4ShortcutListPolicy mShortcutPolicy;
|
||||
Ver4PatriciaTrieNodeReader mNodeReader;
|
||||
};
|
||||
} // namespace latinime
|
||||
|
|
|
@ -36,6 +36,19 @@ uint32_t BufferWithExtendableBuffer::readUintAndAdvancePosition(const int size,
|
|||
return value;
|
||||
}
|
||||
|
||||
void BufferWithExtendableBuffer::readCodePointsAndAdvancePosition(const int maxCodePointCount,
|
||||
int *const outCodePoints, int *outCodePointCount, int *const pos) const {
|
||||
const bool readingPosIsInAdditionalBuffer = isInAdditionalBuffer(*pos);
|
||||
if (readingPosIsInAdditionalBuffer) {
|
||||
*pos -= mOriginalBufferSize;
|
||||
}
|
||||
*outCodePointCount = ByteArrayUtils::readStringAndAdvancePosition(
|
||||
getBuffer(readingPosIsInAdditionalBuffer), maxCodePointCount, outCodePointCount, pos);
|
||||
if (readingPosIsInAdditionalBuffer) {
|
||||
*pos += mOriginalBufferSize;
|
||||
}
|
||||
}
|
||||
|
||||
bool BufferWithExtendableBuffer::writeUintAndAdvancePosition(const uint32_t data, const int size,
|
||||
int *const pos) {
|
||||
if (!(size >= 1 && size <= 4)) {
|
||||
|
@ -59,7 +72,7 @@ bool BufferWithExtendableBuffer::writeUintAndAdvancePosition(const uint32_t data
|
|||
}
|
||||
|
||||
bool BufferWithExtendableBuffer::writeCodePointsAndAdvancePosition(const int *const codePoints,
|
||||
const int codePointCount, const bool writesTerminator ,int *const pos) {
|
||||
const int codePointCount, const bool writesTerminator, int *const pos) {
|
||||
const size_t size = ByteArrayUtils::calculateRequiredByteCountToStoreCodePoints(
|
||||
codePoints, codePointCount, writesTerminator);
|
||||
if (!checkAndPrepareWriting(*pos, size)) {
|
||||
|
|
|
@ -75,6 +75,9 @@ class BufferWithExtendableBuffer {
|
|||
|
||||
uint32_t readUintAndAdvancePosition(const int size, int *const pos) const;
|
||||
|
||||
void readCodePointsAndAdvancePosition(const int maxCodePointCount,
|
||||
int *const outCodePoints, int *outCodePointCount, int *const pos) const;
|
||||
|
||||
AK_FORCE_INLINE int getOriginalBufferSize() const {
|
||||
return mOriginalBufferSize;
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue