Implement ver4 shortcut reading method.

Bug: 11073222
Change-Id: I3738c7ce07f500920bde8d3f985cf6e8ecb40b6a
main
Keisuke Kuroyanagi 2013-11-06 18:09:35 +09:00
parent 9b3e59d644
commit b460c85589
10 changed files with 173 additions and 15 deletions

View File

@ -0,0 +1,71 @@
/*
* Copyright (C) 2013 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LATINIME_VER4_SHORTCUT_LIST_POLICY_H
#define LATINIME_VER4_SHORTCUT_LIST_POLICY_H
#include <stdint.h>
#include "defines.h"
#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
namespace latinime {
// Shortcut list reading policy for version 4 dictionaries. Shortcut entries are fetched through
// the ShortcutDictContent instance handed to the constructor.
class Ver4ShortcutListPolicy : public DictionaryShortcutsStructurePolicy {
 public:
    // Stores both pointers as given. This class never deletes them (the destructor is empty).
    // The terminal position lookup table is kept but is not referenced by any method visible
    // in this class.
    Ver4ShortcutListPolicy(const ShortcutDictContent *const shortcutDictContent,
            const TerminalPositionLookupTable *const terminalPositionLookupTable)
            : mShortcutDictContent(shortcutDictContent),
              mTerminalPositionLookupTable(terminalPositionLookupTable) {}

    ~Ver4ShortcutListPolicy() {}

    // Returns the position of the first shortcut entry of the list starting at pos.
    int getStartPos(const int pos) const {
        // A shortcut list begins directly with its first entry, so the head position of the
        // list is already the position of that entry.
        return pos;
    }

    // Reads the shortcut entry located at *pos.
    //
    // maxCodePointCount: forwarded unchanged to the shortcut content reader.
    // outCodePoint, outCodePointCount: receive the shortcut target. When either one is null the
    //     entry is not read at all: *pos is left untouched and the flags are treated as 0.
    // outIsWhitelist, outHasNext: optional; when non-null they receive the values derived from
    //     the entry flags by ShortcutListReadingUtils.
    // pos: in/out reading position, advanced by the shortcut content reader.
    void getNextShortcut(const int maxCodePointCount, int *const outCodePoint,
            int *const outCodePointCount, bool *const outIsWhitelist, bool *const outHasNext,
            int *const pos) const {
        int flags = 0;
        const bool hasCodePointOutputs = outCodePoint && outCodePointCount;
        if (hasCodePointOutputs) {
            mShortcutDictContent->getShortcutEntryAndAdvancePosition(maxCodePointCount,
                    outCodePoint, outCodePointCount, &flags, pos);
        }
        // Keep this write order: outHasNext first, outIsWhitelist second.
        if (outHasNext) {
            *outHasNext = ShortcutListReadingUtils::hasNext(flags);
        }
        if (outIsWhitelist) {
            *outIsWhitelist = ShortcutListReadingUtils::isWhitelist(flags);
        }
    }

    // Intentionally a no-op: ver4 dictionaries never need to skip over a shortcut list, so pos
    // is left unchanged.
    void skipAllShortcuts(int *const pos) const {
    }

 private:
    DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4ShortcutListPolicy);

    const ShortcutDictContent *const mShortcutDictContent;
    const TerminalPositionLookupTable *const mTerminalPositionLookupTable;
};
} // namespace latinime
#endif // LATINIME_VER4_SHORTCUT_LIST_POLICY_H

View File

@ -38,7 +38,7 @@ class BigramDictContent : public SparseTableDictContent {
const BufferWithExtendableBuffer *const bigramListBuffer = getContentBuffer(); const BufferWithExtendableBuffer *const bigramListBuffer = getContentBuffer();
if (outBigramFlags) { if (outBigramFlags) {
*outBigramFlags = bigramListBuffer->readUintAndAdvancePosition( *outBigramFlags = bigramListBuffer->readUintAndAdvancePosition(
Ver4DictConstants::BIGRAM_FRAGS_FIELD_SIZE, bigramEntryPos); Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, bigramEntryPos);
} }
if (outTargetTerminalId) { if (outTargetTerminalId) {
*outTargetTerminalId = bigramListBuffer->readUintAndAdvancePosition( *outTargetTerminalId = bigramListBuffer->readUintAndAdvancePosition(

View File

@ -0,0 +1,63 @@
/*
* Copyright (C) 2013, The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LATINIME_SHORTCUT_DICT_CONTENT_H
#define LATINIME_SHORTCUT_DICT_CONTENT_H
#include "defines.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
namespace latinime {
// Shortcut content of a version 4 dictionary, built on top of SparseTableDictContent.
class ShortcutDictContent : public SparseTableDictContent {
 public:
    // Constructs the content by passing dictDirPath, isUpdatable and the shortcut-related
    // Ver4DictConstants (file extensions, address table block size and data size) to
    // SparseTableDictContent.
    ShortcutDictContent(const char *const dictDirPath, const bool isUpdatable)
            : SparseTableDictContent(dictDirPath,
                      Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION,
                      Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION,
                      Ver4DictConstants::SHORTCUT_FILE_EXTENSION, isUpdatable,
                      Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE,
                      Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE) {}

    // Reads one shortcut entry (flags field followed by the target code points) from the
    // content buffer at *shortcutEntryPos and advances *shortcutEntryPos.
    //
    // maxCodePointCount: forwarded to the code point reader.
    // outCodePoint, outCodePointCount: receive the target code points and their count. When
    //     either one is null the code points are neither read nor skipped.
    // outShortcutFlags: optional; receives the value of the flags field when non-null.
    // shortcutEntryPos: in/out reading position.
    void getShortcutEntryAndAdvancePosition(const int maxCodePointCount,
            int *const outCodePoint, int *const outCodePointCount, int *const outShortcutFlags,
            int *const shortcutEntryPos) const {
        const BufferWithExtendableBuffer *const shortcutListBuffer = getContentBuffer();
        // The flags field must always be consumed, even when the caller does not want its
        // value. Otherwise the position would still point at the flags field and the code
        // points below would be decoded from the wrong offset.
        const int shortcutFlags = shortcutListBuffer->readUintAndAdvancePosition(
                Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE, shortcutEntryPos);
        if (outShortcutFlags) {
            *outShortcutFlags = shortcutFlags;
        }
        if (outCodePoint && outCodePointCount) {
            shortcutListBuffer->readCodePointsAndAdvancePosition(
                    maxCodePointCount, outCodePoint, outCodePointCount, shortcutEntryPos);
        }
    }

    // Returns head position of shortcut list for a PtNode specified by terminalId, or
    // NOT_A_DICT_POS when the address lookup table has no entry for terminalId.
    int getShortcutListHeadPos(const int terminalId) const {
        const SparseTable *const addressLookupTable = getAddressLookupTable();
        if (!addressLookupTable->contains(terminalId)) {
            return NOT_A_DICT_POS;
        }
        return addressLookupTable->get(terminalId);
    }

 private:
    DISALLOW_IMPLICIT_CONSTRUCTORS(ShortcutDictContent);
};
} // namespace latinime
#endif /* LATINIME_SHORTCUT_DICT_CONTENT_H */

View File

@ -21,8 +21,7 @@
#include "suggest/policyimpl/dictionary/header/header_read_write_utils.h" #include "suggest/policyimpl/dictionary/header/header_read_write_utils.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h" #include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h" #include "suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h" #include "suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h" #include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
@ -66,6 +65,10 @@ class Ver4DictBuffers {
return &mBigramDictContent; return &mBigramDictContent;
} }
// Returns a read-only pointer to the mShortcutDictContent member of this object.
AK_FORCE_INLINE const ShortcutDictContent *getShortcutDictContent() const {
return &mShortcutDictContent;
}
private: private:
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictBuffers); DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictBuffers);
@ -77,18 +80,13 @@ class Ver4DictBuffers {
HeaderReadWriteUtils::getHeaderSize(mDictBuffer.get()->getBuffer())), HeaderReadWriteUtils::getHeaderSize(mDictBuffer.get()->getBuffer())),
mProbabilityDictContent(dictDirPath, isUpdatable), mProbabilityDictContent(dictDirPath, isUpdatable),
mBigramDictContent(dictDirPath, isUpdatable), mBigramDictContent(dictDirPath, isUpdatable),
mShortcutDictContent(dictDirPath, mShortcutDictContent(dictDirPath, isUpdatable) {}
Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION,
Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION,
Ver4DictConstants::SHORTCUT_FILE_EXTENSION, isUpdatable,
Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE,
Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE) {}
const MmappedBuffer::MmappedBufferPtr mDictBuffer; const MmappedBuffer::MmappedBufferPtr mDictBuffer;
TerminalPositionLookupTable mTerminalPositionLookupTable; TerminalPositionLookupTable mTerminalPositionLookupTable;
ProbabilityDictContent mProbabilityDictContent; ProbabilityDictContent mProbabilityDictContent;
BigramDictContent mBigramDictContent; BigramDictContent mBigramDictContent;
SparseTableDictContent mShortcutDictContent; ShortcutDictContent mShortcutDictContent;
}; };
} // namespace latinime } // namespace latinime
#endif /* LATINIME_VER4_DICT_BUFFER_H */ #endif /* LATINIME_VER4_DICT_BUFFER_H */

View File

@ -41,6 +41,8 @@ const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE = 16;
const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE = 4; const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE = 4;
const int Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE = 3; const int Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE = 3;
const int Ver4DictConstants::BIGRAM_FRAGS_FIELD_SIZE = 1; const int Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE = 1;
const int Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE = 1;
} // namespace latinime } // namespace latinime

View File

@ -44,8 +44,11 @@ class Ver4DictConstants {
static const int SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE; static const int SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE;
static const int SHORTCUT_ADDRESS_TABLE_DATA_SIZE; static const int SHORTCUT_ADDRESS_TABLE_DATA_SIZE;
static const int BIGRAM_FRAGS_FIELD_SIZE; static const int BIGRAM_FLAGS_FIELD_SIZE;
static const int BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE; static const int BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE;
static const int SHORTCUT_FLAGS_FIELD_SIZE;
private: private:
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictConstants); DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictConstants);
}; };

View File

@ -108,7 +108,8 @@ int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) con
if (ptNodeParams.isDeleted()) { if (ptNodeParams.isDeleted()) {
return NOT_A_DICT_POS; return NOT_A_DICT_POS;
} }
return ptNodeParams.getTerminalId(); return mBuffers.get()->getShortcutDictContent()->getShortcutListHeadPos(
ptNodeParams.getTerminalId());
} }
int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const { int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {

View File

@ -21,6 +21,7 @@
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h" #include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
@ -41,6 +42,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE), BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
mBigramPolicy(mBuffers.get()->getBigramDictContent(), mBigramPolicy(mBuffers.get()->getBigramDictContent(),
mBuffers.get()->getTerminalPositionLookupTable()), mBuffers.get()->getTerminalPositionLookupTable()),
mShortcutPolicy(mBuffers.get()->getShortcutDictContent(),
mBuffers.get()->getTerminalPositionLookupTable()),
mNodeReader(&mDictBuffer, mBuffers.get()->getProbabilityDictContent()) {}; mNodeReader(&mDictBuffer, mBuffers.get()->getProbabilityDictContent()) {};
AK_FORCE_INLINE int getRootPosition() const { AK_FORCE_INLINE int getRootPosition() const {
@ -74,7 +77,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
} }
const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const { const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const {
return 0; return &mShortcutPolicy;
} }
bool addUnigramWord(const int *const word, const int length, const int probability); bool addUnigramWord(const int *const word, const int length, const int probability);
@ -101,6 +104,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
const HeaderPolicy mHeaderPolicy; const HeaderPolicy mHeaderPolicy;
BufferWithExtendableBuffer mDictBuffer; BufferWithExtendableBuffer mDictBuffer;
const Ver4BigramListPolicy mBigramPolicy; const Ver4BigramListPolicy mBigramPolicy;
const Ver4ShortcutListPolicy mShortcutPolicy;
Ver4PatriciaTrieNodeReader mNodeReader; Ver4PatriciaTrieNodeReader mNodeReader;
}; };
} // namespace latinime } // namespace latinime

View File

@ -36,6 +36,19 @@ uint32_t BufferWithExtendableBuffer::readUintAndAdvancePosition(const int size,
return value; return value;
} }
// Reads a code point string starting at *pos and advances *pos.
//
// maxCodePointCount: forwarded to ByteArrayUtils::readStringAndAdvancePosition.
// outCodePoints: destination array for the decoded code points.
// outCodePointCount: receives the value returned by
//     ByteArrayUtils::readStringAndAdvancePosition.
// pos: in/out reading position. When it lies in the additional buffer it is temporarily
//     rebased by mOriginalBufferSize for the read and restored afterwards.
void BufferWithExtendableBuffer::readCodePointsAndAdvancePosition(const int maxCodePointCount,
        int *const outCodePoints, int *outCodePointCount, int *const pos) const {
    const bool readingPosIsInAdditionalBuffer = isInAdditionalBuffer(*pos);
    if (readingPosIsInAdditionalBuffer) {
        // Convert to an offset relative to the start of the additional buffer.
        *pos -= mOriginalBufferSize;
    }
    // The destination for the decoded code points is outCodePoints. Passing outCodePointCount
    // here would write the code points through the count pointer and leave outCodePoints
    // unfilled.
    *outCodePointCount = ByteArrayUtils::readStringAndAdvancePosition(
            getBuffer(readingPosIsInAdditionalBuffer), maxCodePointCount, outCodePoints, pos);
    if (readingPosIsInAdditionalBuffer) {
        // Convert back to a position in the combined address space.
        *pos += mOriginalBufferSize;
    }
}
bool BufferWithExtendableBuffer::writeUintAndAdvancePosition(const uint32_t data, const int size, bool BufferWithExtendableBuffer::writeUintAndAdvancePosition(const uint32_t data, const int size,
int *const pos) { int *const pos) {
if (!(size >= 1 && size <= 4)) { if (!(size >= 1 && size <= 4)) {
@ -59,7 +72,7 @@ bool BufferWithExtendableBuffer::writeUintAndAdvancePosition(const uint32_t data
} }
bool BufferWithExtendableBuffer::writeCodePointsAndAdvancePosition(const int *const codePoints, bool BufferWithExtendableBuffer::writeCodePointsAndAdvancePosition(const int *const codePoints,
const int codePointCount, const bool writesTerminator ,int *const pos) { const int codePointCount, const bool writesTerminator, int *const pos) {
const size_t size = ByteArrayUtils::calculateRequiredByteCountToStoreCodePoints( const size_t size = ByteArrayUtils::calculateRequiredByteCountToStoreCodePoints(
codePoints, codePointCount, writesTerminator); codePoints, codePointCount, writesTerminator);
if (!checkAndPrepareWriting(*pos, size)) { if (!checkAndPrepareWriting(*pos, size)) {

View File

@ -75,6 +75,9 @@ class BufferWithExtendableBuffer {
uint32_t readUintAndAdvancePosition(const int size, int *const pos) const; uint32_t readUintAndAdvancePosition(const int size, int *const pos) const;
void readCodePointsAndAdvancePosition(const int maxCodePointCount,
int *const outCodePoints, int *outCodePointCount, int *const pos) const;
AK_FORCE_INLINE int getOriginalBufferSize() const { AK_FORCE_INLINE int getOriginalBufferSize() const {
return mOriginalBufferSize; return mOriginalBufferSize;
} }