Merge "Implement ver4 dictionary bigram writing methods."
commit
fe05173198
|
@ -68,12 +68,14 @@ LATIN_IME_CORE_SRC_FILES := \
|
||||||
suggest/core/policy/weighting.cpp \
|
suggest/core/policy/weighting.cpp \
|
||||||
suggest/core/session/dic_traverse_session.cpp \
|
suggest/core/session/dic_traverse_session.cpp \
|
||||||
$(addprefix suggest/policyimpl/dictionary/, \
|
$(addprefix suggest/policyimpl/dictionary/, \
|
||||||
bigram/bigram_list_read_write_utils.cpp \
|
|
||||||
bigram/dynamic_bigram_list_policy.cpp \
|
|
||||||
header/header_policy.cpp \
|
header/header_policy.cpp \
|
||||||
header/header_read_write_utils.cpp \
|
header/header_read_write_utils.cpp \
|
||||||
shortcut/shortcut_list_reading_utils.cpp \
|
shortcut/shortcut_list_reading_utils.cpp \
|
||||||
structure/dictionary_structure_with_buffer_policy_factory.cpp) \
|
structure/dictionary_structure_with_buffer_policy_factory.cpp) \
|
||||||
|
$(addprefix suggest/policyimpl/dictionary/bigram/, \
|
||||||
|
bigram_list_read_write_utils.cpp \
|
||||||
|
dynamic_bigram_list_policy.cpp \
|
||||||
|
ver4_bigram_list_policy.cpp) \
|
||||||
$(addprefix suggest/policyimpl/dictionary/structure/v2/, \
|
$(addprefix suggest/policyimpl/dictionary/structure/v2/, \
|
||||||
patricia_trie_policy.cpp \
|
patricia_trie_policy.cpp \
|
||||||
patricia_trie_reading_utils.cpp) \
|
patricia_trie_reading_utils.cpp) \
|
||||||
|
@ -88,6 +90,7 @@ LATIN_IME_CORE_SRC_FILES := \
|
||||||
dynamic_patricia_trie_writing_helper.cpp \
|
dynamic_patricia_trie_writing_helper.cpp \
|
||||||
dynamic_patricia_trie_writing_utils.cpp) \
|
dynamic_patricia_trie_writing_utils.cpp) \
|
||||||
$(addprefix suggest/policyimpl/dictionary/structure/v4/, \
|
$(addprefix suggest/policyimpl/dictionary/structure/v4/, \
|
||||||
|
content/bigram_dict_content.cpp \
|
||||||
ver4_dict_constants.cpp \
|
ver4_dict_constants.cpp \
|
||||||
ver4_patricia_trie_node_reader.cpp \
|
ver4_patricia_trie_node_reader.cpp \
|
||||||
ver4_patricia_trie_node_writer.cpp \
|
ver4_patricia_trie_node_writer.cpp \
|
||||||
|
|
|
@ -0,0 +1,115 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2013 The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h"
|
||||||
|
|
||||||
|
#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
void Ver4BigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability,
|
||||||
|
bool *const outHasNext, int *const bigramEntryPos) const {
|
||||||
|
int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
|
||||||
|
mBigramDictContent->getBigramEntryAndAdvancePosition(outProbability, outHasNext,
|
||||||
|
&targetTerminalId, bigramEntryPos);
|
||||||
|
if (outBigramPos) {
|
||||||
|
// Lookup target PtNode position.
|
||||||
|
*outBigramPos = mTerminalPositionLookupTable->getTerminalPtNodePosition(targetTerminalId);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTargetTerminalId,
|
||||||
|
const int newProbability, bool *const outAddedNewEntry) {
|
||||||
|
if (outAddedNewEntry) {
|
||||||
|
*outAddedNewEntry = false;
|
||||||
|
}
|
||||||
|
const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
|
||||||
|
if (bigramListPos == NOT_A_DICT_POS) {
|
||||||
|
// Updating PtNode doesn't have a bigram list.
|
||||||
|
// Create new bigram list.
|
||||||
|
if (!mBigramDictContent->createNewBigramList(terminalId)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Write an entry.
|
||||||
|
int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
|
||||||
|
if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(newProbability,
|
||||||
|
false /* hasNext */, newTargetTerminalId, &writingPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
const int entryPosToUpdate = getEntryPosToUpdate(newTargetTerminalId, bigramListPos);
|
||||||
|
if (entryPosToUpdate != NOT_A_DICT_POS) {
|
||||||
|
// Overwrite existing entry.
|
||||||
|
int readingPos = entryPosToUpdate;
|
||||||
|
bool hasNext = false;
|
||||||
|
int probability = NOT_A_PROBABILITY;
|
||||||
|
int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
|
||||||
|
mBigramDictContent->getBigramEntryAndAdvancePosition(&probability, &hasNext,
|
||||||
|
&targetTerminalId, &readingPos);
|
||||||
|
if (targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID && outAddedNewEntry) {
|
||||||
|
// Reuse invalid entry.
|
||||||
|
*outAddedNewEntry = true;
|
||||||
|
}
|
||||||
|
int writingPos = entryPosToUpdate;
|
||||||
|
return mBigramDictContent->writeBigramEntryAndAdvancePosition(newProbability, hasNext,
|
||||||
|
newTargetTerminalId, &writingPos);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add new entry to the bigram list.
|
||||||
|
// Create new bigram list.
|
||||||
|
if (!mBigramDictContent->createNewBigramList(terminalId)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Write new entry at a head position of the bigram list.
|
||||||
|
int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
|
||||||
|
if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(newProbability,
|
||||||
|
true /* hasNext */, newTargetTerminalId, &writingPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (outAddedNewEntry) {
|
||||||
|
*outAddedNewEntry = true;
|
||||||
|
}
|
||||||
|
// Append existing entries by copying.
|
||||||
|
return mBigramDictContent->copyBigramList(bigramListPos, writingPos);
|
||||||
|
}
|
||||||
|
|
||||||
|
int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
|
||||||
|
const int bigramListPos) const {
|
||||||
|
bool hasNext = true;
|
||||||
|
int invalidEntryPos = NOT_A_DICT_POS;
|
||||||
|
int readingPos = bigramListPos;
|
||||||
|
while(hasNext) {
|
||||||
|
const int entryPos = readingPos;
|
||||||
|
int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
|
||||||
|
mBigramDictContent->getBigramEntryAndAdvancePosition(0 /* probability */, &hasNext,
|
||||||
|
&targetTerminalId, &readingPos);
|
||||||
|
if (targetTerminalId == targetTerminalIdToFind) {
|
||||||
|
// Entry with same target is found.
|
||||||
|
return entryPos;
|
||||||
|
} else if (targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID) {
|
||||||
|
// Invalid entry that can be reused is found.
|
||||||
|
invalidEntryPos = entryPos;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return invalidEntryPos;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace latinime
|
|
@ -19,46 +19,35 @@
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
|
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
|
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
|
class BigramDictContent;
|
||||||
|
class TerminalPositionLookupTable;
|
||||||
|
|
||||||
class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
|
class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
|
||||||
public:
|
public:
|
||||||
Ver4BigramListPolicy(const BigramDictContent *const bigramDictContent,
|
Ver4BigramListPolicy(BigramDictContent *const bigramDictContent,
|
||||||
const TerminalPositionLookupTable *const terminalPositionLookupTable)
|
const TerminalPositionLookupTable *const terminalPositionLookupTable)
|
||||||
: mBigramDictContent(bigramDictContent),
|
: mBigramDictContent(bigramDictContent),
|
||||||
mTerminalPositionLookupTable(terminalPositionLookupTable) {}
|
mTerminalPositionLookupTable(terminalPositionLookupTable) {}
|
||||||
|
|
||||||
void getNextBigram(int *const outBigramPos, int *const outProbability,
|
void getNextBigram(int *const outBigramPos, int *const outProbability,
|
||||||
bool *const outHasNext, int *const bigramEntryPos) const {
|
bool *const outHasNext, int *const bigramEntryPos) const;
|
||||||
int bigramFlags = 0;
|
|
||||||
int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
|
|
||||||
mBigramDictContent->getBigramEntryAndAdvancePosition(&bigramFlags, &targetTerminalId,
|
|
||||||
bigramEntryPos);
|
|
||||||
if (outProbability) {
|
|
||||||
*outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags);
|
|
||||||
}
|
|
||||||
if (outHasNext) {
|
|
||||||
*outHasNext = BigramListReadWriteUtils::hasNext(bigramFlags);
|
|
||||||
}
|
|
||||||
if (outBigramPos) {
|
|
||||||
// Lookup target PtNode position.
|
|
||||||
*outBigramPos =
|
|
||||||
mTerminalPositionLookupTable->getTerminalPtNodePosition(targetTerminalId);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void skipAllBigrams(int *const pos) const {
|
void skipAllBigrams(int *const pos) const {
|
||||||
// Do nothing because we don't need to skip bigram lists in ver4 dictionaries.
|
// Do nothing because we don't need to skip bigram lists in ver4 dictionaries.
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool addNewEntry(const int terminalId, const int newTargetTerminalId, const int newProbability,
|
||||||
|
bool *const outAddedNewEntry);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4BigramListPolicy);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4BigramListPolicy);
|
||||||
|
|
||||||
const BigramDictContent *const mBigramDictContent;
|
int getEntryPosToUpdate(const int targetTerminalIdToFind, const int bigramListPos) const;
|
||||||
|
|
||||||
|
BigramDictContent *const mBigramDictContent;
|
||||||
const TerminalPositionLookupTable *const mTerminalPositionLookupTable;
|
const TerminalPositionLookupTable *const mTerminalPositionLookupTable;
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
|
|
|
@ -238,6 +238,9 @@ int DynamicPatriciaTrieReadingHelper::getTerminalPtNodePositionOfWord(const int
|
||||||
}
|
}
|
||||||
// All characters are matched.
|
// All characters are matched.
|
||||||
if (length == getTotalCodePointCount(ptNodeParams)) {
|
if (length == getTotalCodePointCount(ptNodeParams)) {
|
||||||
|
if (!ptNodeParams.isTerminal()) {
|
||||||
|
return NOT_A_DICT_POS;
|
||||||
|
}
|
||||||
// Terminal position is found.
|
// Terminal position is found.
|
||||||
return ptNodeParams.getHeadPos();
|
return ptNodeParams.getHeadPos();
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,69 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2013 The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h"
|
||||||
|
|
||||||
|
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
void BigramDictContent::getBigramEntryAndAdvancePosition(int *const outProbability,
|
||||||
|
bool *const outHasNext, int *const outTargetTerminalId, int *const bigramEntryPos) const {
|
||||||
|
const BufferWithExtendableBuffer *const bigramListBuffer = getContentBuffer();
|
||||||
|
const int bigramFlags = bigramListBuffer->readUintAndAdvancePosition(
|
||||||
|
Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, bigramEntryPos);
|
||||||
|
if (outProbability) {
|
||||||
|
*outProbability = bigramFlags & Ver4DictConstants::BIGRAM_PROBABILITY_MASK;
|
||||||
|
}
|
||||||
|
if (outHasNext) {
|
||||||
|
*outHasNext = (bigramFlags & Ver4DictConstants::BIGRAM_HAS_NEXT_MASK) != 0;
|
||||||
|
}
|
||||||
|
if (outTargetTerminalId) {
|
||||||
|
*outTargetTerminalId = bigramListBuffer->readUintAndAdvancePosition(
|
||||||
|
Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, bigramEntryPos);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool BigramDictContent::writeBigramEntryAndAdvancePosition(const int probability, const int hasNext,
|
||||||
|
const int targetTerminalId, int *const entryWritingPos) {
|
||||||
|
BufferWithExtendableBuffer *const bigramListBuffer = getWritableContentBuffer();
|
||||||
|
const int bigramFlags = createAndGetBigramFlags(probability, hasNext);
|
||||||
|
if (!bigramListBuffer->writeUintAndAdvancePosition(bigramFlags,
|
||||||
|
Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, entryWritingPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return bigramListBuffer->writeUintAndAdvancePosition(targetTerminalId,
|
||||||
|
Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, entryWritingPos);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool BigramDictContent::copyBigramList(const int bigramListPos, const int toPos) {
|
||||||
|
bool hasNext = true;
|
||||||
|
int readingPos = bigramListPos;
|
||||||
|
int writingPos = toPos;
|
||||||
|
while(hasNext) {
|
||||||
|
int probability = NOT_A_PROBABILITY;
|
||||||
|
int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
|
||||||
|
getBigramEntryAndAdvancePosition(&probability, &hasNext, &targetTerminalId,
|
||||||
|
&readingPos);
|
||||||
|
if (!writeBigramEntryAndAdvancePosition(probability, hasNext, targetTerminalId,
|
||||||
|
&writingPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace latinime
|
|
@ -33,21 +33,11 @@ class BigramDictContent : public SparseTableDictContent {
|
||||||
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
|
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
|
||||||
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE) {}
|
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE) {}
|
||||||
|
|
||||||
void getBigramEntryAndAdvancePosition(int *const outBigramFlags,
|
void getBigramEntryAndAdvancePosition(int *const outProbability, bool *const outHasNext,
|
||||||
int *const outTargetTerminalId, int *const bigramEntryPos) const {
|
int *const outTargetTerminalId, int *const bigramEntryPos) const;
|
||||||
const BufferWithExtendableBuffer *const bigramListBuffer = getContentBuffer();
|
|
||||||
if (outBigramFlags) {
|
|
||||||
*outBigramFlags = bigramListBuffer->readUintAndAdvancePosition(
|
|
||||||
Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, bigramEntryPos);
|
|
||||||
}
|
|
||||||
if (outTargetTerminalId) {
|
|
||||||
*outTargetTerminalId = bigramListBuffer->readUintAndAdvancePosition(
|
|
||||||
Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, bigramEntryPos);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Returns head position of bigram list for a PtNode specified by terminalId.
|
// Returns head position of bigram list for a PtNode specified by terminalId.
|
||||||
int getBigramListHeadPos(const int terminalId) const {
|
int getBigramListHeadPos(const int terminalId) const {
|
||||||
const SparseTable *const addressLookupTable = getAddressLookupTable();
|
const SparseTable *const addressLookupTable = getAddressLookupTable();
|
||||||
if (!addressLookupTable->contains(terminalId)) {
|
if (!addressLookupTable->contains(terminalId)) {
|
||||||
return NOT_A_DICT_POS;
|
return NOT_A_DICT_POS;
|
||||||
|
@ -55,8 +45,23 @@ class BigramDictContent : public SparseTableDictContent {
|
||||||
return addressLookupTable->get(terminalId);
|
return addressLookupTable->get(terminalId);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool writeBigramEntryAndAdvancePosition(const int probability, const int hasNext,
|
||||||
|
const int targetTerminalId, int *const entryWritingPos);
|
||||||
|
|
||||||
|
bool createNewBigramList(const int terminalId) {
|
||||||
|
const int bigramListPos = getContentBuffer()->getTailPosition();
|
||||||
|
return getUpdatableAddressLookupTable()->set(terminalId, bigramListPos);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool copyBigramList(const int bigramListPos, const int toPos);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(BigramDictContent);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(BigramDictContent);
|
||||||
|
|
||||||
|
int createAndGetBigramFlags(const int probability, const bool hasNext) const {
|
||||||
|
return (probability & Ver4DictConstants::BIGRAM_PROBABILITY_MASK)
|
||||||
|
| (hasNext ? Ver4DictConstants::BIGRAM_HAS_NEXT_MASK : 0);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif /* LATINIME_BIGRAM_DICT_CONTENT_H */
|
#endif /* LATINIME_BIGRAM_DICT_CONTENT_H */
|
||||||
|
|
|
@ -59,10 +59,18 @@ class SparseTableDictContent : public DictContent {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
// Mutable counterpart of getAddressLookupTable(): returns a non-const pointer to the
// address lookup table so that it can be modified.
SparseTable *getUpdatableAddressLookupTable() {
    SparseTable *const table = &mAddressLookupTable;
    return table;
}
|
||||||
|
|
||||||
const SparseTable *getAddressLookupTable() const {
|
const SparseTable *getAddressLookupTable() const {
|
||||||
return &mAddressLookupTable;
|
return &mAddressLookupTable;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Mutable counterpart of getContentBuffer(): returns a non-const pointer to the content
// buffer so that it can be written to.
BufferWithExtendableBuffer *getWritableContentBuffer() {
    BufferWithExtendableBuffer *const buffer = &mExpandableContentBuffer;
    return buffer;
}
|
||||||
|
|
||||||
const BufferWithExtendableBuffer *getContentBuffer() const {
|
const BufferWithExtendableBuffer *getContentBuffer() const {
|
||||||
return &mExpandableContentBuffer;
|
return &mExpandableContentBuffer;
|
||||||
}
|
}
|
||||||
|
|
|
@ -70,6 +70,10 @@ class Ver4DictBuffers {
|
||||||
return &mProbabilityDictContent;
|
return &mProbabilityDictContent;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Mutable counterpart of getBigramDictContent(): returns a non-const pointer to the bigram
// content so that bigram entries can be written.
AK_FORCE_INLINE BigramDictContent *getUpdatableBigramDictContent() {
    BigramDictContent *const content = &mBigramDictContent;
    return content;
}
|
||||||
|
|
||||||
AK_FORCE_INLINE const BigramDictContent *getBigramDictContent() const {
|
AK_FORCE_INLINE const BigramDictContent *getBigramDictContent() const {
|
||||||
return &mBigramDictContent;
|
return &mBigramDictContent;
|
||||||
}
|
}
|
||||||
|
|
|
@ -43,6 +43,8 @@ const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE = 4;
|
||||||
|
|
||||||
const int Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE = 3;
|
const int Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE = 3;
|
||||||
const int Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE = 1;
|
const int Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE = 1;
|
||||||
|
const int Ver4DictConstants::BIGRAM_PROBABILITY_MASK = 0x0F;
|
||||||
|
const int Ver4DictConstants::BIGRAM_HAS_NEXT_MASK = 0x80;
|
||||||
|
|
||||||
const int Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE = 1;
|
const int Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE = 1;
|
||||||
|
|
||||||
|
|
|
@ -47,6 +47,8 @@ class Ver4DictConstants {
|
||||||
|
|
||||||
static const int BIGRAM_FLAGS_FIELD_SIZE;
|
static const int BIGRAM_FLAGS_FIELD_SIZE;
|
||||||
static const int BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE;
|
static const int BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE;
|
||||||
|
static const int BIGRAM_PROBABILITY_MASK;
|
||||||
|
static const int BIGRAM_HAS_NEXT_MASK;
|
||||||
|
|
||||||
static const int SHORTCUT_FLAGS_FIELD_SIZE;
|
static const int SHORTCUT_FLAGS_FIELD_SIZE;
|
||||||
|
|
||||||
|
|
|
@ -16,7 +16,7 @@
|
||||||
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h"
|
||||||
|
|
||||||
#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
|
#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
|
#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
|
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
|
||||||
|
@ -167,8 +167,6 @@ bool Ver4PatriciaTrieNodeWriter::writePtNodeAndAdvancePosition(
|
||||||
ptNodeParams->getChildrenPos(), ptNodeWritingPos)) {
|
ptNodeParams->getChildrenPos(), ptNodeWritingPos)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
// TODO: Implement bigram and shortcut writing.
|
|
||||||
|
|
||||||
// Create node flags and write them.
|
// Create node flags and write them.
|
||||||
PatriciaTrieReadingUtils::NodeFlags nodeFlags =
|
PatriciaTrieReadingUtils::NodeFlags nodeFlags =
|
||||||
PatriciaTrieReadingUtils::createAndGetFlags(ptNodeParams->isBlacklisted(),
|
PatriciaTrieReadingUtils::createAndGetFlags(ptNodeParams->isBlacklisted(),
|
||||||
|
@ -188,8 +186,8 @@ bool Ver4PatriciaTrieNodeWriter::addNewBigramEntry(
|
||||||
const PtNodeParams *const sourcePtNodeParams,
|
const PtNodeParams *const sourcePtNodeParams,
|
||||||
const PtNodeParams *const targetPtNodeParam, const int probability,
|
const PtNodeParams *const targetPtNodeParam, const int probability,
|
||||||
bool *const outAddedNewBigram) {
|
bool *const outAddedNewBigram) {
|
||||||
// TODO: Implement.
|
return mBigramPolicy->addNewEntry(sourcePtNodeParams->getTerminalId(),
|
||||||
return false;
|
targetPtNodeParam->getTerminalId(), probability, outAddedNewBigram);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Ver4PatriciaTrieNodeWriter::removeBigramEntry(
|
bool Ver4PatriciaTrieNodeWriter::removeBigramEntry(
|
||||||
|
|
|
@ -135,9 +135,9 @@ bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int len
|
||||||
AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
|
AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (mDictBuffer.getTailPosition()
|
if (mDictBuffer.getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
|
||||||
>= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
|
AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
|
||||||
AKLOGE("The dictionary is too large to dynamically update.");
|
mDictBuffer.getTailPosition());
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
DynamicPatriciaTrieReadingHelper readingHelper(&mDictBuffer, &mNodeReader);
|
DynamicPatriciaTrieReadingHelper readingHelper(&mDictBuffer, &mNodeReader);
|
||||||
|
@ -156,8 +156,34 @@ bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int len
|
||||||
|
|
||||||
bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int length0,
|
bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int length0,
|
||||||
const int *const word1, const int length1, const int probability) {
|
const int *const word1, const int length1, const int probability) {
|
||||||
// TODO: Implement.
|
if (!mBuffers.get()->isUpdatable()) {
|
||||||
return false;
|
AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary.");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (mDictBuffer.getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
|
||||||
|
AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
|
||||||
|
mDictBuffer.getTailPosition());
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0,
|
||||||
|
false /* forceLowerCaseSearch */);
|
||||||
|
if (word0Pos == NOT_A_DICT_POS) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
const int word1Pos = getTerminalPtNodePositionOfWord(word1, length1,
|
||||||
|
false /* forceLowerCaseSearch */);
|
||||||
|
if (word1Pos == NOT_A_DICT_POS) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
bool addedNewBigram = false;
|
||||||
|
if (mUpdatingHelper.addBigramWords(word0Pos, word1Pos, probability, &addedNewBigram)) {
|
||||||
|
if (addedNewBigram) {
|
||||||
|
mBigramCount++;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Ver4PatriciaTriePolicy::removeBigramWords(const int *const word0, const int length0,
|
bool Ver4PatriciaTriePolicy::removeBigramWords(const int *const word0, const int length0,
|
||||||
|
|
|
@ -42,7 +42,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
mDictBuffer(mBuffers.get()->getRawDictBuffer() + mHeaderPolicy.getSize(),
|
mDictBuffer(mBuffers.get()->getRawDictBuffer() + mHeaderPolicy.getSize(),
|
||||||
mBuffers.get()->getRawDictBufferSize() - mHeaderPolicy.getSize(),
|
mBuffers.get()->getRawDictBufferSize() - mHeaderPolicy.getSize(),
|
||||||
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
|
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
|
||||||
mBigramPolicy(mBuffers.get()->getBigramDictContent(),
|
mBigramPolicy(mBuffers.get()->getUpdatableBigramDictContent(),
|
||||||
mBuffers.get()->getTerminalPositionLookupTable()),
|
mBuffers.get()->getTerminalPositionLookupTable()),
|
||||||
mShortcutPolicy(mBuffers.get()->getShortcutDictContent(),
|
mShortcutPolicy(mBuffers.get()->getShortcutDictContent(),
|
||||||
mBuffers.get()->getTerminalPositionLookupTable()),
|
mBuffers.get()->getTerminalPositionLookupTable()),
|
||||||
|
|
|
@ -49,6 +49,11 @@ void BufferWithExtendableBuffer::readCodePointsAndAdvancePosition(const int maxC
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool BufferWithExtendableBuffer::writeUint(const uint32_t data, const int size, const int pos) {
|
||||||
|
int writingPos = pos;
|
||||||
|
return writeUintAndAdvancePosition(data, size, &writingPos);
|
||||||
|
}
|
||||||
|
|
||||||
bool BufferWithExtendableBuffer::writeUintAndAdvancePosition(const uint32_t data, const int size,
|
bool BufferWithExtendableBuffer::writeUintAndAdvancePosition(const uint32_t data, const int size,
|
||||||
int *const pos) {
|
int *const pos) {
|
||||||
if (!(size >= 1 && size <= 4)) {
|
if (!(size >= 1 && size <= 4)) {
|
||||||
|
|
|
@ -93,6 +93,8 @@ class BufferWithExtendableBuffer {
|
||||||
* Writing is allowed for original buffer, already written region of additional buffer and the
|
* Writing is allowed for original buffer, already written region of additional buffer and the
|
||||||
* tail of additional buffer.
|
* tail of additional buffer.
|
||||||
*/
|
*/
|
||||||
|
bool writeUint(const uint32_t data, const int size, const int pos);
|
||||||
|
|
||||||
bool writeUintAndAdvancePosition(const uint32_t data, const int size, int *const pos);
|
bool writeUintAndAdvancePosition(const uint32_t data, const int size, int *const pos);
|
||||||
|
|
||||||
bool writeCodePointsAndAdvancePosition(const int *const codePoints, const int codePointCount,
|
bool writeCodePointsAndAdvancePosition(const int *const codePoints, const int codePointCount,
|
||||||
|
|
|
@ -19,23 +19,68 @@
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
const int SparseTable::NOT_EXIST = -1;
|
const int SparseTable::NOT_EXIST = -1;
|
||||||
|
const int SparseTable::INDEX_SIZE = 4;
|
||||||
|
|
||||||
bool SparseTable::contains(const int id) const {
|
bool SparseTable::contains(const int id) const {
|
||||||
const int readingPos = id / mBlockSize * mDataSize;
|
const int readingPos = getPosInIndexTable(id);
|
||||||
if (id < 0 || mIndexTableBuffer->getTailPosition() <= readingPos) {
|
if (id < 0 || mIndexTableBuffer->getTailPosition() <= readingPos) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
const int index = mIndexTableBuffer->readUint(mDataSize, readingPos);
|
const int index = mIndexTableBuffer->readUint(INDEX_SIZE, readingPos);
|
||||||
return index != NOT_EXIST;
|
return index != NOT_EXIST;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t SparseTable::get(const int id) const {
|
uint32_t SparseTable::get(const int id) const {
|
||||||
const int indexTableIndex = id / mBlockSize;
|
const int indexTableReadingPos = getPosInIndexTable(id);
|
||||||
int readingPos = indexTableIndex * mDataSize;
|
const int index = mIndexTableBuffer->readUint(INDEX_SIZE, indexTableReadingPos);
|
||||||
const int index = mIndexTableBuffer->readUint(mDataSize, readingPos);
|
const int contentTableReadingPos = getPosInContentTable(id, index);
|
||||||
|
return mContentTableBuffer->readUint(mDataSize, contentTableReadingPos);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool SparseTable::set(const int id, const uint32_t value) {
|
||||||
|
const int posInIndexTable = getPosInIndexTable(id);
|
||||||
|
// Extends the index table if needed.
|
||||||
|
if (mIndexTableBuffer->getTailPosition() < posInIndexTable) {
|
||||||
|
int tailPos = mIndexTableBuffer->getTailPosition();
|
||||||
|
while(tailPos < posInIndexTable) {
|
||||||
|
if (!mIndexTableBuffer->writeUintAndAdvancePosition(NOT_EXIST, INDEX_SIZE, &tailPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (contains(id)) {
|
||||||
|
// The entry is already in the content table.
|
||||||
|
const int index = mIndexTableBuffer->readUint(INDEX_SIZE, posInIndexTable);
|
||||||
|
return mContentTableBuffer->writeUint(value, mDataSize, getPosInContentTable(id, index));
|
||||||
|
}
|
||||||
|
// The entry is not in the content table.
|
||||||
|
// Create new entry in the content table.
|
||||||
|
const int index = getIndexFromContentTablePos(mContentTableBuffer->getTailPosition());
|
||||||
|
if (!mIndexTableBuffer->writeUint(index, INDEX_SIZE, posInIndexTable)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Write a new block that containing the entry to be set.
|
||||||
|
int writingPos = getPosInContentTable(0 /* id */, index);
|
||||||
|
for (int i = 0; i < mBlockSize; ++i) {
|
||||||
|
if (!mContentTableBuffer->writeUintAndAdvancePosition(NOT_A_DICT_POS, mDataSize,
|
||||||
|
&writingPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return mContentTableBuffer->writeUint(value, mDataSize, getPosInContentTable(id, index));
|
||||||
|
}
|
||||||
|
|
||||||
|
int SparseTable::getIndexFromContentTablePos(const int contentTablePos) const {
|
||||||
|
return contentTablePos / mDataSize / mBlockSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
int SparseTable::getPosInIndexTable(const int id) const {
|
||||||
|
return (id / mBlockSize) * INDEX_SIZE;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the byte position in the content table of the entry for |id| inside the block
// identified by |index|.
//
// A block holds mBlockSize entries of mDataSize bytes each (set() fills a new block with
// exactly that many entries, and getIndexFromContentTablePos() divides a position by both
// values), so block |index| starts at index * mBlockSize * mDataSize and the entry for |id|
// lies (id % mBlockSize) entries further on. The previous formula,
// (index * mDataSize + offset) * mBlockSize, swapped the two factors and was only correct when
// mBlockSize == mDataSize.
int SparseTable::getPosInContentTable(const int id, const int index) const {
    const int offset = id % mBlockSize;
    return (index * mBlockSize + offset) * mDataSize;
}
|
||||||
|
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
|
|
|
@ -38,10 +38,19 @@ class SparseTable {
|
||||||
|
|
||||||
uint32_t get(const int id) const;
|
uint32_t get(const int id) const;
|
||||||
|
|
||||||
|
bool set(const int id, const uint32_t value);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(SparseTable);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(SparseTable);
|
||||||
|
|
||||||
|
int getIndexFromContentTablePos(const int contentTablePos) const;
|
||||||
|
|
||||||
|
int getPosInIndexTable(const int id) const;
|
||||||
|
|
||||||
|
int getPosInContentTable(const int id, const int index) const;
|
||||||
|
|
||||||
static const int NOT_EXIST;
|
static const int NOT_EXIST;
|
||||||
|
static const int INDEX_SIZE;
|
||||||
|
|
||||||
BufferWithExtendableBuffer *const mIndexTableBuffer;
|
BufferWithExtendableBuffer *const mIndexTableBuffer;
|
||||||
BufferWithExtendableBuffer *const mContentTableBuffer;
|
BufferWithExtendableBuffer *const mContentTableBuffer;
|
||||||
|
|
|
@ -206,4 +206,44 @@ public class Ver4BinaryDictionaryTests extends AndroidTestCase {
|
||||||
assertEquals(probability, binaryDictionary.getFrequency("y"));
|
assertEquals(probability, binaryDictionary.getFrequency("y"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testWriteBigrams() {
|
||||||
|
final String dictVersion = Long.toString(System.currentTimeMillis());
|
||||||
|
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
|
||||||
|
getDictionaryOptions(TEST_LOCALE, dictVersion));
|
||||||
|
final DictEncoder encoder = new Ver4DictEncoder(getContext().getCacheDir());
|
||||||
|
try {
|
||||||
|
encoder.writeDictionary(dict, FORMAT_OPTIONS);
|
||||||
|
} catch (IOException e) {
|
||||||
|
Log.e(TAG, "IOException while writing dictionary", e);
|
||||||
|
} catch (UnsupportedFormatException e) {
|
||||||
|
Log.e(TAG, "Unsupported format", e);
|
||||||
|
}
|
||||||
|
final File trieFile = getTrieFile(TEST_LOCALE, dictVersion);
|
||||||
|
final BinaryDictionary binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(),
|
||||||
|
0 /* offset */, trieFile.length(), true /* useFullEditDistance */,
|
||||||
|
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
||||||
|
assertTrue(binaryDictionary.isValidDictionary());
|
||||||
|
|
||||||
|
final int unigramProbability = 100;
|
||||||
|
final int bigramProbability = 10;
|
||||||
|
final int updatedBigramProbability = 15;
|
||||||
|
binaryDictionary.addUnigramWord("aaa", unigramProbability);
|
||||||
|
binaryDictionary.addUnigramWord("abb", unigramProbability);
|
||||||
|
binaryDictionary.addUnigramWord("bcc", unigramProbability);
|
||||||
|
binaryDictionary.addBigramWords("aaa", "abb", bigramProbability);
|
||||||
|
binaryDictionary.addBigramWords("aaa", "bcc", bigramProbability);
|
||||||
|
binaryDictionary.addBigramWords("abb", "aaa", bigramProbability);
|
||||||
|
binaryDictionary.addBigramWords("abb", "bcc", bigramProbability);
|
||||||
|
|
||||||
|
final int probability = binaryDictionary.calculateProbability(unigramProbability,
|
||||||
|
bigramProbability);
|
||||||
|
assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb"));
|
||||||
|
assertEquals(true, binaryDictionary.isValidBigram("aaa", "bcc"));
|
||||||
|
assertEquals(true, binaryDictionary.isValidBigram("abb", "aaa"));
|
||||||
|
assertEquals(true, binaryDictionary.isValidBigram("abb", "bcc"));
|
||||||
|
assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "abb"));
|
||||||
|
assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "bcc"));
|
||||||
|
assertEquals(probability, binaryDictionary.getBigramProbability("abb", "aaa"));
|
||||||
|
assertEquals(probability, binaryDictionary.getBigramProbability("abb", "bcc"));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue