Implement ver4 bigram reading method.
Bug: 11073222 Change-Id: I7b3408938f304da361201892e0a1342fdf92e62e
This commit is contained in:
parent
3bd64441a6
commit
9b3e59d644
12 changed files with 234 additions and 20 deletions
|
@ -711,6 +711,13 @@ public class BinaryDictEncoderUtils {
|
||||||
+ word + " is " + unigramFrequency);
|
+ word + " is " + unigramFrequency);
|
||||||
bigramFrequency = unigramFrequency;
|
bigramFrequency = unigramFrequency;
|
||||||
}
|
}
|
||||||
|
bigramFlags += getBigramFrequencyDiff(unigramFrequency, bigramFrequency)
|
||||||
|
& FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY;
|
||||||
|
return bigramFlags;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static int getBigramFrequencyDiff(final int unigramFrequency,
|
||||||
|
final int bigramFrequency) {
|
||||||
// We compute the difference between 255 (which means probability = 1) and the
|
// We compute the difference between 255 (which means probability = 1) and the
|
||||||
// unigram score. We split this into a number of discrete steps.
|
// unigram score. We split this into a number of discrete steps.
|
||||||
// Now, the steps are numbered 0~15; 0 represents an increase of 1 step while 15
|
// Now, the steps are numbered 0~15; 0 represents an increase of 1 step while 15
|
||||||
|
@ -744,9 +751,7 @@ public class BinaryDictEncoderUtils {
|
||||||
// include this bigram in the dictionary. For now, register as 0, and live with the
|
// include this bigram in the dictionary. For now, register as 0, and live with the
|
||||||
// small over-estimation that we get in this case. TODO: actually remove this bigram
|
// small over-estimation that we get in this case. TODO: actually remove this bigram
|
||||||
// if discretizedFrequency < 0.
|
// if discretizedFrequency < 0.
|
||||||
final int finalBigramFrequency = discretizedFrequency > 0 ? discretizedFrequency : 0;
|
return discretizedFrequency > 0 ? discretizedFrequency : 0;
|
||||||
bigramFlags += finalBigramFrequency & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY;
|
|
||||||
return bigramFlags;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -0,0 +1,65 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2013 The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LATINIME_VER4_BIGRAM_LIST_POLICY_H
|
||||||
|
#define LATINIME_VER4_BIGRAM_LIST_POLICY_H
|
||||||
|
|
||||||
|
#include "defines.h"
|
||||||
|
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
|
||||||
|
public:
|
||||||
|
Ver4BigramListPolicy(const BigramDictContent *const bigramDictContent,
|
||||||
|
const TerminalPositionLookupTable *const terminalPositionLookupTable)
|
||||||
|
: mBigramDictContent(bigramDictContent),
|
||||||
|
mTerminalPositionLookupTable(terminalPositionLookupTable) {}
|
||||||
|
|
||||||
|
void getNextBigram(int *const outBigramPos, int *const outProbability,
|
||||||
|
bool *const outHasNext, int *const bigramEntryPos) const {
|
||||||
|
int bigramFlags = 0;
|
||||||
|
int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
|
||||||
|
mBigramDictContent->getBigramEntryAndAdvancePosition(&bigramFlags, &targetTerminalId,
|
||||||
|
bigramEntryPos);
|
||||||
|
if (outProbability) {
|
||||||
|
*outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags);
|
||||||
|
}
|
||||||
|
if (outHasNext) {
|
||||||
|
*outHasNext = BigramListReadWriteUtils::hasNext(bigramFlags);
|
||||||
|
}
|
||||||
|
if (outBigramPos) {
|
||||||
|
// Lookup target PtNode position.
|
||||||
|
*outBigramPos =
|
||||||
|
mTerminalPositionLookupTable->getTerminalPtNodePosition(targetTerminalId);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void skipAllBigrams(int *const pos) const {
|
||||||
|
// Do nothing because we don't need to skip bigram lists in ver4 dictionaries.
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4BigramListPolicy);
|
||||||
|
|
||||||
|
const BigramDictContent *const mBigramDictContent;
|
||||||
|
const TerminalPositionLookupTable *const mTerminalPositionLookupTable;
|
||||||
|
};
|
||||||
|
} // namespace latinime
|
||||||
|
#endif /* LATINIME_VER4_BIGRAM_LIST_POLICY_H */
|
|
@ -0,0 +1,62 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2013, The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LATINIME_BIGRAM_DICT_CONTENT_H
|
||||||
|
#define LATINIME_BIGRAM_DICT_CONTENT_H
|
||||||
|
|
||||||
|
#include "defines.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
class BigramDictContent : public SparseTableDictContent {
|
||||||
|
public:
|
||||||
|
BigramDictContent(const char *const dictDirPath, const bool isUpdatable)
|
||||||
|
: SparseTableDictContent(dictDirPath,
|
||||||
|
Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION,
|
||||||
|
Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION,
|
||||||
|
Ver4DictConstants::BIGRAM_FILE_EXTENSION, isUpdatable,
|
||||||
|
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
|
||||||
|
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE) {}
|
||||||
|
|
||||||
|
void getBigramEntryAndAdvancePosition(int *const outBigramFlags,
|
||||||
|
int *const outTargetTerminalId, int *const bigramEntryPos) const {
|
||||||
|
const BufferWithExtendableBuffer *const bigramListBuffer = getContentBuffer();
|
||||||
|
if (outBigramFlags) {
|
||||||
|
*outBigramFlags = bigramListBuffer->readUintAndAdvancePosition(
|
||||||
|
Ver4DictConstants::BIGRAM_FRAGS_FIELD_SIZE, bigramEntryPos);
|
||||||
|
}
|
||||||
|
if (outTargetTerminalId) {
|
||||||
|
*outTargetTerminalId = bigramListBuffer->readUintAndAdvancePosition(
|
||||||
|
Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, bigramEntryPos);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns head position of bigram list for a PtNode specified by terminalId.
|
||||||
|
int getBigramListHeadPos(const int terminalId) const {
|
||||||
|
const SparseTable *const addressLookupTable = getAddressLookupTable();
|
||||||
|
if (!addressLookupTable->contains(terminalId)) {
|
||||||
|
return NOT_A_DICT_POS;
|
||||||
|
}
|
||||||
|
return addressLookupTable->get(terminalId);
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
DISALLOW_IMPLICIT_CONSTRUCTORS(BigramDictContent);
|
||||||
|
};
|
||||||
|
} // namespace latinime
|
||||||
|
#endif /* LATINIME_BIGRAM_DICT_CONTENT_H */
|
|
@ -58,6 +58,15 @@ class SparseTableDictContent : public DictContent {
|
||||||
&& mContentBuffer.get() != 0;
|
&& mContentBuffer.get() != 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected:
|
||||||
|
// Accessor for subclasses: returns the address of the mAddressLookupTable member.
const SparseTable *getAddressLookupTable() const {
|
||||||
|
return &mAddressLookupTable;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Accessor for subclasses: returns the address of the mExpandableContentBuffer member.
const BufferWithExtendableBuffer *getContentBuffer() const {
|
||||||
|
return &mExpandableContentBuffer;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(SparseTableDictContent);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(SparseTableDictContent);
|
||||||
|
|
||||||
|
|
|
@ -29,11 +29,14 @@ namespace latinime {
|
||||||
|
|
||||||
class TerminalPositionLookupTable : public SingleDictContent {
|
class TerminalPositionLookupTable : public SingleDictContent {
|
||||||
public:
|
public:
|
||||||
TerminalPositionLookupTable(const char *const dictDirPath, const bool isUpdatable)
|
// TODO: Quit using headerRegionSize.
|
||||||
|
TerminalPositionLookupTable(const char *const dictDirPath, const bool isUpdatable,
|
||||||
|
const int headerRegionSize)
|
||||||
: SingleDictContent(dictDirPath,
|
: SingleDictContent(dictDirPath,
|
||||||
Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION, isUpdatable),
|
Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION, isUpdatable),
|
||||||
mSize(getBuffer()->getTailPosition()
|
mSize(getBuffer()->getTailPosition()
|
||||||
/ Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE) {}
|
/ Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE),
|
||||||
|
mHeaderRegionSize(headerRegionSize) {}
|
||||||
|
|
||||||
int getTerminalPtNodePosition(const int terminalId) const {
|
int getTerminalPtNodePosition(const int terminalId) const {
|
||||||
if (terminalId < 0 || terminalId >= mSize) {
|
if (terminalId < 0 || terminalId >= mSize) {
|
||||||
|
@ -41,13 +44,14 @@ class TerminalPositionLookupTable : public SingleDictContent {
|
||||||
}
|
}
|
||||||
const int readingPos = terminalId * Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE;
|
const int readingPos = terminalId * Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE;
|
||||||
return getBuffer()->readUint(Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE,
|
return getBuffer()->readUint(Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE,
|
||||||
readingPos);
|
readingPos) - mHeaderRegionSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(TerminalPositionLookupTable);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(TerminalPositionLookupTable);
|
||||||
|
|
||||||
const int mSize;
|
const int mSize;
|
||||||
|
const int mHeaderRegionSize;
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif // LATINIME_TERMINAL_POSITION_LOOKUP_TABLE_H
|
#endif // LATINIME_TERMINAL_POSITION_LOOKUP_TABLE_H
|
||||||
|
|
|
@ -18,6 +18,8 @@
|
||||||
#define LATINIME_VER4_DICT_BUFFER_H
|
#define LATINIME_VER4_DICT_BUFFER_H
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/header/header_read_write_utils.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h"
|
||||||
|
@ -52,24 +54,29 @@ class Ver4DictBuffers {
|
||||||
return mDictBuffer.get()->getBufferSize();
|
return mDictBuffer.get()->getBufferSize();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns the address of the mTerminalPositionLookupTable member.
AK_FORCE_INLINE const TerminalPositionLookupTable *getTerminalPositionLookupTable() const {
|
||||||
|
return &mTerminalPositionLookupTable;
|
||||||
|
}
|
||||||
|
|
||||||
AK_FORCE_INLINE const ProbabilityDictContent *getProbabilityDictContent() const {
|
AK_FORCE_INLINE const ProbabilityDictContent *getProbabilityDictContent() const {
|
||||||
return &mProbabilityDictContent;
|
return &mProbabilityDictContent;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns the address of the mBigramDictContent member.
AK_FORCE_INLINE const BigramDictContent *getBigramDictContent() const {
|
||||||
|
return &mBigramDictContent;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictBuffers);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictBuffers);
|
||||||
|
|
||||||
AK_FORCE_INLINE Ver4DictBuffers(const char *const dictDirPath,
|
AK_FORCE_INLINE Ver4DictBuffers(const char *const dictDirPath,
|
||||||
const MmappedBuffer::MmappedBufferPtr &dictBuffer, const bool isUpdatable)
|
const MmappedBuffer::MmappedBufferPtr &dictBuffer, const bool isUpdatable)
|
||||||
: mDictBuffer(dictBuffer),
|
: mDictBuffer(dictBuffer),
|
||||||
mTerminalPositionLookupTable(dictDirPath, isUpdatable),
|
// TODO: Quit using getHeaderSize.
|
||||||
|
mTerminalPositionLookupTable(dictDirPath, isUpdatable,
|
||||||
|
HeaderReadWriteUtils::getHeaderSize(mDictBuffer.get()->getBuffer())),
|
||||||
mProbabilityDictContent(dictDirPath, isUpdatable),
|
mProbabilityDictContent(dictDirPath, isUpdatable),
|
||||||
mBigramDictContent(dictDirPath,
|
mBigramDictContent(dictDirPath, isUpdatable),
|
||||||
Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION,
|
|
||||||
Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION,
|
|
||||||
Ver4DictConstants::BIGRAM_FILE_EXTENSION, isUpdatable,
|
|
||||||
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
|
|
||||||
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE),
|
|
||||||
mShortcutDictContent(dictDirPath,
|
mShortcutDictContent(dictDirPath,
|
||||||
Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION,
|
Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION,
|
||||||
Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION,
|
Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION,
|
||||||
|
@ -80,7 +87,7 @@ class Ver4DictBuffers {
|
||||||
const MmappedBuffer::MmappedBufferPtr mDictBuffer;
|
const MmappedBuffer::MmappedBufferPtr mDictBuffer;
|
||||||
TerminalPositionLookupTable mTerminalPositionLookupTable;
|
TerminalPositionLookupTable mTerminalPositionLookupTable;
|
||||||
ProbabilityDictContent mProbabilityDictContent;
|
ProbabilityDictContent mProbabilityDictContent;
|
||||||
SparseTableDictContent mBigramDictContent;
|
BigramDictContent mBigramDictContent;
|
||||||
SparseTableDictContent mShortcutDictContent;
|
SparseTableDictContent mShortcutDictContent;
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
|
|
|
@ -40,4 +40,7 @@ const int Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE = 4;
|
||||||
const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE = 16;
|
const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE = 16;
|
||||||
const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE = 4;
|
const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE = 4;
|
||||||
|
|
||||||
|
const int Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE = 3;
|
||||||
|
const int Ver4DictConstants::BIGRAM_FRAGS_FIELD_SIZE = 1;
|
||||||
|
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
|
|
|
@ -43,6 +43,9 @@ class Ver4DictConstants {
|
||||||
static const int BIGRAM_ADDRESS_TABLE_DATA_SIZE;
|
static const int BIGRAM_ADDRESS_TABLE_DATA_SIZE;
|
||||||
static const int SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE;
|
static const int SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE;
|
||||||
static const int SHORTCUT_ADDRESS_TABLE_DATA_SIZE;
|
static const int SHORTCUT_ADDRESS_TABLE_DATA_SIZE;
|
||||||
|
|
||||||
|
static const int BIGRAM_FRAGS_FIELD_SIZE;
|
||||||
|
static const int BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE;
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictConstants);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictConstants);
|
||||||
};
|
};
|
||||||
|
|
|
@ -119,7 +119,8 @@ int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) cons
|
||||||
if (ptNodeParams.isDeleted()) {
|
if (ptNodeParams.isDeleted()) {
|
||||||
return NOT_A_DICT_POS;
|
return NOT_A_DICT_POS;
|
||||||
}
|
}
|
||||||
return ptNodeParams.getTerminalId();
|
return mBuffers.get()->getBigramDictContent()->getBigramListHeadPos(
|
||||||
|
ptNodeParams.getTerminalId());
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int length,
|
bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int length,
|
||||||
|
|
|
@ -19,6 +19,7 @@
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
|
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/header/header_policy.h"
|
#include "suggest/policyimpl/dictionary/header/header_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
|
||||||
|
@ -38,6 +39,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
mDictBuffer(mBuffers.get()->getRawDictBuffer() + mHeaderPolicy.getSize(),
|
mDictBuffer(mBuffers.get()->getRawDictBuffer() + mHeaderPolicy.getSize(),
|
||||||
mBuffers.get()->getRawDictBufferSize() - mHeaderPolicy.getSize(),
|
mBuffers.get()->getRawDictBufferSize() - mHeaderPolicy.getSize(),
|
||||||
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
|
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
|
||||||
|
mBigramPolicy(mBuffers.get()->getBigramDictContent(),
|
||||||
|
mBuffers.get()->getTerminalPositionLookupTable()),
|
||||||
mNodeReader(&mDictBuffer, mBuffers.get()->getProbabilityDictContent()) {};
|
mNodeReader(&mDictBuffer, mBuffers.get()->getProbabilityDictContent()) {};
|
||||||
|
|
||||||
AK_FORCE_INLINE int getRootPosition() const {
|
AK_FORCE_INLINE int getRootPosition() const {
|
||||||
|
@ -67,7 +70,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
}
|
}
|
||||||
|
|
||||||
const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const {
|
const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const {
|
||||||
return 0;
|
return &mBigramPolicy;
|
||||||
}
|
}
|
||||||
|
|
||||||
const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const {
|
const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const {
|
||||||
|
@ -97,6 +100,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
const Ver4DictBuffers::Ver4DictBuffersPtr mBuffers;
|
const Ver4DictBuffers::Ver4DictBuffersPtr mBuffers;
|
||||||
const HeaderPolicy mHeaderPolicy;
|
const HeaderPolicy mHeaderPolicy;
|
||||||
BufferWithExtendableBuffer mDictBuffer;
|
BufferWithExtendableBuffer mDictBuffer;
|
||||||
|
const Ver4BigramListPolicy mBigramPolicy;
|
||||||
Ver4PatriciaTrieNodeReader mNodeReader;
|
Ver4PatriciaTrieNodeReader mNodeReader;
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
|
|
|
@ -21,11 +21,10 @@ namespace latinime {
|
||||||
const int SparseTable::NOT_EXIST = -1;
|
const int SparseTable::NOT_EXIST = -1;
|
||||||
|
|
||||||
bool SparseTable::contains(const int id) const {
|
bool SparseTable::contains(const int id) const {
|
||||||
if (id < 0 || mIndexTableBuffer->getTailPosition() <= id * mDataSize) {
|
const int readingPos = id / mBlockSize * mDataSize;
|
||||||
|
if (id < 0 || mIndexTableBuffer->getTailPosition() <= readingPos) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
const int indexTableIndex = id / mBlockSize;
|
|
||||||
const int readingPos = indexTableIndex * mDataSize;
|
|
||||||
const int index = mIndexTableBuffer->readUint(mDataSize, readingPos);
|
const int index = mIndexTableBuffer->readUint(mDataSize, readingPos);
|
||||||
return index != NOT_EXIST;
|
return index != NOT_EXIST;
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,6 +20,7 @@ import android.test.AndroidTestCase;
|
||||||
import android.test.suitebuilder.annotation.LargeTest;
|
import android.test.suitebuilder.annotation.LargeTest;
|
||||||
import android.util.Log;
|
import android.util.Log;
|
||||||
|
|
||||||
|
import com.android.inputmethod.latin.makedict.BinaryDictEncoderUtils;
|
||||||
import com.android.inputmethod.latin.makedict.DictEncoder;
|
import com.android.inputmethod.latin.makedict.DictEncoder;
|
||||||
import com.android.inputmethod.latin.makedict.FormatSpec;
|
import com.android.inputmethod.latin.makedict.FormatSpec;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary;
|
import com.android.inputmethod.latin.makedict.FusionDictionary;
|
||||||
|
@ -62,7 +63,7 @@ public class Ver4BinaryDictionaryTests extends AndroidTestCase {
|
||||||
|
|
||||||
// TODO: remove after native code support dictionary creation.
|
// TODO: remove after native code support dictionary creation.
|
||||||
private File getTrieFile(final String id, final String version) {
|
private File getTrieFile(final String id, final String version) {
|
||||||
return new File(getContext().getCacheDir() + "/" + id + "." + version,
|
return new File(getContext().getCacheDir() + "/" + id + "." + version,
|
||||||
TEST_LOCALE + "." + version + FormatSpec.TRIE_FILE_EXTENSION);
|
TEST_LOCALE + "." + version + FormatSpec.TRIE_FILE_EXTENSION);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -120,4 +121,55 @@ public class Ver4BinaryDictionaryTests extends AndroidTestCase {
|
||||||
assertEquals(frequency, binaryDictionary.getFrequency("aaa"));
|
assertEquals(frequency, binaryDictionary.getFrequency("aaa"));
|
||||||
assertEquals(frequency, binaryDictionary.getFrequency("ab"));
|
assertEquals(frequency, binaryDictionary.getFrequency("ab"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static int getCalculatedBigramProbabiliy(BinaryDictionary binaryDictionary,
|
||||||
|
final int unigramFrequency, final int bigramFrequency) {
|
||||||
|
final int bigramFrequencyDiff = BinaryDictEncoderUtils.getBigramFrequencyDiff(
|
||||||
|
unigramFrequency, bigramFrequency);
|
||||||
|
return binaryDictionary.calculateProbability(unigramFrequency, bigramFrequencyDiff);
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: Add large tests.
|
||||||
|
public void testReadBigrams() {
|
||||||
|
final String dictVersion = Long.toString(System.currentTimeMillis());
|
||||||
|
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
|
||||||
|
getDictionaryOptions(TEST_LOCALE, dictVersion));
|
||||||
|
|
||||||
|
final int unigramFrequency = 1;
|
||||||
|
final int bigramFrequency0 = 150;
|
||||||
|
final int bigramFrequency1 = 1;
|
||||||
|
final int bigramFrequency2 = 255;
|
||||||
|
dict.add("a", unigramFrequency, null, false /* isNotAWord */);
|
||||||
|
dict.add("aaa", unigramFrequency, null, false /* isNotAWord */);
|
||||||
|
dict.add("ab", unigramFrequency, null, false /* isNotAWord */);
|
||||||
|
dict.setBigram("a", "aaa", bigramFrequency0);
|
||||||
|
dict.setBigram("a", "ab", bigramFrequency1);
|
||||||
|
dict.setBigram("aaa", "ab", bigramFrequency2);
|
||||||
|
|
||||||
|
DictEncoder encoder = new Ver4DictEncoder(getContext().getCacheDir());
|
||||||
|
try {
|
||||||
|
encoder.writeDictionary(dict, FORMAT_OPTIONS);
|
||||||
|
} catch (IOException e) {
|
||||||
|
Log.e(TAG, "IOException while writing dictionary", e);
|
||||||
|
} catch (UnsupportedFormatException e) {
|
||||||
|
Log.e(TAG, "Unsupported format", e);
|
||||||
|
}
|
||||||
|
File trieFile = getTrieFile(TEST_LOCALE, dictVersion);
|
||||||
|
BinaryDictionary binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(),
|
||||||
|
0 /* offset */, trieFile.length(), true /* useFullEditDistance */,
|
||||||
|
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
||||||
|
|
||||||
|
assertTrue(binaryDictionary.isValidDictionary());
|
||||||
|
|
||||||
|
assertEquals(getCalculatedBigramProbabiliy(binaryDictionary, unigramFrequency,
|
||||||
|
bigramFrequency0), binaryDictionary.getBigramProbability("a", "aaa"));
|
||||||
|
assertEquals(getCalculatedBigramProbabiliy(binaryDictionary, unigramFrequency,
|
||||||
|
bigramFrequency1), binaryDictionary.getBigramProbability("a", "ab"));
|
||||||
|
assertEquals(getCalculatedBigramProbabiliy(binaryDictionary, unigramFrequency,
|
||||||
|
bigramFrequency2), binaryDictionary.getBigramProbability("aaa", "ab"));
|
||||||
|
|
||||||
|
assertFalse(binaryDictionary.isValidBigram("aaa", "a"));
|
||||||
|
assertFalse(binaryDictionary.isValidBigram("ab", "a"));
|
||||||
|
assertFalse(binaryDictionary.isValidBigram("ab", "aaa"));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue