Have ver4 support decaying dictionary
Bug: 11073222
Change-Id: I7f0002c4743ab3bb1ebaac1bca6e367e6b220010
main
parent
0e8dbe0284
commit
24af6ed692
|
@ -20,6 +20,7 @@
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
|
@ -46,10 +47,12 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget
|
||||||
if (!mBigramDictContent->createNewBigramList(terminalId)) {
|
if (!mBigramDictContent->createNewBigramList(terminalId)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
const int probabilityToWrite = getUpdatedProbability(
|
||||||
|
NOT_A_PROBABILITY /* originalProbability */, newProbability);
|
||||||
// Write an entry.
|
// Write an entry.
|
||||||
int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
|
const int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
|
||||||
if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(newProbability,
|
if (!mBigramDictContent->writeBigramEntry(probabilityToWrite, false /* hasNext */,
|
||||||
false /* hasNext */, newTargetTerminalId, &writingPos)) {
|
newTargetTerminalId, writingPos)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (outAddedNewEntry) {
|
if (outAddedNewEntry) {
|
||||||
|
@ -61,19 +64,18 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget
|
||||||
const int entryPosToUpdate = getEntryPosToUpdate(newTargetTerminalId, bigramListPos);
|
const int entryPosToUpdate = getEntryPosToUpdate(newTargetTerminalId, bigramListPos);
|
||||||
if (entryPosToUpdate != NOT_A_DICT_POS) {
|
if (entryPosToUpdate != NOT_A_DICT_POS) {
|
||||||
// Overwrite existing entry.
|
// Overwrite existing entry.
|
||||||
int readingPos = entryPosToUpdate;
|
|
||||||
bool hasNext = false;
|
bool hasNext = false;
|
||||||
int probability = NOT_A_PROBABILITY;
|
int probability = NOT_A_PROBABILITY;
|
||||||
int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
|
int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
|
||||||
mBigramDictContent->getBigramEntryAndAdvancePosition(&probability, &hasNext,
|
mBigramDictContent->getBigramEntry(&probability, &hasNext, &targetTerminalId,
|
||||||
&targetTerminalId, &readingPos);
|
entryPosToUpdate);
|
||||||
|
const int probabilityToWrite = getUpdatedProbability(probability, newProbability);
|
||||||
if (targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID && outAddedNewEntry) {
|
if (targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID && outAddedNewEntry) {
|
||||||
// Reuse invalid entry.
|
// Reuse invalid entry.
|
||||||
*outAddedNewEntry = true;
|
*outAddedNewEntry = true;
|
||||||
}
|
}
|
||||||
int writingPos = entryPosToUpdate;
|
return mBigramDictContent->writeBigramEntry(probabilityToWrite, hasNext,
|
||||||
return mBigramDictContent->writeBigramEntryAndAdvancePosition(newProbability, hasNext,
|
newTargetTerminalId, entryPosToUpdate);
|
||||||
newTargetTerminalId, &writingPos);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add new entry to the bigram list.
|
// Add new entry to the bigram list.
|
||||||
|
@ -83,7 +85,9 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget
|
||||||
}
|
}
|
||||||
// Write new entry at a head position of the bigram list.
|
// Write new entry at a head position of the bigram list.
|
||||||
int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
|
int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
|
||||||
if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(newProbability,
|
const int probabilityToWrite = getUpdatedProbability(
|
||||||
|
NOT_A_PROBABILITY /* originalProbability */, newProbability);
|
||||||
|
if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(probabilityToWrite,
|
||||||
true /* hasNext */, newTargetTerminalId, &writingPos)) {
|
true /* hasNext */, newTargetTerminalId, &writingPos)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -105,20 +109,18 @@ bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTer
|
||||||
// Bigram entry doesn't exist.
|
// Bigram entry doesn't exist.
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
int readingPos = entryPosToUpdate;
|
|
||||||
bool hasNext = false;
|
bool hasNext = false;
|
||||||
int probability = NOT_A_PROBABILITY;
|
int probability = NOT_A_PROBABILITY;
|
||||||
int originalTargetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
|
int originalTargetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
|
||||||
mBigramDictContent->getBigramEntryAndAdvancePosition(&probability, &hasNext,
|
mBigramDictContent->getBigramEntry(&probability, &hasNext, &originalTargetTerminalId,
|
||||||
&originalTargetTerminalId, &readingPos);
|
entryPosToUpdate);
|
||||||
if (targetTerminalId != originalTargetTerminalId) {
|
if (targetTerminalId != originalTargetTerminalId) {
|
||||||
// Bigram entry doesn't exist.
|
// Bigram entry doesn't exist.
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
int writingPos = entryPosToUpdate;
|
|
||||||
// Remove bigram entry by overwriting target terminal Id.
|
// Remove bigram entry by overwriting target terminal Id.
|
||||||
return mBigramDictContent->writeBigramEntryAndAdvancePosition(probability, hasNext,
|
return mBigramDictContent->writeBigramEntry(probability, hasNext,
|
||||||
Ver4DictConstants::NOT_A_TERMINAL_ID /* targetTerminalId */, &writingPos);
|
Ver4DictConstants::NOT_A_TERMINAL_ID /* targetTerminalId */, entryPosToUpdate);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Ver4BigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(const int terminalId,
|
bool Ver4BigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(const int terminalId,
|
||||||
|
@ -143,9 +145,28 @@ bool Ver4BigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(const i
|
||||||
targetTerminalId);
|
targetTerminalId);
|
||||||
if (targetPtNodePos == NOT_A_DICT_POS) {
|
if (targetPtNodePos == NOT_A_DICT_POS) {
|
||||||
// Invalidate bigram entry.
|
// Invalidate bigram entry.
|
||||||
int writingPos = entryPos;
|
if (!mBigramDictContent->writeBigramEntry(probability, hasNext,
|
||||||
return mBigramDictContent->writeBigramEntryAndAdvancePosition(probability, hasNext,
|
Ver4DictConstants::NOT_A_TERMINAL_ID /* targetTerminalId */, entryPos)) {
|
||||||
Ver4DictConstants::NOT_A_TERMINAL_ID /* targetTerminalId */, &writingPos);
|
return false;
|
||||||
|
}
|
||||||
|
} else if (mNeedsToDecayWhenUpdating) {
|
||||||
|
probability = ForgettingCurveUtils::getEncodedProbabilityToSave(
|
||||||
|
probability, mHeaderPolicy);
|
||||||
|
if (ForgettingCurveUtils::isValidEncodedProbability(probability)) {
|
||||||
|
if (!mBigramDictContent->writeBigramEntry(probability, hasNext, targetTerminalId,
|
||||||
|
entryPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
*outBigramCount += 1;
|
||||||
|
} else {
|
||||||
|
// Remove entry.
|
||||||
|
if (!mBigramDictContent->writeBigramEntry(probability, hasNext,
|
||||||
|
Ver4DictConstants::NOT_A_TERMINAL_ID /* targetTerminalId */, entryPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
*outBigramCount += 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
|
@ -192,4 +213,14 @@ int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
|
||||||
return invalidEntryPos;
|
return invalidEntryPos;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int Ver4BigramListPolicy::getUpdatedProbability(const int originalProbability,
|
||||||
|
const int newProbability) const {
|
||||||
|
if (mNeedsToDecayWhenUpdating) {
|
||||||
|
return ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability,
|
||||||
|
newProbability);
|
||||||
|
} else {
|
||||||
|
return newProbability;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
|
|
|
@ -23,14 +23,18 @@
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
class BigramDictContent;
|
class BigramDictContent;
|
||||||
|
class DictionaryHeaderStructurePolicy;
|
||||||
class TerminalPositionLookupTable;
|
class TerminalPositionLookupTable;
|
||||||
|
|
||||||
class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
|
class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
|
||||||
public:
|
public:
|
||||||
Ver4BigramListPolicy(BigramDictContent *const bigramDictContent,
|
Ver4BigramListPolicy(BigramDictContent *const bigramDictContent,
|
||||||
const TerminalPositionLookupTable *const terminalPositionLookupTable)
|
const TerminalPositionLookupTable *const terminalPositionLookupTable,
|
||||||
|
const DictionaryHeaderStructurePolicy *const headerPolicy,
|
||||||
|
const bool needsToDecayWhenUpdating)
|
||||||
: mBigramDictContent(bigramDictContent),
|
: mBigramDictContent(bigramDictContent),
|
||||||
mTerminalPositionLookupTable(terminalPositionLookupTable) {}
|
mTerminalPositionLookupTable(terminalPositionLookupTable),
|
||||||
|
mHeaderPolicy(headerPolicy), mNeedsToDecayWhenUpdating(needsToDecayWhenUpdating) {}
|
||||||
|
|
||||||
void getNextBigram(int *const outBigramPos, int *const outProbability,
|
void getNextBigram(int *const outBigramPos, int *const outProbability,
|
||||||
bool *const outHasNext, int *const bigramEntryPos) const;
|
bool *const outHasNext, int *const bigramEntryPos) const;
|
||||||
|
@ -54,8 +58,12 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
|
||||||
|
|
||||||
int getEntryPosToUpdate(const int targetTerminalIdToFind, const int bigramListPos) const;
|
int getEntryPosToUpdate(const int targetTerminalIdToFind, const int bigramListPos) const;
|
||||||
|
|
||||||
|
int getUpdatedProbability(const int originalProbability, const int newProbability) const;
|
||||||
|
|
||||||
BigramDictContent *const mBigramDictContent;
|
BigramDictContent *const mBigramDictContent;
|
||||||
const TerminalPositionLookupTable *const mTerminalPositionLookupTable;
|
const TerminalPositionLookupTable *const mTerminalPositionLookupTable;
|
||||||
|
const DictionaryHeaderStructurePolicy *const mHeaderPolicy;
|
||||||
|
const bool mNeedsToDecayWhenUpdating;
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif /* LATINIME_VER4_BIGRAM_LIST_POLICY_H */
|
#endif /* LATINIME_VER4_BIGRAM_LIST_POLICY_H */
|
||||||
|
|
|
@ -103,6 +103,7 @@ bool BigramDictContent::runGC(const TerminalPositionLookupTable::TerminalIdMap *
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns whether GC for the bigram list succeeded.
|
||||||
bool BigramDictContent::runGCBigramList(const int bigramListPos,
|
bool BigramDictContent::runGCBigramList(const int bigramListPos,
|
||||||
const BigramDictContent *const sourceBigramDictContent, const int toPos,
|
const BigramDictContent *const sourceBigramDictContent, const int toPos,
|
||||||
const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
|
const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
|
||||||
|
@ -121,9 +122,8 @@ bool BigramDictContent::runGCBigramList(const int bigramListPos,
|
||||||
TerminalPositionLookupTable::TerminalIdMap::const_iterator it =
|
TerminalPositionLookupTable::TerminalIdMap::const_iterator it =
|
||||||
terminalIdMap->find(targetTerminalId);
|
terminalIdMap->find(targetTerminalId);
|
||||||
if (it == terminalIdMap->end()) {
|
if (it == terminalIdMap->end()) {
|
||||||
AKLOGE("terminal Id %d is not in the terminal position map. map size: %zd",
|
// Target word has been removed.
|
||||||
targetTerminalId, terminalIdMap->size());
|
continue;
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
if (!writeBigramEntryAndAdvancePosition(probability, hasNext, it->second,
|
if (!writeBigramEntryAndAdvancePosition(probability, hasNext, it->second,
|
||||||
&writingPos)) {
|
&writingPos)) {
|
||||||
|
|
|
@ -38,6 +38,13 @@ class BigramDictContent : public SparseTableDictContent {
|
||||||
: SparseTableDictContent(Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
|
: SparseTableDictContent(Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
|
||||||
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE) {}
|
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE) {}
|
||||||
|
|
||||||
|
void getBigramEntry(int *const outProbability, bool *const outHasNext,
|
||||||
|
int *const outTargetTerminalId, const int bigramEntryPos) const {
|
||||||
|
int readingPos = bigramEntryPos;
|
||||||
|
getBigramEntryAndAdvancePosition(outProbability, outHasNext, outTargetTerminalId,
|
||||||
|
&readingPos);
|
||||||
|
}
|
||||||
|
|
||||||
void getBigramEntryAndAdvancePosition(int *const outProbability, bool *const outHasNext,
|
void getBigramEntryAndAdvancePosition(int *const outProbability, bool *const outHasNext,
|
||||||
int *const outTargetTerminalId, int *const bigramEntryPos) const;
|
int *const outTargetTerminalId, int *const bigramEntryPos) const;
|
||||||
|
|
||||||
|
@ -50,6 +57,13 @@ class BigramDictContent : public SparseTableDictContent {
|
||||||
return addressLookupTable->get(terminalId);
|
return addressLookupTable->get(terminalId);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool writeBigramEntry(const int probability, const int hasNext, const int targetTerminalId,
|
||||||
|
const int entryWritingPos) {
|
||||||
|
int writingPos = entryWritingPos;
|
||||||
|
return writeBigramEntryAndAdvancePosition(probability, hasNext, targetTerminalId,
|
||||||
|
&writingPos);
|
||||||
|
}
|
||||||
|
|
||||||
bool writeBigramEntryAndAdvancePosition(const int probability, const int hasNext,
|
bool writeBigramEntryAndAdvancePosition(const int probability, const int hasNext,
|
||||||
const int targetTerminalId, int *const entryWritingPos);
|
const int targetTerminalId, int *const entryWritingPos);
|
||||||
|
|
||||||
|
|
|
@ -43,7 +43,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
false /* usesAdditionalBuffer*/), FormatUtils::VERSION_4),
|
false /* usesAdditionalBuffer*/), FormatUtils::VERSION_4),
|
||||||
mDictBuffer(mBuffers.get()->getWritableTrieBuffer()),
|
mDictBuffer(mBuffers.get()->getWritableTrieBuffer()),
|
||||||
mBigramPolicy(mBuffers.get()->getUpdatableBigramDictContent(),
|
mBigramPolicy(mBuffers.get()->getUpdatableBigramDictContent(),
|
||||||
mBuffers.get()->getTerminalPositionLookupTable()),
|
mBuffers.get()->getTerminalPositionLookupTable(), &mHeaderPolicy,
|
||||||
|
mHeaderPolicy.isDecayingDict()),
|
||||||
mShortcutPolicy(mBuffers.get()->getShortcutDictContent(),
|
mShortcutPolicy(mBuffers.get()->getShortcutDictContent(),
|
||||||
mBuffers.get()->getTerminalPositionLookupTable()),
|
mBuffers.get()->getTerminalPositionLookupTable()),
|
||||||
mNodeReader(mDictBuffer, mBuffers.get()->getProbabilityDictContent()),
|
mNodeReader(mDictBuffer, mBuffers.get()->getProbabilityDictContent()),
|
||||||
|
|
|
@ -83,7 +83,7 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
||||||
Ver4PatriciaTrieNodeReader ptNodeReader(mBuffers->getTrieBuffer(),
|
Ver4PatriciaTrieNodeReader ptNodeReader(mBuffers->getTrieBuffer(),
|
||||||
mBuffers->getProbabilityDictContent());
|
mBuffers->getProbabilityDictContent());
|
||||||
Ver4BigramListPolicy bigramPolicy(mBuffers->getUpdatableBigramDictContent(),
|
Ver4BigramListPolicy bigramPolicy(mBuffers->getUpdatableBigramDictContent(),
|
||||||
mBuffers->getTerminalPositionLookupTable());
|
mBuffers->getTerminalPositionLookupTable(), headerPolicy, needsToDecay);
|
||||||
Ver4ShortcutListPolicy shortcutPolicy(mBuffers->getShortcutDictContent(),
|
Ver4ShortcutListPolicy shortcutPolicy(mBuffers->getShortcutDictContent(),
|
||||||
mBuffers->getTerminalPositionLookupTable());
|
mBuffers->getTerminalPositionLookupTable());
|
||||||
Ver4PatriciaTrieNodeWriter ptNodeWriter(mBuffers->getWritableTrieBuffer(),
|
Ver4PatriciaTrieNodeWriter ptNodeWriter(mBuffers->getWritableTrieBuffer(),
|
||||||
|
@ -134,7 +134,8 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
||||||
Ver4PatriciaTrieNodeReader newPtNodeReader(buffersToWrite->getTrieBuffer(),
|
Ver4PatriciaTrieNodeReader newPtNodeReader(buffersToWrite->getTrieBuffer(),
|
||||||
buffersToWrite->getProbabilityDictContent());
|
buffersToWrite->getProbabilityDictContent());
|
||||||
Ver4BigramListPolicy newBigramPolicy(buffersToWrite->getUpdatableBigramDictContent(),
|
Ver4BigramListPolicy newBigramPolicy(buffersToWrite->getUpdatableBigramDictContent(),
|
||||||
buffersToWrite->getTerminalPositionLookupTable());
|
buffersToWrite->getTerminalPositionLookupTable(), headerPolicy,
|
||||||
|
false /* needsToDecay */);
|
||||||
Ver4ShortcutListPolicy newShortcutPolicy(buffersToWrite->getShortcutDictContent(),
|
Ver4ShortcutListPolicy newShortcutPolicy(buffersToWrite->getShortcutDictContent(),
|
||||||
buffersToWrite->getTerminalPositionLookupTable());
|
buffersToWrite->getTerminalPositionLookupTable());
|
||||||
Ver4PatriciaTrieNodeWriter newPtNodeWriter(buffersToWrite->getWritableTrieBuffer(),
|
Ver4PatriciaTrieNodeWriter newPtNodeWriter(buffersToWrite->getWritableTrieBuffer(),
|
||||||
|
|
|
@ -72,26 +72,63 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private File createEmptyDictionaryAndGetFile(final String filename) throws IOException {
|
private File createEmptyDictionaryAndGetFile(final String dictId,
|
||||||
final File file = File.createTempFile(filename, TEST_DICT_FILE_EXTENSION,
|
final int formatVersion) throws IOException {
|
||||||
|
if (formatVersion == 3) {
|
||||||
|
return createEmptyVer3DictionaryAndGetFile(dictId);
|
||||||
|
} else if (formatVersion == 4) {
|
||||||
|
return createEmptyVer4DictionaryAndGetFile(dictId);
|
||||||
|
} else {
|
||||||
|
throw new IOException("Dictionary format version " + formatVersion
|
||||||
|
+ " is not supported.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
private File createEmptyVer4DictionaryAndGetFile(final String dictId) throws IOException {
|
||||||
|
final File file = File.createTempFile(dictId, TEST_DICT_FILE_EXTENSION,
|
||||||
getContext().getCacheDir());
|
getContext().getCacheDir());
|
||||||
|
file.delete();
|
||||||
|
file.mkdir();
|
||||||
Map<String, String> attributeMap = new HashMap<String, String>();
|
Map<String, String> attributeMap = new HashMap<String, String>();
|
||||||
attributeMap.put(FormatSpec.FileHeader.SUPPORTS_DYNAMIC_UPDATE_ATTRIBUTE,
|
attributeMap.put(FormatSpec.FileHeader.SUPPORTS_DYNAMIC_UPDATE_ATTRIBUTE,
|
||||||
FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE);
|
FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE);
|
||||||
attributeMap.put(FormatSpec.FileHeader.USES_FORGETTING_CURVE_ATTRIBUTE,
|
attributeMap.put(FormatSpec.FileHeader.USES_FORGETTING_CURVE_ATTRIBUTE,
|
||||||
FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE);
|
FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE);
|
||||||
if (BinaryDictionary.createEmptyDictFile(file.getAbsolutePath(),
|
if (BinaryDictionary.createEmptyDictFile(file.getAbsolutePath(),
|
||||||
3 /* dictVersion */, attributeMap)) {
|
4 /* dictVersion */, attributeMap)) {
|
||||||
|
return new File(file, FormatSpec.TRIE_FILE_EXTENSION);
|
||||||
|
} else {
|
||||||
|
throw new IOException("Empty dictionary " + file.getAbsolutePath() + " "
|
||||||
|
+ FormatSpec.TRIE_FILE_EXTENSION + " cannot be created.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private File createEmptyVer3DictionaryAndGetFile(final String dictId) throws IOException {
|
||||||
|
final File file = File.createTempFile(dictId, TEST_DICT_FILE_EXTENSION,
|
||||||
|
getContext().getCacheDir());
|
||||||
|
file.delete();
|
||||||
|
Map<String, String> attributeMap = new HashMap<String, String>();
|
||||||
|
attributeMap.put(FormatSpec.FileHeader.SUPPORTS_DYNAMIC_UPDATE_ATTRIBUTE,
|
||||||
|
FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE);
|
||||||
|
attributeMap.put(FormatSpec.FileHeader.USES_FORGETTING_CURVE_ATTRIBUTE,
|
||||||
|
FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE);
|
||||||
|
if (BinaryDictionary.createEmptyDictFile(file.getAbsolutePath(), 3 /* dictVersion */,
|
||||||
|
attributeMap)) {
|
||||||
return file;
|
return file;
|
||||||
} else {
|
} else {
|
||||||
throw new IOException("Empty dictionary cannot be created.");
|
throw new IOException(
|
||||||
|
"Empty dictionary " + file.getAbsolutePath() + " cannot be created.");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testAddValidAndInvalidWords() {
|
public void testAddValidAndInvalidWords() {
|
||||||
|
testAddValidAndInvalidWords(3 /* formatVersion */);
|
||||||
|
testAddValidAndInvalidWords(4 /* formatVersion */);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void testAddValidAndInvalidWords(final int formatVersion) {
|
||||||
File dictFile = null;
|
File dictFile = null;
|
||||||
try {
|
try {
|
||||||
dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
|
dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
fail("IOException while writing an initial dictionary : " + e);
|
fail("IOException while writing an initial dictionary : " + e);
|
||||||
}
|
}
|
||||||
|
@ -111,7 +148,6 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
|
||||||
binaryDictionary.addUnigramWord("b", DUMMY_PROBABILITY);
|
binaryDictionary.addUnigramWord("b", DUMMY_PROBABILITY);
|
||||||
assertTrue(binaryDictionary.isValidWord("b"));
|
assertTrue(binaryDictionary.isValidWord("b"));
|
||||||
|
|
||||||
final int unigramProbability = binaryDictionary.getFrequency("a");
|
|
||||||
binaryDictionary.addBigramWords("a", "b", Dictionary.NOT_A_PROBABILITY);
|
binaryDictionary.addBigramWords("a", "b", Dictionary.NOT_A_PROBABILITY);
|
||||||
assertFalse(binaryDictionary.isValidBigram("a", "b"));
|
assertFalse(binaryDictionary.isValidBigram("a", "b"));
|
||||||
binaryDictionary.addBigramWords("a", "b", Dictionary.NOT_A_PROBABILITY);
|
binaryDictionary.addBigramWords("a", "b", Dictionary.NOT_A_PROBABILITY);
|
||||||
|
@ -136,9 +172,14 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testDecayingProbability() {
|
public void testDecayingProbability() {
|
||||||
|
testDecayingProbability(3 /* formatVersion */);
|
||||||
|
testDecayingProbability(4 /* formatVersion */);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void testDecayingProbability(final int formatVersion) {
|
||||||
File dictFile = null;
|
File dictFile = null;
|
||||||
try {
|
try {
|
||||||
dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
|
dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
fail("IOException while writing an initial dictionary : " + e);
|
fail("IOException while writing an initial dictionary : " + e);
|
||||||
}
|
}
|
||||||
|
@ -190,6 +231,11 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testAddManyUnigramsToDecayingDict() {
|
public void testAddManyUnigramsToDecayingDict() {
|
||||||
|
testAddManyUnigramsToDecayingDict(3 /* formatVersion */);
|
||||||
|
testAddManyUnigramsToDecayingDict(4 /* formatVersion */);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void testAddManyUnigramsToDecayingDict(final int formatVersion) {
|
||||||
final int unigramCount = 30000;
|
final int unigramCount = 30000;
|
||||||
final int unigramTypedCount = 100000;
|
final int unigramTypedCount = 100000;
|
||||||
final int codePointSetSize = 50;
|
final int codePointSetSize = 50;
|
||||||
|
@ -198,7 +244,7 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
|
||||||
|
|
||||||
File dictFile = null;
|
File dictFile = null;
|
||||||
try {
|
try {
|
||||||
dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
|
dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
fail("IOException while writing an initial dictionary : " + e);
|
fail("IOException while writing an initial dictionary : " + e);
|
||||||
}
|
}
|
||||||
|
@ -242,6 +288,11 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testAddManyBigramsToDecayingDict() {
|
public void testAddManyBigramsToDecayingDict() {
|
||||||
|
testAddManyBigramsToDecayingDict(3 /* formatVersion */);
|
||||||
|
testAddManyBigramsToDecayingDict(4 /* formatVersion */);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void testAddManyBigramsToDecayingDict(final int formatVersion) {
|
||||||
final int unigramCount = 5000;
|
final int unigramCount = 5000;
|
||||||
final int bigramCount = 30000;
|
final int bigramCount = 30000;
|
||||||
final int bigramTypedCount = 100000;
|
final int bigramTypedCount = 100000;
|
||||||
|
@ -251,7 +302,7 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
|
||||||
|
|
||||||
File dictFile = null;
|
File dictFile = null;
|
||||||
try {
|
try {
|
||||||
dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
|
dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
fail("IOException while writing an initial dictionary : " + e);
|
fail("IOException while writing an initial dictionary : " + e);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue