am 0fc93fe4: Implement PatriciaTriePolicy::getNextWordAndNextToken().
* commit '0fc93fe4455f24809f6c9baf0d3b936519779cfb': Implement PatriciaTriePolicy::getNextWordAndNextToken().
This commit is contained in:
commit
610653bb2c
7 changed files with 213 additions and 40 deletions
|
@ -17,12 +17,12 @@
|
||||||
package com.android.inputmethod.latin.makedict;
|
package com.android.inputmethod.latin.makedict;
|
||||||
|
|
||||||
import com.android.inputmethod.annotations.UsedForTesting;
|
import com.android.inputmethod.annotations.UsedForTesting;
|
||||||
|
import com.android.inputmethod.latin.BinaryDictionary;
|
||||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
|
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
|
||||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
|
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
|
||||||
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
|
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
||||||
|
import com.android.inputmethod.latin.utils.CollectionUtils;
|
||||||
import android.util.Log;
|
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileNotFoundException;
|
import java.io.FileNotFoundException;
|
||||||
|
@ -33,6 +33,7 @@ import java.util.Arrays;
|
||||||
/**
|
/**
|
||||||
* An implementation of DictDecoder for version 2 binary dictionary.
|
* An implementation of DictDecoder for version 2 binary dictionary.
|
||||||
*/
|
*/
|
||||||
|
// TODO: Separate logics that are used only for testing.
|
||||||
@UsedForTesting
|
@UsedForTesting
|
||||||
public class Ver2DictDecoder extends AbstractDictDecoder {
|
public class Ver2DictDecoder extends AbstractDictDecoder {
|
||||||
private static final String TAG = Ver2DictDecoder.class.getSimpleName();
|
private static final String TAG = Ver2DictDecoder.class.getSimpleName();
|
||||||
|
@ -116,12 +117,19 @@ public class Ver2DictDecoder extends AbstractDictDecoder {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected final File mDictionaryBinaryFile;
|
protected final File mDictionaryBinaryFile;
|
||||||
|
// TODO: Remove mBufferFactory and mDictBuffer from this class members because they are now
|
||||||
|
// used only for testing.
|
||||||
private final DictionaryBufferFactory mBufferFactory;
|
private final DictionaryBufferFactory mBufferFactory;
|
||||||
protected DictBuffer mDictBuffer;
|
protected DictBuffer mDictBuffer;
|
||||||
|
private final BinaryDictionary mBinaryDictionary;
|
||||||
|
|
||||||
/* package */ Ver2DictDecoder(final File file, final int factoryFlag) {
|
/* package */ Ver2DictDecoder(final File file, final int factoryFlag) {
|
||||||
mDictionaryBinaryFile = file;
|
mDictionaryBinaryFile = file;
|
||||||
mDictBuffer = null;
|
mDictBuffer = null;
|
||||||
|
// dictType is not being used in dicttool. Passing an empty string.
|
||||||
|
mBinaryDictionary = new BinaryDictionary(file.getAbsolutePath(),
|
||||||
|
0 /* offset */, file.length() /* length */, true /* useFullEditDistance */,
|
||||||
|
null /* locale */, "" /* dictType */, false /* isUpdatable */);
|
||||||
|
|
||||||
if ((factoryFlag & MASK_DICTBUFFER) == USE_READONLY_BYTEBUFFER) {
|
if ((factoryFlag & MASK_DICTBUFFER) == USE_READONLY_BYTEBUFFER) {
|
||||||
mBufferFactory = new DictionaryBufferFromReadOnlyByteBufferFactory();
|
mBufferFactory = new DictionaryBufferFromReadOnlyByteBufferFactory();
|
||||||
|
@ -137,6 +145,10 @@ public class Ver2DictDecoder extends AbstractDictDecoder {
|
||||||
/* package */ Ver2DictDecoder(final File file, final DictionaryBufferFactory factory) {
|
/* package */ Ver2DictDecoder(final File file, final DictionaryBufferFactory factory) {
|
||||||
mDictionaryBinaryFile = file;
|
mDictionaryBinaryFile = file;
|
||||||
mBufferFactory = factory;
|
mBufferFactory = factory;
|
||||||
|
// dictType is not being used in dicttool. Passing an empty string.
|
||||||
|
mBinaryDictionary = new BinaryDictionary(file.getAbsolutePath(),
|
||||||
|
0 /* offset */, file.length() /* length */, true /* useFullEditDistance */,
|
||||||
|
null /* locale */, "" /* dictType */, false /* isUpdatable */);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -238,24 +250,47 @@ public class Ver2DictDecoder extends AbstractDictDecoder {
|
||||||
@Override
|
@Override
|
||||||
public FusionDictionary readDictionaryBinary(final boolean deleteDictIfBroken)
|
public FusionDictionary readDictionaryBinary(final boolean deleteDictIfBroken)
|
||||||
throws FileNotFoundException, IOException, UnsupportedFormatException {
|
throws FileNotFoundException, IOException, UnsupportedFormatException {
|
||||||
if (mDictBuffer == null) {
|
final DictionaryHeader header = readHeader();
|
||||||
openDictBuffer();
|
final FusionDictionary fusionDict =
|
||||||
|
new FusionDictionary(new FusionDictionary.PtNodeArray(), header.mDictionaryOptions);
|
||||||
|
int token = 0;
|
||||||
|
final ArrayList<WordProperty> wordProperties = CollectionUtils.newArrayList();
|
||||||
|
do {
|
||||||
|
final BinaryDictionary.GetNextWordPropertyResult result =
|
||||||
|
mBinaryDictionary.getNextWordProperty(token);
|
||||||
|
final WordProperty wordProperty = result.mWordProperty;
|
||||||
|
if (wordProperty == null) {
|
||||||
|
if (deleteDictIfBroken) {
|
||||||
|
mBinaryDictionary.close();
|
||||||
|
mDictionaryBinaryFile.delete();
|
||||||
}
|
}
|
||||||
try {
|
return null;
|
||||||
return BinaryDictDecoderUtils.readDictionaryBinary(this);
|
|
||||||
} catch (IOException e) {
|
|
||||||
Log.e(TAG, "The dictionary " + mDictionaryBinaryFile.getName() + " is broken.", e);
|
|
||||||
if (deleteDictIfBroken && !mDictionaryBinaryFile.delete()) {
|
|
||||||
Log.e(TAG, "Failed to delete the broken dictionary.");
|
|
||||||
}
|
}
|
||||||
throw e;
|
wordProperties.add(wordProperty);
|
||||||
} catch (UnsupportedFormatException e) {
|
token = result.mNextToken;
|
||||||
Log.e(TAG, "The dictionary " + mDictionaryBinaryFile.getName() + " is broken.", e);
|
} while (token != 0);
|
||||||
if (deleteDictIfBroken && !mDictionaryBinaryFile.delete()) {
|
|
||||||
Log.e(TAG, "Failed to delete the broken dictionary.");
|
// Insert unigrams into the fusion dictionary.
|
||||||
|
for (final WordProperty wordProperty : wordProperties) {
|
||||||
|
if (wordProperty.mIsBlacklistEntry) {
|
||||||
|
fusionDict.addBlacklistEntry(wordProperty.mWord, wordProperty.mShortcutTargets,
|
||||||
|
wordProperty.mIsNotAWord);
|
||||||
|
} else {
|
||||||
|
fusionDict.add(wordProperty.mWord, wordProperty.mProbabilityInfo,
|
||||||
|
wordProperty.mShortcutTargets, wordProperty.mIsNotAWord);
|
||||||
}
|
}
|
||||||
throw e;
|
|
||||||
}
|
}
|
||||||
|
// Insert bigrams into the fusion dictionary.
|
||||||
|
for (final WordProperty wordProperty : wordProperties) {
|
||||||
|
if (wordProperty.mBigrams == null) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
final String word0 = wordProperty.mWord;
|
||||||
|
for (final WeightedString bigram : wordProperty.mBigrams) {
|
||||||
|
fusionDict.setBigram(word0, bigram.mWord, bigram.mProbabilityInfo);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return fusionDict;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -45,6 +45,7 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
|
||||||
@UsedForTesting
|
@UsedForTesting
|
||||||
/* package */ Ver4DictDecoder(final File dictDirectory, final DictionaryBufferFactory factory) {
|
/* package */ Ver4DictDecoder(final File dictDirectory, final DictionaryBufferFactory factory) {
|
||||||
mDictDirectory = dictDirectory;
|
mDictDirectory = dictDirectory;
|
||||||
|
// dictType is not being used in dicttool. Passing an empty string.
|
||||||
mBinaryDictionary = new BinaryDictionary(dictDirectory.getAbsolutePath(),
|
mBinaryDictionary = new BinaryDictionary(dictDirectory.getAbsolutePath(),
|
||||||
0 /* offset */, 0 /* length */, true /* useFullEditDistance */, null /* locale */,
|
0 /* offset */, 0 /* length */, true /* useFullEditDistance */, null /* locale */,
|
||||||
"" /* dictType */, true /* isUpdatable */);
|
"" /* dictType */, true /* isUpdatable */);
|
||||||
|
@ -78,7 +79,7 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
|
||||||
token = result.mNextToken;
|
token = result.mNextToken;
|
||||||
} while (token != 0);
|
} while (token != 0);
|
||||||
|
|
||||||
// Insert unigrams to the fusion dictionary.
|
// Insert unigrams into the fusion dictionary.
|
||||||
for (final WordProperty wordProperty : wordProperties) {
|
for (final WordProperty wordProperty : wordProperties) {
|
||||||
if (wordProperty.mIsBlacklistEntry) {
|
if (wordProperty.mIsBlacklistEntry) {
|
||||||
fusionDict.addBlacklistEntry(wordProperty.mWord, wordProperty.mShortcutTargets,
|
fusionDict.addBlacklistEntry(wordProperty.mWord, wordProperty.mShortcutTargets,
|
||||||
|
@ -88,7 +89,7 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
|
||||||
wordProperty.mShortcutTargets, wordProperty.mIsNotAWord);
|
wordProperty.mShortcutTargets, wordProperty.mIsNotAWord);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Insert bigrams to the fusion dictionary.
|
// Insert bigrams into the fusion dictionary.
|
||||||
for (final WordProperty wordProperty : wordProperties) {
|
for (final WordProperty wordProperty : wordProperties) {
|
||||||
if (wordProperty.mBigrams == null) {
|
if (wordProperty.mBigrams == null) {
|
||||||
continue;
|
continue;
|
||||||
|
|
|
@ -30,18 +30,19 @@ namespace latinime {
|
||||||
class PtNodeParams {
|
class PtNodeParams {
|
||||||
public:
|
public:
|
||||||
// Invalid PtNode.
|
// Invalid PtNode.
|
||||||
PtNodeParams() : mHeadPos(NOT_A_DICT_POS), mFlags(0), mParentPos(NOT_A_DICT_POS),
|
PtNodeParams() : mHeadPos(NOT_A_DICT_POS), mFlags(0), mHasMovedFlag(false),
|
||||||
mCodePointCount(0), mCodePoints(), mTerminalIdFieldPos(NOT_A_DICT_POS),
|
mParentPos(NOT_A_DICT_POS), mCodePointCount(0), mCodePoints(),
|
||||||
mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID), mProbabilityFieldPos(NOT_A_DICT_POS),
|
mTerminalIdFieldPos(NOT_A_DICT_POS), mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID),
|
||||||
mProbability(NOT_A_PROBABILITY), mChildrenPosFieldPos(NOT_A_DICT_POS),
|
mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(NOT_A_PROBABILITY),
|
||||||
mChildrenPos(NOT_A_DICT_POS), mBigramLinkedNodePos(NOT_A_DICT_POS),
|
mChildrenPosFieldPos(NOT_A_DICT_POS), mChildrenPos(NOT_A_DICT_POS),
|
||||||
mShortcutPos(NOT_A_DICT_POS), mBigramPos(NOT_A_DICT_POS),
|
mBigramLinkedNodePos(NOT_A_DICT_POS), mShortcutPos(NOT_A_DICT_POS),
|
||||||
mSiblingPos(NOT_A_DICT_POS) {}
|
mBigramPos(NOT_A_DICT_POS), mSiblingPos(NOT_A_DICT_POS) {}
|
||||||
|
|
||||||
PtNodeParams(const PtNodeParams& ptNodeParams)
|
PtNodeParams(const PtNodeParams& ptNodeParams)
|
||||||
: mHeadPos(ptNodeParams.mHeadPos), mFlags(ptNodeParams.mFlags),
|
: mHeadPos(ptNodeParams.mHeadPos), mFlags(ptNodeParams.mFlags),
|
||||||
mParentPos(ptNodeParams.mParentPos), mCodePointCount(ptNodeParams.mCodePointCount),
|
mHasMovedFlag(ptNodeParams.mHasMovedFlag), mParentPos(ptNodeParams.mParentPos),
|
||||||
mCodePoints(), mTerminalIdFieldPos(ptNodeParams.mTerminalIdFieldPos),
|
mCodePointCount(ptNodeParams.mCodePointCount), mCodePoints(),
|
||||||
|
mTerminalIdFieldPos(ptNodeParams.mTerminalIdFieldPos),
|
||||||
mTerminalId(ptNodeParams.mTerminalId),
|
mTerminalId(ptNodeParams.mTerminalId),
|
||||||
mProbabilityFieldPos(ptNodeParams.mProbabilityFieldPos),
|
mProbabilityFieldPos(ptNodeParams.mProbabilityFieldPos),
|
||||||
mProbability(ptNodeParams.mProbability),
|
mProbability(ptNodeParams.mProbability),
|
||||||
|
@ -58,7 +59,7 @@ class PtNodeParams {
|
||||||
const int codePointCount, const int *const codePoints, const int probability,
|
const int codePointCount, const int *const codePoints, const int probability,
|
||||||
const int childrenPos, const int shortcutPos, const int bigramPos,
|
const int childrenPos, const int shortcutPos, const int bigramPos,
|
||||||
const int siblingPos)
|
const int siblingPos)
|
||||||
: mHeadPos(headPos), mFlags(flags), mParentPos(NOT_A_DICT_POS),
|
: mHeadPos(headPos), mFlags(flags), mHasMovedFlag(false), mParentPos(NOT_A_DICT_POS),
|
||||||
mCodePointCount(codePointCount), mCodePoints(), mTerminalIdFieldPos(NOT_A_DICT_POS),
|
mCodePointCount(codePointCount), mCodePoints(), mTerminalIdFieldPos(NOT_A_DICT_POS),
|
||||||
mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID),
|
mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID),
|
||||||
mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(probability),
|
mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(probability),
|
||||||
|
@ -73,7 +74,7 @@ class PtNodeParams {
|
||||||
const int parentPos, const int codePointCount, const int *const codePoints,
|
const int parentPos, const int codePointCount, const int *const codePoints,
|
||||||
const int terminalIdFieldPos, const int terminalId, const int probability,
|
const int terminalIdFieldPos, const int terminalId, const int probability,
|
||||||
const int childrenPosFieldPos, const int childrenPos, const int siblingPos)
|
const int childrenPosFieldPos, const int childrenPos, const int siblingPos)
|
||||||
: mHeadPos(headPos), mFlags(flags), mParentPos(parentPos),
|
: mHeadPos(headPos), mFlags(flags), mHasMovedFlag(true), mParentPos(parentPos),
|
||||||
mCodePointCount(codePointCount), mCodePoints(),
|
mCodePointCount(codePointCount), mCodePoints(),
|
||||||
mTerminalIdFieldPos(terminalIdFieldPos), mTerminalId(terminalId),
|
mTerminalIdFieldPos(terminalIdFieldPos), mTerminalId(terminalId),
|
||||||
mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(probability),
|
mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(probability),
|
||||||
|
@ -87,8 +88,8 @@ class PtNodeParams {
|
||||||
PtNodeParams(const PtNodeParams *const ptNodeParams,
|
PtNodeParams(const PtNodeParams *const ptNodeParams,
|
||||||
const PatriciaTrieReadingUtils::NodeFlags flags, const int parentPos,
|
const PatriciaTrieReadingUtils::NodeFlags flags, const int parentPos,
|
||||||
const int codePointCount, const int *const codePoints, const int probability)
|
const int codePointCount, const int *const codePoints, const int probability)
|
||||||
: mHeadPos(ptNodeParams->getHeadPos()), mFlags(flags), mParentPos(parentPos),
|
: mHeadPos(ptNodeParams->getHeadPos()), mFlags(flags), mHasMovedFlag(true),
|
||||||
mCodePointCount(codePointCount), mCodePoints(),
|
mParentPos(parentPos), mCodePointCount(codePointCount), mCodePoints(),
|
||||||
mTerminalIdFieldPos(ptNodeParams->getTerminalIdFieldPos()),
|
mTerminalIdFieldPos(ptNodeParams->getTerminalIdFieldPos()),
|
||||||
mTerminalId(ptNodeParams->getTerminalId()),
|
mTerminalId(ptNodeParams->getTerminalId()),
|
||||||
mProbabilityFieldPos(ptNodeParams->getProbabilityFieldPos()),
|
mProbabilityFieldPos(ptNodeParams->getProbabilityFieldPos()),
|
||||||
|
@ -104,7 +105,7 @@ class PtNodeParams {
|
||||||
|
|
||||||
PtNodeParams(const PatriciaTrieReadingUtils::NodeFlags flags, const int parentPos,
|
PtNodeParams(const PatriciaTrieReadingUtils::NodeFlags flags, const int parentPos,
|
||||||
const int codePointCount, const int *const codePoints, const int probability)
|
const int codePointCount, const int *const codePoints, const int probability)
|
||||||
: mHeadPos(NOT_A_DICT_POS), mFlags(flags), mParentPos(parentPos),
|
: mHeadPos(NOT_A_DICT_POS), mFlags(flags), mHasMovedFlag(true), mParentPos(parentPos),
|
||||||
mCodePointCount(codePointCount), mCodePoints(),
|
mCodePointCount(codePointCount), mCodePoints(),
|
||||||
mTerminalIdFieldPos(NOT_A_DICT_POS),
|
mTerminalIdFieldPos(NOT_A_DICT_POS),
|
||||||
mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID),
|
mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID),
|
||||||
|
@ -126,11 +127,11 @@ class PtNodeParams {
|
||||||
|
|
||||||
// Flags
|
// Flags
|
||||||
AK_FORCE_INLINE bool isDeleted() const {
|
AK_FORCE_INLINE bool isDeleted() const {
|
||||||
return DynamicPtReadingUtils::isDeleted(mFlags);
|
return mHasMovedFlag && DynamicPtReadingUtils::isDeleted(mFlags);
|
||||||
}
|
}
|
||||||
|
|
||||||
AK_FORCE_INLINE bool willBecomeNonTerminal() const {
|
AK_FORCE_INLINE bool willBecomeNonTerminal() const {
|
||||||
return DynamicPtReadingUtils::willBecomeNonTerminal(mFlags);
|
return mHasMovedFlag && DynamicPtReadingUtils::willBecomeNonTerminal(mFlags);
|
||||||
}
|
}
|
||||||
|
|
||||||
AK_FORCE_INLINE bool hasChildren() const {
|
AK_FORCE_INLINE bool hasChildren() const {
|
||||||
|
@ -224,6 +225,7 @@ class PtNodeParams {
|
||||||
|
|
||||||
const int mHeadPos;
|
const int mHeadPos;
|
||||||
const PatriciaTrieReadingUtils::NodeFlags mFlags;
|
const PatriciaTrieReadingUtils::NodeFlags mFlags;
|
||||||
|
const bool mHasMovedFlag;
|
||||||
const int mParentPos;
|
const int mParentPos;
|
||||||
const uint8_t mCodePointCount;
|
const uint8_t mCodePointCount;
|
||||||
int mCodePoints[MAX_WORD_LENGTH];
|
int mCodePoints[MAX_WORD_LENGTH];
|
||||||
|
|
|
@ -363,4 +363,33 @@ const WordProperty PatriciaTriePolicy::getWordProperty(const int *const codePoin
|
||||||
&bigrams, &shortcuts);
|
&bigrams, &shortcuts);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints) {
|
||||||
|
if (token == 0) {
|
||||||
|
// Start iterating the dictionary.
|
||||||
|
mTerminalPtNodePositionsForIteratingWords.clear();
|
||||||
|
DynamicPtReadingHelper::TraversePolicyToGetAllTerminalPtNodePositions traversePolicy(
|
||||||
|
&mTerminalPtNodePositionsForIteratingWords);
|
||||||
|
DynamicPtReadingHelper readingHelper(&mPtNodeReader, &mPtNodeArrayReader);
|
||||||
|
readingHelper.initWithPtNodeArrayPos(getRootPosition());
|
||||||
|
readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(&traversePolicy);
|
||||||
|
}
|
||||||
|
const int terminalPtNodePositionsVectorSize =
|
||||||
|
static_cast<int>(mTerminalPtNodePositionsForIteratingWords.size());
|
||||||
|
if (token < 0 || token >= terminalPtNodePositionsVectorSize) {
|
||||||
|
AKLOGE("Given token %d is invalid.", token);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
const int terminalPtNodePos = mTerminalPtNodePositionsForIteratingWords[token];
|
||||||
|
int unigramProbability = NOT_A_PROBABILITY;
|
||||||
|
getCodePointsAndProbabilityAndReturnCodePointCount(terminalPtNodePos, MAX_WORD_LENGTH,
|
||||||
|
outCodePoints, &unigramProbability);
|
||||||
|
const int nextToken = token + 1;
|
||||||
|
if (nextToken >= terminalPtNodePositionsVectorSize) {
|
||||||
|
// All words have been iterated.
|
||||||
|
mTerminalPtNodePositionsForIteratingWords.clear();
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return nextToken;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
|
|
|
@ -18,6 +18,7 @@
|
||||||
#define LATINIME_PATRICIA_TRIE_POLICY_H
|
#define LATINIME_PATRICIA_TRIE_POLICY_H
|
||||||
|
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
|
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
|
||||||
|
@ -44,7 +45,8 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
- mHeaderPolicy.getSize()),
|
- mHeaderPolicy.getSize()),
|
||||||
mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot),
|
mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot),
|
||||||
mPtNodeReader(mDictRoot, mDictBufferSize, &mBigramListPolicy, &mShortcutListPolicy),
|
mPtNodeReader(mDictRoot, mDictBufferSize, &mBigramListPolicy, &mShortcutListPolicy),
|
||||||
mPtNodeArrayReader(mDictRoot, mDictBufferSize) {}
|
mPtNodeArrayReader(mDictRoot, mDictBufferSize),
|
||||||
|
mTerminalPtNodePositionsForIteratingWords() {}
|
||||||
|
|
||||||
AK_FORCE_INLINE int getRootPosition() const {
|
AK_FORCE_INLINE int getRootPosition() const {
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -130,10 +132,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
const WordProperty getWordProperty(const int *const codePoints,
|
const WordProperty getWordProperty(const int *const codePoints,
|
||||||
const int codePointCount) const;
|
const int codePointCount) const;
|
||||||
|
|
||||||
int getNextWordAndNextToken(const int token, int *const outCodePoints) {
|
int getNextWordAndNextToken(const int token, int *const outCodePoints);
|
||||||
// getNextWordAndNextToken is not supported.
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy);
|
||||||
|
@ -146,6 +145,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
const ShortcutListPolicy mShortcutListPolicy;
|
const ShortcutListPolicy mShortcutListPolicy;
|
||||||
const Ver2ParticiaTrieNodeReader mPtNodeReader;
|
const Ver2ParticiaTrieNodeReader mPtNodeReader;
|
||||||
const Ver2PtNodeArrayReader mPtNodeArrayReader;
|
const Ver2PtNodeArrayReader mPtNodeArrayReader;
|
||||||
|
std::vector<int> mTerminalPtNodePositionsForIteratingWords;
|
||||||
|
|
||||||
int createAndGetLeavingChildNode(const DicNode *const dicNode, const int ptNodePos,
|
int createAndGetLeavingChildNode(const DicNode *const dicNode, const int ptNodePos,
|
||||||
DicNodeVector *const childDicNodes) const;
|
DicNodeVector *const childDicNodes) const;
|
||||||
|
|
|
@ -19,6 +19,7 @@ package com.android.inputmethod.latin.makedict;
|
||||||
import android.test.AndroidTestCase;
|
import android.test.AndroidTestCase;
|
||||||
import android.test.suitebuilder.annotation.LargeTest;
|
import android.test.suitebuilder.annotation.LargeTest;
|
||||||
import android.util.Log;
|
import android.util.Log;
|
||||||
|
import android.util.Pair;
|
||||||
import android.util.SparseArray;
|
import android.util.SparseArray;
|
||||||
|
|
||||||
import com.android.inputmethod.latin.BinaryDictionary;
|
import com.android.inputmethod.latin.BinaryDictionary;
|
||||||
|
@ -632,4 +633,66 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testVer2DictIteration() {
|
||||||
|
final FormatOptions formatOptions = BinaryDictUtils.VERSION2_OPTIONS;
|
||||||
|
final ArrayList<String> words = sWords;
|
||||||
|
final HashMap<String, List<String>> shortcuts = sShortcuts;
|
||||||
|
final SparseArray<List<Integer>> bigrams = sEmptyBigrams;
|
||||||
|
final String dictName = "testGetWordProperty";
|
||||||
|
final String dictVersion = Long.toString(System.currentTimeMillis());
|
||||||
|
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
|
||||||
|
BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions));
|
||||||
|
addUnigrams(words.size(), dict, words, shortcuts);
|
||||||
|
addBigrams(dict, words, bigrams);
|
||||||
|
final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions,
|
||||||
|
getContext().getCacheDir());
|
||||||
|
timeWritingDictToFile(file, dict, formatOptions);
|
||||||
|
Log.d(TAG, file.getAbsolutePath());
|
||||||
|
final BinaryDictionary binaryDictionary = new BinaryDictionary(file.getAbsolutePath(),
|
||||||
|
0 /* offset */, file.length(), true /* useFullEditDistance */,
|
||||||
|
Locale.ENGLISH, dictName, false /* isUpdatable */);
|
||||||
|
|
||||||
|
final HashSet<String> wordSet = new HashSet<String>(words);
|
||||||
|
final HashSet<Pair<String, String>> bigramSet = new HashSet<Pair<String,String>>();
|
||||||
|
|
||||||
|
for (int i = 0; i < words.size(); i++) {
|
||||||
|
final List<Integer> bigramList = bigrams.get(i);
|
||||||
|
if (bigramList != null) {
|
||||||
|
for (final Integer word1Index : bigramList) {
|
||||||
|
final String word1 = words.get(word1Index);
|
||||||
|
bigramSet.add(new Pair<String, String>(words.get(i), word1));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
int token = 0;
|
||||||
|
do {
|
||||||
|
final BinaryDictionary.GetNextWordPropertyResult result =
|
||||||
|
binaryDictionary.getNextWordProperty(token);
|
||||||
|
final WordProperty wordProperty = result.mWordProperty;
|
||||||
|
final String word0 = wordProperty.mWord;
|
||||||
|
assertEquals(UNIGRAM_FREQ, wordProperty.mProbabilityInfo.mProbability);
|
||||||
|
wordSet.remove(word0);
|
||||||
|
if (shortcuts.containsKey(word0)) {
|
||||||
|
assertEquals(shortcuts.get(word0).size(), wordProperty.mShortcutTargets.size());
|
||||||
|
final List<String> shortcutList = shortcuts.get(word0);
|
||||||
|
assertNotNull(wordProperty.mShortcutTargets);
|
||||||
|
for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
|
||||||
|
assertTrue(shortcutList.contains(shortcutTarget.mWord));
|
||||||
|
assertEquals(UNIGRAM_FREQ, shortcutTarget.getProbability());
|
||||||
|
shortcutList.remove(shortcutTarget.mWord);
|
||||||
|
}
|
||||||
|
assertTrue(shortcutList.isEmpty());
|
||||||
|
}
|
||||||
|
for (int j = 0; j < wordProperty.mBigrams.size(); j++) {
|
||||||
|
final String word1 = wordProperty.mBigrams.get(j).mWord;
|
||||||
|
final Pair<String, String> bigram = new Pair<String, String>(word0, word1);
|
||||||
|
assertTrue(bigramSet.contains(bigram));
|
||||||
|
bigramSet.remove(bigram);
|
||||||
|
}
|
||||||
|
token = result.mNextToken;
|
||||||
|
} while (token != 0);
|
||||||
|
assertTrue(wordSet.isEmpty());
|
||||||
|
assertTrue(bigramSet.isEmpty());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
43
tools/dicttool/compat/android/util/Pair.java
Normal file
43
tools/dicttool/compat/android/util/Pair.java
Normal file
|
@ -0,0 +1,43 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2014 The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package android.util;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
/**
 * Immutable two-element tuple, placed in the {@code android.util} package of the dicttool
 * compat tree so that code written against the Android {@code Pair} type can be built there.
 * Either component may be null.
 *
 * @param <T1> type of the first component
 * @param <T2> type of the second component
 */
public class Pair<T1, T2> {
    public final T1 mFirst;
    public final T2 mSecond;

    /**
     * @param first the first component, may be null
     * @param second the second component, may be null
     */
    public Pair(final T1 first, final T2 second) {
        mFirst = first;
        mSecond = second;
    }

    /** Hash of both components; null components are handled by {@link Arrays#hashCode}. */
    @Override
    public int hashCode() {
        return Arrays.hashCode(new Object[] { mFirst, mSecond });
    }

    /**
     * Two pairs are equal when both their first and their second components are equal.
     * A null component only equals another null component.
     */
    @Override
    public boolean equals(final Object o) {
        if (o == this) return true;
        if (!(o instanceof Pair)) return false;
        final Pair<?, ?> p = (Pair<?, ?>)o;
        return areEqual(mFirst, p.mFirst) && areEqual(mSecond, p.mSecond);
    }

    /**
     * Null-safe equality. Calling {@code a.equals(b)} directly would throw when {@code a} is
     * null and {@code b} is not.
     */
    private static boolean areEqual(final Object a, final Object b) {
        return a == null ? b == null : a.equals(b);
    }
}
|
Loading…
Reference in a new issue