Merge "Implement PatriciaTriePolicy::getWordProperty()."
This commit is contained in:
commit
8fa7a09f1e
3 changed files with 97 additions and 4 deletions
|
@ -20,6 +20,7 @@
|
|||
#include "defines.h"
|
||||
#include "suggest/core/dicnode/dic_node.h"
|
||||
#include "suggest/core/dicnode/dic_node_vector.h"
|
||||
#include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
|
||||
|
@ -303,4 +304,63 @@ int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNod
|
|||
return siblingPos;
|
||||
}
|
||||
|
||||
// Builds the WordProperty of a word stored in this dictionary: its unigram attributes, its
// bigram entries and its shortcut targets.
//
// codePoints:     code points of the word to look up.
// codePointCount: number of valid entries in codePoints.
//
// Returns a default-constructed WordProperty (after logging an error) when the word has no
// terminal PtNode. Timestamp, level and count are always reported as NOT_A_TIMESTAMP, 0 and 0
// by this implementation.
const WordProperty PatriciaTriePolicy::getWordProperty(const int *const codePoints,
        const int codePointCount) const {
    const int ptNodePos = getTerminalPtNodePositionOfWord(codePoints, codePointCount,
            false /* forceLowerCaseSearch */);
    if (ptNodePos == NOT_A_DICT_POS) {
        AKLOGE("getWordProperty was called for invalid word.");
        return WordProperty();
    }
    const PtNodeParams ptNodeParams = mPtNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
    // Report the word that was actually looked up. The search above is an exact match (no forced
    // lower-casing), so the caller's code points are the word itself. The code points held by
    // the terminal PtNode are presumably only the segment stored in that node, not the full
    // word -- TODO confirm against PtNodeParams.
    std::vector<int> codePointVector(codePoints, codePoints + codePointCount);

    // Fetch bigram information.
    std::vector<WordProperty::BigramProperty> bigrams;
    const int bigramListPos = getBigramsPositionOfPtNode(ptNodePos);
    int bigramWord1CodePoints[MAX_WORD_LENGTH];
    BinaryDictionaryBigramsIterator bigramsIt(getBigramsStructurePolicy(), bigramListPos);
    while (bigramsIt.hasNext()) {
        // Fetch the next bigram information and forward the iterator.
        bigramsIt.next();
        // Skip the entry if the entry has been deleted. This never happens for ver2 dicts.
        if (bigramsIt.getBigramPos() != NOT_A_DICT_POS) {
            // Only the code points of the second word are used below; the unigram probability
            // written to word1Probability is not part of the reported bigram.
            int word1Probability = NOT_A_PROBABILITY;
            const int word1CodePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(
                    bigramsIt.getBigramPos(), MAX_WORD_LENGTH, bigramWord1CodePoints,
                    &word1Probability);
            std::vector<int> word1(bigramWord1CodePoints,
                    bigramWord1CodePoints + word1CodePointCount);
            bigrams.push_back(WordProperty::BigramProperty(&word1, bigramsIt.getProbability(),
                    NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */));
        }
    }

    // Fetch shortcut information.
    std::vector<WordProperty::ShortcutProperty> shortcuts;
    int shortcutPos = getShortcutPositionOfPtNode(ptNodePos);
    if (shortcutPos != NOT_A_DICT_POS) {
        int shortcutTargetCodePoints[MAX_WORD_LENGTH];
        // The returned list size is not needed here; the call is made to move shortcutPos
        // forward to the first entry.
        ShortcutListReadingUtils::getShortcutListSizeAndForwardPointer(mDictRoot, &shortcutPos);
        bool hasNext = true;
        while (hasNext) {
            // Each entry is read as flags followed by the target; both reads advance
            // shortcutPos, so their order must be kept.
            const ShortcutListReadingUtils::ShortcutFlags shortcutFlags =
                    ShortcutListReadingUtils::getFlagsAndForwardPointer(mDictRoot, &shortcutPos);
            hasNext = ShortcutListReadingUtils::hasNext(shortcutFlags);
            const int shortcutTargetLength = ShortcutListReadingUtils::readShortcutTarget(
                    mDictRoot, MAX_WORD_LENGTH, shortcutTargetCodePoints, &shortcutPos);
            std::vector<int> shortcutTarget(shortcutTargetCodePoints,
                    shortcutTargetCodePoints + shortcutTargetLength);
            const int shortcutProbability =
                    ShortcutListReadingUtils::getProbabilityFromFlags(shortcutFlags);
            shortcuts.push_back(
                    WordProperty::ShortcutProperty(&shortcutTarget, shortcutProbability));
        }
    }
    return WordProperty(&codePointVector, ptNodeParams.isNotAWord(),
            ptNodeParams.isBlacklisted(), ptNodeParams.hasBigrams(),
            ptNodeParams.hasShortcutTargets(), ptNodeParams.getProbability(),
            NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */,
            &bigrams, &shortcuts);
}
|
||||
|
||||
} // namespace latinime
|
||||
|
|
|
@ -128,10 +128,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
|||
}
|
||||
|
||||
// Returns the properties of the word given by codePoints/codePointCount.
// Declaration only: the body is defined out of line as PatriciaTriePolicy::getWordProperty,
// so the former inline "not supported" stub must not be kept alongside it.
const WordProperty getWordProperty(const int *const codePoints,
        const int codePointCount) const;
|
||||
|
||||
int getNextWordAndNextToken(const int token, int *const outCodePoints) {
|
||||
// getNextWordAndNextToken is not supported.
|
||||
|
|
|
@ -39,6 +39,7 @@ import java.util.Arrays;
|
|||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Random;
|
||||
import java.util.Set;
|
||||
|
@ -596,4 +597,39 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
|||
Log.d(TAG, result);
|
||||
}
|
||||
}
|
||||
|
||||
public void testVer2DictGetWordProperty() {
|
||||
final FormatOptions formatOptions = BinaryDictUtils.VERSION2_OPTIONS;
|
||||
final ArrayList<String> words = sWords;
|
||||
final HashMap<String, List<String>> shortcuts = sShortcuts;
|
||||
final String dictName = "testGetWordProperty";
|
||||
final String dictVersion = Long.toString(System.currentTimeMillis());
|
||||
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
|
||||
BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions));
|
||||
addUnigrams(words.size(), dict, words, shortcuts);
|
||||
addBigrams(dict, words, sEmptyBigrams);
|
||||
final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions,
|
||||
getContext().getCacheDir());
|
||||
file.delete();
|
||||
timeWritingDictToFile(file, dict, formatOptions);
|
||||
final BinaryDictionary binaryDictionary = new BinaryDictionary(file.getAbsolutePath(),
|
||||
0 /* offset */, file.length(), true /* useFullEditDistance */,
|
||||
Locale.ENGLISH, dictName, false /* isUpdatable */);
|
||||
for (final String word : words) {
|
||||
final WordProperty wordProperty = binaryDictionary.getWordProperty(word);
|
||||
assertEquals(word, wordProperty.mWord);
|
||||
assertEquals(UNIGRAM_FREQ, wordProperty.getProbability());
|
||||
if (shortcuts.containsKey(word)) {
|
||||
assertEquals(shortcuts.get(word).size(), wordProperty.mShortcutTargets.size());
|
||||
final List<String> shortcutList = shortcuts.get(word);
|
||||
assertTrue(wordProperty.mHasShortcuts);
|
||||
for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
|
||||
assertTrue(shortcutList.contains(shortcutTarget.mWord));
|
||||
assertEquals(UNIGRAM_FREQ, shortcutTarget.getProbability());
|
||||
shortcutList.remove(shortcutTarget.mWord);
|
||||
}
|
||||
assertTrue(shortcutList.isEmpty());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue