am 8fa7a09f: Merge "Implement PatriciaTriePolicy::getWordProperty()."

* commit '8fa7a09f1e7cd16a4fa1e4138078bd3984519630':
  Implement PatriciaTriePolicy::getWordProperty().

commit 0a206ef1e5
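For orientation before the diff: getWordProperty() looks up a word in the version-2 (Patricia trie) dictionary and returns its code points, unigram probability, and any bigram and shortcut entries attached to it. A rough standalone model of that bundle, with simplified, hypothetical types for illustration only (not the real LatinIME WordProperty API):

#include <vector>

// Hypothetical stand-ins for the real LatinIME classes, for illustration only.
struct BigramEntrySketch {
    std::vector<int> targetCodePoints;  // code points of the following word
    int probability;
};

struct ShortcutEntrySketch {
    std::vector<int> targetCodePoints;  // code points of the shortcut target
    int probability;
};

struct WordPropertySketch {
    std::vector<int> codePoints;               // the word itself, as code points
    bool isNotAWord;
    bool isBlacklisted;
    int probability;                           // unigram probability
    std::vector<BigramEntrySketch> bigrams;    // empty if the word has no bigrams
    std::vector<ShortcutEntrySketch> shortcuts;
};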
@@ -20,6 +20,7 @@
 #include "defines.h"
 #include "suggest/core/dicnode/dic_node.h"
 #include "suggest/core/dicnode/dic_node_vector.h"
+#include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
 #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
 #include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
 #include "suggest/policyimpl/dictionary/utils/probability_utils.h"
@@ -303,4 +304,63 @@ int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNod
     return siblingPos;
 }
 
+const WordProperty PatriciaTriePolicy::getWordProperty(const int *const codePoints,
+        const int codePointCount) const {
+    const int ptNodePos = getTerminalPtNodePositionOfWord(codePoints, codePointCount,
+            false /* forceLowerCaseSearch */);
+    if (ptNodePos == NOT_A_DICT_POS) {
+        AKLOGE("getWordProperty was called for invalid word.");
+        return WordProperty();
+    }
+    const PtNodeParams ptNodeParams = mPtNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
+    std::vector<int> codePointVector(ptNodeParams.getCodePoints(),
+            ptNodeParams.getCodePoints() + ptNodeParams.getCodePointCount());
+    // Fetch bigram information.
+    std::vector<WordProperty::BigramProperty> bigrams;
+    const int bigramListPos = getBigramsPositionOfPtNode(ptNodePos);
+    int bigramWord1CodePoints[MAX_WORD_LENGTH];
+    BinaryDictionaryBigramsIterator bigramsIt(getBigramsStructurePolicy(), bigramListPos);
+    while (bigramsIt.hasNext()) {
+        // Fetch the next bigram information and forward the iterator.
+        bigramsIt.next();
+        // Skip the entry if the entry has been deleted. This never happens for ver2 dicts.
+        if (bigramsIt.getBigramPos() != NOT_A_DICT_POS) {
+            int word1Probability = NOT_A_PROBABILITY;
+            int word1CodePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(
+                    bigramsIt.getBigramPos(), MAX_WORD_LENGTH, bigramWord1CodePoints,
+                    &word1Probability);
+            std::vector<int> word1(bigramWord1CodePoints,
+                    bigramWord1CodePoints + word1CodePointCount);
+            bigrams.push_back(WordProperty::BigramProperty(&word1, bigramsIt.getProbability(),
+                    NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */));
+        }
+    }
+    // Fetch shortcut information.
+    std::vector<WordProperty::ShortcutProperty> shortcuts;
+    int shortcutPos = getShortcutPositionOfPtNode(ptNodePos);
+    if (shortcutPos != NOT_A_DICT_POS) {
+        int shortcutTargetCodePoints[MAX_WORD_LENGTH];
+        ShortcutListReadingUtils::getShortcutListSizeAndForwardPointer(mDictRoot, &shortcutPos);
+        bool hasNext = true;
+        while (hasNext) {
+            const ShortcutListReadingUtils::ShortcutFlags shortcutFlags =
+                    ShortcutListReadingUtils::getFlagsAndForwardPointer(mDictRoot, &shortcutPos);
+            hasNext = ShortcutListReadingUtils::hasNext(shortcutFlags);
+            const int shortcutTargetLength = ShortcutListReadingUtils::readShortcutTarget(
+                    mDictRoot, MAX_WORD_LENGTH, shortcutTargetCodePoints, &shortcutPos);
+            std::vector<int> shortcutTarget(shortcutTargetCodePoints,
+                    shortcutTargetCodePoints + shortcutTargetLength);
+            const int shortcutProbability =
+                    ShortcutListReadingUtils::getProbabilityFromFlags(shortcutFlags);
+            shortcuts.push_back(
+                    WordProperty::ShortcutProperty(&shortcutTarget, shortcutProbability));
+        }
+    }
+    return WordProperty(&codePointVector, ptNodeParams.isNotAWord(),
+            ptNodeParams.isBlacklisted(), ptNodeParams.hasBigrams(),
+            ptNodeParams.hasShortcutTargets(), ptNodeParams.getProbability(),
+            NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */,
+            &bigrams, &shortcuts);
+}
+
 } // namespace latinime
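The shortcut loop in getWordProperty() above reads a forward-only list: each entry begins with a flags value that encodes both whether another entry follows and the entry's probability, then the target code points, and the read position is advanced past every field. A minimal standalone sketch of that reading pattern, assuming a made-up byte layout (the constants, buffer format, and helper below are hypothetical and are not the real ShortcutListReadingUtils encoding):

#include <cstdint>
#include <cstdio>
#include <vector>

// Assumed layout for this sketch only: [flags][length][codePoint...] per entry,
// where the top flag bit means "another entry follows" and the low bits hold
// the probability. The real binary dictionary format differs.
constexpr uint8_t FLAG_HAS_NEXT = 0x80;
constexpr uint8_t MASK_PROBABILITY = 0x7f;

struct EntrySketch {
    int probability;
    std::vector<int> targetCodePoints;
};

// Reads entries until one without the has-next flag is seen, advancing *pos,
// mirroring the while (hasNext) loop in getWordProperty().
static std::vector<EntrySketch> readEntryList(const std::vector<uint8_t> &buffer, int *pos) {
    std::vector<EntrySketch> entries;
    bool hasNext = true;
    while (hasNext) {
        const uint8_t flags = buffer[(*pos)++];
        hasNext = (flags & FLAG_HAS_NEXT) != 0;
        EntrySketch entry;
        entry.probability = flags & MASK_PROBABILITY;
        const int length = buffer[(*pos)++];
        for (int i = 0; i < length; ++i) {
            entry.targetCodePoints.push_back(buffer[(*pos)++]);
        }
        entries.push_back(entry);
    }
    return entries;
}

int main() {
    // Two entries: the first sets the has-next flag, the second ends the list.
    const std::vector<uint8_t> buffer = {
        0x80 | 40, 2, 'h', 'i',    // probability 40, target "hi", more entries follow
        20, 3, 'b', 'y', 'e',      // probability 20, target "bye", last entry
    };
    int pos = 0;
    const std::vector<EntrySketch> entries = readEntryList(buffer, &pos);
    std::printf("read %zu entries, ending at position %d\n", entries.size(), pos);
    return 0;
}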
@@ -128,10 +128,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
     }
 
     const WordProperty getWordProperty(const int *const codePoints,
-            const int codePointCount) const {
-        // getWordProperty is not supported.
-        return WordProperty();
-    }
+            const int codePointCount) const;
 
     int getNextWordAndNextToken(const int token, int *const outCodePoints) {
        // getNextWordAndNextToken is not supported.
@@ -39,6 +39,7 @@ import java.util.Arrays;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
+import java.util.Locale;
 import java.util.Map.Entry;
 import java.util.Random;
 import java.util.Set;
@@ -596,4 +597,39 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
             Log.d(TAG, result);
         }
     }
+
+    public void testVer2DictGetWordProperty() {
+        final FormatOptions formatOptions = BinaryDictUtils.VERSION2_OPTIONS;
+        final ArrayList<String> words = sWords;
+        final HashMap<String, List<String>> shortcuts = sShortcuts;
+        final String dictName = "testGetWordProperty";
+        final String dictVersion = Long.toString(System.currentTimeMillis());
+        final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
+                BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions));
+        addUnigrams(words.size(), dict, words, shortcuts);
+        addBigrams(dict, words, sEmptyBigrams);
+        final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions,
+                getContext().getCacheDir());
+        file.delete();
+        timeWritingDictToFile(file, dict, formatOptions);
+        final BinaryDictionary binaryDictionary = new BinaryDictionary(file.getAbsolutePath(),
+                0 /* offset */, file.length(), true /* useFullEditDistance */,
+                Locale.ENGLISH, dictName, false /* isUpdatable */);
+        for (final String word : words) {
+            final WordProperty wordProperty = binaryDictionary.getWordProperty(word);
+            assertEquals(word, wordProperty.mWord);
+            assertEquals(UNIGRAM_FREQ, wordProperty.getProbability());
+            if (shortcuts.containsKey(word)) {
+                assertEquals(shortcuts.get(word).size(), wordProperty.mShortcutTargets.size());
+                final List<String> shortcutList = shortcuts.get(word);
+                assertTrue(wordProperty.mHasShortcuts);
+                for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
+                    assertTrue(shortcutList.contains(shortcutTarget.mWord));
+                    assertEquals(UNIGRAM_FREQ, shortcutTarget.getProbability());
+                    shortcutList.remove(shortcutTarget.mWord);
+                }
+                assertTrue(shortcutList.isEmpty());
+            }
+        }
+    }
 }