diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp index 8172e70b6..bf38dffa5 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp @@ -20,6 +20,7 @@ #include "defines.h" #include "suggest/core/dicnode/dic_node.h" #include "suggest/core/dicnode/dic_node_vector.h" +#include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h" #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h" #include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/utils/probability_utils.h" @@ -303,4 +304,63 @@ int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNod return siblingPos; } +const WordProperty PatriciaTriePolicy::getWordProperty(const int *const codePoints, + const int codePointCount) const { + const int ptNodePos = getTerminalPtNodePositionOfWord(codePoints, codePointCount, + false /* forceLowerCaseSearch */); + if (ptNodePos == NOT_A_DICT_POS) { + AKLOGE("getWordProperty was called for invalid word."); + return WordProperty(); + } + const PtNodeParams ptNodeParams = mPtNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos); + std::vector codePointVector(ptNodeParams.getCodePoints(), + ptNodeParams.getCodePoints() + ptNodeParams.getCodePointCount()); + // Fetch bigram information. + std::vector bigrams; + const int bigramListPos = getBigramsPositionOfPtNode(ptNodePos); + int bigramWord1CodePoints[MAX_WORD_LENGTH]; + BinaryDictionaryBigramsIterator bigramsIt(getBigramsStructurePolicy(), bigramListPos); + while (bigramsIt.hasNext()) { + // Fetch the next bigram information and forward the iterator. + bigramsIt.next(); + // Skip the entry if the entry has been deleted. This never happens for ver2 dicts. + if (bigramsIt.getBigramPos() != NOT_A_DICT_POS) { + int word1Probability = NOT_A_PROBABILITY; + int word1CodePointCount = getCodePointsAndProbabilityAndReturnCodePointCount( + bigramsIt.getBigramPos(), MAX_WORD_LENGTH, bigramWord1CodePoints, + &word1Probability); + std::vector word1(bigramWord1CodePoints, + bigramWord1CodePoints + word1CodePointCount); + bigrams.push_back(WordProperty::BigramProperty(&word1, bigramsIt.getProbability(), + NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */)); + } + } + // Fetch shortcut information. + std::vector shortcuts; + int shortcutPos = getShortcutPositionOfPtNode(ptNodePos); + if (shortcutPos != NOT_A_DICT_POS) { + int shortcutTargetCodePoints[MAX_WORD_LENGTH]; + ShortcutListReadingUtils::getShortcutListSizeAndForwardPointer(mDictRoot, &shortcutPos); + bool hasNext = true; + while (hasNext) { + const ShortcutListReadingUtils::ShortcutFlags shortcutFlags = + ShortcutListReadingUtils::getFlagsAndForwardPointer(mDictRoot, &shortcutPos); + hasNext = ShortcutListReadingUtils::hasNext(shortcutFlags); + const int shortcutTargetLength = ShortcutListReadingUtils::readShortcutTarget( + mDictRoot, MAX_WORD_LENGTH, shortcutTargetCodePoints, &shortcutPos); + std::vector shortcutTarget(shortcutTargetCodePoints, + shortcutTargetCodePoints + shortcutTargetLength); + const int shortcutProbability = + ShortcutListReadingUtils::getProbabilityFromFlags(shortcutFlags); + shortcuts.push_back( + WordProperty::ShortcutProperty(&shortcutTarget, shortcutProbability)); + } + } + return WordProperty(&codePointVector, ptNodeParams.isNotAWord(), + ptNodeParams.isBlacklisted(), ptNodeParams.hasBigrams(), + ptNodeParams.hasShortcutTargets(), ptNodeParams.getProbability(), + NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, + &bigrams, &shortcuts); +} + } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h index 1ce7f85d4..da4be87ce 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h @@ -128,10 +128,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { } const WordProperty getWordProperty(const int *const codePoints, - const int codePointCount) const { - // getWordProperty is not supported. - return WordProperty(); - } + const int codePointCount) const; int getNextWordAndNextToken(const int token, int *const outCodePoints) { // getNextWordAndNextToken is not supported. diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java index 141730de9..28388b4a0 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java +++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java @@ -39,6 +39,7 @@ import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.List; +import java.util.Locale; import java.util.Map.Entry; import java.util.Random; import java.util.Set; @@ -596,4 +597,39 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { Log.d(TAG, result); } } + + public void testVer2DictGetWordProperty() { + final FormatOptions formatOptions = BinaryDictUtils.VERSION2_OPTIONS; + final ArrayList words = sWords; + final HashMap> shortcuts = sShortcuts; + final String dictName = "testGetWordProperty"; + final String dictVersion = Long.toString(System.currentTimeMillis()); + final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), + BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions)); + addUnigrams(words.size(), dict, words, shortcuts); + addBigrams(dict, words, sEmptyBigrams); + final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions, + getContext().getCacheDir()); + file.delete(); + timeWritingDictToFile(file, dict, formatOptions); + final BinaryDictionary binaryDictionary = new BinaryDictionary(file.getAbsolutePath(), + 0 /* offset */, file.length(), true /* useFullEditDistance */, + Locale.ENGLISH, dictName, false /* isUpdatable */); + for (final String word : words) { + final WordProperty wordProperty = binaryDictionary.getWordProperty(word); + assertEquals(word, wordProperty.mWord); + assertEquals(UNIGRAM_FREQ, wordProperty.getProbability()); + if (shortcuts.containsKey(word)) { + assertEquals(shortcuts.get(word).size(), wordProperty.mShortcutTargets.size()); + final List shortcutList = shortcuts.get(word); + assertTrue(wordProperty.mHasShortcuts); + for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) { + assertTrue(shortcutList.contains(shortcutTarget.mWord)); + assertEquals(UNIGRAM_FREQ, shortcutTarget.getProbability()); + shortcutList.remove(shortcutTarget.mWord); + } + assertTrue(shortcutList.isEmpty()); + } + } + } }