diff --git a/native/jni/NativeFileList.mk b/native/jni/NativeFileList.mk index 2dd75c4f5..fe2106140 100644 --- a/native/jni/NativeFileList.mk +++ b/native/jni/NativeFileList.mk @@ -26,7 +26,6 @@ LATIN_IME_CORE_SRC_FILES := \ dic_node_utils.cpp \ dic_nodes_cache.cpp) \ $(addprefix suggest/core/dictionary/, \ - bigram_dictionary.cpp \ dictionary.cpp \ dictionary_utils.cpp \ digraph_utils.cpp \ diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp deleted file mode 100644 index 56339fe48..000000000 --- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Copyright (C) 2010, The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#define LOG_TAG "LatinIME: bigram_dictionary.cpp" - -#include "bigram_dictionary.h" - -#include -#include - -#include "defines.h" -#include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h" -#include "suggest/core/dictionary/dictionary.h" -#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" -#include "suggest/core/result/suggestion_results.h" -#include "suggest/core/session/prev_words_info.h" -#include "utils/char_utils.h" - -namespace latinime { - -BigramDictionary::BigramDictionary( - const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy) - : mDictionaryStructurePolicy(dictionaryStructurePolicy) { - if (DEBUG_DICT) { - AKLOGI("BigramDictionary - constructor"); - } -} - -BigramDictionary::~BigramDictionary() { -} - -/* Parameters : - * prevWordsInfo: Information of previous words to get the predictions. - * outSuggestionResults: SuggestionResults to put the predictions. - */ -void BigramDictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo, - SuggestionResults *const outSuggestionResults) const { - int unigramProbability = 0; - int bigramCodePoints[MAX_WORD_LENGTH]; - BinaryDictionaryBigramsIterator bigramsIt = - prevWordsInfo->getBigramsIteratorForPrediction(mDictionaryStructurePolicy); - while (bigramsIt.hasNext()) { - bigramsIt.next(); - if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) { - continue; - } - if (prevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */) - && bigramsIt.getProbability() == NOT_A_PROBABILITY) { - continue; - } - const int codePointCount = mDictionaryStructurePolicy-> - getCodePointsAndProbabilityAndReturnCodePointCount(bigramsIt.getBigramPos(), - MAX_WORD_LENGTH, bigramCodePoints, &unigramProbability); - if (codePointCount <= 0) { - continue; - } - // Due to space constraints, the probability for bigrams is approximate - the lower the - // unigram probability, the worse the precision. 
The theoritical maximum error in - // resulting probability is 8 - although in the practice it's never bigger than 3 or 4 - // in very bad cases. This means that sometimes, we'll see some bigrams interverted - // here, but it can't get too bad. - const int probability = mDictionaryStructurePolicy->getProbability( - unigramProbability, bigramsIt.getProbability()); - outSuggestionResults->addPrediction(bigramCodePoints, codePointCount, probability); - } -} - -// Returns a pointer to the start of the bigram list. -// If the word is not found or has no bigrams, this function returns NOT_A_DICT_POS. -int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const int prevWordLength, - const bool forceLowerCaseSearch) const { - if (0 >= prevWordLength) return NOT_A_DICT_POS; - int pos = mDictionaryStructurePolicy->getTerminalPtNodePositionOfWord(prevWord, prevWordLength, - forceLowerCaseSearch); - if (NOT_A_DICT_POS == pos) return NOT_A_DICT_POS; - return mDictionaryStructurePolicy->getBigramsPositionOfPtNode(pos); -} - -int BigramDictionary::getBigramProbability(const PrevWordsInfo *const prevWordsInfo, - const int *word1, int length1) const { - int nextWordPos = mDictionaryStructurePolicy->getTerminalPtNodePositionOfWord(word1, length1, - false /* forceLowerCaseSearch */); - if (NOT_A_DICT_POS == nextWordPos) return NOT_A_PROBABILITY; - BinaryDictionaryBigramsIterator bigramsIt = - prevWordsInfo->getBigramsIteratorForPrediction(mDictionaryStructurePolicy); - while (bigramsIt.hasNext()) { - bigramsIt.next(); - if (bigramsIt.getBigramPos() == nextWordPos - && bigramsIt.getProbability() != NOT_A_PROBABILITY) { - return mDictionaryStructurePolicy->getProbability( - mDictionaryStructurePolicy->getUnigramProbabilityOfPtNode(nextWordPos), - bigramsIt.getProbability()); - } - } - return NOT_A_PROBABILITY; -} - -// TODO: Move functions related to bigram to here -} // namespace latinime diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.h 
b/native/jni/src/suggest/core/dictionary/bigram_dictionary.h deleted file mode 100644 index bd3aed1bd..000000000 --- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright (C) 2010 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef LATINIME_BIGRAM_DICTIONARY_H -#define LATINIME_BIGRAM_DICTIONARY_H - -#include "defines.h" - -namespace latinime { - -class DictionaryStructureWithBufferPolicy; -class PrevWordsInfo; -class SuggestionResults; - -class BigramDictionary { - public: - BigramDictionary(const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy); - - void getPredictions(const PrevWordsInfo *const prevWordsInfo, - SuggestionResults *const outSuggestionResults) const; - int getBigramProbability(const PrevWordsInfo *const prevWordsInfo, - const int *word1, int length1) const; - ~BigramDictionary(); - - private: - DISALLOW_IMPLICIT_CONSTRUCTORS(BigramDictionary); - - int getBigramListPositionForWord(const int *prevWord, const int prevWordLength, - const bool forceLowerCaseSearch) const; - - const DictionaryStructureWithBufferPolicy *const mDictionaryStructurePolicy; -}; -} // namespace latinime -#endif // LATINIME_BIGRAM_DICTIONARY_H diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp index e553bc0fc..fb25f757c 100644 --- 
a/native/jni/src/suggest/core/dictionary/dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp @@ -23,6 +23,7 @@ #include "suggest/core/policy/dictionary_header_structure_policy.h" #include "suggest/core/result/suggestion_results.h" #include "suggest/core/session/dic_traverse_session.h" +#include "suggest/core/session/prev_words_info.h" #include "suggest/core/suggest.h" #include "suggest/core/suggest_options.h" #include "suggest/policyimpl/gesture/gesture_suggest_policy_factory.h" @@ -37,7 +38,6 @@ const int Dictionary::HEADER_ATTRIBUTE_BUFFER_SIZE = 32; Dictionary::Dictionary(JNIEnv *env, DictionaryStructureWithBufferPolicy::StructurePolicyPtr dictionaryStructureWithBufferPolicy) : mDictionaryStructureWithBufferPolicy(std::move(dictionaryStructureWithBufferPolicy)), - mBigramDictionary(mDictionaryStructureWithBufferPolicy.get()), mGestureSuggest(new Suggest(GestureSuggestPolicyFactory::getGestureSuggestPolicy())), mTypingSuggest(new Suggest(TypingSuggestPolicyFactory::getTypingSuggestPolicy())) { logDictionaryInfo(env); @@ -62,7 +62,29 @@ void Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession void Dictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo, SuggestionResults *const outSuggestionResults) const { TimeKeeper::setCurrentTime(); - mBigramDictionary.getPredictions(prevWordsInfo, outSuggestionResults); + int unigramProbability = 0; + int bigramCodePoints[MAX_WORD_LENGTH]; + BinaryDictionaryBigramsIterator bigramsIt = prevWordsInfo->getBigramsIteratorForPrediction( + mDictionaryStructureWithBufferPolicy.get()); + while (bigramsIt.hasNext()) { + bigramsIt.next(); + if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) { + continue; + } + if (prevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */) + && bigramsIt.getProbability() == NOT_A_PROBABILITY) { + continue; + } + const int codePointCount = mDictionaryStructureWithBufferPolicy-> + 
getCodePointsAndProbabilityAndReturnCodePointCount(bigramsIt.getBigramPos(), + MAX_WORD_LENGTH, bigramCodePoints, &unigramProbability); + if (codePointCount <= 0) { + continue; + } + const int probability = mDictionaryStructureWithBufferPolicy->getProbability( + unigramProbability, bigramsIt.getProbability()); + outSuggestionResults->addPrediction(bigramCodePoints, codePointCount, probability); + } } int Dictionary::getProbability(const int *word, int length) const { @@ -84,7 +106,21 @@ int Dictionary::getMaxProbabilityOfExactMatches(const int *word, int length) con int Dictionary::getNgramProbability(const PrevWordsInfo *const prevWordsInfo, const int *word, int length) const { TimeKeeper::setCurrentTime(); - return mBigramDictionary.getBigramProbability(prevWordsInfo, word, length); + int nextWordPos = mDictionaryStructureWithBufferPolicy->getTerminalPtNodePositionOfWord(word, + length, false /* forceLowerCaseSearch */); + if (NOT_A_DICT_POS == nextWordPos) return NOT_A_PROBABILITY; + BinaryDictionaryBigramsIterator bigramsIt = prevWordsInfo->getBigramsIteratorForPrediction( + mDictionaryStructureWithBufferPolicy.get()); + while (bigramsIt.hasNext()) { + bigramsIt.next(); + if (bigramsIt.getBigramPos() == nextWordPos + && bigramsIt.getProbability() != NOT_A_PROBABILITY) { + return mDictionaryStructureWithBufferPolicy->getProbability( + mDictionaryStructureWithBufferPolicy->getUnigramProbabilityOfPtNode( + nextWordPos), bigramsIt.getProbability()); + } + } + return NOT_A_PROBABILITY; } bool Dictionary::addUnigramEntry(const int *const word, const int length, diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h index 83447de44..3b41088fe 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.h +++ b/native/jni/src/suggest/core/dictionary/dictionary.h @@ -21,7 +21,6 @@ #include "defines.h" #include "jni.h" -#include "suggest/core/dictionary/bigram_dictionary.h" #include 
"suggest/core/dictionary/property/word_property.h" #include "suggest/core/policy/dictionary_header_structure_policy.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" @@ -119,7 +118,6 @@ class Dictionary { const DictionaryStructureWithBufferPolicy::StructurePolicyPtr mDictionaryStructureWithBufferPolicy; - const BigramDictionary mBigramDictionary; const SuggestInterfacePtr mGestureSuggest; const SuggestInterfacePtr mTypingSuggest; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp index 5c62b9caf..002593c49 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp @@ -268,6 +268,10 @@ int PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord, int PatriciaTriePolicy::getProbability(const int unigramProbability, const int bigramProbability) const { + // Due to space constraints, the probability for bigrams is approximate - the lower the unigram + // probability, the worse the precision. The theoretical maximum error in resulting probability + // is 8 - although in practice it's never bigger than 3 or 4 in very bad cases. This means + // that sometimes, we'll see some bigrams inverted here, but it can't get too bad. if (unigramProbability == NOT_A_PROBABILITY) { return NOT_A_PROBABILITY; } else if (bigramProbability == NOT_A_PROBABILITY) {