am 8681bef0: Remove BigramDictionary from native code.

* commit '8681bef03c1ca864d3de0ae27adb5cbfb63f0fef': Remove BigramDictionary from native code.
2014-06-26 07:22:27 +00:00 · 2014-06-26 07:22:27 +00:00 · 24e06a3287
commit 24e06a3287
parent 95b5b1d1d6 8681bef03c
6 changed files with 43 additions and 165 deletions
--- a/native/jni/NativeFileList.mk
+++ b/native/jni/NativeFileList.mk
@ -26,7 +26,6 @@ LATIN_IME_CORE_SRC_FILES := \
        dic_node_utils.cpp \
        dic_nodes_cache.cpp) \
    $(addprefix suggest/core/dictionary/, \
        bigram_dictionary.cpp \
        dictionary.cpp \
        dictionary_utils.cpp \
        digraph_utils.cpp \
--- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
@ -1,112 +0,0 @@
 /*
 * Copyright (C) 2010, The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #define LOG_TAG "LatinIME: bigram_dictionary.cpp"
 #include "bigram_dictionary.h"
 #include <algorithm>
 #include <cstring>
 #include "defines.h"
 #include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
 #include "suggest/core/dictionary/dictionary.h"
 #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
 #include "suggest/core/result/suggestion_results.h"
 #include "suggest/core/session/prev_words_info.h"
 #include "utils/char_utils.h"
 namespace latinime {
 // Stores the given structure policy pointer in mDictionaryStructurePolicy and, when
 // DEBUG_DICT is enabled, logs that the constructor ran.
 BigramDictionary::BigramDictionary(
        const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy)
        : mDictionaryStructurePolicy(dictionaryStructurePolicy) {
    if (!DEBUG_DICT) {
        return;
    }
    AKLOGI("BigramDictionary - constructor");
 }
 // Empty destructor: nothing is released here, including the structure policy pointer.
 BigramDictionary::~BigramDictionary() {}
 /* Parameters :
 * prevWordsInfo: Information of previous words to get the predictions.
 * outSuggestionResults: SuggestionResults to put the predictions.
 */
 void BigramDictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo,
        SuggestionResults *const outSuggestionResults) const {
    int unigramProbability = 0;
    int bigramCodePoints[MAX_WORD_LENGTH];
    BinaryDictionaryBigramsIterator bigramsIt =
            prevWordsInfo->getBigramsIteratorForPrediction(mDictionaryStructurePolicy);
    while (bigramsIt.hasNext()) {
        bigramsIt.next();
        if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) {
            continue;
        }
        if (prevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)
                && bigramsIt.getProbability() == NOT_A_PROBABILITY) {
            continue;
        }
        const int codePointCount = mDictionaryStructurePolicy->
                getCodePointsAndProbabilityAndReturnCodePointCount(bigramsIt.getBigramPos(),
                        MAX_WORD_LENGTH, bigramCodePoints, &unigramProbability);
        if (codePointCount <= 0) {
            continue;
        }
        // Due to space constraints, the probability for bigrams is approximate - the lower the
        // unigram probability, the worse the precision. The theoritical maximum error in
        // resulting probability is 8 - although in the practice it's never bigger than 3 or 4
        // in very bad cases. This means that sometimes, we'll see some bigrams interverted
        // here, but it can't get too bad.
        const int probability = mDictionaryStructurePolicy->getProbability(
                unigramProbability, bigramsIt.getProbability());
        outSuggestionResults->addPrediction(bigramCodePoints, codePointCount, probability);
    }
 }
 // Returns the bigram list position that the structure policy reports for prevWord.
 // Returns NOT_A_DICT_POS when prevWordLength is not positive or when the policy finds no
 // terminal PtNode for the word.
 int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const int prevWordLength,
        const bool forceLowerCaseSearch) const {
    if (prevWordLength <= 0) {
        return NOT_A_DICT_POS;
    }
    const int terminalPos = mDictionaryStructurePolicy->getTerminalPtNodePositionOfWord(
            prevWord, prevWordLength, forceLowerCaseSearch);
    return (terminalPos == NOT_A_DICT_POS)
            ? NOT_A_DICT_POS
            : mDictionaryStructurePolicy->getBigramsPositionOfPtNode(terminalPos);
 }
 int BigramDictionary::getBigramProbability(const PrevWordsInfo *const prevWordsInfo,
        const int *word1, int length1) const {
    int nextWordPos = mDictionaryStructurePolicy->getTerminalPtNodePositionOfWord(word1, length1,
            false /* forceLowerCaseSearch */);
    if (NOT_A_DICT_POS == nextWordPos) return NOT_A_PROBABILITY;
    BinaryDictionaryBigramsIterator bigramsIt =
            prevWordsInfo->getBigramsIteratorForPrediction(mDictionaryStructurePolicy);
    while (bigramsIt.hasNext()) {
        bigramsIt.next();
        if (bigramsIt.getBigramPos() == nextWordPos
                && bigramsIt.getProbability() != NOT_A_PROBABILITY) {
            return mDictionaryStructurePolicy->getProbability(
                    mDictionaryStructurePolicy->getUnigramProbabilityOfPtNode(nextWordPos),
                    bigramsIt.getProbability());
        }
    }
    return NOT_A_PROBABILITY;
 }
 // TODO: Move functions related to bigram to here
 } // namespace latinime
--- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.h
+++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.h
@ -1,47 +0,0 @@
 /*
 * Copyright (C) 2010 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #ifndef LATINIME_BIGRAM_DICTIONARY_H
 #define LATINIME_BIGRAM_DICTIONARY_H
 #include "defines.h"
 namespace latinime {
 class DictionaryStructureWithBufferPolicy;
 class PrevWordsInfo;
 class SuggestionResults;
 // Bigram queries answered through a DictionaryStructureWithBufferPolicy. The policy pointer
 // passed at construction is kept in mDictionaryStructurePolicy; every query method is const.
 class BigramDictionary {
 public:
    BigramDictionary(const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy);
    ~BigramDictionary();
    // Puts predictions derived from prevWordsInfo into outSuggestionResults.
    void getPredictions(const PrevWordsInfo *const prevWordsInfo,
            SuggestionResults *const outSuggestionResults) const;
    // Returns the probability of word1 (length1 code points) following prevWordsInfo.
    int getBigramProbability(const PrevWordsInfo *const prevWordsInfo,
            const int *word1, int length1) const;
 private:
    DISALLOW_IMPLICIT_CONSTRUCTORS(BigramDictionary);
    const DictionaryStructureWithBufferPolicy *const mDictionaryStructurePolicy;
    // Returns the bigram list position for prevWord.
    int getBigramListPositionForWord(const int *prevWord, const int prevWordLength,
            const bool forceLowerCaseSearch) const;
 };
 } // namespace latinime
 #endif // LATINIME_BIGRAM_DICTIONARY_H
--- a/native/jni/src/suggest/core/dictionary/dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp
@ -23,6 +23,7 @@
 #include "suggest/core/policy/dictionary_header_structure_policy.h"
 #include "suggest/core/result/suggestion_results.h"
 #include "suggest/core/session/dic_traverse_session.h"
 #include "suggest/core/session/prev_words_info.h"
 #include "suggest/core/suggest.h"
 #include "suggest/core/suggest_options.h"
 #include "suggest/policyimpl/gesture/gesture_suggest_policy_factory.h"
@ -37,7 +38,6 @@ const int Dictionary::HEADER_ATTRIBUTE_BUFFER_SIZE = 32;
 Dictionary::Dictionary(JNIEnv *env, DictionaryStructureWithBufferPolicy::StructurePolicyPtr
        dictionaryStructureWithBufferPolicy)
        : mDictionaryStructureWithBufferPolicy(std::move(dictionaryStructureWithBufferPolicy)),
          mBigramDictionary(mDictionaryStructureWithBufferPolicy.get()),
          mGestureSuggest(new Suggest(GestureSuggestPolicyFactory::getGestureSuggestPolicy())),
          mTypingSuggest(new Suggest(TypingSuggestPolicyFactory::getTypingSuggestPolicy())) {
    logDictionaryInfo(env);
@ -62,7 +62,29 @@ void Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession
 // Adds predictions to outSuggestionResults by walking the bigram entries that prevWordsInfo
 // provides for mDictionaryStructureWithBufferPolicy.
 void Dictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo,
        SuggestionResults *const outSuggestionResults) const {
    TimeKeeper::setCurrentTime();
-    mBigramDictionary.getPredictions(prevWordsInfo, outSuggestionResults);
+    int unigramProbability = 0;
    int bigramCodePoints[MAX_WORD_LENGTH];
    BinaryDictionaryBigramsIterator bigramsIt = prevWordsInfo->getBigramsIteratorForPrediction(
            mDictionaryStructureWithBufferPolicy.get());
    while (bigramsIt.hasNext()) {
        bigramsIt.next();
        // Entries without a target position cannot be turned into a prediction.
        if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) {
            continue;
        }
        // When the 1st previous word is reported as beginning-of-sentence, entries without a
        // bigram probability are skipped.
        if (prevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)
                && bigramsIt.getProbability() == NOT_A_PROBABILITY) {
            continue;
        }
        // Fetches the target word's code points and writes its unigram probability.
        const int codePointCount = mDictionaryStructureWithBufferPolicy->
                getCodePointsAndProbabilityAndReturnCodePointCount(bigramsIt.getBigramPos(),
                        MAX_WORD_LENGTH, bigramCodePoints, &unigramProbability);
        if (codePointCount <= 0) {
            continue;
        }
        // The structure policy combines the unigram and bigram probabilities.
        const int probability = mDictionaryStructureWithBufferPolicy->getProbability(
                unigramProbability, bigramsIt.getProbability());
        outSuggestionResults->addPrediction(bigramCodePoints, codePointCount, probability);
    }
 }
 int Dictionary::getProbability(const int *word, int length) const {
@ -84,7 +106,21 @@ int Dictionary::getMaxProbabilityOfExactMatches(const int *word, int length) con
 // Returns the probability of `word` following prevWordsInfo, or NOT_A_PROBABILITY when the
 // word has no terminal PtNode or no bigram entry with a probability targets it.
 int Dictionary::getNgramProbability(const PrevWordsInfo *const prevWordsInfo, const int *word,
        int length) const {
    TimeKeeper::setCurrentTime();
-    return mBigramDictionary.getBigramProbability(prevWordsInfo, word, length);
+    int nextWordPos = mDictionaryStructureWithBufferPolicy->getTerminalPtNodePositionOfWord(word,
            length, false /* forceLowerCaseSearch */);
    if (NOT_A_DICT_POS == nextWordPos) return NOT_A_PROBABILITY;
    BinaryDictionaryBigramsIterator bigramsIt = prevWordsInfo->getBigramsIteratorForPrediction(
            mDictionaryStructureWithBufferPolicy.get());
    while (bigramsIt.hasNext()) {
        bigramsIt.next();
        // Only an entry that targets `word` and carries a bigram probability yields a result.
        if (bigramsIt.getBigramPos() == nextWordPos
                && bigramsIt.getProbability() != NOT_A_PROBABILITY) {
            return mDictionaryStructureWithBufferPolicy->getProbability(
                    mDictionaryStructureWithBufferPolicy->getUnigramProbabilityOfPtNode(
                            nextWordPos), bigramsIt.getProbability());
        }
    }
    // No matching bigram entry was found.
    return NOT_A_PROBABILITY;
 }
 bool Dictionary::addUnigramEntry(const int *const word, const int length,
--- a/native/jni/src/suggest/core/dictionary/dictionary.h
+++ b/native/jni/src/suggest/core/dictionary/dictionary.h
@ -21,7 +21,6 @@
 #include "defines.h"
 #include "jni.h"
 #include "suggest/core/dictionary/bigram_dictionary.h"
 #include "suggest/core/dictionary/property/word_property.h"
 #include "suggest/core/policy/dictionary_header_structure_policy.h"
 #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
@ -119,7 +118,6 @@ class Dictionary {
    const DictionaryStructureWithBufferPolicy::StructurePolicyPtr
            mDictionaryStructureWithBufferPolicy;
    const BigramDictionary mBigramDictionary;
    const SuggestInterfacePtr mGestureSuggest;
    const SuggestInterfacePtr mTypingSuggest;
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
@ -268,6 +268,10 @@ int PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
 int PatriciaTriePolicy::getProbability(const int unigramProbability,
        const int bigramProbability) const {
    // Due to space constraints, the probability for bigrams is approximate - the lower the unigram
    // probability, the worse the precision. The theoretical maximum error in resulting probability
    // is 8 - although in practice it's never bigger than 3 or 4 in very bad cases. This means
    // that sometimes, we'll see some bigrams inverted here, but it can't get too bad.
    if (unigramProbability == NOT_A_PROBABILITY) {
        return NOT_A_PROBABILITY;
    } else if (bigramProbability == NOT_A_PROBABILITY) {