am 8681bef0
: Remove BigramDictionary from native code.
* commit '8681bef03c1ca864d3de0ae27adb5cbfb63f0fef': Remove BigramDictionary from native code.
This commit is contained in:
commit
24e06a3287
6 changed files with 43 additions and 165 deletions
|
@ -26,7 +26,6 @@ LATIN_IME_CORE_SRC_FILES := \
|
||||||
dic_node_utils.cpp \
|
dic_node_utils.cpp \
|
||||||
dic_nodes_cache.cpp) \
|
dic_nodes_cache.cpp) \
|
||||||
$(addprefix suggest/core/dictionary/, \
|
$(addprefix suggest/core/dictionary/, \
|
||||||
bigram_dictionary.cpp \
|
|
||||||
dictionary.cpp \
|
dictionary.cpp \
|
||||||
dictionary_utils.cpp \
|
dictionary_utils.cpp \
|
||||||
digraph_utils.cpp \
|
digraph_utils.cpp \
|
||||||
|
|
|
@ -1,112 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (C) 2010, The Android Open Source Project
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define LOG_TAG "LatinIME: bigram_dictionary.cpp"
|
|
||||||
|
|
||||||
#include "bigram_dictionary.h"
|
|
||||||
|
|
||||||
#include <algorithm>
|
|
||||||
#include <cstring>
|
|
||||||
|
|
||||||
#include "defines.h"
|
|
||||||
#include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
|
|
||||||
#include "suggest/core/dictionary/dictionary.h"
|
|
||||||
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
|
|
||||||
#include "suggest/core/result/suggestion_results.h"
|
|
||||||
#include "suggest/core/session/prev_words_info.h"
|
|
||||||
#include "utils/char_utils.h"
|
|
||||||
|
|
||||||
namespace latinime {
|
|
||||||
|
|
||||||
BigramDictionary::BigramDictionary(
|
|
||||||
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy)
|
|
||||||
: mDictionaryStructurePolicy(dictionaryStructurePolicy) {
|
|
||||||
if (DEBUG_DICT) {
|
|
||||||
AKLOGI("BigramDictionary - constructor");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
BigramDictionary::~BigramDictionary() {
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Parameters :
|
|
||||||
* prevWordsInfo: Information of previous words to get the predictions.
|
|
||||||
* outSuggestionResults: SuggestionResults to put the predictions.
|
|
||||||
*/
|
|
||||||
void BigramDictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo,
|
|
||||||
SuggestionResults *const outSuggestionResults) const {
|
|
||||||
int unigramProbability = 0;
|
|
||||||
int bigramCodePoints[MAX_WORD_LENGTH];
|
|
||||||
BinaryDictionaryBigramsIterator bigramsIt =
|
|
||||||
prevWordsInfo->getBigramsIteratorForPrediction(mDictionaryStructurePolicy);
|
|
||||||
while (bigramsIt.hasNext()) {
|
|
||||||
bigramsIt.next();
|
|
||||||
if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (prevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)
|
|
||||||
&& bigramsIt.getProbability() == NOT_A_PROBABILITY) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
const int codePointCount = mDictionaryStructurePolicy->
|
|
||||||
getCodePointsAndProbabilityAndReturnCodePointCount(bigramsIt.getBigramPos(),
|
|
||||||
MAX_WORD_LENGTH, bigramCodePoints, &unigramProbability);
|
|
||||||
if (codePointCount <= 0) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
// Due to space constraints, the probability for bigrams is approximate - the lower the
|
|
||||||
// unigram probability, the worse the precision. The theoretical maximum error in
|
|
||||||
// resulting probability is 8 - although in practice it's never bigger than 3 or 4
|
|
||||||
// in very bad cases. This means that sometimes, we'll see some bigrams swapped
|
|
||||||
// here, but it can't get too bad.
|
|
||||||
const int probability = mDictionaryStructurePolicy->getProbability(
|
|
||||||
unigramProbability, bigramsIt.getProbability());
|
|
||||||
outSuggestionResults->addPrediction(bigramCodePoints, codePointCount, probability);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Returns a pointer to the start of the bigram list.
|
|
||||||
// If the word is not found or has no bigrams, this function returns NOT_A_DICT_POS.
|
|
||||||
int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const int prevWordLength,
|
|
||||||
const bool forceLowerCaseSearch) const {
|
|
||||||
if (0 >= prevWordLength) return NOT_A_DICT_POS;
|
|
||||||
int pos = mDictionaryStructurePolicy->getTerminalPtNodePositionOfWord(prevWord, prevWordLength,
|
|
||||||
forceLowerCaseSearch);
|
|
||||||
if (NOT_A_DICT_POS == pos) return NOT_A_DICT_POS;
|
|
||||||
return mDictionaryStructurePolicy->getBigramsPositionOfPtNode(pos);
|
|
||||||
}
|
|
||||||
|
|
||||||
int BigramDictionary::getBigramProbability(const PrevWordsInfo *const prevWordsInfo,
|
|
||||||
const int *word1, int length1) const {
|
|
||||||
int nextWordPos = mDictionaryStructurePolicy->getTerminalPtNodePositionOfWord(word1, length1,
|
|
||||||
false /* forceLowerCaseSearch */);
|
|
||||||
if (NOT_A_DICT_POS == nextWordPos) return NOT_A_PROBABILITY;
|
|
||||||
BinaryDictionaryBigramsIterator bigramsIt =
|
|
||||||
prevWordsInfo->getBigramsIteratorForPrediction(mDictionaryStructurePolicy);
|
|
||||||
while (bigramsIt.hasNext()) {
|
|
||||||
bigramsIt.next();
|
|
||||||
if (bigramsIt.getBigramPos() == nextWordPos
|
|
||||||
&& bigramsIt.getProbability() != NOT_A_PROBABILITY) {
|
|
||||||
return mDictionaryStructurePolicy->getProbability(
|
|
||||||
mDictionaryStructurePolicy->getUnigramProbabilityOfPtNode(nextWordPos),
|
|
||||||
bigramsIt.getProbability());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return NOT_A_PROBABILITY;
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: Move functions related to bigram to here
|
|
||||||
} // namespace latinime
|
|
|
@ -1,47 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (C) 2010 The Android Open Source Project
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef LATINIME_BIGRAM_DICTIONARY_H
|
|
||||||
#define LATINIME_BIGRAM_DICTIONARY_H
|
|
||||||
|
|
||||||
#include "defines.h"
|
|
||||||
|
|
||||||
namespace latinime {
|
|
||||||
|
|
||||||
class DictionaryStructureWithBufferPolicy;
|
|
||||||
class PrevWordsInfo;
|
|
||||||
class SuggestionResults;
|
|
||||||
|
|
||||||
class BigramDictionary {
|
|
||||||
public:
|
|
||||||
BigramDictionary(const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy);
|
|
||||||
|
|
||||||
void getPredictions(const PrevWordsInfo *const prevWordsInfo,
|
|
||||||
SuggestionResults *const outSuggestionResults) const;
|
|
||||||
int getBigramProbability(const PrevWordsInfo *const prevWordsInfo,
|
|
||||||
const int *word1, int length1) const;
|
|
||||||
~BigramDictionary();
|
|
||||||
|
|
||||||
private:
|
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(BigramDictionary);
|
|
||||||
|
|
||||||
int getBigramListPositionForWord(const int *prevWord, const int prevWordLength,
|
|
||||||
const bool forceLowerCaseSearch) const;
|
|
||||||
|
|
||||||
const DictionaryStructureWithBufferPolicy *const mDictionaryStructurePolicy;
|
|
||||||
};
|
|
||||||
} // namespace latinime
|
|
||||||
#endif // LATINIME_BIGRAM_DICTIONARY_H
|
|
|
@ -23,6 +23,7 @@
|
||||||
#include "suggest/core/policy/dictionary_header_structure_policy.h"
|
#include "suggest/core/policy/dictionary_header_structure_policy.h"
|
||||||
#include "suggest/core/result/suggestion_results.h"
|
#include "suggest/core/result/suggestion_results.h"
|
||||||
#include "suggest/core/session/dic_traverse_session.h"
|
#include "suggest/core/session/dic_traverse_session.h"
|
||||||
|
#include "suggest/core/session/prev_words_info.h"
|
||||||
#include "suggest/core/suggest.h"
|
#include "suggest/core/suggest.h"
|
||||||
#include "suggest/core/suggest_options.h"
|
#include "suggest/core/suggest_options.h"
|
||||||
#include "suggest/policyimpl/gesture/gesture_suggest_policy_factory.h"
|
#include "suggest/policyimpl/gesture/gesture_suggest_policy_factory.h"
|
||||||
|
@ -37,7 +38,6 @@ const int Dictionary::HEADER_ATTRIBUTE_BUFFER_SIZE = 32;
|
||||||
Dictionary::Dictionary(JNIEnv *env, DictionaryStructureWithBufferPolicy::StructurePolicyPtr
|
Dictionary::Dictionary(JNIEnv *env, DictionaryStructureWithBufferPolicy::StructurePolicyPtr
|
||||||
dictionaryStructureWithBufferPolicy)
|
dictionaryStructureWithBufferPolicy)
|
||||||
: mDictionaryStructureWithBufferPolicy(std::move(dictionaryStructureWithBufferPolicy)),
|
: mDictionaryStructureWithBufferPolicy(std::move(dictionaryStructureWithBufferPolicy)),
|
||||||
mBigramDictionary(mDictionaryStructureWithBufferPolicy.get()),
|
|
||||||
mGestureSuggest(new Suggest(GestureSuggestPolicyFactory::getGestureSuggestPolicy())),
|
mGestureSuggest(new Suggest(GestureSuggestPolicyFactory::getGestureSuggestPolicy())),
|
||||||
mTypingSuggest(new Suggest(TypingSuggestPolicyFactory::getTypingSuggestPolicy())) {
|
mTypingSuggest(new Suggest(TypingSuggestPolicyFactory::getTypingSuggestPolicy())) {
|
||||||
logDictionaryInfo(env);
|
logDictionaryInfo(env);
|
||||||
|
@ -62,7 +62,29 @@ void Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession
|
||||||
void Dictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo,
|
void Dictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo,
|
||||||
SuggestionResults *const outSuggestionResults) const {
|
SuggestionResults *const outSuggestionResults) const {
|
||||||
TimeKeeper::setCurrentTime();
|
TimeKeeper::setCurrentTime();
|
||||||
mBigramDictionary.getPredictions(prevWordsInfo, outSuggestionResults);
|
int unigramProbability = 0;
|
||||||
|
int bigramCodePoints[MAX_WORD_LENGTH];
|
||||||
|
BinaryDictionaryBigramsIterator bigramsIt = prevWordsInfo->getBigramsIteratorForPrediction(
|
||||||
|
mDictionaryStructureWithBufferPolicy.get());
|
||||||
|
while (bigramsIt.hasNext()) {
|
||||||
|
bigramsIt.next();
|
||||||
|
if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (prevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)
|
||||||
|
&& bigramsIt.getProbability() == NOT_A_PROBABILITY) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
const int codePointCount = mDictionaryStructureWithBufferPolicy->
|
||||||
|
getCodePointsAndProbabilityAndReturnCodePointCount(bigramsIt.getBigramPos(),
|
||||||
|
MAX_WORD_LENGTH, bigramCodePoints, &unigramProbability);
|
||||||
|
if (codePointCount <= 0) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
const int probability = mDictionaryStructureWithBufferPolicy->getProbability(
|
||||||
|
unigramProbability, bigramsIt.getProbability());
|
||||||
|
outSuggestionResults->addPrediction(bigramCodePoints, codePointCount, probability);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int Dictionary::getProbability(const int *word, int length) const {
|
int Dictionary::getProbability(const int *word, int length) const {
|
||||||
|
@ -84,7 +106,21 @@ int Dictionary::getMaxProbabilityOfExactMatches(const int *word, int length) con
|
||||||
int Dictionary::getNgramProbability(const PrevWordsInfo *const prevWordsInfo, const int *word,
|
int Dictionary::getNgramProbability(const PrevWordsInfo *const prevWordsInfo, const int *word,
|
||||||
int length) const {
|
int length) const {
|
||||||
TimeKeeper::setCurrentTime();
|
TimeKeeper::setCurrentTime();
|
||||||
return mBigramDictionary.getBigramProbability(prevWordsInfo, word, length);
|
int nextWordPos = mDictionaryStructureWithBufferPolicy->getTerminalPtNodePositionOfWord(word,
|
||||||
|
length, false /* forceLowerCaseSearch */);
|
||||||
|
if (NOT_A_DICT_POS == nextWordPos) return NOT_A_PROBABILITY;
|
||||||
|
BinaryDictionaryBigramsIterator bigramsIt = prevWordsInfo->getBigramsIteratorForPrediction(
|
||||||
|
mDictionaryStructureWithBufferPolicy.get());
|
||||||
|
while (bigramsIt.hasNext()) {
|
||||||
|
bigramsIt.next();
|
||||||
|
if (bigramsIt.getBigramPos() == nextWordPos
|
||||||
|
&& bigramsIt.getProbability() != NOT_A_PROBABILITY) {
|
||||||
|
return mDictionaryStructureWithBufferPolicy->getProbability(
|
||||||
|
mDictionaryStructureWithBufferPolicy->getUnigramProbabilityOfPtNode(
|
||||||
|
nextWordPos), bigramsIt.getProbability());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return NOT_A_PROBABILITY;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Dictionary::addUnigramEntry(const int *const word, const int length,
|
bool Dictionary::addUnigramEntry(const int *const word, const int length,
|
||||||
|
|
|
@ -21,7 +21,6 @@
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "jni.h"
|
#include "jni.h"
|
||||||
#include "suggest/core/dictionary/bigram_dictionary.h"
|
|
||||||
#include "suggest/core/dictionary/property/word_property.h"
|
#include "suggest/core/dictionary/property/word_property.h"
|
||||||
#include "suggest/core/policy/dictionary_header_structure_policy.h"
|
#include "suggest/core/policy/dictionary_header_structure_policy.h"
|
||||||
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
|
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
|
||||||
|
@ -119,7 +118,6 @@ class Dictionary {
|
||||||
|
|
||||||
const DictionaryStructureWithBufferPolicy::StructurePolicyPtr
|
const DictionaryStructureWithBufferPolicy::StructurePolicyPtr
|
||||||
mDictionaryStructureWithBufferPolicy;
|
mDictionaryStructureWithBufferPolicy;
|
||||||
const BigramDictionary mBigramDictionary;
|
|
||||||
const SuggestInterfacePtr mGestureSuggest;
|
const SuggestInterfacePtr mGestureSuggest;
|
||||||
const SuggestInterfacePtr mTypingSuggest;
|
const SuggestInterfacePtr mTypingSuggest;
|
||||||
|
|
||||||
|
|
|
@ -268,6 +268,10 @@ int PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
|
||||||
|
|
||||||
int PatriciaTriePolicy::getProbability(const int unigramProbability,
|
int PatriciaTriePolicy::getProbability(const int unigramProbability,
|
||||||
const int bigramProbability) const {
|
const int bigramProbability) const {
|
||||||
|
// Due to space constraints, the probability for bigrams is approximate - the lower the unigram
|
||||||
|
// probability, the worse the precision. The theoretical maximum error in resulting probability
|
||||||
|
// is 8 - although in practice it's never bigger than 3 or 4 in very bad cases. This means
|
||||||
|
// that sometimes, we'll see some bigrams swapped here, but it can't get too bad.
|
||||||
if (unigramProbability == NOT_A_PROBABILITY) {
|
if (unigramProbability == NOT_A_PROBABILITY) {
|
||||||
return NOT_A_PROBABILITY;
|
return NOT_A_PROBABILITY;
|
||||||
} else if (bigramProbability == NOT_A_PROBABILITY) {
|
} else if (bigramProbability == NOT_A_PROBABILITY) {
|
||||||
|
|
Loading…
Reference in a new issue