am ef0d83ca
: Merge "Refactoring: Move prev word information into PrevWordsInfo."
* commit 'ef0d83ca07c6dd0b3bcb82e2a1d54727ac88bd68': Refactoring: Move prev word information into PrevWordsInfo.
This commit is contained in:
commit
1cec26ce75
9 changed files with 102 additions and 37 deletions
|
@ -28,6 +28,7 @@
|
|||
#include "suggest/core/dictionary/property/unigram_property.h"
|
||||
#include "suggest/core/dictionary/property/word_property.h"
|
||||
#include "suggest/core/result/suggestion_results.h"
|
||||
#include "suggest/core/session/prev_words_info.h"
|
||||
#include "suggest/core/suggest_options.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h"
|
||||
#include "utils/char_utils.h"
|
||||
|
@ -247,15 +248,15 @@ static void latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz,
|
|||
float languageWeight;
|
||||
env->GetFloatArrayRegion(inOutLanguageWeight, 0, 1 /* len */, &languageWeight);
|
||||
SuggestionResults suggestionResults(MAX_RESULTS);
|
||||
const PrevWordsInfo prevWordsInfo(prevWordCodePoints, prevWordCodePointsLength,
|
||||
false /* isStartOfSentence */);
|
||||
if (givenSuggestOptions.isGesture() || inputSize > 0) {
|
||||
// TODO: Use SuggestionResults to return suggestions.
|
||||
dictionary->getSuggestions(pInfo, traverseSession, xCoordinates, yCoordinates,
|
||||
times, pointerIds, inputCodePoints, inputSize, prevWordCodePoints,
|
||||
prevWordCodePointsLength, &givenSuggestOptions, languageWeight,
|
||||
&suggestionResults);
|
||||
times, pointerIds, inputCodePoints, inputSize, &prevWordsInfo,
|
||||
&givenSuggestOptions, languageWeight, &suggestionResults);
|
||||
} else {
|
||||
dictionary->getPredictions(prevWordCodePoints, prevWordCodePointsLength,
|
||||
&suggestionResults);
|
||||
dictionary->getPredictions(&prevWordsInfo, &suggestionResults);
|
||||
}
|
||||
suggestionResults.outputSuggestions(env, outSuggestionCount, outCodePointsArray,
|
||||
outScoresArray, outSpaceIndicesArray, outTypesArray,
|
||||
|
@ -282,8 +283,8 @@ static jint latinime_BinaryDictionary_getBigramProbability(JNIEnv *env, jclass c
|
|||
int word1CodePoints[word1Length];
|
||||
env->GetIntArrayRegion(word0, 0, word0Length, word0CodePoints);
|
||||
env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints);
|
||||
return dictionary->getBigramProbability(word0CodePoints, word0Length, word1CodePoints,
|
||||
word1Length);
|
||||
const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length, false /* isStartOfSentence */);
|
||||
return dictionary->getBigramProbability(&prevWordsInfo, word1CodePoints, word1Length);
|
||||
}
|
||||
|
||||
// Method to iterate all words in the dictionary for makedict.
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
#include "jni.h"
|
||||
#include "jni_common.h"
|
||||
#include "suggest/core/session/dic_traverse_session.h"
|
||||
#include "suggest/core/session/prev_words_info.h"
|
||||
|
||||
namespace latinime {
|
||||
class Dictionary;
|
||||
|
@ -39,12 +40,14 @@ static void latinime_initDicTraverseSession(JNIEnv *env, jclass clazz, jlong tra
|
|||
}
|
||||
Dictionary *dict = reinterpret_cast<Dictionary *>(dictionary);
|
||||
if (!previousWord) {
|
||||
ts->init(dict, 0 /* prevWord */, 0 /* prevWordLength*/, 0 /* suggestOptions */);
|
||||
PrevWordsInfo prevWordsInfo;
|
||||
ts->init(dict, &prevWordsInfo, 0 /* suggestOptions */);
|
||||
return;
|
||||
}
|
||||
int prevWord[previousWordLength];
|
||||
env->GetIntArrayRegion(previousWord, 0, previousWordLength, prevWord);
|
||||
ts->init(dict, prevWord, previousWordLength, 0 /* suggestOptions */);
|
||||
PrevWordsInfo prevWordsInfo(prevWord, previousWordLength, false /* isStartOfSentence */);
|
||||
ts->init(dict, &prevWordsInfo, 0 /* suggestOptions */);
|
||||
}
|
||||
|
||||
static void latinime_releaseDicTraverseSession(JNIEnv *env, jclass clazz, jlong traverseSession) {
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
#include "suggest/core/dictionary/dictionary.h"
|
||||
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
|
||||
#include "suggest/core/result/suggestion_results.h"
|
||||
#include "suggest/core/session/prev_words_info.h"
|
||||
#include "utils/char_utils.h"
|
||||
|
||||
namespace latinime {
|
||||
|
@ -42,19 +43,18 @@ BigramDictionary::~BigramDictionary() {
|
|||
}
|
||||
|
||||
/* Parameters :
|
||||
* prevWord: the word before, the one for which we need to look up bigrams.
|
||||
* prevWordLength: its length.
|
||||
* prevWordsInfo: Information of previous words to get the predictions.
|
||||
* outSuggestionResults: SuggestionResults to put the predictions.
|
||||
*/
|
||||
void BigramDictionary::getPredictions(const int *prevWord, const int prevWordLength,
|
||||
void BigramDictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo,
|
||||
SuggestionResults *const outSuggestionResults) const {
|
||||
int pos = getBigramListPositionForWord(prevWord, prevWordLength,
|
||||
false /* forceLowerCaseSearch */);
|
||||
int pos = getBigramListPositionForWord(prevWordsInfo->getPrevWordCodePoints(),
|
||||
prevWordsInfo->getPrevWordCodePointCount(), false /* forceLowerCaseSearch */);
|
||||
// NOT_A_DICT_POS here means this word isn't in the dictionary or has no bigrams.
|
||||
if (NOT_A_DICT_POS == pos) {
|
||||
// If no bigrams for this exact word, search again in lower case.
|
||||
pos = getBigramListPositionForWord(prevWord, prevWordLength,
|
||||
true /* forceLowerCaseSearch */);
|
||||
pos = getBigramListPositionForWord(prevWordsInfo->getPrevWordCodePoints(),
|
||||
prevWordsInfo->getPrevWordCodePointCount(), true /* forceLowerCaseSearch */);
|
||||
}
|
||||
// If still no bigrams, we really don't have them!
|
||||
if (NOT_A_DICT_POS == pos) return;
|
||||
|
@ -96,9 +96,10 @@ int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const in
|
|||
return mDictionaryStructurePolicy->getBigramsPositionOfPtNode(pos);
|
||||
}
|
||||
|
||||
int BigramDictionary::getBigramProbability(const int *word0, int length0, const int *word1,
|
||||
int length1) const {
|
||||
int pos = getBigramListPositionForWord(word0, length0, false /* forceLowerCaseSearch */);
|
||||
int BigramDictionary::getBigramProbability(const PrevWordsInfo *const prevWordsInfo,
|
||||
const int *word1, int length1) const {
|
||||
int pos = getBigramListPositionForWord(prevWordsInfo->getPrevWordCodePoints(),
|
||||
prevWordsInfo->getPrevWordCodePointCount(), false /* forceLowerCaseSearch */);
|
||||
// NOT_A_DICT_POS here means this word isn't in the dictionary or has no bigrams.
|
||||
if (NOT_A_DICT_POS == pos) return NOT_A_PROBABILITY;
|
||||
int nextWordPos = mDictionaryStructurePolicy->getTerminalPtNodePositionOfWord(word1, length1,
|
||||
|
|
|
@ -22,15 +22,17 @@
|
|||
namespace latinime {
|
||||
|
||||
class DictionaryStructureWithBufferPolicy;
|
||||
class PrevWordsInfo;
|
||||
class SuggestionResults;
|
||||
|
||||
class BigramDictionary {
|
||||
public:
|
||||
BigramDictionary(const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy);
|
||||
|
||||
void getPredictions(const int *word, int length,
|
||||
void getPredictions(const PrevWordsInfo *const prevWordsInfo,
|
||||
SuggestionResults *const outSuggestionResults) const;
|
||||
int getBigramProbability(const int *word1, int length1, const int *word2, int length2) const;
|
||||
int getBigramProbability(const PrevWordsInfo *const prevWordsInfo,
|
||||
const int *word1, int length1) const;
|
||||
~BigramDictionary();
|
||||
|
||||
private:
|
||||
|
|
|
@ -44,11 +44,11 @@ Dictionary::Dictionary(JNIEnv *env, DictionaryStructureWithBufferPolicy::Structu
|
|||
|
||||
void Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession,
|
||||
int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints,
|
||||
int inputSize, int *prevWordCodePoints, int prevWordLength,
|
||||
int inputSize, const PrevWordsInfo *const prevWordsInfo,
|
||||
const SuggestOptions *const suggestOptions, const float languageWeight,
|
||||
SuggestionResults *const outSuggestionResults) const {
|
||||
TimeKeeper::setCurrentTime();
|
||||
traverseSession->init(this, prevWordCodePoints, prevWordLength, suggestOptions);
|
||||
traverseSession->init(this, prevWordsInfo, suggestOptions);
|
||||
const auto &suggest = suggestOptions->isGesture() ? mGestureSuggest : mTypingSuggest;
|
||||
suggest->getSuggestions(proximityInfo, traverseSession, xcoordinates,
|
||||
ycoordinates, times, pointerIds, inputCodePoints, inputSize,
|
||||
|
@ -58,11 +58,10 @@ void Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession
|
|||
}
|
||||
}
|
||||
|
||||
// Collects predictions that follow the previous word and stores them in outSuggestionResults.
// TimeKeeper::setCurrentTime() is called first; the lookup itself is delegated to
// mBigramDictionary.getPredictions().
// NOTE(review): the `length <= 0` early return only exists for the (word, length) form; the
// PrevWordsInfo form forwards unconditionally. Confirm the bigram lookup tolerates a
// PrevWordsInfo that carries no previous word.
void Dictionary::getPredictions(const int *word, int length,
|
||||
void Dictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo,
|
||||
SuggestionResults *const outSuggestionResults) const {
|
||||
TimeKeeper::setCurrentTime();
|
||||
if (length <= 0) return;
|
||||
mBigramDictionary.getPredictions(word, length, outSuggestionResults);
|
||||
mBigramDictionary.getPredictions(prevWordsInfo, outSuggestionResults);
|
||||
}
|
||||
|
||||
int Dictionary::getProbability(const int *word, int length) const {
|
||||
|
@ -75,10 +74,10 @@ int Dictionary::getProbability(const int *word, int length) const {
|
|||
return getDictionaryStructurePolicy()->getUnigramProbabilityOfPtNode(pos);
|
||||
}
|
||||
|
||||
// Returns the bigram probability of word1 (length1 code points) given the previous word.
// TimeKeeper::setCurrentTime() is called first; the result is whatever
// mBigramDictionary.getBigramProbability() returns for the same arguments.
int Dictionary::getBigramProbability(const int *word0, int length0, const int *word1,
|
||||
int Dictionary::getBigramProbability(const PrevWordsInfo *const prevWordsInfo, const int *word1,
|
||||
int length1) const {
|
||||
TimeKeeper::setCurrentTime();
|
||||
return mBigramDictionary.getBigramProbability(word0, length0, word1, length1);
|
||||
return mBigramDictionary.getBigramProbability(prevWordsInfo, word1, length1);
|
||||
}
|
||||
|
||||
void Dictionary::addUnigramWord(const int *const word, const int length,
|
||||
|
|
|
@ -31,6 +31,7 @@ namespace latinime {
|
|||
|
||||
class DictionaryStructureWithBufferPolicy;
|
||||
class DicTraverseSession;
|
||||
class PrevWordsInfo;
|
||||
class ProximityInfo;
|
||||
class SuggestionResults;
|
||||
class SuggestOptions;
|
||||
|
@ -62,16 +63,17 @@ class Dictionary {
|
|||
|
||||
void getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession,
|
||||
int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints,
|
||||
int inputSize, int *prevWordCodePoints, int prevWordLength,
|
||||
int inputSize, const PrevWordsInfo *const prevWordsInfo,
|
||||
const SuggestOptions *const suggestOptions, const float languageWeight,
|
||||
SuggestionResults *const outSuggestionResults) const;
|
||||
|
||||
void getPredictions(const int *word, int length,
|
||||
void getPredictions(const PrevWordsInfo *const prevWordsInfo,
|
||||
SuggestionResults *const outSuggestionResults) const;
|
||||
|
||||
int getProbability(const int *word, int length) const;
|
||||
|
||||
int getBigramProbability(const int *word0, int length0, const int *word1, int length1) const;
|
||||
int getBigramProbability(const PrevWordsInfo *const prevWordsInfo,
|
||||
const int *word1, int length1) const;
|
||||
|
||||
void addUnigramWord(const int *const codePoints, const int codePointCount,
|
||||
const UnigramProperty *const unigramProperty);
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
#include "suggest/core/dictionary/dictionary.h"
|
||||
#include "suggest/core/policy/dictionary_header_structure_policy.h"
|
||||
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
|
||||
#include "suggest/core/session/prev_words_info.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
|
@ -28,24 +29,26 @@ namespace latinime {
|
|||
const int DicTraverseSession::DICTIONARY_SIZE_THRESHOLD_TO_USE_LARGE_CACHE_FOR_SUGGESTION =
|
||||
256 * 1024;
|
||||
|
||||
// Prepares this session for a lookup against `dictionary`:
//  - stores the dictionary and the suggest options,
//  - caches the multi-word cost multiplier read from the dictionary header policy,
//  - resolves the previous word to a terminal PtNode position (mPrevWordPtNodePos).
// The previous-word argument is only read during this call; no pointer to it is kept by the
// code shown here.
void DicTraverseSession::init(const Dictionary *const dictionary, const int *prevWord,
|
||||
int prevWordLength, const SuggestOptions *const suggestOptions) {
|
||||
// prevWordsInfo is dereferenced unconditionally below, so it must not be null; "no previous
// word" is expressed by a PrevWordsInfo whose code point pointer is null.
void DicTraverseSession::init(const Dictionary *const dictionary,
|
||||
const PrevWordsInfo *const prevWordsInfo, const SuggestOptions *const suggestOptions) {
|
||||
mDictionary = dictionary;
|
||||
mMultiWordCostMultiplier = getDictionaryStructurePolicy()->getHeaderStructurePolicy()
|
||||
->getMultiWordCostMultiplier();
|
||||
mSuggestOptions = suggestOptions;
|
||||
// Without a previous word there is nothing to look up: mark the position invalid and stop.
if (!prevWord) {
|
||||
if (!prevWordsInfo->getPrevWordCodePoints()) {
|
||||
mPrevWordPtNodePos = NOT_A_DICT_POS;
|
||||
return;
|
||||
}
|
||||
// TODO: merge following similar calls to getTerminalPosition into one case-insensitive call.
|
||||
// First attempt: look the previous word up exactly as given.
mPrevWordPtNodePos = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord(
|
||||
prevWord, prevWordLength, false /* forceLowerCaseSearch */);
|
||||
prevWordsInfo->getPrevWordCodePoints(), prevWordsInfo->getPrevWordCodePointCount(),
|
||||
false /* forceLowerCaseSearch */);
|
||||
if (mPrevWordPtNodePos == NOT_A_DICT_POS) {
|
||||
// Check bigrams for lower-cased previous word if original was not found. Useful for
|
||||
// auto-capitalized words like "The [current_word]".
|
||||
// If this retry also fails, mPrevWordPtNodePos stays NOT_A_DICT_POS.
mPrevWordPtNodePos = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord(
|
||||
prevWord, prevWordLength, true /* forceLowerCaseSearch */);
|
||||
prevWordsInfo->getPrevWordCodePoints(), prevWordsInfo->getPrevWordCodePointCount(),
|
||||
true /* forceLowerCaseSearch */);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -29,6 +29,7 @@ namespace latinime {
|
|||
|
||||
class Dictionary;
|
||||
class DictionaryStructureWithBufferPolicy;
|
||||
class PrevWordsInfo;
|
||||
class ProximityInfo;
|
||||
class SuggestOptions;
|
||||
|
||||
|
@ -60,7 +61,7 @@ class DicTraverseSession {
|
|||
// Non virtual inline destructor -- never inherit this class
|
||||
AK_FORCE_INLINE ~DicTraverseSession() {}
|
||||
|
||||
void init(const Dictionary *dictionary, const int *prevWord, int prevWordLength,
|
||||
void init(const Dictionary *dictionary, const PrevWordsInfo *const prevWordsInfo,
|
||||
const SuggestOptions *const suggestOptions);
|
||||
// TODO: Remove and merge into init
|
||||
void setupForGetSuggestions(const ProximityInfo *pInfo, const int *inputCodePoints,
|
||||
|
|
53
native/jni/src/suggest/core/session/prev_words_info.h
Normal file
53
native/jni/src/suggest/core/session/prev_words_info.h
Normal file
|
@ -0,0 +1,53 @@
|
|||
/*
|
||||
* Copyright (C) 2014 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef LATINIME_PREV_WORDS_INFO_H
|
||||
#define LATINIME_PREV_WORDS_INFO_H
|
||||
|
||||
#include "defines.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
// TODO: Support n-gram.
|
||||
// TODO: Support beginning of sentence.
|
||||
// This class does not take ownership of any code point buffers.
|
||||
// Immutable holder for the word that precedes the current input, expressed as a pointer to its
// code points plus the number of code points. Only the pointer is stored (nothing is copied),
// so the buffer must stay valid for as long as it is read through this object.
class PrevWordsInfo {
|
||||
public:
|
||||
// No prev word information.
// The code point pointer is nullptr and the count is 0.
|
||||
PrevWordsInfo()
|
||||
: mPrevWordCodePoints(nullptr), mPrevWordCodePointCount(0) {}
|
||||
|
||||
// Wraps an existing previous-word buffer.
//   prevWordCodePoints:     code points of the previous word; stored as-is, not copied.
//   prevWordCodePointCount: number of code points; stored as-is.
//   isBeginningOfSentence:  accepted but neither stored nor used by this constructor.
PrevWordsInfo(const int *const prevWordCodePoints, const int prevWordCodePointCount,
|
||||
const bool isBeginningOfSentence)
|
||||
: mPrevWordCodePoints(prevWordCodePoints),
|
||||
mPrevWordCodePointCount(prevWordCodePointCount) {}
|
||||
|
||||
// Returns the code point pointer given at construction (nullptr for the default constructor).
const int *getPrevWordCodePoints() const {
|
||||
return mPrevWordCodePoints;
|
||||
}
|
||||
|
||||
// Returns the code point count given at construction (0 for the default constructor).
int getPrevWordCodePointCount() const {
|
||||
return mPrevWordCodePointCount;
|
||||
}
|
||||
|
||||
private:
|
||||
// NOTE(review): presumably forbids copy construction and assignment; the macro is not defined
// in this file (defines.h is the only include) — confirm there.
DISALLOW_COPY_AND_ASSIGN(PrevWordsInfo);
|
||||
|
||||
// Borrowed pointer to the previous word's code points; never reassigned after construction.
const int *const mPrevWordCodePoints;
|
||||
// Number of code points recorded for mPrevWordCodePoints.
const int mPrevWordCodePointCount;
|
||||
};
|
||||
} // namespace latinime
|
||||
#endif // LATINIME_PREV_WORDS_INFO_H
|
Loading…
Reference in a new issue