Refactoring: Move prev word information into PrevWordsInfo.

Bug: 14119293
Bug: 14425059

Change-Id: I5a5f81c6b189e3ad1da093343a3121463f87c21c
main
Keisuke Kuroyanagi 2014-05-15 18:39:23 +09:00
parent d6fdd58cc0
commit b87fffb8be
9 changed files with 102 additions and 37 deletions

View File

@ -28,6 +28,7 @@
#include "suggest/core/dictionary/property/unigram_property.h" #include "suggest/core/dictionary/property/unigram_property.h"
#include "suggest/core/dictionary/property/word_property.h" #include "suggest/core/dictionary/property/word_property.h"
#include "suggest/core/result/suggestion_results.h" #include "suggest/core/result/suggestion_results.h"
#include "suggest/core/session/prev_words_info.h"
#include "suggest/core/suggest_options.h" #include "suggest/core/suggest_options.h"
#include "suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h" #include "suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h"
#include "utils/char_utils.h" #include "utils/char_utils.h"
@ -247,15 +248,15 @@ static void latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz,
float languageWeight; float languageWeight;
env->GetFloatArrayRegion(inOutLanguageWeight, 0, 1 /* len */, &languageWeight); env->GetFloatArrayRegion(inOutLanguageWeight, 0, 1 /* len */, &languageWeight);
SuggestionResults suggestionResults(MAX_RESULTS); SuggestionResults suggestionResults(MAX_RESULTS);
const PrevWordsInfo prevWordsInfo(prevWordCodePoints, prevWordCodePointsLength,
false /* isStartOfSentence */);
if (givenSuggestOptions.isGesture() || inputSize > 0) { if (givenSuggestOptions.isGesture() || inputSize > 0) {
// TODO: Use SuggestionResults to return suggestions. // TODO: Use SuggestionResults to return suggestions.
dictionary->getSuggestions(pInfo, traverseSession, xCoordinates, yCoordinates, dictionary->getSuggestions(pInfo, traverseSession, xCoordinates, yCoordinates,
times, pointerIds, inputCodePoints, inputSize, prevWordCodePoints, times, pointerIds, inputCodePoints, inputSize, &prevWordsInfo,
prevWordCodePointsLength, &givenSuggestOptions, languageWeight, &givenSuggestOptions, languageWeight, &suggestionResults);
&suggestionResults);
} else { } else {
dictionary->getPredictions(prevWordCodePoints, prevWordCodePointsLength, dictionary->getPredictions(&prevWordsInfo, &suggestionResults);
&suggestionResults);
} }
suggestionResults.outputSuggestions(env, outSuggestionCount, outCodePointsArray, suggestionResults.outputSuggestions(env, outSuggestionCount, outCodePointsArray,
outScoresArray, outSpaceIndicesArray, outTypesArray, outScoresArray, outSpaceIndicesArray, outTypesArray,
@ -282,8 +283,8 @@ static jint latinime_BinaryDictionary_getBigramProbability(JNIEnv *env, jclass c
int word1CodePoints[word1Length]; int word1CodePoints[word1Length];
env->GetIntArrayRegion(word0, 0, word0Length, word0CodePoints); env->GetIntArrayRegion(word0, 0, word0Length, word0CodePoints);
env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints); env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints);
return dictionary->getBigramProbability(word0CodePoints, word0Length, word1CodePoints, const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length, false /* isStartOfSentence */);
word1Length); return dictionary->getBigramProbability(&prevWordsInfo, word1CodePoints, word1Length);
} }
// Method to iterate all words in the dictionary for makedict. // Method to iterate all words in the dictionary for makedict.

View File

@ -22,6 +22,7 @@
#include "jni.h" #include "jni.h"
#include "jni_common.h" #include "jni_common.h"
#include "suggest/core/session/dic_traverse_session.h" #include "suggest/core/session/dic_traverse_session.h"
#include "suggest/core/session/prev_words_info.h"
namespace latinime { namespace latinime {
class Dictionary; class Dictionary;
@ -39,12 +40,14 @@ static void latinime_initDicTraverseSession(JNIEnv *env, jclass clazz, jlong tra
} }
Dictionary *dict = reinterpret_cast<Dictionary *>(dictionary); Dictionary *dict = reinterpret_cast<Dictionary *>(dictionary);
if (!previousWord) { if (!previousWord) {
ts->init(dict, 0 /* prevWord */, 0 /* prevWordLength*/, 0 /* suggestOptions */); PrevWordsInfo prevWordsInfo;
ts->init(dict, &prevWordsInfo, 0 /* suggestOptions */);
return; return;
} }
int prevWord[previousWordLength]; int prevWord[previousWordLength];
env->GetIntArrayRegion(previousWord, 0, previousWordLength, prevWord); env->GetIntArrayRegion(previousWord, 0, previousWordLength, prevWord);
ts->init(dict, prevWord, previousWordLength, 0 /* suggestOptions */); PrevWordsInfo prevWordsInfo(prevWord, previousWordLength, false /* isStartOfSentence */);
ts->init(dict, &prevWordsInfo, 0 /* suggestOptions */);
} }
static void latinime_releaseDicTraverseSession(JNIEnv *env, jclass clazz, jlong traverseSession) { static void latinime_releaseDicTraverseSession(JNIEnv *env, jclass clazz, jlong traverseSession) {

View File

@ -26,6 +26,7 @@
#include "suggest/core/dictionary/dictionary.h" #include "suggest/core/dictionary/dictionary.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
#include "suggest/core/result/suggestion_results.h" #include "suggest/core/result/suggestion_results.h"
#include "suggest/core/session/prev_words_info.h"
#include "utils/char_utils.h" #include "utils/char_utils.h"
namespace latinime { namespace latinime {
@ -42,19 +43,18 @@ BigramDictionary::~BigramDictionary() {
} }
/* Parameters : /* Parameters :
* prevWord: the word before, the one for which we need to look up bigrams. * prevWordsInfo: Information of previous words to get the predictions.
* prevWordLength: its length.
* outSuggestionResults: SuggestionResults to put the predictions. * outSuggestionResults: SuggestionResults to put the predictions.
*/ */
void BigramDictionary::getPredictions(const int *prevWord, const int prevWordLength, void BigramDictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo,
SuggestionResults *const outSuggestionResults) const { SuggestionResults *const outSuggestionResults) const {
int pos = getBigramListPositionForWord(prevWord, prevWordLength, int pos = getBigramListPositionForWord(prevWordsInfo->getPrevWordCodePoints(),
false /* forceLowerCaseSearch */); prevWordsInfo->getPrevWordCodePointCount(), false /* forceLowerCaseSearch */);
// getBigramListPositionForWord returns NOT_A_DICT_POS if this word isn't in the dictionary or has no bigrams // getBigramListPositionForWord returns NOT_A_DICT_POS if this word isn't in the dictionary or has no bigrams
if (NOT_A_DICT_POS == pos) { if (NOT_A_DICT_POS == pos) {
// If no bigrams for this exact word, search again in lower case. // If no bigrams for this exact word, search again in lower case.
pos = getBigramListPositionForWord(prevWord, prevWordLength, pos = getBigramListPositionForWord(prevWordsInfo->getPrevWordCodePoints(),
true /* forceLowerCaseSearch */); prevWordsInfo->getPrevWordCodePointCount(), true /* forceLowerCaseSearch */);
} }
// If still no bigrams, we really don't have them! // If still no bigrams, we really don't have them!
if (NOT_A_DICT_POS == pos) return; if (NOT_A_DICT_POS == pos) return;
@ -96,9 +96,10 @@ int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const in
return mDictionaryStructurePolicy->getBigramsPositionOfPtNode(pos); return mDictionaryStructurePolicy->getBigramsPositionOfPtNode(pos);
} }
int BigramDictionary::getBigramProbability(const int *word0, int length0, const int *word1, int BigramDictionary::getBigramProbability(const PrevWordsInfo *const prevWordsInfo,
int length1) const { const int *word1, int length1) const {
int pos = getBigramListPositionForWord(word0, length0, false /* forceLowerCaseSearch */); int pos = getBigramListPositionForWord(prevWordsInfo->getPrevWordCodePoints(),
prevWordsInfo->getPrevWordCodePointCount(), false /* forceLowerCaseSearch */);
// getBigramListPositionForWord returns NOT_A_DICT_POS if this word isn't in the dictionary or has no bigrams // getBigramListPositionForWord returns NOT_A_DICT_POS if this word isn't in the dictionary or has no bigrams
if (NOT_A_DICT_POS == pos) return NOT_A_PROBABILITY; if (NOT_A_DICT_POS == pos) return NOT_A_PROBABILITY;
int nextWordPos = mDictionaryStructurePolicy->getTerminalPtNodePositionOfWord(word1, length1, int nextWordPos = mDictionaryStructurePolicy->getTerminalPtNodePositionOfWord(word1, length1,

View File

@ -22,15 +22,17 @@
namespace latinime { namespace latinime {
class DictionaryStructureWithBufferPolicy; class DictionaryStructureWithBufferPolicy;
class PrevWordsInfo;
class SuggestionResults; class SuggestionResults;
class BigramDictionary { class BigramDictionary {
public: public:
BigramDictionary(const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy); BigramDictionary(const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy);
void getPredictions(const int *word, int length, void getPredictions(const PrevWordsInfo *const prevWordsInfo,
SuggestionResults *const outSuggestionResults) const; SuggestionResults *const outSuggestionResults) const;
int getBigramProbability(const int *word1, int length1, const int *word2, int length2) const; int getBigramProbability(const PrevWordsInfo *const prevWordsInfo,
const int *word1, int length1) const;
~BigramDictionary(); ~BigramDictionary();
private: private:

View File

@ -44,11 +44,11 @@ Dictionary::Dictionary(JNIEnv *env, DictionaryStructureWithBufferPolicy::Structu
void Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession, void Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession,
int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints, int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints,
int inputSize, int *prevWordCodePoints, int prevWordLength, int inputSize, const PrevWordsInfo *const prevWordsInfo,
const SuggestOptions *const suggestOptions, const float languageWeight, const SuggestOptions *const suggestOptions, const float languageWeight,
SuggestionResults *const outSuggestionResults) const { SuggestionResults *const outSuggestionResults) const {
TimeKeeper::setCurrentTime(); TimeKeeper::setCurrentTime();
traverseSession->init(this, prevWordCodePoints, prevWordLength, suggestOptions); traverseSession->init(this, prevWordsInfo, suggestOptions);
const auto &suggest = suggestOptions->isGesture() ? mGestureSuggest : mTypingSuggest; const auto &suggest = suggestOptions->isGesture() ? mGestureSuggest : mTypingSuggest;
suggest->getSuggestions(proximityInfo, traverseSession, xcoordinates, suggest->getSuggestions(proximityInfo, traverseSession, xcoordinates,
ycoordinates, times, pointerIds, inputCodePoints, inputSize, ycoordinates, times, pointerIds, inputCodePoints, inputSize,
@ -58,11 +58,10 @@ void Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession
} }
} }
void Dictionary::getPredictions(const int *word, int length, void Dictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo,
SuggestionResults *const outSuggestionResults) const { SuggestionResults *const outSuggestionResults) const {
TimeKeeper::setCurrentTime(); TimeKeeper::setCurrentTime();
if (length <= 0) return; mBigramDictionary.getPredictions(prevWordsInfo, outSuggestionResults);
mBigramDictionary.getPredictions(word, length, outSuggestionResults);
} }
int Dictionary::getProbability(const int *word, int length) const { int Dictionary::getProbability(const int *word, int length) const {
@ -75,10 +74,10 @@ int Dictionary::getProbability(const int *word, int length) const {
return getDictionaryStructurePolicy()->getUnigramProbabilityOfPtNode(pos); return getDictionaryStructurePolicy()->getUnigramProbabilityOfPtNode(pos);
} }
int Dictionary::getBigramProbability(const int *word0, int length0, const int *word1, int Dictionary::getBigramProbability(const PrevWordsInfo *const prevWordsInfo, const int *word1,
int length1) const { int length1) const {
TimeKeeper::setCurrentTime(); TimeKeeper::setCurrentTime();
return mBigramDictionary.getBigramProbability(word0, length0, word1, length1); return mBigramDictionary.getBigramProbability(prevWordsInfo, word1, length1);
} }
void Dictionary::addUnigramWord(const int *const word, const int length, void Dictionary::addUnigramWord(const int *const word, const int length,

View File

@ -31,6 +31,7 @@ namespace latinime {
class DictionaryStructureWithBufferPolicy; class DictionaryStructureWithBufferPolicy;
class DicTraverseSession; class DicTraverseSession;
class PrevWordsInfo;
class ProximityInfo; class ProximityInfo;
class SuggestionResults; class SuggestionResults;
class SuggestOptions; class SuggestOptions;
@ -62,16 +63,17 @@ class Dictionary {
void getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession, void getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession,
int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints, int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints,
int inputSize, int *prevWordCodePoints, int prevWordLength, int inputSize, const PrevWordsInfo *const prevWordsInfo,
const SuggestOptions *const suggestOptions, const float languageWeight, const SuggestOptions *const suggestOptions, const float languageWeight,
SuggestionResults *const outSuggestionResults) const; SuggestionResults *const outSuggestionResults) const;
void getPredictions(const int *word, int length, void getPredictions(const PrevWordsInfo *const prevWordsInfo,
SuggestionResults *const outSuggestionResults) const; SuggestionResults *const outSuggestionResults) const;
int getProbability(const int *word, int length) const; int getProbability(const int *word, int length) const;
int getBigramProbability(const int *word0, int length0, const int *word1, int length1) const; int getBigramProbability(const PrevWordsInfo *const prevWordsInfo,
const int *word1, int length1) const;
void addUnigramWord(const int *const codePoints, const int codePointCount, void addUnigramWord(const int *const codePoints, const int codePointCount,
const UnigramProperty *const unigramProperty); const UnigramProperty *const unigramProperty);

View File

@ -20,6 +20,7 @@
#include "suggest/core/dictionary/dictionary.h" #include "suggest/core/dictionary/dictionary.h"
#include "suggest/core/policy/dictionary_header_structure_policy.h" #include "suggest/core/policy/dictionary_header_structure_policy.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
#include "suggest/core/session/prev_words_info.h"
namespace latinime { namespace latinime {
@ -28,24 +29,26 @@ namespace latinime {
const int DicTraverseSession::DICTIONARY_SIZE_THRESHOLD_TO_USE_LARGE_CACHE_FOR_SUGGESTION = const int DicTraverseSession::DICTIONARY_SIZE_THRESHOLD_TO_USE_LARGE_CACHE_FOR_SUGGESTION =
256 * 1024; 256 * 1024;
void DicTraverseSession::init(const Dictionary *const dictionary, const int *prevWord, void DicTraverseSession::init(const Dictionary *const dictionary,
int prevWordLength, const SuggestOptions *const suggestOptions) { const PrevWordsInfo *const prevWordsInfo, const SuggestOptions *const suggestOptions) {
mDictionary = dictionary; mDictionary = dictionary;
mMultiWordCostMultiplier = getDictionaryStructurePolicy()->getHeaderStructurePolicy() mMultiWordCostMultiplier = getDictionaryStructurePolicy()->getHeaderStructurePolicy()
->getMultiWordCostMultiplier(); ->getMultiWordCostMultiplier();
mSuggestOptions = suggestOptions; mSuggestOptions = suggestOptions;
if (!prevWord) { if (!prevWordsInfo->getPrevWordCodePoints()) {
mPrevWordPtNodePos = NOT_A_DICT_POS; mPrevWordPtNodePos = NOT_A_DICT_POS;
return; return;
} }
// TODO: merge following similar calls to getTerminalPosition into one case-insensitive call. // TODO: merge following similar calls to getTerminalPosition into one case-insensitive call.
mPrevWordPtNodePos = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord( mPrevWordPtNodePos = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord(
prevWord, prevWordLength, false /* forceLowerCaseSearch */); prevWordsInfo->getPrevWordCodePoints(), prevWordsInfo->getPrevWordCodePointCount(),
false /* forceLowerCaseSearch */);
if (mPrevWordPtNodePos == NOT_A_DICT_POS) { if (mPrevWordPtNodePos == NOT_A_DICT_POS) {
// Check bigrams for lower-cased previous word if original was not found. Useful for // Check bigrams for lower-cased previous word if original was not found. Useful for
// auto-capitalized words like "The [current_word]". // auto-capitalized words like "The [current_word]".
mPrevWordPtNodePos = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord( mPrevWordPtNodePos = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord(
prevWord, prevWordLength, true /* forceLowerCaseSearch */); prevWordsInfo->getPrevWordCodePoints(), prevWordsInfo->getPrevWordCodePointCount(),
true /* forceLowerCaseSearch */);
} }
} }

View File

@ -29,6 +29,7 @@ namespace latinime {
class Dictionary; class Dictionary;
class DictionaryStructureWithBufferPolicy; class DictionaryStructureWithBufferPolicy;
class PrevWordsInfo;
class ProximityInfo; class ProximityInfo;
class SuggestOptions; class SuggestOptions;
@ -60,7 +61,7 @@ class DicTraverseSession {
// Non virtual inline destructor -- never inherit this class // Non virtual inline destructor -- never inherit this class
AK_FORCE_INLINE ~DicTraverseSession() {} AK_FORCE_INLINE ~DicTraverseSession() {}
void init(const Dictionary *dictionary, const int *prevWord, int prevWordLength, void init(const Dictionary *dictionary, const PrevWordsInfo *const prevWordsInfo,
const SuggestOptions *const suggestOptions); const SuggestOptions *const suggestOptions);
// TODO: Remove and merge into init // TODO: Remove and merge into init
void setupForGetSuggestions(const ProximityInfo *pInfo, const int *inputCodePoints, void setupForGetSuggestions(const ProximityInfo *pInfo, const int *inputCodePoints,

View File

@ -0,0 +1,53 @@
/*
* Copyright (C) 2014 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LATINIME_PREV_WORDS_INFO_H
#define LATINIME_PREV_WORDS_INFO_H
#include "defines.h"
namespace latinime {
// Read-only description of the word that precedes the one being looked up.
//
// The object only records the pointer and the count it is given: the code point
// buffer is neither copied nor released here, so this class does not take
// ownership of any code point buffers.
//
// TODO: Support n-gram.
// TODO: Support beginning of sentence.
class PrevWordsInfo {
 public:
    // Builds an instance that carries no prev word information: the code point
    // pointer is nullptr and the count is 0.
    PrevWordsInfo() : PrevWordsInfo(nullptr, 0, false) {}

    // prevWordCodePoints:     code points of the previous word; stored as given.
    // prevWordCodePointCount: number of code points in prevWordCodePoints.
    // isBeginningOfSentence:  accepted for callers, but currently neither stored
    //                         nor consulted (see the TODO above).
    PrevWordsInfo(const int *const prevWordCodePoints, const int prevWordCodePointCount,
            const bool isBeginningOfSentence)
            : mPrevWordCodePoints(prevWordCodePoints),
              mPrevWordCodePointCount(prevWordCodePointCount) {}

    // Pointer handed to the constructor, or nullptr when there is no prev word.
    const int *getPrevWordCodePoints() const { return mPrevWordCodePoints; }

    // Count handed to the constructor, or 0 when there is no prev word.
    int getPrevWordCodePointCount() const { return mPrevWordCodePointCount; }

 private:
    DISALLOW_COPY_AND_ASSIGN(PrevWordsInfo);

    // Borrowed buffer; never freed by this class.
    const int *const mPrevWordCodePoints;
    // Length of the borrowed buffer, in code points.
    const int mPrevWordCodePointCount;
};
} // namespace latinime
#endif // LATINIME_PREV_WORDS_INFO_H