[ML23] Introduce a different accuracy/performance tradeoff

Bug: 11230254
Change-Id: Ic09518c818ae7b68942b1c63160dd462e5922cb5
This commit is contained in:
Jean Chalard 2014-10-10 15:50:44 +09:00
parent 5d2d852eb0
commit 7d5e1cb265
8 changed files with 52 additions and 7 deletions

View file

@ -292,6 +292,7 @@ public final class BinaryDictionary extends Dictionary {
settingsValuesForSuggestion.mSpaceAwareGestureEnabled);
session.mNativeSuggestOptions.setAdditionalFeaturesOptions(
settingsValuesForSuggestion.mAdditionalFeaturesSettingValues);
session.mNativeSuggestOptions.setWeightForLocale(weightForLocale);
if (inOutWeightOfLangModelVsSpatialModel != null) {
session.mInputOutputWeightOfLangModelVsSpatialModel[0] =
inOutWeightOfLangModelVsSpatialModel[0];

View file

@ -22,7 +22,8 @@ public class NativeSuggestOptions {
private static final int USE_FULL_EDIT_DISTANCE = 1;
private static final int BLOCK_OFFENSIVE_WORDS = 2;
private static final int SPACE_AWARE_GESTURE_ENABLED = 3;
private static final int OPTIONS_SIZE = 4;
private static final int WEIGHT_FOR_LOCALE_IN_THOUSANDS = 4;
private static final int OPTIONS_SIZE = 5;
private final int[] mOptions = new int[OPTIONS_SIZE
+ AdditionalFeaturesSettingUtils.ADDITIONAL_FEATURES_SETTINGS_SIZE];
@ -43,6 +44,12 @@ public class NativeSuggestOptions {
setBooleanOption(SPACE_AWARE_GESTURE_ENABLED, value);
}
/**
 * Stores the weight for the locale in the options passed to native code.
 *
 * The options are ints, so the weight is encoded as a fixed point value in thousands.
 * Native code decodes it in SuggestOptions#weightForLocale() by dividing by 1000.
 *
 * @param value the weight for the locale, as a real (unscaled) value.
 */
public void setWeightForLocale(final float value) {
    // Round to the nearest thousandth rather than truncating toward zero: a plain (int) cast
    // can encode a value one unit too low when the float product lands just under an integer.
    setIntegerOption(WEIGHT_FOR_LOCALE_IN_THOUSANDS, Math.round(value * 1000));
}
public void setAdditionalFeaturesOptions(final int[] additionalOptions) {
if (additionalOptions == null) {
return;

View file

@ -44,7 +44,7 @@ class Traversal {
virtual bool needsToTraverseAllUserInput() const = 0;
virtual float getMaxSpatialDistance() const = 0;
virtual int getDefaultExpandDicNodeSize() const = 0;
virtual int getMaxCacheSize(const int inputSize) const = 0;
// Returns the maximum cache size to use when the search is restarted; Suggest::initializeSearch
// passes the result to DicTraverseSession::resetCache(). inputSize is the size of the user
// input and weightForLocale is the value of SuggestOptions::weightForLocale(), which
// implementations may use to pick a smaller cache.
virtual int getMaxCacheSize(const int inputSize, const float weightForLocale) const = 0;
virtual int getTerminalCacheSize() const = 0;
virtual bool isPossibleOmissionChildNode(const DicTraverseSession *const traverseSession,
const DicNode *const parentDicNode, const DicNode *const dicNode) const = 0;

View file

@ -28,6 +28,7 @@
#include "suggest/core/policy/weighting.h"
#include "suggest/core/result/suggestions_output_utils.h"
#include "suggest/core/session/dic_traverse_session.h"
#include "suggest/core/suggest_options.h"
namespace latinime {
@ -88,7 +89,8 @@ void Suggest::initializeSearch(DicTraverseSession *traverseSession) const {
traverseSession->getDicTraverseCache()->continueSearch();
} else {
// Restart recognition at the root.
traverseSession->resetCache(TRAVERSAL->getMaxCacheSize(traverseSession->getInputSize()),
traverseSession->resetCache(TRAVERSAL->getMaxCacheSize(traverseSession->getInputSize(),
traverseSession->getSuggestOptions()->weightForLocale()),
TRAVERSAL->getTerminalCacheSize());
// Create a new dic node here
DicNode rootNode;

View file

@ -42,6 +42,12 @@ class SuggestOptions{
return getBoolOption(SPACE_AWARE_GESTURE_ENABLED);
}
// Returns the weight for the locale as a real value.
// Java (NativeSuggestOptions#setWeightForLocale) passes the weight as a fixed point value in
// thousands, so the stored option is scaled back down by 1000 here.
AK_FORCE_INLINE float weightForLocale() const {
    const float weightInThousands =
            static_cast<float>(getIntOption(WEIGHT_FOR_LOCALE_IN_THOUSANDS));
    return weightInThousands / 1000.0f;
}
// Reads the boolean value of an additional feature option. Additional features options are
// stored after the other options, so the key is offset by ADDITIONAL_FEATURES_OPTIONS.
AK_FORCE_INLINE bool getAdditionalFeaturesBoolOption(const int key) const {
    const int optionIndex = key + ADDITIONAL_FEATURES_OPTIONS;
    return getBoolOption(optionIndex);
}
@ -55,9 +61,10 @@ class SuggestOptions{
static const int USE_FULL_EDIT_DISTANCE = 1;
static const int BLOCK_OFFENSIVE_WORDS = 2;
static const int SPACE_AWARE_GESTURE_ENABLED = 3;
static const int WEIGHT_FOR_LOCALE_IN_THOUSANDS = 4;
// Additional features options are stored after the other options and used as setting values of
// experimental features.
static const int ADDITIONAL_FEATURES_OPTIONS = 4;
static const int ADDITIONAL_FEATURES_OPTIONS = 5;
const int *const mOptions;
const int mLength;

View file

@ -31,6 +31,7 @@ const float ScoringParams::DIGRAPH_PENALTY_FOR_EXACT_MATCH = 0.03f;
// TODO: Unlimit max cache dic node size
const int ScoringParams::MAX_CACHE_DIC_NODE_SIZE = 170;
const int ScoringParams::MAX_CACHE_DIC_NODE_SIZE_FOR_SINGLE_POINT = 310;
// Cache size returned by TypingTraversal::getMaxCacheSize() for inputs longer than a single
// point when the weight for the locale is below LOCALE_WEIGHT_THRESHOLD_FOR_SMALL_CACHE_SIZE.
const int ScoringParams::MAX_CACHE_DIC_NODE_SIZE_FOR_LOW_PROBABILITY_LOCALE = 50;
const int ScoringParams::THRESHOLD_SHORT_WORD_LENGTH = 4;
const float ScoringParams::DISTANCE_WEIGHT_LENGTH = 0.1524f;
@ -61,4 +62,7 @@ const float ScoringParams::HAS_MULTI_WORD_TERMINAL_COST = 0.4182f;
const float ScoringParams::TYPING_BASE_OUTPUT_SCORE = 1.0f;
const float ScoringParams::TYPING_MAX_OUTPUT_SCORE_PER_INPUT = 0.1f;
const float ScoringParams::NORMALIZED_SPATIAL_DISTANCE_THRESHOLD_FOR_EDIT = 0.095f;
// Thresholds compared against SuggestOptions::weightForLocale() in TypingTraversal.
// When the weight for the locale is strictly below the threshold:
// - space substitution correction is skipped,
const float ScoringParams::LOCALE_WEIGHT_THRESHOLD_FOR_SPACE_SUBSTITUTION = 0.99f;
// - space omission correction is skipped,
const float ScoringParams::LOCALE_WEIGHT_THRESHOLD_FOR_SPACE_OMISSION = 0.99f;
// - the smaller MAX_CACHE_DIC_NODE_SIZE_FOR_LOW_PROBABILITY_LOCALE cache size is used
//   (only for inputs longer than a single point).
const float ScoringParams::LOCALE_WEIGHT_THRESHOLD_FOR_SMALL_CACHE_SIZE = 0.99f;
} // namespace latinime

View file

@ -30,6 +30,7 @@ class ScoringParams {
static const float AUTOCORRECT_OUTPUT_THRESHOLD;
static const int MAX_CACHE_DIC_NODE_SIZE;
static const int MAX_CACHE_DIC_NODE_SIZE_FOR_SINGLE_POINT;
static const int MAX_CACHE_DIC_NODE_SIZE_FOR_LOW_PROBABILITY_LOCALE;
static const int THRESHOLD_SHORT_WORD_LENGTH;
static const float EXACT_MATCH_PROMOTION;
@ -68,6 +69,9 @@ class ScoringParams {
static const float TYPING_BASE_OUTPUT_SCORE;
static const float TYPING_MAX_OUTPUT_SCORE_PER_INPUT;
static const float NORMALIZED_SPATIAL_DISTANCE_THRESHOLD_FOR_EDIT;
static const float LOCALE_WEIGHT_THRESHOLD_FOR_SPACE_SUBSTITUTION;
static const float LOCALE_WEIGHT_THRESHOLD_FOR_SPACE_OMISSION;
static const float LOCALE_WEIGHT_THRESHOLD_FOR_SMALL_CACHE_SIZE;
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(ScoringParams);

View file

@ -26,6 +26,7 @@
#include "suggest/core/layout/proximity_info_utils.h"
#include "suggest/core/policy/traversal.h"
#include "suggest/core/session/dic_traverse_session.h"
#include "suggest/core/suggest_options.h"
#include "suggest/policyimpl/typing/scoring_params.h"
#include "utils/char_utils.h"
@ -77,6 +78,13 @@ class TypingTraversal : public Traversal {
if (!CORRECT_NEW_WORD_SPACE_SUBSTITUTION) {
return false;
}
if (traverseSession->getSuggestOptions()->weightForLocale()
< ScoringParams::LOCALE_WEIGHT_THRESHOLD_FOR_SPACE_SUBSTITUTION) {
// Space substitution is heavy, so we skip doing it if the weight for this language
// is low because we anticipate the suggestions out of this dictionary are not for
// the language the user intends to type in.
return false;
}
if (!canDoLookAheadCorrection(traverseSession, dicNode)) {
return false;
}
@ -91,6 +99,13 @@ class TypingTraversal : public Traversal {
if (!CORRECT_NEW_WORD_SPACE_OMISSION) {
return false;
}
if (traverseSession->getSuggestOptions()->weightForLocale()
< ScoringParams::LOCALE_WEIGHT_THRESHOLD_FOR_SPACE_OMISSION) {
// Space omission is heavy, so we skip doing it if the weight for this language
// is low because we anticipate the suggestions out of this dictionary are not for
// the language the user intends to type in.
return false;
}
const int inputSize = traverseSession->getInputSize();
// TODO: Don't refer to isCompletion?
if (dicNode->isCompletion(inputSize)) {
@ -141,9 +156,14 @@ class TypingTraversal : public Traversal {
return DicNodeVector::DEFAULT_NODES_SIZE_FOR_OPTIMIZATION;
}
AK_FORCE_INLINE int getMaxCacheSize(const int inputSize) const {
return (inputSize <= 1) ? ScoringParams::MAX_CACHE_DIC_NODE_SIZE_FOR_SINGLE_POINT
: ScoringParams::MAX_CACHE_DIC_NODE_SIZE;
// Picks the maximum cache size for a search.
// - Inputs of at most one point always get the single-point cache size, whatever the weight.
// - Otherwise, a locale whose weight is below the small-cache threshold gets the reduced
//   cache size for low probability locales.
// - Otherwise the regular cache size is used.
AK_FORCE_INLINE int getMaxCacheSize(const int inputSize, const float weightForLocale) const {
    const bool isAtMostSinglePoint = (inputSize <= 1);
    const bool isLowProbabilityLocale =
            (weightForLocale < ScoringParams::LOCALE_WEIGHT_THRESHOLD_FOR_SMALL_CACHE_SIZE);
    return isAtMostSinglePoint ? ScoringParams::MAX_CACHE_DIC_NODE_SIZE_FOR_SINGLE_POINT
            : isLowProbabilityLocale
                    ? ScoringParams::MAX_CACHE_DIC_NODE_SIZE_FOR_LOW_PROBABILITY_LOCALE
                    : ScoringParams::MAX_CACHE_DIC_NODE_SIZE;
}
AK_FORCE_INLINE int getTerminalCacheSize() const {