Merge "Implement getMaxProbabilityOfExactMatches()."

This commit is contained in:
Keisuke Kuroyanagi 2014-06-06 08:39:40 +00:00 committed by Android (Google) Code Review
commit 8cae9f50b4
12 changed files with 185 additions and 6 deletions

View file

@ -356,6 +356,7 @@ public final class BinaryDictionary extends Dictionary {
return getProbabilityNative(mNativeDict, codePoints); return getProbabilityNative(mNativeDict, codePoints);
} }
@Override
public int getMaxFrequencyOfExactMatches(final String word) { public int getMaxFrequencyOfExactMatches(final String word) {
if (TextUtils.isEmpty(word)) return NOT_A_PROBABILITY; if (TextUtils.isEmpty(word)) return NOT_A_PROBABILITY;
int[] codePoints = StringUtils.toCodePointArray(word); int[] codePoints = StringUtils.toCodePointArray(word);

View file

@ -28,6 +28,7 @@ LATIN_IME_CORE_SRC_FILES := \
$(addprefix suggest/core/dictionary/, \ $(addprefix suggest/core/dictionary/, \
bigram_dictionary.cpp \ bigram_dictionary.cpp \
dictionary.cpp \ dictionary.cpp \
dictionary_utils.cpp \
digraph_utils.cpp \ digraph_utils.cpp \
error_type_utils.cpp \ error_type_utils.cpp \
multi_bigram_map.cpp \ multi_bigram_map.cpp \

View file

@ -280,8 +280,7 @@ static jint latinime_BinaryDictionary_getMaxProbabilityOfExactMatches(
const jsize wordLength = env->GetArrayLength(word); const jsize wordLength = env->GetArrayLength(word);
int codePoints[wordLength]; int codePoints[wordLength];
env->GetIntArrayRegion(word, 0, wordLength, codePoints); env->GetIntArrayRegion(word, 0, wordLength, codePoints);
// TODO: Implement. return dictionary->getMaxProbabilityOfExactMatches(codePoints, wordLength);
return NOT_A_PROBABILITY;
} }
static jint latinime_BinaryDictionary_getBigramProbability(JNIEnv *env, jclass clazz, static jint latinime_BinaryDictionary_getBigramProbability(JNIEnv *env, jclass clazz,

View file

@ -125,7 +125,7 @@ class DicNode {
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler); PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
} }
void initAsPassingChild(DicNode *parentDicNode) { void initAsPassingChild(const DicNode *parentDicNode) {
mIsCachedForNextSuggestion = parentDicNode->mIsCachedForNextSuggestion; mIsCachedForNextSuggestion = parentDicNode->mIsCachedForNextSuggestion;
const int codePoint = const int codePoint =
parentDicNode->mDicNodeState.mDicNodeStateOutput.getCurrentWordCodePointAt( parentDicNode->mDicNodeState.mDicNodeStateOutput.getCurrentWordCodePointAt(

View file

@ -48,7 +48,7 @@ namespace latinime {
/////////////////////////////////// ///////////////////////////////////
// Traverse node expansion utils // // Traverse node expansion utils //
/////////////////////////////////// ///////////////////////////////////
/* static */ void DicNodeUtils::getAllChildDicNodes(DicNode *dicNode, /* static */ void DicNodeUtils::getAllChildDicNodes(const DicNode *dicNode,
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
DicNodeVector *const childDicNodes) { DicNodeVector *const childDicNodes) {
if (dicNode->isTotalInputSizeExceedingLimit()) { if (dicNode->isTotalInputSizeExceedingLimit()) {

View file

@ -35,7 +35,7 @@ class DicNodeUtils {
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
const DicNode *const prevWordLastDicNode, DicNode *const newRootDicNode); const DicNode *const prevWordLastDicNode, DicNode *const newRootDicNode);
static void initByCopy(const DicNode *const srcDicNode, DicNode *const destDicNode); static void initByCopy(const DicNode *const srcDicNode, DicNode *const destDicNode);
static void getAllChildDicNodes(DicNode *dicNode, static void getAllChildDicNodes(const DicNode *dicNode,
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
DicNodeVector *childDicNodes); DicNodeVector *childDicNodes);
static float getBigramNodeImprobability( static float getBigramNodeImprobability(

View file

@ -52,7 +52,7 @@ class DicNodeVector {
return static_cast<int>(mDicNodes.size()); return static_cast<int>(mDicNodes.size());
} }
void pushPassingChild(DicNode *dicNode) { void pushPassingChild(const DicNode *dicNode) {
ASSERT(!mLock); ASSERT(!mLock);
mDicNodes.emplace_back(); mDicNodes.emplace_back();
mDicNodes.back().initAsPassingChild(dicNode); mDicNodes.back().initAsPassingChild(dicNode);

View file

@ -19,6 +19,7 @@
#include "suggest/core/dictionary/dictionary.h" #include "suggest/core/dictionary/dictionary.h"
#include "defines.h" #include "defines.h"
#include "suggest/core/dictionary/dictionary_utils.h"
#include "suggest/core/policy/dictionary_header_structure_policy.h" #include "suggest/core/policy/dictionary_header_structure_policy.h"
#include "suggest/core/result/suggestion_results.h" #include "suggest/core/result/suggestion_results.h"
#include "suggest/core/session/dic_traverse_session.h" #include "suggest/core/session/dic_traverse_session.h"
@ -74,6 +75,12 @@ int Dictionary::getProbability(const int *word, int length) const {
return getDictionaryStructurePolicy()->getUnigramProbabilityOfPtNode(pos); return getDictionaryStructurePolicy()->getUnigramProbabilityOfPtNode(pos);
} }
int Dictionary::getMaxProbabilityOfExactMatches(const int *word, int length) const {
TimeKeeper::setCurrentTime();
return DictionaryUtils::getMaxProbabilityOfExactMatches(
mDictionaryStructureWithBufferPolicy.get(), word, length);
}
int Dictionary::getBigramProbability(const PrevWordsInfo *const prevWordsInfo, const int *word, int Dictionary::getBigramProbability(const PrevWordsInfo *const prevWordsInfo, const int *word,
int length) const { int length) const {
TimeKeeper::setCurrentTime(); TimeKeeper::setCurrentTime();

View file

@ -73,6 +73,8 @@ class Dictionary {
int getProbability(const int *word, int length) const; int getProbability(const int *word, int length) const;
int getMaxProbabilityOfExactMatches(const int *word, int length) const;
int getBigramProbability(const PrevWordsInfo *const prevWordsInfo, int getBigramProbability(const PrevWordsInfo *const prevWordsInfo,
const int *word, int length) const; const int *word, int length) const;

View file

@ -0,0 +1,96 @@
/*
* Copyright (C) 2014, The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "suggest/core/dictionary/dictionary_utils.h"
#include "suggest/core/dicnode/dic_node.h"
#include "suggest/core/dicnode/dic_node_priority_queue.h"
#include "suggest/core/dicnode/dic_node_vector.h"
#include "suggest/core/dictionary/dictionary.h"
#include "suggest/core/dictionary/digraph_utils.h"
#include "suggest/core/session/prev_words_info.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
namespace latinime {
/* static */ int DictionaryUtils::getMaxProbabilityOfExactMatches(
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
const int *const codePoints, const int codePointCount) {
std::vector<DicNode> current;
std::vector<DicNode> next;
// No prev words information.
PrevWordsInfo emptyPrevWordsInfo;
int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
emptyPrevWordsInfo.getPrevWordsTerminalPtNodePos(dictionaryStructurePolicy,
prevWordsPtNodePos, false /* tryLowerCaseSearch */);
current.emplace_back();
DicNodeUtils::initAsRoot(dictionaryStructurePolicy, prevWordsPtNodePos, &current.front());
for (int i = 0; i < codePointCount; ++i) {
// The base-lower input is used to ignore case errors and accent errors.
const int codePoint = CharUtils::toBaseLowerCase(codePoints[i]);
for (const DicNode &dicNode : current) {
if (dicNode.isInDigraph() && dicNode.getNodeCodePoint() == codePoint) {
next.emplace_back(dicNode);
next.back().advanceDigraphIndex();
continue;
}
processChildDicNodes(dictionaryStructurePolicy, codePoint, &dicNode, &next);
}
current.clear();
current.swap(next);
}
int maxProbability = NOT_A_PROBABILITY;
for (const DicNode &dicNode : current) {
if (!dicNode.isTerminalDicNode()) {
continue;
}
// dicNode can contain case errors, accent errors, intentional omissions or digraphs.
maxProbability = std::max(maxProbability, dicNode.getProbability());
}
return maxProbability;
}
/**
 * Expands parentDicNode by one level and appends to outDicNodes a copy of every child that can
 * consume inputCodePoint.
 *
 * A child is accepted when:
 *  - its base-lower code point equals inputCodePoint, or
 *  - it has a digraph and, after advancing into the digraph, its code point equals
 *    inputCodePoint (the copy is stored advanced one further position).
 * Children reported as possible intentional omissions are expanded recursively so that their
 * own children are also tried against the same inputCodePoint.
 *
 * @param dictionaryStructurePolicy dictionary being searched.
 * @param inputCodePoint code point to match; callers pass it already in base-lower form.
 * @param parentDicNode node whose children are examined.
 * @param outDicNodes receives copies of the matching nodes.
 */
/* static */ void DictionaryUtils::processChildDicNodes(
        const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
        const int inputCodePoint, const DicNode *const parentDicNode,
        std::vector<DicNode> *const outDicNodes) {
    DicNodeVector childDicNodes;
    DicNodeUtils::getAllChildDicNodes(parentDicNode, dictionaryStructurePolicy, &childDicNodes);
    // The child list is not modified below, so its size is read once.
    const int childCount = childDicNodes.getSizeAndLock();
    for (int childIndex = 0; childIndex < childCount; ++childIndex) {
        DicNode *const childDicNode = childDicNodes[childIndex];
        const int codePoint = CharUtils::toBaseLowerCase(childDicNode->getNodeCodePoint());
        if (inputCodePoint == codePoint) {
            outDicNodes->emplace_back(*childDicNode);
        }
        if (childDicNode->canBeIntentionalOmission()) {
            // Skip over this child and try to match the input against its children instead.
            processChildDicNodes(dictionaryStructurePolicy, inputCodePoint, childDicNode,
                    outDicNodes);
        }
        if (DigraphUtils::hasDigraphForCodePoint(
                dictionaryStructurePolicy->getHeaderStructurePolicy(),
                childDicNode->getNodeCodePoint())) {
            // NOTE(review): assumes that after advanceDigraphIndex() getNodeCodePoint() reports
            // the code point at the current digraph position - confirm against DicNode.
            childDicNode->advanceDigraphIndex();
            // Compare with the input code point. The earlier code compared with this child's
            // own base-lower code point, so the result did not depend on the input at all.
            if (childDicNode->getNodeCodePoint() == inputCodePoint) {
                childDicNode->advanceDigraphIndex();
                outDicNodes->emplace_back(*childDicNode);
            }
        }
    }
}
} // namespace latinime

View file

@ -0,0 +1,44 @@
/*
* Copyright (C) 2014 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LATINIME_DICTIONARY_UTILS_H
#define LATINIME_DICTIONARY_UTILS_H
#include <vector>
#include "defines.h"
namespace latinime {
class DictionaryStructureWithBufferPolicy;
class DicNode;
// Collection of static helpers for dictionary lookups. All members are static; the class is
// declared with DISALLOW_IMPLICIT_CONSTRUCTORS.
class DictionaryUtils {
public:
// Returns the largest probability among the terminal nodes that match the codePointCount code
// points in codePoints, or NOT_A_PROBABILITY when none matches. Matching is performed on
// base-lower code points and also follows intentional omissions and digraphs.
static int getMaxProbabilityOfExactMatches(
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
const int *const codePoints, const int codePointCount);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DictionaryUtils);
// Appends to outDicNodes copies of the children of parentDicNode that can consume
// inputCodePoint; recurses through children that can be intentional omissions.
static void processChildDicNodes(
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
const int inputCodePoint, const DicNode *const parentDicNode,
std::vector<DicNode> *const outDicNodes);
};
} // namespace latinime
#endif // LATINIME_DICTIONARY_UTILS_H

View file

@ -1472,4 +1472,33 @@ public class BinaryDictionaryTests extends AndroidTestCase {
assertEquals(bigramProbability, assertEquals(bigramProbability,
binaryDictionary.getNgramProbability(prevWordsInfoStartOfSentence, "bbb")); binaryDictionary.getNgramProbability(prevWordsInfoStartOfSentence, "bbb"));
} }
/** Runs the exact-match maximum frequency checks once for each entry of DICT_FORMAT_VERSIONS. */
public void testGetMaxFrequencyOfExactMatches() {
    for (final int version : DICT_FORMAT_VERSIONS) {
        // Delegates to the per-version overload below.
        testGetMaxFrequencyOfExactMatches(version);
    }
}
/**
 * Creates an empty updatable dictionary in the given format version, adds unigrams one at a
 * time and checks getMaxFrequencyOfExactMatches("abc") after each addition.
 */
private void testGetMaxFrequencyOfExactMatches(final int formatVersion) {
    File dictFile = null;
    try {
        dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
    } catch (IOException e) {
        fail("IOException while writing an initial dictionary : " + e);
    }
    final String dictPath = dictFile.getAbsolutePath();
    final long dictLength = dictFile.length();
    final BinaryDictionary dictionary = new BinaryDictionary(dictPath,
            0 /* offset */, dictLength, true /* useFullEditDistance */,
            Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
    final String query = "abc";

    // A variant that differs from the query only by letter case is counted.
    addUnigramWord(dictionary, "abc", 10);
    addUnigramWord(dictionary, "aBc", 15);
    assertEquals(15, dictionary.getMaxFrequencyOfExactMatches(query));

    // Variants with extra apostrophes and/or hyphens are counted as well.
    addUnigramWord(dictionary, "ab'c", 20);
    assertEquals(20, dictionary.getMaxFrequencyOfExactMatches(query));
    addUnigramWord(dictionary, "a-b-c", 25);
    assertEquals(25, dictionary.getMaxFrequencyOfExactMatches(query));
    addUnigramWord(dictionary, "ab-'-'-'-c", 30);
    assertEquals(30, dictionary.getMaxFrequencyOfExactMatches(query));

    // A variant containing a space is not counted, even with a higher frequency.
    addUnigramWord(dictionary, "ab c", 255);
    assertEquals(30, dictionary.getMaxFrequencyOfExactMatches(query));
}
} }