Merge "Implement getMaxProbabilityOfExactMatches()."
This commit is contained in:
commit
8cae9f50b4
12 changed files with 185 additions and 6 deletions
|
@ -356,6 +356,7 @@ public final class BinaryDictionary extends Dictionary {
|
||||||
return getProbabilityNative(mNativeDict, codePoints);
|
return getProbabilityNative(mNativeDict, codePoints);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
public int getMaxFrequencyOfExactMatches(final String word) {
|
public int getMaxFrequencyOfExactMatches(final String word) {
|
||||||
if (TextUtils.isEmpty(word)) return NOT_A_PROBABILITY;
|
if (TextUtils.isEmpty(word)) return NOT_A_PROBABILITY;
|
||||||
int[] codePoints = StringUtils.toCodePointArray(word);
|
int[] codePoints = StringUtils.toCodePointArray(word);
|
||||||
|
|
|
@ -28,6 +28,7 @@ LATIN_IME_CORE_SRC_FILES := \
|
||||||
$(addprefix suggest/core/dictionary/, \
|
$(addprefix suggest/core/dictionary/, \
|
||||||
bigram_dictionary.cpp \
|
bigram_dictionary.cpp \
|
||||||
dictionary.cpp \
|
dictionary.cpp \
|
||||||
|
dictionary_utils.cpp \
|
||||||
digraph_utils.cpp \
|
digraph_utils.cpp \
|
||||||
error_type_utils.cpp \
|
error_type_utils.cpp \
|
||||||
multi_bigram_map.cpp \
|
multi_bigram_map.cpp \
|
||||||
|
|
|
@ -280,8 +280,7 @@ static jint latinime_BinaryDictionary_getMaxProbabilityOfExactMatches(
|
||||||
const jsize wordLength = env->GetArrayLength(word);
|
const jsize wordLength = env->GetArrayLength(word);
|
||||||
int codePoints[wordLength];
|
int codePoints[wordLength];
|
||||||
env->GetIntArrayRegion(word, 0, wordLength, codePoints);
|
env->GetIntArrayRegion(word, 0, wordLength, codePoints);
|
||||||
// TODO: Implement.
|
return dictionary->getMaxProbabilityOfExactMatches(codePoints, wordLength);
|
||||||
return NOT_A_PROBABILITY;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static jint latinime_BinaryDictionary_getBigramProbability(JNIEnv *env, jclass clazz,
|
static jint latinime_BinaryDictionary_getBigramProbability(JNIEnv *env, jclass clazz,
|
||||||
|
|
|
@ -125,7 +125,7 @@ class DicNode {
|
||||||
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
|
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
|
||||||
}
|
}
|
||||||
|
|
||||||
void initAsPassingChild(DicNode *parentDicNode) {
|
void initAsPassingChild(const DicNode *parentDicNode) {
|
||||||
mIsCachedForNextSuggestion = parentDicNode->mIsCachedForNextSuggestion;
|
mIsCachedForNextSuggestion = parentDicNode->mIsCachedForNextSuggestion;
|
||||||
const int codePoint =
|
const int codePoint =
|
||||||
parentDicNode->mDicNodeState.mDicNodeStateOutput.getCurrentWordCodePointAt(
|
parentDicNode->mDicNodeState.mDicNodeStateOutput.getCurrentWordCodePointAt(
|
||||||
|
|
|
@ -48,7 +48,7 @@ namespace latinime {
|
||||||
///////////////////////////////////
|
///////////////////////////////////
|
||||||
// Traverse node expansion utils //
|
// Traverse node expansion utils //
|
||||||
///////////////////////////////////
|
///////////////////////////////////
|
||||||
/* static */ void DicNodeUtils::getAllChildDicNodes(DicNode *dicNode,
|
/* static */ void DicNodeUtils::getAllChildDicNodes(const DicNode *dicNode,
|
||||||
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
|
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
|
||||||
DicNodeVector *const childDicNodes) {
|
DicNodeVector *const childDicNodes) {
|
||||||
if (dicNode->isTotalInputSizeExceedingLimit()) {
|
if (dicNode->isTotalInputSizeExceedingLimit()) {
|
||||||
|
|
|
@ -35,7 +35,7 @@ class DicNodeUtils {
|
||||||
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
|
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
|
||||||
const DicNode *const prevWordLastDicNode, DicNode *const newRootDicNode);
|
const DicNode *const prevWordLastDicNode, DicNode *const newRootDicNode);
|
||||||
static void initByCopy(const DicNode *const srcDicNode, DicNode *const destDicNode);
|
static void initByCopy(const DicNode *const srcDicNode, DicNode *const destDicNode);
|
||||||
static void getAllChildDicNodes(DicNode *dicNode,
|
static void getAllChildDicNodes(const DicNode *dicNode,
|
||||||
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
|
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
|
||||||
DicNodeVector *childDicNodes);
|
DicNodeVector *childDicNodes);
|
||||||
static float getBigramNodeImprobability(
|
static float getBigramNodeImprobability(
|
||||||
|
|
|
@ -52,7 +52,7 @@ class DicNodeVector {
|
||||||
return static_cast<int>(mDicNodes.size());
|
return static_cast<int>(mDicNodes.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
void pushPassingChild(DicNode *dicNode) {
|
void pushPassingChild(const DicNode *dicNode) {
|
||||||
ASSERT(!mLock);
|
ASSERT(!mLock);
|
||||||
mDicNodes.emplace_back();
|
mDicNodes.emplace_back();
|
||||||
mDicNodes.back().initAsPassingChild(dicNode);
|
mDicNodes.back().initAsPassingChild(dicNode);
|
||||||
|
|
|
@ -19,6 +19,7 @@
|
||||||
#include "suggest/core/dictionary/dictionary.h"
|
#include "suggest/core/dictionary/dictionary.h"
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
|
#include "suggest/core/dictionary/dictionary_utils.h"
|
||||||
#include "suggest/core/policy/dictionary_header_structure_policy.h"
|
#include "suggest/core/policy/dictionary_header_structure_policy.h"
|
||||||
#include "suggest/core/result/suggestion_results.h"
|
#include "suggest/core/result/suggestion_results.h"
|
||||||
#include "suggest/core/session/dic_traverse_session.h"
|
#include "suggest/core/session/dic_traverse_session.h"
|
||||||
|
@ -74,6 +75,12 @@ int Dictionary::getProbability(const int *word, int length) const {
|
||||||
return getDictionaryStructurePolicy()->getUnigramProbabilityOfPtNode(pos);
|
return getDictionaryStructurePolicy()->getUnigramProbabilityOfPtNode(pos);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns the result of DictionaryUtils::getMaxProbabilityOfExactMatches() for the given
// code point array, evaluated against this dictionary's structure policy.
// word:   code points of the word to look up.
// length: number of code points in word.
int Dictionary::getMaxProbabilityOfExactMatches(const int *word, int length) const {
|
||||||
|
// Refresh the TimeKeeper's current time before querying, as the other lookup methods
// visible in this file do.
TimeKeeper::setCurrentTime();
|
||||||
|
return DictionaryUtils::getMaxProbabilityOfExactMatches(
|
||||||
|
mDictionaryStructureWithBufferPolicy.get(), word, length);
|
||||||
|
}
|
||||||
|
|
||||||
int Dictionary::getBigramProbability(const PrevWordsInfo *const prevWordsInfo, const int *word,
|
int Dictionary::getBigramProbability(const PrevWordsInfo *const prevWordsInfo, const int *word,
|
||||||
int length) const {
|
int length) const {
|
||||||
TimeKeeper::setCurrentTime();
|
TimeKeeper::setCurrentTime();
|
||||||
|
|
|
@ -73,6 +73,8 @@ class Dictionary {
|
||||||
|
|
||||||
int getProbability(const int *word, int length) const;
|
int getProbability(const int *word, int length) const;
|
||||||
|
|
||||||
|
int getMaxProbabilityOfExactMatches(const int *word, int length) const;
|
||||||
|
|
||||||
int getBigramProbability(const PrevWordsInfo *const prevWordsInfo,
|
int getBigramProbability(const PrevWordsInfo *const prevWordsInfo,
|
||||||
const int *word, int length) const;
|
const int *word, int length) const;
|
||||||
|
|
||||||
|
|
96
native/jni/src/suggest/core/dictionary/dictionary_utils.cpp
Normal file
96
native/jni/src/suggest/core/dictionary/dictionary_utils.cpp
Normal file
|
@ -0,0 +1,96 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2014, The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "suggest/core/dictionary/dictionary_utils.h"
|
||||||
|
|
||||||
|
#include "suggest/core/dicnode/dic_node.h"
|
||||||
|
#include "suggest/core/dicnode/dic_node_priority_queue.h"
|
||||||
|
#include "suggest/core/dicnode/dic_node_vector.h"
|
||||||
|
#include "suggest/core/dictionary/dictionary.h"
|
||||||
|
#include "suggest/core/dictionary/digraph_utils.h"
|
||||||
|
#include "suggest/core/session/prev_words_info.h"
|
||||||
|
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
// Walks the dictionary from the root, one input code point at a time, keeping every DicNode
// that still matches the input, and returns the largest probability among the surviving
// terminal nodes.
//
// Matching is done on the base-lower form of each input code point, so candidates may differ
// from the input by case or accent. processChildDicNodes() additionally lets candidates
// contain intentional-omission code points and digraphs.
//
// dictionaryStructurePolicy: dictionary to search.
// codePoints:                input word as code points.
// codePointCount:            number of entries in codePoints.
// Returns the maximum of NOT_A_PROBABILITY and the probabilities of all matching terminal
// nodes; the value stays NOT_A_PROBABILITY when no terminal node matches.
/* static */ int DictionaryUtils::getMaxProbabilityOfExactMatches(
|
||||||
|
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
|
||||||
|
const int *const codePoints, const int codePointCount) {
|
||||||
|
// Candidates matching the input consumed so far.
std::vector<DicNode> current;
|
||||||
|
// Candidates for the next input position; built per iteration, then swapped into current.
std::vector<DicNode> next;
|
||||||
|
|
||||||
|
// No prev words information.
|
||||||
|
PrevWordsInfo emptyPrevWordsInfo;
|
||||||
|
int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
|
||||||
|
emptyPrevWordsInfo.getPrevWordsTerminalPtNodePos(dictionaryStructurePolicy,
|
||||||
|
prevWordsPtNodePos, false /* tryLowerCaseSearch */);
|
||||||
|
// Seed the search with a single root node.
current.emplace_back();
|
||||||
|
DicNodeUtils::initAsRoot(dictionaryStructurePolicy, prevWordsPtNodePos, &current.front());
|
||||||
|
for (int i = 0; i < codePointCount; ++i) {
|
||||||
|
// The base-lower input is used to ignore case errors and accent errors.
|
||||||
|
const int codePoint = CharUtils::toBaseLowerCase(codePoints[i]);
|
||||||
|
for (const DicNode &dicNode : current) {
|
||||||
|
// A node that is in the middle of a digraph consumes this input code point by
// advancing its digraph index instead of descending to its children.
if (dicNode.isInDigraph() && dicNode.getNodeCodePoint() == codePoint) {
|
||||||
|
next.emplace_back(dicNode);
|
||||||
|
next.back().advanceDigraphIndex();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
processChildDicNodes(dictionaryStructurePolicy, codePoint, &dicNode, &next);
|
||||||
|
}
|
||||||
|
// Clearing before the swap leaves next empty for the following iteration.
current.clear();
|
||||||
|
current.swap(next);
|
||||||
|
}
|
||||||
|
|
||||||
|
int maxProbability = NOT_A_PROBABILITY;
|
||||||
|
for (const DicNode &dicNode : current) {
|
||||||
|
// Only nodes that end a word contribute a probability.
if (!dicNode.isTerminalDicNode()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// dicNode can contain case errors, accent errors, intentional omissions or digraphs.
|
||||||
|
maxProbability = std::max(maxProbability, dicNode.getProbability());
|
||||||
|
}
|
||||||
|
return maxProbability;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Appends to outDicNodes every descendant of parentDicNode that matches inputCodePoint.
//
// For each child of parentDicNode, three independent checks are made:
//  1. If the base-lower form of the child's code point equals inputCodePoint, a copy of the
//     child is appended.
//  2. If the child canBeIntentionalOmission(), the search recurses with the child as the
//     parent and the same inputCodePoint, so the child itself consumes no input.
//  3. If the header policy reports a digraph for the child's code point, the child's digraph
//     index is advanced and, on a match, advanced again and a copy is appended.
//
// dictionaryStructurePolicy: dictionary being searched.
// inputCodePoint:            code point to match; the caller passes a base-lower value.
// parentDicNode:             node whose children are examined.
// outDicNodes:               receives copies of the matching nodes.
/* static */ void DictionaryUtils::processChildDicNodes(
|
||||||
|
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
|
||||||
|
const int inputCodePoint, const DicNode *const parentDicNode,
|
||||||
|
std::vector<DicNode> *const outDicNodes) {
|
||||||
|
DicNodeVector childDicNodes;
|
||||||
|
DicNodeUtils::getAllChildDicNodes(parentDicNode, dictionaryStructurePolicy, &childDicNodes);
|
||||||
|
for (int childIndex = 0; childIndex < childDicNodes.getSizeAndLock(); ++childIndex) {
|
||||||
|
DicNode *const childDicNode = childDicNodes[childIndex];
|
||||||
|
// Base-lower form of the child's code point, captured before any digraph advance below.
const int codePoint = CharUtils::toBaseLowerCase(childDicNode->getNodeCodePoint());
|
||||||
|
if (inputCodePoint == codePoint) {
|
||||||
|
outDicNodes->emplace_back(*childDicNode);
|
||||||
|
}
|
||||||
|
if (childDicNode->canBeIntentionalOmission()) {
|
||||||
|
// Skip over this child and try to match the same input against its descendants.
processChildDicNodes(dictionaryStructurePolicy, inputCodePoint, childDicNode,
|
||||||
|
outDicNodes);
|
||||||
|
}
|
||||||
|
if (DigraphUtils::hasDigraphForCodePoint(
|
||||||
|
dictionaryStructurePolicy->getHeaderStructurePolicy(),
|
||||||
|
childDicNode->getNodeCodePoint())) {
|
||||||
|
// This mutates childDicNode in place; the copy appended in the first check above
// was taken before the advance.
childDicNode->advanceDigraphIndex();
|
||||||
|
// NOTE(review): this compares against codePoint (the child's own base-lower code
// point), not inputCodePoint - confirm this is the intended operand.
if (childDicNode->getNodeCodePoint() == codePoint) {
|
||||||
|
childDicNode->advanceDigraphIndex();
|
||||||
|
outDicNodes->emplace_back(*childDicNode);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace latinime
|
44
native/jni/src/suggest/core/dictionary/dictionary_utils.h
Normal file
44
native/jni/src/suggest/core/dictionary/dictionary_utils.h
Normal file
|
@ -0,0 +1,44 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2014 The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LATINIME_DICTIONARY_UTILS_H
|
||||||
|
#define LATINIME_DICTIONARY_UTILS_H
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "defines.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
class DictionaryStructureWithBufferPolicy;
|
||||||
|
class DicNode;
|
||||||
|
|
||||||
|
// Static-only helper class for dictionary lookups that search the dictionary through DicNode
// traversal.
class DictionaryUtils {
|
||||||
|
public:
|
||||||
|
// Returns the largest probability among the terminal nodes that match codePoints, where
// matching is done on base-lower code points. The implementation starts from
// NOT_A_PROBABILITY, so that value is returned when nothing matches.
static int getMaxProbabilityOfExactMatches(
|
||||||
|
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
|
||||||
|
const int *const codePoints, const int codePointCount);
|
||||||
|
|
||||||
|
private:
|
||||||
|
DISALLOW_IMPLICIT_CONSTRUCTORS(DictionaryUtils);
|
||||||
|
|
||||||
|
// Appends to outDicNodes copies of the descendants of parentDicNode that match
// inputCodePoint; used by getMaxProbabilityOfExactMatches() to advance one input position.
static void processChildDicNodes(
|
||||||
|
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
|
||||||
|
const int inputCodePoint, const DicNode *const parentDicNode,
|
||||||
|
std::vector<DicNode> *const outDicNodes);
|
||||||
|
};
|
||||||
|
} // namespace latinime
|
||||||
|
#endif // LATINIME_DICTIONARY_UTILS_H
|
|
@ -1472,4 +1472,33 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
assertEquals(bigramProbability,
|
assertEquals(bigramProbability,
|
||||||
binaryDictionary.getNgramProbability(prevWordsInfoStartOfSentence, "bbb"));
|
binaryDictionary.getNgramProbability(prevWordsInfoStartOfSentence, "bbb"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Runs the getMaxFrequencyOfExactMatches() test once for every entry in
// DICT_FORMAT_VERSIONS.
public void testGetMaxFrequencyOfExactMatches() {
|
||||||
|
for (final int formatVersion : DICT_FORMAT_VERSIONS) {
|
||||||
|
testGetMaxFrequencyOfExactMatches(formatVersion);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Creates an empty updatable dictionary of the given format version, adds variants of "abc"
// one at a time with increasing frequency, and checks after each addition what
// getMaxFrequencyOfExactMatches("abc") returns.
private void testGetMaxFrequencyOfExactMatches(final int formatVersion) {
|
||||||
|
File dictFile = null;
|
||||||
|
try {
|
||||||
|
dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
|
||||||
|
} catch (IOException e) {
|
||||||
|
fail("IOException while writing an initial dictionary : " + e);
|
||||||
|
}
|
||||||
|
final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
|
||||||
|
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
|
||||||
|
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
||||||
|
addUnigramWord(binaryDictionary, "abc", 10);
|
||||||
|
// A variant that differs only by case is expected to count as a match.
addUnigramWord(binaryDictionary, "aBc", 15);
|
||||||
|
assertEquals(15, binaryDictionary.getMaxFrequencyOfExactMatches("abc"));
|
||||||
|
// Variants containing apostrophes and/or hyphens are expected to count as matches.
addUnigramWord(binaryDictionary, "ab'c", 20);
|
||||||
|
assertEquals(20, binaryDictionary.getMaxFrequencyOfExactMatches("abc"));
|
||||||
|
addUnigramWord(binaryDictionary, "a-b-c", 25);
|
||||||
|
assertEquals(25, binaryDictionary.getMaxFrequencyOfExactMatches("abc"));
|
||||||
|
addUnigramWord(binaryDictionary, "ab-'-'-'-c", 30);
|
||||||
|
assertEquals(30, binaryDictionary.getMaxFrequencyOfExactMatches("abc"));
|
||||||
|
// A variant containing a space is expected NOT to match: the result stays at 30 even
// though this entry has a higher frequency.
addUnigramWord(binaryDictionary, "ab c", 255);
|
||||||
|
assertEquals(30, binaryDictionary.getMaxFrequencyOfExactMatches("abc"));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue