Separate unigram/bigram property from WordProperty.
Bug: 13406708 Change-Id: I48e9fccedd9dcdc1a35ffe027745b58966a83315
This commit is contained in:
parent
a857235d0c
commit
e41b2ed8d3
11 changed files with 261 additions and 155 deletions
|
@ -32,7 +32,7 @@ LATIN_IME_CORE_SRC_FILES := \
|
||||||
digraph_utils.cpp \
|
digraph_utils.cpp \
|
||||||
error_type_utils.cpp \
|
error_type_utils.cpp \
|
||||||
multi_bigram_map.cpp \
|
multi_bigram_map.cpp \
|
||||||
word_property.cpp) \
|
property/word_property.cpp) \
|
||||||
$(addprefix suggest/core/layout/, \
|
$(addprefix suggest/core/layout/, \
|
||||||
additional_proximity_chars.cpp \
|
additional_proximity_chars.cpp \
|
||||||
proximity_info.cpp \
|
proximity_info.cpp \
|
||||||
|
|
|
@ -24,7 +24,7 @@
|
||||||
#include "jni.h"
|
#include "jni.h"
|
||||||
#include "jni_common.h"
|
#include "jni_common.h"
|
||||||
#include "suggest/core/dictionary/dictionary.h"
|
#include "suggest/core/dictionary/dictionary.h"
|
||||||
#include "suggest/core/dictionary/word_property.h"
|
#include "suggest/core/dictionary/property/word_property.h"
|
||||||
#include "suggest/core/result/suggestion_results.h"
|
#include "suggest/core/result/suggestion_results.h"
|
||||||
#include "suggest/core/suggest_options.h"
|
#include "suggest/core/suggest_options.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h"
|
#include "suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h"
|
||||||
|
|
|
@ -22,7 +22,7 @@
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "jni.h"
|
#include "jni.h"
|
||||||
#include "suggest/core/dictionary/bigram_dictionary.h"
|
#include "suggest/core/dictionary/bigram_dictionary.h"
|
||||||
#include "suggest/core/dictionary/word_property.h"
|
#include "suggest/core/dictionary/property/word_property.h"
|
||||||
#include "suggest/core/policy/dictionary_header_structure_policy.h"
|
#include "suggest/core/policy/dictionary_header_structure_policy.h"
|
||||||
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
|
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
|
||||||
#include "suggest/core/suggest_interface.h"
|
#include "suggest/core/suggest_interface.h"
|
||||||
|
|
|
@ -0,0 +1,65 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2014 The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LATINIME_BIGRAM_PROPERTY_H
|
||||||
|
#define LATINIME_BIGRAM_PROPERTY_H
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "defines.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
// Value object describing one bigram entry: the code points of the target word
// together with the probability, timestamp, level and count that were supplied
// at construction. The public interface is read-only.
class BigramProperty {
|
||||||
|
public:
|
||||||
|
// Copies the vector pointed to by targetCodePoints into this object, so the
// caller's vector does not have to outlive the instance. targetCodePoints is
// dereferenced without a null check and therefore must not be null.
BigramProperty(const std::vector<int> *const targetCodePoints,
|
||||||
|
const int probability, const int timestamp, const int level, const int count)
|
||||||
|
: mTargetCodePoints(*targetCodePoints), mProbability(probability),
|
||||||
|
mTimestamp(timestamp), mLevel(level), mCount(count) {}
|
||||||
|
|
||||||
|
// Returns a pointer to the internally stored copy of the target code points.
// The pointer refers to a member of this object.
const std::vector<int> *getTargetCodePoints() const {
|
||||||
|
return &mTargetCodePoints;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the probability passed to the constructor.
int getProbability() const {
|
||||||
|
return mProbability;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the timestamp passed to the constructor.
int getTimestamp() const {
|
||||||
|
return mTimestamp;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the level passed to the constructor.
int getLevel() const {
|
||||||
|
return mLevel;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the count passed to the constructor.
int getCount() const {
|
||||||
|
return mCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
// The implicit copy constructor and copy assignment operator are intentionally
// kept so that instances can be stored in std::vector.
// NOTE(review): DISALLOW_DEFAULT_CONSTRUCTOR presumably comes from defines.h;
// its expansion is not visible here.
DISALLOW_DEFAULT_CONSTRUCTOR(BigramProperty);
|
||||||
|
|
||||||
|
// TODO: Make members const.
|
||||||
|
std::vector<int> mTargetCodePoints;
|
||||||
|
int mProbability;
|
||||||
|
int mTimestamp;
|
||||||
|
int mLevel;
|
||||||
|
int mCount;
|
||||||
|
};
|
||||||
|
} // namespace latinime
|
||||||
|
#endif // LATINIME_BIGRAM_PROPERTY_H
|
|
@ -0,0 +1,107 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2014 The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LATINIME_UNIGRAM_PROPERTY_H
|
||||||
|
#define LATINIME_UNIGRAM_PROPERTY_H
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "defines.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
// Value object holding the unigram-level attributes of a word: the not-a-word
// and blacklisted flags, the probability, the historical information
// (timestamp, level, count) and the list of shortcut targets.
class UnigramProperty {
|
||||||
|
public:
|
||||||
|
// One shortcut entry: the code points of the shortcut target and the
// probability supplied for it.
class ShortcutProperty {
|
||||||
|
public:
|
||||||
|
// Copies the vector pointed to by targetCodePoints. The pointer is
// dereferenced without a null check and therefore must not be null.
ShortcutProperty(const std::vector<int> *const targetCodePoints, const int probability)
|
||||||
|
: mTargetCodePoints(*targetCodePoints), mProbability(probability) {}
|
||||||
|
|
||||||
|
// Returns a pointer to the internally stored copy of the target code points.
const std::vector<int> *getTargetCodePoints() const {
|
||||||
|
return &mTargetCodePoints;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the probability passed to the constructor.
int getProbability() const {
|
||||||
|
return mProbability;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
// The implicit copy constructor and copy assignment operator are intentionally
// kept so that instances can be stored in std::vector.
DISALLOW_DEFAULT_CONSTRUCTOR(ShortcutProperty);
|
||||||
|
|
||||||
|
// TODO: Make members const.
|
||||||
|
std::vector<int> mTargetCodePoints;
|
||||||
|
int mProbability;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Creates an instance with both flags false, NOT_A_PROBABILITY as the
// probability, NOT_A_TIMESTAMP as the timestamp, zero level and count, and no
// shortcuts.
UnigramProperty()
|
||||||
|
: mIsNotAWord(false), mIsBlacklisted(false), mProbability(NOT_A_PROBABILITY),
|
||||||
|
mTimestamp(NOT_A_TIMESTAMP), mLevel(0), mCount(0), mShortcuts() {}
|
||||||
|
|
||||||
|
// Stores the given attributes and copies the vector pointed to by shortcuts.
// shortcuts is dereferenced without a null check and therefore must not be null.
UnigramProperty(const bool isNotAWord, const bool isBlacklisted, const int probability,
|
||||||
|
const int timestamp, const int level, const int count,
|
||||||
|
const std::vector<ShortcutProperty> *const shortcuts)
|
||||||
|
: mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted), mProbability(probability),
|
||||||
|
mTimestamp(timestamp), mLevel(level), mCount(count), mShortcuts(*shortcuts) {}
|
||||||
|
|
||||||
|
// Returns the not-a-word flag.
bool isNotAWord() const {
|
||||||
|
return mIsNotAWord;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the blacklisted flag.
bool isBlacklisted() const {
|
||||||
|
return mIsBlacklisted;
|
||||||
|
}
|
||||||
|
|
||||||
|
// True when at least one shortcut is stored; derived from the shortcut list
// rather than kept as a separate flag.
bool hasShortcuts() const {
|
||||||
|
return !mShortcuts.empty();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the stored probability.
int getProbability() const {
|
||||||
|
return mProbability;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the stored timestamp.
int getTimestamp() const {
|
||||||
|
return mTimestamp;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the stored level.
int getLevel() const {
|
||||||
|
return mLevel;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the stored count.
int getCount() const {
|
||||||
|
return mCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns a reference to the stored shortcut list. The reference refers to a
// member of this object.
const std::vector<ShortcutProperty> &getShortcuts() const {
|
||||||
|
return mShortcuts;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
// The implicit copy constructor is intentionally kept so that instances can be
// returned by value.
DISALLOW_ASSIGNMENT_OPERATOR(UnigramProperty);
|
||||||
|
|
||||||
|
// TODO: Make members const.
|
||||||
|
bool mIsNotAWord;
|
||||||
|
bool mIsBlacklisted;
|
||||||
|
int mProbability;
|
||||||
|
// Historical information
|
||||||
|
int mTimestamp;
|
||||||
|
int mLevel;
|
||||||
|
int mCount;
|
||||||
|
std::vector<ShortcutProperty> mShortcuts;
|
||||||
|
};
|
||||||
|
} // namespace latinime
|
||||||
|
#endif // LATINIME_UNIGRAM_PROPERTY_H
|
|
@ -14,7 +14,7 @@
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "suggest/core/dictionary/word_property.h"
|
#include "suggest/core/dictionary/property/word_property.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
|
@ -23,9 +23,12 @@ void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints,
|
||||||
jobject outBigramProbabilities, jobject outShortcutTargets,
|
jobject outBigramProbabilities, jobject outShortcutTargets,
|
||||||
jobject outShortcutProbabilities) const {
|
jobject outShortcutProbabilities) const {
|
||||||
env->SetIntArrayRegion(outCodePoints, 0 /* start */, mCodePoints.size(), &mCodePoints[0]);
|
env->SetIntArrayRegion(outCodePoints, 0 /* start */, mCodePoints.size(), &mCodePoints[0]);
|
||||||
jboolean flags[] = {mIsNotAWord, mIsBlacklisted, mHasBigrams, mHasShortcuts};
|
|
||||||
|
jboolean flags[] = {mUnigramProperty.isNotAWord(), mUnigramProperty.isBlacklisted(),
|
||||||
|
!mBigrams.empty(), mUnigramProperty.hasShortcuts()};
|
||||||
env->SetBooleanArrayRegion(outFlags, 0 /* start */, NELEMS(flags), flags);
|
env->SetBooleanArrayRegion(outFlags, 0 /* start */, NELEMS(flags), flags);
|
||||||
int probabilityInfo[] = {mProbability, mTimestamp, mLevel, mCount};
|
int probabilityInfo[] = {mUnigramProperty.getProbability(), mUnigramProperty.getTimestamp(),
|
||||||
|
mUnigramProperty.getLevel(), mUnigramProperty.getCount()};
|
||||||
env->SetIntArrayRegion(outProbabilityInfo, 0 /* start */, NELEMS(probabilityInfo),
|
env->SetIntArrayRegion(outProbabilityInfo, 0 /* start */, NELEMS(probabilityInfo),
|
||||||
probabilityInfo);
|
probabilityInfo);
|
||||||
|
|
||||||
|
@ -35,19 +38,17 @@ void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints,
|
||||||
jmethodID addMethodId = env->GetMethodID(arrayListClass, "add", "(Ljava/lang/Object;)Z");
|
jmethodID addMethodId = env->GetMethodID(arrayListClass, "add", "(Ljava/lang/Object;)Z");
|
||||||
|
|
||||||
// Output bigrams.
|
// Output bigrams.
|
||||||
const int bigramCount = mBigrams.size();
|
for (const auto &bigramProperty : mBigrams) {
|
||||||
for (int i = 0; i < bigramCount; ++i) {
|
const std::vector<int> *const word1CodePoints = bigramProperty.getTargetCodePoints();
|
||||||
const BigramProperty *const bigramProperty = &mBigrams[i];
|
|
||||||
const std::vector<int> *const word1CodePoints = bigramProperty->getTargetCodePoints();
|
|
||||||
jintArray bigramWord1CodePointArray = env->NewIntArray(word1CodePoints->size());
|
jintArray bigramWord1CodePointArray = env->NewIntArray(word1CodePoints->size());
|
||||||
env->SetIntArrayRegion(bigramWord1CodePointArray, 0 /* start */,
|
env->SetIntArrayRegion(bigramWord1CodePointArray, 0 /* start */,
|
||||||
word1CodePoints->size(), &word1CodePoints->at(0));
|
word1CodePoints->size(), word1CodePoints->data());
|
||||||
env->CallBooleanMethod(outBigramTargets, addMethodId, bigramWord1CodePointArray);
|
env->CallBooleanMethod(outBigramTargets, addMethodId, bigramWord1CodePointArray);
|
||||||
env->DeleteLocalRef(bigramWord1CodePointArray);
|
env->DeleteLocalRef(bigramWord1CodePointArray);
|
||||||
|
|
||||||
int bigramProbabilityInfo[] = {bigramProperty->getProbability(),
|
int bigramProbabilityInfo[] = {bigramProperty.getProbability(),
|
||||||
bigramProperty->getTimestamp(), bigramProperty->getLevel(),
|
bigramProperty.getTimestamp(), bigramProperty.getLevel(),
|
||||||
bigramProperty->getCount()};
|
bigramProperty.getCount()};
|
||||||
jintArray bigramProbabilityInfoArray = env->NewIntArray(NELEMS(bigramProbabilityInfo));
|
jintArray bigramProbabilityInfoArray = env->NewIntArray(NELEMS(bigramProbabilityInfo));
|
||||||
env->SetIntArrayRegion(bigramProbabilityInfoArray, 0 /* start */,
|
env->SetIntArrayRegion(bigramProbabilityInfoArray, 0 /* start */,
|
||||||
NELEMS(bigramProbabilityInfo), bigramProbabilityInfo);
|
NELEMS(bigramProbabilityInfo), bigramProbabilityInfo);
|
||||||
|
@ -56,16 +57,15 @@ void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Output shortcuts.
|
// Output shortcuts.
|
||||||
const int shortcutTargetCount = mShortcuts.size();
|
for (const auto &shortcut : mUnigramProperty.getShortcuts()) {
|
||||||
for (int i = 0; i < shortcutTargetCount; ++i) {
|
const std::vector<int> *const targetCodePoints = shortcut.getTargetCodePoints();
|
||||||
const std::vector<int> *const targetCodePoints = mShortcuts[i].getTargetCodePoints();
|
|
||||||
jintArray shortcutTargetCodePointArray = env->NewIntArray(targetCodePoints->size());
|
jintArray shortcutTargetCodePointArray = env->NewIntArray(targetCodePoints->size());
|
||||||
env->SetIntArrayRegion(shortcutTargetCodePointArray, 0 /* start */,
|
env->SetIntArrayRegion(shortcutTargetCodePointArray, 0 /* start */,
|
||||||
targetCodePoints->size(), &targetCodePoints->at(0));
|
targetCodePoints->size(), targetCodePoints->data());
|
||||||
env->CallBooleanMethod(outShortcutTargets, addMethodId, shortcutTargetCodePointArray);
|
env->CallBooleanMethod(outShortcutTargets, addMethodId, shortcutTargetCodePointArray);
|
||||||
env->DeleteLocalRef(shortcutTargetCodePointArray);
|
env->DeleteLocalRef(shortcutTargetCodePointArray);
|
||||||
jobject integerProbability = env->NewObject(integerClass, intToIntegerConstructorId,
|
jobject integerProbability = env->NewObject(integerClass, intToIntegerConstructorId,
|
||||||
mShortcuts[i].getProbability());
|
shortcut.getProbability());
|
||||||
env->CallBooleanMethod(outShortcutProbabilities, addMethodId, integerProbability);
|
env->CallBooleanMethod(outShortcutProbabilities, addMethodId, integerProbability);
|
||||||
env->DeleteLocalRef(integerProbability);
|
env->DeleteLocalRef(integerProbability);
|
||||||
}
|
}
|
|
@ -0,0 +1,54 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2014 The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LATINIME_WORD_PROPERTY_H
|
||||||
|
#define LATINIME_WORD_PROPERTY_H
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "defines.h"
|
||||||
|
#include "jni.h"
|
||||||
|
#include "suggest/core/dictionary/property/bigram_property.h"
|
||||||
|
#include "suggest/core/dictionary/property/unigram_property.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
// This class is used to return the information belonging to a word to the Java side.
// It bundles the word's code points, its UnigramProperty and its list of
// BigramProperty entries; all three members are const once constructed.
|
||||||
|
class WordProperty {
|
||||||
|
public:
|
||||||
|
// The default constructor creates an instance that indicates an invalid word:
// empty code points, a default-constructed UnigramProperty and no bigrams.
|
||||||
|
WordProperty()
|
||||||
|
: mCodePoints(), mUnigramProperty(), mBigrams() {}
|
||||||
|
|
||||||
|
// Copies the objects pointed to by codePoints, unigramProperty and bigrams.
// All three pointers are dereferenced without a null check and therefore must
// not be null.
WordProperty(const std::vector<int> *const codePoints,
|
||||||
|
const UnigramProperty *const unigramProperty,
|
||||||
|
const std::vector<BigramProperty> *const bigrams)
|
||||||
|
: mCodePoints(*codePoints), mUnigramProperty(*unigramProperty), mBigrams(*bigrams) {}
|
||||||
|
|
||||||
|
// Writes the stored properties into the JNI output arguments supplied by the
// caller. Defined out of line in the corresponding .cpp file.
// NOTE(review): the exact layout written to each output argument is defined by
// that implementation; confirm there before relying on it.
void outputProperties(JNIEnv *const env, jintArray outCodePoints, jbooleanArray outFlags,
|
||||||
|
jintArray outProbabilityInfo, jobject outBigramTargets, jobject outBigramProbabilities,
|
||||||
|
jobject outShortcutTargets, jobject outShortcutProbabilities) const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
// The implicit copy constructor is intentionally kept so that instances can be
// returned by value.
|
||||||
|
DISALLOW_ASSIGNMENT_OPERATOR(WordProperty);
|
||||||
|
|
||||||
|
const std::vector<int> mCodePoints;
|
||||||
|
const UnigramProperty mUnigramProperty;
|
||||||
|
const std::vector<BigramProperty> mBigrams;
|
||||||
|
};
|
||||||
|
} // namespace latinime
|
||||||
|
#endif // LATINIME_WORD_PROPERTY_H
|
|
@ -1,121 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (C) 2013 The Android Open Source Project
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef LATINIME_WORD_PROPERTY_H
|
|
||||||
#define LATINIME_WORD_PROPERTY_H
|
|
||||||
|
|
||||||
#include <cstring>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
#include "defines.h"
|
|
||||||
#include "jni.h"
|
|
||||||
|
|
||||||
namespace latinime {
|
|
||||||
|
|
||||||
// This class is used for returning information belonging to a word to java side.
|
|
||||||
class WordProperty {
|
|
||||||
public:
|
|
||||||
class BigramProperty {
|
|
||||||
public:
|
|
||||||
BigramProperty(const std::vector<int> *const targetCodePoints,
|
|
||||||
const int probability, const int timestamp, const int level, const int count)
|
|
||||||
: mTargetCodePoints(*targetCodePoints), mProbability(probability),
|
|
||||||
mTimestamp(timestamp), mLevel(level), mCount(count) {}
|
|
||||||
|
|
||||||
const std::vector<int> *getTargetCodePoints() const {
|
|
||||||
return &mTargetCodePoints;
|
|
||||||
}
|
|
||||||
|
|
||||||
int getProbability() const {
|
|
||||||
return mProbability;
|
|
||||||
}
|
|
||||||
|
|
||||||
int getTimestamp() const {
|
|
||||||
return mTimestamp;
|
|
||||||
}
|
|
||||||
|
|
||||||
int getLevel() const {
|
|
||||||
return mLevel;
|
|
||||||
}
|
|
||||||
|
|
||||||
int getCount() const {
|
|
||||||
return mCount;
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
std::vector<int> mTargetCodePoints;
|
|
||||||
int mProbability;
|
|
||||||
int mTimestamp;
|
|
||||||
int mLevel;
|
|
||||||
int mCount;
|
|
||||||
};
|
|
||||||
|
|
||||||
class ShortcutProperty {
|
|
||||||
public:
|
|
||||||
ShortcutProperty(const std::vector<int> *const targetCodePoints, const int probability)
|
|
||||||
: mTargetCodePoints(*targetCodePoints), mProbability(probability) {}
|
|
||||||
|
|
||||||
const std::vector<int> *getTargetCodePoints() const {
|
|
||||||
return &mTargetCodePoints;
|
|
||||||
}
|
|
||||||
|
|
||||||
int getProbability() const {
|
|
||||||
return mProbability;
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
std::vector<int> mTargetCodePoints;
|
|
||||||
int mProbability;
|
|
||||||
};
|
|
||||||
|
|
||||||
// Invalid word.
|
|
||||||
WordProperty()
|
|
||||||
: mCodePoints(), mIsNotAWord(false), mIsBlacklisted(false),
|
|
||||||
mHasBigrams(false), mHasShortcuts(false), mProbability(NOT_A_PROBABILITY),
|
|
||||||
mTimestamp(0), mLevel(0), mCount(0), mBigrams(), mShortcuts() {}
|
|
||||||
|
|
||||||
WordProperty(const std::vector<int> *const codePoints,
|
|
||||||
const bool isNotAWord, const bool isBlacklisted, const bool hasBigrams,
|
|
||||||
const bool hasShortcuts, const int probability, const int timestamp,
|
|
||||||
const int level, const int count, const std::vector<BigramProperty> *const bigrams,
|
|
||||||
const std::vector<ShortcutProperty> *const shortcuts)
|
|
||||||
: mCodePoints(*codePoints), mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted),
|
|
||||||
mHasBigrams(hasBigrams), mHasShortcuts(hasShortcuts), mProbability(probability),
|
|
||||||
mTimestamp(timestamp), mLevel(level), mCount(count), mBigrams(*bigrams),
|
|
||||||
mShortcuts(*shortcuts) {}
|
|
||||||
|
|
||||||
void outputProperties(JNIEnv *const env, jintArray outCodePoints, jbooleanArray outFlags,
|
|
||||||
jintArray outProbabilityInfo, jobject outBigramTargets, jobject outBigramProbabilities,
|
|
||||||
jobject outShortcutTargets, jobject outShortcutProbabilities) const;
|
|
||||||
|
|
||||||
private:
|
|
||||||
DISALLOW_ASSIGNMENT_OPERATOR(WordProperty);
|
|
||||||
|
|
||||||
std::vector<int> mCodePoints;
|
|
||||||
bool mIsNotAWord;
|
|
||||||
bool mIsBlacklisted;
|
|
||||||
bool mHasBigrams;
|
|
||||||
bool mHasShortcuts;
|
|
||||||
int mProbability;
|
|
||||||
// Historical information
|
|
||||||
int mTimestamp;
|
|
||||||
int mLevel;
|
|
||||||
int mCount;
|
|
||||||
std::vector<BigramProperty> mBigrams;
|
|
||||||
std::vector<ShortcutProperty> mShortcuts;
|
|
||||||
};
|
|
||||||
} // namespace latinime
|
|
||||||
#endif // LATINIME_WORD_PROPERTY_H
|
|
|
@ -20,7 +20,7 @@
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "suggest/core/dictionary/word_property.h"
|
#include "suggest/core/dictionary/property/word_property.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
|
|
|
@ -339,7 +339,7 @@ const WordProperty PatriciaTriePolicy::getWordProperty(const int *const codePoin
|
||||||
std::vector<int> codePointVector(ptNodeParams.getCodePoints(),
|
std::vector<int> codePointVector(ptNodeParams.getCodePoints(),
|
||||||
ptNodeParams.getCodePoints() + ptNodeParams.getCodePointCount());
|
ptNodeParams.getCodePoints() + ptNodeParams.getCodePointCount());
|
||||||
// Fetch bigram information.
|
// Fetch bigram information.
|
||||||
std::vector<WordProperty::BigramProperty> bigrams;
|
std::vector<BigramProperty> bigrams;
|
||||||
const int bigramListPos = getBigramsPositionOfPtNode(ptNodePos);
|
const int bigramListPos = getBigramsPositionOfPtNode(ptNodePos);
|
||||||
int bigramWord1CodePoints[MAX_WORD_LENGTH];
|
int bigramWord1CodePoints[MAX_WORD_LENGTH];
|
||||||
BinaryDictionaryBigramsIterator bigramsIt(getBigramsStructurePolicy(), bigramListPos);
|
BinaryDictionaryBigramsIterator bigramsIt(getBigramsStructurePolicy(), bigramListPos);
|
||||||
|
@ -360,7 +360,7 @@ const WordProperty PatriciaTriePolicy::getWordProperty(const int *const codePoin
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Fetch shortcut information.
|
// Fetch shortcut information.
|
||||||
std::vector<WordProperty::ShortcutProperty> shortcuts;
|
std::vector<UnigramProperty::ShortcutProperty> shortcuts;
|
||||||
int shortcutPos = getShortcutPositionOfPtNode(ptNodePos);
|
int shortcutPos = getShortcutPositionOfPtNode(ptNodePos);
|
||||||
if (shortcutPos != NOT_A_DICT_POS) {
|
if (shortcutPos != NOT_A_DICT_POS) {
|
||||||
int shortcutTargetCodePoints[MAX_WORD_LENGTH];
|
int shortcutTargetCodePoints[MAX_WORD_LENGTH];
|
||||||
|
@ -379,11 +379,10 @@ const WordProperty PatriciaTriePolicy::getWordProperty(const int *const codePoin
|
||||||
shortcuts.emplace_back(&shortcutTarget, shortcutProbability);
|
shortcuts.emplace_back(&shortcutTarget, shortcutProbability);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return WordProperty(&codePointVector, ptNodeParams.isNotAWord(),
|
const UnigramProperty unigramProperty(ptNodeParams.isNotAWord(),
|
||||||
ptNodeParams.isBlacklisted(), ptNodeParams.hasBigrams(),
|
ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
|
||||||
ptNodeParams.hasShortcutTargets(), ptNodeParams.getProbability(),
|
NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts);
|
||||||
NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */,
|
return WordProperty(&codePointVector, &unigramProperty, &bigrams);
|
||||||
&bigrams, &shortcuts);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints) {
|
int PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints) {
|
||||||
|
|
|
@ -20,7 +20,9 @@
|
||||||
|
|
||||||
#include "suggest/core/dicnode/dic_node.h"
|
#include "suggest/core/dicnode/dic_node.h"
|
||||||
#include "suggest/core/dicnode/dic_node_vector.h"
|
#include "suggest/core/dicnode/dic_node_vector.h"
|
||||||
#include "suggest/core/dictionary/word_property.h"
|
#include "suggest/core/dictionary/property/bigram_property.h"
|
||||||
|
#include "suggest/core/dictionary/property/unigram_property.h"
|
||||||
|
#include "suggest/core/dictionary/property/word_property.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
|
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
|
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
|
||||||
|
@ -358,7 +360,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code
|
||||||
ptNodeParams.getTerminalId());
|
ptNodeParams.getTerminalId());
|
||||||
const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo();
|
const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo();
|
||||||
// Fetch bigram information.
|
// Fetch bigram information.
|
||||||
std::vector<WordProperty::BigramProperty> bigrams;
|
std::vector<BigramProperty> bigrams;
|
||||||
const int bigramListPos = getBigramsPositionOfPtNode(ptNodePos);
|
const int bigramListPos = getBigramsPositionOfPtNode(ptNodePos);
|
||||||
if (bigramListPos != NOT_A_DICT_POS) {
|
if (bigramListPos != NOT_A_DICT_POS) {
|
||||||
int bigramWord1CodePoints[MAX_WORD_LENGTH];
|
int bigramWord1CodePoints[MAX_WORD_LENGTH];
|
||||||
|
@ -395,7 +397,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Fetch shortcut information.
|
// Fetch shortcut information.
|
||||||
std::vector<WordProperty::ShortcutProperty> shortcuts;
|
std::vector<UnigramProperty::ShortcutProperty> shortcuts;
|
||||||
int shortcutPos = getShortcutPositionOfPtNode(ptNodePos);
|
int shortcutPos = getShortcutPositionOfPtNode(ptNodePos);
|
||||||
if (shortcutPos != NOT_A_DICT_POS) {
|
if (shortcutPos != NOT_A_DICT_POS) {
|
||||||
int shortcutTarget[MAX_WORD_LENGTH];
|
int shortcutTarget[MAX_WORD_LENGTH];
|
||||||
|
@ -411,11 +413,11 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code
|
||||||
shortcuts.emplace_back(&target, shortcutProbability);
|
shortcuts.emplace_back(&target, shortcutProbability);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return WordProperty(&codePointVector, ptNodeParams.isNotAWord(),
|
const UnigramProperty unigramProperty(ptNodeParams.isNotAWord(),
|
||||||
ptNodeParams.isBlacklisted(), ptNodeParams.hasBigrams(),
|
ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
|
||||||
ptNodeParams.hasShortcutTargets(), ptNodeParams.getProbability(),
|
|
||||||
historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
|
historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
|
||||||
historicalInfo->getCount(), &bigrams, &shortcuts);
|
historicalInfo->getCount(), &shortcuts);
|
||||||
|
return WordProperty(&codePointVector, &unigramProperty, &bigrams);
|
||||||
}
|
}
|
||||||
|
|
||||||
int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints) {
|
int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints) {
|
||||||
|
|
Loading…
Reference in a new issue