Merge "Separate unigram/bigram property from WordProperty."

This commit is contained in:
Keisuke Kuroyanagi 2014-04-08 09:42:24 +00:00 committed by Android (Google) Code Review
commit ee6392f5a6
11 changed files with 261 additions and 155 deletions

View file

@ -32,7 +32,7 @@ LATIN_IME_CORE_SRC_FILES := \
digraph_utils.cpp \
error_type_utils.cpp \
multi_bigram_map.cpp \
word_property.cpp) \
property/word_property.cpp) \
$(addprefix suggest/core/layout/, \
additional_proximity_chars.cpp \
proximity_info.cpp \

View file

@ -24,7 +24,7 @@
#include "jni.h"
#include "jni_common.h"
#include "suggest/core/dictionary/dictionary.h"
#include "suggest/core/dictionary/word_property.h"
#include "suggest/core/dictionary/property/word_property.h"
#include "suggest/core/result/suggestion_results.h"
#include "suggest/core/suggest_options.h"
#include "suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h"

View file

@ -22,7 +22,7 @@
#include "defines.h"
#include "jni.h"
#include "suggest/core/dictionary/bigram_dictionary.h"
#include "suggest/core/dictionary/word_property.h"
#include "suggest/core/dictionary/property/word_property.h"
#include "suggest/core/policy/dictionary_header_structure_policy.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
#include "suggest/core/suggest_interface.h"

View file

@ -0,0 +1,65 @@
/*
* Copyright (C) 2014 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LATINIME_BIGRAM_PROPERTY_H
#define LATINIME_BIGRAM_PROPERTY_H
#include <vector>
#include "defines.h"
namespace latinime {
// Value holder for a single bigram entry: the target code points together with the
// probability, timestamp, level and count supplied for that entry.
class BigramProperty {
 public:
    // Stores a copy of *targetCodePoints. The pointer is dereferenced unconditionally, so it
    // must not be null.
    BigramProperty(const std::vector<int> *const targetCodePoints, const int probability,
            const int timestamp, const int level, const int count)
            : mTargetCodePoints(*targetCodePoints),
              mProbability(probability),
              mTimestamp(timestamp),
              mLevel(level),
              mCount(count) {}

    // Returns the address of the internally held code point vector.
    const std::vector<int> *getTargetCodePoints() const { return &mTargetCodePoints; }

    int getProbability() const { return mProbability; }

    int getTimestamp() const { return mTimestamp; }

    int getLevel() const { return mLevel; }

    int getCount() const { return mCount; }

 private:
    // The implicit copy constructor and assignment operator are kept on purpose so that
    // instances can be stored in std::vector.
    DISALLOW_DEFAULT_CONSTRUCTOR(BigramProperty);

    // TODO: Make members const.
    std::vector<int> mTargetCodePoints;
    int mProbability;
    int mTimestamp;
    int mLevel;
    int mCount;
};
} // namespace latinime
#endif // LATINIME_BIGRAM_PROPERTY_H

View file

@ -0,0 +1,107 @@
/*
* Copyright (C) 2014 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LATINIME_UNIGRAM_PROPERTY_H
#define LATINIME_UNIGRAM_PROPERTY_H
#include <vector>
#include "defines.h"
namespace latinime {
// Holds the per-word (unigram) attributes: the not-a-word / blacklisted flags, the probability,
// the historical information (timestamp, level, count) and the list of shortcuts.
class UnigramProperty {
 public:
    // A single shortcut: the target code points and the probability given for it.
    class ShortcutProperty {
     public:
        // Stores a copy of *targetCodePoints. The pointer is dereferenced unconditionally, so
        // it must not be null.
        ShortcutProperty(const std::vector<int> *const targetCodePoints, const int probability)
                : mTargetCodePoints(*targetCodePoints),
                  mProbability(probability) {}

        // Returns the address of the internally held code point vector.
        const std::vector<int> *getTargetCodePoints() const { return &mTargetCodePoints; }

        int getProbability() const { return mProbability; }

     private:
        // The implicit copy constructor and assignment operator are kept on purpose so that
        // instances can be stored in std::vector.
        DISALLOW_DEFAULT_CONSTRUCTOR(ShortcutProperty);

        // TODO: Make members const.
        std::vector<int> mTargetCodePoints;
        int mProbability;
    };

    // Builds an instance with both flags cleared, NOT_A_PROBABILITY / NOT_A_TIMESTAMP as
    // probability and timestamp, zero level and count, and no shortcuts.
    UnigramProperty()
            : mIsNotAWord(false),
              mIsBlacklisted(false),
              mProbability(NOT_A_PROBABILITY),
              mTimestamp(NOT_A_TIMESTAMP),
              mLevel(0),
              mCount(0),
              mShortcuts() {}

    // Stores the given values and a copy of *shortcuts. The pointer is dereferenced
    // unconditionally, so it must not be null.
    UnigramProperty(const bool isNotAWord, const bool isBlacklisted, const int probability,
            const int timestamp, const int level, const int count,
            const std::vector<ShortcutProperty> *const shortcuts)
            : mIsNotAWord(isNotAWord),
              mIsBlacklisted(isBlacklisted),
              mProbability(probability),
              mTimestamp(timestamp),
              mLevel(level),
              mCount(count),
              mShortcuts(*shortcuts) {}

    bool isNotAWord() const { return mIsNotAWord; }

    bool isBlacklisted() const { return mIsBlacklisted; }

    // True when at least one shortcut is stored.
    bool hasShortcuts() const { return !mShortcuts.empty(); }

    int getProbability() const { return mProbability; }

    int getTimestamp() const { return mTimestamp; }

    int getLevel() const { return mLevel; }

    int getCount() const { return mCount; }

    // Returns a reference to the internally held shortcut list.
    const std::vector<ShortcutProperty> &getShortcuts() const { return mShortcuts; }

 private:
    // The implicit copy constructor is kept so that instances can be used as a return value.
    DISALLOW_ASSIGNMENT_OPERATOR(UnigramProperty);

    // TODO: Make members const.
    bool mIsNotAWord;
    bool mIsBlacklisted;
    int mProbability;
    // Historical information
    int mTimestamp;
    int mLevel;
    int mCount;
    std::vector<ShortcutProperty> mShortcuts;
};
} // namespace latinime
#endif // LATINIME_UNIGRAM_PROPERTY_H

View file

@ -14,7 +14,7 @@
* limitations under the License.
*/
#include "suggest/core/dictionary/word_property.h"
#include "suggest/core/dictionary/property/word_property.h"
namespace latinime {
@ -23,9 +23,12 @@ void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints,
jobject outBigramProbabilities, jobject outShortcutTargets,
jobject outShortcutProbabilities) const {
env->SetIntArrayRegion(outCodePoints, 0 /* start */, mCodePoints.size(), &mCodePoints[0]);
jboolean flags[] = {mIsNotAWord, mIsBlacklisted, mHasBigrams, mHasShortcuts};
jboolean flags[] = {mUnigramProperty.isNotAWord(), mUnigramProperty.isBlacklisted(),
!mBigrams.empty(), mUnigramProperty.hasShortcuts()};
env->SetBooleanArrayRegion(outFlags, 0 /* start */, NELEMS(flags), flags);
int probabilityInfo[] = {mProbability, mTimestamp, mLevel, mCount};
int probabilityInfo[] = {mUnigramProperty.getProbability(), mUnigramProperty.getTimestamp(),
mUnigramProperty.getLevel(), mUnigramProperty.getCount()};
env->SetIntArrayRegion(outProbabilityInfo, 0 /* start */, NELEMS(probabilityInfo),
probabilityInfo);
@ -35,19 +38,17 @@ void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints,
jmethodID addMethodId = env->GetMethodID(arrayListClass, "add", "(Ljava/lang/Object;)Z");
// Output bigrams.
const int bigramCount = mBigrams.size();
for (int i = 0; i < bigramCount; ++i) {
const BigramProperty *const bigramProperty = &mBigrams[i];
const std::vector<int> *const word1CodePoints = bigramProperty->getTargetCodePoints();
for (const auto &bigramProperty : mBigrams) {
const std::vector<int> *const word1CodePoints = bigramProperty.getTargetCodePoints();
jintArray bigramWord1CodePointArray = env->NewIntArray(word1CodePoints->size());
env->SetIntArrayRegion(bigramWord1CodePointArray, 0 /* start */,
word1CodePoints->size(), &word1CodePoints->at(0));
word1CodePoints->size(), word1CodePoints->data());
env->CallBooleanMethod(outBigramTargets, addMethodId, bigramWord1CodePointArray);
env->DeleteLocalRef(bigramWord1CodePointArray);
int bigramProbabilityInfo[] = {bigramProperty->getProbability(),
bigramProperty->getTimestamp(), bigramProperty->getLevel(),
bigramProperty->getCount()};
int bigramProbabilityInfo[] = {bigramProperty.getProbability(),
bigramProperty.getTimestamp(), bigramProperty.getLevel(),
bigramProperty.getCount()};
jintArray bigramProbabilityInfoArray = env->NewIntArray(NELEMS(bigramProbabilityInfo));
env->SetIntArrayRegion(bigramProbabilityInfoArray, 0 /* start */,
NELEMS(bigramProbabilityInfo), bigramProbabilityInfo);
@ -56,16 +57,15 @@ void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints,
}
// Output shortcuts.
const int shortcutTargetCount = mShortcuts.size();
for (int i = 0; i < shortcutTargetCount; ++i) {
const std::vector<int> *const targetCodePoints = mShortcuts[i].getTargetCodePoints();
for (const auto &shortcut : mUnigramProperty.getShortcuts()) {
const std::vector<int> *const targetCodePoints = shortcut.getTargetCodePoints();
jintArray shortcutTargetCodePointArray = env->NewIntArray(targetCodePoints->size());
env->SetIntArrayRegion(shortcutTargetCodePointArray, 0 /* start */,
targetCodePoints->size(), &targetCodePoints->at(0));
targetCodePoints->size(), targetCodePoints->data());
env->CallBooleanMethod(outShortcutTargets, addMethodId, shortcutTargetCodePointArray);
env->DeleteLocalRef(shortcutTargetCodePointArray);
jobject integerProbability = env->NewObject(integerClass, intToIntegerConstructorId,
mShortcuts[i].getProbability());
shortcut.getProbability());
env->CallBooleanMethod(outShortcutProbabilities, addMethodId, integerProbability);
env->DeleteLocalRef(integerProbability);
}

View file

@ -0,0 +1,54 @@
/*
* Copyright (C) 2014 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LATINIME_WORD_PROPERTY_H
#define LATINIME_WORD_PROPERTY_H
#include <vector>
#include "defines.h"
#include "jni.h"
#include "suggest/core/dictionary/property/bigram_property.h"
#include "suggest/core/dictionary/property/unigram_property.h"
namespace latinime {
// Bundles the information belonging to one word -- its code points, its unigram property and
// its bigram list -- so that it can be returned to the Java side via outputProperties().
class WordProperty {
 public:
    // The default-constructed instance (no code points, default unigram property, no bigrams)
    // is used to indicate an invalid word.
    WordProperty()
            : mCodePoints(),
              mUnigramProperty(),
              mBigrams() {}

    // Stores copies of all three arguments. Each pointer is dereferenced unconditionally, so
    // none of them may be null.
    WordProperty(const std::vector<int> *const codePoints,
            const UnigramProperty *const unigramProperty,
            const std::vector<BigramProperty> *const bigrams)
            : mCodePoints(*codePoints),
              mUnigramProperty(*unigramProperty),
              mBigrams(*bigrams) {}

    // Writes the stored properties into the given JNI output arrays and objects.
    void outputProperties(JNIEnv *const env, jintArray outCodePoints, jbooleanArray outFlags,
            jintArray outProbabilityInfo, jobject outBigramTargets,
            jobject outBigramProbabilities, jobject outShortcutTargets,
            jobject outShortcutProbabilities) const;

 private:
    // The implicit copy constructor is kept so that instances can be used as a return value.
    DISALLOW_ASSIGNMENT_OPERATOR(WordProperty);

    const std::vector<int> mCodePoints;
    const UnigramProperty mUnigramProperty;
    const std::vector<BigramProperty> mBigrams;
};
} // namespace latinime
#endif // LATINIME_WORD_PROPERTY_H

View file

@ -1,121 +0,0 @@
/*
* Copyright (C) 2013 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LATINIME_WORD_PROPERTY_H
#define LATINIME_WORD_PROPERTY_H
#include <cstring>
#include <vector>
#include "defines.h"
#include "jni.h"
namespace latinime {
// This class is used for returning information belonging to a word to java side.
class WordProperty {
public:
class BigramProperty {
public:
BigramProperty(const std::vector<int> *const targetCodePoints,
const int probability, const int timestamp, const int level, const int count)
: mTargetCodePoints(*targetCodePoints), mProbability(probability),
mTimestamp(timestamp), mLevel(level), mCount(count) {}
const std::vector<int> *getTargetCodePoints() const {
return &mTargetCodePoints;
}
int getProbability() const {
return mProbability;
}
int getTimestamp() const {
return mTimestamp;
}
int getLevel() const {
return mLevel;
}
int getCount() const {
return mCount;
}
private:
std::vector<int> mTargetCodePoints;
int mProbability;
int mTimestamp;
int mLevel;
int mCount;
};
class ShortcutProperty {
public:
ShortcutProperty(const std::vector<int> *const targetCodePoints, const int probability)
: mTargetCodePoints(*targetCodePoints), mProbability(probability) {}
const std::vector<int> *getTargetCodePoints() const {
return &mTargetCodePoints;
}
int getProbability() const {
return mProbability;
}
private:
std::vector<int> mTargetCodePoints;
int mProbability;
};
// Invalid word.
WordProperty()
: mCodePoints(), mIsNotAWord(false), mIsBlacklisted(false),
mHasBigrams(false), mHasShortcuts(false), mProbability(NOT_A_PROBABILITY),
mTimestamp(0), mLevel(0), mCount(0), mBigrams(), mShortcuts() {}
WordProperty(const std::vector<int> *const codePoints,
const bool isNotAWord, const bool isBlacklisted, const bool hasBigrams,
const bool hasShortcuts, const int probability, const int timestamp,
const int level, const int count, const std::vector<BigramProperty> *const bigrams,
const std::vector<ShortcutProperty> *const shortcuts)
: mCodePoints(*codePoints), mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted),
mHasBigrams(hasBigrams), mHasShortcuts(hasShortcuts), mProbability(probability),
mTimestamp(timestamp), mLevel(level), mCount(count), mBigrams(*bigrams),
mShortcuts(*shortcuts) {}
void outputProperties(JNIEnv *const env, jintArray outCodePoints, jbooleanArray outFlags,
jintArray outProbabilityInfo, jobject outBigramTargets, jobject outBigramProbabilities,
jobject outShortcutTargets, jobject outShortcutProbabilities) const;
private:
DISALLOW_ASSIGNMENT_OPERATOR(WordProperty);
std::vector<int> mCodePoints;
bool mIsNotAWord;
bool mIsBlacklisted;
bool mHasBigrams;
bool mHasShortcuts;
int mProbability;
// Historical information
int mTimestamp;
int mLevel;
int mCount;
std::vector<BigramProperty> mBigrams;
std::vector<ShortcutProperty> mShortcuts;
};
} // namespace latinime
#endif // LATINIME_WORD_PROPERTY_H

View file

@ -20,7 +20,7 @@
#include <memory>
#include "defines.h"
#include "suggest/core/dictionary/word_property.h"
#include "suggest/core/dictionary/property/word_property.h"
namespace latinime {

View file

@ -339,7 +339,7 @@ const WordProperty PatriciaTriePolicy::getWordProperty(const int *const codePoin
std::vector<int> codePointVector(ptNodeParams.getCodePoints(),
ptNodeParams.getCodePoints() + ptNodeParams.getCodePointCount());
// Fetch bigram information.
std::vector<WordProperty::BigramProperty> bigrams;
std::vector<BigramProperty> bigrams;
const int bigramListPos = getBigramsPositionOfPtNode(ptNodePos);
int bigramWord1CodePoints[MAX_WORD_LENGTH];
BinaryDictionaryBigramsIterator bigramsIt(getBigramsStructurePolicy(), bigramListPos);
@ -360,7 +360,7 @@ const WordProperty PatriciaTriePolicy::getWordProperty(const int *const codePoin
}
}
// Fetch shortcut information.
std::vector<WordProperty::ShortcutProperty> shortcuts;
std::vector<UnigramProperty::ShortcutProperty> shortcuts;
int shortcutPos = getShortcutPositionOfPtNode(ptNodePos);
if (shortcutPos != NOT_A_DICT_POS) {
int shortcutTargetCodePoints[MAX_WORD_LENGTH];
@ -379,11 +379,10 @@ const WordProperty PatriciaTriePolicy::getWordProperty(const int *const codePoin
shortcuts.emplace_back(&shortcutTarget, shortcutProbability);
}
}
return WordProperty(&codePointVector, ptNodeParams.isNotAWord(),
ptNodeParams.isBlacklisted(), ptNodeParams.hasBigrams(),
ptNodeParams.hasShortcutTargets(), ptNodeParams.getProbability(),
NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */,
&bigrams, &shortcuts);
const UnigramProperty unigramProperty(ptNodeParams.isNotAWord(),
ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts);
return WordProperty(&codePointVector, &unigramProperty, &bigrams);
}
int PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints) {

View file

@ -20,7 +20,9 @@
#include "suggest/core/dicnode/dic_node.h"
#include "suggest/core/dicnode/dic_node_vector.h"
#include "suggest/core/dictionary/word_property.h"
#include "suggest/core/dictionary/property/bigram_property.h"
#include "suggest/core/dictionary/property/unigram_property.h"
#include "suggest/core/dictionary/property/word_property.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
@ -358,7 +360,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code
ptNodeParams.getTerminalId());
const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo();
// Fetch bigram information.
std::vector<WordProperty::BigramProperty> bigrams;
std::vector<BigramProperty> bigrams;
const int bigramListPos = getBigramsPositionOfPtNode(ptNodePos);
if (bigramListPos != NOT_A_DICT_POS) {
int bigramWord1CodePoints[MAX_WORD_LENGTH];
@ -395,7 +397,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code
}
}
// Fetch shortcut information.
std::vector<WordProperty::ShortcutProperty> shortcuts;
std::vector<UnigramProperty::ShortcutProperty> shortcuts;
int shortcutPos = getShortcutPositionOfPtNode(ptNodePos);
if (shortcutPos != NOT_A_DICT_POS) {
int shortcutTarget[MAX_WORD_LENGTH];
@ -411,11 +413,11 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code
shortcuts.emplace_back(&target, shortcutProbability);
}
}
return WordProperty(&codePointVector, ptNodeParams.isNotAWord(),
ptNodeParams.isBlacklisted(), ptNodeParams.hasBigrams(),
ptNodeParams.hasShortcutTargets(), ptNodeParams.getProbability(),
const UnigramProperty unigramProperty(ptNodeParams.isNotAWord(),
ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
historicalInfo->getCount(), &bigrams, &shortcuts);
historicalInfo->getCount(), &shortcuts);
return WordProperty(&codePointVector, &unigramProperty, &bigrams);
}
int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints) {