Merge "Use std::move for dictionary properties."
This commit is contained in:
commit
a2251ef47b
9 changed files with 58 additions and 48 deletions
|
@ -364,10 +364,12 @@ static bool latinime_BinaryDictionary_addUnigramEntry(JNIEnv *env, jclass clazz,
|
|||
int codePoints[codePointCount];
|
||||
env->GetIntArrayRegion(word, 0, codePointCount, codePoints);
|
||||
std::vector<UnigramProperty::ShortcutProperty> shortcuts;
|
||||
std::vector<int> shortcutTargetCodePoints;
|
||||
JniDataUtils::jintarrayToVector(env, shortcutTarget, &shortcutTargetCodePoints);
|
||||
if (!shortcutTargetCodePoints.empty()) {
|
||||
shortcuts.emplace_back(&shortcutTargetCodePoints, shortcutProbability);
|
||||
{
|
||||
std::vector<int> shortcutTargetCodePoints;
|
||||
JniDataUtils::jintarrayToVector(env, shortcutTarget, &shortcutTargetCodePoints);
|
||||
if (!shortcutTargetCodePoints.empty()) {
|
||||
shortcuts.emplace_back(std::move(shortcutTargetCodePoints), shortcutProbability);
|
||||
}
|
||||
}
|
||||
// Use 1 for count to indicate the word has inputted.
|
||||
const UnigramProperty unigramProperty(isBeginningOfSentence, isNotAWord,
|
||||
|
@ -401,11 +403,9 @@ static bool latinime_BinaryDictionary_addNgramEntry(JNIEnv *env, jclass clazz, j
|
|||
jsize wordLength = env->GetArrayLength(word);
|
||||
int wordCodePoints[wordLength];
|
||||
env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints);
|
||||
const std::vector<int> bigramTargetCodePoints(
|
||||
wordCodePoints, wordCodePoints + wordLength);
|
||||
// Use 1 for count to indicate the bigram has inputted.
|
||||
const BigramProperty bigramProperty(&bigramTargetCodePoints, probability,
|
||||
timestamp, 0 /* level */, 1 /* count */);
|
||||
const BigramProperty bigramProperty(CodePointArrayView(wordCodePoints, wordLength).toVector(),
|
||||
probability, timestamp, 0 /* level */, 1 /* count */);
|
||||
return dictionary->addNgramEntry(&prevWordsInfo, &bigramProperty);
|
||||
}
|
||||
|
||||
|
@ -483,12 +483,14 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
|
|||
jintArray shortcutTarget = static_cast<jintArray>(
|
||||
env->GetObjectField(languageModelParam, shortcutTargetFieldId));
|
||||
std::vector<UnigramProperty::ShortcutProperty> shortcuts;
|
||||
std::vector<int> shortcutTargetCodePoints;
|
||||
JniDataUtils::jintarrayToVector(env, shortcutTarget, &shortcutTargetCodePoints);
|
||||
if (!shortcutTargetCodePoints.empty()) {
|
||||
jint shortcutProbability =
|
||||
env->GetIntField(languageModelParam, shortcutProbabilityFieldId);
|
||||
shortcuts.emplace_back(&shortcutTargetCodePoints, shortcutProbability);
|
||||
{
|
||||
std::vector<int> shortcutTargetCodePoints;
|
||||
JniDataUtils::jintarrayToVector(env, shortcutTarget, &shortcutTargetCodePoints);
|
||||
if (!shortcutTargetCodePoints.empty()) {
|
||||
jint shortcutProbability =
|
||||
env->GetIntField(languageModelParam, shortcutProbabilityFieldId);
|
||||
shortcuts.emplace_back(std::move(shortcutTargetCodePoints), shortcutProbability);
|
||||
}
|
||||
}
|
||||
// Use 1 for count to indicate the word has inputted.
|
||||
const UnigramProperty unigramProperty(false /* isBeginningOfSentence */, isNotAWord,
|
||||
|
@ -498,11 +500,10 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
|
|||
&unigramProperty);
|
||||
if (word0) {
|
||||
jint bigramProbability = env->GetIntField(languageModelParam, bigramProbabilityFieldId);
|
||||
const std::vector<int> bigramTargetCodePoints(
|
||||
word1CodePoints, word1CodePoints + word1Length);
|
||||
// Use 1 for count to indicate the bigram has inputted.
|
||||
const BigramProperty bigramProperty(&bigramTargetCodePoints, bigramProbability,
|
||||
timestamp, 0 /* level */, 1 /* count */);
|
||||
const BigramProperty bigramProperty(
|
||||
CodePointArrayView(word1CodePoints, word1Length).toVector(),
|
||||
bigramProbability, timestamp, 0 /* level */, 1 /* count */);
|
||||
const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length,
|
||||
false /* isBeginningOfSentence */);
|
||||
dictionary->addNgramEntry(&prevWordsInfo, &bigramProperty);
|
||||
|
|
|
@ -26,9 +26,9 @@ namespace latinime {
|
|||
// TODO: Change to NgramProperty.
|
||||
class BigramProperty {
|
||||
public:
|
||||
BigramProperty(const std::vector<int> *const targetCodePoints,
|
||||
const int probability, const int timestamp, const int level, const int count)
|
||||
: mTargetCodePoints(*targetCodePoints), mProbability(probability),
|
||||
BigramProperty(const std::vector<int> &&targetCodePoints, const int probability,
|
||||
const int timestamp, const int level, const int count)
|
||||
: mTargetCodePoints(std::move(targetCodePoints)), mProbability(probability),
|
||||
mTimestamp(timestamp), mLevel(level), mCount(count) {}
|
||||
|
||||
const std::vector<int> *getTargetCodePoints() const {
|
||||
|
|
|
@ -27,8 +27,9 @@ class UnigramProperty {
|
|||
public:
|
||||
class ShortcutProperty {
|
||||
public:
|
||||
ShortcutProperty(const std::vector<int> *const targetCodePoints, const int probability)
|
||||
: mTargetCodePoints(*targetCodePoints), mProbability(probability) {}
|
||||
ShortcutProperty(const std::vector<int> &&targetCodePoints, const int probability)
|
||||
: mTargetCodePoints(std::move(targetCodePoints)),
|
||||
mProbability(probability) {}
|
||||
|
||||
const std::vector<int> *getTargetCodePoints() const {
|
||||
return &mTargetCodePoints;
|
||||
|
|
|
@ -23,7 +23,6 @@
|
|||
#include "jni.h"
|
||||
#include "suggest/core/dictionary/property/bigram_property.h"
|
||||
#include "suggest/core/dictionary/property/unigram_property.h"
|
||||
#include "utils/int_array_view.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
|
@ -34,9 +33,9 @@ class WordProperty {
|
|||
// Default constructor: value-initializes mCodePoints, mUnigramProperty and mBigrams.
WordProperty()
|
||||
: mCodePoints(), mUnigramProperty(), mBigrams() {}
|
||||
|
||||
WordProperty(const CodePointArrayView codePoints, const UnigramProperty *const unigramProperty,
|
||||
WordProperty(const std::vector<int> &&codePoints, const UnigramProperty *const unigramProperty,
|
||||
const std::vector<BigramProperty> *const bigrams)
|
||||
: mCodePoints(codePoints.begin(), codePoints.end()), mUnigramProperty(*unigramProperty),
|
||||
: mCodePoints(std::move(codePoints)), mUnigramProperty(*unigramProperty),
|
||||
mBigrams(*bigrams) {}
|
||||
|
||||
void outputProperties(JNIEnv *const env, jintArray outCodePoints, jbooleanArray outFlags,
|
||||
|
|
|
@ -521,15 +521,14 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(
|
|||
const int codePointCount = getCodePointsAndReturnCodePointCount(
|
||||
getWordIdFromTerminalPtNodePos(word1TerminalPtNodePos), MAX_WORD_LENGTH,
|
||||
bigramWord1CodePoints);
|
||||
const std::vector<int> word1(bigramWord1CodePoints,
|
||||
bigramWord1CodePoints + codePointCount);
|
||||
const HistoricalInfo *const historicalInfo = bigramEntry.getHistoricalInfo();
|
||||
const int probability = bigramEntry.hasHistoricalInfo() ?
|
||||
ForgettingCurveUtils::decodeProbability(
|
||||
bigramEntry.getHistoricalInfo(), mHeaderPolicy) :
|
||||
bigramEntry.getProbability();
|
||||
bigrams.emplace_back(&word1, probability,
|
||||
historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
|
||||
bigrams.emplace_back(
|
||||
CodePointArrayView(bigramWord1CodePoints, codePointCount).toVector(),
|
||||
probability, historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
|
||||
historicalInfo->getCount());
|
||||
}
|
||||
}
|
||||
|
@ -546,15 +545,16 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(
|
|||
int shortcutProbability = NOT_A_PROBABILITY;
|
||||
shortcutDictContent->getShortcutEntryAndAdvancePosition(MAX_WORD_LENGTH, shortcutTarget,
|
||||
&shortcutTargetLength, &shortcutProbability, &hasNext, &shortcutPos);
|
||||
const std::vector<int> target(shortcutTarget, shortcutTarget + shortcutTargetLength);
|
||||
shortcuts.emplace_back(&target, shortcutProbability);
|
||||
shortcuts.emplace_back(
|
||||
CodePointArrayView(shortcutTarget, shortcutTargetLength).toVector(),
|
||||
shortcutProbability);
|
||||
}
|
||||
}
|
||||
const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(),
|
||||
ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
|
||||
historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
|
||||
historicalInfo->getCount(), &shortcuts);
|
||||
return WordProperty(wordCodePoints, &unigramProperty, &bigrams);
|
||||
return WordProperty(wordCodePoints.toVector(), &unigramProperty, &bigrams);
|
||||
}
|
||||
|
||||
int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints,
|
||||
|
|
|
@ -449,11 +449,10 @@ const WordProperty PatriciaTriePolicy::getWordProperty(
|
|||
const int word1CodePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(
|
||||
getWordIdFromTerminalPtNodePos(bigramsIt.getBigramPos()), MAX_WORD_LENGTH,
|
||||
bigramWord1CodePoints, &word1Probability);
|
||||
const std::vector<int> word1(bigramWord1CodePoints,
|
||||
bigramWord1CodePoints + word1CodePointCount);
|
||||
const int probability = getProbability(word1Probability, bigramsIt.getProbability());
|
||||
bigrams.emplace_back(&word1, probability,
|
||||
NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */);
|
||||
bigrams.emplace_back(
|
||||
CodePointArrayView(bigramWord1CodePoints, word1CodePointCount).toVector(),
|
||||
probability, NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */);
|
||||
}
|
||||
}
|
||||
// Fetch shortcut information.
|
||||
|
@ -469,17 +468,17 @@ const WordProperty PatriciaTriePolicy::getWordProperty(
|
|||
hasNext = ShortcutListReadingUtils::hasNext(shortcutFlags);
|
||||
const int shortcutTargetLength = ShortcutListReadingUtils::readShortcutTarget(
|
||||
mBuffer, MAX_WORD_LENGTH, shortcutTargetCodePoints, &shortcutPos);
|
||||
const std::vector<int> shortcutTarget(shortcutTargetCodePoints,
|
||||
shortcutTargetCodePoints + shortcutTargetLength);
|
||||
const int shortcutProbability =
|
||||
ShortcutListReadingUtils::getProbabilityFromFlags(shortcutFlags);
|
||||
shortcuts.emplace_back(&shortcutTarget, shortcutProbability);
|
||||
shortcuts.emplace_back(
|
||||
CodePointArrayView(shortcutTargetCodePoints, shortcutTargetLength).toVector(),
|
||||
shortcutProbability);
|
||||
}
|
||||
}
|
||||
const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(),
|
||||
ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
|
||||
NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts);
|
||||
return WordProperty(wordCodePoints, &unigramProperty, &bigrams);
|
||||
return WordProperty(wordCodePoints.toVector(), &unigramProperty, &bigrams);
|
||||
}
|
||||
|
||||
int PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints,
|
||||
|
|
|
@ -464,15 +464,13 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(
|
|||
prevWordIds)) {
|
||||
const int codePointCount = getCodePointsAndReturnCodePointCount(entry.getWordId(),
|
||||
MAX_WORD_LENGTH, bigramWord1CodePoints);
|
||||
const std::vector<int> word1(bigramWord1CodePoints,
|
||||
bigramWord1CodePoints + codePointCount);
|
||||
const ProbabilityEntry probabilityEntry = entry.getProbabilityEntry();
|
||||
const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo();
|
||||
const int probability = probabilityEntry.hasHistoricalInfo() ?
|
||||
ForgettingCurveUtils::decodeProbability(historicalInfo, mHeaderPolicy) :
|
||||
probabilityEntry.getProbability();
|
||||
bigrams.emplace_back(&word1, probability,
|
||||
historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
|
||||
bigrams.emplace_back(CodePointArrayView(bigramWord1CodePoints, codePointCount).toVector(),
|
||||
probability, historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
|
||||
historicalInfo->getCount());
|
||||
}
|
||||
// Fetch shortcut information.
|
||||
|
@ -488,15 +486,16 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(
|
|||
int shortcutProbability = NOT_A_PROBABILITY;
|
||||
shortcutDictContent->getShortcutEntryAndAdvancePosition(MAX_WORD_LENGTH, shortcutTarget,
|
||||
&shortcutTargetLength, &shortcutProbability, &hasNext, &shortcutPos);
|
||||
const std::vector<int> target(shortcutTarget, shortcutTarget + shortcutTargetLength);
|
||||
shortcuts.emplace_back(&target, shortcutProbability);
|
||||
shortcuts.emplace_back(
|
||||
CodePointArrayView(shortcutTarget, shortcutTargetLength).toVector(),
|
||||
shortcutProbability);
|
||||
}
|
||||
}
|
||||
const UnigramProperty unigramProperty(probabilityEntry.representsBeginningOfSentence(),
|
||||
probabilityEntry.isNotAWord(), probabilityEntry.isBlacklisted(),
|
||||
probabilityEntry.getProbability(), historicalInfo->getTimeStamp(),
|
||||
historicalInfo->getLevel(), historicalInfo->getCount(), &shortcuts);
|
||||
return WordProperty(wordCodePoints, &unigramProperty, &bigrams);
|
||||
return WordProperty(wordCodePoints.toVector(), &unigramProperty, &bigrams);
|
||||
}
|
||||
|
||||
int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints,
|
||||
|
|
|
@ -129,6 +129,10 @@ class IntArrayView {
|
|||
return mPtr[mSize - 1];
|
||||
}
|
||||
|
||||
// Returns a std::vector<int> constructed from the range [begin(), end()) of this view.
AK_FORCE_INLINE std::vector<int> toVector() const {
    std::vector<int> copied(begin(), end());
    return copied;
}
|
||||
|
||||
private:
|
||||
DISALLOW_ASSIGNMENT_OPERATOR(IntArrayView);
|
||||
|
||||
|
|
|
@ -144,5 +144,12 @@ TEST(IntArrayViewTest, TestLastOrDefault) {
|
|||
EXPECT_EQ(10, intArrayView.skip(6).lastOrDefault(10));
|
||||
}
|
||||
|
||||
// Checks that toVector() on a view over a vector yields an equal vector, and that a
// default-constructed CodePointArrayView yields an empty vector.
TEST(IntArrayViewTest, TestToVector) {
    const std::vector<int> expected = {3, 2, 1, 0, -1, -2};
    IntArrayView view(expected);
    EXPECT_EQ(expected, view.toVector());

    const std::vector<int> emptyVector;
    EXPECT_EQ(emptyVector, CodePointArrayView().toVector());
}
|
||||
|
||||
} // namespace
|
||||
} // namespace latinime
|
||||
|
|
Loading…
Reference in a new issue