am 08ea74eb
: Merge "Implement migrateNative()."
* commit '08ea74eb2cec7e48e2a0a21ea569c765d493db20': Implement migrateNative().
This commit is contained in:
commit
7a14c69701
4 changed files with 88 additions and 2 deletions
|
@ -32,6 +32,7 @@
|
||||||
#include "suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h"
|
#include "suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h"
|
||||||
#include "utils/char_utils.h"
|
#include "utils/char_utils.h"
|
||||||
#include "utils/jni_data_utils.h"
|
#include "utils/jni_data_utils.h"
|
||||||
|
#include "utils/log_utils.h"
|
||||||
#include "utils/time_keeper.h"
|
#include "utils/time_keeper.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
@ -489,14 +490,85 @@ static bool latinime_BinaryDictionary_isCorruptedNative(JNIEnv *env, jclass claz
|
||||||
return dictionary->getDictionaryStructurePolicy()->isCorrupted();
|
return dictionary->getDictionaryStructurePolicy()->isCorrupted();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Flushes the given structure policy to dictFilePath with GC, destroys it, and then reopens the
 * file as a new structure policy.
 *
 * @param structurePolicy policy to flush; ownership is taken and the object does not outlive
 *        this call.
 * @param dictFilePath path the policy is written to and subsequently reopened from.
 * @return whatever newPolicyForExistingDictFile() yields for the written file (opened with
 *         offset 0, size 0 and isUpdatable = true). Callers in this file treat a null result
 *         as a failure to reopen the dictionary.
 */
static DictionaryStructureWithBufferPolicy::StructurePolicyPtr runGCAndGetNewStructurePolicy(
        DictionaryStructureWithBufferPolicy::StructurePolicyPtr structurePolicy,
        const char *const dictFilePath) {
    structurePolicy->flushWithGC(dictFilePath);
    // Destroy the flushed policy before reopening the file. release() would merely give up
    // ownership without deleting the object, leaking it on every GC pass.
    // NOTE(review): assumes StructurePolicyPtr is a std::unique_ptr-style owner — confirm.
    structurePolicy.reset();
    return DictionaryStructureWithBufferPolicyFactory::newPolicyForExistingDictFile(
            dictFilePath, 0 /* offset */, 0 /* size */, true /* isUpdatable */);
}
|
||||||
|
|
||||||
static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, jlong dict,
|
static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, jlong dict,
|
||||||
jstring dictFilePath, jlong newFormatVersion) {
|
jstring dictFilePath, jlong newFormatVersion) {
|
||||||
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
|
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
|
||||||
if (!dictionary) {
|
if (!dictionary) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
// TODO: Implement.
|
const jsize filePathUtf8Length = env->GetStringUTFLength(dictFilePath);
|
||||||
|
char dictFilePathChars[filePathUtf8Length + 1];
|
||||||
|
env->GetStringUTFRegion(dictFilePath, 0, env->GetStringLength(dictFilePath), dictFilePathChars);
|
||||||
|
dictFilePathChars[filePathUtf8Length] = '\0';
|
||||||
|
|
||||||
|
const DictionaryHeaderStructurePolicy *const headerPolicy =
|
||||||
|
dictionary->getDictionaryStructurePolicy()->getHeaderStructurePolicy();
|
||||||
|
DictionaryStructureWithBufferPolicy::StructurePolicyPtr dictionaryStructureWithBufferPolicy =
|
||||||
|
DictionaryStructureWithBufferPolicyFactory::newPolicyForOnMemoryDict(
|
||||||
|
newFormatVersion, *headerPolicy->getLocale(), headerPolicy->getAttributeMap());
|
||||||
|
if (!dictionaryStructureWithBufferPolicy) {
|
||||||
|
LogUtils::logToJava(env, "Cannot migrate header.");
|
||||||
return false;
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: Migrate historical information.
|
||||||
|
int wordCodePoints[MAX_WORD_LENGTH];
|
||||||
|
int token = 0;
|
||||||
|
// Add unigrams.
|
||||||
|
do {
|
||||||
|
token = dictionary->getNextWordAndNextToken(token, wordCodePoints);
|
||||||
|
const int wordLength = CharUtils::getCodePointCount(MAX_WORD_LENGTH, wordCodePoints);
|
||||||
|
const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints, wordLength);
|
||||||
|
if (dictionaryStructureWithBufferPolicy->needsToRunGC(true /* mindsBlockByGC */)) {
|
||||||
|
dictionaryStructureWithBufferPolicy = runGCAndGetNewStructurePolicy(
|
||||||
|
std::move(dictionaryStructureWithBufferPolicy), dictFilePathChars);
|
||||||
|
if (!dictionaryStructureWithBufferPolicy) {
|
||||||
|
LogUtils::logToJava(env, "Cannot open dict after GC.");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!dictionaryStructureWithBufferPolicy->addUnigramWord(wordCodePoints, wordLength,
|
||||||
|
wordProperty.getUnigramProperty())) {
|
||||||
|
LogUtils::logToJava(env, "Cannot add unigram to the new dict.");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
} while (token != 0);
|
||||||
|
|
||||||
|
// Add bigrams.
|
||||||
|
do {
|
||||||
|
token = dictionary->getNextWordAndNextToken(token, wordCodePoints);
|
||||||
|
const int wordLength = CharUtils::getCodePointCount(MAX_WORD_LENGTH, wordCodePoints);
|
||||||
|
const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints, wordLength);
|
||||||
|
if (dictionaryStructureWithBufferPolicy->needsToRunGC(true /* mindsBlockByGC */)) {
|
||||||
|
dictionaryStructureWithBufferPolicy = runGCAndGetNewStructurePolicy(
|
||||||
|
std::move(dictionaryStructureWithBufferPolicy), dictFilePathChars);
|
||||||
|
if (!dictionaryStructureWithBufferPolicy) {
|
||||||
|
LogUtils::logToJava(env, "Cannot open dict after GC.");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (const BigramProperty &bigarmProperty : *wordProperty.getBigramProperties()) {
|
||||||
|
const std::vector<int> *targetCodePoints = bigarmProperty.getTargetCodePoints();
|
||||||
|
if (!dictionaryStructureWithBufferPolicy->addBigramWords(wordCodePoints, wordLength,
|
||||||
|
targetCodePoints->data(), targetCodePoints->size(),
|
||||||
|
bigarmProperty.getProbability(), bigarmProperty.getTimestamp())) {
|
||||||
|
LogUtils::logToJava(env, "Cannot add bigram to the new dict.");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} while (token != 0);
|
||||||
|
// Save to File.
|
||||||
|
dictionaryStructureWithBufferPolicy->flushWithGC(dictFilePathChars);
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
static const JNINativeMethod sMethods[] = {
|
static const JNINativeMethod sMethods[] = {
|
||||||
|
|
|
@ -42,6 +42,14 @@ class WordProperty {
|
||||||
jintArray outProbabilityInfo, jobject outBigramTargets, jobject outBigramProbabilities,
|
jintArray outProbabilityInfo, jobject outBigramTargets, jobject outBigramProbabilities,
|
||||||
jobject outShortcutTargets, jobject outShortcutProbabilities) const;
|
jobject outShortcutTargets, jobject outShortcutProbabilities) const;
|
||||||
|
|
||||||
|
// Returns the address of the unigram property held by this WordProperty. The result is never
// null, and it points into this object, so it must not be used after the object is destroyed.
const UnigramProperty *getUnigramProperty() const {
|
||||||
|
return &mUnigramProperty;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the address of the vector of bigram properties held by this WordProperty. The result
// is never null, and it points into this object, so it must not be used after the object is
// destroyed.
const std::vector<BigramProperty> *getBigramProperties() const {
|
||||||
|
return &mBigrams;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// Default copy constructor is used for using as a return value.
|
// Default copy constructor is used for using as a return value.
|
||||||
DISALLOW_ASSIGNMENT_OPERATOR(WordProperty);
|
DISALLOW_ASSIGNMENT_OPERATOR(WordProperty);
|
||||||
|
|
|
@ -49,6 +49,8 @@ class DictionaryHeaderStructurePolicy {
|
||||||
|
|
||||||
virtual bool shouldBoostExactMatches() const = 0;
|
virtual bool shouldBoostExactMatches() const = 0;
|
||||||
|
|
||||||
|
// Returns the locale of the dictionary as a pointer to a vector of ints; must be implemented
// by subclasses.
// NOTE(review): elements are presumably code points of the locale string — confirm.
virtual const std::vector<int> *getLocale() const = 0;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
DictionaryHeaderStructurePolicy() {}
|
DictionaryHeaderStructurePolicy() {}
|
||||||
|
|
||||||
|
|
|
@ -238,6 +238,10 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
|
||||||
const int unigramCount, const int bigramCount, const int extendedRegionSize,
|
const int unigramCount, const int bigramCount, const int extendedRegionSize,
|
||||||
DictionaryHeaderStructurePolicy::AttributeMap *outAttributeMap) const;
|
DictionaryHeaderStructurePolicy::AttributeMap *outAttributeMap) const;
|
||||||
|
|
||||||
|
// Implementation of DictionaryHeaderStructurePolicy::getLocale(): returns the address of the
// mLocale member, so the pointer is only valid while this HeaderPolicy is alive.
AK_FORCE_INLINE const std::vector<int> *getLocale() const {
|
||||||
|
return &mLocale;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_COPY_AND_ASSIGN(HeaderPolicy);
|
DISALLOW_COPY_AND_ASSIGN(HeaderPolicy);
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue