am 08ea74eb: Merge "Implement migrateNative()."

* commit '08ea74eb2cec7e48e2a0a21ea569c765d493db20':
  Implement migrateNative().
This commit is contained in:
Keisuke Kuroyanagi 2014-05-08 05:24:58 +00:00 committed by Android Git Automerger
commit 7a14c69701
4 changed files with 88 additions and 2 deletions

View file

@ -32,6 +32,7 @@
#include "suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h" #include "suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h"
#include "utils/char_utils.h" #include "utils/char_utils.h"
#include "utils/jni_data_utils.h" #include "utils/jni_data_utils.h"
#include "utils/log_utils.h"
#include "utils/time_keeper.h" #include "utils/time_keeper.h"
namespace latinime { namespace latinime {
@ -489,14 +490,85 @@ static bool latinime_BinaryDictionary_isCorruptedNative(JNIEnv *env, jclass claz
return dictionary->getDictionaryStructurePolicy()->isCorrupted(); return dictionary->getDictionaryStructurePolicy()->isCorrupted();
} }
// Flushes |structurePolicy| to |dictFilePath| with GC, destroys it, and then reopens the file
// that was just written as a new updatable policy.
//
// structurePolicy: the policy to flush; this function takes ownership of it.
// dictFilePath: path passed both to flushWithGC() and to the factory that reopens the file.
// Returns whatever newPolicyForExistingDictFile() yields for |dictFilePath|; callers in this
// file treat a null result as failure.
static DictionaryStructureWithBufferPolicy::StructurePolicyPtr runGCAndGetNewStructurePolicy(
        DictionaryStructureWithBufferPolicy::StructurePolicyPtr structurePolicy,
        const char *const dictFilePath) {
    structurePolicy->flushWithGC(dictFilePath);
    // Destroy the flushed policy before reopening the file. The previous code called release(),
    // which only gives up ownership and hands back the raw pointer; since that pointer was
    // discarded, the old policy was never deleted and leaked on every GC.
    // NOTE(review): assumes StructurePolicyPtr is a std::unique_ptr-style owner (callers pass it
    // with std::move) - confirm against its typedef.
    structurePolicy.reset();
    return DictionaryStructureWithBufferPolicyFactory::newPolicyForExistingDictFile(
            dictFilePath, 0 /* offset */, 0 /* size */, true /* isUpdatable */);
}
// JNI entry point that rebuilds the dictionary referenced by |dict| using |newFormatVersion|
// and writes the result to |dictFilePath|.
// Returns false when |dict| is null, when the new on-memory policy cannot be created, when the
// dictionary cannot be reopened after a GC, or when adding a unigram or bigram fails. Returns
// true after the final flushWithGC() call.
// NOTE(review): this span is a side-by-side diff rendering, so unchanged lines appear twice on
// one line and the removed "// TODO: Implement." comment shares a line with the first added
// statement.
static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, jlong dict, static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, jlong dict,
jstring dictFilePath, jlong newFormatVersion) { jstring dictFilePath, jlong newFormatVersion) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) { if (!dictionary) {
return false; return false;
} }
// Copy the Java path string into a NUL-terminated buffer sized from its UTF-8 length.
// GetStringUTFRegion() takes its length argument in string characters, hence
// GetStringLength() is passed there rather than the UTF-8 byte length.
// TODO: Implement. const jsize filePathUtf8Length = env->GetStringUTFLength(dictFilePath);
char dictFilePathChars[filePathUtf8Length + 1];
env->GetStringUTFRegion(dictFilePath, 0, env->GetStringLength(dictFilePath), dictFilePathChars);
dictFilePathChars[filePathUtf8Length] = '\0';
// Create an on-memory policy for the new format version from the source dictionary's header
// locale and attribute map.
const DictionaryHeaderStructurePolicy *const headerPolicy =
dictionary->getDictionaryStructurePolicy()->getHeaderStructurePolicy();
DictionaryStructureWithBufferPolicy::StructurePolicyPtr dictionaryStructureWithBufferPolicy =
DictionaryStructureWithBufferPolicyFactory::newPolicyForOnMemoryDict(
newFormatVersion, *headerPolicy->getLocale(), headerPolicy->getAttributeMap());
if (!dictionaryStructureWithBufferPolicy) {
LogUtils::logToJava(env, "Cannot migrate header.");
return false; return false;
}
// TODO: Migrate historical information.
int wordCodePoints[MAX_WORD_LENGTH];
int token = 0;
// Add unigrams.
// First pass over the source dictionary's words; a returned token of 0 ends the iteration.
do {
token = dictionary->getNextWordAndNextToken(token, wordCodePoints);
const int wordLength = CharUtils::getCodePointCount(MAX_WORD_LENGTH, wordCodePoints);
const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints, wordLength);
// When the new policy asks for GC, swap it for the policy returned by the helper, which
// flushes to the target file and reopens it.
if (dictionaryStructureWithBufferPolicy->needsToRunGC(true /* mindsBlockByGC */)) {
dictionaryStructureWithBufferPolicy = runGCAndGetNewStructurePolicy(
std::move(dictionaryStructureWithBufferPolicy), dictFilePathChars);
if (!dictionaryStructureWithBufferPolicy) {
LogUtils::logToJava(env, "Cannot open dict after GC.");
return false;
}
}
if (!dictionaryStructureWithBufferPolicy->addUnigramWord(wordCodePoints, wordLength,
wordProperty.getUnigramProperty())) {
LogUtils::logToJava(env, "Cannot add unigram to the new dict.");
return false;
}
} while (token != 0);
// Add bigrams.
// Second pass over the same words; |token| is 0 again here because the loop above only exits
// once it is 0.
do {
token = dictionary->getNextWordAndNextToken(token, wordCodePoints);
const int wordLength = CharUtils::getCodePointCount(MAX_WORD_LENGTH, wordCodePoints);
const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints, wordLength);
if (dictionaryStructureWithBufferPolicy->needsToRunGC(true /* mindsBlockByGC */)) {
dictionaryStructureWithBufferPolicy = runGCAndGetNewStructurePolicy(
std::move(dictionaryStructureWithBufferPolicy), dictFilePathChars);
if (!dictionaryStructureWithBufferPolicy) {
LogUtils::logToJava(env, "Cannot open dict after GC.");
return false;
}
}
// Copy each bigram of the current word, with its probability and timestamp.
// NOTE(review): "bigarmProperty" looks like a typo for "bigramProperty".
for (const BigramProperty &bigarmProperty : *wordProperty.getBigramProperties()) {
const std::vector<int> *targetCodePoints = bigarmProperty.getTargetCodePoints();
if (!dictionaryStructureWithBufferPolicy->addBigramWords(wordCodePoints, wordLength,
targetCodePoints->data(), targetCodePoints->size(),
bigarmProperty.getProbability(), bigarmProperty.getTimestamp())) {
LogUtils::logToJava(env, "Cannot add bigram to the new dict.");
return false;
}
}
} while (token != 0);
// Save to File.
// NOTE(review): the outcome of this final flush is not checked before returning true.
dictionaryStructureWithBufferPolicy->flushWithGC(dictFilePathChars);
return true;
} }
static const JNINativeMethod sMethods[] = { static const JNINativeMethod sMethods[] = {

View file

@ -42,6 +42,14 @@ class WordProperty {
jintArray outProbabilityInfo, jobject outBigramTargets, jobject outBigramProbabilities, jintArray outProbabilityInfo, jobject outBigramTargets, jobject outBigramProbabilities,
jobject outShortcutTargets, jobject outShortcutProbabilities) const; jobject outShortcutTargets, jobject outShortcutProbabilities) const;
// Read-only access to the unigram property stored in this WordProperty. The returned pointer
// refers to a member of this object.
const UnigramProperty *getUnigramProperty() const { return &mUnigramProperty; }
// Read-only access to the bigram properties stored in this WordProperty. The returned pointer
// refers to a member of this object.
const std::vector<BigramProperty> *getBigramProperties() const { return &mBigrams; }
private: private:
// Default copy constructor is used for using as a return value. // Default copy constructor is used for using as a return value.
DISALLOW_ASSIGNMENT_OPERATOR(WordProperty); DISALLOW_ASSIGNMENT_OPERATOR(WordProperty);

View file

@ -49,6 +49,8 @@ class DictionaryHeaderStructurePolicy {
virtual bool shouldBoostExactMatches() const = 0; virtual bool shouldBoostExactMatches() const = 0;
// Returns a pointer to this header's locale as a vector of ints (presumably code points -
// TODO confirm against the implementations).
virtual const std::vector<int> *getLocale() const = 0;
protected: protected:
DictionaryHeaderStructurePolicy() {} DictionaryHeaderStructurePolicy() {}

View file

@ -238,6 +238,10 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
const int unigramCount, const int bigramCount, const int extendedRegionSize, const int unigramCount, const int bigramCount, const int extendedRegionSize,
DictionaryHeaderStructurePolicy::AttributeMap *outAttributeMap) const; DictionaryHeaderStructurePolicy::AttributeMap *outAttributeMap) const;
// Read-only access to the locale stored in this header policy. The returned pointer refers to
// a member of this object.
AK_FORCE_INLINE const std::vector<int> *getLocale() const { return &mLocale; }
private: private:
DISALLOW_COPY_AND_ASSIGN(HeaderPolicy); DISALLOW_COPY_AND_ASSIGN(HeaderPolicy);