diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp index 47ace23a1..9a32f64d4 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp @@ -25,7 +25,7 @@ namespace latinime { const char *const HeaderPolicy::MULTIPLE_WORDS_DEMOTION_RATE_KEY = "MULTIPLE_WORDS_DEMOTION_RATE"; const char *const HeaderPolicy::USES_FORGETTING_CURVE_KEY = "USES_FORGETTING_CURVE"; const char *const HeaderPolicy::LAST_UPDATED_TIME_KEY = "date"; -const float HeaderPolicy::DEFAULT_MULTIPLE_WORD_COST_MULTIPLIER = 1.0f; +const int HeaderPolicy::DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE = 100; const float HeaderPolicy::MULTIPLE_WORD_COST_MULTIPLIER_SCALE = 100.0f; // Used for logging. Question mark is used to indicate that the key is not found. @@ -37,7 +37,7 @@ void HeaderPolicy::readHeaderValueOrQuestionMark(const char *const key, int *out return; } std::vector keyCodePointVector; - insertCharactersIntoVector(key, &keyCodePointVector); + HeaderReadWriteUtils::insertCharactersIntoVector(key, &keyCodePointVector); HeaderReadWriteUtils::AttributeMap::const_iterator it = mAttributeMap.find(keyCodePointVector); if (it == mAttributeMap.end()) { // The key was not found. @@ -53,47 +53,29 @@ void HeaderPolicy::readHeaderValueOrQuestionMark(const char *const key, int *out } float HeaderPolicy::readMultipleWordCostMultiplier() const { - int attributeValue = 0; - if (getAttributeValueAsInt(MULTIPLE_WORDS_DEMOTION_RATE_KEY, &attributeValue)) { - if (attributeValue <= 0) { - return static_cast(MAX_VALUE_FOR_WEIGHTING); - } - return MULTIPLE_WORD_COST_MULTIPLIER_SCALE / static_cast(attributeValue); - } else { - return DEFAULT_MULTIPLE_WORD_COST_MULTIPLIER; + std::vector keyVector; + HeaderReadWriteUtils::insertCharactersIntoVector(MULTIPLE_WORDS_DEMOTION_RATE_KEY, &keyVector); + const int demotionRate = HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, + &keyVector, DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE); + if (demotionRate <= 0) { + return static_cast(MAX_VALUE_FOR_WEIGHTING); } + return MULTIPLE_WORD_COST_MULTIPLIER_SCALE / static_cast(demotionRate); } bool HeaderPolicy::readUsesForgettingCurveFlag() const { - int attributeValue = 0; - if (getAttributeValueAsInt(USES_FORGETTING_CURVE_KEY, &attributeValue)) { - return attributeValue != 0; - } else { - return false; - } -} - -// Returns S_INT_MIN when the key is not found or the value is invalid. -int HeaderPolicy::readLastUpdatedTime() const { - int attributeValue = 0; - if (getAttributeValueAsInt(LAST_UPDATED_TIME_KEY, &attributeValue)) { - return attributeValue; - } else { - return S_INT_MIN; - } -} - -// Returns whether the key is found or not and stores the found value into outValue. -bool HeaderPolicy::getAttributeValueAsInt(const char *const key, int *const outValue) const { std::vector keyVector; - insertCharactersIntoVector(key, &keyVector); - HeaderReadWriteUtils::AttributeMap::const_iterator it = mAttributeMap.find(keyVector); - if (it == mAttributeMap.end()) { - // The key was not found. - return false; - } - *outValue = parseIntAttributeValue(&(it->second)); - return true; + HeaderReadWriteUtils::insertCharactersIntoVector(USES_FORGETTING_CURVE_KEY, &keyVector); + return HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, &keyVector, + false /* defaultValue */); +} + +// Returns current time when the key is not found or the value is invalid. +int HeaderPolicy::readLastUpdatedTime() const { + std::vector keyVector; + HeaderReadWriteUtils::insertCharactersIntoVector(LAST_UPDATED_TIME_KEY, &keyVector); + return HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, &keyVector, + time(0) /* defaultValue */); } bool HeaderPolicy::writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferToWrite, @@ -117,13 +99,8 @@ bool HeaderPolicy::writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferT // Set current time as a last updated time. HeaderReadWriteUtils::AttributeMap attributeMapTowrite(mAttributeMap); std::vector updatedTimekey; - insertCharactersIntoVector(LAST_UPDATED_TIME_KEY, &updatedTimekey); - const time_t currentTime = time(NULL); - std::vector updatedTimeValue; - char charBuf[LARGEST_INT_DIGIT_COUNT + 1]; - snprintf(charBuf, LARGEST_INT_DIGIT_COUNT + 1, "%ld", currentTime); - insertCharactersIntoVector(charBuf, &updatedTimeValue); - attributeMapTowrite[updatedTimekey] = updatedTimeValue; + HeaderReadWriteUtils::insertCharactersIntoVector(LAST_UPDATED_TIME_KEY, &updatedTimekey); + HeaderReadWriteUtils::setIntAttribute(&attributeMapTowrite, &updatedTimekey, time(0)); if (!HeaderReadWriteUtils::writeHeaderAttributes(bufferToWrite, &attributeMapTowrite, &writingPos)) { return false; @@ -149,30 +126,4 @@ bool HeaderPolicy::writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferT return attributeMap; } -/* static */ int HeaderPolicy::parseIntAttributeValue( - const std::vector *const attributeValue) { - int value = 0; - bool isNegative = false; - for (size_t i = 0; i < attributeValue->size(); ++i) { - if (i == 0 && attributeValue->at(i) == '-') { - isNegative = true; - } else { - if (!isdigit(attributeValue->at(i))) { - // If not a number, return S_INT_MIN - return S_INT_MIN; - } - value *= 10; - value += attributeValue->at(i) - '0'; - } - } - return isNegative ? -value : value; -} - -/* static */ void HeaderPolicy::insertCharactersIntoVector(const char *const characters, - std::vector *const vector) { - for (int i = 0; characters[i]; ++i) { - vector->push_back(characters[i]); - } -} - } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h index 6b396f3f2..e97c08ca4 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h @@ -17,7 +17,6 @@ #ifndef LATINIME_HEADER_POLICY_H #define LATINIME_HEADER_POLICY_H -#include #include #include "defines.h" @@ -29,16 +28,26 @@ namespace latinime { class HeaderPolicy : public DictionaryHeaderStructurePolicy { public: - explicit HeaderPolicy(const uint8_t *const dictBuf, const int dictSize) - : mDictBuf(dictBuf), - mDictFormatVersion(FormatUtils::detectFormatVersion(dictBuf, dictSize)), + // Reads information from existing dictionary buffer. + HeaderPolicy(const uint8_t *const dictBuf, const int dictSize) + : mDictFormatVersion(FormatUtils::detectFormatVersion(dictBuf, dictSize)), mDictionaryFlags(HeaderReadWriteUtils::getFlags(dictBuf)), mSize(HeaderReadWriteUtils::getHeaderSize(dictBuf)), - mAttributeMap(createAttributeMapAndReadAllAttributes(mDictBuf)), + mAttributeMap(createAttributeMapAndReadAllAttributes(dictBuf)), mMultiWordCostMultiplier(readMultipleWordCostMultiplier()), mUsesForgettingCurve(readUsesForgettingCurveFlag()), mLastUpdatedTime(readLastUpdatedTime()) {} + // Constructs header information using an attribute map. + HeaderPolicy(const FormatUtils::FORMAT_VERSION dictFormatVersion, + const HeaderReadWriteUtils::AttributeMap *const attributeMap) + : mDictFormatVersion(dictFormatVersion), + mDictionaryFlags(HeaderReadWriteUtils::createAndGetDictionaryFlagsUsingAttributeMap( + attributeMap)), mSize(0), mAttributeMap(*attributeMap), + mMultiWordCostMultiplier(readUsesForgettingCurveFlag()), + mUsesForgettingCurve(readUsesForgettingCurveFlag()), + mLastUpdatedTime(readLastUpdatedTime()) {} + ~HeaderPolicy() {} AK_FORCE_INLINE int getSize() const { @@ -81,10 +90,9 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { static const char *const MULTIPLE_WORDS_DEMOTION_RATE_KEY; static const char *const USES_FORGETTING_CURVE_KEY; static const char *const LAST_UPDATED_TIME_KEY; - static const float DEFAULT_MULTIPLE_WORD_COST_MULTIPLIER; + static const int DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE; static const float MULTIPLE_WORD_COST_MULTIPLIER_SCALE; - const uint8_t *const mDictBuf; const FormatUtils::FORMAT_VERSION mDictFormatVersion; const HeaderReadWriteUtils::DictionaryFlags mDictionaryFlags; const int mSize; @@ -99,15 +107,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { int readLastUpdatedTime() const; - bool getAttributeValueAsInt(const char *const key, int *const outValue) const; - static HeaderReadWriteUtils::AttributeMap createAttributeMapAndReadAllAttributes( const uint8_t *const dictBuf); - - static int parseIntAttributeValue(const std::vector *const attributeValue); - - static void insertCharactersIntoVector( - const char *const characters, std::vector *const vector); }; } // namespace latinime #endif /* LATINIME_HEADER_POLICY_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp index 80fe88671..e6547675c 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp @@ -16,6 +16,8 @@ #include "suggest/policyimpl/dictionary/header/header_read_write_utils.h" +#include +#include #include #include "defines.h" @@ -43,6 +45,12 @@ const HeaderReadWriteUtils::DictionaryFlags const HeaderReadWriteUtils::DictionaryFlags HeaderReadWriteUtils::FRENCH_LIGATURE_PROCESSING_FLAG = 0x4; +const char *const HeaderReadWriteUtils::SUPPORTS_DYNAMIC_UPDATE_KEY = "SUPPORTS_DYNAMIC_UPDATE"; +const char *const HeaderReadWriteUtils::REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY = + "REQUIRES_GERMAN_UMLAUT_PROCESSING"; +const char *const HeaderReadWriteUtils::REQUIRES_FRENCH_LIGATURE_PROCESSING_KEY = + "REQUIRES_FRENCH_LIGATURE_PROCESSING"; + /* static */ int HeaderReadWriteUtils::getHeaderSize(const uint8_t *const dictBuf) { // See the format of the header in the comment in // BinaryDictionaryFormatUtils::detectFormatVersion() @@ -56,6 +64,28 @@ const HeaderReadWriteUtils::DictionaryFlags HEADER_MAGIC_NUMBER_SIZE + HEADER_DICTIONARY_VERSION_SIZE); } +/* static */ HeaderReadWriteUtils::DictionaryFlags + HeaderReadWriteUtils::createAndGetDictionaryFlagsUsingAttributeMap( + const HeaderReadWriteUtils::AttributeMap *const attributeMap) { + AttributeMap::key_type key; + insertCharactersIntoVector(REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY, &key); + const bool requiresGermanUmlautProcessing = readBoolAttributeValue(attributeMap, &key, + false /* defaultValue */); + key.clear(); + insertCharactersIntoVector(REQUIRES_FRENCH_LIGATURE_PROCESSING_KEY, &key); + const bool requiresFrenchLigatureProcessing = readBoolAttributeValue(attributeMap, &key, + false /* defaultValue */); + key.clear(); + insertCharactersIntoVector(SUPPORTS_DYNAMIC_UPDATE_KEY, &key); + const bool supportsDynamicUpdate = readBoolAttributeValue(attributeMap, &key, + false /* defaultValue */); + DictionaryFlags dictflags = NO_FLAGS; + dictflags |= requiresGermanUmlautProcessing ? GERMAN_UMLAUT_PROCESSING_FLAG : 0; + dictflags |= requiresFrenchLigatureProcessing ? FRENCH_LIGATURE_PROCESSING_FLAG : 0; + dictflags |= supportsDynamicUpdate ? SUPPORTS_DYNAMIC_UPDATE_FLAG : 0; + return dictflags; +} + /* static */ void HeaderReadWriteUtils::fetchAllHeaderAttributes(const uint8_t *const dictBuf, AttributeMap *const headerAttributes) { const int headerSize = getHeaderSize(dictBuf); @@ -128,4 +158,57 @@ const HeaderReadWriteUtils::DictionaryFlags return true; } +/* static */ void HeaderReadWriteUtils::setBoolAttribute(AttributeMap *const headerAttributes, + const AttributeMap::key_type *const key, const bool value) { + setIntAttribute(headerAttributes, key, value ? 1 : 0); +} + +/* static */ void HeaderReadWriteUtils::setIntAttribute(AttributeMap *const headerAttributes, + const AttributeMap::key_type *const key, const int value) { + AttributeMap::mapped_type valueVector; + char charBuf[LARGEST_INT_DIGIT_COUNT + 1]; + snprintf(charBuf, LARGEST_INT_DIGIT_COUNT + 1, "%d", value); + insertCharactersIntoVector(charBuf, &valueVector); + (*headerAttributes)[*key] = valueVector; +} + +/* static */ bool HeaderReadWriteUtils::readBoolAttributeValue( + const AttributeMap *const headerAttributes, const AttributeMap::key_type *const key, + const bool defaultValue) { + const int intDefaultValue = defaultValue ? 1 : 0; + const int intValue = readIntAttributeValue(headerAttributes, key, intDefaultValue); + return intValue != 0; +} + +/* static */ int HeaderReadWriteUtils::readIntAttributeValue( + const AttributeMap *const headerAttributes, const AttributeMap::key_type *const key, + const int defaultValue) { + AttributeMap::const_iterator it = headerAttributes->find(*key); + if (it != headerAttributes->end()) { + int value = 0; + bool isNegative = false; + for (size_t i = 0; i < it->second.size(); ++i) { + if (i == 0 && it->second.at(i) == '-') { + isNegative = true; + } else { + if (!isdigit(it->second.at(i))) { + // If not a number. + return defaultValue; + } + value *= 10; + value += it->second.at(i) - '0'; + } + } + return isNegative ? -value : value; + } + return defaultValue; +} + +/* static */ void HeaderReadWriteUtils::insertCharactersIntoVector(const char *const characters, + std::vector *const vector) { + for (int i = 0; characters[i]; ++i) { + vector->push_back(characters[i]); + } +} + } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h index 6cce73375..caa5097f6 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h @@ -54,6 +54,9 @@ class HeaderReadWriteUtils { + HEADER_SIZE_FIELD_SIZE; } + static DictionaryFlags createAndGetDictionaryFlagsUsingAttributeMap( + const HeaderReadWriteUtils::AttributeMap *const attributeMap); + static void fetchAllHeaderAttributes(const uint8_t *const dictBuf, AttributeMap *const headerAttributes); @@ -69,6 +72,24 @@ class HeaderReadWriteUtils { static bool writeHeaderAttributes(BufferWithExtendableBuffer *const buffer, const AttributeMap *const headerAttributes, int *const writingPos); + /** + * Methods for header attributes. + */ + static void setBoolAttribute(AttributeMap *const headerAttributes, + const AttributeMap::key_type *const key, const bool value); + + static void setIntAttribute(AttributeMap *const headerAttributes, + const AttributeMap::key_type *const key, const int value); + + static bool readBoolAttributeValue(const AttributeMap *const headerAttributes, + const AttributeMap::key_type *const key, const bool defaultValue); + + static int readIntAttributeValue(const AttributeMap *const headerAttributes, + const AttributeMap::key_type *const key, const int defaultValue); + + static void insertCharactersIntoVector(const char *const characters, + AttributeMap::key_type *const key); + private: DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderReadWriteUtils); @@ -87,7 +108,10 @@ class HeaderReadWriteUtils { static const DictionaryFlags GERMAN_UMLAUT_PROCESSING_FLAG; static const DictionaryFlags SUPPORTS_DYNAMIC_UPDATE_FLAG; static const DictionaryFlags FRENCH_LIGATURE_PROCESSING_FLAG; - static const DictionaryFlags CONTAINS_BIGRAMS_FLAG; + + static const char *const SUPPORTS_DYNAMIC_UPDATE_KEY; + static const char *const REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY; + static const char *const REQUIRES_FRENCH_LIGATURE_PROCESSING_KEY; }; } #endif /* LATINIME_HEADER_READ_WRITE_UTILS_H */