am 853cd794: am 42c716be: am 36f45921: Merge "Refactoring header attribute reading."
* commit '853cd794f32a9c06367df1d289eeaf5b6203ac36': Refactoring header attribute reading.main
commit
3b48d3fb93
|
@ -25,7 +25,7 @@ namespace latinime {
|
|||
const char *const HeaderPolicy::MULTIPLE_WORDS_DEMOTION_RATE_KEY = "MULTIPLE_WORDS_DEMOTION_RATE";
|
||||
const char *const HeaderPolicy::USES_FORGETTING_CURVE_KEY = "USES_FORGETTING_CURVE";
|
||||
const char *const HeaderPolicy::LAST_UPDATED_TIME_KEY = "date";
|
||||
const float HeaderPolicy::DEFAULT_MULTIPLE_WORD_COST_MULTIPLIER = 1.0f;
|
||||
const int HeaderPolicy::DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE = 100;
|
||||
const float HeaderPolicy::MULTIPLE_WORD_COST_MULTIPLIER_SCALE = 100.0f;
|
||||
|
||||
// Used for logging. Question mark is used to indicate that the key is not found.
|
||||
|
@ -37,7 +37,7 @@ void HeaderPolicy::readHeaderValueOrQuestionMark(const char *const key, int *out
|
|||
return;
|
||||
}
|
||||
std::vector<int> keyCodePointVector;
|
||||
insertCharactersIntoVector(key, &keyCodePointVector);
|
||||
HeaderReadWriteUtils::insertCharactersIntoVector(key, &keyCodePointVector);
|
||||
HeaderReadWriteUtils::AttributeMap::const_iterator it = mAttributeMap.find(keyCodePointVector);
|
||||
if (it == mAttributeMap.end()) {
|
||||
// The key was not found.
|
||||
|
@ -53,47 +53,29 @@ void HeaderPolicy::readHeaderValueOrQuestionMark(const char *const key, int *out
|
|||
}
|
||||
|
||||
float HeaderPolicy::readMultipleWordCostMultiplier() const {
|
||||
int attributeValue = 0;
|
||||
if (getAttributeValueAsInt(MULTIPLE_WORDS_DEMOTION_RATE_KEY, &attributeValue)) {
|
||||
if (attributeValue <= 0) {
|
||||
std::vector<int> keyVector;
|
||||
HeaderReadWriteUtils::insertCharactersIntoVector(MULTIPLE_WORDS_DEMOTION_RATE_KEY, &keyVector);
|
||||
const int demotionRate = HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
|
||||
&keyVector, DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE);
|
||||
if (demotionRate <= 0) {
|
||||
return static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
|
||||
}
|
||||
return MULTIPLE_WORD_COST_MULTIPLIER_SCALE / static_cast<float>(attributeValue);
|
||||
} else {
|
||||
return DEFAULT_MULTIPLE_WORD_COST_MULTIPLIER;
|
||||
}
|
||||
return MULTIPLE_WORD_COST_MULTIPLIER_SCALE / static_cast<float>(demotionRate);
|
||||
}
|
||||
|
||||
bool HeaderPolicy::readUsesForgettingCurveFlag() const {
|
||||
int attributeValue = 0;
|
||||
if (getAttributeValueAsInt(USES_FORGETTING_CURVE_KEY, &attributeValue)) {
|
||||
return attributeValue != 0;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Returns S_INT_MIN when the key is not found or the value is invalid.
|
||||
int HeaderPolicy::readLastUpdatedTime() const {
|
||||
int attributeValue = 0;
|
||||
if (getAttributeValueAsInt(LAST_UPDATED_TIME_KEY, &attributeValue)) {
|
||||
return attributeValue;
|
||||
} else {
|
||||
return S_INT_MIN;
|
||||
}
|
||||
}
|
||||
|
||||
// Returns whether the key is found or not and stores the found value into outValue.
|
||||
bool HeaderPolicy::getAttributeValueAsInt(const char *const key, int *const outValue) const {
|
||||
std::vector<int> keyVector;
|
||||
insertCharactersIntoVector(key, &keyVector);
|
||||
HeaderReadWriteUtils::AttributeMap::const_iterator it = mAttributeMap.find(keyVector);
|
||||
if (it == mAttributeMap.end()) {
|
||||
// The key was not found.
|
||||
return false;
|
||||
HeaderReadWriteUtils::insertCharactersIntoVector(USES_FORGETTING_CURVE_KEY, &keyVector);
|
||||
return HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, &keyVector,
|
||||
false /* defaultValue */);
|
||||
}
|
||||
*outValue = parseIntAttributeValue(&(it->second));
|
||||
return true;
|
||||
|
||||
// Returns current time when the key is not found or the value is invalid.
|
||||
int HeaderPolicy::readLastUpdatedTime() const {
|
||||
std::vector<int> keyVector;
|
||||
HeaderReadWriteUtils::insertCharactersIntoVector(LAST_UPDATED_TIME_KEY, &keyVector);
|
||||
return HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, &keyVector,
|
||||
time(0) /* defaultValue */);
|
||||
}
|
||||
|
||||
bool HeaderPolicy::writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferToWrite,
|
||||
|
@ -117,13 +99,8 @@ bool HeaderPolicy::writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferT
|
|||
// Set current time as a last updated time.
|
||||
HeaderReadWriteUtils::AttributeMap attributeMapTowrite(mAttributeMap);
|
||||
std::vector<int> updatedTimekey;
|
||||
insertCharactersIntoVector(LAST_UPDATED_TIME_KEY, &updatedTimekey);
|
||||
const time_t currentTime = time(NULL);
|
||||
std::vector<int> updatedTimeValue;
|
||||
char charBuf[LARGEST_INT_DIGIT_COUNT + 1];
|
||||
snprintf(charBuf, LARGEST_INT_DIGIT_COUNT + 1, "%ld", currentTime);
|
||||
insertCharactersIntoVector(charBuf, &updatedTimeValue);
|
||||
attributeMapTowrite[updatedTimekey] = updatedTimeValue;
|
||||
HeaderReadWriteUtils::insertCharactersIntoVector(LAST_UPDATED_TIME_KEY, &updatedTimekey);
|
||||
HeaderReadWriteUtils::setIntAttribute(&attributeMapTowrite, &updatedTimekey, time(0));
|
||||
if (!HeaderReadWriteUtils::writeHeaderAttributes(bufferToWrite, &attributeMapTowrite,
|
||||
&writingPos)) {
|
||||
return false;
|
||||
|
@ -149,30 +126,4 @@ bool HeaderPolicy::writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferT
|
|||
return attributeMap;
|
||||
}
|
||||
|
||||
/* static */ int HeaderPolicy::parseIntAttributeValue(
|
||||
const std::vector<int> *const attributeValue) {
|
||||
int value = 0;
|
||||
bool isNegative = false;
|
||||
for (size_t i = 0; i < attributeValue->size(); ++i) {
|
||||
if (i == 0 && attributeValue->at(i) == '-') {
|
||||
isNegative = true;
|
||||
} else {
|
||||
if (!isdigit(attributeValue->at(i))) {
|
||||
// If not a number, return S_INT_MIN
|
||||
return S_INT_MIN;
|
||||
}
|
||||
value *= 10;
|
||||
value += attributeValue->at(i) - '0';
|
||||
}
|
||||
}
|
||||
return isNegative ? -value : value;
|
||||
}
|
||||
|
||||
/* static */ void HeaderPolicy::insertCharactersIntoVector(const char *const characters,
|
||||
std::vector<int> *const vector) {
|
||||
for (int i = 0; characters[i]; ++i) {
|
||||
vector->push_back(characters[i]);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace latinime
|
||||
|
|
|
@ -17,7 +17,6 @@
|
|||
#ifndef LATINIME_HEADER_POLICY_H
|
||||
#define LATINIME_HEADER_POLICY_H
|
||||
|
||||
#include <cctype>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "defines.h"
|
||||
|
@ -29,16 +28,26 @@ namespace latinime {
|
|||
|
||||
class HeaderPolicy : public DictionaryHeaderStructurePolicy {
|
||||
public:
|
||||
explicit HeaderPolicy(const uint8_t *const dictBuf, const int dictSize)
|
||||
: mDictBuf(dictBuf),
|
||||
mDictFormatVersion(FormatUtils::detectFormatVersion(dictBuf, dictSize)),
|
||||
// Reads information from existing dictionary buffer.
|
||||
HeaderPolicy(const uint8_t *const dictBuf, const int dictSize)
|
||||
: mDictFormatVersion(FormatUtils::detectFormatVersion(dictBuf, dictSize)),
|
||||
mDictionaryFlags(HeaderReadWriteUtils::getFlags(dictBuf)),
|
||||
mSize(HeaderReadWriteUtils::getHeaderSize(dictBuf)),
|
||||
mAttributeMap(createAttributeMapAndReadAllAttributes(mDictBuf)),
|
||||
mAttributeMap(createAttributeMapAndReadAllAttributes(dictBuf)),
|
||||
mMultiWordCostMultiplier(readMultipleWordCostMultiplier()),
|
||||
mUsesForgettingCurve(readUsesForgettingCurveFlag()),
|
||||
mLastUpdatedTime(readLastUpdatedTime()) {}
|
||||
|
||||
// Constructs header information using an attribute map.
|
||||
HeaderPolicy(const FormatUtils::FORMAT_VERSION dictFormatVersion,
|
||||
const HeaderReadWriteUtils::AttributeMap *const attributeMap)
|
||||
: mDictFormatVersion(dictFormatVersion),
|
||||
mDictionaryFlags(HeaderReadWriteUtils::createAndGetDictionaryFlagsUsingAttributeMap(
|
||||
attributeMap)), mSize(0), mAttributeMap(*attributeMap),
|
||||
mMultiWordCostMultiplier(readUsesForgettingCurveFlag()),
|
||||
mUsesForgettingCurve(readUsesForgettingCurveFlag()),
|
||||
mLastUpdatedTime(readLastUpdatedTime()) {}
|
||||
|
||||
~HeaderPolicy() {}
|
||||
|
||||
AK_FORCE_INLINE int getSize() const {
|
||||
|
@ -81,10 +90,9 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
|
|||
static const char *const MULTIPLE_WORDS_DEMOTION_RATE_KEY;
|
||||
static const char *const USES_FORGETTING_CURVE_KEY;
|
||||
static const char *const LAST_UPDATED_TIME_KEY;
|
||||
static const float DEFAULT_MULTIPLE_WORD_COST_MULTIPLIER;
|
||||
static const int DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE;
|
||||
static const float MULTIPLE_WORD_COST_MULTIPLIER_SCALE;
|
||||
|
||||
const uint8_t *const mDictBuf;
|
||||
const FormatUtils::FORMAT_VERSION mDictFormatVersion;
|
||||
const HeaderReadWriteUtils::DictionaryFlags mDictionaryFlags;
|
||||
const int mSize;
|
||||
|
@ -99,15 +107,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
|
|||
|
||||
int readLastUpdatedTime() const;
|
||||
|
||||
bool getAttributeValueAsInt(const char *const key, int *const outValue) const;
|
||||
|
||||
static HeaderReadWriteUtils::AttributeMap createAttributeMapAndReadAllAttributes(
|
||||
const uint8_t *const dictBuf);
|
||||
|
||||
static int parseIntAttributeValue(const std::vector<int> *const attributeValue);
|
||||
|
||||
static void insertCharactersIntoVector(
|
||||
const char *const characters, std::vector<int> *const vector);
|
||||
};
|
||||
} // namespace latinime
|
||||
#endif /* LATINIME_HEADER_POLICY_H */
|
||||
|
|
|
@ -16,6 +16,8 @@
|
|||
|
||||
#include "suggest/policyimpl/dictionary/header/header_read_write_utils.h"
|
||||
|
||||
#include <cctype>
|
||||
#include <cstdio>
|
||||
#include <vector>
|
||||
|
||||
#include "defines.h"
|
||||
|
@ -43,6 +45,12 @@ const HeaderReadWriteUtils::DictionaryFlags
|
|||
const HeaderReadWriteUtils::DictionaryFlags
|
||||
HeaderReadWriteUtils::FRENCH_LIGATURE_PROCESSING_FLAG = 0x4;
|
||||
|
||||
const char *const HeaderReadWriteUtils::SUPPORTS_DYNAMIC_UPDATE_KEY = "SUPPORTS_DYNAMIC_UPDATE";
|
||||
const char *const HeaderReadWriteUtils::REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY =
|
||||
"REQUIRES_GERMAN_UMLAUT_PROCESSING";
|
||||
const char *const HeaderReadWriteUtils::REQUIRES_FRENCH_LIGATURE_PROCESSING_KEY =
|
||||
"REQUIRES_FRENCH_LIGATURE_PROCESSING";
|
||||
|
||||
/* static */ int HeaderReadWriteUtils::getHeaderSize(const uint8_t *const dictBuf) {
|
||||
// See the format of the header in the comment in
|
||||
// BinaryDictionaryFormatUtils::detectFormatVersion()
|
||||
|
@ -56,6 +64,28 @@ const HeaderReadWriteUtils::DictionaryFlags
|
|||
HEADER_MAGIC_NUMBER_SIZE + HEADER_DICTIONARY_VERSION_SIZE);
|
||||
}
|
||||
|
||||
/* static */ HeaderReadWriteUtils::DictionaryFlags
        HeaderReadWriteUtils::createAndGetDictionaryFlagsUsingAttributeMap(
                const HeaderReadWriteUtils::AttributeMap *const attributeMap) {
    // Build one key per flag attribute. Every attribute is read with a default of false.
    AttributeMap::key_type germanUmlautKey;
    insertCharactersIntoVector(REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY, &germanUmlautKey);
    AttributeMap::key_type frenchLigatureKey;
    insertCharactersIntoVector(REQUIRES_FRENCH_LIGATURE_PROCESSING_KEY, &frenchLigatureKey);
    AttributeMap::key_type dynamicUpdateKey;
    insertCharactersIntoVector(SUPPORTS_DYNAMIC_UPDATE_KEY, &dynamicUpdateKey);

    // Start from no flags and switch on the bit for each attribute that reads as true.
    DictionaryFlags flags = NO_FLAGS;
    if (readBoolAttributeValue(attributeMap, &germanUmlautKey, false /* defaultValue */)) {
        flags |= GERMAN_UMLAUT_PROCESSING_FLAG;
    }
    if (readBoolAttributeValue(attributeMap, &frenchLigatureKey, false /* defaultValue */)) {
        flags |= FRENCH_LIGATURE_PROCESSING_FLAG;
    }
    if (readBoolAttributeValue(attributeMap, &dynamicUpdateKey, false /* defaultValue */)) {
        flags |= SUPPORTS_DYNAMIC_UPDATE_FLAG;
    }
    return flags;
}
|
||||
|
||||
/* static */ void HeaderReadWriteUtils::fetchAllHeaderAttributes(const uint8_t *const dictBuf,
|
||||
AttributeMap *const headerAttributes) {
|
||||
const int headerSize = getHeaderSize(dictBuf);
|
||||
|
@ -128,4 +158,57 @@ const HeaderReadWriteUtils::DictionaryFlags
|
|||
return true;
|
||||
}
|
||||
|
||||
/* static */ void HeaderReadWriteUtils::setBoolAttribute(AttributeMap *const headerAttributes,
        const AttributeMap::key_type *const key, const bool value) {
    // Booleans are stored through the int setter: 1 for true, 0 for false.
    const int encodedValue = value ? 1 : 0;
    setIntAttribute(headerAttributes, key, encodedValue);
}
|
||||
|
||||
/* static */ void HeaderReadWriteUtils::setIntAttribute(AttributeMap *const headerAttributes,
        const AttributeMap::key_type *const key, const int value) {
    // Render the value as decimal text; snprintf() is bounded by the buffer size.
    char decimalText[LARGEST_INT_DIGIT_COUNT + 1];
    snprintf(decimalText, sizeof(decimalText), "%d", value);

    // Convert the text into the map's value representation and store it under the key,
    // replacing any value that was already there.
    AttributeMap::mapped_type encodedValue;
    insertCharactersIntoVector(decimalText, &encodedValue);
    (*headerAttributes)[*key] = encodedValue;
}
|
||||
|
||||
/* static */ bool HeaderReadWriteUtils::readBoolAttributeValue(
|
||||
const AttributeMap *const headerAttributes, const AttributeMap::key_type *const key,
|
||||
const bool defaultValue) {
|
||||
const int intDefaultValue = defaultValue ? 1 : 0;
|
||||
const int intValue = readIntAttributeValue(headerAttributes, key, intDefaultValue);
|
||||
return intValue != 0;
|
||||
}
|
||||
|
||||
/* static */ int HeaderReadWriteUtils::readIntAttributeValue(
|
||||
const AttributeMap *const headerAttributes, const AttributeMap::key_type *const key,
|
||||
const int defaultValue) {
|
||||
AttributeMap::const_iterator it = headerAttributes->find(*key);
|
||||
if (it != headerAttributes->end()) {
|
||||
int value = 0;
|
||||
bool isNegative = false;
|
||||
for (size_t i = 0; i < it->second.size(); ++i) {
|
||||
if (i == 0 && it->second.at(i) == '-') {
|
||||
isNegative = true;
|
||||
} else {
|
||||
if (!isdigit(it->second.at(i))) {
|
||||
// If not a number.
|
||||
return defaultValue;
|
||||
}
|
||||
value *= 10;
|
||||
value += it->second.at(i) - '0';
|
||||
}
|
||||
}
|
||||
return isNegative ? -value : value;
|
||||
}
|
||||
return defaultValue;
|
||||
}
|
||||
|
||||
/* static */ void HeaderReadWriteUtils::insertCharactersIntoVector(const char *const characters,
        std::vector<int> *const vector) {
    // Append every character up to (not including) the terminating NUL, in order.
    const char *cursor = characters;
    while (*cursor) {
        vector->push_back(*cursor);
        ++cursor;
    }
}
|
||||
|
||||
} // namespace latinime
|
||||
|
|
|
@ -54,6 +54,9 @@ class HeaderReadWriteUtils {
|
|||
+ HEADER_SIZE_FIELD_SIZE;
|
||||
}
|
||||
|
||||
static DictionaryFlags createAndGetDictionaryFlagsUsingAttributeMap(
|
||||
const HeaderReadWriteUtils::AttributeMap *const attributeMap);
|
||||
|
||||
static void fetchAllHeaderAttributes(const uint8_t *const dictBuf,
|
||||
AttributeMap *const headerAttributes);
|
||||
|
||||
|
@ -69,6 +72,24 @@ class HeaderReadWriteUtils {
|
|||
static bool writeHeaderAttributes(BufferWithExtendableBuffer *const buffer,
|
||||
const AttributeMap *const headerAttributes, int *const writingPos);
|
||||
|
||||
/**
|
||||
* Methods for header attributes.
|
||||
*/
|
||||
static void setBoolAttribute(AttributeMap *const headerAttributes,
|
||||
const AttributeMap::key_type *const key, const bool value);
|
||||
|
||||
static void setIntAttribute(AttributeMap *const headerAttributes,
|
||||
const AttributeMap::key_type *const key, const int value);
|
||||
|
||||
static bool readBoolAttributeValue(const AttributeMap *const headerAttributes,
|
||||
const AttributeMap::key_type *const key, const bool defaultValue);
|
||||
|
||||
static int readIntAttributeValue(const AttributeMap *const headerAttributes,
|
||||
const AttributeMap::key_type *const key, const int defaultValue);
|
||||
|
||||
static void insertCharactersIntoVector(const char *const characters,
|
||||
AttributeMap::key_type *const key);
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderReadWriteUtils);
|
||||
|
||||
|
@ -87,7 +108,10 @@ class HeaderReadWriteUtils {
|
|||
static const DictionaryFlags GERMAN_UMLAUT_PROCESSING_FLAG;
|
||||
static const DictionaryFlags SUPPORTS_DYNAMIC_UPDATE_FLAG;
|
||||
static const DictionaryFlags FRENCH_LIGATURE_PROCESSING_FLAG;
|
||||
static const DictionaryFlags CONTAINS_BIGRAMS_FLAG;
|
||||
|
||||
static const char *const SUPPORTS_DYNAMIC_UPDATE_KEY;
|
||||
static const char *const REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY;
|
||||
static const char *const REQUIRES_FRENCH_LIGATURE_PROCESSING_KEY;
|
||||
};
|
||||
}
|
||||
#endif /* LATINIME_HEADER_READ_WRITE_UTILS_H */
|
||||
|
|
Loading…
Reference in New Issue