Refactoring header attribute reading.

Bug: 6669677
Change-Id: Ifc11da614d5c331ac61019a324e3a0ff187329cd
main
Keisuke Kuroyanagi 2013-09-26 10:45:03 +09:00
parent 4350a93aa5
commit 989596844e
4 changed files with 145 additions and 86 deletions

View File

@ -25,7 +25,7 @@ namespace latinime {
const char *const HeaderPolicy::MULTIPLE_WORDS_DEMOTION_RATE_KEY = "MULTIPLE_WORDS_DEMOTION_RATE"; const char *const HeaderPolicy::MULTIPLE_WORDS_DEMOTION_RATE_KEY = "MULTIPLE_WORDS_DEMOTION_RATE";
const char *const HeaderPolicy::USES_FORGETTING_CURVE_KEY = "USES_FORGETTING_CURVE"; const char *const HeaderPolicy::USES_FORGETTING_CURVE_KEY = "USES_FORGETTING_CURVE";
const char *const HeaderPolicy::LAST_UPDATED_TIME_KEY = "date"; const char *const HeaderPolicy::LAST_UPDATED_TIME_KEY = "date";
const float HeaderPolicy::DEFAULT_MULTIPLE_WORD_COST_MULTIPLIER = 1.0f; const int HeaderPolicy::DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE = 100;
const float HeaderPolicy::MULTIPLE_WORD_COST_MULTIPLIER_SCALE = 100.0f; const float HeaderPolicy::MULTIPLE_WORD_COST_MULTIPLIER_SCALE = 100.0f;
// Used for logging. Question mark is used to indicate that the key is not found. // Used for logging. Question mark is used to indicate that the key is not found.
@ -37,7 +37,7 @@ void HeaderPolicy::readHeaderValueOrQuestionMark(const char *const key, int *out
return; return;
} }
std::vector<int> keyCodePointVector; std::vector<int> keyCodePointVector;
insertCharactersIntoVector(key, &keyCodePointVector); HeaderReadWriteUtils::insertCharactersIntoVector(key, &keyCodePointVector);
HeaderReadWriteUtils::AttributeMap::const_iterator it = mAttributeMap.find(keyCodePointVector); HeaderReadWriteUtils::AttributeMap::const_iterator it = mAttributeMap.find(keyCodePointVector);
if (it == mAttributeMap.end()) { if (it == mAttributeMap.end()) {
// The key was not found. // The key was not found.
@ -53,47 +53,29 @@ void HeaderPolicy::readHeaderValueOrQuestionMark(const char *const key, int *out
} }
float HeaderPolicy::readMultipleWordCostMultiplier() const { float HeaderPolicy::readMultipleWordCostMultiplier() const {
int attributeValue = 0; std::vector<int> keyVector;
if (getAttributeValueAsInt(MULTIPLE_WORDS_DEMOTION_RATE_KEY, &attributeValue)) { HeaderReadWriteUtils::insertCharactersIntoVector(MULTIPLE_WORDS_DEMOTION_RATE_KEY, &keyVector);
if (attributeValue <= 0) { const int demotionRate = HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
return static_cast<float>(MAX_VALUE_FOR_WEIGHTING); &keyVector, DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE);
} if (demotionRate <= 0) {
return MULTIPLE_WORD_COST_MULTIPLIER_SCALE / static_cast<float>(attributeValue); return static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
} else {
return DEFAULT_MULTIPLE_WORD_COST_MULTIPLIER;
} }
return MULTIPLE_WORD_COST_MULTIPLIER_SCALE / static_cast<float>(demotionRate);
} }
bool HeaderPolicy::readUsesForgettingCurveFlag() const { bool HeaderPolicy::readUsesForgettingCurveFlag() const {
int attributeValue = 0;
if (getAttributeValueAsInt(USES_FORGETTING_CURVE_KEY, &attributeValue)) {
return attributeValue != 0;
} else {
return false;
}
}
// Returns S_INT_MIN when the key is not found or the value is invalid.
int HeaderPolicy::readLastUpdatedTime() const {
int attributeValue = 0;
if (getAttributeValueAsInt(LAST_UPDATED_TIME_KEY, &attributeValue)) {
return attributeValue;
} else {
return S_INT_MIN;
}
}
// Returns whether the key is found or not and stores the found value into outValue.
bool HeaderPolicy::getAttributeValueAsInt(const char *const key, int *const outValue) const {
std::vector<int> keyVector; std::vector<int> keyVector;
insertCharactersIntoVector(key, &keyVector); HeaderReadWriteUtils::insertCharactersIntoVector(USES_FORGETTING_CURVE_KEY, &keyVector);
HeaderReadWriteUtils::AttributeMap::const_iterator it = mAttributeMap.find(keyVector); return HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, &keyVector,
if (it == mAttributeMap.end()) { false /* defaultValue */);
// The key was not found. }
return false;
} // Returns current time when the key is not found or the value is invalid.
*outValue = parseIntAttributeValue(&(it->second)); int HeaderPolicy::readLastUpdatedTime() const {
return true; std::vector<int> keyVector;
HeaderReadWriteUtils::insertCharactersIntoVector(LAST_UPDATED_TIME_KEY, &keyVector);
return HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, &keyVector,
time(0) /* defaultValue */);
} }
bool HeaderPolicy::writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferToWrite, bool HeaderPolicy::writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferToWrite,
@ -117,13 +99,8 @@ bool HeaderPolicy::writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferT
// Set current time as a last updated time. // Set current time as a last updated time.
HeaderReadWriteUtils::AttributeMap attributeMapTowrite(mAttributeMap); HeaderReadWriteUtils::AttributeMap attributeMapTowrite(mAttributeMap);
std::vector<int> updatedTimekey; std::vector<int> updatedTimekey;
insertCharactersIntoVector(LAST_UPDATED_TIME_KEY, &updatedTimekey); HeaderReadWriteUtils::insertCharactersIntoVector(LAST_UPDATED_TIME_KEY, &updatedTimekey);
const time_t currentTime = time(NULL); HeaderReadWriteUtils::setIntAttribute(&attributeMapTowrite, &updatedTimekey, time(0));
std::vector<int> updatedTimeValue;
char charBuf[LARGEST_INT_DIGIT_COUNT + 1];
snprintf(charBuf, LARGEST_INT_DIGIT_COUNT + 1, "%ld", currentTime);
insertCharactersIntoVector(charBuf, &updatedTimeValue);
attributeMapTowrite[updatedTimekey] = updatedTimeValue;
if (!HeaderReadWriteUtils::writeHeaderAttributes(bufferToWrite, &attributeMapTowrite, if (!HeaderReadWriteUtils::writeHeaderAttributes(bufferToWrite, &attributeMapTowrite,
&writingPos)) { &writingPos)) {
return false; return false;
@ -149,30 +126,4 @@ bool HeaderPolicy::writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferT
return attributeMap; return attributeMap;
} }
/* static */ int HeaderPolicy::parseIntAttributeValue(
const std::vector<int> *const attributeValue) {
int value = 0;
bool isNegative = false;
for (size_t i = 0; i < attributeValue->size(); ++i) {
if (i == 0 && attributeValue->at(i) == '-') {
isNegative = true;
} else {
if (!isdigit(attributeValue->at(i))) {
// If not a number, return S_INT_MIN
return S_INT_MIN;
}
value *= 10;
value += attributeValue->at(i) - '0';
}
}
return isNegative ? -value : value;
}
/* static */ void HeaderPolicy::insertCharactersIntoVector(const char *const characters,
std::vector<int> *const vector) {
for (int i = 0; characters[i]; ++i) {
vector->push_back(characters[i]);
}
}
} // namespace latinime } // namespace latinime

View File

@ -17,7 +17,6 @@
#ifndef LATINIME_HEADER_POLICY_H #ifndef LATINIME_HEADER_POLICY_H
#define LATINIME_HEADER_POLICY_H #define LATINIME_HEADER_POLICY_H
#include <cctype>
#include <stdint.h> #include <stdint.h>
#include "defines.h" #include "defines.h"
@ -29,16 +28,26 @@ namespace latinime {
class HeaderPolicy : public DictionaryHeaderStructurePolicy { class HeaderPolicy : public DictionaryHeaderStructurePolicy {
public: public:
explicit HeaderPolicy(const uint8_t *const dictBuf, const int dictSize) // Reads information from existing dictionary buffer.
: mDictBuf(dictBuf), HeaderPolicy(const uint8_t *const dictBuf, const int dictSize)
mDictFormatVersion(FormatUtils::detectFormatVersion(dictBuf, dictSize)), : mDictFormatVersion(FormatUtils::detectFormatVersion(dictBuf, dictSize)),
mDictionaryFlags(HeaderReadWriteUtils::getFlags(dictBuf)), mDictionaryFlags(HeaderReadWriteUtils::getFlags(dictBuf)),
mSize(HeaderReadWriteUtils::getHeaderSize(dictBuf)), mSize(HeaderReadWriteUtils::getHeaderSize(dictBuf)),
mAttributeMap(createAttributeMapAndReadAllAttributes(mDictBuf)), mAttributeMap(createAttributeMapAndReadAllAttributes(dictBuf)),
mMultiWordCostMultiplier(readMultipleWordCostMultiplier()), mMultiWordCostMultiplier(readMultipleWordCostMultiplier()),
mUsesForgettingCurve(readUsesForgettingCurveFlag()), mUsesForgettingCurve(readUsesForgettingCurveFlag()),
mLastUpdatedTime(readLastUpdatedTime()) {} mLastUpdatedTime(readLastUpdatedTime()) {}
// Constructs header information using an attribute map instead of a dictionary buffer.
// The header size is initialized to 0, and the dictionary flags are derived from the
// attributes in the map.
// The read*() helpers below consult mAttributeMap, so mAttributeMap has to be initialized
// before the members they compute.
// NOTE(review): the member declaration order is only partly visible here -- confirm that
// mAttributeMap is declared before mMultiWordCostMultiplier.
HeaderPolicy(const FormatUtils::FORMAT_VERSION dictFormatVersion,
        const HeaderReadWriteUtils::AttributeMap *const attributeMap)
        : mDictFormatVersion(dictFormatVersion),
          mDictionaryFlags(HeaderReadWriteUtils::createAndGetDictionaryFlagsUsingAttributeMap(
                  attributeMap)),
          mSize(0),
          mAttributeMap(*attributeMap),
          // The multiplier is derived from the demotion rate attribute, not from the
          // forgetting curve flag.
          mMultiWordCostMultiplier(readMultipleWordCostMultiplier()),
          mUsesForgettingCurve(readUsesForgettingCurveFlag()),
          mLastUpdatedTime(readLastUpdatedTime()) {}
~HeaderPolicy() {} ~HeaderPolicy() {}
AK_FORCE_INLINE int getSize() const { AK_FORCE_INLINE int getSize() const {
@ -81,10 +90,9 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
static const char *const MULTIPLE_WORDS_DEMOTION_RATE_KEY; static const char *const MULTIPLE_WORDS_DEMOTION_RATE_KEY;
static const char *const USES_FORGETTING_CURVE_KEY; static const char *const USES_FORGETTING_CURVE_KEY;
static const char *const LAST_UPDATED_TIME_KEY; static const char *const LAST_UPDATED_TIME_KEY;
static const float DEFAULT_MULTIPLE_WORD_COST_MULTIPLIER; static const int DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE;
static const float MULTIPLE_WORD_COST_MULTIPLIER_SCALE; static const float MULTIPLE_WORD_COST_MULTIPLIER_SCALE;
const uint8_t *const mDictBuf;
const FormatUtils::FORMAT_VERSION mDictFormatVersion; const FormatUtils::FORMAT_VERSION mDictFormatVersion;
const HeaderReadWriteUtils::DictionaryFlags mDictionaryFlags; const HeaderReadWriteUtils::DictionaryFlags mDictionaryFlags;
const int mSize; const int mSize;
@ -99,15 +107,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
int readLastUpdatedTime() const; int readLastUpdatedTime() const;
bool getAttributeValueAsInt(const char *const key, int *const outValue) const;
static HeaderReadWriteUtils::AttributeMap createAttributeMapAndReadAllAttributes( static HeaderReadWriteUtils::AttributeMap createAttributeMapAndReadAllAttributes(
const uint8_t *const dictBuf); const uint8_t *const dictBuf);
static int parseIntAttributeValue(const std::vector<int> *const attributeValue);
static void insertCharactersIntoVector(
const char *const characters, std::vector<int> *const vector);
}; };
} // namespace latinime } // namespace latinime
#endif /* LATINIME_HEADER_POLICY_H */ #endif /* LATINIME_HEADER_POLICY_H */

View File

@ -16,6 +16,8 @@
#include "suggest/policyimpl/dictionary/header/header_read_write_utils.h" #include "suggest/policyimpl/dictionary/header/header_read_write_utils.h"
#include <cctype>
#include <climits>
#include <cstdio>
#include <vector> #include <vector>
#include "defines.h" #include "defines.h"
@ -43,6 +45,12 @@ const HeaderReadWriteUtils::DictionaryFlags
const HeaderReadWriteUtils::DictionaryFlags const HeaderReadWriteUtils::DictionaryFlags
HeaderReadWriteUtils::FRENCH_LIGATURE_PROCESSING_FLAG = 0x4; HeaderReadWriteUtils::FRENCH_LIGATURE_PROCESSING_FLAG = 0x4;
const char *const HeaderReadWriteUtils::SUPPORTS_DYNAMIC_UPDATE_KEY = "SUPPORTS_DYNAMIC_UPDATE";
const char *const HeaderReadWriteUtils::REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY =
"REQUIRES_GERMAN_UMLAUT_PROCESSING";
const char *const HeaderReadWriteUtils::REQUIRES_FRENCH_LIGATURE_PROCESSING_KEY =
"REQUIRES_FRENCH_LIGATURE_PROCESSING";
/* static */ int HeaderReadWriteUtils::getHeaderSize(const uint8_t *const dictBuf) { /* static */ int HeaderReadWriteUtils::getHeaderSize(const uint8_t *const dictBuf) {
// See the format of the header in the comment in // See the format of the header in the comment in
// BinaryDictionaryFormatUtils::detectFormatVersion() // BinaryDictionaryFormatUtils::detectFormatVersion()
@ -56,6 +64,28 @@ const HeaderReadWriteUtils::DictionaryFlags
HEADER_MAGIC_NUMBER_SIZE + HEADER_DICTIONARY_VERSION_SIZE); HEADER_MAGIC_NUMBER_SIZE + HEADER_DICTIONARY_VERSION_SIZE);
} }
// Builds the dictionary flag bits from three boolean attributes in attributeMap. An attribute
// that is missing from the map, or whose value is not a number, counts as false.
/* static */ HeaderReadWriteUtils::DictionaryFlags
        HeaderReadWriteUtils::createAndGetDictionaryFlagsUsingAttributeMap(
                const HeaderReadWriteUtils::AttributeMap *const attributeMap) {
    DictionaryFlags flags = NO_FLAGS;

    AttributeMap::key_type germanUmlautKey;
    insertCharactersIntoVector(REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY, &germanUmlautKey);
    if (readBoolAttributeValue(attributeMap, &germanUmlautKey, false /* defaultValue */)) {
        flags |= GERMAN_UMLAUT_PROCESSING_FLAG;
    }

    AttributeMap::key_type frenchLigatureKey;
    insertCharactersIntoVector(REQUIRES_FRENCH_LIGATURE_PROCESSING_KEY, &frenchLigatureKey);
    if (readBoolAttributeValue(attributeMap, &frenchLigatureKey, false /* defaultValue */)) {
        flags |= FRENCH_LIGATURE_PROCESSING_FLAG;
    }

    AttributeMap::key_type dynamicUpdateKey;
    insertCharactersIntoVector(SUPPORTS_DYNAMIC_UPDATE_KEY, &dynamicUpdateKey);
    if (readBoolAttributeValue(attributeMap, &dynamicUpdateKey, false /* defaultValue */)) {
        flags |= SUPPORTS_DYNAMIC_UPDATE_FLAG;
    }

    return flags;
}
/* static */ void HeaderReadWriteUtils::fetchAllHeaderAttributes(const uint8_t *const dictBuf, /* static */ void HeaderReadWriteUtils::fetchAllHeaderAttributes(const uint8_t *const dictBuf,
AttributeMap *const headerAttributes) { AttributeMap *const headerAttributes) {
const int headerSize = getHeaderSize(dictBuf); const int headerSize = getHeaderSize(dictBuf);
@ -128,4 +158,57 @@ const HeaderReadWriteUtils::DictionaryFlags
return true; return true;
} }
// Stores a boolean attribute under key. Booleans are written as the integer attribute
// 1 (true) or 0 (false).
/* static */ void HeaderReadWriteUtils::setBoolAttribute(AttributeMap *const headerAttributes,
        const AttributeMap::key_type *const key, const bool value) {
    const int valueAsInt = value ? 1 : 0;
    setIntAttribute(headerAttributes, key, valueAsInt);
}
// Stores an integer attribute under key, replacing any value already stored for that key.
// The value is written as its decimal text, one vector element per character.
/* static */ void HeaderReadWriteUtils::setIntAttribute(AttributeMap *const headerAttributes,
        const AttributeMap::key_type *const key, const int value) {
    // NOTE(review): presumably LARGEST_INT_DIGIT_COUNT already accounts for a leading '-';
    // confirm against its definition.
    char decimalText[LARGEST_INT_DIGIT_COUNT + 1];
    snprintf(decimalText, sizeof(decimalText), "%d", value);

    AttributeMap::mapped_type encodedValue;
    insertCharactersIntoVector(decimalText, &encodedValue);
    (*headerAttributes)[*key] = encodedValue;
}
// Reads the attribute stored under key as a boolean: the attribute is read as an integer and
// any non-zero value means true. defaultValue is mapped to 1 or 0 and handed to
// readIntAttributeValue() as its fallback.
/* static */ bool HeaderReadWriteUtils::readBoolAttributeValue(
        const AttributeMap *const headerAttributes, const AttributeMap::key_type *const key,
        const bool defaultValue) {
    return readIntAttributeValue(headerAttributes, key, defaultValue ? 1 : 0) != 0;
}
// Reads the attribute stored under key as a decimal integer.
//
// The stored value is a sequence of code points: an optional leading '-' followed by one or
// more ASCII digits. defaultValue is returned when:
//   - the key is not in the map,
//   - the value has no digits (empty, or just "-"),
//   - the value contains anything other than ASCII digits after the optional sign, or
//   - the number does not fit in an int.
/* static */ int HeaderReadWriteUtils::readIntAttributeValue(
        const AttributeMap *const headerAttributes, const AttributeMap::key_type *const key,
        const int defaultValue) {
    const AttributeMap::const_iterator it = headerAttributes->find(*key);
    if (it == headerAttributes->end()) {
        // The key was not found.
        return defaultValue;
    }
    const AttributeMap::mapped_type &codePoints = it->second;
    const bool isNegative = !codePoints.empty() && codePoints[0] == '-';
    const size_t firstDigitIndex = isNegative ? 1 : 0;
    if (firstDigitIndex >= codePoints.size()) {
        // Nothing to parse: the value is empty or consists of the sign only.
        return defaultValue;
    }
    // Accumulate as a non-positive number so that INT_MIN can be parsed without overflow.
    int negatedValue = 0;
    for (size_t i = firstDigitIndex; i < codePoints.size(); ++i) {
        const int codePoint = codePoints[i];
        // Compare against the digit range directly. isdigit() must not be used here because
        // its behavior is undefined for code points not representable as unsigned char.
        if (codePoint < '0' || codePoint > '9') {
            // If not a number.
            return defaultValue;
        }
        const int digit = codePoint - '0';
        if (negatedValue < (INT_MIN + digit) / 10) {
            // Appending this digit would overflow int.
            return defaultValue;
        }
        negatedValue = negatedValue * 10 - digit;
    }
    if (isNegative) {
        return negatedValue;
    }
    if (negatedValue == INT_MIN) {
        // The magnitude is INT_MAX + 1, which cannot be represented as a positive int.
        return defaultValue;
    }
    return -negatedValue;
}
// Appends every character of the NUL-terminated string `characters` to `vector`, one element
// per char. The terminating NUL is not appended and existing elements are kept.
/* static */ void HeaderReadWriteUtils::insertCharactersIntoVector(const char *const characters,
        std::vector<int> *const vector) {
    for (const char *cursor = characters; *cursor; ++cursor) {
        vector->push_back(*cursor);
    }
}
} // namespace latinime } // namespace latinime

View File

@ -54,6 +54,9 @@ class HeaderReadWriteUtils {
+ HEADER_SIZE_FIELD_SIZE; + HEADER_SIZE_FIELD_SIZE;
} }
// Builds the dictionary flags from the SUPPORTS_DYNAMIC_UPDATE,
// REQUIRES_GERMAN_UMLAUT_PROCESSING and REQUIRES_FRENCH_LIGATURE_PROCESSING boolean
// attributes of attributeMap. A flag is left unset when its attribute is missing.
static DictionaryFlags createAndGetDictionaryFlagsUsingAttributeMap(
const HeaderReadWriteUtils::AttributeMap *const attributeMap);
static void fetchAllHeaderAttributes(const uint8_t *const dictBuf, static void fetchAllHeaderAttributes(const uint8_t *const dictBuf,
AttributeMap *const headerAttributes); AttributeMap *const headerAttributes);
@ -69,6 +72,24 @@ class HeaderReadWriteUtils {
static bool writeHeaderAttributes(BufferWithExtendableBuffer *const buffer, static bool writeHeaderAttributes(BufferWithExtendableBuffer *const buffer,
const AttributeMap *const headerAttributes, int *const writingPos); const AttributeMap *const headerAttributes, int *const writingPos);
/**
* Methods for header attributes.
*
* Attribute values are stored as text. Integers are written as their decimal representation
* and booleans are written as the integers 1 and 0.
*/
// Stores value under key as the integer attribute 1 (true) or 0 (false).
static void setBoolAttribute(AttributeMap *const headerAttributes,
const AttributeMap::key_type *const key, const bool value);
// Stores value under key as decimal text, replacing any value already stored for key.
static void setIntAttribute(AttributeMap *const headerAttributes,
const AttributeMap::key_type *const key, const int value);
// Reads the attribute under key as an integer and returns whether it is non-zero.
// defaultValue is used as the fallback when the key is not found.
static bool readBoolAttributeValue(const AttributeMap *const headerAttributes,
const AttributeMap::key_type *const key, const bool defaultValue);
// Parses the attribute under key as a decimal integer with an optional leading '-'.
// Returns defaultValue when the key is not found or the value contains a character that is
// not a decimal digit.
static int readIntAttributeValue(const AttributeMap *const headerAttributes,
const AttributeMap::key_type *const key, const int defaultValue);
// Appends each character of the NUL-terminated string `characters` to `key`.
static void insertCharactersIntoVector(const char *const characters,
AttributeMap::key_type *const key);
private: private:
DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderReadWriteUtils); DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderReadWriteUtils);
@ -87,7 +108,10 @@ class HeaderReadWriteUtils {
static const DictionaryFlags GERMAN_UMLAUT_PROCESSING_FLAG; static const DictionaryFlags GERMAN_UMLAUT_PROCESSING_FLAG;
static const DictionaryFlags SUPPORTS_DYNAMIC_UPDATE_FLAG; static const DictionaryFlags SUPPORTS_DYNAMIC_UPDATE_FLAG;
static const DictionaryFlags FRENCH_LIGATURE_PROCESSING_FLAG; static const DictionaryFlags FRENCH_LIGATURE_PROCESSING_FLAG;
static const DictionaryFlags CONTAINS_BIGRAMS_FLAG;
static const char *const SUPPORTS_DYNAMIC_UPDATE_KEY;
static const char *const REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY;
static const char *const REQUIRES_FRENCH_LIGATURE_PROCESSING_KEY;
}; };
} }
#endif /* LATINIME_HEADER_READ_WRITE_UTILS_H */ #endif /* LATINIME_HEADER_READ_WRITE_UTILS_H */