Merge "Quit using BinaryDictionaryInfo in header reading methods."
This commit is contained in:
commit
b40d14226b
7 changed files with 40 additions and 82 deletions
|
@ -16,6 +16,8 @@
|
||||||
|
|
||||||
#include "suggest/core/dictionary/binary_dictionary_format_utils.h"
|
#include "suggest/core/dictionary/binary_dictionary_format_utils.h"
|
||||||
|
|
||||||
|
#include "suggest/core/dictionary/byte_array_utils.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -27,7 +29,6 @@ const int BinaryDictionaryFormatUtils::DICTIONARY_MINIMUM_SIZE = 4;
|
||||||
/**
|
/**
|
||||||
* Format versions
|
* Format versions
|
||||||
*/
|
*/
|
||||||
|
|
||||||
// The versions of Latin IME that only handle format version 1 only test for the magic
|
// The versions of Latin IME that only handle format version 1 only test for the magic
|
||||||
// number, so we had to change it so that version 2 files would be rejected by older
|
// number, so we had to change it so that version 2 files would be rejected by older
|
||||||
// implementations. On this occasion, we made the magic number 32 bits long.
|
// implementations. On this occasion, we made the magic number 32 bits long.
|
||||||
|
|
|
@ -20,7 +20,6 @@
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "suggest/core/dictionary/byte_array_utils.h"
|
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
|
|
|
@ -17,7 +17,6 @@
|
||||||
#include "suggest/core/dictionary/binary_dictionary_header.h"
|
#include "suggest/core/dictionary/binary_dictionary_header.h"
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "suggest/core/dictionary/binary_dictionary_info.h"
|
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
|
@ -26,16 +25,15 @@ const char *const BinaryDictionaryHeader::MULTIPLE_WORDS_DEMOTION_RATE_KEY =
|
||||||
const float BinaryDictionaryHeader::DEFAULT_MULTI_WORD_COST_MULTIPLIER = 1.0f;
|
const float BinaryDictionaryHeader::DEFAULT_MULTI_WORD_COST_MULTIPLIER = 1.0f;
|
||||||
const float BinaryDictionaryHeader::MULTI_WORD_COST_MULTIPLIER_SCALE = 100.0f;
|
const float BinaryDictionaryHeader::MULTI_WORD_COST_MULTIPLIER_SCALE = 100.0f;
|
||||||
|
|
||||||
BinaryDictionaryHeader::BinaryDictionaryHeader(
|
BinaryDictionaryHeader::BinaryDictionaryHeader(const uint8_t *const dictBuf)
|
||||||
const BinaryDictionaryInfo *const binaryDictionaryInfo)
|
: mDictBuf(dictBuf),
|
||||||
: mBinaryDictionaryInfo(binaryDictionaryInfo),
|
mDictionaryFlags(BinaryDictionaryHeaderReadingUtils::getFlags(mDictBuf)),
|
||||||
mDictionaryFlags(BinaryDictionaryHeaderReadingUtils::getFlags(binaryDictionaryInfo)),
|
mSize(BinaryDictionaryHeaderReadingUtils::getHeaderSize(mDictBuf)),
|
||||||
mSize(BinaryDictionaryHeaderReadingUtils::getHeaderSize(binaryDictionaryInfo)),
|
|
||||||
mMultiWordCostMultiplier(readMultiWordCostMultiplier()) {}
|
mMultiWordCostMultiplier(readMultiWordCostMultiplier()) {}
|
||||||
|
|
||||||
float BinaryDictionaryHeader::readMultiWordCostMultiplier() const {
|
float BinaryDictionaryHeader::readMultiWordCostMultiplier() const {
|
||||||
const int headerValue = BinaryDictionaryHeaderReadingUtils::readHeaderValueInt(
|
const int headerValue = BinaryDictionaryHeaderReadingUtils::readHeaderValueInt(
|
||||||
mBinaryDictionaryInfo, MULTIPLE_WORDS_DEMOTION_RATE_KEY);
|
mDictBuf, MULTIPLE_WORDS_DEMOTION_RATE_KEY);
|
||||||
if (headerValue == S_INT_MIN) {
|
if (headerValue == S_INT_MIN) {
|
||||||
// not found
|
// not found
|
||||||
return DEFAULT_MULTI_WORD_COST_MULTIPLIER;
|
return DEFAULT_MULTI_WORD_COST_MULTIPLIER;
|
||||||
|
|
|
@ -17,6 +17,8 @@
|
||||||
#ifndef LATINIME_BINARY_DICTIONARY_HEADER_H
|
#ifndef LATINIME_BINARY_DICTIONARY_HEADER_H
|
||||||
#define LATINIME_BINARY_DICTIONARY_HEADER_H
|
#define LATINIME_BINARY_DICTIONARY_HEADER_H
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "suggest/core/dictionary/binary_dictionary_header_reading_utils.h"
|
#include "suggest/core/dictionary/binary_dictionary_header_reading_utils.h"
|
||||||
|
|
||||||
|
@ -28,9 +30,10 @@ class BinaryDictionaryInfo;
|
||||||
* This class abstracts dictionary header structures and provides an interface to access dictionary
|
* This class abstracts dictionary header structures and provides an interface to access dictionary
|
||||||
* header information.
|
* header information.
|
||||||
*/
|
*/
|
||||||
|
// TODO: Move header classes to policyimpl.
|
||||||
class BinaryDictionaryHeader {
|
class BinaryDictionaryHeader {
|
||||||
public:
|
public:
|
||||||
explicit BinaryDictionaryHeader(const BinaryDictionaryInfo *const binaryDictionaryInfo);
|
explicit BinaryDictionaryHeader(const uint8_t *const dictBuf);
|
||||||
|
|
||||||
AK_FORCE_INLINE int getSize() const {
|
AK_FORCE_INLINE int getSize() const {
|
||||||
return mSize;
|
return mSize;
|
||||||
|
@ -60,7 +63,7 @@ class BinaryDictionaryHeader {
|
||||||
outValue[0] = '\0';
|
outValue[0] = '\0';
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (!BinaryDictionaryHeaderReadingUtils::readHeaderValue(mBinaryDictionaryInfo,
|
if (!BinaryDictionaryHeaderReadingUtils::readHeaderValue(mDictBuf,
|
||||||
key, outValue, outValueSize)) {
|
key, outValue, outValueSize)) {
|
||||||
outValue[0] = '?';
|
outValue[0] = '?';
|
||||||
outValue[1] = '\0';
|
outValue[1] = '\0';
|
||||||
|
@ -74,7 +77,7 @@ class BinaryDictionaryHeader {
|
||||||
static const float DEFAULT_MULTI_WORD_COST_MULTIPLIER;
|
static const float DEFAULT_MULTI_WORD_COST_MULTIPLIER;
|
||||||
static const float MULTI_WORD_COST_MULTIPLIER_SCALE;
|
static const float MULTI_WORD_COST_MULTIPLIER_SCALE;
|
||||||
|
|
||||||
const BinaryDictionaryInfo *const mBinaryDictionaryInfo;
|
const uint8_t *const mDictBuf;
|
||||||
const BinaryDictionaryHeaderReadingUtils::DictionaryFlags mDictionaryFlags;
|
const BinaryDictionaryHeaderReadingUtils::DictionaryFlags mDictionaryFlags;
|
||||||
const int mSize;
|
const int mSize;
|
||||||
const float mMultiWordCostMultiplier;
|
const float mMultiWordCostMultiplier;
|
||||||
|
|
|
@ -20,12 +20,11 @@
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "suggest/core/dictionary/binary_dictionary_info.h"
|
#include "suggest/core/dictionary/byte_array_utils.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
const int BinaryDictionaryHeaderReadingUtils::MAX_OPTION_KEY_LENGTH = 256;
|
const int BinaryDictionaryHeaderReadingUtils::MAX_OPTION_KEY_LENGTH = 256;
|
||||||
|
|
||||||
const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_MAGIC_NUMBER_SIZE = 4;
|
const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_MAGIC_NUMBER_SIZE = 4;
|
||||||
const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_DICTIONARY_VERSION_SIZE = 2;
|
const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_DICTIONARY_VERSION_SIZE = 2;
|
||||||
const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_FLAG_SIZE = 2;
|
const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_FLAG_SIZE = 2;
|
||||||
|
@ -43,68 +42,54 @@ const BinaryDictionaryHeaderReadingUtils::DictionaryFlags
|
||||||
const BinaryDictionaryHeaderReadingUtils::DictionaryFlags
|
const BinaryDictionaryHeaderReadingUtils::DictionaryFlags
|
||||||
BinaryDictionaryHeaderReadingUtils::FRENCH_LIGATURE_PROCESSING_FLAG = 0x4;
|
BinaryDictionaryHeaderReadingUtils::FRENCH_LIGATURE_PROCESSING_FLAG = 0x4;
|
||||||
|
|
||||||
/* static */ int BinaryDictionaryHeaderReadingUtils::getHeaderSize(
|
/* static */ int BinaryDictionaryHeaderReadingUtils::getHeaderSize(const uint8_t *const dictBuf) {
|
||||||
const BinaryDictionaryInfo *const binaryDictionaryInfo) {
|
|
||||||
switch (getHeaderVersion(binaryDictionaryInfo->getFormat())) {
|
|
||||||
case HEADER_VERSION_2:
|
|
||||||
// See the format of the header in the comment in
|
// See the format of the header in the comment in
|
||||||
// BinaryDictionaryFormatUtils::detectFormatVersion()
|
// BinaryDictionaryFormatUtils::detectFormatVersion()
|
||||||
return ByteArrayUtils::readUint32(binaryDictionaryInfo->getDictBuf(),
|
return ByteArrayUtils::readUint32(dictBuf,
|
||||||
VERSION_2_HEADER_MAGIC_NUMBER_SIZE + VERSION_2_HEADER_DICTIONARY_VERSION_SIZE
|
VERSION_2_HEADER_MAGIC_NUMBER_SIZE + VERSION_2_HEADER_DICTIONARY_VERSION_SIZE
|
||||||
+ VERSION_2_HEADER_FLAG_SIZE);
|
+ VERSION_2_HEADER_FLAG_SIZE);
|
||||||
default:
|
|
||||||
return S_INT_MAX;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* static */ BinaryDictionaryHeaderReadingUtils::DictionaryFlags
|
/* static */ BinaryDictionaryHeaderReadingUtils::DictionaryFlags
|
||||||
BinaryDictionaryHeaderReadingUtils::getFlags(
|
BinaryDictionaryHeaderReadingUtils::getFlags(const uint8_t *const dictBuf) {
|
||||||
const BinaryDictionaryInfo *const binaryDictionaryInfo) {
|
return ByteArrayUtils::readUint16(dictBuf, VERSION_2_HEADER_MAGIC_NUMBER_SIZE
|
||||||
switch (getHeaderVersion(binaryDictionaryInfo->getFormat())) {
|
+ VERSION_2_HEADER_DICTIONARY_VERSION_SIZE);
|
||||||
case HEADER_VERSION_2:
|
|
||||||
return ByteArrayUtils::readUint16(binaryDictionaryInfo->getDictBuf(),
|
|
||||||
VERSION_2_HEADER_MAGIC_NUMBER_SIZE + VERSION_2_HEADER_DICTIONARY_VERSION_SIZE);
|
|
||||||
default:
|
|
||||||
return NO_FLAGS;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns whether the key is found, and reads the found value into outValue.
|
// Returns whether the key is found, and reads the found value into outValue.
|
||||||
/* static */ bool BinaryDictionaryHeaderReadingUtils::readHeaderValue(
|
/* static */ bool BinaryDictionaryHeaderReadingUtils::readHeaderValue(const uint8_t *const dictBuf,
|
||||||
const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
|
||||||
const char *const key, int *outValue, const int outValueSize) {
|
const char *const key, int *outValue, const int outValueSize) {
|
||||||
if (outValueSize <= 0) {
|
if (outValueSize <= 0) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
const int headerSize = getHeaderSize(binaryDictionaryInfo);
|
const int headerSize = getHeaderSize(dictBuf);
|
||||||
int pos = getHeaderOptionsPosition(binaryDictionaryInfo->getFormat());
|
int pos = getHeaderOptionsPosition();
|
||||||
if (pos == NOT_A_DICT_POS) {
|
if (pos == NOT_A_DICT_POS) {
|
||||||
// The header doesn't have header options.
|
// The header doesn't have header options.
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
while (pos < headerSize) {
|
while (pos < headerSize) {
|
||||||
if(ByteArrayUtils::compareStringInBufferWithCharArray(
|
if(ByteArrayUtils::compareStringInBufferWithCharArray(
|
||||||
binaryDictionaryInfo->getDictBuf(), key, headerSize - pos, &pos) == 0) {
|
dictBuf, key, headerSize - pos, &pos) == 0) {
|
||||||
// The key was found.
|
// The key was found.
|
||||||
const int length = ByteArrayUtils::readStringAndAdvancePosition(
|
const int length = ByteArrayUtils::readStringAndAdvancePosition(
|
||||||
binaryDictionaryInfo->getDictBuf(), outValueSize, outValue, &pos);
|
dictBuf, outValueSize, outValue, &pos);
|
||||||
// Add a 0 terminator to the string.
|
// Add a 0 terminator to the string.
|
||||||
outValue[length < outValueSize ? length : outValueSize - 1] = '\0';
|
outValue[length < outValueSize ? length : outValueSize - 1] = '\0';
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
ByteArrayUtils::advancePositionToBehindString(
|
ByteArrayUtils::advancePositionToBehindString(dictBuf, headerSize - pos, &pos);
|
||||||
binaryDictionaryInfo->getDictBuf(), headerSize - pos, &pos);
|
|
||||||
}
|
}
|
||||||
// The key was not found.
|
// The key was not found.
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* static */ int BinaryDictionaryHeaderReadingUtils::readHeaderValueInt(
|
/* static */ int BinaryDictionaryHeaderReadingUtils::readHeaderValueInt(
|
||||||
const BinaryDictionaryInfo *const binaryDictionaryInfo, const char *const key) {
|
const uint8_t *const dictBuf, const char *const key) {
|
||||||
const int bufferSize = LARGEST_INT_DIGIT_COUNT;
|
const int bufferSize = LARGEST_INT_DIGIT_COUNT;
|
||||||
int intBuffer[bufferSize];
|
int intBuffer[bufferSize];
|
||||||
char charBuffer[bufferSize];
|
char charBuffer[bufferSize];
|
||||||
if (!readHeaderValue(binaryDictionaryInfo, key, intBuffer, bufferSize)) {
|
if (!readHeaderValue(dictBuf, key, intBuffer, bufferSize)) {
|
||||||
return S_INT_MIN;
|
return S_INT_MIN;
|
||||||
}
|
}
|
||||||
for (int i = 0; i < bufferSize; ++i) {
|
for (int i = 0; i < bufferSize; ++i) {
|
||||||
|
|
|
@ -20,21 +20,19 @@
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "suggest/core/dictionary/binary_dictionary_format_utils.h"
|
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
class BinaryDictionaryInfo;
|
// TODO: Move header classes to policyimpl.
|
||||||
|
|
||||||
class BinaryDictionaryHeaderReadingUtils {
|
class BinaryDictionaryHeaderReadingUtils {
|
||||||
public:
|
public:
|
||||||
typedef uint16_t DictionaryFlags;
|
typedef uint16_t DictionaryFlags;
|
||||||
|
|
||||||
static const int MAX_OPTION_KEY_LENGTH;
|
static const int MAX_OPTION_KEY_LENGTH;
|
||||||
|
|
||||||
static int getHeaderSize(const BinaryDictionaryInfo *const binaryDictionaryInfo);
|
static int getHeaderSize(const uint8_t *const dictBuf);
|
||||||
|
|
||||||
static DictionaryFlags getFlags(const BinaryDictionaryInfo *const binaryDictionaryInfo);
|
static DictionaryFlags getFlags(const uint8_t *const dictBuf);
|
||||||
|
|
||||||
static AK_FORCE_INLINE bool supportsDynamicUpdate(const DictionaryFlags flags) {
|
static AK_FORCE_INLINE bool supportsDynamicUpdate(const DictionaryFlags flags) {
|
||||||
return (flags & SUPPORTS_DYNAMIC_UPDATE_FLAG) != 0;
|
return (flags & SUPPORTS_DYNAMIC_UPDATE_FLAG) != 0;
|
||||||
|
@ -48,33 +46,19 @@ class BinaryDictionaryHeaderReadingUtils {
|
||||||
return (flags & FRENCH_LIGATURE_PROCESSING_FLAG) != 0;
|
return (flags & FRENCH_LIGATURE_PROCESSING_FLAG) != 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static AK_FORCE_INLINE int getHeaderOptionsPosition(
|
static AK_FORCE_INLINE int getHeaderOptionsPosition() {
|
||||||
const BinaryDictionaryFormatUtils::FORMAT_VERSION dictionaryFormat) {
|
|
||||||
switch (getHeaderVersion(dictionaryFormat)) {
|
|
||||||
case HEADER_VERSION_2:
|
|
||||||
return VERSION_2_HEADER_MAGIC_NUMBER_SIZE + VERSION_2_HEADER_DICTIONARY_VERSION_SIZE
|
return VERSION_2_HEADER_MAGIC_NUMBER_SIZE + VERSION_2_HEADER_DICTIONARY_VERSION_SIZE
|
||||||
+ VERSION_2_HEADER_FLAG_SIZE + VERSION_2_HEADER_SIZE_FIELD_SIZE;
|
+ VERSION_2_HEADER_FLAG_SIZE + VERSION_2_HEADER_SIZE_FIELD_SIZE;
|
||||||
break;
|
|
||||||
default:
|
|
||||||
return NOT_A_DICT_POS;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool readHeaderValue(
|
static bool readHeaderValue(const uint8_t *const dictBuf,
|
||||||
const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
|
||||||
const char *const key, int *outValue, const int outValueSize);
|
const char *const key, int *outValue, const int outValueSize);
|
||||||
|
|
||||||
static int readHeaderValueInt(
|
static int readHeaderValueInt(const uint8_t *const dictBuf, const char *const key);
|
||||||
const BinaryDictionaryInfo *const binaryDictionaryInfo, const char *const key);
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryHeaderReadingUtils);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryHeaderReadingUtils);
|
||||||
|
|
||||||
enum HEADER_VERSION {
|
|
||||||
HEADER_VERSION_2,
|
|
||||||
UNKNOWN_HEADER_VERSION
|
|
||||||
};
|
|
||||||
|
|
||||||
static const int VERSION_2_HEADER_MAGIC_NUMBER_SIZE;
|
static const int VERSION_2_HEADER_MAGIC_NUMBER_SIZE;
|
||||||
static const int VERSION_2_HEADER_DICTIONARY_VERSION_SIZE;
|
static const int VERSION_2_HEADER_DICTIONARY_VERSION_SIZE;
|
||||||
static const int VERSION_2_HEADER_FLAG_SIZE;
|
static const int VERSION_2_HEADER_FLAG_SIZE;
|
||||||
|
@ -88,18 +72,6 @@ class BinaryDictionaryHeaderReadingUtils {
|
||||||
static const DictionaryFlags SUPPORTS_DYNAMIC_UPDATE_FLAG;
|
static const DictionaryFlags SUPPORTS_DYNAMIC_UPDATE_FLAG;
|
||||||
static const DictionaryFlags FRENCH_LIGATURE_PROCESSING_FLAG;
|
static const DictionaryFlags FRENCH_LIGATURE_PROCESSING_FLAG;
|
||||||
static const DictionaryFlags CONTAINS_BIGRAMS_FLAG;
|
static const DictionaryFlags CONTAINS_BIGRAMS_FLAG;
|
||||||
|
|
||||||
static HEADER_VERSION getHeaderVersion(
|
|
||||||
const BinaryDictionaryFormatUtils::FORMAT_VERSION formatVersion) {
|
|
||||||
switch(formatVersion) {
|
|
||||||
case BinaryDictionaryFormatUtils::VERSION_2:
|
|
||||||
// Fall through
|
|
||||||
case BinaryDictionaryFormatUtils::VERSION_3:
|
|
||||||
return HEADER_VERSION_2;
|
|
||||||
default:
|
|
||||||
return UNKNOWN_HEADER_VERSION;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
#endif /* LATINIME_DICTIONARY_HEADER_READING_UTILS_H */
|
#endif /* LATINIME_DICTIONARY_HEADER_READING_UTILS_H */
|
||||||
|
|
|
@ -36,7 +36,7 @@ class BinaryDictionaryInfo {
|
||||||
mDictBufOffset(dictBufOffset), mIsUpdatable(isUpdatable),
|
mDictBufOffset(dictBufOffset), mIsUpdatable(isUpdatable),
|
||||||
mDictionaryFormat(BinaryDictionaryFormatUtils::detectFormatVersion(
|
mDictionaryFormat(BinaryDictionaryFormatUtils::detectFormatVersion(
|
||||||
mDictBuf, mDictSize)),
|
mDictBuf, mDictSize)),
|
||||||
mDictionaryHeader(this), mDictRoot(mDictBuf + mDictionaryHeader.getSize()),
|
mDictionaryHeader(dictBuf), mDictRoot(mDictBuf + mDictionaryHeader.getSize()),
|
||||||
// TODO: Remove.
|
// TODO: Remove.
|
||||||
mStructurePolicy(DictionaryStructureWithBufferPolicyFactory
|
mStructurePolicy(DictionaryStructureWithBufferPolicyFactory
|
||||||
::newDictionaryStructurePolicy(this)) {
|
::newDictionaryStructurePolicy(this)) {
|
||||||
|
|
Loading…
Reference in a new issue