am 0933353c: Merge "Read version 3 dictionary header."
* commit '0933353c0c8823ec5ca1dcc5cd68014b87a6e045': Read version 3 dictionary header.main
commit
2e22356da4
|
@ -31,9 +31,9 @@ const int BinaryDictionaryFormatUtils::DICTIONARY_MINIMUM_SIZE = 4;
|
||||||
// The versions of Latin IME that only handle format version 1 only test for the magic
|
// The versions of Latin IME that only handle format version 1 only test for the magic
|
||||||
// number, so we had to change it so that version 2 files would be rejected by older
|
// number, so we had to change it so that version 2 files would be rejected by older
|
||||||
// implementations. On this occasion, we made the magic number 32 bits long.
|
// implementations. On this occasion, we made the magic number 32 bits long.
|
||||||
const uint32_t BinaryDictionaryFormatUtils::FORMAT_VERSION_2_MAGIC_NUMBER = 0x9BC13AFE;
|
const uint32_t BinaryDictionaryFormatUtils::HEADER_VERSION_2_MAGIC_NUMBER = 0x9BC13AFE;
|
||||||
// Magic number (4 bytes), version (2 bytes), options (2 bytes), header size (4 bytes) = 12
|
// Magic number (4 bytes), version (2 bytes), options (2 bytes), header size (4 bytes) = 12
|
||||||
const int BinaryDictionaryFormatUtils::FORMAT_VERSION_2_MINIMUM_SIZE = 12;
|
const int BinaryDictionaryFormatUtils::HEADER_VERSION_2_MINIMUM_SIZE = 12;
|
||||||
|
|
||||||
/* static */ BinaryDictionaryFormatUtils::FORMAT_VERSION
|
/* static */ BinaryDictionaryFormatUtils::FORMAT_VERSION
|
||||||
BinaryDictionaryFormatUtils::detectFormatVersion(const uint8_t *const dict,
|
BinaryDictionaryFormatUtils::detectFormatVersion(const uint8_t *const dict,
|
||||||
|
@ -46,20 +46,23 @@ const int BinaryDictionaryFormatUtils::FORMAT_VERSION_2_MINIMUM_SIZE = 12;
|
||||||
}
|
}
|
||||||
const uint32_t magicNumber = ByteArrayUtils::readUint32(dict, 0);
|
const uint32_t magicNumber = ByteArrayUtils::readUint32(dict, 0);
|
||||||
switch (magicNumber) {
|
switch (magicNumber) {
|
||||||
case FORMAT_VERSION_2_MAGIC_NUMBER:
|
case HEADER_VERSION_2_MAGIC_NUMBER:
|
||||||
// Version 2 dictionaries are at least 12 bytes long.
|
// Version 2 header are at least 12 bytes long.
|
||||||
// If this dictionary has the version 2 magic number but is less than 12 bytes long,
|
// If this header has the version 2 magic number but is less than 12 bytes long,
|
||||||
// then it's an unknown format and we need to avoid confidently reading the next bytes.
|
// then it's an unknown format and we need to avoid confidently reading the next bytes.
|
||||||
if (dictSize < FORMAT_VERSION_2_MINIMUM_SIZE) {
|
if (dictSize < HEADER_VERSION_2_MINIMUM_SIZE) {
|
||||||
return UNKNOWN_VERSION;
|
return UNKNOWN_VERSION;
|
||||||
}
|
}
|
||||||
// Format 2 header is as follows:
|
// Version 2 header is as follows:
|
||||||
// Magic number (4 bytes) 0x9B 0xC1 0x3A 0xFE
|
// Magic number (4 bytes) 0x9B 0xC1 0x3A 0xFE
|
||||||
// Version number (2 bytes) 0x00 0x02
|
// Version number (2 bytes)
|
||||||
// Options (2 bytes)
|
// Options (2 bytes)
|
||||||
// Header size (4 bytes) : integer, big endian
|
// Header size (4 bytes) : integer, big endian
|
||||||
if (ByteArrayUtils::readUint16(dict, 4) == 2) {
|
if (ByteArrayUtils::readUint16(dict, 4) == 2) {
|
||||||
return VERSION_2;
|
return VERSION_2;
|
||||||
|
} else if (ByteArrayUtils::readUint16(dict, 4) == 3) {
|
||||||
|
// TODO: Support version 3 dictionary.
|
||||||
|
return UNKNOWN_VERSION;
|
||||||
} else {
|
} else {
|
||||||
return UNKNOWN_VERSION;
|
return UNKNOWN_VERSION;
|
||||||
}
|
}
|
||||||
|
|
|
@ -33,9 +33,9 @@ namespace latinime {
|
||||||
*/
|
*/
|
||||||
class BinaryDictionaryFormatUtils {
|
class BinaryDictionaryFormatUtils {
|
||||||
public:
|
public:
|
||||||
// TODO: Support version 3 format.
|
|
||||||
enum FORMAT_VERSION {
|
enum FORMAT_VERSION {
|
||||||
VERSION_2 = 1,
|
VERSION_2,
|
||||||
|
VERSION_3,
|
||||||
UNKNOWN_VERSION
|
UNKNOWN_VERSION
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -45,8 +45,8 @@ class BinaryDictionaryFormatUtils {
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryFormatUtils);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryFormatUtils);
|
||||||
|
|
||||||
static const int DICTIONARY_MINIMUM_SIZE;
|
static const int DICTIONARY_MINIMUM_SIZE;
|
||||||
static const uint32_t FORMAT_VERSION_2_MAGIC_NUMBER;
|
static const uint32_t HEADER_VERSION_2_MAGIC_NUMBER;
|
||||||
static const int FORMAT_VERSION_2_MINIMUM_SIZE;
|
static const int HEADER_VERSION_2_MINIMUM_SIZE;
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif /* LATINIME_BINARY_DICTIONARY_FORMAT_UTILS_H */
|
#endif /* LATINIME_BINARY_DICTIONARY_FORMAT_UTILS_H */
|
||||||
|
|
|
@ -26,10 +26,10 @@ namespace latinime {
|
||||||
|
|
||||||
const int BinaryDictionaryHeaderReadingUtils::MAX_OPTION_KEY_LENGTH = 256;
|
const int BinaryDictionaryHeaderReadingUtils::MAX_OPTION_KEY_LENGTH = 256;
|
||||||
|
|
||||||
const int BinaryDictionaryHeaderReadingUtils::VERSION_2_MAGIC_NUMBER_SIZE = 4;
|
const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_MAGIC_NUMBER_SIZE = 4;
|
||||||
const int BinaryDictionaryHeaderReadingUtils::VERSION_2_DICTIONARY_VERSION_SIZE = 2;
|
const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_DICTIONARY_VERSION_SIZE = 2;
|
||||||
const int BinaryDictionaryHeaderReadingUtils::VERSION_2_DICTIONARY_FLAG_SIZE = 2;
|
const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_FLAG_SIZE = 2;
|
||||||
const int BinaryDictionaryHeaderReadingUtils::VERSION_2_DICTIONARY_HEADER_SIZE_SIZE = 4;
|
const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_SIZE_FIELD_SIZE = 4;
|
||||||
|
|
||||||
const BinaryDictionaryHeaderReadingUtils::DictionaryFlags
|
const BinaryDictionaryHeaderReadingUtils::DictionaryFlags
|
||||||
BinaryDictionaryHeaderReadingUtils::NO_FLAGS = 0;
|
BinaryDictionaryHeaderReadingUtils::NO_FLAGS = 0;
|
||||||
|
@ -45,13 +45,13 @@ const BinaryDictionaryHeaderReadingUtils::DictionaryFlags
|
||||||
|
|
||||||
/* static */ int BinaryDictionaryHeaderReadingUtils::getHeaderSize(
|
/* static */ int BinaryDictionaryHeaderReadingUtils::getHeaderSize(
|
||||||
const BinaryDictionaryInfo *const binaryDictionaryInfo) {
|
const BinaryDictionaryInfo *const binaryDictionaryInfo) {
|
||||||
switch (binaryDictionaryInfo->getFormat()) {
|
switch (getHeaderVersion(binaryDictionaryInfo->getFormat())) {
|
||||||
case BinaryDictionaryFormatUtils::VERSION_2:
|
case HEADER_VERSION_2:
|
||||||
// See the format of the header in the comment in
|
// See the format of the header in the comment in
|
||||||
// BinaryDictionaryFormatUtils::detectFormatVersion()
|
// BinaryDictionaryFormatUtils::detectFormatVersion()
|
||||||
return ByteArrayUtils::readUint32(binaryDictionaryInfo->getDictBuf(),
|
return ByteArrayUtils::readUint32(binaryDictionaryInfo->getDictBuf(),
|
||||||
VERSION_2_MAGIC_NUMBER_SIZE + VERSION_2_DICTIONARY_VERSION_SIZE
|
VERSION_2_HEADER_MAGIC_NUMBER_SIZE + VERSION_2_HEADER_DICTIONARY_VERSION_SIZE
|
||||||
+ VERSION_2_DICTIONARY_FLAG_SIZE);
|
+ VERSION_2_HEADER_FLAG_SIZE);
|
||||||
default:
|
default:
|
||||||
return S_INT_MAX;
|
return S_INT_MAX;
|
||||||
}
|
}
|
||||||
|
@ -60,10 +60,10 @@ const BinaryDictionaryHeaderReadingUtils::DictionaryFlags
|
||||||
/* static */ BinaryDictionaryHeaderReadingUtils::DictionaryFlags
|
/* static */ BinaryDictionaryHeaderReadingUtils::DictionaryFlags
|
||||||
BinaryDictionaryHeaderReadingUtils::getFlags(
|
BinaryDictionaryHeaderReadingUtils::getFlags(
|
||||||
const BinaryDictionaryInfo *const binaryDictionaryInfo) {
|
const BinaryDictionaryInfo *const binaryDictionaryInfo) {
|
||||||
switch (binaryDictionaryInfo->getFormat()) {
|
switch (getHeaderVersion(binaryDictionaryInfo->getFormat())) {
|
||||||
case BinaryDictionaryFormatUtils::VERSION_2:
|
case HEADER_VERSION_2:
|
||||||
return ByteArrayUtils::readUint16(binaryDictionaryInfo->getDictBuf(),
|
return ByteArrayUtils::readUint16(binaryDictionaryInfo->getDictBuf(),
|
||||||
VERSION_2_MAGIC_NUMBER_SIZE + VERSION_2_DICTIONARY_VERSION_SIZE);
|
VERSION_2_HEADER_MAGIC_NUMBER_SIZE + VERSION_2_HEADER_DICTIONARY_VERSION_SIZE);
|
||||||
default:
|
default:
|
||||||
return NO_FLAGS;
|
return NO_FLAGS;
|
||||||
}
|
}
|
||||||
|
@ -73,11 +73,15 @@ const BinaryDictionaryHeaderReadingUtils::DictionaryFlags
|
||||||
/* static */ bool BinaryDictionaryHeaderReadingUtils::readHeaderValue(
|
/* static */ bool BinaryDictionaryHeaderReadingUtils::readHeaderValue(
|
||||||
const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
||||||
const char *const key, int *outValue, const int outValueSize) {
|
const char *const key, int *outValue, const int outValueSize) {
|
||||||
if (outValueSize <= 0 || !hasHeaderAttributes(binaryDictionaryInfo->getFormat())) {
|
if (outValueSize <= 0) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
const int headerSize = getHeaderSize(binaryDictionaryInfo);
|
const int headerSize = getHeaderSize(binaryDictionaryInfo);
|
||||||
int pos = getHeaderOptionsPosition(binaryDictionaryInfo->getFormat());
|
int pos = getHeaderOptionsPosition(binaryDictionaryInfo->getFormat());
|
||||||
|
if (pos == NOT_A_DICT_POS) {
|
||||||
|
// The header doesn't have header options.
|
||||||
|
return false;
|
||||||
|
}
|
||||||
while (pos < headerSize) {
|
while (pos < headerSize) {
|
||||||
if(ByteArrayUtils::compareStringInBufferWithCharArray(
|
if(ByteArrayUtils::compareStringInBufferWithCharArray(
|
||||||
binaryDictionaryInfo->getDictBuf(), key, headerSize - pos, &pos) == 0) {
|
binaryDictionaryInfo->getDictBuf(), key, headerSize - pos, &pos) == 0) {
|
||||||
|
|
|
@ -48,27 +48,15 @@ class BinaryDictionaryHeaderReadingUtils {
|
||||||
return (flags & FRENCH_LIGATURE_PROCESSING_FLAG) != 0;
|
return (flags & FRENCH_LIGATURE_PROCESSING_FLAG) != 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static AK_FORCE_INLINE bool hasHeaderAttributes(
|
|
||||||
const BinaryDictionaryFormatUtils::FORMAT_VERSION format) {
|
|
||||||
// Only format 2 and above have header attributes as {key,value} string pairs.
|
|
||||||
switch (format) {
|
|
||||||
case BinaryDictionaryFormatUtils::VERSION_2:
|
|
||||||
return true;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static AK_FORCE_INLINE int getHeaderOptionsPosition(
|
static AK_FORCE_INLINE int getHeaderOptionsPosition(
|
||||||
const BinaryDictionaryFormatUtils::FORMAT_VERSION format) {
|
const BinaryDictionaryFormatUtils::FORMAT_VERSION dictionaryFormat) {
|
||||||
switch (format) {
|
switch (getHeaderVersion(dictionaryFormat)) {
|
||||||
case BinaryDictionaryFormatUtils::VERSION_2:
|
case HEADER_VERSION_2:
|
||||||
return VERSION_2_MAGIC_NUMBER_SIZE + VERSION_2_DICTIONARY_VERSION_SIZE
|
return VERSION_2_HEADER_MAGIC_NUMBER_SIZE + VERSION_2_HEADER_DICTIONARY_VERSION_SIZE
|
||||||
+ VERSION_2_DICTIONARY_FLAG_SIZE + VERSION_2_DICTIONARY_HEADER_SIZE_SIZE;
|
+ VERSION_2_HEADER_FLAG_SIZE + VERSION_2_HEADER_SIZE_FIELD_SIZE;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
return 0;
|
return NOT_A_DICT_POS;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -82,10 +70,15 @@ class BinaryDictionaryHeaderReadingUtils {
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryHeaderReadingUtils);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryHeaderReadingUtils);
|
||||||
|
|
||||||
static const int VERSION_2_MAGIC_NUMBER_SIZE;
|
enum HEADER_VERSION {
|
||||||
static const int VERSION_2_DICTIONARY_VERSION_SIZE;
|
HEADER_VERSION_2,
|
||||||
static const int VERSION_2_DICTIONARY_FLAG_SIZE;
|
UNKNOWN_HEADER_VERSION
|
||||||
static const int VERSION_2_DICTIONARY_HEADER_SIZE_SIZE;
|
};
|
||||||
|
|
||||||
|
static const int VERSION_2_HEADER_MAGIC_NUMBER_SIZE;
|
||||||
|
static const int VERSION_2_HEADER_DICTIONARY_VERSION_SIZE;
|
||||||
|
static const int VERSION_2_HEADER_FLAG_SIZE;
|
||||||
|
static const int VERSION_2_HEADER_SIZE_FIELD_SIZE;
|
||||||
|
|
||||||
static const DictionaryFlags NO_FLAGS;
|
static const DictionaryFlags NO_FLAGS;
|
||||||
// Flags for special processing
|
// Flags for special processing
|
||||||
|
@ -95,6 +88,18 @@ class BinaryDictionaryHeaderReadingUtils {
|
||||||
static const DictionaryFlags SUPPORTS_DYNAMIC_UPDATE_FLAG;
|
static const DictionaryFlags SUPPORTS_DYNAMIC_UPDATE_FLAG;
|
||||||
static const DictionaryFlags FRENCH_LIGATURE_PROCESSING_FLAG;
|
static const DictionaryFlags FRENCH_LIGATURE_PROCESSING_FLAG;
|
||||||
static const DictionaryFlags CONTAINS_BIGRAMS_FLAG;
|
static const DictionaryFlags CONTAINS_BIGRAMS_FLAG;
|
||||||
|
|
||||||
|
static HEADER_VERSION getHeaderVersion(
|
||||||
|
const BinaryDictionaryFormatUtils::FORMAT_VERSION formatVersion) {
|
||||||
|
switch(formatVersion) {
|
||||||
|
case BinaryDictionaryFormatUtils::VERSION_2:
|
||||||
|
// Fall through
|
||||||
|
case BinaryDictionaryFormatUtils::VERSION_3:
|
||||||
|
return HEADER_VERSION_2;
|
||||||
|
default:
|
||||||
|
return UNKNOWN_HEADER_VERSION;
|
||||||
|
}
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
#endif /* LATINIME_DICTIONARY_HEADER_READING_UTILS_H */
|
#endif /* LATINIME_DICTIONARY_HEADER_READING_UTILS_H */
|
||||||
|
|
|
@ -32,6 +32,9 @@ class DictionaryStructurePolicyFactory {
|
||||||
switch (dictionaryFormat) {
|
switch (dictionaryFormat) {
|
||||||
case BinaryDictionaryFormatUtils::VERSION_2:
|
case BinaryDictionaryFormatUtils::VERSION_2:
|
||||||
return PatriciaTriePolicy::getInstance();
|
return PatriciaTriePolicy::getInstance();
|
||||||
|
case BinaryDictionaryFormatUtils::VERSION_3:
|
||||||
|
// TODO: support version 3 dictionaries.
|
||||||
|
return 0;
|
||||||
default:
|
default:
|
||||||
ASSERT(false);
|
ASSERT(false);
|
||||||
return 0;
|
return 0;
|
||||||
|
|
Loading…
Reference in New Issue