Read version 3 dictionary header.

The header structure of a version 3 dictionary is the same as that of version 2.

Bug: 6669677
Change-Id: I26c8723333938fbf102de1a40f11d97444da9075
This commit is contained in:
Keisuke Kuroynagi 2013-07-03 17:10:48 +09:00
parent edf5842568
commit 5ae8722bd5
5 changed files with 73 additions and 58 deletions

View file

@ -31,9 +31,9 @@ const int BinaryDictionaryFormatUtils::DICTIONARY_MINIMUM_SIZE = 4;
// The versions of Latin IME that only handle format version 1 only test for the magic // The versions of Latin IME that only handle format version 1 only test for the magic
// number, so we had to change it so that version 2 files would be rejected by older // number, so we had to change it so that version 2 files would be rejected by older
// implementations. On this occasion, we made the magic number 32 bits long. // implementations. On this occasion, we made the magic number 32 bits long.
const uint32_t BinaryDictionaryFormatUtils::FORMAT_VERSION_2_MAGIC_NUMBER = 0x9BC13AFE; const uint32_t BinaryDictionaryFormatUtils::HEADER_VERSION_2_MAGIC_NUMBER = 0x9BC13AFE;
// Magic number (4 bytes), version (2 bytes), options (2 bytes), header size (4 bytes) = 12 // Magic number (4 bytes), version (2 bytes), options (2 bytes), header size (4 bytes) = 12
const int BinaryDictionaryFormatUtils::FORMAT_VERSION_2_MINIMUM_SIZE = 12; const int BinaryDictionaryFormatUtils::HEADER_VERSION_2_MINIMUM_SIZE = 12;
/* static */ BinaryDictionaryFormatUtils::FORMAT_VERSION /* static */ BinaryDictionaryFormatUtils::FORMAT_VERSION
BinaryDictionaryFormatUtils::detectFormatVersion(const uint8_t *const dict, BinaryDictionaryFormatUtils::detectFormatVersion(const uint8_t *const dict,
@ -46,25 +46,28 @@ const int BinaryDictionaryFormatUtils::FORMAT_VERSION_2_MINIMUM_SIZE = 12;
} }
const uint32_t magicNumber = ByteArrayUtils::readUint32(dict, 0); const uint32_t magicNumber = ByteArrayUtils::readUint32(dict, 0);
switch (magicNumber) { switch (magicNumber) {
case FORMAT_VERSION_2_MAGIC_NUMBER: case HEADER_VERSION_2_MAGIC_NUMBER:
// Version 2 dictionaries are at least 12 bytes long. // Version 2 headers are at least 12 bytes long.
// If this dictionary has the version 2 magic number but is less than 12 bytes long, // If this header has the version 2 magic number but is less than 12 bytes long,
// then it's an unknown format and we need to avoid confidently reading the next bytes. // then it's an unknown format and we need to avoid confidently reading the next bytes.
if (dictSize < FORMAT_VERSION_2_MINIMUM_SIZE) { if (dictSize < HEADER_VERSION_2_MINIMUM_SIZE) {
return UNKNOWN_VERSION;
}
// Version 2 header is as follows:
// Magic number (4 bytes) 0x9B 0xC1 0x3A 0xFE
// Version number (2 bytes)
// Options (2 bytes)
// Header size (4 bytes) : integer, big endian
if (ByteArrayUtils::readUint16(dict, 4) == 2) {
return VERSION_2;
} else if (ByteArrayUtils::readUint16(dict, 4) == 3) {
// TODO: Support version 3 dictionary.
return UNKNOWN_VERSION;
} else {
return UNKNOWN_VERSION;
}
default:
return UNKNOWN_VERSION; return UNKNOWN_VERSION;
}
// Format 2 header is as follows:
// Magic number (4 bytes) 0x9B 0xC1 0x3A 0xFE
// Version number (2 bytes) 0x00 0x02
// Options (2 bytes)
// Header size (4 bytes) : integer, big endian
if (ByteArrayUtils::readUint16(dict, 4) == 2) {
return VERSION_2;
} else {
return UNKNOWN_VERSION;
}
default:
return UNKNOWN_VERSION;
} }
} }

View file

@ -33,9 +33,9 @@ namespace latinime {
*/ */
class BinaryDictionaryFormatUtils { class BinaryDictionaryFormatUtils {
public: public:
// TODO: Support version 3 format.
enum FORMAT_VERSION { enum FORMAT_VERSION {
VERSION_2 = 1, VERSION_2,
VERSION_3,
UNKNOWN_VERSION UNKNOWN_VERSION
}; };
@ -45,8 +45,8 @@ class BinaryDictionaryFormatUtils {
DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryFormatUtils); DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryFormatUtils);
static const int DICTIONARY_MINIMUM_SIZE; static const int DICTIONARY_MINIMUM_SIZE;
static const uint32_t FORMAT_VERSION_2_MAGIC_NUMBER; static const uint32_t HEADER_VERSION_2_MAGIC_NUMBER;
static const int FORMAT_VERSION_2_MINIMUM_SIZE; static const int HEADER_VERSION_2_MINIMUM_SIZE;
}; };
} // namespace latinime } // namespace latinime
#endif /* LATINIME_BINARY_DICTIONARY_FORMAT_UTILS_H */ #endif /* LATINIME_BINARY_DICTIONARY_FORMAT_UTILS_H */

View file

@ -26,10 +26,10 @@ namespace latinime {
const int BinaryDictionaryHeaderReadingUtils::MAX_OPTION_KEY_LENGTH = 256; const int BinaryDictionaryHeaderReadingUtils::MAX_OPTION_KEY_LENGTH = 256;
const int BinaryDictionaryHeaderReadingUtils::VERSION_2_MAGIC_NUMBER_SIZE = 4; const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_MAGIC_NUMBER_SIZE = 4;
const int BinaryDictionaryHeaderReadingUtils::VERSION_2_DICTIONARY_VERSION_SIZE = 2; const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_DICTIONARY_VERSION_SIZE = 2;
const int BinaryDictionaryHeaderReadingUtils::VERSION_2_DICTIONARY_FLAG_SIZE = 2; const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_FLAG_SIZE = 2;
const int BinaryDictionaryHeaderReadingUtils::VERSION_2_DICTIONARY_HEADER_SIZE_SIZE = 4; const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_SIZE_FIELD_SIZE = 4;
const BinaryDictionaryHeaderReadingUtils::DictionaryFlags const BinaryDictionaryHeaderReadingUtils::DictionaryFlags
BinaryDictionaryHeaderReadingUtils::NO_FLAGS = 0; BinaryDictionaryHeaderReadingUtils::NO_FLAGS = 0;
@ -45,13 +45,13 @@ const BinaryDictionaryHeaderReadingUtils::DictionaryFlags
/* static */ int BinaryDictionaryHeaderReadingUtils::getHeaderSize( /* static */ int BinaryDictionaryHeaderReadingUtils::getHeaderSize(
const BinaryDictionaryInfo *const binaryDictionaryInfo) { const BinaryDictionaryInfo *const binaryDictionaryInfo) {
switch (binaryDictionaryInfo->getFormat()) { switch (getHeaderVersion(binaryDictionaryInfo->getFormat())) {
case BinaryDictionaryFormatUtils::VERSION_2: case HEADER_VERSION_2:
// See the format of the header in the comment in // See the format of the header in the comment in
// BinaryDictionaryFormatUtils::detectFormatVersion() // BinaryDictionaryFormatUtils::detectFormatVersion()
return ByteArrayUtils::readUint32(binaryDictionaryInfo->getDictBuf(), return ByteArrayUtils::readUint32(binaryDictionaryInfo->getDictBuf(),
VERSION_2_MAGIC_NUMBER_SIZE + VERSION_2_DICTIONARY_VERSION_SIZE VERSION_2_HEADER_MAGIC_NUMBER_SIZE + VERSION_2_HEADER_DICTIONARY_VERSION_SIZE
+ VERSION_2_DICTIONARY_FLAG_SIZE); + VERSION_2_HEADER_FLAG_SIZE);
default: default:
return S_INT_MAX; return S_INT_MAX;
} }
@ -60,10 +60,10 @@ const BinaryDictionaryHeaderReadingUtils::DictionaryFlags
/* static */ BinaryDictionaryHeaderReadingUtils::DictionaryFlags /* static */ BinaryDictionaryHeaderReadingUtils::DictionaryFlags
BinaryDictionaryHeaderReadingUtils::getFlags( BinaryDictionaryHeaderReadingUtils::getFlags(
const BinaryDictionaryInfo *const binaryDictionaryInfo) { const BinaryDictionaryInfo *const binaryDictionaryInfo) {
switch (binaryDictionaryInfo->getFormat()) { switch (getHeaderVersion(binaryDictionaryInfo->getFormat())) {
case BinaryDictionaryFormatUtils::VERSION_2: case HEADER_VERSION_2:
return ByteArrayUtils::readUint16(binaryDictionaryInfo->getDictBuf(), return ByteArrayUtils::readUint16(binaryDictionaryInfo->getDictBuf(),
VERSION_2_MAGIC_NUMBER_SIZE + VERSION_2_DICTIONARY_VERSION_SIZE); VERSION_2_HEADER_MAGIC_NUMBER_SIZE + VERSION_2_HEADER_DICTIONARY_VERSION_SIZE);
default: default:
return NO_FLAGS; return NO_FLAGS;
} }
@ -73,11 +73,15 @@ const BinaryDictionaryHeaderReadingUtils::DictionaryFlags
/* static */ bool BinaryDictionaryHeaderReadingUtils::readHeaderValue( /* static */ bool BinaryDictionaryHeaderReadingUtils::readHeaderValue(
const BinaryDictionaryInfo *const binaryDictionaryInfo, const BinaryDictionaryInfo *const binaryDictionaryInfo,
const char *const key, int *outValue, const int outValueSize) { const char *const key, int *outValue, const int outValueSize) {
if (outValueSize <= 0 || !hasHeaderAttributes(binaryDictionaryInfo->getFormat())) { if (outValueSize <= 0) {
return false; return false;
} }
const int headerSize = getHeaderSize(binaryDictionaryInfo); const int headerSize = getHeaderSize(binaryDictionaryInfo);
int pos = getHeaderOptionsPosition(binaryDictionaryInfo->getFormat()); int pos = getHeaderOptionsPosition(binaryDictionaryInfo->getFormat());
if (pos == NOT_A_DICT_POS) {
// The header doesn't have header options.
return false;
}
while (pos < headerSize) { while (pos < headerSize) {
if(ByteArrayUtils::compareStringInBufferWithCharArray( if(ByteArrayUtils::compareStringInBufferWithCharArray(
binaryDictionaryInfo->getDictBuf(), key, headerSize - pos, &pos) == 0) { binaryDictionaryInfo->getDictBuf(), key, headerSize - pos, &pos) == 0) {

View file

@ -48,27 +48,15 @@ class BinaryDictionaryHeaderReadingUtils {
return (flags & FRENCH_LIGATURE_PROCESSING_FLAG) != 0; return (flags & FRENCH_LIGATURE_PROCESSING_FLAG) != 0;
} }
static AK_FORCE_INLINE bool hasHeaderAttributes(
const BinaryDictionaryFormatUtils::FORMAT_VERSION format) {
// Only format 2 and above have header attributes as {key,value} string pairs.
switch (format) {
case BinaryDictionaryFormatUtils::VERSION_2:
return true;
break;
default:
return false;
}
}
static AK_FORCE_INLINE int getHeaderOptionsPosition( static AK_FORCE_INLINE int getHeaderOptionsPosition(
const BinaryDictionaryFormatUtils::FORMAT_VERSION format) { const BinaryDictionaryFormatUtils::FORMAT_VERSION dictionaryFormat) {
switch (format) { switch (getHeaderVersion(dictionaryFormat)) {
case BinaryDictionaryFormatUtils::VERSION_2: case HEADER_VERSION_2:
return VERSION_2_MAGIC_NUMBER_SIZE + VERSION_2_DICTIONARY_VERSION_SIZE return VERSION_2_HEADER_MAGIC_NUMBER_SIZE + VERSION_2_HEADER_DICTIONARY_VERSION_SIZE
+ VERSION_2_DICTIONARY_FLAG_SIZE + VERSION_2_DICTIONARY_HEADER_SIZE_SIZE; + VERSION_2_HEADER_FLAG_SIZE + VERSION_2_HEADER_SIZE_FIELD_SIZE;
break; break;
default: default:
return 0; return NOT_A_DICT_POS;
} }
} }
@ -82,10 +70,15 @@ class BinaryDictionaryHeaderReadingUtils {
private: private:
DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryHeaderReadingUtils); DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryHeaderReadingUtils);
static const int VERSION_2_MAGIC_NUMBER_SIZE; enum HEADER_VERSION {
static const int VERSION_2_DICTIONARY_VERSION_SIZE; HEADER_VERSION_2,
static const int VERSION_2_DICTIONARY_FLAG_SIZE; UNKNOWN_HEADER_VERSION
static const int VERSION_2_DICTIONARY_HEADER_SIZE_SIZE; };
static const int VERSION_2_HEADER_MAGIC_NUMBER_SIZE;
static const int VERSION_2_HEADER_DICTIONARY_VERSION_SIZE;
static const int VERSION_2_HEADER_FLAG_SIZE;
static const int VERSION_2_HEADER_SIZE_FIELD_SIZE;
static const DictionaryFlags NO_FLAGS; static const DictionaryFlags NO_FLAGS;
// Flags for special processing // Flags for special processing
@ -95,6 +88,18 @@ class BinaryDictionaryHeaderReadingUtils {
static const DictionaryFlags SUPPORTS_DYNAMIC_UPDATE_FLAG; static const DictionaryFlags SUPPORTS_DYNAMIC_UPDATE_FLAG;
static const DictionaryFlags FRENCH_LIGATURE_PROCESSING_FLAG; static const DictionaryFlags FRENCH_LIGATURE_PROCESSING_FLAG;
static const DictionaryFlags CONTAINS_BIGRAMS_FLAG; static const DictionaryFlags CONTAINS_BIGRAMS_FLAG;
// Maps a dictionary format version to the header layout used to read its header.
// Format versions 2 and 3 both resolve to HEADER_VERSION_2; every other value
// resolves to UNKNOWN_HEADER_VERSION.
static HEADER_VERSION getHeaderVersion(
        const BinaryDictionaryFormatUtils::FORMAT_VERSION formatVersion) {
    const bool usesVersion2Header =
            formatVersion == BinaryDictionaryFormatUtils::VERSION_2
            || formatVersion == BinaryDictionaryFormatUtils::VERSION_3;
    return usesVersion2Header ? HEADER_VERSION_2 : UNKNOWN_HEADER_VERSION;
}
}; };
} }
#endif /* LATINIME_DICTIONARY_HEADER_READING_UTILS_H */ #endif /* LATINIME_DICTIONARY_HEADER_READING_UTILS_H */

View file

@ -32,6 +32,9 @@ class DictionaryStructurePolicyFactory {
switch (dictionaryFormat) { switch (dictionaryFormat) {
case BinaryDictionaryFormatUtils::VERSION_2: case BinaryDictionaryFormatUtils::VERSION_2:
return PatriciaTriePolicy::getInstance(); return PatriciaTriePolicy::getInstance();
case BinaryDictionaryFormatUtils::VERSION_3:
// TODO: support version 3 dictionaries.
return 0;
default: default:
ASSERT(false); ASSERT(false);
return 0; return 0;