am 90619ae0: Move utils for reading dict to policyimpl.
* commit '90619ae02b1861e644db6a0d72d0e5f9c195ef3e': Move utils for reading dict to policyimpl.
main
commit
d0d2d239c0
|
@ -53,9 +53,7 @@ LATIN_IME_CORE_SRC_FILES := \
|
||||||
dic_nodes_cache.cpp) \
|
dic_nodes_cache.cpp) \
|
||||||
$(addprefix suggest/core/dictionary/, \
|
$(addprefix suggest/core/dictionary/, \
|
||||||
bigram_dictionary.cpp \
|
bigram_dictionary.cpp \
|
||||||
binary_dictionary_format_utils.cpp \
|
|
||||||
bloom_filter.cpp \
|
bloom_filter.cpp \
|
||||||
byte_array_utils.cpp \
|
|
||||||
dictionary.cpp \
|
dictionary.cpp \
|
||||||
digraph_utils.cpp \
|
digraph_utils.cpp \
|
||||||
multi_bigram_map.cpp) \
|
multi_bigram_map.cpp) \
|
||||||
|
@ -72,6 +70,8 @@ LATIN_IME_CORE_SRC_FILES := \
|
||||||
header/header_policy.cpp \
|
header/header_policy.cpp \
|
||||||
header/header_reading_utils.cpp \
|
header/header_reading_utils.cpp \
|
||||||
shortcut/shortcut_list_reading_utils.cpp \
|
shortcut/shortcut_list_reading_utils.cpp \
|
||||||
|
utils/byte_array_utils.cpp \
|
||||||
|
utils/format_utils.cpp \
|
||||||
dictionary_structure_with_buffer_policy_factory.cpp \
|
dictionary_structure_with_buffer_policy_factory.cpp \
|
||||||
dynamic_patricia_trie_node_reader.cpp \
|
dynamic_patricia_trie_node_reader.cpp \
|
||||||
dynamic_patricia_trie_policy.cpp \
|
dynamic_patricia_trie_policy.cpp \
|
||||||
|
|
|
@ -27,10 +27,10 @@
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "jni.h"
|
#include "jni.h"
|
||||||
#include "jni_common.h"
|
#include "jni_common.h"
|
||||||
#include "suggest/core/dictionary/binary_dictionary_format_utils.h"
|
|
||||||
#include "suggest/core/dictionary/binary_dictionary_info.h"
|
#include "suggest/core/dictionary/binary_dictionary_info.h"
|
||||||
#include "suggest/core/dictionary/dictionary.h"
|
#include "suggest/core/dictionary/dictionary.h"
|
||||||
#include "suggest/core/suggest_options.h"
|
#include "suggest/core/suggest_options.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
|
||||||
#include "utils/autocorrection_threshold_utils.h"
|
#include "utils/autocorrection_threshold_utils.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
@ -87,8 +87,8 @@ static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring s
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
Dictionary *dictionary = 0;
|
Dictionary *dictionary = 0;
|
||||||
if (BinaryDictionaryFormatUtils::UNKNOWN_VERSION
|
if (FormatUtils::UNKNOWN_VERSION
|
||||||
== BinaryDictionaryFormatUtils::detectFormatVersion(static_cast<uint8_t *>(dictBuf),
|
== FormatUtils::detectFormatVersion(static_cast<uint8_t *>(dictBuf),
|
||||||
static_cast<int>(dictSize))) {
|
static_cast<int>(dictSize))) {
|
||||||
AKLOGE("DICT: dictionary format is unknown, bad magic number");
|
AKLOGE("DICT: dictionary format is unknown, bad magic number");
|
||||||
releaseDictBuf(static_cast<const char *>(dictBuf) - offset, adjDictSize, fd);
|
releaseDictBuf(static_cast<const char *>(dictBuf) - offset, adjDictSize, fd);
|
||||||
|
|
|
@ -16,7 +16,7 @@
|
||||||
|
|
||||||
#include "suggest/policyimpl/dictionary/bigram/bigram_list_reading_utils.h"
|
#include "suggest/policyimpl/dictionary/bigram/bigram_list_reading_utils.h"
|
||||||
|
|
||||||
#include "suggest/core/dictionary/byte_array_utils.h"
|
#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
|
|
|
@ -20,7 +20,7 @@
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "suggest/core/dictionary/byte_array_utils.h"
|
#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
|
|
|
@ -19,19 +19,19 @@
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "suggest/core/dictionary/binary_dictionary_format_utils.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h"
|
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/patricia_trie_policy.h"
|
#include "suggest/policyimpl/dictionary/patricia_trie_policy.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
/* static */ DictionaryStructureWithBufferPolicy *DictionaryStructureWithBufferPolicyFactory
|
/* static */ DictionaryStructureWithBufferPolicy *DictionaryStructureWithBufferPolicyFactory
|
||||||
::newDictionaryStructureWithBufferPolicy(const uint8_t *const dictBuf,
|
::newDictionaryStructureWithBufferPolicy(const uint8_t *const dictBuf,
|
||||||
const int dictSize) {
|
const int dictSize) {
|
||||||
switch (BinaryDictionaryFormatUtils::detectFormatVersion(dictBuf, dictSize)) {
|
switch (FormatUtils::detectFormatVersion(dictBuf, dictSize)) {
|
||||||
case BinaryDictionaryFormatUtils::VERSION_2:
|
case FormatUtils::VERSION_2:
|
||||||
return new PatriciaTriePolicy(dictBuf);
|
return new PatriciaTriePolicy(dictBuf);
|
||||||
case BinaryDictionaryFormatUtils::VERSION_3:
|
case FormatUtils::VERSION_3:
|
||||||
return new DynamicPatriciaTriePolicy(dictBuf);
|
return new DynamicPatriciaTriePolicy(dictBuf);
|
||||||
default:
|
default:
|
||||||
ASSERT(false);
|
ASSERT(false);
|
||||||
|
|
|
@ -17,7 +17,7 @@
|
||||||
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
|
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "suggest/core/dictionary/byte_array_utils.h"
|
#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
|
|
|
@ -20,7 +20,7 @@
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "suggest/core/dictionary/byte_array_utils.h"
|
#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
|
|
|
@ -20,7 +20,7 @@
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "suggest/core/dictionary/byte_array_utils.h"
|
#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
|
|
|
@ -17,7 +17,7 @@
|
||||||
#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
|
#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "suggest/core/dictionary/byte_array_utils.h"
|
#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
|
|
|
@ -20,7 +20,7 @@
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "suggest/core/dictionary/byte_array_utils.h"
|
#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
|
|
|
@ -20,7 +20,7 @@
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "suggest/core/dictionary/byte_array_utils.h"
|
#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
|
|
|
@ -14,7 +14,7 @@
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "suggest/core/dictionary/byte_array_utils.h"
|
#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
|
@ -14,9 +14,9 @@
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "suggest/core/dictionary/binary_dictionary_format_utils.h"
|
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
|
||||||
|
|
||||||
#include "suggest/core/dictionary/byte_array_utils.h"
|
#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
|
@ -24,21 +24,19 @@ namespace latinime {
|
||||||
* Dictionary size
|
* Dictionary size
|
||||||
*/
|
*/
|
||||||
// Any file smaller than this is not a dictionary.
|
// Any file smaller than this is not a dictionary.
|
||||||
const int BinaryDictionaryFormatUtils::DICTIONARY_MINIMUM_SIZE = 4;
|
const int FormatUtils::DICTIONARY_MINIMUM_SIZE = 4;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Format versions
|
* Format versions
|
||||||
*/
|
*/
|
||||||
// The versions of Latin IME that only handle format version 1 only test for the magic
|
// 32 bit magic number is stored at the beginning of the dictionary header to reject unsupported
|
||||||
// number, so we had to change it so that version 2 files would be rejected by older
|
// or obsolete dictionary formats.
|
||||||
// implementations. On this occasion, we made the magic number 32 bits long.
|
const uint32_t FormatUtils::HEADER_VERSION_2_MAGIC_NUMBER = 0x9BC13AFE;
|
||||||
const uint32_t BinaryDictionaryFormatUtils::HEADER_VERSION_2_MAGIC_NUMBER = 0x9BC13AFE;
|
|
||||||
// Magic number (4 bytes), version (2 bytes), options (2 bytes), header size (4 bytes) = 12
|
// Magic number (4 bytes), version (2 bytes), options (2 bytes), header size (4 bytes) = 12
|
||||||
const int BinaryDictionaryFormatUtils::HEADER_VERSION_2_MINIMUM_SIZE = 12;
|
const int FormatUtils::HEADER_VERSION_2_MINIMUM_SIZE = 12;
|
||||||
|
|
||||||
/* static */ BinaryDictionaryFormatUtils::FORMAT_VERSION
|
/* static */ FormatUtils::FORMAT_VERSION FormatUtils::detectFormatVersion(
|
||||||
BinaryDictionaryFormatUtils::detectFormatVersion(const uint8_t *const dict,
|
const uint8_t *const dict, const int dictSize) {
|
||||||
const int dictSize) {
|
|
||||||
// The magic number is stored big-endian.
|
// The magic number is stored big-endian.
|
||||||
// If the dictionary is less than 4 bytes, we can't even read the magic number, so we don't
|
// If the dictionary is less than 4 bytes, we can't even read the magic number, so we don't
|
||||||
// understand this format.
|
// understand this format.
|
|
@ -14,8 +14,8 @@
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef LATINIME_BINARY_DICTIONARY_FORMAT_UTILS_H
|
#ifndef LATINIME_FORMAT_UTILS_H
|
||||||
#define LATINIME_BINARY_DICTIONARY_FORMAT_UTILS_H
|
#define LATINIME_FORMAT_UTILS_H
|
||||||
|
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
|
@ -25,12 +25,8 @@ namespace latinime {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Methods to handle binary dictionary format version.
|
* Methods to handle binary dictionary format version.
|
||||||
*
|
|
||||||
* Currently, we have a file with a similar name, binary_format.h. binary_format.h contains binary
|
|
||||||
* reading methods and utility methods for various purposes.
|
|
||||||
* On the other hand, this file deals with only about dictionary format version.
|
|
||||||
*/
|
*/
|
||||||
class BinaryDictionaryFormatUtils {
|
class FormatUtils {
|
||||||
public:
|
public:
|
||||||
enum FORMAT_VERSION {
|
enum FORMAT_VERSION {
|
||||||
VERSION_2,
|
VERSION_2,
|
||||||
|
@ -41,11 +37,11 @@ class BinaryDictionaryFormatUtils {
|
||||||
static FORMAT_VERSION detectFormatVersion(const uint8_t *const dict, const int dictSize);
|
static FORMAT_VERSION detectFormatVersion(const uint8_t *const dict, const int dictSize);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryFormatUtils);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(FormatUtils);
|
||||||
|
|
||||||
static const int DICTIONARY_MINIMUM_SIZE;
|
static const int DICTIONARY_MINIMUM_SIZE;
|
||||||
static const uint32_t HEADER_VERSION_2_MAGIC_NUMBER;
|
static const uint32_t HEADER_VERSION_2_MAGIC_NUMBER;
|
||||||
static const int HEADER_VERSION_2_MINIMUM_SIZE;
|
static const int HEADER_VERSION_2_MINIMUM_SIZE;
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif /* LATINIME_BINARY_DICTIONARY_FORMAT_UTILS_H */
|
#endif /* LATINIME_FORMAT_UTILS_H */
|
Loading…
Reference in New Issue