From c7b6393535485a893bf8b0b0c7566df70a8f2d03 Mon Sep 17 00:00:00 2001 From: Keisuke Kuroyanagi Date: Thu, 21 Aug 2014 17:23:11 +0900 Subject: [PATCH] Update FormatSpec.java Change-Id: I27d14697f931c9511420a459c49312ea6e10c366 --- .../latin/makedict/FormatSpec.java | 93 ++----------------- 1 file changed, 10 insertions(+), 83 deletions(-) diff --git a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java index a2ae74b20..ec3c6e291 100644 --- a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java +++ b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java @@ -36,9 +36,7 @@ public final class FormatSpec { * sion * * o | - * p | not used 3 bits - * t | each unigram and bigram entry has a time stamp? - * i | 1 bit, 1 = yes, 0 = no : CONTAINS_TIMESTAMP_FLAG + * p | not used, 2 bytes. * o | * nflags * @@ -48,7 +46,7 @@ public final class FormatSpec { * d | * ersize * - * | attributes list + * attributes list * * attributes list is: * = | string of characters at the char format described below, with the terminator used @@ -86,11 +84,10 @@ public final class FormatSpec { */ /* Node (FusionDictionary.PtNode) layout is as follows: - * | is moved ? 2 bits, 11 = no : FLAG_IS_NOT_MOVED - * | This must be the same as FLAG_CHILDREN_ADDRESS_TYPE_THREEBYTES - * | 01 = yes : FLAG_IS_MOVED - * f | the new address is stored in the same place as the parent address - * l | is deleted? 10 = yes : FLAG_IS_DELETED + * | CHILDREN_ADDRESS_TYPE 2 bits, 11 : FLAG_CHILDREN_ADDRESS_TYPE_THREEBYTES + * | 10 : FLAG_CHILDREN_ADDRESS_TYPE_TWOBYTES + * f | 01 : FLAG_CHILDREN_ADDRESS_TYPE_ONEBYTE + * l | 00 : FLAG_CHILDREN_ADDRESS_TYPE_NOADDRESS * a | has several chars ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_MULTIPLE_CHARS * g | has a terminal ? 1 bit, 1 = yes, 0 = no : FLAG_IS_TERMINAL * s | has shortcut targets ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_SHORTCUT_TARGETS @@ -98,16 +95,6 @@ public final class FormatSpec { * | is not a word ? 1 bit, 1 = yes, 0 = no : FLAG_IS_NOT_A_WORD * | is blacklisted ? 1 bit, 1 = yes, 0 = no : FLAG_IS_BLACKLISTED * - * p | - * a | parent address, 3byte - * r | 1 byte = bbbbbbbb match - * e | case 1xxxxxxx => -((0xxxxxxx << 16) + (next byte << 8) + next byte) - * n | otherwise => (bbbbbbbb << 16) + (next byte << 8) + next byte - * t | This address is relative to the head of the PtNode. - * a | If the node doesn't have a parent, this field is set to 0. - * d | - * dress - * * c | IF FLAG_HAS_MULTIPLE_CHARS * h | char, char, char, char n * (1 or 3 bytes) : use PtNodeInfo for i/o helpers * a | end 1 byte, = 0 @@ -121,15 +108,10 @@ public final class FormatSpec { * q | * * c | - * h | children address, 3 bytes - * i | 1 byte = bbbbbbbb match - * l | case 1xxxxxxx => -((0xxxxxxx << 16) + (next byte << 8) + next byte) - * d | otherwise => (bbbbbbbb<<16) + (next byte << 8) + next byte - * r | if this node doesn't have children, this field is set to 0. - * e | (see BinaryDictEncoderUtils#writeVariableSignedAddress) - * n | This address is relative to the position of this field. - * a | - * ddress + * h | children address, CHILDREN_ADDRESS_TYPE bytes + * i | This address is relative to the position of this field. + * l | + * drenaddress * * | IF FLAG_IS_TERMINAL && FLAG_HAS_SHORTCUT_TARGETS * | shortcut string list @@ -179,8 +161,6 @@ public final class FormatSpec { public static final int MAGIC_NUMBER = 0x9BC13AFE; static final int NOT_A_VERSION_NUMBER = -1; - static final int FIRST_VERSION_WITH_DYNAMIC_UPDATE = 3; - static final int FIRST_VERSION_WITH_TERMINAL_ID = 4; // These MUST have the same values as the relevant constants in format_utils.h. // From version 4 on, we use version * 100 + revision as a version number. That allows @@ -202,9 +182,6 @@ public final class FormatSpec { // use it in the reading code. static final int MAX_WORD_LENGTH = Constants.DICTIONARY_MAX_WORD_LENGTH; - static final int PARENT_ADDRESS_SIZE = 3; - static final int FORWARD_LINK_ADDRESS_SIZE = 3; - // These flags are used only in the static dictionary. static final int MASK_CHILDREN_ADDRESS_TYPE = 0xC0; static final int FLAG_CHILDREN_ADDRESS_TYPE_NOADDRESS = 0x00; @@ -220,13 +197,6 @@ public final class FormatSpec { static final int FLAG_IS_NOT_A_WORD = 0x02; static final int FLAG_IS_BLACKLISTED = 0x01; - // These flags are used only in the dynamic dictionary. - static final int MASK_MOVE_AND_DELETE_FLAG = 0xC0; - static final int FIXED_BIT_OF_DYNAMIC_UPDATE_MOVE = 0x40; - static final int FLAG_IS_MOVED = 0x00 | FIXED_BIT_OF_DYNAMIC_UPDATE_MOVE; - static final int FLAG_IS_NOT_MOVED = 0x80 | FIXED_BIT_OF_DYNAMIC_UPDATE_MOVE; - static final int FLAG_IS_DELETED = 0x80; - static final int FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT = 0x80; static final int FLAG_BIGRAM_ATTR_OFFSET_NEGATIVE = 0x40; static final int MASK_BIGRAM_ATTR_ADDRESS_TYPE = 0x30; @@ -240,52 +210,12 @@ public final class FormatSpec { static final int PTNODE_TERMINATOR_SIZE = 1; static final int PTNODE_FLAGS_SIZE = 1; static final int PTNODE_FREQUENCY_SIZE = 1; - static final int PTNODE_TERMINAL_ID_SIZE = 4; static final int PTNODE_MAX_ADDRESS_SIZE = 3; static final int PTNODE_ATTRIBUTE_FLAGS_SIZE = 1; static final int PTNODE_ATTRIBUTE_MAX_ADDRESS_SIZE = 3; static final int PTNODE_SHORTCUT_LIST_SIZE_SIZE = 2; - // These values are used only by version 4 or later. They MUST match the definitions in - // ver4_dict_constants.cpp. - static final String TRIE_FILE_EXTENSION = ".trie"; - public static final String HEADER_FILE_EXTENSION = ".header"; - static final String FREQ_FILE_EXTENSION = ".freq"; - // tat = Terminal Address Table - static final String TERMINAL_ADDRESS_TABLE_FILE_EXTENSION = ".tat"; - static final String BIGRAM_FILE_EXTENSION = ".bigram"; - static final String SHORTCUT_FILE_EXTENSION = ".shortcut"; - static final String LOOKUP_TABLE_FILE_SUFFIX = "_lookup"; - static final String CONTENT_TABLE_FILE_SUFFIX = "_index"; - static final int FLAGS_IN_FREQ_FILE_SIZE = 1; - static final int FREQUENCY_AND_FLAGS_SIZE = 2; - static final int TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE = 3; - static final int UNIGRAM_TIMESTAMP_SIZE = 4; - static final int UNIGRAM_COUNTER_SIZE = 1; - static final int UNIGRAM_LEVEL_SIZE = 1; - - // With the English main dictionary as of October 2013, the size of bigram address table is - // is 345KB with the block size being 16. - // This is 54% of that of full address table. - static final int BIGRAM_ADDRESS_TABLE_BLOCK_SIZE = 16; - static final int BIGRAM_CONTENT_COUNT = 1; - static final int BIGRAM_FREQ_CONTENT_INDEX = 0; - static final String BIGRAM_FREQ_CONTENT_ID = "_freq"; - static final int BIGRAM_TIMESTAMP_SIZE = 4; - static final int BIGRAM_COUNTER_SIZE = 1; - static final int BIGRAM_LEVEL_SIZE = 1; - - static final int SHORTCUT_CONTENT_COUNT = 1; - static final int SHORTCUT_CONTENT_INDEX = 0; - // With the English main dictionary as of October 2013, the size of shortcut address table is - // 26KB with the block size being 64. - // This is only 4.4% of that of full address table. - static final int SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE = 64; - static final String SHORTCUT_CONTENT_ID = "_shortcut"; - static final int NO_CHILDREN_ADDRESS = Integer.MIN_VALUE; - static final int NO_PARENT_ADDRESS = 0; - static final int NO_FORWARD_LINK_ADDRESS = 0; static final int INVALID_CHARACTER = -1; static final int MAX_PTNODES_FOR_ONE_BYTE_PTNODE_COUNT = 0x7F; // 127 @@ -302,14 +232,11 @@ public final class FormatSpec { // This option needs to be the same numeric value as the one in binary_format.h. static final int NOT_VALID_WORD = -99; - static final int SIGNED_CHILDREN_ADDRESS_SIZE = 3; static final int UINT8_MAX = 0xFF; static final int UINT16_MAX = 0xFFFF; static final int UINT24_MAX = 0xFFFFFF; - static final int SINT24_MAX = 0x7FFFFF; static final int MSB8 = 0x80; - static final int MSB24 = 0x800000; /** * Options about file format.