Remove flags from Java side.
This simplifies the code quite a bit.
- GERMAN_UMLAUTS are now handled through a key-value attribute. The dictionary generator does not need to know about it any more.
- FRENCH_LIGATURES are deprecated as we handle them with shortcuts now.
- CONTAINS_BIGRAMS is deprecated. Bigram processing is always applied regardless of this flag.
Bug: 11281748
Change-Id: If567e52e245a9342adc7f3104a0f7d8d782df8c1
main
parent
2fa3693c26
commit
7b55cd3e2b
|
@ -18,8 +18,6 @@ package com.android.inputmethod.latin;
|
||||||
|
|
||||||
import android.content.Context;
|
import android.content.Context;
|
||||||
|
|
||||||
import com.android.inputmethod.keyboard.ProximityInfo;
|
|
||||||
import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
|
|
||||||
import com.android.inputmethod.latin.makedict.DictEncoder;
|
import com.android.inputmethod.latin.makedict.DictEncoder;
|
||||||
import com.android.inputmethod.latin.makedict.FormatSpec;
|
import com.android.inputmethod.latin.makedict.FormatSpec;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary;
|
import com.android.inputmethod.latin.makedict.FusionDictionary;
|
||||||
|
@ -52,7 +50,7 @@ public class DictionaryWriter extends AbstractDictionaryWriter {
|
||||||
public void clear() {
|
public void clear() {
|
||||||
final HashMap<String, String> attributes = CollectionUtils.newHashMap();
|
final HashMap<String, String> attributes = CollectionUtils.newHashMap();
|
||||||
mFusionDictionary = new FusionDictionary(new PtNodeArray(),
|
mFusionDictionary = new FusionDictionary(new PtNodeArray(),
|
||||||
new FusionDictionary.DictionaryOptions(attributes, false, false));
|
new FusionDictionary.DictionaryOptions(attributes));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -58,11 +58,8 @@ public abstract class AbstractDictDecoder implements DictDecoder {
|
||||||
headerSize);
|
headerSize);
|
||||||
|
|
||||||
final FileHeader header = new FileHeader(headerSize,
|
final FileHeader header = new FileHeader(headerSize,
|
||||||
new FusionDictionary.DictionaryOptions(attributes,
|
new FusionDictionary.DictionaryOptions(attributes),
|
||||||
0 != (optionsFlags & FormatSpec.GERMAN_UMLAUT_PROCESSING_FLAG),
|
|
||||||
0 != (optionsFlags & FormatSpec.FRENCH_LIGATURE_PROCESSING_FLAG)),
|
|
||||||
new FormatOptions(version,
|
new FormatOptions(version,
|
||||||
0 != (optionsFlags & FormatSpec.SUPPORTS_DYNAMIC_UPDATE),
|
|
||||||
0 != (optionsFlags & FormatSpec.CONTAINS_TIMESTAMP_FLAG)));
|
0 != (optionsFlags & FormatSpec.CONTAINS_TIMESTAMP_FLAG)));
|
||||||
return header;
|
return header;
|
||||||
}
|
}
|
||||||
|
|
|
@ -330,7 +330,7 @@ public final class BinaryDictDecoderUtils {
|
||||||
|
|
||||||
static int readChildrenAddress(final DictBuffer dictBuffer,
|
static int readChildrenAddress(final DictBuffer dictBuffer,
|
||||||
final int optionFlags, final FormatOptions options) {
|
final int optionFlags, final FormatOptions options) {
|
||||||
if (options.mSupportsDynamicUpdate) {
|
if (options.supportsDynamicUpdate()) {
|
||||||
final int address = dictBuffer.readUnsignedInt24();
|
final int address = dictBuffer.readUnsignedInt24();
|
||||||
if (address == 0) return FormatSpec.NO_CHILDREN_ADDRESS;
|
if (address == 0) return FormatSpec.NO_CHILDREN_ADDRESS;
|
||||||
if ((address & FormatSpec.MSB24) != 0) {
|
if ((address & FormatSpec.MSB24) != 0) {
|
||||||
|
@ -540,11 +540,11 @@ public final class BinaryDictDecoderUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
// reach the end of the array.
|
// reach the end of the array.
|
||||||
if (options.mSupportsDynamicUpdate) {
|
if (options.supportsDynamicUpdate()) {
|
||||||
final boolean hasValidForwardLink = dictDecoder.readAndFollowForwardLink();
|
final boolean hasValidForwardLink = dictDecoder.readAndFollowForwardLink();
|
||||||
if (!hasValidForwardLink) break;
|
if (!hasValidForwardLink) break;
|
||||||
}
|
}
|
||||||
} while (options.mSupportsDynamicUpdate && dictDecoder.hasNextPtNodeArray());
|
} while (options.supportsDynamicUpdate() && dictDecoder.hasNextPtNodeArray());
|
||||||
|
|
||||||
final PtNodeArray nodeArray = new PtNodeArray(nodeArrayContents);
|
final PtNodeArray nodeArray = new PtNodeArray(nodeArrayContents);
|
||||||
nodeArray.mCachedAddressBeforeUpdate = nodeArrayOriginPos;
|
nodeArray.mCachedAddressBeforeUpdate = nodeArrayOriginPos;
|
||||||
|
|
|
@ -20,7 +20,6 @@ import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncodin
|
||||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
|
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
|
||||||
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
|
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
|
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
||||||
|
|
||||||
|
@ -161,7 +160,7 @@ public class BinaryDictEncoderUtils {
|
||||||
node.mCachedSize = nodeSize;
|
node.mCachedSize = nodeSize;
|
||||||
size += nodeSize;
|
size += nodeSize;
|
||||||
}
|
}
|
||||||
if (options.mSupportsDynamicUpdate) {
|
if (options.supportsDynamicUpdate()) {
|
||||||
size += FormatSpec.FORWARD_LINK_ADDRESS_SIZE;
|
size += FormatSpec.FORWARD_LINK_ADDRESS_SIZE;
|
||||||
}
|
}
|
||||||
ptNodeArray.mCachedSize = size;
|
ptNodeArray.mCachedSize = size;
|
||||||
|
@ -398,7 +397,7 @@ public class BinaryDictEncoderUtils {
|
||||||
nodeSize += FormatSpec.PTNODE_FREQUENCY_SIZE;
|
nodeSize += FormatSpec.PTNODE_FREQUENCY_SIZE;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (formatOptions.mSupportsDynamicUpdate) {
|
if (formatOptions.supportsDynamicUpdate()) {
|
||||||
nodeSize += FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE;
|
nodeSize += FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE;
|
||||||
} else if (null != ptNode.mChildren) {
|
} else if (null != ptNode.mChildren) {
|
||||||
nodeSize += getByteSize(getOffsetToTargetNodeArrayDuringUpdate(ptNodeArray,
|
nodeSize += getByteSize(getOffsetToTargetNodeArrayDuringUpdate(ptNodeArray,
|
||||||
|
@ -418,7 +417,7 @@ public class BinaryDictEncoderUtils {
|
||||||
ptNode.mCachedSize = nodeSize;
|
ptNode.mCachedSize = nodeSize;
|
||||||
size += nodeSize;
|
size += nodeSize;
|
||||||
}
|
}
|
||||||
if (formatOptions.mSupportsDynamicUpdate) {
|
if (formatOptions.supportsDynamicUpdate()) {
|
||||||
size += FormatSpec.FORWARD_LINK_ADDRESS_SIZE;
|
size += FormatSpec.FORWARD_LINK_ADDRESS_SIZE;
|
||||||
}
|
}
|
||||||
if (ptNodeArray.mCachedSize != size) {
|
if (ptNodeArray.mCachedSize != size) {
|
||||||
|
@ -534,7 +533,7 @@ public class BinaryDictEncoderUtils {
|
||||||
if (passes > MAX_PASSES) throw new RuntimeException("Too many passes - probably a bug");
|
if (passes > MAX_PASSES) throw new RuntimeException("Too many passes - probably a bug");
|
||||||
} while (changesDone);
|
} while (changesDone);
|
||||||
|
|
||||||
if (formatOptions.mSupportsDynamicUpdate) {
|
if (formatOptions.supportsDynamicUpdate()) {
|
||||||
computeParentAddresses(flatNodes);
|
computeParentAddresses(flatNodes);
|
||||||
}
|
}
|
||||||
final PtNodeArray lastPtNodeArray = flatNodes.get(flatNodes.size() - 1);
|
final PtNodeArray lastPtNodeArray = flatNodes.get(flatNodes.size() - 1);
|
||||||
|
@ -643,7 +642,7 @@ public class BinaryDictEncoderUtils {
|
||||||
byte flags = 0;
|
byte flags = 0;
|
||||||
if (hasMultipleChars) flags |= FormatSpec.FLAG_HAS_MULTIPLE_CHARS;
|
if (hasMultipleChars) flags |= FormatSpec.FLAG_HAS_MULTIPLE_CHARS;
|
||||||
if (isTerminal) flags |= FormatSpec.FLAG_IS_TERMINAL;
|
if (isTerminal) flags |= FormatSpec.FLAG_IS_TERMINAL;
|
||||||
if (formatOptions.mSupportsDynamicUpdate) {
|
if (formatOptions.supportsDynamicUpdate()) {
|
||||||
flags |= FormatSpec.FLAG_IS_NOT_MOVED;
|
flags |= FormatSpec.FLAG_IS_NOT_MOVED;
|
||||||
} else if (true) {
|
} else if (true) {
|
||||||
switch (childrenAddressSize) {
|
switch (childrenAddressSize) {
|
||||||
|
@ -755,16 +754,11 @@ public class BinaryDictEncoderUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Makes the 2-byte value for options flags.
|
* Makes the 2-byte value for options flags. Unused at the moment, and always 0.
|
||||||
*/
|
*/
|
||||||
private static final int makeOptionsValue(final FusionDictionary dictionary,
|
private static final int makeOptionsValue(final FormatOptions formatOptions) {
|
||||||
final FormatOptions formatOptions) {
|
// TODO: why doesn't this handle CONTAINS_TIMESTAMP_FLAG?
|
||||||
final DictionaryOptions options = dictionary.mOptions;
|
return 0;
|
||||||
final boolean hasBigrams = dictionary.hasBigrams();
|
|
||||||
return (options.mFrenchLigatureProcessing ? FormatSpec.FRENCH_LIGATURE_PROCESSING_FLAG : 0)
|
|
||||||
+ (options.mGermanUmlautProcessing ? FormatSpec.GERMAN_UMLAUT_PROCESSING_FLAG : 0)
|
|
||||||
+ (hasBigrams ? FormatSpec.CONTAINS_BIGRAMS_FLAG : 0)
|
|
||||||
+ (formatOptions.mSupportsDynamicUpdate ? FormatSpec.SUPPORTS_DYNAMIC_UPDATE : 0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -852,7 +846,7 @@ public class BinaryDictEncoderUtils {
|
||||||
}
|
}
|
||||||
dictEncoder.writePtNode(ptNode, parentPosition, formatOptions, dict);
|
dictEncoder.writePtNode(ptNode, parentPosition, formatOptions, dict);
|
||||||
}
|
}
|
||||||
if (formatOptions.mSupportsDynamicUpdate) {
|
if (formatOptions.supportsDynamicUpdate()) {
|
||||||
dictEncoder.writeForwardLinkAddress(FormatSpec.NO_FORWARD_LINK_ADDRESS);
|
dictEncoder.writeForwardLinkAddress(FormatSpec.NO_FORWARD_LINK_ADDRESS);
|
||||||
}
|
}
|
||||||
if (dictEncoder.getPosition() != ptNodeArray.mCachedAddressAfterUpdate
|
if (dictEncoder.getPosition() != ptNodeArray.mCachedAddressAfterUpdate
|
||||||
|
@ -953,7 +947,7 @@ public class BinaryDictEncoderUtils {
|
||||||
headerBuffer.write((byte) (0xFF & version));
|
headerBuffer.write((byte) (0xFF & version));
|
||||||
|
|
||||||
// Options flags
|
// Options flags
|
||||||
final int options = makeOptionsValue(dict, formatOptions);
|
final int options = makeOptionsValue(formatOptions);
|
||||||
headerBuffer.write((byte) (0xFF & (options >> 8)));
|
headerBuffer.write((byte) (0xFF & (options >> 8)));
|
||||||
headerBuffer.write((byte) (0xFF & options));
|
headerBuffer.write((byte) (0xFF & options));
|
||||||
final int headerSizeOffset = headerBuffer.size();
|
final int headerSizeOffset = headerBuffer.size();
|
||||||
|
|
|
@ -112,7 +112,7 @@ public final class BinaryDictIOUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (p.mPosition == p.mNumOfPtNode) {
|
if (p.mPosition == p.mNumOfPtNode) {
|
||||||
if (formatOptions.mSupportsDynamicUpdate) {
|
if (formatOptions.supportsDynamicUpdate()) {
|
||||||
final boolean hasValidForwardLinkAddress =
|
final boolean hasValidForwardLinkAddress =
|
||||||
dictDecoder.readAndFollowForwardLink();
|
dictDecoder.readAndFollowForwardLink();
|
||||||
if (hasValidForwardLinkAddress && dictDecoder.hasNextPtNodeArray()) {
|
if (hasValidForwardLinkAddress && dictDecoder.hasNextPtNodeArray()) {
|
||||||
|
@ -228,7 +228,7 @@ public final class BinaryDictIOUtils {
|
||||||
// a forward link address that we need to consult and possibly resume
|
// a forward link address that we need to consult and possibly resume
|
||||||
// search on the next node array in the linked list.
|
// search on the next node array in the linked list.
|
||||||
if (foundNextPtNode) break;
|
if (foundNextPtNode) break;
|
||||||
if (!header.mFormatOptions.mSupportsDynamicUpdate) {
|
if (!header.mFormatOptions.supportsDynamicUpdate()) {
|
||||||
return FormatSpec.NOT_VALID_WORD;
|
return FormatSpec.NOT_VALID_WORD;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -507,7 +507,7 @@ public final class BinaryDictIOUtils {
|
||||||
* Helper method to check whether the node is moved.
|
* Helper method to check whether the node is moved.
|
||||||
*/
|
*/
|
||||||
public static boolean isMovedPtNode(final int flags, final FormatOptions options) {
|
public static boolean isMovedPtNode(final int flags, final FormatOptions options) {
|
||||||
return options.mSupportsDynamicUpdate
|
return options.supportsDynamicUpdate()
|
||||||
&& ((flags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) == FormatSpec.FLAG_IS_MOVED);
|
&& ((flags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) == FormatSpec.FLAG_IS_MOVED);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -516,14 +516,14 @@ public final class BinaryDictIOUtils {
|
||||||
*/
|
*/
|
||||||
public static boolean supportsDynamicUpdate(final FormatOptions options) {
|
public static boolean supportsDynamicUpdate(final FormatOptions options) {
|
||||||
return options.mVersion >= FormatSpec.FIRST_VERSION_WITH_DYNAMIC_UPDATE
|
return options.mVersion >= FormatSpec.FIRST_VERSION_WITH_DYNAMIC_UPDATE
|
||||||
&& options.mSupportsDynamicUpdate;
|
&& options.supportsDynamicUpdate();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Helper method to check whether the node is deleted.
|
* Helper method to check whether the node is deleted.
|
||||||
*/
|
*/
|
||||||
public static boolean isDeletedPtNode(final int flags, final FormatOptions formatOptions) {
|
public static boolean isDeletedPtNode(final int flags, final FormatOptions formatOptions) {
|
||||||
return formatOptions.mSupportsDynamicUpdate
|
return formatOptions.supportsDynamicUpdate()
|
||||||
&& ((flags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) == FormatSpec.FLAG_IS_DELETED);
|
&& ((flags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) == FormatSpec.FLAG_IS_DELETED);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -546,7 +546,7 @@ public final class BinaryDictIOUtils {
|
||||||
|
|
||||||
static int getChildrenAddressSize(final int optionFlags,
|
static int getChildrenAddressSize(final int optionFlags,
|
||||||
final FormatOptions formatOptions) {
|
final FormatOptions formatOptions) {
|
||||||
if (formatOptions.mSupportsDynamicUpdate) return FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE;
|
if (formatOptions.supportsDynamicUpdate()) return FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE;
|
||||||
switch (optionFlags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) {
|
switch (optionFlags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) {
|
||||||
case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_ONEBYTE:
|
case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_ONEBYTE:
|
||||||
return 1;
|
return 1;
|
||||||
|
|
|
@ -61,7 +61,7 @@ public final class DynamicBinaryDictIOUtils {
|
||||||
final DictBuffer dictBuffer = dictUpdater.getDictBuffer();
|
final DictBuffer dictBuffer = dictUpdater.getDictBuffer();
|
||||||
final int originalPosition = dictBuffer.position();
|
final int originalPosition = dictBuffer.position();
|
||||||
dictBuffer.position(ptNodeOriginAddress);
|
dictBuffer.position(ptNodeOriginAddress);
|
||||||
if (!formatOptions.mSupportsDynamicUpdate) {
|
if (!formatOptions.supportsDynamicUpdate()) {
|
||||||
throw new RuntimeException("this file format does not support parent addresses");
|
throw new RuntimeException("this file format does not support parent addresses");
|
||||||
}
|
}
|
||||||
final int flags = dictBuffer.readUnsignedByte();
|
final int flags = dictBuffer.readUnsignedByte();
|
||||||
|
@ -102,7 +102,7 @@ public final class DynamicBinaryDictIOUtils {
|
||||||
}
|
}
|
||||||
if (!dictUpdater.readAndFollowForwardLink()) break;
|
if (!dictUpdater.readAndFollowForwardLink()) break;
|
||||||
if (dictUpdater.getPosition() == FormatSpec.NO_FORWARD_LINK_ADDRESS) break;
|
if (dictUpdater.getPosition() == FormatSpec.NO_FORWARD_LINK_ADDRESS) break;
|
||||||
} while (formatOptions.mSupportsDynamicUpdate);
|
} while (formatOptions.supportsDynamicUpdate());
|
||||||
dictUpdater.setPosition(originalPosition);
|
dictUpdater.setPosition(originalPosition);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -40,12 +40,8 @@ public final class FormatSpec {
|
||||||
* p | not used 3 bits
|
* p | not used 3 bits
|
||||||
* t | each unigram and bigram entry has a time stamp?
|
* t | each unigram and bigram entry has a time stamp?
|
||||||
* i | 1 bit, 1 = yes, 0 = no : CONTAINS_TIMESTAMP_FLAG
|
* i | 1 bit, 1 = yes, 0 = no : CONTAINS_TIMESTAMP_FLAG
|
||||||
* o | has bigrams ? 1 bit, 1 = yes, 0 = no : CONTAINS_BIGRAMS_FLAG
|
* o |
|
||||||
* n | FRENCH_LIGATURE_PROCESSING_FLAG
|
* nflags
|
||||||
* f | supports dynamic updates ? 1 bit, 1 = yes, 0 = no : SUPPORTS_DYNAMIC_UPDATE
|
|
||||||
* l | GERMAN_UMLAUT_PROCESSING_FLAG
|
|
||||||
* a |
|
|
||||||
* gs
|
|
||||||
*
|
*
|
||||||
* h |
|
* h |
|
||||||
* e | size of the file header, 4bytes
|
* e | size of the file header, 4bytes
|
||||||
|
@ -82,45 +78,36 @@ public final class FormatSpec {
|
||||||
* s
|
* s
|
||||||
*
|
*
|
||||||
* f |
|
* f |
|
||||||
* o | IF SUPPORTS_DYNAMIC_UPDATE (defined in the file header)
|
* o | forward link address, 3byte
|
||||||
* r | forward link address, 3byte
|
* r | 1 byte = bbbbbbbb match
|
||||||
* w | 1 byte = bbbbbbbb match
|
* w | case 1xxxxxxx => -((xxxxxxx << 16) + (next byte << 8) + next byte)
|
||||||
* a | case 1xxxxxxx => -((xxxxxxx << 16) + (next byte << 8) + next byte)
|
* a | otherwise => (xxxxxxx << 16) + (next byte << 8) + next byte
|
||||||
* r | otherwise => (xxxxxxx << 16) + (next byte << 8) + next byte
|
* r |
|
||||||
* d |
|
* dlinkaddress
|
||||||
* linkaddress
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* Node (FusionDictionary.PtNode) layout is as follows:
|
/* Node (FusionDictionary.PtNode) layout is as follows:
|
||||||
* | IF !SUPPORTS_DYNAMIC_UPDATE
|
* | is moved ? 2 bits, 11 = no : FLAG_IS_NOT_MOVED
|
||||||
* | addressType xx : mask with MASK_CHILDREN_ADDRESS_TYPE
|
|
||||||
* | 2 bits, 00 = no children : FLAG_CHILDREN_ADDRESS_TYPE_NOADDRESS
|
|
||||||
* f | 01 = 1 byte : FLAG_CHILDREN_ADDRESS_TYPE_ONEBYTE
|
|
||||||
* l | 10 = 2 bytes : FLAG_CHILDREN_ADDRESS_TYPE_TWOBYTES
|
|
||||||
* a | 11 = 3 bytes : FLAG_CHILDREN_ADDRESS_TYPE_THREEBYTES
|
|
||||||
* g | ELSE
|
|
||||||
* s | is moved ? 2 bits, 11 = no : FLAG_IS_NOT_MOVED
|
|
||||||
* | This must be the same as FLAG_CHILDREN_ADDRESS_TYPE_THREEBYTES
|
* | This must be the same as FLAG_CHILDREN_ADDRESS_TYPE_THREEBYTES
|
||||||
* | 01 = yes : FLAG_IS_MOVED
|
* | 01 = yes : FLAG_IS_MOVED
|
||||||
* | the new address is stored in the same place as the parent address
|
* f | the new address is stored in the same place as the parent address
|
||||||
* | is deleted? 10 = yes : FLAG_IS_DELETED
|
* l | is deleted? 10 = yes : FLAG_IS_DELETED
|
||||||
* | has several chars ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_MULTIPLE_CHARS
|
* a | has several chars ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_MULTIPLE_CHARS
|
||||||
* | has a terminal ? 1 bit, 1 = yes, 0 = no : FLAG_IS_TERMINAL
|
* g | has a terminal ? 1 bit, 1 = yes, 0 = no : FLAG_IS_TERMINAL
|
||||||
* | has shortcut targets ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_SHORTCUT_TARGETS
|
* s | has shortcut targets ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_SHORTCUT_TARGETS
|
||||||
* | has bigrams ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_BIGRAMS
|
* | has bigrams ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_BIGRAMS
|
||||||
* | is not a word ? 1 bit, 1 = yes, 0 = no : FLAG_IS_NOT_A_WORD
|
* | is not a word ? 1 bit, 1 = yes, 0 = no : FLAG_IS_NOT_A_WORD
|
||||||
* | is blacklisted ? 1 bit, 1 = yes, 0 = no : FLAG_IS_BLACKLISTED
|
* | is blacklisted ? 1 bit, 1 = yes, 0 = no : FLAG_IS_BLACKLISTED
|
||||||
*
|
*
|
||||||
* p |
|
* p |
|
||||||
* a | IF SUPPORTS_DYNAMIC_UPDATE (defined in the file header)
|
* a | parent address, 3byte
|
||||||
* r | parent address, 3byte
|
* r | 1 byte = bbbbbbbb match
|
||||||
* e | 1 byte = bbbbbbbb match
|
* e | case 1xxxxxxx => -((0xxxxxxx << 16) + (next byte << 8) + next byte)
|
||||||
* n | case 1xxxxxxx => -((0xxxxxxx << 16) + (next byte << 8) + next byte)
|
* n | otherwise => (bbbbbbbb << 16) + (next byte << 8) + next byte
|
||||||
* t | otherwise => (bbbbbbbb << 16) + (next byte << 8) + next byte
|
* t | This address is relative to the head of the PtNode.
|
||||||
* a | This address is relative to the head of the PtNode.
|
* a | If the node doesn't have a parent, this field is set to 0.
|
||||||
* d | If the node doesn't have a parent, this field is set to 0.
|
|
||||||
* d |
|
* d |
|
||||||
* ress
|
* dress
|
||||||
*
|
*
|
||||||
* c | IF FLAG_HAS_MULTIPLE_CHARS
|
* c | IF FLAG_HAS_MULTIPLE_CHARS
|
||||||
* h | char, char, char, char n * (1 or 3 bytes) : use PtNodeInfo for i/o helpers
|
* h | char, char, char, char n * (1 or 3 bytes) : use PtNodeInfo for i/o helpers
|
||||||
|
@ -134,23 +121,16 @@ public final class FormatSpec {
|
||||||
* e | frequency 1 byte
|
* e | frequency 1 byte
|
||||||
* q |
|
* q |
|
||||||
*
|
*
|
||||||
* c | IF SUPPORTS_DYNAMIC_UPDATE
|
* c |
|
||||||
* h | children address, 3 bytes
|
* h | children address, 3 bytes
|
||||||
* i | 1 byte = bbbbbbbb match
|
* i | 1 byte = bbbbbbbb match
|
||||||
* l | case 1xxxxxxx => -((0xxxxxxx << 16) + (next byte << 8) + next byte)
|
* l | case 1xxxxxxx => -((0xxxxxxx << 16) + (next byte << 8) + next byte)
|
||||||
* d | otherwise => (bbbbbbbb<<16) + (next byte << 8) + next byte
|
* d | otherwise => (bbbbbbbb<<16) + (next byte << 8) + next byte
|
||||||
* r | if this node doesn't have children, this field is set to 0.
|
* r | if this node doesn't have children, this field is set to 0.
|
||||||
* e | (see BinaryDictEncoderUtils#writeVariableSignedAddress)
|
* e | (see BinaryDictEncoderUtils#writeVariableSignedAddress)
|
||||||
* n | ELSIF 00 = FLAG_CHILDREN_ADDRESS_TYPE_NOADDRESS == addressType
|
* n | This address is relative to the position of this field.
|
||||||
* a | // nothing
|
* a |
|
||||||
* d | ELSIF 01 = FLAG_CHILDREN_ADDRESS_TYPE_ONEBYTE == addressType
|
* ddress
|
||||||
* d | children address, 1 byte
|
|
||||||
* r | ELSIF 10 = FLAG_CHILDREN_ADDRESS_TYPE_TWOBYTES == addressType
|
|
||||||
* e | children address, 2 bytes
|
|
||||||
* s | ELSE // 11 = FLAG_CHILDREN_ADDRESS_TYPE_THREEBYTES = addressType
|
|
||||||
* s | children address, 3 bytes
|
|
||||||
* | END
|
|
||||||
* | This address is relative to the position of this field.
|
|
||||||
*
|
*
|
||||||
* | IF FLAG_IS_TERMINAL && FLAG_HAS_SHORTCUT_TARGETS
|
* | IF FLAG_IS_TERMINAL && FLAG_HAS_SHORTCUT_TARGETS
|
||||||
* | shortcut string list
|
* | shortcut string list
|
||||||
|
@ -214,11 +194,7 @@ public final class FormatSpec {
|
||||||
static final int MAXIMUM_SUPPORTED_VERSION = VERSION4;
|
static final int MAXIMUM_SUPPORTED_VERSION = VERSION4;
|
||||||
|
|
||||||
// These options need to be the same numeric values as the one in the native reading code.
|
// These options need to be the same numeric values as the one in the native reading code.
|
||||||
static final int GERMAN_UMLAUT_PROCESSING_FLAG = 0x1;
|
|
||||||
// TODO: Make the native reading code read this variable.
|
// TODO: Make the native reading code read this variable.
|
||||||
static final int SUPPORTS_DYNAMIC_UPDATE = 0x2;
|
|
||||||
static final int FRENCH_LIGATURE_PROCESSING_FLAG = 0x4;
|
|
||||||
static final int CONTAINS_BIGRAMS_FLAG = 0x8;
|
|
||||||
static final int CONTAINS_TIMESTAMP_FLAG = 0x10;
|
static final int CONTAINS_TIMESTAMP_FLAG = 0x10;
|
||||||
|
|
||||||
// TODO: Make this value adaptative to content data, store it in the header, and
|
// TODO: Make this value adaptative to content data, store it in the header, and
|
||||||
|
@ -339,30 +315,23 @@ public final class FormatSpec {
|
||||||
*/
|
*/
|
||||||
public static final class FormatOptions {
|
public static final class FormatOptions {
|
||||||
public final int mVersion;
|
public final int mVersion;
|
||||||
public final boolean mSupportsDynamicUpdate;
|
|
||||||
public final boolean mHasTerminalId;
|
public final boolean mHasTerminalId;
|
||||||
public final boolean mHasTimestamp;
|
public final boolean mHasTimestamp;
|
||||||
|
|
||||||
@UsedForTesting
|
@UsedForTesting
|
||||||
public FormatOptions(final int version) {
|
public FormatOptions(final int version) {
|
||||||
this(version, false);
|
this(version, false /* hasTimestamp */);
|
||||||
}
|
}
|
||||||
|
|
||||||
@UsedForTesting
|
public FormatOptions(final int version, final boolean hasTimestamp) {
|
||||||
public FormatOptions(final int version, final boolean supportsDynamicUpdate) {
|
|
||||||
this(version, supportsDynamicUpdate, false /* hasTimestamp */);
|
|
||||||
}
|
|
||||||
|
|
||||||
public FormatOptions(final int version, final boolean supportsDynamicUpdate,
|
|
||||||
final boolean hasTimestamp) {
|
|
||||||
mVersion = version;
|
mVersion = version;
|
||||||
if (version < FIRST_VERSION_WITH_DYNAMIC_UPDATE && supportsDynamicUpdate) {
|
|
||||||
throw new RuntimeException("Dynamic updates are only supported with versions "
|
|
||||||
+ FIRST_VERSION_WITH_DYNAMIC_UPDATE + " and ulterior.");
|
|
||||||
}
|
|
||||||
mSupportsDynamicUpdate = supportsDynamicUpdate;
|
|
||||||
mHasTerminalId = (version >= FIRST_VERSION_WITH_TERMINAL_ID);
|
mHasTerminalId = (version >= FIRST_VERSION_WITH_TERMINAL_ID);
|
||||||
mHasTimestamp = hasTimestamp;
|
mHasTimestamp = hasTimestamp;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public boolean supportsDynamicUpdate() {
|
||||||
|
return mVersion >= FIRST_VERSION_WITH_DYNAMIC_UPDATE;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -374,7 +343,6 @@ public final class FormatSpec {
|
||||||
public final FormatOptions mFormatOptions;
|
public final FormatOptions mFormatOptions;
|
||||||
// Note that these are corresponding definitions in native code in latinime::HeaderPolicy
|
// Note that these are corresponding definitions in native code in latinime::HeaderPolicy
|
||||||
// and latinime::HeaderReadWriteUtils.
|
// and latinime::HeaderReadWriteUtils.
|
||||||
public static final String SUPPORTS_DYNAMIC_UPDATE_ATTRIBUTE = "SUPPORTS_DYNAMIC_UPDATE";
|
|
||||||
public static final String USES_FORGETTING_CURVE_ATTRIBUTE = "USES_FORGETTING_CURVE";
|
public static final String USES_FORGETTING_CURVE_ATTRIBUTE = "USES_FORGETTING_CURVE";
|
||||||
public static final String HAS_HISTORICAL_INFO_ATTRIBUTE = "HAS_HISTORICAL_INFO";
|
public static final String HAS_HISTORICAL_INFO_ATTRIBUTE = "HAS_HISTORICAL_INFO";
|
||||||
public static final String ATTRIBUTE_VALUE_TRUE = "1";
|
public static final String ATTRIBUTE_VALUE_TRUE = "1";
|
||||||
|
|
|
@ -303,14 +303,9 @@ public final class FusionDictionary implements Iterable<Word> {
|
||||||
* Options global to the dictionary.
|
* Options global to the dictionary.
|
||||||
*/
|
*/
|
||||||
public static final class DictionaryOptions {
|
public static final class DictionaryOptions {
|
||||||
public final boolean mGermanUmlautProcessing;
|
|
||||||
public final boolean mFrenchLigatureProcessing;
|
|
||||||
public final HashMap<String, String> mAttributes;
|
public final HashMap<String, String> mAttributes;
|
||||||
public DictionaryOptions(final HashMap<String, String> attributes,
|
public DictionaryOptions(final HashMap<String, String> attributes) {
|
||||||
final boolean germanUmlautProcessing, final boolean frenchLigatureProcessing) {
|
|
||||||
mAttributes = attributes;
|
mAttributes = attributes;
|
||||||
mGermanUmlautProcessing = germanUmlautProcessing;
|
|
||||||
mFrenchLigatureProcessing = frenchLigatureProcessing;
|
|
||||||
}
|
}
|
||||||
@Override
|
@Override
|
||||||
public String toString() { // Convenience method
|
public String toString() { // Convenience method
|
||||||
|
@ -339,14 +334,6 @@ public final class FusionDictionary implements Iterable<Word> {
|
||||||
}
|
}
|
||||||
s.append("\n");
|
s.append("\n");
|
||||||
}
|
}
|
||||||
if (mGermanUmlautProcessing) {
|
|
||||||
s.append(indent);
|
|
||||||
s.append("Needs German umlaut processing\n");
|
|
||||||
}
|
|
||||||
if (mFrenchLigatureProcessing) {
|
|
||||||
s.append(indent);
|
|
||||||
s.append("Needs French ligature processing\n");
|
|
||||||
}
|
|
||||||
return s.toString();
|
return s.toString();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -700,138 +687,6 @@ public final class FusionDictionary implements Iterable<Word> {
|
||||||
return size;
|
return size;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Recursively count the number of nodes in a given branch of the trie.
|
|
||||||
*
|
|
||||||
* @param nodeArray the node array to count.
|
|
||||||
* @return the number of nodes in this branch.
|
|
||||||
*/
|
|
||||||
public static int countNodeArrays(final PtNodeArray nodeArray) {
|
|
||||||
int size = 1;
|
|
||||||
for (int i = nodeArray.mData.size() - 1; i >= 0; --i) {
|
|
||||||
PtNode ptNode = nodeArray.mData.get(i);
|
|
||||||
if (null != ptNode.mChildren)
|
|
||||||
size += countNodeArrays(ptNode.mChildren);
|
|
||||||
}
|
|
||||||
return size;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Recursively find out whether there are any bigrams.
|
|
||||||
// This can be pretty expensive especially if there aren't any (we return as soon
|
|
||||||
// as we find one, so it's much cheaper if there are bigrams)
|
|
||||||
private static boolean hasBigramsInternal(final PtNodeArray nodeArray) {
|
|
||||||
if (null == nodeArray) return false;
|
|
||||||
for (int i = nodeArray.mData.size() - 1; i >= 0; --i) {
|
|
||||||
PtNode ptNode = nodeArray.mData.get(i);
|
|
||||||
if (null != ptNode.mBigrams) return true;
|
|
||||||
if (hasBigramsInternal(ptNode.mChildren)) return true;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Finds out whether there are any bigrams in this dictionary.
|
|
||||||
*
|
|
||||||
* @return true if there is any bigram, false otherwise.
|
|
||||||
*/
|
|
||||||
// TODO: this is expensive especially for large dictionaries without any bigram.
|
|
||||||
// The up side is, this is always accurate and correct and uses no memory. We should
|
|
||||||
// find a more efficient way of doing this, without compromising too much on memory
|
|
||||||
// and ease of use.
|
|
||||||
public boolean hasBigrams() {
|
|
||||||
return hasBigramsInternal(mRootNodeArray);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Historically, the tails of the words were going to be merged to save space.
|
|
||||||
// However, that would prevent the code to search for a specific address in log(n)
|
|
||||||
// time so this was abandoned.
|
|
||||||
// The code is still of interest as it does add some compression to any dictionary
|
|
||||||
// that has no need for attributes. Implementations that does not read attributes should be
|
|
||||||
// able to read a dictionary with merged tails.
|
|
||||||
// Also, the following code does support frequencies, as in, it will only merges
|
|
||||||
// tails that share the same frequency. Though it would result in the above loss of
|
|
||||||
// performance while searching by address, it is still technically possible to merge
|
|
||||||
// tails that contain attributes, but this code does not take that into account - it does
|
|
||||||
// not compare attributes and will merge terminals with different attributes regardless.
|
|
||||||
public void mergeTails() {
|
|
||||||
MakedictLog.i("Do not merge tails");
|
|
||||||
return;
|
|
||||||
|
|
||||||
// MakedictLog.i("Merging PtNodes. Number of PtNodes : " + countPtNodes(root));
|
|
||||||
// MakedictLog.i("Number of PtNodes : " + countPtNodes(root));
|
|
||||||
//
|
|
||||||
// final HashMap<String, ArrayList<PtNodeArray>> repository =
|
|
||||||
// new HashMap<String, ArrayList<PtNodeArray>>();
|
|
||||||
// mergeTailsInner(repository, root);
|
|
||||||
//
|
|
||||||
// MakedictLog.i("Number of different pseudohashes : " + repository.size());
|
|
||||||
// int size = 0;
|
|
||||||
// for (ArrayList<PtNodeArray> a : repository.values()) {
|
|
||||||
// size += a.size();
|
|
||||||
// }
|
|
||||||
// MakedictLog.i("Number of nodes after merge : " + (1 + size));
|
|
||||||
// MakedictLog.i("Recursively seen nodes : " + countNodes(root));
|
|
||||||
}
|
|
||||||
|
|
||||||
// The following methods are used by the deactivated mergeTails()
|
|
||||||
// private static boolean isEqual(PtNodeArray a, PtNodeArray b) {
|
|
||||||
// if (null == a && null == b) return true;
|
|
||||||
// if (null == a || null == b) return false;
|
|
||||||
// if (a.data.size() != b.data.size()) return false;
|
|
||||||
// final int size = a.data.size();
|
|
||||||
// for (int i = size - 1; i >= 0; --i) {
|
|
||||||
// PtNode aPtNode = a.data.get(i);
|
|
||||||
// PtNode bPtNode = b.data.get(i);
|
|
||||||
// if (aPtNode.frequency != bPtNode.frequency) return false;
|
|
||||||
// if (aPtNode.alternates == null && bPtNode.alternates != null) return false;
|
|
||||||
// if (aPtNode.alternates != null && !aPtNode.equals(bPtNode.alternates)) return false;
|
|
||||||
// if (!Arrays.equals(aPtNode.chars, bPtNode.chars)) return false;
|
|
||||||
// if (!isEqual(aPtNode.children, bPtNode.children)) return false;
|
|
||||||
// }
|
|
||||||
// return true;
|
|
||||||
// }
|
|
||||||
|
|
||||||
// static private HashMap<String, ArrayList<PtNodeArray>> mergeTailsInner(
|
|
||||||
// final HashMap<String, ArrayList<PtNodeArray>> map, final PtNodeArray nodeArray) {
|
|
||||||
// final ArrayList<PtNode> branches = nodeArray.data;
|
|
||||||
// final int nodeSize = branches.size();
|
|
||||||
// for (int i = 0; i < nodeSize; ++i) {
|
|
||||||
// PtNode ptNode = branches.get(i);
|
|
||||||
// if (null != ptNode.children) {
|
|
||||||
// String pseudoHash = getPseudoHash(ptNode.children);
|
|
||||||
// ArrayList<PtNodeArray> similarList = map.get(pseudoHash);
|
|
||||||
// if (null == similarList) {
|
|
||||||
// similarList = new ArrayList<PtNodeArray>();
|
|
||||||
// map.put(pseudoHash, similarList);
|
|
||||||
// }
|
|
||||||
// boolean merged = false;
|
|
||||||
// for (PtNodeArray similar : similarList) {
|
|
||||||
// if (isEqual(ptNode.children, similar)) {
|
|
||||||
// ptNode.children = similar;
|
|
||||||
// merged = true;
|
|
||||||
// break;
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// if (!merged) {
|
|
||||||
// similarList.add(ptNode.children);
|
|
||||||
// }
|
|
||||||
// mergeTailsInner(map, ptNode.children);
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// return map;
|
|
||||||
// }
|
|
||||||
|
|
||||||
// private static String getPseudoHash(final PtNodeArray nodeArray) {
|
|
||||||
// StringBuilder s = new StringBuilder();
|
|
||||||
// for (PtNode ptNode : nodeArray.data) {
|
|
||||||
// s.append(ptNode.frequency);
|
|
||||||
// for (int ch : ptNode.chars) {
|
|
||||||
// s.append(Character.toChars(ch));
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// return s.toString();
|
|
||||||
// }
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Iterator to walk through a dictionary.
|
* Iterator to walk through a dictionary.
|
||||||
*
|
*
|
||||||
|
|
|
@ -169,7 +169,7 @@ public class Ver3DictEncoder implements DictEncoder {
|
||||||
|
|
||||||
private void writeChildrenPosition(final PtNode ptNode, final FormatOptions formatOptions) {
|
private void writeChildrenPosition(final PtNode ptNode, final FormatOptions formatOptions) {
|
||||||
final int childrenPos = BinaryDictEncoderUtils.getChildrenPosition(ptNode, formatOptions);
|
final int childrenPos = BinaryDictEncoderUtils.getChildrenPosition(ptNode, formatOptions);
|
||||||
if (formatOptions.mSupportsDynamicUpdate) {
|
if (formatOptions.supportsDynamicUpdate()) {
|
||||||
mPosition += BinaryDictEncoderUtils.writeSignedChildrenPosition(mBuffer, mPosition,
|
mPosition += BinaryDictEncoderUtils.writeSignedChildrenPosition(mBuffer, mPosition,
|
||||||
childrenPos);
|
childrenPos);
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -183,14 +183,11 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
|
||||||
* An auxiliary class for reading bigrams.
|
* An auxiliary class for reading bigrams.
|
||||||
*/
|
*/
|
||||||
protected static class BigramContentReader extends SparseTableContentReader {
|
protected static class BigramContentReader extends SparseTableContentReader {
|
||||||
private final boolean mHasTimestamp;
|
|
||||||
|
|
||||||
public BigramContentReader(final String name, final File baseDir,
|
public BigramContentReader(final String name, final File baseDir,
|
||||||
final DictionaryBufferFactory factory, final boolean hasTimestamp) {
|
final DictionaryBufferFactory factory, final boolean hasTimestamp) {
|
||||||
super(name + FormatSpec.BIGRAM_FILE_EXTENSION,
|
super(name + FormatSpec.BIGRAM_FILE_EXTENSION,
|
||||||
FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, baseDir,
|
FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, baseDir,
|
||||||
getContentFilenames(name, hasTimestamp), getContentIds(hasTimestamp), factory);
|
getContentFilenames(name, hasTimestamp), getContentIds(hasTimestamp), factory);
|
||||||
mHasTimestamp = hasTimestamp;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Consolidate this method and BigramContentWriter.getContentFilenames.
|
// TODO: Consolidate this method and BigramContentWriter.getContentFilenames.
|
||||||
|
|
|
@ -365,7 +365,7 @@ public class Ver4DictEncoder implements DictEncoder {
|
||||||
|
|
||||||
private void writeChildrenPosition(PtNode ptNode, FormatOptions formatOptions) {
|
private void writeChildrenPosition(PtNode ptNode, FormatOptions formatOptions) {
|
||||||
final int childrenPos = BinaryDictEncoderUtils.getChildrenPosition(ptNode, formatOptions);
|
final int childrenPos = BinaryDictEncoderUtils.getChildrenPosition(ptNode, formatOptions);
|
||||||
if (formatOptions.mSupportsDynamicUpdate) {
|
if (formatOptions.supportsDynamicUpdate()) {
|
||||||
mTriePos += BinaryDictEncoderUtils.writeSignedChildrenPosition(mTrieBuf,
|
mTriePos += BinaryDictEncoderUtils.writeSignedChildrenPosition(mTrieBuf,
|
||||||
mTriePos, childrenPos);
|
mTriePos, childrenPos);
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -54,8 +54,6 @@ public class Ver4DictUpdater extends Ver4DictDecoder implements DictUpdater {
|
||||||
}
|
}
|
||||||
|
|
||||||
private static class BigramContentUpdater extends SparseTableContentUpdater {
|
private static class BigramContentUpdater extends SparseTableContentUpdater {
|
||||||
private final boolean mHasTimestamp;
|
|
||||||
|
|
||||||
public BigramContentUpdater(final String name, final File baseDir,
|
public BigramContentUpdater(final String name, final File baseDir,
|
||||||
final boolean hasTimestamp) {
|
final boolean hasTimestamp) {
|
||||||
super(name + FormatSpec.BIGRAM_FILE_EXTENSION,
|
super(name + FormatSpec.BIGRAM_FILE_EXTENSION,
|
||||||
|
@ -63,7 +61,6 @@ public class Ver4DictUpdater extends Ver4DictDecoder implements DictUpdater {
|
||||||
BigramContentReader.getContentFilenames(name, hasTimestamp),
|
BigramContentReader.getContentFilenames(name, hasTimestamp),
|
||||||
BigramContentReader.getContentIds(hasTimestamp),
|
BigramContentReader.getContentIds(hasTimestamp),
|
||||||
new DictionaryBufferFromWritableByteBufferFactory());
|
new DictionaryBufferFromWritableByteBufferFactory());
|
||||||
mHasTimestamp = hasTimestamp;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public void insertBigramEntries(final int terminalId, final int frequency,
|
public void insertBigramEntries(final int terminalId, final int frequency,
|
||||||
|
|
|
@ -95,8 +95,6 @@ public abstract class DecayingExpandableBinaryDictionaryBase extends ExpandableB
|
||||||
@Override
|
@Override
|
||||||
protected Map<String, String> getHeaderAttributeMap() {
|
protected Map<String, String> getHeaderAttributeMap() {
|
||||||
HashMap<String, String> attributeMap = new HashMap<String, String>();
|
HashMap<String, String> attributeMap = new HashMap<String, String>();
|
||||||
attributeMap.put(FormatSpec.FileHeader.SUPPORTS_DYNAMIC_UPDATE_ATTRIBUTE,
|
|
||||||
FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE);
|
|
||||||
attributeMap.put(FormatSpec.FileHeader.USES_FORGETTING_CURVE_ATTRIBUTE,
|
attributeMap.put(FormatSpec.FileHeader.USES_FORGETTING_CURVE_ATTRIBUTE,
|
||||||
FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE);
|
FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE);
|
||||||
attributeMap.put(FormatSpec.FileHeader.HAS_HISTORICAL_INFO_ATTRIBUTE,
|
attributeMap.put(FormatSpec.FileHeader.HAS_HISTORICAL_INFO_ATTRIBUTE,
|
||||||
|
|
|
@ -95,8 +95,7 @@ public final class UserHistoryDictIOUtils {
|
||||||
static FusionDictionary constructFusionDictionary(final BigramDictionaryInterface dict,
|
static FusionDictionary constructFusionDictionary(final BigramDictionaryInterface dict,
|
||||||
final UserHistoryDictionaryBigramList bigrams, final HashMap<String, String> options) {
|
final UserHistoryDictionaryBigramList bigrams, final HashMap<String, String> options) {
|
||||||
final FusionDictionary fusionDict = new FusionDictionary(new PtNodeArray(),
|
final FusionDictionary fusionDict = new FusionDictionary(new PtNodeArray(),
|
||||||
new FusionDictionary.DictionaryOptions(options, false,
|
new FusionDictionary.DictionaryOptions(options));
|
||||||
false));
|
|
||||||
int profTotal = 0;
|
int profTotal = 0;
|
||||||
for (final String word1 : bigrams.keySet()) {
|
for (final String word1 : bigrams.keySet()) {
|
||||||
final HashMap<String, Byte> word1Bigrams = bigrams.getBigrams(word1);
|
final HashMap<String, Byte> word1Bigrams = bigrams.getBigrams(word1);
|
||||||
|
|
|
@ -115,8 +115,6 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
|
||||||
getContext().getCacheDir());
|
getContext().getCacheDir());
|
||||||
FileUtils.deleteRecursively(file);
|
FileUtils.deleteRecursively(file);
|
||||||
Map<String, String> attributeMap = new HashMap<String, String>();
|
Map<String, String> attributeMap = new HashMap<String, String>();
|
||||||
attributeMap.put(FormatSpec.FileHeader.SUPPORTS_DYNAMIC_UPDATE_ATTRIBUTE,
|
|
||||||
FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE);
|
|
||||||
attributeMap.put(FormatSpec.FileHeader.USES_FORGETTING_CURVE_ATTRIBUTE,
|
attributeMap.put(FormatSpec.FileHeader.USES_FORGETTING_CURVE_ATTRIBUTE,
|
||||||
FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE);
|
FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE);
|
||||||
attributeMap.put(FormatSpec.FileHeader.HAS_HISTORICAL_INFO_ATTRIBUTE,
|
attributeMap.put(FormatSpec.FileHeader.HAS_HISTORICAL_INFO_ATTRIBUTE,
|
||||||
|
|
|
@ -69,8 +69,6 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
file.delete();
|
file.delete();
|
||||||
file.mkdir();
|
file.mkdir();
|
||||||
Map<String, String> attributeMap = new HashMap<String, String>();
|
Map<String, String> attributeMap = new HashMap<String, String>();
|
||||||
attributeMap.put(FormatSpec.FileHeader.SUPPORTS_DYNAMIC_UPDATE_ATTRIBUTE,
|
|
||||||
FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE);
|
|
||||||
if (BinaryDictionary.createEmptyDictFile(file.getAbsolutePath(),
|
if (BinaryDictionary.createEmptyDictFile(file.getAbsolutePath(),
|
||||||
FormatSpec.VERSION4, attributeMap)) {
|
FormatSpec.VERSION4, attributeMap)) {
|
||||||
return file;
|
return file;
|
||||||
|
|
|
@ -31,7 +31,7 @@ import java.util.HashMap;
|
||||||
public class FusionDictionaryTests extends AndroidTestCase {
|
public class FusionDictionaryTests extends AndroidTestCase {
|
||||||
public void testFindWordInTree() {
|
public void testFindWordInTree() {
|
||||||
FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
|
FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
|
||||||
new FusionDictionary.DictionaryOptions(new HashMap<String,String>(), false, false));
|
new FusionDictionary.DictionaryOptions(new HashMap<String,String>()));
|
||||||
|
|
||||||
dict.add("abc", 10, null, false /* isNotAWord */);
|
dict.add("abc", 10, null, false /* isNotAWord */);
|
||||||
assertNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "aaa"));
|
assertNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "aaa"));
|
||||||
|
|
|
@ -226,8 +226,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
||||||
final FormatSpec.FormatOptions formatOptions) {
|
final FormatSpec.FormatOptions formatOptions) {
|
||||||
String result = " : buffer type = "
|
String result = " : buffer type = "
|
||||||
+ ((bufferType == BinaryDictUtils.USE_BYTE_BUFFER) ? "byte buffer" : "byte array");
|
+ ((bufferType == BinaryDictUtils.USE_BYTE_BUFFER) ? "byte buffer" : "byte array");
|
||||||
result += " : version = " + formatOptions.mVersion;
|
return result + " : version = " + formatOptions.mVersion;
|
||||||
return result + ", supportsDynamicUpdate = " + formatOptions.mSupportsDynamicUpdate;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Tests for readDictionaryBinary and writeDictionaryBinary
|
// Tests for readDictionaryBinary and writeDictionaryBinary
|
||||||
|
@ -315,17 +314,11 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
||||||
final List<String> results = CollectionUtils.newArrayList();
|
final List<String> results = CollectionUtils.newArrayList();
|
||||||
|
|
||||||
runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_BUFFER,
|
runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_BUFFER,
|
||||||
BinaryDictUtils.VERSION2);
|
BinaryDictUtils.VERSION2_OPTIONS);
|
||||||
runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_BUFFER,
|
runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_BUFFER,
|
||||||
BinaryDictUtils.VERSION3_WITHOUT_DYNAMIC_UPDATE);
|
BinaryDictUtils.VERSION3_OPTIONS);
|
||||||
runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_BUFFER,
|
runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_BUFFER,
|
||||||
BinaryDictUtils.VERSION3_WITH_DYNAMIC_UPDATE);
|
BinaryDictUtils.VERSION4_OPTIONS_WITH_TIMESTAMP);
|
||||||
runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_BUFFER,
|
|
||||||
BinaryDictUtils.VERSION4_WITHOUT_DYNAMIC_UPDATE);
|
|
||||||
runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_BUFFER,
|
|
||||||
BinaryDictUtils.VERSION4_WITH_DYNAMIC_UPDATE);
|
|
||||||
runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_BUFFER,
|
|
||||||
BinaryDictUtils.VERSION4_WITH_DYNAMIC_UPDATE_AND_TIMESTAMP);
|
|
||||||
|
|
||||||
for (final String result : results) {
|
for (final String result : results) {
|
||||||
Log.d(TAG, result);
|
Log.d(TAG, result);
|
||||||
|
@ -336,17 +329,11 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
||||||
final List<String> results = CollectionUtils.newArrayList();
|
final List<String> results = CollectionUtils.newArrayList();
|
||||||
|
|
||||||
runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_ARRAY,
|
runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_ARRAY,
|
||||||
BinaryDictUtils.VERSION2);
|
BinaryDictUtils.VERSION2_OPTIONS);
|
||||||
runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_ARRAY,
|
runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_ARRAY,
|
||||||
BinaryDictUtils.VERSION3_WITHOUT_DYNAMIC_UPDATE);
|
BinaryDictUtils.VERSION3_OPTIONS);
|
||||||
runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_ARRAY,
|
runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_ARRAY,
|
||||||
BinaryDictUtils.VERSION3_WITH_DYNAMIC_UPDATE);
|
BinaryDictUtils.VERSION4_OPTIONS_WITH_TIMESTAMP);
|
||||||
runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_ARRAY,
|
|
||||||
BinaryDictUtils.VERSION4_WITHOUT_DYNAMIC_UPDATE);
|
|
||||||
runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_ARRAY,
|
|
||||||
BinaryDictUtils.VERSION4_WITH_DYNAMIC_UPDATE);
|
|
||||||
runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_ARRAY,
|
|
||||||
BinaryDictUtils.VERSION4_WITH_DYNAMIC_UPDATE_AND_TIMESTAMP);
|
|
||||||
|
|
||||||
for (final String result : results) {
|
for (final String result : results) {
|
||||||
Log.d(TAG, result);
|
Log.d(TAG, result);
|
||||||
|
@ -472,17 +459,11 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
||||||
final ArrayList<String> results = CollectionUtils.newArrayList();
|
final ArrayList<String> results = CollectionUtils.newArrayList();
|
||||||
|
|
||||||
runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_BUFFER,
|
runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_BUFFER,
|
||||||
BinaryDictUtils.VERSION2);
|
BinaryDictUtils.VERSION2_OPTIONS);
|
||||||
runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_BUFFER,
|
runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_BUFFER,
|
||||||
BinaryDictUtils.VERSION3_WITHOUT_DYNAMIC_UPDATE);
|
BinaryDictUtils.VERSION3_OPTIONS);
|
||||||
runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_BUFFER,
|
runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_BUFFER,
|
||||||
BinaryDictUtils.VERSION3_WITH_DYNAMIC_UPDATE);
|
BinaryDictUtils.VERSION4_OPTIONS_WITH_TIMESTAMP);
|
||||||
runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_BUFFER,
|
|
||||||
BinaryDictUtils.VERSION4_WITHOUT_DYNAMIC_UPDATE);
|
|
||||||
runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_BUFFER,
|
|
||||||
BinaryDictUtils.VERSION4_WITH_DYNAMIC_UPDATE);
|
|
||||||
runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_BUFFER,
|
|
||||||
BinaryDictUtils.VERSION4_WITH_DYNAMIC_UPDATE_AND_TIMESTAMP);
|
|
||||||
|
|
||||||
for (final String result : results) {
|
for (final String result : results) {
|
||||||
Log.d(TAG, result);
|
Log.d(TAG, result);
|
||||||
|
@ -493,17 +474,11 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
||||||
final ArrayList<String> results = CollectionUtils.newArrayList();
|
final ArrayList<String> results = CollectionUtils.newArrayList();
|
||||||
|
|
||||||
runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_ARRAY,
|
runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_ARRAY,
|
||||||
BinaryDictUtils.VERSION2);
|
BinaryDictUtils.VERSION2_OPTIONS);
|
||||||
runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_ARRAY,
|
runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_ARRAY,
|
||||||
BinaryDictUtils.VERSION3_WITHOUT_DYNAMIC_UPDATE);
|
BinaryDictUtils.VERSION3_OPTIONS);
|
||||||
runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_ARRAY,
|
runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_ARRAY,
|
||||||
BinaryDictUtils.VERSION3_WITH_DYNAMIC_UPDATE);
|
BinaryDictUtils.VERSION4_OPTIONS_WITH_TIMESTAMP);
|
||||||
runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_ARRAY,
|
|
||||||
BinaryDictUtils.VERSION4_WITHOUT_DYNAMIC_UPDATE);
|
|
||||||
runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_ARRAY,
|
|
||||||
BinaryDictUtils.VERSION4_WITH_DYNAMIC_UPDATE);
|
|
||||||
runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_ARRAY,
|
|
||||||
BinaryDictUtils.VERSION4_WITH_DYNAMIC_UPDATE_AND_TIMESTAMP);
|
|
||||||
|
|
||||||
for (final String result : results) {
|
for (final String result : results) {
|
||||||
Log.d(TAG, result);
|
Log.d(TAG, result);
|
||||||
|
@ -612,29 +587,19 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
||||||
public void testGetTerminalPosition() {
|
public void testGetTerminalPosition() {
|
||||||
final ArrayList<String> results = CollectionUtils.newArrayList();
|
final ArrayList<String> results = CollectionUtils.newArrayList();
|
||||||
|
|
||||||
runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_ARRAY, BinaryDictUtils.VERSION2);
|
|
||||||
runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_ARRAY,
|
runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_ARRAY,
|
||||||
BinaryDictUtils.VERSION3_WITHOUT_DYNAMIC_UPDATE);
|
BinaryDictUtils.VERSION2_OPTIONS);
|
||||||
runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_ARRAY,
|
runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_ARRAY,
|
||||||
BinaryDictUtils.VERSION3_WITH_DYNAMIC_UPDATE);
|
BinaryDictUtils.VERSION3_OPTIONS);
|
||||||
runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_ARRAY,
|
runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_ARRAY,
|
||||||
BinaryDictUtils.VERSION4_WITHOUT_DYNAMIC_UPDATE);
|
BinaryDictUtils.VERSION4_OPTIONS_WITH_TIMESTAMP);
|
||||||
runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_ARRAY,
|
|
||||||
BinaryDictUtils.VERSION4_WITH_DYNAMIC_UPDATE);
|
|
||||||
runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_ARRAY,
|
|
||||||
BinaryDictUtils.VERSION4_WITH_DYNAMIC_UPDATE_AND_TIMESTAMP);
|
|
||||||
|
|
||||||
runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_BUFFER, BinaryDictUtils.VERSION2);
|
|
||||||
runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_BUFFER,
|
runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_BUFFER,
|
||||||
BinaryDictUtils.VERSION3_WITHOUT_DYNAMIC_UPDATE);
|
BinaryDictUtils.VERSION2_OPTIONS);
|
||||||
runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_BUFFER,
|
runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_BUFFER,
|
||||||
BinaryDictUtils.VERSION3_WITH_DYNAMIC_UPDATE);
|
BinaryDictUtils.VERSION3_OPTIONS);
|
||||||
runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_BUFFER,
|
runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_BUFFER,
|
||||||
BinaryDictUtils.VERSION4_WITHOUT_DYNAMIC_UPDATE);
|
BinaryDictUtils.VERSION4_OPTIONS_WITH_TIMESTAMP);
|
||||||
runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_BUFFER,
|
|
||||||
BinaryDictUtils.VERSION4_WITH_DYNAMIC_UPDATE);
|
|
||||||
runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_BUFFER,
|
|
||||||
BinaryDictUtils.VERSION4_WITH_DYNAMIC_UPDATE_AND_TIMESTAMP);
|
|
||||||
|
|
||||||
for (final String result : results) {
|
for (final String result : results) {
|
||||||
Log.d(TAG, result);
|
Log.d(TAG, result);
|
||||||
|
@ -668,7 +633,6 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testDeleteWord() throws IOException, UnsupportedFormatException {
|
public void testDeleteWord() throws IOException, UnsupportedFormatException {
|
||||||
runTestDeleteWord(BinaryDictUtils.VERSION3_WITH_DYNAMIC_UPDATE);
|
runTestDeleteWord(BinaryDictUtils.VERSION3_OPTIONS);
|
||||||
runTestDeleteWord(BinaryDictUtils.VERSION4_WITH_DYNAMIC_UPDATE);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -114,7 +114,7 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
|
||||||
formatOptions);
|
formatOptions);
|
||||||
printPtNode(currentInfo);
|
printPtNode(currentInfo);
|
||||||
}
|
}
|
||||||
if (formatOptions.mSupportsDynamicUpdate) {
|
if (formatOptions.supportsDynamicUpdate()) {
|
||||||
final int forwardLinkAddress = dictBuffer.readUnsignedInt24();
|
final int forwardLinkAddress = dictBuffer.readUnsignedInt24();
|
||||||
Log.d(TAG, " forwardLinkAddress = " + forwardLinkAddress);
|
Log.d(TAG, " forwardLinkAddress = " + forwardLinkAddress);
|
||||||
}
|
}
|
||||||
|
@ -289,8 +289,9 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testInsertWord() {
|
public void testInsertWord() {
|
||||||
runTestInsertWord(BinaryDictUtils.VERSION3_WITH_DYNAMIC_UPDATE);
|
runTestInsertWord(BinaryDictUtils.VERSION3_OPTIONS);
|
||||||
runTestInsertWord(BinaryDictUtils.VERSION4_WITH_DYNAMIC_UPDATE);
|
runTestInsertWord(BinaryDictUtils.VERSION4_OPTIONS_WITHOUT_TIMESTAMP);
|
||||||
|
runTestInsertWord(BinaryDictUtils.VERSION4_OPTIONS_WITH_TIMESTAMP);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void runTestInsertWordWithBigrams(final FormatOptions formatOptions) {
|
private void runTestInsertWordWithBigrams(final FormatOptions formatOptions) {
|
||||||
|
@ -329,8 +330,9 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testInsertWordWithBigrams() {
|
public void testInsertWordWithBigrams() {
|
||||||
runTestInsertWordWithBigrams(BinaryDictUtils.VERSION3_WITH_DYNAMIC_UPDATE);
|
runTestInsertWordWithBigrams(BinaryDictUtils.VERSION3_OPTIONS);
|
||||||
runTestInsertWordWithBigrams(BinaryDictUtils.VERSION4_WITH_DYNAMIC_UPDATE);
|
runTestInsertWordWithBigrams(BinaryDictUtils.VERSION4_OPTIONS_WITHOUT_TIMESTAMP);
|
||||||
|
runTestInsertWordWithBigrams(BinaryDictUtils.VERSION4_OPTIONS_WITH_TIMESTAMP);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void runTestRandomWords(final FormatOptions formatOptions) {
|
private void runTestRandomWords(final FormatOptions formatOptions) {
|
||||||
|
@ -377,7 +379,8 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testRandomWords() {
|
public void testRandomWords() {
|
||||||
runTestRandomWords(BinaryDictUtils.VERSION3_WITH_DYNAMIC_UPDATE);
|
runTestRandomWords(BinaryDictUtils.VERSION3_OPTIONS);
|
||||||
runTestRandomWords(BinaryDictUtils.VERSION4_WITH_DYNAMIC_UPDATE);
|
runTestRandomWords(BinaryDictUtils.VERSION4_OPTIONS_WITHOUT_TIMESTAMP);
|
||||||
|
runTestRandomWords(BinaryDictUtils.VERSION4_OPTIONS_WITH_TIMESTAMP);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -29,23 +29,17 @@ public class BinaryDictUtils {
|
||||||
|
|
||||||
public static final String TEST_DICT_FILE_EXTENSION = ".testDict";
|
public static final String TEST_DICT_FILE_EXTENSION = ".testDict";
|
||||||
|
|
||||||
public static final FormatSpec.FormatOptions VERSION2 =
|
public static final FormatSpec.FormatOptions VERSION2_OPTIONS =
|
||||||
new FormatSpec.FormatOptions(FormatSpec.VERSION2);
|
new FormatSpec.FormatOptions(FormatSpec.VERSION2);
|
||||||
public static final FormatSpec.FormatOptions VERSION3_WITHOUT_DYNAMIC_UPDATE =
|
public static final FormatSpec.FormatOptions VERSION3_OPTIONS =
|
||||||
new FormatSpec.FormatOptions(FormatSpec.VERSION3, false /* supportsDynamicUpdate */);
|
new FormatSpec.FormatOptions(FormatSpec.VERSION3);
|
||||||
public static final FormatSpec.FormatOptions VERSION3_WITH_DYNAMIC_UPDATE =
|
public static final FormatSpec.FormatOptions VERSION4_OPTIONS_WITHOUT_TIMESTAMP =
|
||||||
new FormatSpec.FormatOptions(FormatSpec.VERSION3, true /* supportsDynamicUpdate */);
|
new FormatSpec.FormatOptions(FormatSpec.VERSION4, false /* hasTimestamp */);
|
||||||
public static final FormatSpec.FormatOptions VERSION4_WITHOUT_DYNAMIC_UPDATE =
|
public static final FormatSpec.FormatOptions VERSION4_OPTIONS_WITH_TIMESTAMP =
|
||||||
new FormatSpec.FormatOptions(FormatSpec.VERSION4, false /* supportsDynamicUpdate */);
|
new FormatSpec.FormatOptions(FormatSpec.VERSION4, true /* hasTimestamp */);
|
||||||
public static final FormatSpec.FormatOptions VERSION4_WITH_DYNAMIC_UPDATE =
|
|
||||||
new FormatSpec.FormatOptions(FormatSpec.VERSION4, true /* supportsDynamicUpdate */);
|
|
||||||
public static final FormatSpec.FormatOptions VERSION4_WITH_DYNAMIC_UPDATE_AND_TIMESTAMP =
|
|
||||||
new FormatSpec.FormatOptions(FormatSpec.VERSION4, true /* supportsDynamicUpdate */,
|
|
||||||
true /* hasTimestamp */);
|
|
||||||
|
|
||||||
public static DictionaryOptions makeDictionaryOptions(final String id, final String version) {
|
public static DictionaryOptions makeDictionaryOptions(final String id, final String version) {
|
||||||
final DictionaryOptions options = new DictionaryOptions(new HashMap<String, String>(),
|
final DictionaryOptions options = new DictionaryOptions(new HashMap<String, String>());
|
||||||
false /* germanUmlautProcessing */, false /* frenchLigatureProcessing */);
|
|
||||||
options.mAttributes.put(FileHeader.DICTIONARY_LOCALE_ATTRIBUTE, "en_US");
|
options.mAttributes.put(FileHeader.DICTIONARY_LOCALE_ATTRIBUTE, "en_US");
|
||||||
options.mAttributes.put(FileHeader.DICTIONARY_ID_ATTRIBUTE, id);
|
options.mAttributes.put(FileHeader.DICTIONARY_ID_ATTRIBUTE, id);
|
||||||
options.mAttributes.put(FileHeader.DICTIONARY_VERSION_ATTRIBUTE, version);
|
options.mAttributes.put(FileHeader.DICTIONARY_VERSION_ATTRIBUTE, version);
|
||||||
|
|
|
@ -50,8 +50,6 @@ public class CombinedInputOutput {
|
||||||
private static final String NOT_A_WORD_TAG = "not_a_word";
|
private static final String NOT_A_WORD_TAG = "not_a_word";
|
||||||
private static final String WHITELIST_TAG = "whitelist";
|
private static final String WHITELIST_TAG = "whitelist";
|
||||||
private static final String OPTIONS_TAG = "options";
|
private static final String OPTIONS_TAG = "options";
|
||||||
private static final String GERMAN_UMLAUT_PROCESSING_OPTION = "german_umlaut_processing";
|
|
||||||
private static final String FRENCH_LIGATURE_PROCESSING_OPTION = "french_ligature_processing";
|
|
||||||
private static final String COMMENT_LINE_STARTER = "#";
|
private static final String COMMENT_LINE_STARTER = "#";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -112,13 +110,9 @@ public class CombinedInputOutput {
|
||||||
attributes.put(keyValue[0], keyValue[1]);
|
attributes.put(keyValue[0], keyValue[1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
final boolean processUmlauts =
|
|
||||||
GERMAN_UMLAUT_PROCESSING_OPTION.equals(attributes.get(OPTIONS_TAG));
|
|
||||||
final boolean processLigatures =
|
|
||||||
FRENCH_LIGATURE_PROCESSING_OPTION.equals(attributes.get(OPTIONS_TAG));
|
|
||||||
attributes.remove(OPTIONS_TAG);
|
attributes.remove(OPTIONS_TAG);
|
||||||
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), new DictionaryOptions(
|
final FusionDictionary dict =
|
||||||
attributes, processUmlauts, processLigatures));
|
new FusionDictionary(new PtNodeArray(), new DictionaryOptions(attributes));
|
||||||
|
|
||||||
String line;
|
String line;
|
||||||
String word = null;
|
String word = null;
|
||||||
|
@ -216,11 +210,6 @@ public class CombinedInputOutput {
|
||||||
destination.write(options.get(DICTIONARY_TAG));
|
destination.write(options.get(DICTIONARY_TAG));
|
||||||
options.remove(DICTIONARY_TAG);
|
options.remove(DICTIONARY_TAG);
|
||||||
}
|
}
|
||||||
if (dict.mOptions.mGermanUmlautProcessing) {
|
|
||||||
destination.write("," + OPTIONS_TAG + "=" + GERMAN_UMLAUT_PROCESSING_OPTION);
|
|
||||||
} else if (dict.mOptions.mFrenchLigatureProcessing) {
|
|
||||||
destination.write("," + OPTIONS_TAG + "=" + FRENCH_LIGATURE_PROCESSING_OPTION);
|
|
||||||
}
|
|
||||||
for (final String key : dict.mOptions.mAttributes.keySet()) {
|
for (final String key : dict.mOptions.mAttributes.keySet()) {
|
||||||
final String value = dict.mOptions.mAttributes.get(key);
|
final String value = dict.mOptions.mAttributes.get(key);
|
||||||
destination.write("," + key + "=" + value);
|
destination.write("," + key + "=" + value);
|
||||||
|
|
|
@ -85,18 +85,6 @@ public class Diff extends Dicttool.Command {
|
||||||
|
|
||||||
private static void diffHeaders(final FusionDictionary dict0, final FusionDictionary dict1) {
|
private static void diffHeaders(final FusionDictionary dict0, final FusionDictionary dict1) {
|
||||||
boolean hasDifferences = false;
|
boolean hasDifferences = false;
|
||||||
if (dict0.mOptions.mFrenchLigatureProcessing != dict1.mOptions.mFrenchLigatureProcessing) {
|
|
||||||
System.out.println(" French ligature processing : "
|
|
||||||
+ dict0.mOptions.mFrenchLigatureProcessing + " <=> "
|
|
||||||
+ dict1.mOptions.mFrenchLigatureProcessing);
|
|
||||||
hasDifferences = true;
|
|
||||||
}
|
|
||||||
else if (dict0.mOptions.mGermanUmlautProcessing != dict1.mOptions.mGermanUmlautProcessing) {
|
|
||||||
System.out.println(" German umlaut processing : "
|
|
||||||
+ dict0.mOptions.mGermanUmlautProcessing + " <=> "
|
|
||||||
+ dict1.mOptions.mGermanUmlautProcessing);
|
|
||||||
hasDifferences = true;
|
|
||||||
}
|
|
||||||
final HashMap<String, String> options1 =
|
final HashMap<String, String> options1 =
|
||||||
new HashMap<String, String>(dict1.mOptions.mAttributes);
|
new HashMap<String, String>(dict1.mOptions.mAttributes);
|
||||||
for (final String optionKey : dict0.mOptions.mAttributes.keySet()) {
|
for (final String optionKey : dict0.mOptions.mAttributes.keySet()) {
|
||||||
|
|
|
@ -57,8 +57,6 @@ public class XmlDictInputOutput {
|
||||||
private static final String NOT_A_WORD_ATTR = "not_a_word";
|
private static final String NOT_A_WORD_ATTR = "not_a_word";
|
||||||
|
|
||||||
private static final String OPTIONS_KEY = "options";
|
private static final String OPTIONS_KEY = "options";
|
||||||
private static final String GERMAN_UMLAUT_PROCESSING_OPTION = "german_umlaut_processing";
|
|
||||||
private static final String FRENCH_LIGATURE_PROCESSING_OPTION = "french_ligature_processing";
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* SAX handler for a unigram XML file.
|
* SAX handler for a unigram XML file.
|
||||||
|
@ -120,12 +118,8 @@ public class XmlDictInputOutput {
|
||||||
attributes.put(attrName, attrs.getValue(attrIndex));
|
attributes.put(attrName, attrs.getValue(attrIndex));
|
||||||
}
|
}
|
||||||
final String optionsString = attributes.get(OPTIONS_KEY);
|
final String optionsString = attributes.get(OPTIONS_KEY);
|
||||||
final boolean processUmlauts =
|
|
||||||
GERMAN_UMLAUT_PROCESSING_OPTION.equals(optionsString);
|
|
||||||
final boolean processLigatures =
|
|
||||||
FRENCH_LIGATURE_PROCESSING_OPTION.equals(optionsString);
|
|
||||||
mDictionary = new FusionDictionary(new PtNodeArray(),
|
mDictionary = new FusionDictionary(new PtNodeArray(),
|
||||||
new DictionaryOptions(attributes, processUmlauts, processLigatures));
|
new DictionaryOptions(attributes));
|
||||||
} else {
|
} else {
|
||||||
mState = UNKNOWN;
|
mState = UNKNOWN;
|
||||||
}
|
}
|
||||||
|
@ -361,11 +355,6 @@ public class XmlDictInputOutput {
|
||||||
// TODO: use an XMLSerializer if this gets big
|
// TODO: use an XMLSerializer if this gets big
|
||||||
destination.write("<wordlist format=\"2\"");
|
destination.write("<wordlist format=\"2\"");
|
||||||
final HashMap<String, String> options = dict.mOptions.mAttributes;
|
final HashMap<String, String> options = dict.mOptions.mAttributes;
|
||||||
if (dict.mOptions.mGermanUmlautProcessing) {
|
|
||||||
destination.write(" " + OPTIONS_KEY + "=\"" + GERMAN_UMLAUT_PROCESSING_OPTION + "\"");
|
|
||||||
} else if (dict.mOptions.mFrenchLigatureProcessing) {
|
|
||||||
destination.write(" " + OPTIONS_KEY + "=\"" + FRENCH_LIGATURE_PROCESSING_OPTION + "\"");
|
|
||||||
}
|
|
||||||
for (final String key : dict.mOptions.mAttributes.keySet()) {
|
for (final String key : dict.mOptions.mAttributes.keySet()) {
|
||||||
final String value = dict.mOptions.mAttributes.get(key);
|
final String value = dict.mOptions.mAttributes.get(key);
|
||||||
destination.write(" " + key + "=\"" + value + "\"");
|
destination.write(" " + key + "=\"" + value + "\"");
|
||||||
|
|
|
@ -44,8 +44,7 @@ public class BinaryDictOffdeviceUtilsTests extends TestCase {
|
||||||
public void testGetRawDictWorks() throws IOException, UnsupportedFormatException {
|
public void testGetRawDictWorks() throws IOException, UnsupportedFormatException {
|
||||||
// Create a thrice-compressed dictionary file.
|
// Create a thrice-compressed dictionary file.
|
||||||
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
|
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
|
||||||
new DictionaryOptions(new HashMap<String, String>(),
|
new DictionaryOptions(new HashMap<String, String>()));
|
||||||
false /* germanUmlautProcessing */, false /* frenchLigatureProcessing */));
|
|
||||||
dict.add("foo", TEST_FREQ, null, false /* isNotAWord */);
|
dict.add("foo", TEST_FREQ, null, false /* isNotAWord */);
|
||||||
dict.add("fta", 1, null, false /* isNotAWord */);
|
dict.add("fta", 1, null, false /* isNotAWord */);
|
||||||
dict.add("ftb", 1, null, false /* isNotAWord */);
|
dict.add("ftb", 1, null, false /* isNotAWord */);
|
||||||
|
|
|
@ -32,8 +32,7 @@ public class BinaryDictEncoderFlattenTreeTests extends TestCase {
|
||||||
// that it does not contain any duplicates.
|
// that it does not contain any duplicates.
|
||||||
public void testFlattenNodes() {
|
public void testFlattenNodes() {
|
||||||
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
|
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
|
||||||
new DictionaryOptions(new HashMap<String, String>(),
|
new DictionaryOptions(new HashMap<String, String>()));
|
||||||
false /* germanUmlautProcessing */, false /* frenchLigatureProcessing */));
|
|
||||||
dict.add("foo", 1, null, false /* isNotAWord */);
|
dict.add("foo", 1, null, false /* isNotAWord */);
|
||||||
dict.add("fta", 1, null, false /* isNotAWord */);
|
dict.add("fta", 1, null, false /* isNotAWord */);
|
||||||
dict.add("ftb", 1, null, false /* isNotAWord */);
|
dict.add("ftb", 1, null, false /* isNotAWord */);
|
||||||
|
|
|
@ -96,8 +96,7 @@ public class FusionDictionaryTest extends TestCase {
|
||||||
// that it does not contain any duplicates.
|
// that it does not contain any duplicates.
|
||||||
public void testFusion() {
|
public void testFusion() {
|
||||||
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
|
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
|
||||||
new DictionaryOptions(new HashMap<String, String>(),
|
new DictionaryOptions(new HashMap<String, String>()));
|
||||||
false /* germanUmlautProcessing */, false /* frenchLigatureProcessing */));
|
|
||||||
final long time = System.currentTimeMillis();
|
final long time = System.currentTimeMillis();
|
||||||
prepare(time);
|
prepare(time);
|
||||||
for (int i = 0; i < sWords.size(); ++i) {
|
for (int i = 0; i < sWords.size(); ++i) {
|
||||||
|
|
Loading…
Reference in New Issue