parent
e301085a70
commit
aa4168ee09
|
@ -108,7 +108,7 @@ public class BinaryDictEncoderUtils {
|
||||||
* like address lists do.
|
* like address lists do.
|
||||||
*/
|
*/
|
||||||
static int getShortcutListSize(final ArrayList<WeightedString> shortcutList) {
|
static int getShortcutListSize(final ArrayList<WeightedString> shortcutList) {
|
||||||
if (null == shortcutList) return 0;
|
if (null == shortcutList || shortcutList.isEmpty()) return 0;
|
||||||
int size = FormatSpec.GROUP_SHORTCUT_LIST_SIZE_SIZE;
|
int size = FormatSpec.GROUP_SHORTCUT_LIST_SIZE_SIZE;
|
||||||
for (final WeightedString shortcut : shortcutList) {
|
for (final WeightedString shortcut : shortcutList) {
|
||||||
size += getShortcutSize(shortcut);
|
size += getShortcutSize(shortcut);
|
||||||
|
@ -601,8 +601,9 @@ public class BinaryDictEncoderUtils {
|
||||||
private static byte makeCharGroupFlags(final CharGroup group, final int groupAddress,
|
private static byte makeCharGroupFlags(final CharGroup group, final int groupAddress,
|
||||||
final int childrenOffset, final FormatOptions formatOptions) {
|
final int childrenOffset, final FormatOptions formatOptions) {
|
||||||
return (byte) makeCharGroupFlags(group.mChars.length > 1, group.mFrequency >= 0,
|
return (byte) makeCharGroupFlags(group.mChars.length > 1, group.mFrequency >= 0,
|
||||||
getByteSize(childrenOffset), group.mShortcutTargets != null, group.mBigrams != null,
|
getByteSize(childrenOffset),
|
||||||
group.mIsNotAWord, group.mIsBlacklistEntry, formatOptions);
|
group.mShortcutTargets != null && !group.mShortcutTargets.isEmpty(),
|
||||||
|
group.mBigrams != null, group.mIsNotAWord, group.mIsBlacklistEntry, formatOptions);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -795,7 +796,7 @@ public class BinaryDictEncoderUtils {
|
||||||
groupAddress += shift;
|
groupAddress += shift;
|
||||||
|
|
||||||
// Write shortcuts
|
// Write shortcuts
|
||||||
if (null != group.mShortcutTargets) {
|
if (null != group.mShortcutTargets && !group.mShortcutTargets.isEmpty()) {
|
||||||
final int indexOfShortcutByteSize = index;
|
final int indexOfShortcutByteSize = index;
|
||||||
index += FormatSpec.GROUP_SHORTCUT_LIST_SIZE_SIZE;
|
index += FormatSpec.GROUP_SHORTCUT_LIST_SIZE_SIZE;
|
||||||
groupAddress += FormatSpec.GROUP_SHORTCUT_LIST_SIZE_SIZE;
|
groupAddress += FormatSpec.GROUP_SHORTCUT_LIST_SIZE_SIZE;
|
||||||
|
|
|
@ -255,18 +255,26 @@ public class Ver3DictDecoder implements DictDecoder {
|
||||||
childrenAddress += addressPointer;
|
childrenAddress += addressPointer;
|
||||||
}
|
}
|
||||||
addressPointer += BinaryDictIOUtils.getChildrenAddressSize(flags, options);
|
addressPointer += BinaryDictIOUtils.getChildrenAddressSize(flags, options);
|
||||||
ArrayList<WeightedString> shortcutTargets = null;
|
final ArrayList<WeightedString> shortcutTargets;
|
||||||
if (0 != (flags & FormatSpec.FLAG_HAS_SHORTCUT_TARGETS)) {
|
if (0 != (flags & FormatSpec.FLAG_HAS_SHORTCUT_TARGETS)) {
|
||||||
|
// readShortcut will add shortcuts to shortcutTargets.
|
||||||
|
shortcutTargets = new ArrayList<WeightedString>();
|
||||||
addressPointer += PtNodeReader.readShortcut(mDictBuffer, shortcutTargets);
|
addressPointer += PtNodeReader.readShortcut(mDictBuffer, shortcutTargets);
|
||||||
|
} else {
|
||||||
|
shortcutTargets = null;
|
||||||
}
|
}
|
||||||
ArrayList<PendingAttribute> bigrams = null;
|
|
||||||
|
final ArrayList<PendingAttribute> bigrams;
|
||||||
if (0 != (flags & FormatSpec.FLAG_HAS_BIGRAMS)) {
|
if (0 != (flags & FormatSpec.FLAG_HAS_BIGRAMS)) {
|
||||||
bigrams = new ArrayList<PendingAttribute>();
|
bigrams = new ArrayList<PendingAttribute>();
|
||||||
addressPointer += PtNodeReader.readBigrams(mDictBuffer, bigrams, addressPointer);
|
addressPointer += PtNodeReader.readBigrams(mDictBuffer, bigrams, addressPointer);
|
||||||
if (bigrams.size() >= FormatSpec.MAX_BIGRAMS_IN_A_GROUP) {
|
if (bigrams.size() >= FormatSpec.MAX_BIGRAMS_IN_A_GROUP) {
|
||||||
MakedictLog.d("too many bigrams in a group.");
|
MakedictLog.d("too many bigrams in a group.");
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
bigrams = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
return new CharGroupInfo(ptNodePos, addressPointer, flags, characters, frequency,
|
return new CharGroupInfo(ptNodePos, addressPointer, flags, characters, frequency,
|
||||||
parentAddress, childrenAddress, shortcutTargets, bigrams);
|
parentAddress, childrenAddress, shortcutTargets, bigrams);
|
||||||
}
|
}
|
||||||
|
|
|
@ -53,6 +53,8 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
||||||
private static final int UNIGRAM_FREQ = 10;
|
private static final int UNIGRAM_FREQ = 10;
|
||||||
private static final int BIGRAM_FREQ = 50;
|
private static final int BIGRAM_FREQ = 50;
|
||||||
private static final int TOLERANCE_OF_BIGRAM_FREQ = 5;
|
private static final int TOLERANCE_OF_BIGRAM_FREQ = 5;
|
||||||
|
private static final int NUM_OF_NODES_HAVING_SHORTCUTS = 50;
|
||||||
|
private static final int NUM_OF_SHORTCUTS = 5;
|
||||||
|
|
||||||
private static final int USE_BYTE_ARRAY = 1;
|
private static final int USE_BYTE_ARRAY = 1;
|
||||||
private static final int USE_BYTE_BUFFER = 2;
|
private static final int USE_BYTE_BUFFER = 2;
|
||||||
|
@ -63,6 +65,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
||||||
private static final SparseArray<List<Integer>> sStarBigrams = CollectionUtils.newSparseArray();
|
private static final SparseArray<List<Integer>> sStarBigrams = CollectionUtils.newSparseArray();
|
||||||
private static final SparseArray<List<Integer>> sChainBigrams =
|
private static final SparseArray<List<Integer>> sChainBigrams =
|
||||||
CollectionUtils.newSparseArray();
|
CollectionUtils.newSparseArray();
|
||||||
|
private static final Map<String, List<String>> sShortcuts = CollectionUtils.newHashMap();
|
||||||
|
|
||||||
private static final FormatSpec.FormatOptions VERSION2 = new FormatSpec.FormatOptions(2);
|
private static final FormatSpec.FormatOptions VERSION2 = new FormatSpec.FormatOptions(2);
|
||||||
private static final FormatSpec.FormatOptions VERSION3_WITHOUT_DYNAMIC_UPDATE =
|
private static final FormatSpec.FormatOptions VERSION3_WITHOUT_DYNAMIC_UPDATE =
|
||||||
|
@ -95,6 +98,16 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
||||||
for (int i = 1; i < sWords.size(); ++i) {
|
for (int i = 1; i < sWords.size(); ++i) {
|
||||||
sStarBigrams.get(0).add(i);
|
sStarBigrams.get(0).add(i);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
sShortcuts.clear();
|
||||||
|
for (int i = 0; i < NUM_OF_NODES_HAVING_SHORTCUTS; ++i) {
|
||||||
|
final int from = Math.abs(random.nextInt()) % sWords.size();
|
||||||
|
sShortcuts.put(sWords.get(from), new ArrayList<String>());
|
||||||
|
for (int j = 0; j < NUM_OF_SHORTCUTS; ++j) {
|
||||||
|
final int to = Math.abs(random.nextInt()) % sWords.size();
|
||||||
|
sShortcuts.get(sWords.get(from)).add(sWords.get(to));
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private int[] generateCodePointSet(final int codePointSetSize, final Random random) {
|
private int[] generateCodePointSet(final int codePointSetSize, final Random random) {
|
||||||
|
@ -104,7 +117,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
||||||
if (r < 0) continue;
|
if (r < 0) continue;
|
||||||
// Don't insert 0~0x20, but insert any other code point.
|
// Don't insert 0~0x20, but insert any other code point.
|
||||||
// Code points are in the range 0~0x10FFFF.
|
// Code points are in the range 0~0x10FFFF.
|
||||||
final int candidateCodePoint = (int)(0x20 + r % (Character.MAX_CODE_POINT - 0x20));
|
final int candidateCodePoint = 0x20 + r % (Character.MAX_CODE_POINT - 0x20);
|
||||||
// Code points between MIN_ and MAX_SURROGATE are not valid on their own.
|
// Code points between MIN_ and MAX_SURROGATE are not valid on their own.
|
||||||
if (candidateCodePoint >= Character.MIN_SURROGATE
|
if (candidateCodePoint >= Character.MIN_SURROGATE
|
||||||
&& candidateCodePoint <= Character.MAX_SURROGATE) continue;
|
&& candidateCodePoint <= Character.MAX_SURROGATE) continue;
|
||||||
|
@ -243,6 +256,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
||||||
// check shortcut
|
// check shortcut
|
||||||
if (shortcutMap != null) {
|
if (shortcutMap != null) {
|
||||||
for (final Map.Entry<String, List<String>> entry : shortcutMap.entrySet()) {
|
for (final Map.Entry<String, List<String>> entry : shortcutMap.entrySet()) {
|
||||||
|
assertTrue(words.contains(entry.getKey()));
|
||||||
final CharGroup group = FusionDictionary.findWordInTree(dict.mRootNodeArray,
|
final CharGroup group = FusionDictionary.findWordInTree(dict.mRootNodeArray,
|
||||||
entry.getKey());
|
entry.getKey());
|
||||||
for (final String word : entry.getValue()) {
|
for (final String word : entry.getValue()) {
|
||||||
|
@ -321,6 +335,12 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
||||||
formatOptions, "chain"));
|
formatOptions, "chain"));
|
||||||
results.add(runReadAndWrite(sWords, sStarBigrams, null /* shortcuts */, bufferType,
|
results.add(runReadAndWrite(sWords, sStarBigrams, null /* shortcuts */, bufferType,
|
||||||
formatOptions, "star"));
|
formatOptions, "star"));
|
||||||
|
results.add(runReadAndWrite(sWords, sEmptyBigrams, sShortcuts, bufferType, formatOptions,
|
||||||
|
"unigram with shortcuts"));
|
||||||
|
results.add(runReadAndWrite(sWords, sChainBigrams, sShortcuts, bufferType, formatOptions,
|
||||||
|
"chain with shortcuts"));
|
||||||
|
results.add(runReadAndWrite(sWords, sStarBigrams, sShortcuts, bufferType, formatOptions,
|
||||||
|
"star with shortcuts"));
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testReadAndWriteWithByteBuffer() {
|
public void testReadAndWriteWithByteBuffer() {
|
||||||
|
|
Loading…
Reference in New Issue