parent
e301085a70
commit
aa4168ee09
|
@ -108,7 +108,7 @@ public class BinaryDictEncoderUtils {
|
|||
* like address lists do.
|
||||
*/
|
||||
static int getShortcutListSize(final ArrayList<WeightedString> shortcutList) {
|
||||
if (null == shortcutList) return 0;
|
||||
if (null == shortcutList || shortcutList.isEmpty()) return 0;
|
||||
int size = FormatSpec.GROUP_SHORTCUT_LIST_SIZE_SIZE;
|
||||
for (final WeightedString shortcut : shortcutList) {
|
||||
size += getShortcutSize(shortcut);
|
||||
|
@ -601,8 +601,9 @@ public class BinaryDictEncoderUtils {
|
|||
private static byte makeCharGroupFlags(final CharGroup group, final int groupAddress,
|
||||
final int childrenOffset, final FormatOptions formatOptions) {
|
||||
return (byte) makeCharGroupFlags(group.mChars.length > 1, group.mFrequency >= 0,
|
||||
getByteSize(childrenOffset), group.mShortcutTargets != null, group.mBigrams != null,
|
||||
group.mIsNotAWord, group.mIsBlacklistEntry, formatOptions);
|
||||
getByteSize(childrenOffset),
|
||||
group.mShortcutTargets != null && !group.mShortcutTargets.isEmpty(),
|
||||
group.mBigrams != null, group.mIsNotAWord, group.mIsBlacklistEntry, formatOptions);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -795,7 +796,7 @@ public class BinaryDictEncoderUtils {
|
|||
groupAddress += shift;
|
||||
|
||||
// Write shortcuts
|
||||
if (null != group.mShortcutTargets) {
|
||||
if (null != group.mShortcutTargets && !group.mShortcutTargets.isEmpty()) {
|
||||
final int indexOfShortcutByteSize = index;
|
||||
index += FormatSpec.GROUP_SHORTCUT_LIST_SIZE_SIZE;
|
||||
groupAddress += FormatSpec.GROUP_SHORTCUT_LIST_SIZE_SIZE;
|
||||
|
|
|
@ -255,18 +255,26 @@ public class Ver3DictDecoder implements DictDecoder {
|
|||
childrenAddress += addressPointer;
|
||||
}
|
||||
addressPointer += BinaryDictIOUtils.getChildrenAddressSize(flags, options);
|
||||
ArrayList<WeightedString> shortcutTargets = null;
|
||||
final ArrayList<WeightedString> shortcutTargets;
|
||||
if (0 != (flags & FormatSpec.FLAG_HAS_SHORTCUT_TARGETS)) {
|
||||
// readShortcut will add shortcuts to shortcutTargets.
|
||||
shortcutTargets = new ArrayList<WeightedString>();
|
||||
addressPointer += PtNodeReader.readShortcut(mDictBuffer, shortcutTargets);
|
||||
} else {
|
||||
shortcutTargets = null;
|
||||
}
|
||||
ArrayList<PendingAttribute> bigrams = null;
|
||||
|
||||
final ArrayList<PendingAttribute> bigrams;
|
||||
if (0 != (flags & FormatSpec.FLAG_HAS_BIGRAMS)) {
|
||||
bigrams = new ArrayList<PendingAttribute>();
|
||||
addressPointer += PtNodeReader.readBigrams(mDictBuffer, bigrams, addressPointer);
|
||||
if (bigrams.size() >= FormatSpec.MAX_BIGRAMS_IN_A_GROUP) {
|
||||
MakedictLog.d("too many bigrams in a group.");
|
||||
}
|
||||
} else {
|
||||
bigrams = null;
|
||||
}
|
||||
|
||||
return new CharGroupInfo(ptNodePos, addressPointer, flags, characters, frequency,
|
||||
parentAddress, childrenAddress, shortcutTargets, bigrams);
|
||||
}
|
||||
|
|
|
@ -53,6 +53,8 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
|||
private static final int UNIGRAM_FREQ = 10;
|
||||
private static final int BIGRAM_FREQ = 50;
|
||||
private static final int TOLERANCE_OF_BIGRAM_FREQ = 5;
|
||||
private static final int NUM_OF_NODES_HAVING_SHORTCUTS = 50;
|
||||
private static final int NUM_OF_SHORTCUTS = 5;
|
||||
|
||||
private static final int USE_BYTE_ARRAY = 1;
|
||||
private static final int USE_BYTE_BUFFER = 2;
|
||||
|
@ -63,6 +65,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
|||
private static final SparseArray<List<Integer>> sStarBigrams = CollectionUtils.newSparseArray();
|
||||
private static final SparseArray<List<Integer>> sChainBigrams =
|
||||
CollectionUtils.newSparseArray();
|
||||
private static final Map<String, List<String>> sShortcuts = CollectionUtils.newHashMap();
|
||||
|
||||
private static final FormatSpec.FormatOptions VERSION2 = new FormatSpec.FormatOptions(2);
|
||||
private static final FormatSpec.FormatOptions VERSION3_WITHOUT_DYNAMIC_UPDATE =
|
||||
|
@ -95,6 +98,16 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
|||
for (int i = 1; i < sWords.size(); ++i) {
|
||||
sStarBigrams.get(0).add(i);
|
||||
}
|
||||
|
||||
sShortcuts.clear();
|
||||
for (int i = 0; i < NUM_OF_NODES_HAVING_SHORTCUTS; ++i) {
|
||||
final int from = Math.abs(random.nextInt()) % sWords.size();
|
||||
sShortcuts.put(sWords.get(from), new ArrayList<String>());
|
||||
for (int j = 0; j < NUM_OF_SHORTCUTS; ++j) {
|
||||
final int to = Math.abs(random.nextInt()) % sWords.size();
|
||||
sShortcuts.get(sWords.get(from)).add(sWords.get(to));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private int[] generateCodePointSet(final int codePointSetSize, final Random random) {
|
||||
|
@ -104,7 +117,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
|||
if (r < 0) continue;
|
||||
// Don't insert 0~0x20, but insert any other code point.
|
||||
// Code points are in the range 0~0x10FFFF.
|
||||
final int candidateCodePoint = (int)(0x20 + r % (Character.MAX_CODE_POINT - 0x20));
|
||||
final int candidateCodePoint = 0x20 + r % (Character.MAX_CODE_POINT - 0x20);
|
||||
// Code points between MIN_ and MAX_SURROGATE are not valid on their own.
|
||||
if (candidateCodePoint >= Character.MIN_SURROGATE
|
||||
&& candidateCodePoint <= Character.MAX_SURROGATE) continue;
|
||||
|
@ -243,6 +256,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
|||
// check shortcut
|
||||
if (shortcutMap != null) {
|
||||
for (final Map.Entry<String, List<String>> entry : shortcutMap.entrySet()) {
|
||||
assertTrue(words.contains(entry.getKey()));
|
||||
final CharGroup group = FusionDictionary.findWordInTree(dict.mRootNodeArray,
|
||||
entry.getKey());
|
||||
for (final String word : entry.getValue()) {
|
||||
|
@ -321,6 +335,12 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
|||
formatOptions, "chain"));
|
||||
results.add(runReadAndWrite(sWords, sStarBigrams, null /* shortcuts */, bufferType,
|
||||
formatOptions, "star"));
|
||||
results.add(runReadAndWrite(sWords, sEmptyBigrams, sShortcuts, bufferType, formatOptions,
|
||||
"unigram with shortcuts"));
|
||||
results.add(runReadAndWrite(sWords, sChainBigrams, sShortcuts, bufferType, formatOptions,
|
||||
"chain with shortcuts"));
|
||||
results.add(runReadAndWrite(sWords, sStarBigrams, sShortcuts, bufferType, formatOptions,
|
||||
"star with shortcuts"));
|
||||
}
|
||||
|
||||
public void testReadAndWriteWithByteBuffer() {
|
||||
|
|
Loading…
Reference in New Issue