Fix writePlacedNode: treat an empty shortcut list the same as a null one when encoding.

Change-Id: I1d6b086f1d9f0dbd8d74f964e29ae62c533af978
main
Yuichiro Hanada 2013-08-22 22:01:19 +09:00
parent e301085a70
commit aa4168ee09
3 changed files with 36 additions and 7 deletions

View File

@ -108,7 +108,7 @@ public class BinaryDictEncoderUtils {
* like address lists do. * like address lists do.
*/ */
static int getShortcutListSize(final ArrayList<WeightedString> shortcutList) { static int getShortcutListSize(final ArrayList<WeightedString> shortcutList) {
if (null == shortcutList) return 0; if (null == shortcutList || shortcutList.isEmpty()) return 0;
int size = FormatSpec.GROUP_SHORTCUT_LIST_SIZE_SIZE; int size = FormatSpec.GROUP_SHORTCUT_LIST_SIZE_SIZE;
for (final WeightedString shortcut : shortcutList) { for (final WeightedString shortcut : shortcutList) {
size += getShortcutSize(shortcut); size += getShortcutSize(shortcut);
@ -601,8 +601,9 @@ public class BinaryDictEncoderUtils {
private static byte makeCharGroupFlags(final CharGroup group, final int groupAddress, private static byte makeCharGroupFlags(final CharGroup group, final int groupAddress,
final int childrenOffset, final FormatOptions formatOptions) { final int childrenOffset, final FormatOptions formatOptions) {
return (byte) makeCharGroupFlags(group.mChars.length > 1, group.mFrequency >= 0, return (byte) makeCharGroupFlags(group.mChars.length > 1, group.mFrequency >= 0,
getByteSize(childrenOffset), group.mShortcutTargets != null, group.mBigrams != null, getByteSize(childrenOffset),
group.mIsNotAWord, group.mIsBlacklistEntry, formatOptions); group.mShortcutTargets != null && !group.mShortcutTargets.isEmpty(),
group.mBigrams != null, group.mIsNotAWord, group.mIsBlacklistEntry, formatOptions);
} }
/** /**
@ -795,7 +796,7 @@ public class BinaryDictEncoderUtils {
groupAddress += shift; groupAddress += shift;
// Write shortcuts // Write shortcuts
if (null != group.mShortcutTargets) { if (null != group.mShortcutTargets && !group.mShortcutTargets.isEmpty()) {
final int indexOfShortcutByteSize = index; final int indexOfShortcutByteSize = index;
index += FormatSpec.GROUP_SHORTCUT_LIST_SIZE_SIZE; index += FormatSpec.GROUP_SHORTCUT_LIST_SIZE_SIZE;
groupAddress += FormatSpec.GROUP_SHORTCUT_LIST_SIZE_SIZE; groupAddress += FormatSpec.GROUP_SHORTCUT_LIST_SIZE_SIZE;

View File

@ -255,18 +255,26 @@ public class Ver3DictDecoder implements DictDecoder {
childrenAddress += addressPointer; childrenAddress += addressPointer;
} }
addressPointer += BinaryDictIOUtils.getChildrenAddressSize(flags, options); addressPointer += BinaryDictIOUtils.getChildrenAddressSize(flags, options);
ArrayList<WeightedString> shortcutTargets = null; final ArrayList<WeightedString> shortcutTargets;
if (0 != (flags & FormatSpec.FLAG_HAS_SHORTCUT_TARGETS)) { if (0 != (flags & FormatSpec.FLAG_HAS_SHORTCUT_TARGETS)) {
// readShortcut will add shortcuts to shortcutTargets.
shortcutTargets = new ArrayList<WeightedString>();
addressPointer += PtNodeReader.readShortcut(mDictBuffer, shortcutTargets); addressPointer += PtNodeReader.readShortcut(mDictBuffer, shortcutTargets);
} else {
shortcutTargets = null;
} }
ArrayList<PendingAttribute> bigrams = null;
final ArrayList<PendingAttribute> bigrams;
if (0 != (flags & FormatSpec.FLAG_HAS_BIGRAMS)) { if (0 != (flags & FormatSpec.FLAG_HAS_BIGRAMS)) {
bigrams = new ArrayList<PendingAttribute>(); bigrams = new ArrayList<PendingAttribute>();
addressPointer += PtNodeReader.readBigrams(mDictBuffer, bigrams, addressPointer); addressPointer += PtNodeReader.readBigrams(mDictBuffer, bigrams, addressPointer);
if (bigrams.size() >= FormatSpec.MAX_BIGRAMS_IN_A_GROUP) { if (bigrams.size() >= FormatSpec.MAX_BIGRAMS_IN_A_GROUP) {
MakedictLog.d("too many bigrams in a group."); MakedictLog.d("too many bigrams in a group.");
} }
} else {
bigrams = null;
} }
return new CharGroupInfo(ptNodePos, addressPointer, flags, characters, frequency, return new CharGroupInfo(ptNodePos, addressPointer, flags, characters, frequency,
parentAddress, childrenAddress, shortcutTargets, bigrams); parentAddress, childrenAddress, shortcutTargets, bigrams);
} }

View File

@ -53,6 +53,8 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
private static final int UNIGRAM_FREQ = 10; private static final int UNIGRAM_FREQ = 10;
private static final int BIGRAM_FREQ = 50; private static final int BIGRAM_FREQ = 50;
private static final int TOLERANCE_OF_BIGRAM_FREQ = 5; private static final int TOLERANCE_OF_BIGRAM_FREQ = 5;
private static final int NUM_OF_NODES_HAVING_SHORTCUTS = 50;
private static final int NUM_OF_SHORTCUTS = 5;
private static final int USE_BYTE_ARRAY = 1; private static final int USE_BYTE_ARRAY = 1;
private static final int USE_BYTE_BUFFER = 2; private static final int USE_BYTE_BUFFER = 2;
@ -63,6 +65,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
private static final SparseArray<List<Integer>> sStarBigrams = CollectionUtils.newSparseArray(); private static final SparseArray<List<Integer>> sStarBigrams = CollectionUtils.newSparseArray();
private static final SparseArray<List<Integer>> sChainBigrams = private static final SparseArray<List<Integer>> sChainBigrams =
CollectionUtils.newSparseArray(); CollectionUtils.newSparseArray();
private static final Map<String, List<String>> sShortcuts = CollectionUtils.newHashMap();
private static final FormatSpec.FormatOptions VERSION2 = new FormatSpec.FormatOptions(2); private static final FormatSpec.FormatOptions VERSION2 = new FormatSpec.FormatOptions(2);
private static final FormatSpec.FormatOptions VERSION3_WITHOUT_DYNAMIC_UPDATE = private static final FormatSpec.FormatOptions VERSION3_WITHOUT_DYNAMIC_UPDATE =
@ -95,6 +98,16 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
for (int i = 1; i < sWords.size(); ++i) { for (int i = 1; i < sWords.size(); ++i) {
sStarBigrams.get(0).add(i); sStarBigrams.get(0).add(i);
} }
sShortcuts.clear();
for (int i = 0; i < NUM_OF_NODES_HAVING_SHORTCUTS; ++i) {
final int from = Math.abs(random.nextInt()) % sWords.size();
sShortcuts.put(sWords.get(from), new ArrayList<String>());
for (int j = 0; j < NUM_OF_SHORTCUTS; ++j) {
final int to = Math.abs(random.nextInt()) % sWords.size();
sShortcuts.get(sWords.get(from)).add(sWords.get(to));
}
}
} }
private int[] generateCodePointSet(final int codePointSetSize, final Random random) { private int[] generateCodePointSet(final int codePointSetSize, final Random random) {
@ -104,7 +117,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
if (r < 0) continue; if (r < 0) continue;
// Don't insert 0~0x20, but insert any other code point. // Don't insert 0~0x20, but insert any other code point.
// Code points are in the range 0~0x10FFFF. // Code points are in the range 0~0x10FFFF.
final int candidateCodePoint = (int)(0x20 + r % (Character.MAX_CODE_POINT - 0x20)); final int candidateCodePoint = 0x20 + r % (Character.MAX_CODE_POINT - 0x20);
// Code points between MIN_ and MAX_SURROGATE are not valid on their own. // Code points between MIN_ and MAX_SURROGATE are not valid on their own.
if (candidateCodePoint >= Character.MIN_SURROGATE if (candidateCodePoint >= Character.MIN_SURROGATE
&& candidateCodePoint <= Character.MAX_SURROGATE) continue; && candidateCodePoint <= Character.MAX_SURROGATE) continue;
@ -243,6 +256,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
// check shortcut // check shortcut
if (shortcutMap != null) { if (shortcutMap != null) {
for (final Map.Entry<String, List<String>> entry : shortcutMap.entrySet()) { for (final Map.Entry<String, List<String>> entry : shortcutMap.entrySet()) {
assertTrue(words.contains(entry.getKey()));
final CharGroup group = FusionDictionary.findWordInTree(dict.mRootNodeArray, final CharGroup group = FusionDictionary.findWordInTree(dict.mRootNodeArray,
entry.getKey()); entry.getKey());
for (final String word : entry.getValue()) { for (final String word : entry.getValue()) {
@ -321,6 +335,12 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
formatOptions, "chain")); formatOptions, "chain"));
results.add(runReadAndWrite(sWords, sStarBigrams, null /* shortcuts */, bufferType, results.add(runReadAndWrite(sWords, sStarBigrams, null /* shortcuts */, bufferType,
formatOptions, "star")); formatOptions, "star"));
results.add(runReadAndWrite(sWords, sEmptyBigrams, sShortcuts, bufferType, formatOptions,
"unigram with shortcuts"));
results.add(runReadAndWrite(sWords, sChainBigrams, sShortcuts, bufferType, formatOptions,
"chain with shortcuts"));
results.add(runReadAndWrite(sWords, sStarBigrams, sShortcuts, bufferType, formatOptions,
"star with shortcuts"));
} }
public void testReadAndWriteWithByteBuffer() { public void testReadAndWriteWithByteBuffer() {