Add moved char groups.

bug: 6669677

Change-Id: I372f841044fe8e076a50a80ac10b715e5f8fd4eb
This commit is contained in:
Yuichiro Hanada 2012-09-25 20:48:25 +09:00
parent 002a502c84
commit 2ee70804e9
3 changed files with 43 additions and 11 deletions

View file

@ -85,7 +85,10 @@ public class BinaryDictIOUtils {
} }
p.mPosition++; p.mPosition++;
if (info.mFrequency != FusionDictionary.CharGroup.NOT_A_TERMINAL) { // found word final boolean isMovedGroup = BinaryDictInputOutput.isMovedGroup(info.mFlags,
formatOptions);
if (!isMovedGroup
&& info.mFrequency != FusionDictionary.CharGroup.NOT_A_TERMINAL) {// found word
words.put(info.mOriginalAddress, new String(pushedChars, 0, index)); words.put(info.mOriginalAddress, new String(pushedChars, 0, index));
frequencies.put(info.mOriginalAddress, info.mFrequency); frequencies.put(info.mOriginalAddress, info.mFrequency);
if (info.mBigrams != null) bigrams.put(info.mOriginalAddress, info.mBigrams); if (info.mBigrams != null) bigrams.put(info.mOriginalAddress, info.mBigrams);
@ -109,7 +112,7 @@ public class BinaryDictIOUtils {
p.mAddress = buffer.position(); p.mAddress = buffer.position();
} }
if (BinaryDictInputOutput.hasChildrenAddress(info.mChildrenAddress)) { if (!isMovedGroup && BinaryDictInputOutput.hasChildrenAddress(info.mChildrenAddress)) {
Position childrenPos = new Position(info.mChildrenAddress + headerSize, index); Position childrenPos = new Position(info.mChildrenAddress + headerSize, index);
stack.push(childrenPos); stack.push(childrenPos);
} }
@ -168,6 +171,10 @@ public class BinaryDictIOUtils {
final int charGroupPos = buffer.position(); final int charGroupPos = buffer.position();
final CharGroupInfo currentInfo = BinaryDictInputOutput.readCharGroup(buffer, final CharGroupInfo currentInfo = BinaryDictInputOutput.readCharGroup(buffer,
buffer.position(), header.mFormatOptions); buffer.position(), header.mFormatOptions);
if (BinaryDictInputOutput.isMovedGroup(currentInfo.mFlags,
header.mFormatOptions)) {
continue;
}
boolean same = true; boolean same = true;
for (int p = 0, j = word.offsetByCodePoints(0, wordPos); for (int p = 0, j = word.offsetByCodePoints(0, wordPos);
p < currentInfo.mCharacters.length; p < currentInfo.mCharacters.length;

View file

@ -53,6 +53,7 @@ public class BinaryDictInputOutput {
// If the number of passes exceeds this number, makedict bails with an exception on // If the number of passes exceeds this number, makedict bails with an exception on
// suspicion that a bug might be causing an infinite loop. // suspicion that a bug might be causing an infinite loop.
private static final int MAX_PASSES = 24; private static final int MAX_PASSES = 24;
private static final int MAX_JUMPS = 12;
public interface FusionDictionaryBufferInterface { public interface FusionDictionaryBufferInterface {
public int readUnsignedByte(); public int readUnsignedByte();
@ -394,6 +395,13 @@ public class BinaryDictInputOutput {
return FormatSpec.NO_CHILDREN_ADDRESS != address; return FormatSpec.NO_CHILDREN_ADDRESS != address;
} }
/**
 * Helper method to check whether the group is moved.
 *
 * Only formats that support dynamic update can contain moved groups. In those formats the
 * two high bits of the flags byte (the bits that otherwise hold the children address type)
 * encode the moved state: 01 means the group has been moved, 11 means it has not.
 *
 * @param flags the flags byte of the char group.
 * @param options the format options of the dictionary being read.
 * @return true if the format supports dynamic update and the group is marked as moved.
 */
public static boolean isMovedGroup(final int flags, final FormatOptions options) {
    // Compare the whole 2-bit field against FLAG_IS_MOVED. Comparing the masked value
    // against 1 can never match because FLAG_IS_MOVED is 0x40, and testing only the 0x40
    // bit would also match the "not moved" pattern (11).
    // NOTE(review): assumes MASK_GROUP_ADDRESS_TYPE covers exactly these two bits (0xC0),
    // as the CharGroup layout comment in FormatSpec describes - confirm.
    return options.mSupportsDynamicUpdate
            && ((flags & FormatSpec.MASK_GROUP_ADDRESS_TYPE) == FormatSpec.FLAG_IS_MOVED);
}
/** /**
* Helper method to check whether the dictionary can be updated dynamically. * Helper method to check whether the dictionary can be updated dynamically.
*/ */
@ -1374,8 +1382,18 @@ public class BinaryDictInputOutput {
int index = FormatSpec.MAX_WORD_LENGTH - 1; int index = FormatSpec.MAX_WORD_LENGTH - 1;
// the length of the path from the root to the leaf is limited by MAX_WORD_LENGTH // the length of the path from the root to the leaf is limited by MAX_WORD_LENGTH
for (int count = 0; count < FormatSpec.MAX_WORD_LENGTH; ++count) { for (int count = 0; count < FormatSpec.MAX_WORD_LENGTH; ++count) {
buffer.position(currentAddress + headerSize); CharGroupInfo currentInfo;
final CharGroupInfo currentInfo = readCharGroup(buffer, currentAddress, options); int loopCounter = 0;
do {
buffer.position(currentAddress + headerSize);
currentInfo = readCharGroup(buffer, currentAddress, options);
if (isMovedGroup(currentInfo.mFlags, options)) {
currentAddress = currentInfo.mParentAddress + currentInfo.mOriginalAddress;
}
if (DBG && loopCounter++ > MAX_JUMPS) {
MakedictLog.d("Too many jumps - probably a bug");
}
} while (isMovedGroup(currentInfo.mFlags, options));
for (int i = 0; i < currentInfo.mCharacters.length; ++i) { for (int i = 0; i < currentInfo.mCharacters.length; ++i) {
sGetWordBuffer[index--] = sGetWordBuffer[index--] =
currentInfo.mCharacters[currentInfo.mCharacters.length - i - 1]; currentInfo.mCharacters[currentInfo.mCharacters.length - i - 1];
@ -1457,6 +1475,7 @@ public class BinaryDictInputOutput {
int groupOffset = nodeHeadPosition + getGroupCountSize(count); int groupOffset = nodeHeadPosition + getGroupCountSize(count);
for (int i = count; i > 0; --i) { // Scan the array of CharGroup. for (int i = count; i > 0; --i) { // Scan the array of CharGroup.
CharGroupInfo info = readCharGroup(buffer, groupOffset, options); CharGroupInfo info = readCharGroup(buffer, groupOffset, options);
if (isMovedGroup(info.mFlags, options)) continue;
ArrayList<WeightedString> shortcutTargets = info.mShortcutTargets; ArrayList<WeightedString> shortcutTargets = info.mShortcutTargets;
ArrayList<WeightedString> bigrams = null; ArrayList<WeightedString> bigrams = null;
if (null != info.mBigrams) { if (null != info.mBigrams) {

View file

@ -52,13 +52,18 @@ public final class FormatSpec {
*/ */
/* Node(CharGroup) layout is as follows: /* Node(CharGroup) layout is as follows:
* | addressType xx : mask with MASK_GROUP_ADDRESS_TYPE * | IF !SUPPORTS_DYNAMIC_UPDATE
* 2 bits, 00 = no children : FLAG_GROUP_ADDRESS_TYPE_NOADDRESS * | addressType xx : mask with MASK_GROUP_ADDRESS_TYPE
* f | 01 = 1 byte : FLAG_GROUP_ADDRESS_TYPE_ONEBYTE * | 2 bits, 00 = no children : FLAG_GROUP_ADDRESS_TYPE_NOADDRESS
* l | 10 = 2 bytes : FLAG_GROUP_ADDRESS_TYPE_TWOBYTES * f | 01 = 1 byte : FLAG_GROUP_ADDRESS_TYPE_ONEBYTE
* a | 11 = 3 bytes : FLAG_GROUP_ADDRESS_TYPE_THREEBYTES * l | 10 = 2 bytes : FLAG_GROUP_ADDRESS_TYPE_TWOBYTES
* g | has several chars ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_MULTIPLE_CHARS * a | 11 = 3 bytes : FLAG_GROUP_ADDRESS_TYPE_THREEBYTES
* s | has a terminal ? 1 bit, 1 = yes, 0 = no : FLAG_IS_TERMINAL * g | ELSE
* s | is moved ? 2 bits, 11 = no
* | 01 = yes
* | the new address is stored in the same place as the parent address
* | has several chars ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_MULTIPLE_CHARS
* | has a terminal ? 1 bit, 1 = yes, 0 = no : FLAG_IS_TERMINAL
* | has shortcut targets ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_SHORTCUT_TARGETS * | has shortcut targets ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_SHORTCUT_TARGETS
* | has bigrams ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_BIGRAMS * | has bigrams ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_BIGRAMS
* | is not a word ? 1 bit, 1 = yes, 0 = no : FLAG_IS_NOT_A_WORD * | is not a word ? 1 bit, 1 = yes, 0 = no : FLAG_IS_NOT_A_WORD
@ -178,6 +183,7 @@ public final class FormatSpec {
static final int FLAG_HAS_BIGRAMS = 0x04; static final int FLAG_HAS_BIGRAMS = 0x04;
static final int FLAG_IS_NOT_A_WORD = 0x02; static final int FLAG_IS_NOT_A_WORD = 0x02;
static final int FLAG_IS_BLACKLISTED = 0x01; static final int FLAG_IS_BLACKLISTED = 0x01;
// Value of the 2-bit "is moved" field (bit pattern 01 in the two high bits of the flags
// byte) for a group that has been moved; only meaningful when the format supports
// dynamic update. A group that has not been moved has both bits set (11).
static final int FLAG_IS_MOVED = 0x40;
static final int FLAG_ATTRIBUTE_HAS_NEXT = 0x80; static final int FLAG_ATTRIBUTE_HAS_NEXT = 0x80;
static final int FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40; static final int FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40;