am 2ee70804: Add moved char groups.
* commit '2ee70804e92b17016a2f042c4f6b0e94b5d23e88': Add moved char groups. (branch: main)
commit
9facffbb87
|
@ -85,7 +85,10 @@ public class BinaryDictIOUtils {
|
||||||
}
|
}
|
||||||
p.mPosition++;
|
p.mPosition++;
|
||||||
|
|
||||||
if (info.mFrequency != FusionDictionary.CharGroup.NOT_A_TERMINAL) { // found word
|
final boolean isMovedGroup = BinaryDictInputOutput.isMovedGroup(info.mFlags,
|
||||||
|
formatOptions);
|
||||||
|
if (!isMovedGroup
|
||||||
|
&& info.mFrequency != FusionDictionary.CharGroup.NOT_A_TERMINAL) {// found word
|
||||||
words.put(info.mOriginalAddress, new String(pushedChars, 0, index));
|
words.put(info.mOriginalAddress, new String(pushedChars, 0, index));
|
||||||
frequencies.put(info.mOriginalAddress, info.mFrequency);
|
frequencies.put(info.mOriginalAddress, info.mFrequency);
|
||||||
if (info.mBigrams != null) bigrams.put(info.mOriginalAddress, info.mBigrams);
|
if (info.mBigrams != null) bigrams.put(info.mOriginalAddress, info.mBigrams);
|
||||||
|
@ -109,7 +112,7 @@ public class BinaryDictIOUtils {
|
||||||
p.mAddress = buffer.position();
|
p.mAddress = buffer.position();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (BinaryDictInputOutput.hasChildrenAddress(info.mChildrenAddress)) {
|
if (!isMovedGroup && BinaryDictInputOutput.hasChildrenAddress(info.mChildrenAddress)) {
|
||||||
Position childrenPos = new Position(info.mChildrenAddress + headerSize, index);
|
Position childrenPos = new Position(info.mChildrenAddress + headerSize, index);
|
||||||
stack.push(childrenPos);
|
stack.push(childrenPos);
|
||||||
}
|
}
|
||||||
|
@ -168,6 +171,10 @@ public class BinaryDictIOUtils {
|
||||||
final int charGroupPos = buffer.position();
|
final int charGroupPos = buffer.position();
|
||||||
final CharGroupInfo currentInfo = BinaryDictInputOutput.readCharGroup(buffer,
|
final CharGroupInfo currentInfo = BinaryDictInputOutput.readCharGroup(buffer,
|
||||||
buffer.position(), header.mFormatOptions);
|
buffer.position(), header.mFormatOptions);
|
||||||
|
if (BinaryDictInputOutput.isMovedGroup(currentInfo.mFlags,
|
||||||
|
header.mFormatOptions)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
boolean same = true;
|
boolean same = true;
|
||||||
for (int p = 0, j = word.offsetByCodePoints(0, wordPos);
|
for (int p = 0, j = word.offsetByCodePoints(0, wordPos);
|
||||||
p < currentInfo.mCharacters.length;
|
p < currentInfo.mCharacters.length;
|
||||||
|
|
|
@ -53,6 +53,7 @@ public class BinaryDictInputOutput {
|
||||||
// If the number of passes exceeds this number, makedict bails with an exception on
|
// If the number of passes exceeds this number, makedict bails with an exception on
|
||||||
// suspicion that a bug might be causing an infinite loop.
|
// suspicion that a bug might be causing an infinite loop.
|
||||||
private static final int MAX_PASSES = 24;
|
private static final int MAX_PASSES = 24;
|
||||||
|
private static final int MAX_JUMPS = 12;
|
||||||
|
|
||||||
public interface FusionDictionaryBufferInterface {
|
public interface FusionDictionaryBufferInterface {
|
||||||
public int readUnsignedByte();
|
public int readUnsignedByte();
|
||||||
|
@ -394,6 +395,13 @@ public class BinaryDictInputOutput {
|
||||||
return FormatSpec.NO_CHILDREN_ADDRESS != address;
|
return FormatSpec.NO_CHILDREN_ADDRESS != address;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Helper method to check whether the group is moved.
|
||||||
|
*/
|
||||||
|
public static boolean isMovedGroup(final int flags, final FormatOptions options) {
|
||||||
|
return options.mSupportsDynamicUpdate && ((flags & FormatSpec.FLAG_IS_MOVED) == 1);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Helper method to check whether the dictionary can be updated dynamically.
|
* Helper method to check whether the dictionary can be updated dynamically.
|
||||||
*/
|
*/
|
||||||
|
@ -1374,8 +1382,18 @@ public class BinaryDictInputOutput {
|
||||||
int index = FormatSpec.MAX_WORD_LENGTH - 1;
|
int index = FormatSpec.MAX_WORD_LENGTH - 1;
|
||||||
// the length of the path from the root to the leaf is limited by MAX_WORD_LENGTH
|
// the length of the path from the root to the leaf is limited by MAX_WORD_LENGTH
|
||||||
for (int count = 0; count < FormatSpec.MAX_WORD_LENGTH; ++count) {
|
for (int count = 0; count < FormatSpec.MAX_WORD_LENGTH; ++count) {
|
||||||
buffer.position(currentAddress + headerSize);
|
CharGroupInfo currentInfo;
|
||||||
final CharGroupInfo currentInfo = readCharGroup(buffer, currentAddress, options);
|
int loopCounter = 0;
|
||||||
|
do {
|
||||||
|
buffer.position(currentAddress + headerSize);
|
||||||
|
currentInfo = readCharGroup(buffer, currentAddress, options);
|
||||||
|
if (isMovedGroup(currentInfo.mFlags, options)) {
|
||||||
|
currentAddress = currentInfo.mParentAddress + currentInfo.mOriginalAddress;
|
||||||
|
}
|
||||||
|
if (DBG && loopCounter++ > MAX_JUMPS) {
|
||||||
|
MakedictLog.d("Too many jumps - probably a bug");
|
||||||
|
}
|
||||||
|
} while (isMovedGroup(currentInfo.mFlags, options));
|
||||||
for (int i = 0; i < currentInfo.mCharacters.length; ++i) {
|
for (int i = 0; i < currentInfo.mCharacters.length; ++i) {
|
||||||
sGetWordBuffer[index--] =
|
sGetWordBuffer[index--] =
|
||||||
currentInfo.mCharacters[currentInfo.mCharacters.length - i - 1];
|
currentInfo.mCharacters[currentInfo.mCharacters.length - i - 1];
|
||||||
|
@ -1457,6 +1475,7 @@ public class BinaryDictInputOutput {
|
||||||
int groupOffset = nodeHeadPosition + getGroupCountSize(count);
|
int groupOffset = nodeHeadPosition + getGroupCountSize(count);
|
||||||
for (int i = count; i > 0; --i) { // Scan the array of CharGroup.
|
for (int i = count; i > 0; --i) { // Scan the array of CharGroup.
|
||||||
CharGroupInfo info = readCharGroup(buffer, groupOffset, options);
|
CharGroupInfo info = readCharGroup(buffer, groupOffset, options);
|
||||||
|
if (isMovedGroup(info.mFlags, options)) continue;
|
||||||
ArrayList<WeightedString> shortcutTargets = info.mShortcutTargets;
|
ArrayList<WeightedString> shortcutTargets = info.mShortcutTargets;
|
||||||
ArrayList<WeightedString> bigrams = null;
|
ArrayList<WeightedString> bigrams = null;
|
||||||
if (null != info.mBigrams) {
|
if (null != info.mBigrams) {
|
||||||
|
|
|
@ -52,13 +52,18 @@ public final class FormatSpec {
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* Node(CharGroup) layout is as follows:
|
/* Node(CharGroup) layout is as follows:
|
||||||
* | addressType xx : mask with MASK_GROUP_ADDRESS_TYPE
|
* | IF !SUPPORTS_DYNAMIC_UPDATE
|
||||||
* 2 bits, 00 = no children : FLAG_GROUP_ADDRESS_TYPE_NOADDRESS
|
* | addressType xx : mask with MASK_GROUP_ADDRESS_TYPE
|
||||||
* f | 01 = 1 byte : FLAG_GROUP_ADDRESS_TYPE_ONEBYTE
|
* | 2 bits, 00 = no children : FLAG_GROUP_ADDRESS_TYPE_NOADDRESS
|
||||||
* l | 10 = 2 bytes : FLAG_GROUP_ADDRESS_TYPE_TWOBYTES
|
* f | 01 = 1 byte : FLAG_GROUP_ADDRESS_TYPE_ONEBYTE
|
||||||
* a | 11 = 3 bytes : FLAG_GROUP_ADDRESS_TYPE_THREEBYTES
|
* l | 10 = 2 bytes : FLAG_GROUP_ADDRESS_TYPE_TWOBYTES
|
||||||
* g | has several chars ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_MULTIPLE_CHARS
|
* a | 11 = 3 bytes : FLAG_GROUP_ADDRESS_TYPE_THREEBYTES
|
||||||
* s | has a terminal ? 1 bit, 1 = yes, 0 = no : FLAG_IS_TERMINAL
|
* g | ELSE
|
||||||
|
* s | is moved ? 2 bits, 11 = no
|
||||||
|
* | 01 = yes
|
||||||
|
* | the new address is stored in the same place as the parent address
|
||||||
|
* | has several chars ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_MULTIPLE_CHARS
|
||||||
|
* | has a terminal ? 1 bit, 1 = yes, 0 = no : FLAG_IS_TERMINAL
|
||||||
* | has shortcut targets ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_SHORTCUT_TARGETS
|
* | has shortcut targets ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_SHORTCUT_TARGETS
|
||||||
* | has bigrams ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_BIGRAMS
|
* | has bigrams ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_BIGRAMS
|
||||||
* | is not a word ? 1 bit, 1 = yes, 0 = no : FLAG_IS_NOT_A_WORD
|
* | is not a word ? 1 bit, 1 = yes, 0 = no : FLAG_IS_NOT_A_WORD
|
||||||
|
@ -178,6 +183,7 @@ public final class FormatSpec {
|
||||||
static final int FLAG_HAS_BIGRAMS = 0x04;
|
static final int FLAG_HAS_BIGRAMS = 0x04;
|
||||||
static final int FLAG_IS_NOT_A_WORD = 0x02;
|
static final int FLAG_IS_NOT_A_WORD = 0x02;
|
||||||
static final int FLAG_IS_BLACKLISTED = 0x01;
|
static final int FLAG_IS_BLACKLISTED = 0x01;
|
||||||
|
static final int FLAG_IS_MOVED = 0x40;
|
||||||
|
|
||||||
static final int FLAG_ATTRIBUTE_HAS_NEXT = 0x80;
|
static final int FLAG_ATTRIBUTE_HAS_NEXT = 0x80;
|
||||||
static final int FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40;
|
static final int FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40;
|
||||||
|
|
Loading…
Reference in New Issue