Add internal structure support for isShortcutOnly (B8)

Change-Id: Iaac80937540a45849c347f80400762032b27c82c
main
Jean Chalard 2011-12-28 18:33:39 +09:00
parent b751dbb6c8
commit 903e58886e
2 changed files with 39 additions and 14 deletions

View File

@ -311,6 +311,13 @@ public class BinaryDictInputOutput {
return NO_CHILDREN_ADDRESS != address; return NO_CHILDREN_ADDRESS != address;
} }
/**
 * Tells whether a character group info is marked as being a shortcut only.
 *
 * @param info the character group info whose {@code mFlags} field is inspected
 * @return {@code true} when masking {@code info.mFlags} with
 *         {@code FLAG_IS_SHORTCUT_ONLY} yields a non-zero value, {@code false} otherwise
 */
private static boolean isShortcutOnly(final CharGroupInfo info) {
    return (info.mFlags & FLAG_IS_SHORTCUT_ONLY) != 0;
}
/** /**
* Compute the size, in bytes, that an address will occupy. * Compute the size, in bytes, that an address will occupy.
* *
@ -1027,10 +1034,11 @@ public class BinaryDictInputOutput {
} }
nodeContents.add( nodeContents.add(
new CharGroup(info.mCharacters, shortcutTargets, bigrams, info.mFrequency, new CharGroup(info.mCharacters, shortcutTargets, bigrams, info.mFrequency,
children)); children, isShortcutOnly(info)));
} else { } else {
nodeContents.add( nodeContents.add(
new CharGroup(info.mCharacters, shortcutTargets, bigrams, info.mFrequency)); new CharGroup(info.mCharacters, shortcutTargets, bigrams, info.mFrequency,
isShortcutOnly(info)));
} }
groupOffset = info.mEndAddress; groupOffset = info.mEndAddress;
} }

View File

@ -85,24 +85,35 @@ public class FusionDictionary implements Iterable<Word> {
final ArrayList<WeightedString> mShortcutTargets; final ArrayList<WeightedString> mShortcutTargets;
final ArrayList<WeightedString> mBigrams; final ArrayList<WeightedString> mBigrams;
final int mFrequency; // NOT_A_TERMINAL == mFrequency indicates this is not a terminal. final int mFrequency; // NOT_A_TERMINAL == mFrequency indicates this is not a terminal.
final boolean mIsShortcutOnly; // Only valid if this is a terminal.
Node mChildren; Node mChildren;
// The two following members to help with binary generation // The two following members to help with binary generation
int mCachedSize; int mCachedSize;
int mCachedAddress; int mCachedAddress;
public CharGroup(final int[] chars, final ArrayList<WeightedString> shortcutTargets, public CharGroup(final int[] chars, final ArrayList<WeightedString> shortcutTargets,
final ArrayList<WeightedString> bigrams, final int frequency) { final ArrayList<WeightedString> bigrams, final int frequency,
final boolean isShortcutOnly) {
mChars = chars; mChars = chars;
mFrequency = frequency; mFrequency = frequency;
mIsShortcutOnly = isShortcutOnly;
if (mIsShortcutOnly && NOT_A_TERMINAL == mFrequency) {
throw new RuntimeException("A node must be a terminal to be a shortcut only");
}
mShortcutTargets = shortcutTargets; mShortcutTargets = shortcutTargets;
mBigrams = bigrams; mBigrams = bigrams;
mChildren = null; mChildren = null;
} }
public CharGroup(final int[] chars, final ArrayList<WeightedString> shortcutTargets, public CharGroup(final int[] chars, final ArrayList<WeightedString> shortcutTargets,
final ArrayList<WeightedString> bigrams, final int frequency, final Node children) { final ArrayList<WeightedString> bigrams, final int frequency, final Node children,
final boolean isShortcutOnly) {
mChars = chars; mChars = chars;
mFrequency = frequency; mFrequency = frequency;
mIsShortcutOnly = isShortcutOnly;
if (mIsShortcutOnly && NOT_A_TERMINAL == mFrequency) {
throw new RuntimeException("A node must be a terminal to be a shortcut only");
}
mShortcutTargets = shortcutTargets; mShortcutTargets = shortcutTargets;
mBigrams = bigrams; mBigrams = bigrams;
mChildren = children; mChildren = children;
@ -249,7 +260,7 @@ public class FusionDictionary implements Iterable<Word> {
final int insertionIndex = findInsertionIndex(currentNode, word[charIndex]); final int insertionIndex = findInsertionIndex(currentNode, word[charIndex]);
final CharGroup newGroup = new CharGroup( final CharGroup newGroup = new CharGroup(
Arrays.copyOfRange(word, charIndex, word.length), Arrays.copyOfRange(word, charIndex, word.length),
shortcutTargets, bigrams, frequency); shortcutTargets, bigrams, frequency, false /* isShortcutOnly */);
currentNode.mData.add(insertionIndex, newGroup); currentNode.mData.add(insertionIndex, newGroup);
checkStack(currentNode); checkStack(currentNode);
} else { } else {
@ -263,7 +274,8 @@ public class FusionDictionary implements Iterable<Word> {
+ new String(word, 0, word.length)); + new String(word, 0, word.length));
} else { } else {
final CharGroup newNode = new CharGroup(currentGroup.mChars, final CharGroup newNode = new CharGroup(currentGroup.mChars,
shortcutTargets, bigrams, frequency, currentGroup.mChildren); shortcutTargets, bigrams, frequency, currentGroup.mChildren,
false /* isShortcutOnly */);
currentNode.mData.set(nodeIndex, newNode); currentNode.mData.set(nodeIndex, newNode);
checkStack(currentNode); checkStack(currentNode);
} }
@ -272,13 +284,14 @@ public class FusionDictionary implements Iterable<Word> {
// We only have to create a new node and add it to the end of this. // We only have to create a new node and add it to the end of this.
final CharGroup newNode = new CharGroup( final CharGroup newNode = new CharGroup(
Arrays.copyOfRange(word, charIndex + differentCharIndex, word.length), Arrays.copyOfRange(word, charIndex + differentCharIndex, word.length),
shortcutTargets, bigrams, frequency); shortcutTargets, bigrams, frequency,
false /* isShortcutOnly */);
currentGroup.mChildren = new Node(); currentGroup.mChildren = new Node();
currentGroup.mChildren.mData.add(newNode); currentGroup.mChildren.mData.add(newNode);
} }
} else { } else {
if (0 == differentCharIndex) { if (0 == differentCharIndex) {
// Exact same word. Check the frequency is 0 or -1, and update. // Exact same word. Check the frequency is 0 or NOT_A_TERMINAL, and update.
if (0 != frequency) { if (0 != frequency) {
if (0 < currentGroup.mFrequency) { if (0 < currentGroup.mFrequency) {
throw new RuntimeException("This word already exists with frequency " throw new RuntimeException("This word already exists with frequency "
@ -287,7 +300,7 @@ public class FusionDictionary implements Iterable<Word> {
} }
final CharGroup newGroup = new CharGroup(word, final CharGroup newGroup = new CharGroup(word,
currentGroup.mShortcutTargets, currentGroup.mBigrams, currentGroup.mShortcutTargets, currentGroup.mBigrams,
frequency, currentGroup.mChildren); frequency, currentGroup.mChildren, false /* isShortcutOnly */);
currentNode.mData.set(nodeIndex, newGroup); currentNode.mData.set(nodeIndex, newGroup);
} }
} else { } else {
@ -297,21 +310,24 @@ public class FusionDictionary implements Iterable<Word> {
final CharGroup newOldWord = new CharGroup( final CharGroup newOldWord = new CharGroup(
Arrays.copyOfRange(currentGroup.mChars, differentCharIndex, Arrays.copyOfRange(currentGroup.mChars, differentCharIndex,
currentGroup.mChars.length), currentGroup.mShortcutTargets, currentGroup.mChars.length), currentGroup.mShortcutTargets,
currentGroup.mBigrams, currentGroup.mFrequency, currentGroup.mChildren); currentGroup.mBigrams, currentGroup.mFrequency, currentGroup.mChildren,
currentGroup.mIsShortcutOnly);
newChildren.mData.add(newOldWord); newChildren.mData.add(newOldWord);
final CharGroup newParent; final CharGroup newParent;
if (charIndex + differentCharIndex >= word.length) { if (charIndex + differentCharIndex >= word.length) {
newParent = new CharGroup( newParent = new CharGroup(
Arrays.copyOfRange(currentGroup.mChars, 0, differentCharIndex), Arrays.copyOfRange(currentGroup.mChars, 0, differentCharIndex),
shortcutTargets, bigrams, frequency, newChildren); shortcutTargets, bigrams, frequency, newChildren,
false /* isShortcutOnly */);
} else { } else {
newParent = new CharGroup( newParent = new CharGroup(
Arrays.copyOfRange(currentGroup.mChars, 0, differentCharIndex), Arrays.copyOfRange(currentGroup.mChars, 0, differentCharIndex),
null, null, -1, newChildren); null, null, -1, newChildren, false /* isShortcutOnly */);
final CharGroup newWord = new CharGroup( final CharGroup newWord = new CharGroup(
Arrays.copyOfRange(word, charIndex + differentCharIndex, Arrays.copyOfRange(word, charIndex + differentCharIndex,
word.length), shortcutTargets, bigrams, frequency); word.length), shortcutTargets, bigrams, frequency,
false /* isShortcutOnly */);
final int addIndex = word[charIndex + differentCharIndex] final int addIndex = word[charIndex + differentCharIndex]
> currentGroup.mChars[differentCharIndex] ? 1 : 0; > currentGroup.mChars[differentCharIndex] ? 1 : 0;
newChildren.mData.add(addIndex, newWord); newChildren.mData.add(addIndex, newWord);
@ -374,7 +390,8 @@ public class FusionDictionary implements Iterable<Word> {
*/ */
private static int findInsertionIndex(final Node node, int character) { private static int findInsertionIndex(final Node node, int character) {
final List data = node.mData; final List data = node.mData;
final CharGroup reference = new CharGroup(new int[] { character }, null, null, 0); final CharGroup reference = new CharGroup(new int[] { character }, null, null, 0,
false /* isShortcutOnly */);
int result = Collections.binarySearch(data, reference, CHARGROUP_COMPARATOR); int result = Collections.binarySearch(data, reference, CHARGROUP_COMPARATOR);
return result >= 0 ? result : -result - 1; return result >= 0 ? result : -result - 1;
} }