am 21dddb14: Merge "Rename Node to PtNodeArray"

* commit '21dddb1462df8f32b40365dbb27930ae6c8113b8':
  Rename Node to PtNodeArray
This commit is contained in:
Jean Chalard 2013-08-16 01:38:11 -07:00 committed by Android Git Automerger
commit 2b3ff52496
19 changed files with 400 additions and 379 deletions

View file

@ -23,7 +23,7 @@ import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
import com.android.inputmethod.latin.makedict.BinaryDictEncoder; import com.android.inputmethod.latin.makedict.BinaryDictEncoder;
import com.android.inputmethod.latin.makedict.FormatSpec; import com.android.inputmethod.latin.makedict.FormatSpec;
import com.android.inputmethod.latin.makedict.FusionDictionary; import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.Node; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import com.android.inputmethod.latin.makedict.UnsupportedFormatException; import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
import com.android.inputmethod.latin.utils.CollectionUtils; import com.android.inputmethod.latin.utils.CollectionUtils;
@ -51,7 +51,7 @@ public class DictionaryWriter extends AbstractDictionaryWriter {
@Override @Override
public void clear() { public void clear() {
final HashMap<String, String> attributes = CollectionUtils.newHashMap(); final HashMap<String, String> attributes = CollectionUtils.newHashMap();
mFusionDictionary = new FusionDictionary(new Node(), mFusionDictionary = new FusionDictionary(new PtNodeArray(),
new FusionDictionary.DictionaryOptions(attributes, false, false)); new FusionDictionary.DictionaryOptions(attributes, false, false));
} }

View file

@ -20,7 +20,7 @@ import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup; import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
import com.android.inputmethod.latin.makedict.FusionDictionary.Node; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import com.android.inputmethod.latin.utils.JniUtils; import com.android.inputmethod.latin.utils.JniUtils;
@ -548,31 +548,31 @@ public final class BinaryDictDecoder {
} }
/** /**
* Reads a single node from a buffer. * Reads a single node array from a buffer.
* *
* This method reads the file at the current position. A node is fully expected to start at * This method reads the file at the current position. A node array is fully expected to start
* the current position. * at the current position.
* This will recursively read other nodes into the structure, populating the reverse * This will recursively read other node arrays into the structure, populating the reverse
* maps on the fly and using them to keep track of already read nodes. * maps on the fly and using them to keep track of already read nodes.
* *
* @param buffer the buffer, correctly positioned at the start of a node. * @param buffer the buffer, correctly positioned at the start of a node array.
* @param headerSize the size, in bytes, of the file header. * @param headerSize the size, in bytes, of the file header.
* @param reverseNodeMap a mapping from addresses to already read nodes. * @param reverseNodeArrayMap a mapping from addresses to already read node arrays.
* @param reverseGroupMap a mapping from addresses to already read character groups. * @param reverseGroupMap a mapping from addresses to already read character groups.
* @param options file format options. * @param options file format options.
* @return the read node with all its children already read. * @return the read node array with all its children already read.
*/ */
private static Node readNode(final FusionDictionaryBufferInterface buffer, final int headerSize, private static PtNodeArray readNodeArray(final FusionDictionaryBufferInterface buffer,
final Map<Integer, Node> reverseNodeMap, final Map<Integer, CharGroup> reverseGroupMap, final int headerSize, final Map<Integer, PtNodeArray> reverseNodeArrayMap,
final FormatOptions options) final Map<Integer, CharGroup> reverseGroupMap, final FormatOptions options)
throws IOException { throws IOException {
final ArrayList<CharGroup> nodeContents = new ArrayList<CharGroup>(); final ArrayList<CharGroup> nodeArrayContents = new ArrayList<CharGroup>();
final int nodeOrigin = buffer.position() - headerSize; final int nodeArrayOrigin = buffer.position() - headerSize;
do { // Scan the linked-list node. do { // Scan the linked-list node.
final int nodeHeadPosition = buffer.position() - headerSize; final int nodeArrayHeadPosition = buffer.position() - headerSize;
final int count = readCharGroupCount(buffer); final int count = readCharGroupCount(buffer);
int groupOffset = nodeHeadPosition + BinaryDictIOUtils.getGroupCountSize(count); int groupOffset = nodeArrayHeadPosition + BinaryDictIOUtils.getGroupCountSize(count);
for (int i = count; i > 0; --i) { // Scan the array of CharGroup. for (int i = count; i > 0; --i) { // Scan the array of CharGroup.
CharGroupInfo info = readCharGroup(buffer, groupOffset, options); CharGroupInfo info = readCharGroup(buffer, groupOffset, options);
if (BinaryDictIOUtils.isMovedGroup(info.mFlags, options)) continue; if (BinaryDictIOUtils.isMovedGroup(info.mFlags, options)) continue;
@ -589,21 +589,21 @@ public final class BinaryDictDecoder {
} }
} }
if (BinaryDictIOUtils.hasChildrenAddress(info.mChildrenAddress)) { if (BinaryDictIOUtils.hasChildrenAddress(info.mChildrenAddress)) {
Node children = reverseNodeMap.get(info.mChildrenAddress); PtNodeArray children = reverseNodeArrayMap.get(info.mChildrenAddress);
if (null == children) { if (null == children) {
final int currentPosition = buffer.position(); final int currentPosition = buffer.position();
buffer.position(info.mChildrenAddress + headerSize); buffer.position(info.mChildrenAddress + headerSize);
children = readNode( children = readNodeArray(
buffer, headerSize, reverseNodeMap, reverseGroupMap, options); buffer, headerSize, reverseNodeArrayMap, reverseGroupMap, options);
buffer.position(currentPosition); buffer.position(currentPosition);
} }
nodeContents.add( nodeArrayContents.add(
new CharGroup(info.mCharacters, shortcutTargets, bigrams, new CharGroup(info.mCharacters, shortcutTargets, bigrams,
info.mFrequency, info.mFrequency,
0 != (info.mFlags & FormatSpec.FLAG_IS_NOT_A_WORD), 0 != (info.mFlags & FormatSpec.FLAG_IS_NOT_A_WORD),
0 != (info.mFlags & FormatSpec.FLAG_IS_BLACKLISTED), children)); 0 != (info.mFlags & FormatSpec.FLAG_IS_BLACKLISTED), children));
} else { } else {
nodeContents.add( nodeArrayContents.add(
new CharGroup(info.mCharacters, shortcutTargets, bigrams, new CharGroup(info.mCharacters, shortcutTargets, bigrams,
info.mFrequency, info.mFrequency,
0 != (info.mFlags & FormatSpec.FLAG_IS_NOT_A_WORD), 0 != (info.mFlags & FormatSpec.FLAG_IS_NOT_A_WORD),
@ -624,11 +624,11 @@ public final class BinaryDictDecoder {
} while (options.mSupportsDynamicUpdate && } while (options.mSupportsDynamicUpdate &&
buffer.position() != FormatSpec.NO_FORWARD_LINK_ADDRESS); buffer.position() != FormatSpec.NO_FORWARD_LINK_ADDRESS);
final Node node = new Node(nodeContents); final PtNodeArray nodeArray = new PtNodeArray(nodeArrayContents);
node.mCachedAddressBeforeUpdate = nodeOrigin; nodeArray.mCachedAddressBeforeUpdate = nodeArrayOrigin;
node.mCachedAddressAfterUpdate = nodeOrigin; nodeArray.mCachedAddressAfterUpdate = nodeArrayOrigin;
reverseNodeMap.put(node.mCachedAddressAfterUpdate, node); reverseNodeArrayMap.put(nodeArray.mCachedAddressAfterUpdate, nodeArray);
return node; return nodeArray;
} }
/** /**
@ -733,10 +733,10 @@ public final class BinaryDictDecoder {
// Read header // Read header
final FileHeader header = readHeader(reader.getBuffer()); final FileHeader header = readHeader(reader.getBuffer());
Map<Integer, Node> reverseNodeMapping = new TreeMap<Integer, Node>(); Map<Integer, PtNodeArray> reverseNodeArrayMapping = new TreeMap<Integer, PtNodeArray>();
Map<Integer, CharGroup> reverseGroupMapping = new TreeMap<Integer, CharGroup>(); Map<Integer, CharGroup> reverseGroupMapping = new TreeMap<Integer, CharGroup>();
final Node root = readNode(reader.getBuffer(), header.mHeaderSize, reverseNodeMapping, final PtNodeArray root = readNodeArray(reader.getBuffer(), header.mHeaderSize,
reverseGroupMapping, header.mFormatOptions); reverseNodeArrayMapping, reverseGroupMapping, header.mFormatOptions);
FusionDictionary newDict = new FusionDictionary(root, header.mDictionaryOptions); FusionDictionary newDict = new FusionDictionary(root, header.mDictionaryOptions);
if (null != dict) { if (null != dict) {
@ -803,8 +803,6 @@ public final class BinaryDictDecoder {
/** /**
* Calculate bigram frequency from compressed value * Calculate bigram frequency from compressed value
* *
* @see #makeBigramFlags
*
* @param unigramFrequency * @param unigramFrequency
* @param bigramFrequency compressed frequency * @param bigramFrequency compressed frequency
* @return approximate bigram frequency * @return approximate bigram frequency

View file

@ -20,7 +20,7 @@ import com.android.inputmethod.latin.makedict.BinaryDictDecoder.CharEncoding;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup; import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.Node; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import java.io.ByteArrayOutputStream; import java.io.ByteArrayOutputStream;
@ -78,12 +78,12 @@ public class BinaryDictEncoder {
} }
/** /**
* Compute the binary size of the group count for a node * Compute the binary size of the group count for a node array.
* @param node the node * @param nodeArray the nodeArray
* @return the size of the group count, either 1 or 2 bytes. * @return the size of the group count, either 1 or 2 bytes.
*/ */
private static int getGroupCountSize(final Node node) { private static int getGroupCountSize(final PtNodeArray nodeArray) {
return BinaryDictIOUtils.getGroupCountSize(node.mData.size()); return BinaryDictIOUtils.getGroupCountSize(nodeArray.mData.size());
} }
/** /**
@ -138,15 +138,17 @@ public class BinaryDictEncoder {
} }
/** /**
* Compute the maximum size of a node, assuming 3-byte addresses for everything, and caches * Compute the maximum size of each node of a node array, assuming 3-byte addresses for
* it in the 'actualSize' member of the node. * everything, and caches it in the `mCachedSize' member of the nodes; deduce the size of
* the containing node array, and cache it in its 'mCachedSize' member.
* *
* @param node the node to compute the maximum size of. * @param nodeArray the node array to compute the maximum size of.
* @param options file format options. * @param options file format options.
*/ */
private static void calculateNodeMaximumSize(final Node node, final FormatOptions options) { private static void calculateNodeArrayMaximumSize(final PtNodeArray nodeArray,
int size = getGroupCountSize(node); final FormatOptions options) {
for (CharGroup g : node.mData) { int size = getGroupCountSize(nodeArray);
for (CharGroup g : nodeArray.mData) {
final int groupSize = getCharGroupMaximumSize(g, options); final int groupSize = getCharGroupMaximumSize(g, options);
g.mCachedSize = groupSize; g.mCachedSize = groupSize;
size += groupSize; size += groupSize;
@ -154,7 +156,7 @@ public class BinaryDictEncoder {
if (options.mSupportsDynamicUpdate) { if (options.mSupportsDynamicUpdate) {
size += FormatSpec.FORWARD_LINK_ADDRESS_SIZE; size += FormatSpec.FORWARD_LINK_ADDRESS_SIZE;
} }
node.mCachedSize = size; nodeArray.mCachedSize = size;
} }
/** /**
@ -199,14 +201,16 @@ public class BinaryDictEncoder {
// This method is responsible for finding a nice ordering of the nodes that favors run-time // This method is responsible for finding a nice ordering of the nodes that favors run-time
// cache performance and dictionary size. // cache performance and dictionary size.
/* package for tests */ static ArrayList<Node> flattenTree(final Node root) { /* package for tests */ static ArrayList<PtNodeArray> flattenTree(
final int treeSize = FusionDictionary.countCharGroups(root); final PtNodeArray rootNodeArray) {
final int treeSize = FusionDictionary.countCharGroups(rootNodeArray);
MakedictLog.i("Counted nodes : " + treeSize); MakedictLog.i("Counted nodes : " + treeSize);
final ArrayList<Node> flatTree = new ArrayList<Node>(treeSize); final ArrayList<PtNodeArray> flatTree = new ArrayList<PtNodeArray>(treeSize);
return flattenTreeInner(flatTree, root); return flattenTreeInner(flatTree, rootNodeArray);
} }
private static ArrayList<Node> flattenTreeInner(final ArrayList<Node> list, final Node node) { private static ArrayList<PtNodeArray> flattenTreeInner(final ArrayList<PtNodeArray> list,
final PtNodeArray nodeArray) {
// Removing the node is necessary if the tails are merged, because we would then // Removing the node is necessary if the tails are merged, because we would then
// add the same node several times when we only want it once. A number of places in // add the same node several times when we only want it once. A number of places in
// the code also depends on any node being only once in the list. // the code also depends on any node being only once in the list.
@ -224,8 +228,8 @@ public class BinaryDictEncoder {
// this simple list.remove operation O(n*n) overall. On Android this overhead is very // this simple list.remove operation O(n*n) overall. On Android this overhead is very
// high. // high.
// For future reference, the code to remove duplicate is a simple : list.remove(node); // For future reference, the code to remove duplicate is a simple : list.remove(node);
list.add(node); list.add(nodeArray);
final ArrayList<CharGroup> branches = node.mData; final ArrayList<CharGroup> branches = nodeArray.mData;
final int nodeSize = branches.size(); final int nodeSize = branches.size();
for (CharGroup group : branches) { for (CharGroup group : branches) {
if (null != group.mChildren) flattenTreeInner(list, group.mChildren); if (null != group.mChildren) flattenTreeInner(list, group.mChildren);
@ -234,52 +238,60 @@ public class BinaryDictEncoder {
} }
/** /**
* Get the offset from a position inside a current node to a target node, during update. * Get the offset from a position inside a current node array to a target node array, during
* update.
* *
* If the current node is before the target node, the target node has not been updated yet, * If the current node array is before the target node array, the target node array has not
* so we should return the offset from the old position of the current node to the old position * been updated yet, so we should return the offset from the old position of the current node
* of the target node. If on the other hand the target is before the current node, it already * array to the old position of the target node array. If on the other hand the target is
* has been updated, so we should return the offset from the new position in the current node * before the current node array, it already has been updated, so we should return the offset
* to the new position in the target node. * from the new position in the current node array to the new position in the target node
* @param currentNode the node containing the CharGroup where the offset will be written * array.
* @param offsetFromStartOfCurrentNode the offset, in bytes, from the start of currentNode *
* @param targetNode the target node to get the offset to * @param currentNodeArray node array containing the CharGroup where the offset will be written
* @return the offset to the target node * @param offsetFromStartOfCurrentNodeArray offset, in bytes, from the start of currentNodeArray
* @param targetNodeArray the target node array to get the offset to
* @return the offset to the target node array
*/ */
private static int getOffsetToTargetNodeDuringUpdate(final Node currentNode, private static int getOffsetToTargetNodeArrayDuringUpdate(final PtNodeArray currentNodeArray,
final int offsetFromStartOfCurrentNode, final Node targetNode) { final int offsetFromStartOfCurrentNodeArray, final PtNodeArray targetNodeArray) {
final boolean isTargetBeforeCurrent = (targetNode.mCachedAddressBeforeUpdate final boolean isTargetBeforeCurrent = (targetNodeArray.mCachedAddressBeforeUpdate
< currentNode.mCachedAddressBeforeUpdate); < currentNodeArray.mCachedAddressBeforeUpdate);
if (isTargetBeforeCurrent) { if (isTargetBeforeCurrent) {
return targetNode.mCachedAddressAfterUpdate return targetNodeArray.mCachedAddressAfterUpdate
- (currentNode.mCachedAddressAfterUpdate + offsetFromStartOfCurrentNode); - (currentNodeArray.mCachedAddressAfterUpdate
+ offsetFromStartOfCurrentNodeArray);
} else { } else {
return targetNode.mCachedAddressBeforeUpdate return targetNodeArray.mCachedAddressBeforeUpdate
- (currentNode.mCachedAddressBeforeUpdate + offsetFromStartOfCurrentNode); - (currentNodeArray.mCachedAddressBeforeUpdate
+ offsetFromStartOfCurrentNodeArray);
} }
} }
/** /**
* Get the offset from a position inside a current node to a target CharGroup, during update. * Get the offset from a position inside a current node array to a target CharGroup, during
* @param currentNode the node containing the CharGroup where the offset will be written * update.
* @param offsetFromStartOfCurrentNode the offset, in bytes, from the start of currentNode *
* @param currentNodeArray node array containing the CharGroup where the offset will be written
* @param offsetFromStartOfCurrentNodeArray offset, in bytes, from the start of currentNodeArray
* @param targetCharGroup the target CharGroup to get the offset to * @param targetCharGroup the target CharGroup to get the offset to
* @return the offset to the target CharGroup * @return the offset to the target CharGroup
*/ */
// TODO: is there any way to factorize this method with the one above? // TODO: is there any way to factorize this method with the one above?
private static int getOffsetToTargetCharGroupDuringUpdate(final Node currentNode, private static int getOffsetToTargetCharGroupDuringUpdate(final PtNodeArray currentNodeArray,
final int offsetFromStartOfCurrentNode, final CharGroup targetCharGroup) { final int offsetFromStartOfCurrentNodeArray, final CharGroup targetCharGroup) {
final int oldOffsetBasePoint = currentNode.mCachedAddressBeforeUpdate final int oldOffsetBasePoint = currentNodeArray.mCachedAddressBeforeUpdate
+ offsetFromStartOfCurrentNode; + offsetFromStartOfCurrentNodeArray;
final boolean isTargetBeforeCurrent = (targetCharGroup.mCachedAddressBeforeUpdate final boolean isTargetBeforeCurrent = (targetCharGroup.mCachedAddressBeforeUpdate
< oldOffsetBasePoint); < oldOffsetBasePoint);
// If the target is before the current node, then its address has already been updated. // If the target is before the current node array, then its address has already been
// We can use the AfterUpdate member, and compare it to our own member after update. // updated. We can use the AfterUpdate member, and compare it to our own member after
// Otherwise, the AfterUpdate member is not updated yet, so we need to use the BeforeUpdate // update. Otherwise, the AfterUpdate member is not updated yet, so we need to use the
// member, and of course we have to compare this to our own address before update. // BeforeUpdate member, and of course we have to compare this to our own address before
// update.
if (isTargetBeforeCurrent) { if (isTargetBeforeCurrent) {
final int newOffsetBasePoint = currentNode.mCachedAddressAfterUpdate final int newOffsetBasePoint = currentNodeArray.mCachedAddressAfterUpdate
+ offsetFromStartOfCurrentNode; + offsetFromStartOfCurrentNodeArray;
return targetCharGroup.mCachedAddressAfterUpdate - newOffsetBasePoint; return targetCharGroup.mCachedAddressAfterUpdate - newOffsetBasePoint;
} else { } else {
return targetCharGroup.mCachedAddressBeforeUpdate - oldOffsetBasePoint; return targetCharGroup.mCachedAddressBeforeUpdate - oldOffsetBasePoint;
@ -287,26 +299,26 @@ public class BinaryDictEncoder {
} }
/** /**
* Computes the actual node size, based on the cached addresses of the children nodes. * Computes the actual node array size, based on the cached addresses of the children nodes.
* *
* Each node stores its tentative address. During dictionary address computing, these * Each node array stores its tentative address. During dictionary address computing, these
* are not final, but they can be used to compute the node size (the node size depends * are not final, but they can be used to compute the node array size (the node array size
* on the address of the children because the number of bytes necessary to store an * depends on the address of the children because the number of bytes necessary to store an
* address depends on its numeric value. The return value indicates whether the node * address depends on its numeric value. The return value indicates whether the node array
* contents (as in, any of the addresses stored in the cache fields) have changed with * contents (as in, any of the addresses stored in the cache fields) have changed with
* respect to their previous value. * respect to their previous value.
* *
* @param node the node to compute the size of. * @param nodeArray the node array to compute the size of.
* @param dict the dictionary in which the word/attributes are to be found. * @param dict the dictionary in which the word/attributes are to be found.
* @param formatOptions file format options. * @param formatOptions file format options.
* @return false if none of the cached addresses inside the node changed, true otherwise. * @return false if none of the cached addresses inside the node array changed, true otherwise.
*/ */
private static boolean computeActualNodeSize(final Node node, final FusionDictionary dict, private static boolean computeActualNodeArraySize(final PtNodeArray nodeArray,
final FormatOptions formatOptions) { final FusionDictionary dict, final FormatOptions formatOptions) {
boolean changed = false; boolean changed = false;
int size = getGroupCountSize(node); int size = getGroupCountSize(nodeArray);
for (CharGroup group : node.mData) { for (CharGroup group : nodeArray.mData) {
group.mCachedAddressAfterUpdate = node.mCachedAddressAfterUpdate + size; group.mCachedAddressAfterUpdate = nodeArray.mCachedAddressAfterUpdate + size;
if (group.mCachedAddressAfterUpdate != group.mCachedAddressBeforeUpdate) { if (group.mCachedAddressAfterUpdate != group.mCachedAddressBeforeUpdate) {
changed = true; changed = true;
} }
@ -318,16 +330,16 @@ public class BinaryDictEncoder {
if (formatOptions.mSupportsDynamicUpdate) { if (formatOptions.mSupportsDynamicUpdate) {
groupSize += FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE; groupSize += FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE;
} else { } else {
groupSize += getByteSize(getOffsetToTargetNodeDuringUpdate(node, groupSize += getByteSize(getOffsetToTargetNodeArrayDuringUpdate(nodeArray,
groupSize + size, group.mChildren)); groupSize + size, group.mChildren));
} }
} }
groupSize += getShortcutListSize(group.mShortcutTargets); groupSize += getShortcutListSize(group.mShortcutTargets);
if (null != group.mBigrams) { if (null != group.mBigrams) {
for (WeightedString bigram : group.mBigrams) { for (WeightedString bigram : group.mBigrams) {
final int offset = getOffsetToTargetCharGroupDuringUpdate(node, final int offset = getOffsetToTargetCharGroupDuringUpdate(nodeArray,
groupSize + size + FormatSpec.GROUP_FLAGS_SIZE, groupSize + size + FormatSpec.GROUP_FLAGS_SIZE,
FusionDictionary.findWordInTree(dict.mRoot, bigram.mWord)); FusionDictionary.findWordInTree(dict.mRootNodeArray, bigram.mWord));
groupSize += getByteSize(offset) + FormatSpec.GROUP_FLAGS_SIZE; groupSize += getByteSize(offset) + FormatSpec.GROUP_FLAGS_SIZE;
} }
} }
@ -337,49 +349,49 @@ public class BinaryDictEncoder {
if (formatOptions.mSupportsDynamicUpdate) { if (formatOptions.mSupportsDynamicUpdate) {
size += FormatSpec.FORWARD_LINK_ADDRESS_SIZE; size += FormatSpec.FORWARD_LINK_ADDRESS_SIZE;
} }
if (node.mCachedSize != size) { if (nodeArray.mCachedSize != size) {
node.mCachedSize = size; nodeArray.mCachedSize = size;
changed = true; changed = true;
} }
return changed; return changed;
} }
/** /**
* Initializes the cached addresses of nodes from their size. * Initializes the cached addresses of node arrays and their containing nodes from their size.
* *
* @param flatNodes the array of nodes. * @param flatNodes the list of node arrays.
* @param formatOptions file format options. * @param formatOptions file format options.
* @return the byte size of the entire stack. * @return the byte size of the entire stack.
*/ */
private static int initializeNodesCachedAddresses(final ArrayList<Node> flatNodes, private static int initializeNodeArraysCachedAddresses(final ArrayList<PtNodeArray> flatNodes,
final FormatOptions formatOptions) { final FormatOptions formatOptions) {
int nodeOffset = 0; int nodeArrayOffset = 0;
for (final Node n : flatNodes) { for (final PtNodeArray nodeArray : flatNodes) {
n.mCachedAddressBeforeUpdate = nodeOffset; nodeArray.mCachedAddressBeforeUpdate = nodeArrayOffset;
int groupCountSize = getGroupCountSize(n); int groupCountSize = getGroupCountSize(nodeArray);
int groupOffset = 0; int groupOffset = 0;
for (final CharGroup g : n.mData) { for (final CharGroup g : nodeArray.mData) {
g.mCachedAddressBeforeUpdate = g.mCachedAddressAfterUpdate = g.mCachedAddressBeforeUpdate = g.mCachedAddressAfterUpdate =
groupCountSize + nodeOffset + groupOffset; groupCountSize + nodeArrayOffset + groupOffset;
groupOffset += g.mCachedSize; groupOffset += g.mCachedSize;
} }
final int nodeSize = groupCountSize + groupOffset final int nodeSize = groupCountSize + groupOffset
+ (formatOptions.mSupportsDynamicUpdate + (formatOptions.mSupportsDynamicUpdate
? FormatSpec.FORWARD_LINK_ADDRESS_SIZE : 0); ? FormatSpec.FORWARD_LINK_ADDRESS_SIZE : 0);
nodeOffset += n.mCachedSize; nodeArrayOffset += nodeArray.mCachedSize;
} }
return nodeOffset; return nodeArrayOffset;
} }
/** /**
* Updates the cached addresses of nodes after recomputing their new positions. * Updates the cached addresses of node arrays after recomputing their new positions.
* *
* @param flatNodes the array of nodes. * @param flatNodes the list of node arrays.
*/ */
private static void updateNodeCachedAddresses(final ArrayList<Node> flatNodes) { private static void updateNodeArraysCachedAddresses(final ArrayList<PtNodeArray> flatNodes) {
for (final Node n : flatNodes) { for (final PtNodeArray nodeArray : flatNodes) {
n.mCachedAddressBeforeUpdate = n.mCachedAddressAfterUpdate; nodeArray.mCachedAddressBeforeUpdate = nodeArray.mCachedAddressAfterUpdate;
for (final CharGroup g : n.mData) { for (final CharGroup g : nodeArray.mData) {
g.mCachedAddressBeforeUpdate = g.mCachedAddressAfterUpdate; g.mCachedAddressBeforeUpdate = g.mCachedAddressAfterUpdate;
} }
} }
@ -391,11 +403,11 @@ public class BinaryDictEncoder {
* The parent addresses are used by some binary formats at write-to-disk time. Not all formats * The parent addresses are used by some binary formats at write-to-disk time. Not all formats
* need them. In particular, version 2 does not need them, and version 3 does. * need them. In particular, version 2 does not need them, and version 3 does.
* *
* @param flatNodes the flat array of nodes to fill in * @param flatNodes the flat array of node arrays to fill in
*/ */
private static void computeParentAddresses(final ArrayList<Node> flatNodes) { private static void computeParentAddresses(final ArrayList<PtNodeArray> flatNodes) {
for (final Node node : flatNodes) { for (final PtNodeArray nodeArray : flatNodes) {
for (final CharGroup group : node.mData) { for (final CharGroup group : nodeArray.mData) {
if (null != group.mChildren) { if (null != group.mChildren) {
// Assign my address to children's parent address // Assign my address to children's parent address
// Here BeforeUpdate and AfterUpdate addresses have the same value, so it // Here BeforeUpdate and AfterUpdate addresses have the same value, so it
@ -408,25 +420,25 @@ public class BinaryDictEncoder {
} }
/** /**
* Compute the addresses and sizes of an ordered node array. * Compute the addresses and sizes of an ordered list of node arrays.
* *
* This method takes a node array and will update its cached address and size values * This method takes a list of node arrays and will update their cached address and size
* so that they can be written into a file. It determines the smallest size each of the * values so that they can be written into a file. It determines the smallest size each of the
* nodes can be given the addresses of its children and attributes, and store that into * node arrays can be given the addresses of its children and attributes, and store that into
* each node. * each node.
* The order of the node is given by the order of the array. This method makes no effort * The order of the node is given by the order of the array. This method makes no effort
* to find a good order; it only mechanically computes the size this order results in. * to find a good order; it only mechanically computes the size this order results in.
* *
* @param dict the dictionary * @param dict the dictionary
* @param flatNodes the ordered array of nodes * @param flatNodes the ordered list of node arrays
* @param formatOptions file format options. * @param formatOptions file format options.
* @return the same array it was passed. The nodes have been updated for address and size. * @return the same array it was passed. The nodes have been updated for address and size.
*/ */
private static ArrayList<Node> computeAddresses(final FusionDictionary dict, private static ArrayList<PtNodeArray> computeAddresses(final FusionDictionary dict,
final ArrayList<Node> flatNodes, final FormatOptions formatOptions) { final ArrayList<PtNodeArray> flatNodes, final FormatOptions formatOptions) {
// First get the worst possible sizes and offsets // First get the worst possible sizes and offsets
for (final Node n : flatNodes) calculateNodeMaximumSize(n, formatOptions); for (final PtNodeArray n : flatNodes) calculateNodeArrayMaximumSize(n, formatOptions);
final int offset = initializeNodesCachedAddresses(flatNodes, formatOptions); final int offset = initializeNodeArraysCachedAddresses(flatNodes, formatOptions);
MakedictLog.i("Compressing the array addresses. Original size : " + offset); MakedictLog.i("Compressing the array addresses. Original size : " + offset);
MakedictLog.i("(Recursively seen size : " + offset + ")"); MakedictLog.i("(Recursively seen size : " + offset + ")");
@ -435,17 +447,19 @@ public class BinaryDictEncoder {
boolean changesDone = false; boolean changesDone = false;
do { do {
changesDone = false; changesDone = false;
int nodeStartOffset = 0; int nodeArrayStartOffset = 0;
for (final Node n : flatNodes) { for (final PtNodeArray nodeArray : flatNodes) {
n.mCachedAddressAfterUpdate = nodeStartOffset; nodeArray.mCachedAddressAfterUpdate = nodeArrayStartOffset;
final int oldNodeSize = n.mCachedSize; final int oldNodeArraySize = nodeArray.mCachedSize;
final boolean changed = computeActualNodeSize(n, dict, formatOptions); final boolean changed = computeActualNodeArraySize(nodeArray, dict, formatOptions);
final int newNodeSize = n.mCachedSize; final int newNodeArraySize = nodeArray.mCachedSize;
if (oldNodeSize < newNodeSize) throw new RuntimeException("Increased size ?!"); if (oldNodeArraySize < newNodeArraySize) {
nodeStartOffset += newNodeSize; throw new RuntimeException("Increased size ?!");
}
nodeArrayStartOffset += newNodeArraySize;
changesDone |= changed; changesDone |= changed;
} }
updateNodeCachedAddresses(flatNodes); updateNodeArraysCachedAddresses(flatNodes);
++passes; ++passes;
if (passes > MAX_PASSES) throw new RuntimeException("Too many passes - probably a bug"); if (passes > MAX_PASSES) throw new RuntimeException("Too many passes - probably a bug");
} while (changesDone); } while (changesDone);
@ -453,10 +467,10 @@ public class BinaryDictEncoder {
if (formatOptions.mSupportsDynamicUpdate) { if (formatOptions.mSupportsDynamicUpdate) {
computeParentAddresses(flatNodes); computeParentAddresses(flatNodes);
} }
final Node lastNode = flatNodes.get(flatNodes.size() - 1); final PtNodeArray lastNodeArray = flatNodes.get(flatNodes.size() - 1);
MakedictLog.i("Compression complete in " + passes + " passes."); MakedictLog.i("Compression complete in " + passes + " passes.");
MakedictLog.i("After address compression : " MakedictLog.i("After address compression : "
+ (lastNode.mCachedAddressAfterUpdate + lastNode.mCachedSize)); + (lastNodeArray.mCachedAddressAfterUpdate + lastNodeArray.mCachedSize));
return flatNodes; return flatNodes;
} }
@ -464,25 +478,25 @@ public class BinaryDictEncoder {
/** /**
* Sanity-checking method. * Sanity-checking method.
* *
* This method checks an array of node for juxtaposition, that is, it will do * This method checks a list of node arrays for juxtaposition, that is, it will do
* nothing if each node's cached address is actually the previous node's address * nothing if each node array's cached address is actually the previous node array's address
* plus the previous node's size. * plus the previous node's size.
* If this is not the case, it will throw an exception. * If this is not the case, it will throw an exception.
* *
* @param array the array node to check * @param arrays the list of node arrays to check
*/ */
private static void checkFlatNodeArray(final ArrayList<Node> array) { private static void checkFlatNodeArrayList(final ArrayList<PtNodeArray> arrays) {
int offset = 0; int offset = 0;
int index = 0; int index = 0;
for (final Node n : array) { for (final PtNodeArray nodeArray : arrays) {
// BeforeUpdate and AfterUpdate addresses are the same here, so it does not matter // BeforeUpdate and AfterUpdate addresses are the same here, so it does not matter
// which we use. // which we use.
if (n.mCachedAddressAfterUpdate != offset) { if (nodeArray.mCachedAddressAfterUpdate != offset) {
throw new RuntimeException("Wrong address for node " + index throw new RuntimeException("Wrong address for node " + index
+ " : expected " + offset + ", got " + n.mCachedAddressAfterUpdate); + " : expected " + offset + ", got " + nodeArray.mCachedAddressAfterUpdate);
} }
++index; ++index;
offset += n.mCachedSize; offset += nodeArray.mCachedSize;
} }
} }
@ -707,26 +721,23 @@ public class BinaryDictEncoder {
} }
/** /**
* Write a node to memory. The node is expected to have its final position cached. * Write a node array to memory. The node array is expected to have its final position cached.
* *
* This can be an empty map, but the more is inside the faster the lookups will be. It can * @param dict the dictionary the node array is a part of (for relative offsets).
* be carried on as long as nodes do not move.
*
* @param dict the dictionary the node is a part of (for relative offsets).
* @param buffer the memory buffer to write to. * @param buffer the memory buffer to write to.
* @param node the node to write. * @param nodeArray the node array to write.
* @param formatOptions file format options. * @param formatOptions file format options.
* @return the address of the END of the node. * @return the address of the END of the node.
*/ */
@SuppressWarnings("unused") @SuppressWarnings("unused")
private static int writePlacedNode(final FusionDictionary dict, byte[] buffer, private static int writePlacedNode(final FusionDictionary dict, byte[] buffer,
final Node node, final FormatOptions formatOptions) { final PtNodeArray nodeArray, final FormatOptions formatOptions) {
// TODO: Make the code in common with BinaryDictIOUtils#writeCharGroup // TODO: Make the code in common with BinaryDictIOUtils#writeCharGroup
int index = node.mCachedAddressAfterUpdate; int index = nodeArray.mCachedAddressAfterUpdate;
final int groupCount = node.mData.size(); final int groupCount = nodeArray.mData.size();
final int countSize = getGroupCountSize(node); final int countSize = getGroupCountSize(nodeArray);
final int parentAddress = node.mCachedParentAddress; final int parentAddress = nodeArray.mCachedParentAddress;
if (1 == countSize) { if (1 == countSize) {
buffer[index++] = (byte)groupCount; buffer[index++] = (byte)groupCount;
} else if (2 == countSize) { } else if (2 == countSize) {
@ -739,7 +750,7 @@ public class BinaryDictEncoder {
} }
int groupAddress = index; int groupAddress = index;
for (int i = 0; i < groupCount; ++i) { for (int i = 0; i < groupCount; ++i) {
final CharGroup group = node.mData.get(i); final CharGroup group = nodeArray.mData.get(i);
if (index != group.mCachedAddressAfterUpdate) { if (index != group.mCachedAddressAfterUpdate) {
throw new RuntimeException("Bug: write index is not the same as the cached address " throw new RuntimeException("Bug: write index is not the same as the cached address "
+ "of the group : " + index + " <> " + group.mCachedAddressAfterUpdate); + "of the group : " + index + " <> " + group.mCachedAddressAfterUpdate);
@ -762,7 +773,7 @@ public class BinaryDictEncoder {
index = writeParentAddress(buffer, index, parentAddress, formatOptions); index = writeParentAddress(buffer, index, parentAddress, formatOptions);
} else { } else {
index = writeParentAddress(buffer, index, parentAddress index = writeParentAddress(buffer, index, parentAddress
+ (node.mCachedAddressAfterUpdate - group.mCachedAddressAfterUpdate), + (nodeArray.mCachedAddressAfterUpdate - group.mCachedAddressAfterUpdate),
formatOptions); formatOptions);
} }
@ -812,7 +823,7 @@ public class BinaryDictEncoder {
while (bigramIterator.hasNext()) { while (bigramIterator.hasNext()) {
final WeightedString bigram = bigramIterator.next(); final WeightedString bigram = bigramIterator.next();
final CharGroup target = final CharGroup target =
FusionDictionary.findWordInTree(dict.mRoot, bigram.mWord); FusionDictionary.findWordInTree(dict.mRootNodeArray, bigram.mWord);
final int addressOfBigram = target.mCachedAddressAfterUpdate; final int addressOfBigram = target.mCachedAddressAfterUpdate;
final int unigramFrequencyForThisWord = target.mFrequency; final int unigramFrequencyForThisWord = target.mFrequency;
++groupAddress; ++groupAddress;
@ -832,57 +843,58 @@ public class BinaryDictEncoder {
= FormatSpec.NO_FORWARD_LINK_ADDRESS; = FormatSpec.NO_FORWARD_LINK_ADDRESS;
index += FormatSpec.FORWARD_LINK_ADDRESS_SIZE; index += FormatSpec.FORWARD_LINK_ADDRESS_SIZE;
} }
if (index != node.mCachedAddressAfterUpdate + node.mCachedSize) throw new RuntimeException( if (index != nodeArray.mCachedAddressAfterUpdate + nodeArray.mCachedSize) {
"Not the same size : written " throw new RuntimeException(
+ (index - node.mCachedAddressAfterUpdate) + " bytes from a node that should have " "Not the same size : written " + (index - nodeArray.mCachedAddressAfterUpdate)
+ node.mCachedSize + " bytes"); + " bytes from a node that should have " + nodeArray.mCachedSize + " bytes");
}
return index; return index;
} }
/** /**
* Dumps a collection of useful statistics about a node array. * Dumps a collection of useful statistics about a list of node arrays.
* *
* This prints purely informative stuff, like the total estimated file size, the * This prints purely informative stuff, like the total estimated file size, the
* number of nodes, of character groups, the repartition of each address size, etc * number of node arrays, of character groups, the repartition of each address size, etc
* *
* @param nodes the node array. * @param nodeArrays the list of node arrays.
*/ */
private static void showStatistics(ArrayList<Node> nodes) { private static void showStatistics(ArrayList<PtNodeArray> nodeArrays) {
int firstTerminalAddress = Integer.MAX_VALUE; int firstTerminalAddress = Integer.MAX_VALUE;
int lastTerminalAddress = Integer.MIN_VALUE; int lastTerminalAddress = Integer.MIN_VALUE;
int size = 0; int size = 0;
int charGroups = 0; int charGroups = 0;
int maxGroups = 0; int maxGroups = 0;
int maxRuns = 0; int maxRuns = 0;
for (final Node n : nodes) { for (final PtNodeArray nodeArray : nodeArrays) {
if (maxGroups < n.mData.size()) maxGroups = n.mData.size(); if (maxGroups < nodeArray.mData.size()) maxGroups = nodeArray.mData.size();
for (final CharGroup cg : n.mData) { for (final CharGroup cg : nodeArray.mData) {
++charGroups; ++charGroups;
if (cg.mChars.length > maxRuns) maxRuns = cg.mChars.length; if (cg.mChars.length > maxRuns) maxRuns = cg.mChars.length;
if (cg.mFrequency >= 0) { if (cg.mFrequency >= 0) {
if (n.mCachedAddressAfterUpdate < firstTerminalAddress) if (nodeArray.mCachedAddressAfterUpdate < firstTerminalAddress)
firstTerminalAddress = n.mCachedAddressAfterUpdate; firstTerminalAddress = nodeArray.mCachedAddressAfterUpdate;
if (n.mCachedAddressAfterUpdate > lastTerminalAddress) if (nodeArray.mCachedAddressAfterUpdate > lastTerminalAddress)
lastTerminalAddress = n.mCachedAddressAfterUpdate; lastTerminalAddress = nodeArray.mCachedAddressAfterUpdate;
} }
} }
if (n.mCachedAddressAfterUpdate + n.mCachedSize > size) { if (nodeArray.mCachedAddressAfterUpdate + nodeArray.mCachedSize > size) {
size = n.mCachedAddressAfterUpdate + n.mCachedSize; size = nodeArray.mCachedAddressAfterUpdate + nodeArray.mCachedSize;
} }
} }
final int[] groupCounts = new int[maxGroups + 1]; final int[] groupCounts = new int[maxGroups + 1];
final int[] runCounts = new int[maxRuns + 1]; final int[] runCounts = new int[maxRuns + 1];
for (final Node n : nodes) { for (final PtNodeArray nodeArray : nodeArrays) {
++groupCounts[n.mData.size()]; ++groupCounts[nodeArray.mData.size()];
for (final CharGroup cg : n.mData) { for (final CharGroup cg : nodeArray.mData) {
++runCounts[cg.mChars.length]; ++runCounts[cg.mChars.length];
} }
} }
MakedictLog.i("Statistics:\n" MakedictLog.i("Statistics:\n"
+ " total file size " + size + "\n" + " total file size " + size + "\n"
+ " " + nodes.size() + " nodes\n" + " " + nodeArrays.size() + " node arrays\n"
+ " " + charGroups + " groups (" + ((float)charGroups / nodes.size()) + " " + charGroups + " groups (" + ((float)charGroups / nodeArrays.size())
+ " groups per node)\n" + " groups per node)\n"
+ " first terminal at " + firstTerminalAddress + "\n" + " first terminal at " + firstTerminalAddress + "\n"
+ " last terminal at " + lastTerminalAddress + "\n" + " last terminal at " + lastTerminalAddress + "\n"
@ -909,11 +921,12 @@ public class BinaryDictEncoder {
final FusionDictionary dict, final FormatOptions formatOptions) final FusionDictionary dict, final FormatOptions formatOptions)
throws IOException, UnsupportedFormatException { throws IOException, UnsupportedFormatException {
// Addresses are limited to 3 bytes, but since addresses can be relative to each node, the // Addresses are limited to 3 bytes, but since addresses can be relative to each node
// structure itself is not limited to 16MB. However, if it is over 16MB deciding the order // array, the structure itself is not limited to 16MB. However, if it is over 16MB deciding
// of the nodes becomes a quite complicated problem, because though the dictionary itself // the order of the node arrays becomes a quite complicated problem, because though the
// does not have a size limit, each node must still be within 16MB of all its children and // dictionary itself does not have a size limit, each node array must still be within 16MB
// parents. As long as this is ensured, the dictionary file may grow to any size. // of all its children and parents. As long as this is ensured, the dictionary file may
// grow to any size.
final int version = formatOptions.mVersion; final int version = formatOptions.mVersion;
if (version < FormatSpec.MINIMUM_SUPPORTED_VERSION if (version < FormatSpec.MINIMUM_SUPPORTED_VERSION
@ -964,23 +977,23 @@ public class BinaryDictEncoder {
// Leave the choice of the optimal node order to the flattenTree function. // Leave the choice of the optimal node order to the flattenTree function.
MakedictLog.i("Flattening the tree..."); MakedictLog.i("Flattening the tree...");
ArrayList<Node> flatNodes = flattenTree(dict.mRoot); ArrayList<PtNodeArray> flatNodes = flattenTree(dict.mRootNodeArray);
MakedictLog.i("Computing addresses..."); MakedictLog.i("Computing addresses...");
computeAddresses(dict, flatNodes, formatOptions); computeAddresses(dict, flatNodes, formatOptions);
MakedictLog.i("Checking array..."); MakedictLog.i("Checking array...");
if (DBG) checkFlatNodeArray(flatNodes); if (DBG) checkFlatNodeArrayList(flatNodes);
// Create a buffer that matches the final dictionary size. // Create a buffer that matches the final dictionary size.
final Node lastNode = flatNodes.get(flatNodes.size() - 1); final PtNodeArray lastNodeArray = flatNodes.get(flatNodes.size() - 1);
final int bufferSize = lastNode.mCachedAddressAfterUpdate + lastNode.mCachedSize; final int bufferSize = lastNodeArray.mCachedAddressAfterUpdate + lastNodeArray.mCachedSize;
final byte[] buffer = new byte[bufferSize]; final byte[] buffer = new byte[bufferSize];
int index = 0; int index = 0;
MakedictLog.i("Writing file..."); MakedictLog.i("Writing file...");
int dataEndOffset = 0; int dataEndOffset = 0;
for (Node n : flatNodes) { for (PtNodeArray nodeArray : flatNodes) {
dataEndOffset = writePlacedNode(dict, buffer, n, formatOptions); dataEndOffset = writePlacedNode(dict, buffer, nodeArray, formatOptions);
} }
if (DBG) showStatistics(flatNodes); if (DBG) showStatistics(flatNodes);

View file

@ -59,7 +59,7 @@ public final class BinaryDictIOUtils {
} }
/** /**
* Tours all node without recursive call. * Retrieves all node arrays without recursive call.
*/ */
private static void readUnigramsAndBigramsBinaryInner( private static void readUnigramsAndBigramsBinaryInner(
final FusionDictionaryBufferInterface buffer, final int headerSize, final FusionDictionaryBufferInterface buffer, final int headerSize,
@ -116,7 +116,7 @@ public final class BinaryDictIOUtils {
if (formatOptions.mSupportsDynamicUpdate) { if (formatOptions.mSupportsDynamicUpdate) {
final int forwardLinkAddress = buffer.readUnsignedInt24(); final int forwardLinkAddress = buffer.readUnsignedInt24();
if (forwardLinkAddress != FormatSpec.NO_FORWARD_LINK_ADDRESS) { if (forwardLinkAddress != FormatSpec.NO_FORWARD_LINK_ADDRESS) {
// the node has a forward link. // The node array has a forward link.
p.mNumOfCharGroup = Position.NOT_READ_GROUPCOUNT; p.mNumOfCharGroup = Position.NOT_READ_GROUPCOUNT;
p.mAddress = forwardLinkAddress; p.mAddress = forwardLinkAddress;
} else { } else {
@ -126,7 +126,7 @@ public final class BinaryDictIOUtils {
stack.pop(); stack.pop();
} }
} else { } else {
// the node has more groups. // The node array has more groups.
p.mAddress = buffer.position(); p.mAddress = buffer.position();
} }
@ -139,14 +139,14 @@ public final class BinaryDictIOUtils {
/** /**
* Reads unigrams and bigrams from the binary file. * Reads unigrams and bigrams from the binary file.
* Doesn't make the memory representation of the dictionary. * Doesn't store a full memory representation of the dictionary.
* *
* @param reader the reader. * @param reader the reader.
* @param words the map to store the address as a key and the word as a value. * @param words the map to store the address as a key and the word as a value.
* @param frequencies the map to store the address as a key and the frequency as a value. * @param frequencies the map to store the address as a key and the frequency as a value.
* @param bigrams the map to store the address as a key and the list of address as a value. * @param bigrams the map to store the address as a key and the list of address as a value.
* @throws IOException * @throws IOException if the file can't be read.
* @throws UnsupportedFormatException * @throws UnsupportedFormatException if the format of the file is not recognized.
*/ */
public static void readUnigramsAndBigramsBinary(final BinaryDictReader reader, public static void readUnigramsAndBigramsBinary(final BinaryDictReader reader,
final Map<Integer, String> words, final Map<Integer, Integer> frequencies, final Map<Integer, String> words, final Map<Integer, Integer> frequencies,
@ -165,8 +165,8 @@ public final class BinaryDictIOUtils {
* @param buffer the buffer to read. * @param buffer the buffer to read.
* @param word the word we search for. * @param word the word we search for.
* @return the address of the terminal node. * @return the address of the terminal node.
* @throws IOException * @throws IOException if the file can't be read.
* @throws UnsupportedFormatException * @throws UnsupportedFormatException if the format of the file is not recognized.
*/ */
@UsedForTesting @UsedForTesting
public static int getTerminalPosition(final FusionDictionaryBufferInterface buffer, public static int getTerminalPosition(final FusionDictionaryBufferInterface buffer,
@ -224,9 +224,9 @@ public final class BinaryDictIOUtils {
} }
// If we found the next char group, it is under the file pointer. // If we found the next char group, it is under the file pointer.
// But if not, we are at the end of this node so we expect to have // But if not, we are at the end of this node array so we expect to have
// a forward link address that we need to consult and possibly resume // a forward link address that we need to consult and possibly resume
// search on the next node in the linked list. // search on the next node array in the linked list.
if (foundNextCharGroup) break; if (foundNextCharGroup) break;
if (!header.mFormatOptions.mSupportsDynamicUpdate) { if (!header.mFormatOptions.mSupportsDynamicUpdate) {
return FormatSpec.NOT_VALID_WORD; return FormatSpec.NOT_VALID_WORD;
@ -365,9 +365,10 @@ public final class BinaryDictIOUtils {
} }
/** /**
* Write a char group to an output stream. * Write a char group to an output stream from a CharGroupInfo.
* A char group is an in-memory representation of a node in trie. * A char group is an in-memory representation of a node in the patricia trie.
* A char group info is an on-disk representation of a node. * A char group info is a container for low-level information about how the
* char group is stored in the binary format.
* *
* @param destination the stream to write. * @param destination the stream to write.
* @param info the char group info to be written. * @param info the char group info to be written.
@ -427,7 +428,7 @@ public final class BinaryDictIOUtils {
if (info.mBigrams != null) { if (info.mBigrams != null) {
// TODO: Consolidate this code with the code that computes the size of the bigram list // TODO: Consolidate this code with the code that computes the size of the bigram list
// in BinaryDictEncoder#computeActualNodeSize // in BinaryDictEncoder#computeActualNodeArraySize
for (int i = 0; i < info.mBigrams.size(); ++i) { for (int i = 0; i < info.mBigrams.size(); ++i) {
final int bigramFrequency = info.mBigrams.get(i).mFrequency; final int bigramFrequency = info.mBigrams.get(i).mFrequency;
@ -479,14 +480,14 @@ public final class BinaryDictIOUtils {
} }
/** /**
* Write a node to the stream. * Write a node array to the stream.
* *
* @param destination the stream to write. * @param destination the stream to write.
* @param infos groups to be written. * @param infos an array of CharGroupInfo to be written.
* @return the size written, in bytes. * @return the size written, in bytes.
* @throws IOException * @throws IOException
*/ */
static int writeNode(final OutputStream destination, final CharGroupInfo[] infos) static int writeNodes(final OutputStream destination, final CharGroupInfo[] infos)
throws IOException { throws IOException {
int size = getGroupCountSize(infos.length); int size = getGroupCountSize(infos.length);
switch (getGroupCountSize(infos.length)) { switch (getGroupCountSize(infos.length)) {
@ -604,12 +605,12 @@ public final class BinaryDictIOUtils {
public static int getGroupCountSize(final int count) { public static int getGroupCountSize(final int count) {
if (FormatSpec.MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT >= count) { if (FormatSpec.MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT >= count) {
return 1; return 1;
} else if (FormatSpec.MAX_CHARGROUPS_IN_A_NODE >= count) { } else if (FormatSpec.MAX_CHARGROUPS_IN_A_PT_NODE_ARRAY >= count) {
return 2; return 2;
} else { } else {
throw new RuntimeException("Can't have more than " throw new RuntimeException("Can't have more than "
+ FormatSpec.MAX_CHARGROUPS_IN_A_NODE + " groups in a node (found " + count + FormatSpec.MAX_CHARGROUPS_IN_A_PT_NODE_ARRAY + " groups in a node (found "
+ ")"); + count + ")");
} }
} }

View file

@ -86,7 +86,7 @@ public final class DynamicBinaryDictIOUtils {
} }
final int flags = buffer.readUnsignedByte(); final int flags = buffer.readUnsignedByte();
if (BinaryDictIOUtils.isMovedGroup(flags, formatOptions)) { if (BinaryDictIOUtils.isMovedGroup(flags, formatOptions)) {
// if the group is moved, the parent address is stored in the destination group. // If the group is moved, the parent address is stored in the destination group.
// We are guaranteed to process the destination group later, so there is no need to // We are guaranteed to process the destination group later, so there is no need to
// update anything here. // update anything here.
buffer.position(originalPosition); buffer.position(originalPosition);
@ -101,10 +101,10 @@ public final class DynamicBinaryDictIOUtils {
} }
/** /**
* Update parent addresses in a Node that is referred to by nodeOriginAddress. * Update parent addresses in a node array stored at nodeOriginAddress.
* *
* @param buffer the buffer to be modified. * @param buffer the buffer to be modified.
* @param nodeOriginAddress the address of a modified Node. * @param nodeOriginAddress the address of the node array to update.
* @param newParentAddress the address to be written. * @param newParentAddress the address to be written.
* @param formatOptions file format options. * @param formatOptions file format options.
*/ */
@ -154,7 +154,7 @@ public final class DynamicBinaryDictIOUtils {
*/ */
private static int moveCharGroup(final OutputStream destination, private static int moveCharGroup(final OutputStream destination,
final FusionDictionaryBufferInterface buffer, final CharGroupInfo info, final FusionDictionaryBufferInterface buffer, final CharGroupInfo info,
final int nodeOriginAddress, final int oldGroupAddress, final int nodeArrayOriginAddress, final int oldGroupAddress,
final FormatOptions formatOptions) throws IOException { final FormatOptions formatOptions) throws IOException {
updateParentAddress(buffer, oldGroupAddress, buffer.limit() + 1, formatOptions); updateParentAddress(buffer, oldGroupAddress, buffer.limit() + 1, formatOptions);
buffer.position(oldGroupAddress); buffer.position(oldGroupAddress);
@ -163,15 +163,16 @@ public final class DynamicBinaryDictIOUtils {
buffer.put((byte)(FormatSpec.FLAG_IS_MOVED | (currentFlags buffer.put((byte)(FormatSpec.FLAG_IS_MOVED | (currentFlags
& (~FormatSpec.MASK_MOVE_AND_DELETE_FLAG)))); & (~FormatSpec.MASK_MOVE_AND_DELETE_FLAG))));
int size = FormatSpec.GROUP_FLAGS_SIZE; int size = FormatSpec.GROUP_FLAGS_SIZE;
updateForwardLink(buffer, nodeOriginAddress, buffer.limit(), formatOptions); updateForwardLink(buffer, nodeArrayOriginAddress, buffer.limit(), formatOptions);
size += BinaryDictIOUtils.writeNode(destination, new CharGroupInfo[] { info }); size += BinaryDictIOUtils.writeNodes(destination, new CharGroupInfo[] { info });
return size; return size;
} }
@SuppressWarnings("unused") @SuppressWarnings("unused")
private static void updateForwardLink(final FusionDictionaryBufferInterface buffer, private static void updateForwardLink(final FusionDictionaryBufferInterface buffer,
final int nodeOriginAddress, final int newNodeAddress, final int nodeArrayOriginAddress, final int newNodeArrayAddress,
final FormatOptions formatOptions) { final FormatOptions formatOptions) {
buffer.position(nodeOriginAddress); buffer.position(nodeArrayOriginAddress);
int jumpCount = 0; int jumpCount = 0;
while (jumpCount++ < MAX_JUMPS) { while (jumpCount++ < MAX_JUMPS) {
final int count = BinaryDictDecoder.readCharGroupCount(buffer); final int count = BinaryDictDecoder.readCharGroupCount(buffer);
@ -179,7 +180,7 @@ public final class DynamicBinaryDictIOUtils {
final int forwardLinkAddress = buffer.readUnsignedInt24(); final int forwardLinkAddress = buffer.readUnsignedInt24();
if (forwardLinkAddress == FormatSpec.NO_FORWARD_LINK_ADDRESS) { if (forwardLinkAddress == FormatSpec.NO_FORWARD_LINK_ADDRESS) {
buffer.position(buffer.position() - FormatSpec.FORWARD_LINK_ADDRESS_SIZE); buffer.position(buffer.position() - FormatSpec.FORWARD_LINK_ADDRESS_SIZE);
BinaryDictIOUtils.writeSInt24ToBuffer(buffer, newNodeAddress); BinaryDictIOUtils.writeSInt24ToBuffer(buffer, newNodeArrayAddress);
return; return;
} }
buffer.position(forwardLinkAddress); buffer.position(forwardLinkAddress);
@ -190,57 +191,59 @@ public final class DynamicBinaryDictIOUtils {
} }
/** /**
* Move a group that is referred to by oldGroupOrigin to the tail of the file. * Move a group that is referred to by oldGroupOrigin to the tail of the file, and set the
* And set the children address to the byte after the group. * children address to the byte after the group
* *
* @param nodeOrigin the address of the tail of the file. * @param fileEndAddress the address of the tail of the file.
* @param characters * @param codePoints the characters to put inside the group.
* @param length * @param length how many code points to read from codePoints.
* @param flags * @param flags the flags for this group.
* @param frequency * @param frequency the frequency of this terminal.
* @param parentAddress * @param parentAddress the address of the parent group of this group.
* @param shortcutTargets * @param shortcutTargets the shortcut targets for this group.
* @param bigrams * @param bigrams the bigrams for this group.
* @param destination the stream representing the tail of the file. * @param destination the stream representing the tail of the file.
* @param buffer the buffer representing the (constant-size) body of the file. * @param buffer the buffer representing the (constant-size) body of the file.
* @param oldNodeOrigin * @param oldNodeArrayOrigin the origin of the old node array this group was a part of.
* @param oldGroupOrigin * @param oldGroupOrigin the old origin where this group used to be stored.
* @param formatOptions * @param formatOptions format options for this dictionary.
* @return the size written, in bytes. * @return the size written, in bytes.
* @throws IOException * @throws IOException if the file can't be accessed
*/ */
private static int moveGroup(final int nodeOrigin, final int[] characters, final int length, private static int moveGroup(final int fileEndAddress, final int[] codePoints,
final int flags, final int frequency, final int parentAddress, final int length, final int flags, final int frequency, final int parentAddress,
final ArrayList<WeightedString> shortcutTargets, final ArrayList<WeightedString> shortcutTargets,
final ArrayList<PendingAttribute> bigrams, final OutputStream destination, final ArrayList<PendingAttribute> bigrams, final OutputStream destination,
final FusionDictionaryBufferInterface buffer, final int oldNodeOrigin, final FusionDictionaryBufferInterface buffer, final int oldNodeArrayOrigin,
final int oldGroupOrigin, final FormatOptions formatOptions) throws IOException { final int oldGroupOrigin, final FormatOptions formatOptions) throws IOException {
int size = 0; int size = 0;
final int newGroupOrigin = nodeOrigin + 1; final int newGroupOrigin = fileEndAddress + 1;
final int[] writtenCharacters = Arrays.copyOfRange(characters, 0, length); final int[] writtenCharacters = Arrays.copyOfRange(codePoints, 0, length);
final CharGroupInfo tmpInfo = new CharGroupInfo(newGroupOrigin, -1 /* endAddress */, final CharGroupInfo tmpInfo = new CharGroupInfo(newGroupOrigin, -1 /* endAddress */,
flags, writtenCharacters, frequency, parentAddress, FormatSpec.NO_CHILDREN_ADDRESS, flags, writtenCharacters, frequency, parentAddress, FormatSpec.NO_CHILDREN_ADDRESS,
shortcutTargets, bigrams); shortcutTargets, bigrams);
size = BinaryDictIOUtils.computeGroupSize(tmpInfo, formatOptions); size = BinaryDictIOUtils.computeGroupSize(tmpInfo, formatOptions);
final CharGroupInfo newInfo = new CharGroupInfo(newGroupOrigin, newGroupOrigin + size, final CharGroupInfo newInfo = new CharGroupInfo(newGroupOrigin, newGroupOrigin + size,
flags, writtenCharacters, frequency, parentAddress, flags, writtenCharacters, frequency, parentAddress,
nodeOrigin + 1 + size + FormatSpec.FORWARD_LINK_ADDRESS_SIZE, shortcutTargets, fileEndAddress + 1 + size + FormatSpec.FORWARD_LINK_ADDRESS_SIZE, shortcutTargets,
bigrams); bigrams);
moveCharGroup(destination, buffer, newInfo, oldNodeOrigin, oldGroupOrigin, formatOptions); moveCharGroup(destination, buffer, newInfo, oldNodeArrayOrigin, oldGroupOrigin,
formatOptions);
return 1 + size + FormatSpec.FORWARD_LINK_ADDRESS_SIZE; return 1 + size + FormatSpec.FORWARD_LINK_ADDRESS_SIZE;
} }
/** /**
* Insert a word into a binary dictionary. * Insert a word into a binary dictionary.
* *
* @param buffer * @param buffer the buffer containing the existing dictionary.
* @param destination * @param destination a stream to the underlying file, with the pointer at the end of the file.
* @param word * @param word the word to insert.
* @param frequency * @param frequency the frequency of the new word.
* @param bigramStrings * @param bigramStrings bigram list, or null if none.
* @param shortcuts * @param shortcuts shortcut list, or null if none.
* @throws IOException * @param isBlackListEntry whether this should be a blacklist entry.
* @throws UnsupportedFormatException * @throws IOException if the file can't be accessed.
* @throws UnsupportedFormatException if the existing dictionary is in an unexpected format.
*/ */
// TODO: Support batch insertion. // TODO: Support batch insertion.
// TODO: Remove @UsedForTesting once UserHistoryDictionary is implemented by BinaryDictionary. // TODO: Remove @UsedForTesting once UserHistoryDictionary is implemented by BinaryDictionary.
@ -323,7 +326,7 @@ public final class DynamicBinaryDictIOUtils {
currentInfo.mFlags, characters2, currentInfo.mFrequency, currentInfo.mFlags, characters2, currentInfo.mFrequency,
newNodeAddress + 1, currentInfo.mChildrenAddress, newNodeAddress + 1, currentInfo.mChildrenAddress,
currentInfo.mShortcutTargets, currentInfo.mBigrams); currentInfo.mShortcutTargets, currentInfo.mBigrams);
BinaryDictIOUtils.writeNode(destination, new CharGroupInfo[] { newInfo2 }); BinaryDictIOUtils.writeNodes(destination, new CharGroupInfo[] { newInfo2 });
return; return;
} else if (codePoints[wordPos + p] != currentInfo.mCharacters[p]) { } else if (codePoints[wordPos + p] != currentInfo.mCharacters[p]) {
if (p > 0) { if (p > 0) {
@ -386,7 +389,7 @@ public final class DynamicBinaryDictIOUtils {
newNodeAddress + written, -1 /* endAddress */, flags, newNodeAddress + written, -1 /* endAddress */, flags,
newCharacters, frequency, newNodeAddress + 1, newCharacters, frequency, newNodeAddress + 1,
FormatSpec.NO_CHILDREN_ADDRESS, shortcuts, bigrams); FormatSpec.NO_CHILDREN_ADDRESS, shortcuts, bigrams);
BinaryDictIOUtils.writeNode(destination, BinaryDictIOUtils.writeNodes(destination,
new CharGroupInfo[] { suffixInfo, newInfo }); new CharGroupInfo[] { suffixInfo, newInfo });
return; return;
} }
@ -438,7 +441,7 @@ public final class DynamicBinaryDictIOUtils {
final CharGroupInfo newInfo = new CharGroupInfo(newGroupAddress, -1, flags, final CharGroupInfo newInfo = new CharGroupInfo(newGroupAddress, -1, flags,
characters, frequency, address, FormatSpec.NO_CHILDREN_ADDRESS, characters, frequency, address, FormatSpec.NO_CHILDREN_ADDRESS,
shortcuts, bigrams); shortcuts, bigrams);
BinaryDictIOUtils.writeNode(destination, new CharGroupInfo[] { newInfo }); BinaryDictIOUtils.writeNodes(destination, new CharGroupInfo[] { newInfo });
return; return;
} }
buffer.position(currentInfo.mChildrenAddress); buffer.position(currentInfo.mChildrenAddress);
@ -482,7 +485,7 @@ public final class DynamicBinaryDictIOUtils {
final CharGroupInfo newInfo = new CharGroupInfo(newNodeAddress + 1, final CharGroupInfo newInfo = new CharGroupInfo(newNodeAddress + 1,
-1 /* endAddress */, flags, characters, frequency, nodeParentAddress, -1 /* endAddress */, flags, characters, frequency, nodeParentAddress,
FormatSpec.NO_CHILDREN_ADDRESS, shortcuts, bigrams); FormatSpec.NO_CHILDREN_ADDRESS, shortcuts, bigrams);
BinaryDictIOUtils.writeNode(destination, new CharGroupInfo[]{ newInfo }); BinaryDictIOUtils.writeNodes(destination, new CharGroupInfo[]{ newInfo });
return; return;
} else { } else {
depth--; depth--;

View file

@ -60,7 +60,7 @@ public final class FormatSpec {
*/ */
/* /*
* Array of Node(FusionDictionary.Node) layout is as follows: * Node array (FusionDictionary.PtNodeArray) layout is as follows:
* *
* g | * g |
* r | the number of groups, 1 or 2 bytes. * r | the number of groups, 1 or 2 bytes.
@ -86,7 +86,7 @@ public final class FormatSpec {
* linkaddress * linkaddress
*/ */
/* Node(CharGroup) layout is as follows: /* Node (FusionDictionary.CharGroup) layout is as follows:
* | IF !SUPPORTS_DYNAMIC_UPDATE * | IF !SUPPORTS_DYNAMIC_UPDATE
* | addressType xx : mask with MASK_GROUP_ADDRESS_TYPE * | addressType xx : mask with MASK_GROUP_ADDRESS_TYPE
* | 2 bits, 00 = no children : FLAG_GROUP_ADDRESS_TYPE_NOADDRESS * | 2 bits, 00 = no children : FLAG_GROUP_ADDRESS_TYPE_NOADDRESS
@ -251,7 +251,7 @@ public final class FormatSpec {
static final int INVALID_CHARACTER = -1; static final int INVALID_CHARACTER = -1;
static final int MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT = 0x7F; // 127 static final int MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT = 0x7F; // 127
static final int MAX_CHARGROUPS_IN_A_NODE = 0x7FFF; // 32767 static final int MAX_CHARGROUPS_IN_A_PT_NODE_ARRAY = 0x7FFF; // 32767
static final int MAX_BIGRAMS_IN_A_GROUP = 10000; static final int MAX_BIGRAMS_IN_A_GROUP = 10000;
static final int MAX_TERMINAL_FREQUENCY = 255; static final int MAX_TERMINAL_FREQUENCY = 255;

View file

@ -37,14 +37,14 @@ public final class FusionDictionary implements Iterable<Word> {
private static int CHARACTER_NOT_FOUND_INDEX = -1; private static int CHARACTER_NOT_FOUND_INDEX = -1;
/** /**
* A node of the dictionary, containing several CharGroups. * A node array of the dictionary, containing several CharGroups.
* *
* A node is but an ordered array of CharGroups, which essentially contain all the * A PtNodeArray is but an ordered array of CharGroups, which essentially contain all the
* real information. * real information.
* This class also contains fields to cache size and address, to help with binary * This class also contains fields to cache size and address, to help with binary
* generation. * generation.
*/ */
public static final class Node { public static final class PtNodeArray {
ArrayList<CharGroup> mData; ArrayList<CharGroup> mData;
// To help with binary generation // To help with binary generation
int mCachedSize = Integer.MIN_VALUE; int mCachedSize = Integer.MIN_VALUE;
@ -57,10 +57,10 @@ public final class FusionDictionary implements Iterable<Word> {
int mCachedAddressAfterUpdate = Integer.MIN_VALUE; int mCachedAddressAfterUpdate = Integer.MIN_VALUE;
int mCachedParentAddress = 0; int mCachedParentAddress = 0;
public Node() { public PtNodeArray() {
mData = new ArrayList<CharGroup>(); mData = new ArrayList<CharGroup>();
} }
public Node(ArrayList<CharGroup> data) { public PtNodeArray(ArrayList<CharGroup> data) {
mData = data; mData = data;
} }
} }
@ -98,7 +98,7 @@ public final class FusionDictionary implements Iterable<Word> {
* This is the central class of the in-memory representation. A CharGroup is what can * This is the central class of the in-memory representation. A CharGroup is what can
* be seen as a traditional "trie node", except it can hold several characters at the * be seen as a traditional "trie node", except it can hold several characters at the
* same time. A CharGroup essentially represents one or several characters in the middle * same time. A CharGroup essentially represents one or several characters in the middle
* of the trie trie; as such, it can be a terminal, and it can have children. * of the trie tree; as such, it can be a terminal, and it can have children.
* In this in-memory representation, whether the CharGroup is a terminal or not is represented * In this in-memory representation, whether the CharGroup is a terminal or not is represented
* in the frequency, where NOT_A_TERMINAL (= -1) means this is not a terminal and any other * in the frequency, where NOT_A_TERMINAL (= -1) means this is not a terminal and any other
* value is the frequency of this terminal. A terminal may have non-null shortcuts and/or * value is the frequency of this terminal. A terminal may have non-null shortcuts and/or
@ -110,7 +110,7 @@ public final class FusionDictionary implements Iterable<Word> {
ArrayList<WeightedString> mShortcutTargets; ArrayList<WeightedString> mShortcutTargets;
ArrayList<WeightedString> mBigrams; ArrayList<WeightedString> mBigrams;
int mFrequency; // NOT_A_TERMINAL == mFrequency indicates this is not a terminal. int mFrequency; // NOT_A_TERMINAL == mFrequency indicates this is not a terminal.
Node mChildren; PtNodeArray mChildren;
boolean mIsNotAWord; // Only a shortcut boolean mIsNotAWord; // Only a shortcut
boolean mIsBlacklistEntry; boolean mIsBlacklistEntry;
// mCachedSize and mCachedAddressBefore/AfterUpdate are helpers for binary dictionary // mCachedSize and mCachedAddressBefore/AfterUpdate are helpers for binary dictionary
@ -137,7 +137,8 @@ public final class FusionDictionary implements Iterable<Word> {
public CharGroup(final int[] chars, final ArrayList<WeightedString> shortcutTargets, public CharGroup(final int[] chars, final ArrayList<WeightedString> shortcutTargets,
final ArrayList<WeightedString> bigrams, final int frequency, final ArrayList<WeightedString> bigrams, final int frequency,
final boolean isNotAWord, final boolean isBlacklistEntry, final Node children) { final boolean isNotAWord, final boolean isBlacklistEntry,
final PtNodeArray children) {
mChars = chars; mChars = chars;
mFrequency = frequency; mFrequency = frequency;
mShortcutTargets = shortcutTargets; mShortcutTargets = shortcutTargets;
@ -149,7 +150,7 @@ public final class FusionDictionary implements Iterable<Word> {
public void addChild(CharGroup n) { public void addChild(CharGroup n) {
if (null == mChildren) { if (null == mChildren) {
mChildren = new Node(); mChildren = new PtNodeArray();
} }
mChildren.mData.add(n); mChildren.mData.add(n);
} }
@ -344,10 +345,10 @@ public final class FusionDictionary implements Iterable<Word> {
} }
public final DictionaryOptions mOptions; public final DictionaryOptions mOptions;
public final Node mRoot; public final PtNodeArray mRootNodeArray;
public FusionDictionary(final Node root, final DictionaryOptions options) { public FusionDictionary(final PtNodeArray rootNodeArray, final DictionaryOptions options) {
mRoot = root; mRootNodeArray = rootNodeArray;
mOptions = options; mOptions = options;
} }
@ -406,13 +407,13 @@ public final class FusionDictionary implements Iterable<Word> {
} }
/** /**
* Sanity check for a node. * Sanity check for a node array.
* *
* This method checks that all CharGroups in a node are ordered as expected. * This method checks that all CharGroups in a node array are ordered as expected.
* If they are, nothing happens. If they aren't, an exception is thrown. * If they are, nothing happens. If they aren't, an exception is thrown.
*/ */
private void checkStack(Node node) { private void checkStack(PtNodeArray nodeArray) {
ArrayList<CharGroup> stack = node.mData; ArrayList<CharGroup> stack = nodeArray.mData;
int lastValue = -1; int lastValue = -1;
for (int i = 0; i < stack.size(); ++i) { for (int i = 0; i < stack.size(); ++i) {
int currentValue = stack.get(i).mChars[0]; int currentValue = stack.get(i).mChars[0];
@ -431,16 +432,16 @@ public final class FusionDictionary implements Iterable<Word> {
* @param frequency the bigram frequency * @param frequency the bigram frequency
*/ */
public void setBigram(final String word1, final String word2, final int frequency) { public void setBigram(final String word1, final String word2, final int frequency) {
CharGroup charGroup = findWordInTree(mRoot, word1); CharGroup charGroup = findWordInTree(mRootNodeArray, word1);
if (charGroup != null) { if (charGroup != null) {
final CharGroup charGroup2 = findWordInTree(mRoot, word2); final CharGroup charGroup2 = findWordInTree(mRootNodeArray, word2);
if (charGroup2 == null) { if (charGroup2 == null) {
add(getCodePoints(word2), 0, null, false /* isNotAWord */, add(getCodePoints(word2), 0, null, false /* isNotAWord */,
false /* isBlacklistEntry */); false /* isBlacklistEntry */);
// The chargroup for the first word may have moved by the above insertion, // The chargroup for the first word may have moved by the above insertion,
// if word1 and word2 share a common stem that happens not to have been // if word1 and word2 share a common stem that happens not to have been
// a cutting point until now. In this case, we need to refresh charGroup. // a cutting point until now. In this case, we need to refresh charGroup.
charGroup = findWordInTree(mRoot, word1); charGroup = findWordInTree(mRootNodeArray, word1);
} }
charGroup.addBigram(word2, frequency); charGroup.addBigram(word2, frequency);
} else { } else {
@ -469,38 +470,38 @@ public final class FusionDictionary implements Iterable<Word> {
return; return;
} }
Node currentNode = mRoot; PtNodeArray currentNodeArray = mRootNodeArray;
int charIndex = 0; int charIndex = 0;
CharGroup currentGroup = null; CharGroup currentGroup = null;
int differentCharIndex = 0; // Set by the loop to the index of the char that differs int differentCharIndex = 0; // Set by the loop to the index of the char that differs
int nodeIndex = findIndexOfChar(mRoot, word[charIndex]); int nodeIndex = findIndexOfChar(mRootNodeArray, word[charIndex]);
while (CHARACTER_NOT_FOUND_INDEX != nodeIndex) { while (CHARACTER_NOT_FOUND_INDEX != nodeIndex) {
currentGroup = currentNode.mData.get(nodeIndex); currentGroup = currentNodeArray.mData.get(nodeIndex);
differentCharIndex = compareArrays(currentGroup.mChars, word, charIndex); differentCharIndex = compareCharArrays(currentGroup.mChars, word, charIndex);
if (ARRAYS_ARE_EQUAL != differentCharIndex if (ARRAYS_ARE_EQUAL != differentCharIndex
&& differentCharIndex < currentGroup.mChars.length) break; && differentCharIndex < currentGroup.mChars.length) break;
if (null == currentGroup.mChildren) break; if (null == currentGroup.mChildren) break;
charIndex += currentGroup.mChars.length; charIndex += currentGroup.mChars.length;
if (charIndex >= word.length) break; if (charIndex >= word.length) break;
currentNode = currentGroup.mChildren; currentNodeArray = currentGroup.mChildren;
nodeIndex = findIndexOfChar(currentNode, word[charIndex]); nodeIndex = findIndexOfChar(currentNodeArray, word[charIndex]);
} }
if (CHARACTER_NOT_FOUND_INDEX == nodeIndex) { if (CHARACTER_NOT_FOUND_INDEX == nodeIndex) {
// No node at this point to accept the word. Create one. // No node at this point to accept the word. Create one.
final int insertionIndex = findInsertionIndex(currentNode, word[charIndex]); final int insertionIndex = findInsertionIndex(currentNodeArray, word[charIndex]);
final CharGroup newGroup = new CharGroup( final CharGroup newGroup = new CharGroup(
Arrays.copyOfRange(word, charIndex, word.length), Arrays.copyOfRange(word, charIndex, word.length),
shortcutTargets, null /* bigrams */, frequency, isNotAWord, isBlacklistEntry); shortcutTargets, null /* bigrams */, frequency, isNotAWord, isBlacklistEntry);
currentNode.mData.add(insertionIndex, newGroup); currentNodeArray.mData.add(insertionIndex, newGroup);
if (DBG) checkStack(currentNode); if (DBG) checkStack(currentNodeArray);
} else { } else {
// There is a word with a common prefix. // There is a word with a common prefix.
if (differentCharIndex == currentGroup.mChars.length) { if (differentCharIndex == currentGroup.mChars.length) {
if (charIndex + differentCharIndex >= word.length) { if (charIndex + differentCharIndex >= word.length) {
// The new word is a prefix of an existing word, but the node on which it // The new word is a prefix of an existing word, but the node on which it
// should end already exists as is. Since the old CharNode was not a terminal, // should end already exists as is. Since the old CharGroup was not a terminal,
// make it one by filling in its frequency and other attributes // make it one by filling in its frequency and other attributes
currentGroup.update(frequency, shortcutTargets, null, isNotAWord, currentGroup.update(frequency, shortcutTargets, null, isNotAWord,
isBlacklistEntry); isBlacklistEntry);
@ -511,7 +512,7 @@ public final class FusionDictionary implements Iterable<Word> {
Arrays.copyOfRange(word, charIndex + differentCharIndex, word.length), Arrays.copyOfRange(word, charIndex + differentCharIndex, word.length),
shortcutTargets, null /* bigrams */, frequency, isNotAWord, shortcutTargets, null /* bigrams */, frequency, isNotAWord,
isBlacklistEntry); isBlacklistEntry);
currentGroup.mChildren = new Node(); currentGroup.mChildren = new PtNodeArray();
currentGroup.mChildren.mData.add(newNode); currentGroup.mChildren.mData.add(newNode);
} }
} else { } else {
@ -524,7 +525,7 @@ public final class FusionDictionary implements Iterable<Word> {
} else { } else {
// Partial prefix match only. We have to replace the current node with a node // Partial prefix match only. We have to replace the current node with a node
// containing the current prefix and create two new ones for the tails. // containing the current prefix and create two new ones for the tails.
Node newChildren = new Node(); PtNodeArray newChildren = new PtNodeArray();
final CharGroup newOldWord = new CharGroup( final CharGroup newOldWord = new CharGroup(
Arrays.copyOfRange(currentGroup.mChars, differentCharIndex, Arrays.copyOfRange(currentGroup.mChars, differentCharIndex,
currentGroup.mChars.length), currentGroup.mShortcutTargets, currentGroup.mChars.length), currentGroup.mShortcutTargets,
@ -552,9 +553,9 @@ public final class FusionDictionary implements Iterable<Word> {
> currentGroup.mChars[differentCharIndex] ? 1 : 0; > currentGroup.mChars[differentCharIndex] ? 1 : 0;
newChildren.mData.add(addIndex, newWord); newChildren.mData.add(addIndex, newWord);
} }
currentNode.mData.set(nodeIndex, newParent); currentNodeArray.mData.set(nodeIndex, newParent);
} }
if (DBG) checkStack(currentNode); if (DBG) checkStack(currentNodeArray);
} }
} }
} }
@ -576,7 +577,7 @@ public final class FusionDictionary implements Iterable<Word> {
* @param dstOffset the offset in the right-hand side string. * @param dstOffset the offset in the right-hand side string.
* @return the index at which the strings differ, or ARRAYS_ARE_EQUAL = 0 if they don't. * @return the index at which the strings differ, or ARRAYS_ARE_EQUAL = 0 if they don't.
*/ */
private static int compareArrays(final int[] src, final int[] dst, int dstOffset) { private static int compareCharArrays(final int[] src, final int[] dst, int dstOffset) {
// We do NOT test the first char, because we come from a method that already // We do NOT test the first char, because we come from a method that already
// tested it. // tested it.
for (int i = 1; i < src.length; ++i) { for (int i = 1; i < src.length; ++i) {
@ -603,10 +604,10 @@ public final class FusionDictionary implements Iterable<Word> {
final static private CharGroupComparator CHARGROUP_COMPARATOR = new CharGroupComparator(); final static private CharGroupComparator CHARGROUP_COMPARATOR = new CharGroupComparator();
/** /**
* Finds the insertion index of a character within a node. * Finds the insertion index of a character within a node array.
*/ */
private static int findInsertionIndex(final Node node, int character) { private static int findInsertionIndex(final PtNodeArray nodeArray, int character) {
final ArrayList<CharGroup> data = node.mData; final ArrayList<CharGroup> data = nodeArray.mData;
final CharGroup reference = new CharGroup(new int[] { character }, final CharGroup reference = new CharGroup(new int[] { character },
null /* shortcutTargets */, null /* bigrams */, 0, false /* isNotAWord */, null /* shortcutTargets */, null /* bigrams */, 0, false /* isNotAWord */,
false /* isBlacklistEntry */); false /* isBlacklistEntry */);
@ -615,16 +616,16 @@ public final class FusionDictionary implements Iterable<Word> {
} }
/** /**
* Find the index of a char in a node, if it exists. * Find the index of a char in a node array, if it exists.
* *
* @param node the node to search in. * @param nodeArray the node array to search in.
* @param character the character to search for. * @param character the character to search for.
* @return the position of the character if it's there, or CHARACTER_NOT_FOUND_INDEX = -1 else. * @return the position of the character if it's there, or CHARACTER_NOT_FOUND_INDEX = -1 else.
*/ */
private static int findIndexOfChar(final Node node, int character) { private static int findIndexOfChar(final PtNodeArray nodeArray, int character) {
final int insertionIndex = findInsertionIndex(node, character); final int insertionIndex = findInsertionIndex(nodeArray, character);
if (node.mData.size() <= insertionIndex) return CHARACTER_NOT_FOUND_INDEX; if (nodeArray.mData.size() <= insertionIndex) return CHARACTER_NOT_FOUND_INDEX;
return character == node.mData.get(insertionIndex).mChars[0] ? insertionIndex return character == nodeArray.mData.get(insertionIndex).mChars[0] ? insertionIndex
: CHARACTER_NOT_FOUND_INDEX; : CHARACTER_NOT_FOUND_INDEX;
} }
@ -632,16 +633,16 @@ public final class FusionDictionary implements Iterable<Word> {
* Helper method to find a word in a given branch. * Helper method to find a word in a given branch.
*/ */
@SuppressWarnings("unused") @SuppressWarnings("unused")
public static CharGroup findWordInTree(Node node, final String string) { public static CharGroup findWordInTree(PtNodeArray nodeArray, final String string) {
int index = 0; int index = 0;
final StringBuilder checker = DBG ? new StringBuilder() : null; final StringBuilder checker = DBG ? new StringBuilder() : null;
final int[] codePoints = getCodePoints(string); final int[] codePoints = getCodePoints(string);
CharGroup currentGroup; CharGroup currentGroup;
do { do {
int indexOfGroup = findIndexOfChar(node, codePoints[index]); int indexOfGroup = findIndexOfChar(nodeArray, codePoints[index]);
if (CHARACTER_NOT_FOUND_INDEX == indexOfGroup) return null; if (CHARACTER_NOT_FOUND_INDEX == indexOfGroup) return null;
currentGroup = node.mData.get(indexOfGroup); currentGroup = nodeArray.mData.get(indexOfGroup);
if (codePoints.length - index < currentGroup.mChars.length) return null; if (codePoints.length - index < currentGroup.mChars.length) return null;
int newIndex = index; int newIndex = index;
@ -653,9 +654,9 @@ public final class FusionDictionary implements Iterable<Word> {
if (DBG) checker.append(new String(currentGroup.mChars, 0, currentGroup.mChars.length)); if (DBG) checker.append(new String(currentGroup.mChars, 0, currentGroup.mChars.length));
if (index < codePoints.length) { if (index < codePoints.length) {
node = currentGroup.mChildren; nodeArray = currentGroup.mChildren;
} }
} while (null != node && index < codePoints.length); } while (null != nodeArray && index < codePoints.length);
if (index < codePoints.length) return null; if (index < codePoints.length) return null;
if (!currentGroup.isTerminal()) return null; if (!currentGroup.isTerminal()) return null;
@ -670,20 +671,20 @@ public final class FusionDictionary implements Iterable<Word> {
if (null == s || "".equals(s)) { if (null == s || "".equals(s)) {
throw new RuntimeException("Can't search for a null or empty string"); throw new RuntimeException("Can't search for a null or empty string");
} }
return null != findWordInTree(mRoot, s); return null != findWordInTree(mRootNodeArray, s);
} }
/** /**
* Recursively count the number of character groups in a given branch of the trie. * Recursively count the number of character groups in a given branch of the trie.
* *
* @param node the parent node. * @param nodeArray the parent node.
* @return the number of char groups in all the branch under this node. * @return the number of char groups in all the branch under this node.
*/ */
public static int countCharGroups(final Node node) { public static int countCharGroups(final PtNodeArray nodeArray) {
final int nodeSize = node.mData.size(); final int nodeSize = nodeArray.mData.size();
int size = nodeSize; int size = nodeSize;
for (int i = nodeSize - 1; i >= 0; --i) { for (int i = nodeSize - 1; i >= 0; --i) {
CharGroup group = node.mData.get(i); CharGroup group = nodeArray.mData.get(i);
if (null != group.mChildren) if (null != group.mChildren)
size += countCharGroups(group.mChildren); size += countCharGroups(group.mChildren);
} }
@ -693,15 +694,15 @@ public final class FusionDictionary implements Iterable<Word> {
/** /**
* Recursively count the number of nodes in a given branch of the trie. * Recursively count the number of nodes in a given branch of the trie.
* *
* @param node the node to count. * @param nodeArray the node array to count.
* @return the number of nodes in this branch. * @return the number of nodes in this branch.
*/ */
public static int countNodes(final Node node) { public static int countNodeArrays(final PtNodeArray nodeArray) {
int size = 1; int size = 1;
for (int i = node.mData.size() - 1; i >= 0; --i) { for (int i = nodeArray.mData.size() - 1; i >= 0; --i) {
CharGroup group = node.mData.get(i); CharGroup group = nodeArray.mData.get(i);
if (null != group.mChildren) if (null != group.mChildren)
size += countNodes(group.mChildren); size += countNodeArrays(group.mChildren);
} }
return size; return size;
} }
@ -709,10 +710,10 @@ public final class FusionDictionary implements Iterable<Word> {
// Recursively find out whether there are any bigrams. // Recursively find out whether there are any bigrams.
// This can be pretty expensive especially if there aren't any (we return as soon // This can be pretty expensive especially if there aren't any (we return as soon
// as we find one, so it's much cheaper if there are bigrams) // as we find one, so it's much cheaper if there are bigrams)
private static boolean hasBigramsInternal(final Node node) { private static boolean hasBigramsInternal(final PtNodeArray nodeArray) {
if (null == node) return false; if (null == nodeArray) return false;
for (int i = node.mData.size() - 1; i >= 0; --i) { for (int i = nodeArray.mData.size() - 1; i >= 0; --i) {
CharGroup group = node.mData.get(i); CharGroup group = nodeArray.mData.get(i);
if (null != group.mBigrams) return true; if (null != group.mBigrams) return true;
if (hasBigramsInternal(group.mChildren)) return true; if (hasBigramsInternal(group.mChildren)) return true;
} }
@ -729,7 +730,7 @@ public final class FusionDictionary implements Iterable<Word> {
// find a more efficient way of doing this, without compromising too much on memory // find a more efficient way of doing this, without compromising too much on memory
// and ease of use. // and ease of use.
public boolean hasBigrams() { public boolean hasBigrams() {
return hasBigramsInternal(mRoot); return hasBigramsInternal(mRootNodeArray);
} }
// Historically, the tails of the words were going to be merged to save space. // Historically, the tails of the words were going to be merged to save space.
@ -750,13 +751,13 @@ public final class FusionDictionary implements Iterable<Word> {
// MakedictLog.i("Merging nodes. Number of nodes : " + countNodes(root)); // MakedictLog.i("Merging nodes. Number of nodes : " + countNodes(root));
// MakedictLog.i("Number of groups : " + countCharGroups(root)); // MakedictLog.i("Number of groups : " + countCharGroups(root));
// //
// final HashMap<String, ArrayList<Node>> repository = // final HashMap<String, ArrayList<PtNodeArray>> repository =
// new HashMap<String, ArrayList<Node>>(); // new HashMap<String, ArrayList<PtNodeArray>>();
// mergeTailsInner(repository, root); // mergeTailsInner(repository, root);
// //
// MakedictLog.i("Number of different pseudohashes : " + repository.size()); // MakedictLog.i("Number of different pseudohashes : " + repository.size());
// int size = 0; // int size = 0;
// for (ArrayList<Node> a : repository.values()) { // for (ArrayList<PtNodeArray> a : repository.values()) {
// size += a.size(); // size += a.size();
// } // }
// MakedictLog.i("Number of nodes after merge : " + (1 + size)); // MakedictLog.i("Number of nodes after merge : " + (1 + size));
@ -764,7 +765,7 @@ public final class FusionDictionary implements Iterable<Word> {
} }
// The following methods are used by the deactivated mergeTails() // The following methods are used by the deactivated mergeTails()
// private static boolean isEqual(Node a, Node b) { // private static boolean isEqual(PtNodeArray a, PtNodeArray b) {
// if (null == a && null == b) return true; // if (null == a && null == b) return true;
// if (null == a || null == b) return false; // if (null == a || null == b) return false;
// if (a.data.size() != b.data.size()) return false; // if (a.data.size() != b.data.size()) return false;
@ -781,21 +782,21 @@ public final class FusionDictionary implements Iterable<Word> {
// return true; // return true;
// } // }
// static private HashMap<String, ArrayList<Node>> mergeTailsInner( // static private HashMap<String, ArrayList<PtNodeArray>> mergeTailsInner(
// final HashMap<String, ArrayList<Node>> map, final Node node) { // final HashMap<String, ArrayList<PtNodeArray>> map, final PtNodeArray nodeArray) {
// final ArrayList<CharGroup> branches = node.data; // final ArrayList<CharGroup> branches = nodeArray.data;
// final int nodeSize = branches.size(); // final int nodeSize = branches.size();
// for (int i = 0; i < nodeSize; ++i) { // for (int i = 0; i < nodeSize; ++i) {
// CharGroup group = branches.get(i); // CharGroup group = branches.get(i);
// if (null != group.children) { // if (null != group.children) {
// String pseudoHash = getPseudoHash(group.children); // String pseudoHash = getPseudoHash(group.children);
// ArrayList<Node> similarList = map.get(pseudoHash); // ArrayList<PtNodeArray> similarList = map.get(pseudoHash);
// if (null == similarList) { // if (null == similarList) {
// similarList = new ArrayList<Node>(); // similarList = new ArrayList<PtNodeArray>();
// map.put(pseudoHash, similarList); // map.put(pseudoHash, similarList);
// } // }
// boolean merged = false; // boolean merged = false;
// for (Node similar : similarList) { // for (PtNodeArray similar : similarList) {
// if (isEqual(group.children, similar)) { // if (isEqual(group.children, similar)) {
// group.children = similar; // group.children = similar;
// merged = true; // merged = true;
@ -811,9 +812,9 @@ public final class FusionDictionary implements Iterable<Word> {
// return map; // return map;
// } // }
// private static String getPseudoHash(final Node node) { // private static String getPseudoHash(final PtNodeArray nodeArray) {
// StringBuilder s = new StringBuilder(); // StringBuilder s = new StringBuilder();
// for (CharGroup g : node.data) { // for (CharGroup g : nodeArray.data) {
// s.append(g.frequency); // s.append(g.frequency);
// for (int ch : g.chars) { // for (int ch : g.chars) {
// s.append(Character.toChars(ch)); // s.append(Character.toChars(ch));
@ -901,6 +902,6 @@ public final class FusionDictionary implements Iterable<Word> {
*/ */
@Override @Override
public Iterator<Word> iterator() { public Iterator<Word> iterator() {
return new DictionaryIterator(mRoot.mData); return new DictionaryIterator(mRootNodeArray.mData);
} }
} }

View file

@ -25,7 +25,7 @@ import com.android.inputmethod.latin.makedict.BinaryDictIOUtils;
import com.android.inputmethod.latin.makedict.BinaryDictReader; import com.android.inputmethod.latin.makedict.BinaryDictReader;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary; import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.Node; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.PendingAttribute; import com.android.inputmethod.latin.makedict.PendingAttribute;
import com.android.inputmethod.latin.makedict.UnsupportedFormatException; import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
import com.android.inputmethod.latin.personalization.UserHistoryDictionaryBigramList; import com.android.inputmethod.latin.personalization.UserHistoryDictionaryBigramList;
@ -78,7 +78,7 @@ public final class UserHistoryDictIOUtils {
@UsedForTesting @UsedForTesting
static FusionDictionary constructFusionDictionary( static FusionDictionary constructFusionDictionary(
final BigramDictionaryInterface dict, final UserHistoryDictionaryBigramList bigrams) { final BigramDictionaryInterface dict, final UserHistoryDictionaryBigramList bigrams) {
final FusionDictionary fusionDict = new FusionDictionary(new Node(), final FusionDictionary fusionDict = new FusionDictionary(new PtNodeArray(),
new FusionDictionary.DictionaryOptions(new HashMap<String, String>(), false, new FusionDictionary.DictionaryOptions(new HashMap<String, String>(), false,
false)); false));
int profTotal = 0; int profTotal = 0;
@ -102,7 +102,7 @@ public final class UserHistoryDictIOUtils {
if (word1 == null) { // unigram if (word1 == null) { // unigram
fusionDict.add(word2, freq, null, false /* isNotAWord */); fusionDict.add(word2, freq, null, false /* isNotAWord */);
} else { // bigram } else { // bigram
if (FusionDictionary.findWordInTree(fusionDict.mRoot, word1) == null) { if (FusionDictionary.findWordInTree(fusionDict.mRootNodeArray, word1) == null) {
fusionDict.add(word1, 2, null, false /* isNotAWord */); fusionDict.add(word1, 2, null, false /* isNotAWord */);
} }
fusionDict.setBigram(word1, word2, freq); fusionDict.setBigram(word1, word2, freq);

View file

@ -20,7 +20,7 @@ import android.test.AndroidTestCase;
import android.test.suitebuilder.annotation.SmallTest; import android.test.suitebuilder.annotation.SmallTest;
import com.android.inputmethod.latin.makedict.FusionDictionary; import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.Node; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import java.util.HashMap; import java.util.HashMap;
@ -30,21 +30,21 @@ import java.util.HashMap;
@SmallTest @SmallTest
public class FusionDictionaryTests extends AndroidTestCase { public class FusionDictionaryTests extends AndroidTestCase {
public void testFindWordInTree() { public void testFindWordInTree() {
FusionDictionary dict = new FusionDictionary(new Node(), FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
new FusionDictionary.DictionaryOptions(new HashMap<String,String>(), false, false)); new FusionDictionary.DictionaryOptions(new HashMap<String,String>(), false, false));
dict.add("abc", 10, null, false /* isNotAWord */); dict.add("abc", 10, null, false /* isNotAWord */);
assertNull(FusionDictionary.findWordInTree(dict.mRoot, "aaa")); assertNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "aaa"));
assertNotNull(FusionDictionary.findWordInTree(dict.mRoot, "abc")); assertNotNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "abc"));
dict.add("aa", 10, null, false /* isNotAWord */); dict.add("aa", 10, null, false /* isNotAWord */);
assertNull(FusionDictionary.findWordInTree(dict.mRoot, "aaa")); assertNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "aaa"));
assertNotNull(FusionDictionary.findWordInTree(dict.mRoot, "aa")); assertNotNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "aa"));
dict.add("babcd", 10, null, false /* isNotAWord */); dict.add("babcd", 10, null, false /* isNotAWord */);
dict.add("bacde", 10, null, false /* isNotAWord */); dict.add("bacde", 10, null, false /* isNotAWord */);
assertNull(FusionDictionary.findWordInTree(dict.mRoot, "ba")); assertNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "ba"));
assertNotNull(FusionDictionary.findWordInTree(dict.mRoot, "babcd")); assertNotNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "babcd"));
assertNotNull(FusionDictionary.findWordInTree(dict.mRoot, "bacde")); assertNotNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "bacde"));
} }
} }

View file

@ -25,7 +25,7 @@ import android.util.SparseArray;
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.FusionDictionaryBufferInterface; import com.android.inputmethod.latin.makedict.BinaryDictDecoder.FusionDictionaryBufferInterface;
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup; import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
import com.android.inputmethod.latin.makedict.FusionDictionary.Node; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import com.android.inputmethod.latin.utils.CollectionUtils; import com.android.inputmethod.latin.utils.CollectionUtils;
@ -226,7 +226,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
// check unigram // check unigram
for (final String word : words) { for (final String word : words) {
final CharGroup cg = FusionDictionary.findWordInTree(dict.mRoot, word); final CharGroup cg = FusionDictionary.findWordInTree(dict.mRootNodeArray, word);
assertNotNull(cg); assertNotNull(cg);
} }
@ -234,7 +234,8 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
for (int i = 0; i < bigrams.size(); ++i) { for (int i = 0; i < bigrams.size(); ++i) {
final int w1 = bigrams.keyAt(i); final int w1 = bigrams.keyAt(i);
for (final int w2 : bigrams.valueAt(i)) { for (final int w2 : bigrams.valueAt(i)) {
final CharGroup cg = FusionDictionary.findWordInTree(dict.mRoot, words.get(w1)); final CharGroup cg = FusionDictionary.findWordInTree(dict.mRootNodeArray,
words.get(w1));
assertNotNull(words.get(w1) + "," + words.get(w2), cg.getBigram(words.get(w2))); assertNotNull(words.get(w1) + "," + words.get(w2), cg.getBigram(words.get(w2)));
} }
} }
@ -242,7 +243,8 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
// check shortcut // check shortcut
if (shortcutMap != null) { if (shortcutMap != null) {
for (final Map.Entry<String, List<String>> entry : shortcutMap.entrySet()) { for (final Map.Entry<String, List<String>> entry : shortcutMap.entrySet()) {
final CharGroup group = FusionDictionary.findWordInTree(dict.mRoot, entry.getKey()); final CharGroup group = FusionDictionary.findWordInTree(dict.mRootNodeArray,
entry.getKey());
for (final String word : entry.getValue()) { for (final String word : entry.getValue()) {
assertNotNull("shortcut not found: " + entry.getKey() + ", " + word, assertNotNull("shortcut not found: " + entry.getKey() + ", " + word,
group.getShortcut(word)); group.getShortcut(word));
@ -297,7 +299,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
} }
assertNotNull(file); assertNotNull(file);
final FusionDictionary dict = new FusionDictionary(new Node(), final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
new FusionDictionary.DictionaryOptions(new HashMap<String,String>(), false, false)); new FusionDictionary.DictionaryOptions(new HashMap<String,String>(), false, false));
addUnigrams(words.size(), dict, words, shortcuts); addUnigrams(words.size(), dict, words, shortcuts);
addBigrams(dict, words, bigrams); addBigrams(dict, words, bigrams);
@ -440,7 +442,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
assertNotNull(file); assertNotNull(file);
// making the dictionary from lists of words. // making the dictionary from lists of words.
final FusionDictionary dict = new FusionDictionary(new Node(), final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
new FusionDictionary.DictionaryOptions( new FusionDictionary.DictionaryOptions(
new HashMap<String, String>(), false, false)); new HashMap<String, String>(), false, false));
addUnigrams(words.size(), dict, words, null /* shortcutMap */); addUnigrams(words.size(), dict, words, null /* shortcutMap */);
@ -538,7 +540,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
} }
assertNotNull(file); assertNotNull(file);
final FusionDictionary dict = new FusionDictionary(new Node(), final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
new FusionDictionary.DictionaryOptions( new FusionDictionary.DictionaryOptions(
new HashMap<String, String>(), false, false)); new HashMap<String, String>(), false, false));
addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */); addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */);
@ -599,7 +601,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
} }
assertNotNull(file); assertNotNull(file);
final FusionDictionary dict = new FusionDictionary(new Node(), final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
new FusionDictionary.DictionaryOptions( new FusionDictionary.DictionaryOptions(
new HashMap<String, String>(), false, false)); new HashMap<String, String>(), false, false));
addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */); addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */);

View file

@ -24,7 +24,7 @@ import android.util.Log;
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.ByteBufferWrapper; import com.android.inputmethod.latin.makedict.BinaryDictDecoder.ByteBufferWrapper;
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.FusionDictionaryBufferInterface; import com.android.inputmethod.latin.makedict.BinaryDictDecoder.FusionDictionaryBufferInterface;
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
import com.android.inputmethod.latin.makedict.FusionDictionary.Node; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import com.android.inputmethod.latin.utils.CollectionUtils; import com.android.inputmethod.latin.utils.CollectionUtils;
@ -277,7 +277,7 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
} }
// set an initial dictionary. // set an initial dictionary.
final FusionDictionary dict = new FusionDictionary(new Node(), final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
new FusionDictionary.DictionaryOptions(new HashMap<String,String>(), false, false)); new FusionDictionary.DictionaryOptions(new HashMap<String,String>(), false, false));
dict.add("abcd", 10, null, false); dict.add("abcd", 10, null, false);
@ -328,7 +328,7 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
} }
// set an initial dictionary. // set an initial dictionary.
final FusionDictionary dict = new FusionDictionary(new Node(), final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
new FusionDictionary.DictionaryOptions(new HashMap<String,String>(), false, false)); new FusionDictionary.DictionaryOptions(new HashMap<String,String>(), false, false));
dict.add("abcd", 10, null, false); dict.add("abcd", 10, null, false);
dict.add("efgh", 15, null, false); dict.add("efgh", 15, null, false);
@ -365,7 +365,7 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
assertNotNull(file); assertNotNull(file);
// set an initial dictionary. // set an initial dictionary.
final FusionDictionary dict = new FusionDictionary(new Node(), final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
new FusionDictionary.DictionaryOptions(new HashMap<String, String>(), false, new FusionDictionary.DictionaryOptions(new HashMap<String, String>(), false,
false)); false));
dict.add("initial", 10, null, false); dict.add("initial", 10, null, false);

View file

@ -86,7 +86,7 @@ public class UserHistoryDictIOUtilsTests extends AndroidTestCase
private void checkWordInFusionDict(final FusionDictionary dict, final String word, private void checkWordInFusionDict(final FusionDictionary dict, final String word,
final ArrayList<String> expectedBigrams) { final ArrayList<String> expectedBigrams) {
final CharGroup group = FusionDictionary.findWordInTree(dict.mRoot, word); final CharGroup group = FusionDictionary.findWordInTree(dict.mRootNodeArray, word);
assertNotNull(group); assertNotNull(group);
assertTrue(group.isTerminal()); assertTrue(group.isTerminal());

View file

@ -19,7 +19,7 @@ package com.android.inputmethod.latin.dicttool;
import com.android.inputmethod.latin.makedict.FormatSpec; import com.android.inputmethod.latin.makedict.FormatSpec;
import com.android.inputmethod.latin.makedict.FusionDictionary; import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.Node; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import com.android.inputmethod.latin.makedict.Word; import com.android.inputmethod.latin.makedict.Word;
@ -117,7 +117,7 @@ public class CombinedInputOutput {
final boolean processLigatures = final boolean processLigatures =
FRENCH_LIGATURE_PROCESSING_OPTION.equals(attributes.get(OPTIONS_TAG)); FRENCH_LIGATURE_PROCESSING_OPTION.equals(attributes.get(OPTIONS_TAG));
attributes.remove(OPTIONS_TAG); attributes.remove(OPTIONS_TAG);
final FusionDictionary dict = new FusionDictionary(new Node(), new DictionaryOptions( final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), new DictionaryOptions(
attributes, processUmlauts, processLigatures)); attributes, processUmlauts, processLigatures));
String line; String line;

View file

@ -121,7 +121,8 @@ public class Diff extends Dicttool.Command {
private static void diffWords(final FusionDictionary dict0, final FusionDictionary dict1) { private static void diffWords(final FusionDictionary dict0, final FusionDictionary dict1) {
boolean hasDifferences = false; boolean hasDifferences = false;
for (final Word word0 : dict0) { for (final Word word0 : dict0) {
final CharGroup word1 = FusionDictionary.findWordInTree(dict1.mRoot, word0.mWord); final CharGroup word1 = FusionDictionary.findWordInTree(dict1.mRootNodeArray,
word0.mWord);
if (null == word1) { if (null == word1) {
// This word is not in dict1 // This word is not in dict1
System.out.println("Deleted: " + word0.mWord + " " + word0.mFrequency); System.out.println("Deleted: " + word0.mWord + " " + word0.mFrequency);
@ -150,7 +151,8 @@ public class Diff extends Dicttool.Command {
} }
} }
for (final Word word1 : dict1) { for (final Word word1 : dict1) {
final CharGroup word0 = FusionDictionary.findWordInTree(dict0.mRoot, word1.mWord); final CharGroup word0 = FusionDictionary.findWordInTree(dict0.mRootNodeArray,
word1.mWord);
if (null == word0) { if (null == word0) {
// This word is not in dict0 // This word is not in dict0
System.out.println("Added: " + word1.mWord + " " + word1.mFrequency); System.out.println("Added: " + word1.mWord + " " + word1.mFrequency);

View file

@ -65,7 +65,7 @@ public class Info extends Dicttool.Command {
private static void showWordInfo(final FusionDictionary dict, final String word, private static void showWordInfo(final FusionDictionary dict, final String word,
final boolean plumbing) { final boolean plumbing) {
final CharGroup group = FusionDictionary.findWordInTree(dict.mRoot, word); final CharGroup group = FusionDictionary.findWordInTree(dict.mRootNodeArray, word);
if (null == group) { if (null == group) {
System.out.println(word + " is not in the dictionary"); System.out.println(word + " is not in the dictionary");
return; return;

View file

@ -18,7 +18,7 @@ package com.android.inputmethod.latin.dicttool;
import com.android.inputmethod.latin.makedict.FusionDictionary; import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.Node; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import com.android.inputmethod.latin.makedict.Word; import com.android.inputmethod.latin.makedict.Word;
@ -124,8 +124,8 @@ public class XmlDictInputOutput {
GERMAN_UMLAUT_PROCESSING_OPTION.equals(optionsString); GERMAN_UMLAUT_PROCESSING_OPTION.equals(optionsString);
final boolean processLigatures = final boolean processLigatures =
FRENCH_LIGATURE_PROCESSING_OPTION.equals(optionsString); FRENCH_LIGATURE_PROCESSING_OPTION.equals(optionsString);
mDictionary = new FusionDictionary(new Node(), new DictionaryOptions(attributes, mDictionary = new FusionDictionary(new PtNodeArray(),
processUmlauts, processLigatures)); new DictionaryOptions(attributes, processUmlauts, processLigatures));
} else { } else {
mState = UNKNOWN; mState = UNKNOWN;
} }

View file

@ -22,7 +22,7 @@ import com.android.inputmethod.latin.makedict.BinaryDictReader;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary; import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.Node; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.UnsupportedFormatException; import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
import junit.framework.TestCase; import junit.framework.TestCase;
@ -42,7 +42,7 @@ public class BinaryDictOffdeviceUtilsTests extends TestCase {
public void testGetRawDictWorks() throws IOException, UnsupportedFormatException { public void testGetRawDictWorks() throws IOException, UnsupportedFormatException {
// Create a thrice-compressed dictionary file. // Create a thrice-compressed dictionary file.
final FusionDictionary dict = new FusionDictionary(new Node(), final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
new DictionaryOptions(new HashMap<String, String>(), new DictionaryOptions(new HashMap<String, String>(),
false /* germanUmlautProcessing */, false /* frenchLigatureProcessing */)); false /* germanUmlautProcessing */, false /* frenchLigatureProcessing */));
dict.add("foo", TEST_FREQ, null, false /* isNotAWord */); dict.add("foo", TEST_FREQ, null, false /* isNotAWord */);
@ -72,7 +72,8 @@ public class BinaryDictOffdeviceUtilsTests extends TestCase {
final FusionDictionary resultDict = BinaryDictDecoder.readDictionaryBinary(reader, final FusionDictionary resultDict = BinaryDictDecoder.readDictionaryBinary(reader,
null /* dict : an optional dictionary to add words to, or null */); null /* dict : an optional dictionary to add words to, or null */);
assertEquals("Dictionary can't be read back correctly", assertEquals("Dictionary can't be read back correctly",
FusionDictionary.findWordInTree(resultDict.mRoot, "foo").getFrequency(), TEST_FREQ); FusionDictionary.findWordInTree(resultDict.mRootNodeArray, "foo").getFrequency(),
TEST_FREQ);
} }
public void testGetRawDictFails() throws IOException { public void testGetRawDictFails() throws IOException {

View file

@ -17,7 +17,7 @@
package com.android.inputmethod.latin.makedict; package com.android.inputmethod.latin.makedict;
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.Node; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import junit.framework.TestCase; import junit.framework.TestCase;
@ -31,7 +31,7 @@ public class BinaryDictEncoderFlattenTreeTests extends TestCase {
// Test the flattened array contains the expected number of nodes, and // Test the flattened array contains the expected number of nodes, and
// that it does not contain any duplicates. // that it does not contain any duplicates.
public void testFlattenNodes() { public void testFlattenNodes() {
final FusionDictionary dict = new FusionDictionary(new Node(), final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
new DictionaryOptions(new HashMap<String, String>(), new DictionaryOptions(new HashMap<String, String>(),
false /* germanUmlautProcessing */, false /* frenchLigatureProcessing */)); false /* germanUmlautProcessing */, false /* frenchLigatureProcessing */));
dict.add("foo", 1, null, false /* isNotAWord */); dict.add("foo", 1, null, false /* isNotAWord */);
@ -39,10 +39,10 @@ public class BinaryDictEncoderFlattenTreeTests extends TestCase {
dict.add("ftb", 1, null, false /* isNotAWord */); dict.add("ftb", 1, null, false /* isNotAWord */);
dict.add("bar", 1, null, false /* isNotAWord */); dict.add("bar", 1, null, false /* isNotAWord */);
dict.add("fool", 1, null, false /* isNotAWord */); dict.add("fool", 1, null, false /* isNotAWord */);
final ArrayList<Node> result = BinaryDictEncoder.flattenTree(dict.mRoot); final ArrayList<PtNodeArray> result = BinaryDictEncoder.flattenTree(dict.mRootNodeArray);
assertEquals(4, result.size()); assertEquals(4, result.size());
while (!result.isEmpty()) { while (!result.isEmpty()) {
final Node n = result.remove(0); final PtNodeArray n = result.remove(0);
assertFalse("Flattened array contained the same node twice", result.contains(n)); assertFalse("Flattened array contained the same node twice", result.contains(n));
} }
} }

View file

@ -19,7 +19,7 @@ package com.android.inputmethod.latin.makedict;
import com.android.inputmethod.latin.makedict.FusionDictionary; import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup; import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.Node; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.Word; import com.android.inputmethod.latin.makedict.Word;
import junit.framework.TestCase; import junit.framework.TestCase;
@ -72,7 +72,7 @@ public class FusionDictionaryTest extends TestCase {
assertNotNull(dict); assertNotNull(dict);
for (final String word : words) { for (final String word : words) {
if (--limit < 0) return; if (--limit < 0) return;
final CharGroup cg = FusionDictionary.findWordInTree(dict.mRoot, word); final CharGroup cg = FusionDictionary.findWordInTree(dict.mRootNodeArray, word);
assertNotNull(cg); assertNotNull(cg);
} }
} }
@ -95,7 +95,7 @@ public class FusionDictionaryTest extends TestCase {
// Test the flattened array contains the expected number of nodes, and // Test the flattened array contains the expected number of nodes, and
// that it does not contain any duplicates. // that it does not contain any duplicates.
public void testFusion() { public void testFusion() {
final FusionDictionary dict = new FusionDictionary(new Node(), final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
new DictionaryOptions(new HashMap<String, String>(), new DictionaryOptions(new HashMap<String, String>(),
false /* germanUmlautProcessing */, false /* frenchLigatureProcessing */)); false /* germanUmlautProcessing */, false /* frenchLigatureProcessing */));
final long time = System.currentTimeMillis(); final long time = System.currentTimeMillis();