From af30cbf0ee8370763edf22822ea34a282e882084 Mon Sep 17 00:00:00 2001 From: Jean Chalard Date: Fri, 16 Aug 2013 14:51:37 +0900 Subject: [PATCH] Rename Node to PtNodeArray Bug: 10247660 Change-Id: I1a0ac19f58f96adb5efac5fd35c6404831618c99 --- .../inputmethod/latin/DictionaryWriter.java | 4 +- .../latin/makedict/BinaryDictDecoder.java | 58 ++- .../latin/makedict/BinaryDictEncoder.java | 339 +++++++++--------- .../latin/makedict/BinaryDictIOUtils.java | 41 +-- .../makedict/DynamicBinaryDictIOUtils.java | 87 ++--- .../latin/makedict/FormatSpec.java | 6 +- .../latin/makedict/FusionDictionary.java | 153 ++++---- .../latin/utils/UserHistoryDictIOUtils.java | 6 +- .../latin/FusionDictionaryTests.java | 18 +- .../BinaryDictDecoderEncoderTests.java | 18 +- .../makedict/BinaryDictIOUtilsTests.java | 8 +- .../utils/UserHistoryDictIOUtilsTests.java | 2 +- .../latin/dicttool/CombinedInputOutput.java | 4 +- .../inputmethod/latin/dicttool/Diff.java | 6 +- .../inputmethod/latin/dicttool/Info.java | 2 +- .../latin/dicttool/XmlDictInputOutput.java | 6 +- .../BinaryDictOffdeviceUtilsTests.java | 7 +- .../BinaryDictEncoderFlattenTreeTests.java | 8 +- .../latin/makedict/FusionDictionaryTest.java | 6 +- 19 files changed, 400 insertions(+), 379 deletions(-) diff --git a/java/src/com/android/inputmethod/latin/DictionaryWriter.java b/java/src/com/android/inputmethod/latin/DictionaryWriter.java index 3b300a3fd..1765ce5f8 100644 --- a/java/src/com/android/inputmethod/latin/DictionaryWriter.java +++ b/java/src/com/android/inputmethod/latin/DictionaryWriter.java @@ -23,7 +23,7 @@ import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; import com.android.inputmethod.latin.makedict.BinaryDictEncoder; import com.android.inputmethod.latin.makedict.FormatSpec; import com.android.inputmethod.latin.makedict.FusionDictionary; -import com.android.inputmethod.latin.makedict.FusionDictionary.Node; +import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import com.android.inputmethod.latin.makedict.UnsupportedFormatException; import com.android.inputmethod.latin.utils.CollectionUtils; @@ -51,7 +51,7 @@ public class DictionaryWriter extends AbstractDictionaryWriter { @Override public void clear() { final HashMap attributes = CollectionUtils.newHashMap(); - mFusionDictionary = new FusionDictionary(new Node(), + mFusionDictionary = new FusionDictionary(new PtNodeArray(), new FusionDictionary.DictionaryOptions(attributes, false, false)); } diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoder.java index 767f4fc72..01cc8ac1e 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoder.java @@ -20,7 +20,7 @@ import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup; -import com.android.inputmethod.latin.makedict.FusionDictionary.Node; +import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import com.android.inputmethod.latin.utils.JniUtils; @@ -548,31 +548,31 @@ public final class BinaryDictDecoder { } /** - * Reads a single node from a buffer. + * Reads a single node array from a buffer. * - * This methods reads the file at the current position. A node is fully expected to start at - * the current position. - * This will recursively read other nodes into the structure, populating the reverse + * This methods reads the file at the current position. A node array is fully expected to start + * at the current position. + * This will recursively read other node arrays into the structure, populating the reverse * maps on the fly and using them to keep track of already read nodes. * - * @param buffer the buffer, correctly positioned at the start of a node. + * @param buffer the buffer, correctly positioned at the start of a node array. * @param headerSize the size, in bytes, of the file header. - * @param reverseNodeMap a mapping from addresses to already read nodes. + * @param reverseNodeArrayMap a mapping from addresses to already read node arrays. * @param reverseGroupMap a mapping from addresses to already read character groups. * @param options file format options. - * @return the read node with all his children already read. + * @return the read node array with all his children already read. */ - private static Node readNode(final FusionDictionaryBufferInterface buffer, final int headerSize, - final Map reverseNodeMap, final Map reverseGroupMap, - final FormatOptions options) + private static PtNodeArray readNodeArray(final FusionDictionaryBufferInterface buffer, + final int headerSize, final Map reverseNodeArrayMap, + final Map reverseGroupMap, final FormatOptions options) throws IOException { - final ArrayList nodeContents = new ArrayList(); - final int nodeOrigin = buffer.position() - headerSize; + final ArrayList nodeArrayContents = new ArrayList(); + final int nodeArrayOrigin = buffer.position() - headerSize; do { // Scan the linked-list node. - final int nodeHeadPosition = buffer.position() - headerSize; + final int nodeArrayHeadPosition = buffer.position() - headerSize; final int count = readCharGroupCount(buffer); - int groupOffset = nodeHeadPosition + BinaryDictIOUtils.getGroupCountSize(count); + int groupOffset = nodeArrayHeadPosition + BinaryDictIOUtils.getGroupCountSize(count); for (int i = count; i > 0; --i) { // Scan the array of CharGroup. CharGroupInfo info = readCharGroup(buffer, groupOffset, options); if (BinaryDictIOUtils.isMovedGroup(info.mFlags, options)) continue; @@ -589,21 +589,21 @@ public final class BinaryDictDecoder { } } if (BinaryDictIOUtils.hasChildrenAddress(info.mChildrenAddress)) { - Node children = reverseNodeMap.get(info.mChildrenAddress); + PtNodeArray children = reverseNodeArrayMap.get(info.mChildrenAddress); if (null == children) { final int currentPosition = buffer.position(); buffer.position(info.mChildrenAddress + headerSize); - children = readNode( - buffer, headerSize, reverseNodeMap, reverseGroupMap, options); + children = readNodeArray( + buffer, headerSize, reverseNodeArrayMap, reverseGroupMap, options); buffer.position(currentPosition); } - nodeContents.add( + nodeArrayContents.add( new CharGroup(info.mCharacters, shortcutTargets, bigrams, info.mFrequency, 0 != (info.mFlags & FormatSpec.FLAG_IS_NOT_A_WORD), 0 != (info.mFlags & FormatSpec.FLAG_IS_BLACKLISTED), children)); } else { - nodeContents.add( + nodeArrayContents.add( new CharGroup(info.mCharacters, shortcutTargets, bigrams, info.mFrequency, 0 != (info.mFlags & FormatSpec.FLAG_IS_NOT_A_WORD), @@ -624,11 +624,11 @@ public final class BinaryDictDecoder { } while (options.mSupportsDynamicUpdate && buffer.position() != FormatSpec.NO_FORWARD_LINK_ADDRESS); - final Node node = new Node(nodeContents); - node.mCachedAddressBeforeUpdate = nodeOrigin; - node.mCachedAddressAfterUpdate = nodeOrigin; - reverseNodeMap.put(node.mCachedAddressAfterUpdate, node); - return node; + final PtNodeArray nodeArray = new PtNodeArray(nodeArrayContents); + nodeArray.mCachedAddressBeforeUpdate = nodeArrayOrigin; + nodeArray.mCachedAddressAfterUpdate = nodeArrayOrigin; + reverseNodeArrayMap.put(nodeArray.mCachedAddressAfterUpdate, nodeArray); + return nodeArray; } /** @@ -733,10 +733,10 @@ public final class BinaryDictDecoder { // Read header final FileHeader header = readHeader(reader.getBuffer()); - Map reverseNodeMapping = new TreeMap(); + Map reverseNodeArrayMapping = new TreeMap(); Map reverseGroupMapping = new TreeMap(); - final Node root = readNode(reader.getBuffer(), header.mHeaderSize, reverseNodeMapping, - reverseGroupMapping, header.mFormatOptions); + final PtNodeArray root = readNodeArray(reader.getBuffer(), header.mHeaderSize, + reverseNodeArrayMapping, reverseGroupMapping, header.mFormatOptions); FusionDictionary newDict = new FusionDictionary(root, header.mDictionaryOptions); if (null != dict) { @@ -803,8 +803,6 @@ public final class BinaryDictDecoder { /** * Calculate bigram frequency from compressed value * - * @see #makeBigramFlags - * * @param unigramFrequency * @param bigramFrequency compressed frequency * @return approximate bigram frequency diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoder.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoder.java index 85219e485..d9005b926 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoder.java @@ -20,7 +20,7 @@ import com.android.inputmethod.latin.makedict.BinaryDictDecoder.CharEncoding; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup; import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions; -import com.android.inputmethod.latin.makedict.FusionDictionary.Node; +import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import java.io.ByteArrayOutputStream; @@ -78,12 +78,12 @@ public class BinaryDictEncoder { } /** - * Compute the binary size of the group count for a node - * @param node the node + * Compute the binary size of the group count for a node array. + * @param nodeArray the nodeArray * @return the size of the group count, either 1 or 2 bytes. */ - private static int getGroupCountSize(final Node node) { - return BinaryDictIOUtils.getGroupCountSize(node.mData.size()); + private static int getGroupCountSize(final PtNodeArray nodeArray) { + return BinaryDictIOUtils.getGroupCountSize(nodeArray.mData.size()); } /** @@ -138,15 +138,17 @@ public class BinaryDictEncoder { } /** - * Compute the maximum size of a node, assuming 3-byte addresses for everything, and caches - * it in the 'actualSize' member of the node. + * Compute the maximum size of each node of a node array, assuming 3-byte addresses for + * everything, and caches it in the `mCachedSize' member of the nodes; deduce the size of + * the containing node array, and cache it it its 'mCachedSize' member. * - * @param node the node to compute the maximum size of. + * @param nodeArray the node array to compute the maximum size of. * @param options file format options. */ - private static void calculateNodeMaximumSize(final Node node, final FormatOptions options) { - int size = getGroupCountSize(node); - for (CharGroup g : node.mData) { + private static void calculateNodeArrayMaximumSize(final PtNodeArray nodeArray, + final FormatOptions options) { + int size = getGroupCountSize(nodeArray); + for (CharGroup g : nodeArray.mData) { final int groupSize = getCharGroupMaximumSize(g, options); g.mCachedSize = groupSize; size += groupSize; @@ -154,7 +156,7 @@ public class BinaryDictEncoder { if (options.mSupportsDynamicUpdate) { size += FormatSpec.FORWARD_LINK_ADDRESS_SIZE; } - node.mCachedSize = size; + nodeArray.mCachedSize = size; } /** @@ -199,14 +201,16 @@ public class BinaryDictEncoder { // This method is responsible for finding a nice ordering of the nodes that favors run-time // cache performance and dictionary size. - /* package for tests */ static ArrayList flattenTree(final Node root) { - final int treeSize = FusionDictionary.countCharGroups(root); + /* package for tests */ static ArrayList flattenTree( + final PtNodeArray rootNodeArray) { + final int treeSize = FusionDictionary.countCharGroups(rootNodeArray); MakedictLog.i("Counted nodes : " + treeSize); - final ArrayList flatTree = new ArrayList(treeSize); - return flattenTreeInner(flatTree, root); + final ArrayList flatTree = new ArrayList(treeSize); + return flattenTreeInner(flatTree, rootNodeArray); } - private static ArrayList flattenTreeInner(final ArrayList list, final Node node) { + private static ArrayList flattenTreeInner(final ArrayList list, + final PtNodeArray nodeArray) { // Removing the node is necessary if the tails are merged, because we would then // add the same node several times when we only want it once. A number of places in // the code also depends on any node being only once in the list. @@ -224,8 +228,8 @@ public class BinaryDictEncoder { // this simple list.remove operation O(n*n) overall. On Android this overhead is very // high. // For future reference, the code to remove duplicate is a simple : list.remove(node); - list.add(node); - final ArrayList branches = node.mData; + list.add(nodeArray); + final ArrayList branches = nodeArray.mData; final int nodeSize = branches.size(); for (CharGroup group : branches) { if (null != group.mChildren) flattenTreeInner(list, group.mChildren); @@ -234,52 +238,60 @@ public class BinaryDictEncoder { } /** - * Get the offset from a position inside a current node to a target node, during update. + * Get the offset from a position inside a current node array to a target node array, during + * update. * - * If the current node is before the target node, the target node has not been updated yet, - * so we should return the offset from the old position of the current node to the old position - * of the target node. If on the other hand the target is before the current node, it already - * has been updated, so we should return the offset from the new position in the current node - * to the new position in the target node. - * @param currentNode the node containing the CharGroup where the offset will be written - * @param offsetFromStartOfCurrentNode the offset, in bytes, from the start of currentNode - * @param targetNode the target node to get the offset to - * @return the offset to the target node + * If the current node array is before the target node array, the target node array has not + * been updated yet, so we should return the offset from the old position of the current node + * array to the old position of the target node array. If on the other hand the target is + * before the current node array, it already has been updated, so we should return the offset + * from the new position in the current node array to the new position in the target node + * array. + * + * @param currentNodeArray node array containing the CharGroup where the offset will be written + * @param offsetFromStartOfCurrentNodeArray offset, in bytes, from the start of currentNodeArray + * @param targetNodeArray the target node array to get the offset to + * @return the offset to the target node array */ - private static int getOffsetToTargetNodeDuringUpdate(final Node currentNode, - final int offsetFromStartOfCurrentNode, final Node targetNode) { - final boolean isTargetBeforeCurrent = (targetNode.mCachedAddressBeforeUpdate - < currentNode.mCachedAddressBeforeUpdate); + private static int getOffsetToTargetNodeArrayDuringUpdate(final PtNodeArray currentNodeArray, + final int offsetFromStartOfCurrentNodeArray, final PtNodeArray targetNodeArray) { + final boolean isTargetBeforeCurrent = (targetNodeArray.mCachedAddressBeforeUpdate + < currentNodeArray.mCachedAddressBeforeUpdate); if (isTargetBeforeCurrent) { - return targetNode.mCachedAddressAfterUpdate - - (currentNode.mCachedAddressAfterUpdate + offsetFromStartOfCurrentNode); + return targetNodeArray.mCachedAddressAfterUpdate + - (currentNodeArray.mCachedAddressAfterUpdate + + offsetFromStartOfCurrentNodeArray); } else { - return targetNode.mCachedAddressBeforeUpdate - - (currentNode.mCachedAddressBeforeUpdate + offsetFromStartOfCurrentNode); + return targetNodeArray.mCachedAddressBeforeUpdate + - (currentNodeArray.mCachedAddressBeforeUpdate + + offsetFromStartOfCurrentNodeArray); } } /** - * Get the offset from a position inside a current node to a target CharGroup, during update. - * @param currentNode the node containing the CharGroup where the offset will be written - * @param offsetFromStartOfCurrentNode the offset, in bytes, from the start of currentNode + * Get the offset from a position inside a current node array to a target CharGroup, during + * update. + * + * @param currentNodeArray node array containing the CharGroup where the offset will be written + * @param offsetFromStartOfCurrentNodeArray offset, in bytes, from the start of currentNodeArray * @param targetCharGroup the target CharGroup to get the offset to * @return the offset to the target CharGroup */ // TODO: is there any way to factorize this method with the one above? - private static int getOffsetToTargetCharGroupDuringUpdate(final Node currentNode, - final int offsetFromStartOfCurrentNode, final CharGroup targetCharGroup) { - final int oldOffsetBasePoint = currentNode.mCachedAddressBeforeUpdate - + offsetFromStartOfCurrentNode; + private static int getOffsetToTargetCharGroupDuringUpdate(final PtNodeArray currentNodeArray, + final int offsetFromStartOfCurrentNodeArray, final CharGroup targetCharGroup) { + final int oldOffsetBasePoint = currentNodeArray.mCachedAddressBeforeUpdate + + offsetFromStartOfCurrentNodeArray; final boolean isTargetBeforeCurrent = (targetCharGroup.mCachedAddressBeforeUpdate < oldOffsetBasePoint); - // If the target is before the current node, then its address has already been updated. - // We can use the AfterUpdate member, and compare it to our own member after update. - // Otherwise, the AfterUpdate member is not updated yet, so we need to use the BeforeUpdate - // member, and of course we have to compare this to our own address before update. + // If the target is before the current node array, then its address has already been + // updated. We can use the AfterUpdate member, and compare it to our own member after + // update. Otherwise, the AfterUpdate member is not updated yet, so we need to use the + // BeforeUpdate member, and of course we have to compare this to our own address before + // update. if (isTargetBeforeCurrent) { - final int newOffsetBasePoint = currentNode.mCachedAddressAfterUpdate - + offsetFromStartOfCurrentNode; + final int newOffsetBasePoint = currentNodeArray.mCachedAddressAfterUpdate + + offsetFromStartOfCurrentNodeArray; return targetCharGroup.mCachedAddressAfterUpdate - newOffsetBasePoint; } else { return targetCharGroup.mCachedAddressBeforeUpdate - oldOffsetBasePoint; @@ -287,26 +299,26 @@ public class BinaryDictEncoder { } /** - * Computes the actual node size, based on the cached addresses of the children nodes. + * Computes the actual node array size, based on the cached addresses of the children nodes. * - * Each node stores its tentative address. During dictionary address computing, these - * are not final, but they can be used to compute the node size (the node size depends - * on the address of the children because the number of bytes necessary to store an - * address depends on its numeric value. The return value indicates whether the node + * Each node array stores its tentative address. During dictionary address computing, these + * are not final, but they can be used to compute the node array size (the node array size + * depends on the address of the children because the number of bytes necessary to store an + * address depends on its numeric value. The return value indicates whether the node array * contents (as in, any of the addresses stored in the cache fields) have changed with * respect to their previous value. * - * @param node the node to compute the size of. + * @param nodeArray the node array to compute the size of. * @param dict the dictionary in which the word/attributes are to be found. * @param formatOptions file format options. - * @return false if none of the cached addresses inside the node changed, true otherwise. + * @return false if none of the cached addresses inside the node array changed, true otherwise. */ - private static boolean computeActualNodeSize(final Node node, final FusionDictionary dict, - final FormatOptions formatOptions) { + private static boolean computeActualNodeArraySize(final PtNodeArray nodeArray, + final FusionDictionary dict, final FormatOptions formatOptions) { boolean changed = false; - int size = getGroupCountSize(node); - for (CharGroup group : node.mData) { - group.mCachedAddressAfterUpdate = node.mCachedAddressAfterUpdate + size; + int size = getGroupCountSize(nodeArray); + for (CharGroup group : nodeArray.mData) { + group.mCachedAddressAfterUpdate = nodeArray.mCachedAddressAfterUpdate + size; if (group.mCachedAddressAfterUpdate != group.mCachedAddressBeforeUpdate) { changed = true; } @@ -318,16 +330,16 @@ public class BinaryDictEncoder { if (formatOptions.mSupportsDynamicUpdate) { groupSize += FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE; } else { - groupSize += getByteSize(getOffsetToTargetNodeDuringUpdate(node, + groupSize += getByteSize(getOffsetToTargetNodeArrayDuringUpdate(nodeArray, groupSize + size, group.mChildren)); } } groupSize += getShortcutListSize(group.mShortcutTargets); if (null != group.mBigrams) { for (WeightedString bigram : group.mBigrams) { - final int offset = getOffsetToTargetCharGroupDuringUpdate(node, + final int offset = getOffsetToTargetCharGroupDuringUpdate(nodeArray, groupSize + size + FormatSpec.GROUP_FLAGS_SIZE, - FusionDictionary.findWordInTree(dict.mRoot, bigram.mWord)); + FusionDictionary.findWordInTree(dict.mRootNodeArray, bigram.mWord)); groupSize += getByteSize(offset) + FormatSpec.GROUP_FLAGS_SIZE; } } @@ -337,49 +349,49 @@ public class BinaryDictEncoder { if (formatOptions.mSupportsDynamicUpdate) { size += FormatSpec.FORWARD_LINK_ADDRESS_SIZE; } - if (node.mCachedSize != size) { - node.mCachedSize = size; + if (nodeArray.mCachedSize != size) { + nodeArray.mCachedSize = size; changed = true; } return changed; } /** - * Initializes the cached addresses of nodes from their size. + * Initializes the cached addresses of node arrays and their containing nodes from their size. * - * @param flatNodes the array of nodes. + * @param flatNodes the list of node arrays. * @param formatOptions file format options. * @return the byte size of the entire stack. */ - private static int initializeNodesCachedAddresses(final ArrayList flatNodes, + private static int initializeNodeArraysCachedAddresses(final ArrayList flatNodes, final FormatOptions formatOptions) { - int nodeOffset = 0; - for (final Node n : flatNodes) { - n.mCachedAddressBeforeUpdate = nodeOffset; - int groupCountSize = getGroupCountSize(n); + int nodeArrayOffset = 0; + for (final PtNodeArray nodeArray : flatNodes) { + nodeArray.mCachedAddressBeforeUpdate = nodeArrayOffset; + int groupCountSize = getGroupCountSize(nodeArray); int groupOffset = 0; - for (final CharGroup g : n.mData) { + for (final CharGroup g : nodeArray.mData) { g.mCachedAddressBeforeUpdate = g.mCachedAddressAfterUpdate = - groupCountSize + nodeOffset + groupOffset; + groupCountSize + nodeArrayOffset + groupOffset; groupOffset += g.mCachedSize; } final int nodeSize = groupCountSize + groupOffset + (formatOptions.mSupportsDynamicUpdate ? FormatSpec.FORWARD_LINK_ADDRESS_SIZE : 0); - nodeOffset += n.mCachedSize; + nodeArrayOffset += nodeArray.mCachedSize; } - return nodeOffset; + return nodeArrayOffset; } /** - * Updates the cached addresses of nodes after recomputing their new positions. + * Updates the cached addresses of node arrays after recomputing their new positions. * - * @param flatNodes the array of nodes. + * @param flatNodes the list of node arrays. */ - private static void updateNodeCachedAddresses(final ArrayList flatNodes) { - for (final Node n : flatNodes) { - n.mCachedAddressBeforeUpdate = n.mCachedAddressAfterUpdate; - for (final CharGroup g : n.mData) { + private static void updateNodeArraysCachedAddresses(final ArrayList flatNodes) { + for (final PtNodeArray nodeArray : flatNodes) { + nodeArray.mCachedAddressBeforeUpdate = nodeArray.mCachedAddressAfterUpdate; + for (final CharGroup g : nodeArray.mData) { g.mCachedAddressBeforeUpdate = g.mCachedAddressAfterUpdate; } } @@ -391,11 +403,11 @@ public class BinaryDictEncoder { * The parent addresses are used by some binary formats at write-to-disk time. Not all formats * need them. In particular, version 2 does not need them, and version 3 does. * - * @param flatNodes the flat array of nodes to fill in + * @param flatNodes the flat array of node arrays to fill in */ - private static void computeParentAddresses(final ArrayList flatNodes) { - for (final Node node : flatNodes) { - for (final CharGroup group : node.mData) { + private static void computeParentAddresses(final ArrayList flatNodes) { + for (final PtNodeArray nodeArray : flatNodes) { + for (final CharGroup group : nodeArray.mData) { if (null != group.mChildren) { // Assign my address to children's parent address // Here BeforeUpdate and AfterUpdate addresses have the same value, so it @@ -408,25 +420,25 @@ public class BinaryDictEncoder { } /** - * Compute the addresses and sizes of an ordered node array. + * Compute the addresses and sizes of an ordered list of node arrays. * - * This method takes a node array and will update its cached address and size values - * so that they can be written into a file. It determines the smallest size each of the - * nodes can be given the addresses of its children and attributes, and store that into + * This method takes a list of node arrays and will update their cached address and size + * values so that they can be written into a file. It determines the smallest size each of the + * nodes arrays can be given the addresses of its children and attributes, and store that into * each node. * The order of the node is given by the order of the array. This method makes no effort * to find a good order; it only mechanically computes the size this order results in. * * @param dict the dictionary - * @param flatNodes the ordered array of nodes + * @param flatNodes the ordered list of nodes arrays * @param formatOptions file format options. * @return the same array it was passed. The nodes have been updated for address and size. */ - private static ArrayList computeAddresses(final FusionDictionary dict, - final ArrayList flatNodes, final FormatOptions formatOptions) { + private static ArrayList computeAddresses(final FusionDictionary dict, + final ArrayList flatNodes, final FormatOptions formatOptions) { // First get the worst possible sizes and offsets - for (final Node n : flatNodes) calculateNodeMaximumSize(n, formatOptions); - final int offset = initializeNodesCachedAddresses(flatNodes, formatOptions); + for (final PtNodeArray n : flatNodes) calculateNodeArrayMaximumSize(n, formatOptions); + final int offset = initializeNodeArraysCachedAddresses(flatNodes, formatOptions); MakedictLog.i("Compressing the array addresses. Original size : " + offset); MakedictLog.i("(Recursively seen size : " + offset + ")"); @@ -435,17 +447,19 @@ public class BinaryDictEncoder { boolean changesDone = false; do { changesDone = false; - int nodeStartOffset = 0; - for (final Node n : flatNodes) { - n.mCachedAddressAfterUpdate = nodeStartOffset; - final int oldNodeSize = n.mCachedSize; - final boolean changed = computeActualNodeSize(n, dict, formatOptions); - final int newNodeSize = n.mCachedSize; - if (oldNodeSize < newNodeSize) throw new RuntimeException("Increased size ?!"); - nodeStartOffset += newNodeSize; + int nodeArrayStartOffset = 0; + for (final PtNodeArray nodeArray : flatNodes) { + nodeArray.mCachedAddressAfterUpdate = nodeArrayStartOffset; + final int oldNodeArraySize = nodeArray.mCachedSize; + final boolean changed = computeActualNodeArraySize(nodeArray, dict, formatOptions); + final int newNodeArraySize = nodeArray.mCachedSize; + if (oldNodeArraySize < newNodeArraySize) { + throw new RuntimeException("Increased size ?!"); + } + nodeArrayStartOffset += newNodeArraySize; changesDone |= changed; } - updateNodeCachedAddresses(flatNodes); + updateNodeArraysCachedAddresses(flatNodes); ++passes; if (passes > MAX_PASSES) throw new RuntimeException("Too many passes - probably a bug"); } while (changesDone); @@ -453,10 +467,10 @@ public class BinaryDictEncoder { if (formatOptions.mSupportsDynamicUpdate) { computeParentAddresses(flatNodes); } - final Node lastNode = flatNodes.get(flatNodes.size() - 1); + final PtNodeArray lastNodeArray = flatNodes.get(flatNodes.size() - 1); MakedictLog.i("Compression complete in " + passes + " passes."); MakedictLog.i("After address compression : " - + (lastNode.mCachedAddressAfterUpdate + lastNode.mCachedSize)); + + (lastNodeArray.mCachedAddressAfterUpdate + lastNodeArray.mCachedSize)); return flatNodes; } @@ -464,25 +478,25 @@ public class BinaryDictEncoder { /** * Sanity-checking method. * - * This method checks an array of node for juxtaposition, that is, it will do - * nothing if each node's cached address is actually the previous node's address + * This method checks a list of node arrays for juxtaposition, that is, it will do + * nothing if each node array's cached address is actually the previous node array's address * plus the previous node's size. * If this is not the case, it will throw an exception. * - * @param array the array node to check + * @param arrays the list of node arrays to check */ - private static void checkFlatNodeArray(final ArrayList array) { + private static void checkFlatNodeArrayList(final ArrayList arrays) { int offset = 0; int index = 0; - for (final Node n : array) { + for (final PtNodeArray nodeArray : arrays) { // BeforeUpdate and AfterUpdate addresses are the same here, so it does not matter // which we use. - if (n.mCachedAddressAfterUpdate != offset) { + if (nodeArray.mCachedAddressAfterUpdate != offset) { throw new RuntimeException("Wrong address for node " + index - + " : expected " + offset + ", got " + n.mCachedAddressAfterUpdate); + + " : expected " + offset + ", got " + nodeArray.mCachedAddressAfterUpdate); } ++index; - offset += n.mCachedSize; + offset += nodeArray.mCachedSize; } } @@ -707,26 +721,23 @@ public class BinaryDictEncoder { } /** - * Write a node to memory. The node is expected to have its final position cached. + * Write a node array to memory. The node array is expected to have its final position cached. * - * This can be an empty map, but the more is inside the faster the lookups will be. It can - * be carried on as long as nodes do not move. - * - * @param dict the dictionary the node is a part of (for relative offsets). + * @param dict the dictionary the node array is a part of (for relative offsets). * @param buffer the memory buffer to write to. - * @param node the node to write. + * @param nodeArray the node array to write. * @param formatOptions file format options. * @return the address of the END of the node. */ @SuppressWarnings("unused") private static int writePlacedNode(final FusionDictionary dict, byte[] buffer, - final Node node, final FormatOptions formatOptions) { + final PtNodeArray nodeArray, final FormatOptions formatOptions) { // TODO: Make the code in common with BinaryDictIOUtils#writeCharGroup - int index = node.mCachedAddressAfterUpdate; + int index = nodeArray.mCachedAddressAfterUpdate; - final int groupCount = node.mData.size(); - final int countSize = getGroupCountSize(node); - final int parentAddress = node.mCachedParentAddress; + final int groupCount = nodeArray.mData.size(); + final int countSize = getGroupCountSize(nodeArray); + final int parentAddress = nodeArray.mCachedParentAddress; if (1 == countSize) { buffer[index++] = (byte)groupCount; } else if (2 == countSize) { @@ -739,7 +750,7 @@ public class BinaryDictEncoder { } int groupAddress = index; for (int i = 0; i < groupCount; ++i) { - final CharGroup group = node.mData.get(i); + final CharGroup group = nodeArray.mData.get(i); if (index != group.mCachedAddressAfterUpdate) { throw new RuntimeException("Bug: write index is not the same as the cached address " + "of the group : " + index + " <> " + group.mCachedAddressAfterUpdate); @@ -762,7 +773,7 @@ public class BinaryDictEncoder { index = writeParentAddress(buffer, index, parentAddress, formatOptions); } else { index = writeParentAddress(buffer, index, parentAddress - + (node.mCachedAddressAfterUpdate - group.mCachedAddressAfterUpdate), + + (nodeArray.mCachedAddressAfterUpdate - group.mCachedAddressAfterUpdate), formatOptions); } @@ -812,7 +823,7 @@ public class BinaryDictEncoder { while (bigramIterator.hasNext()) { final WeightedString bigram = bigramIterator.next(); final CharGroup target = - FusionDictionary.findWordInTree(dict.mRoot, bigram.mWord); + FusionDictionary.findWordInTree(dict.mRootNodeArray, bigram.mWord); final int addressOfBigram = target.mCachedAddressAfterUpdate; final int unigramFrequencyForThisWord = target.mFrequency; ++groupAddress; @@ -832,57 +843,58 @@ public class BinaryDictEncoder { = FormatSpec.NO_FORWARD_LINK_ADDRESS; index += FormatSpec.FORWARD_LINK_ADDRESS_SIZE; } - if (index != node.mCachedAddressAfterUpdate + node.mCachedSize) throw new RuntimeException( - "Not the same size : written " - + (index - node.mCachedAddressAfterUpdate) + " bytes from a node that should have " - + node.mCachedSize + " bytes"); + if (index != nodeArray.mCachedAddressAfterUpdate + nodeArray.mCachedSize) { + throw new RuntimeException( + "Not the same size : written " + (index - nodeArray.mCachedAddressAfterUpdate) + + " bytes from a node that should have " + nodeArray.mCachedSize + " bytes"); + } return index; } /** - * Dumps a collection of useful statistics about a node array. + * Dumps a collection of useful statistics about a list of node arrays. * * This prints purely informative stuff, like the total estimated file size, the - * number of nodes, of character groups, the repartition of each address size, etc + * number of node arrays, of character groups, the repartition of each address size, etc * - * @param nodes the node array. + * @param nodeArrays the list of node arrays. */ - private static void showStatistics(ArrayList nodes) { + private static void showStatistics(ArrayList nodeArrays) { int firstTerminalAddress = Integer.MAX_VALUE; int lastTerminalAddress = Integer.MIN_VALUE; int size = 0; int charGroups = 0; int maxGroups = 0; int maxRuns = 0; - for (final Node n : nodes) { - if (maxGroups < n.mData.size()) maxGroups = n.mData.size(); - for (final CharGroup cg : n.mData) { + for (final PtNodeArray nodeArray : nodeArrays) { + if (maxGroups < nodeArray.mData.size()) maxGroups = nodeArray.mData.size(); + for (final CharGroup cg : nodeArray.mData) { ++charGroups; if (cg.mChars.length > maxRuns) maxRuns = cg.mChars.length; if (cg.mFrequency >= 0) { - if (n.mCachedAddressAfterUpdate < firstTerminalAddress) - firstTerminalAddress = n.mCachedAddressAfterUpdate; - if (n.mCachedAddressAfterUpdate > lastTerminalAddress) - lastTerminalAddress = n.mCachedAddressAfterUpdate; + if (nodeArray.mCachedAddressAfterUpdate < firstTerminalAddress) + firstTerminalAddress = nodeArray.mCachedAddressAfterUpdate; + if (nodeArray.mCachedAddressAfterUpdate > lastTerminalAddress) + lastTerminalAddress = nodeArray.mCachedAddressAfterUpdate; } } - if (n.mCachedAddressAfterUpdate + n.mCachedSize > size) { - size = n.mCachedAddressAfterUpdate + n.mCachedSize; + if (nodeArray.mCachedAddressAfterUpdate + nodeArray.mCachedSize > size) { + size = nodeArray.mCachedAddressAfterUpdate + nodeArray.mCachedSize; } } final int[] groupCounts = new int[maxGroups + 1]; final int[] runCounts = new int[maxRuns + 1]; - for (final Node n : nodes) { - ++groupCounts[n.mData.size()]; - for (final CharGroup cg : n.mData) { + for (final PtNodeArray nodeArray : nodeArrays) { + ++groupCounts[nodeArray.mData.size()]; + for (final CharGroup cg : nodeArray.mData) { ++runCounts[cg.mChars.length]; } } MakedictLog.i("Statistics:\n" + " total file size " + size + "\n" - + " " + nodes.size() + " nodes\n" - + " " + charGroups + " groups (" + ((float)charGroups / nodes.size()) + + " " + nodeArrays.size() + " node arrays\n" + + " " + charGroups + " groups (" + ((float)charGroups / nodeArrays.size()) + " groups per node)\n" + " first terminal at " + firstTerminalAddress + "\n" + " last terminal at " + lastTerminalAddress + "\n" @@ -909,11 +921,12 @@ public class BinaryDictEncoder { final FusionDictionary dict, final FormatOptions formatOptions) throws IOException, UnsupportedFormatException { - // Addresses are limited to 3 bytes, but since addresses can be relative to each node, the - // structure itself is not limited to 16MB. However, if it is over 16MB deciding the order - // of the nodes becomes a quite complicated problem, because though the dictionary itself - // does not have a size limit, each node must still be within 16MB of all its children and - // parents. As long as this is ensured, the dictionary file may grow to any size. + // Addresses are limited to 3 bytes, but since addresses can be relative to each node + // array, the structure itself is not limited to 16MB. However, if it is over 16MB deciding + // the order of the node arrays becomes a quite complicated problem, because though the + // dictionary itself does not have a size limit, each node array must still be within 16MB + // of all its children and parents. As long as this is ensured, the dictionary file may + // grow to any size. final int version = formatOptions.mVersion; if (version < FormatSpec.MINIMUM_SUPPORTED_VERSION @@ -964,23 +977,23 @@ public class BinaryDictEncoder { // Leave the choice of the optimal node order to the flattenTree function. MakedictLog.i("Flattening the tree..."); - ArrayList flatNodes = flattenTree(dict.mRoot); + ArrayList flatNodes = flattenTree(dict.mRootNodeArray); MakedictLog.i("Computing addresses..."); computeAddresses(dict, flatNodes, formatOptions); MakedictLog.i("Checking array..."); - if (DBG) checkFlatNodeArray(flatNodes); + if (DBG) checkFlatNodeArrayList(flatNodes); // Create a buffer that matches the final dictionary size. - final Node lastNode = flatNodes.get(flatNodes.size() - 1); - final int bufferSize = lastNode.mCachedAddressAfterUpdate + lastNode.mCachedSize; + final PtNodeArray lastNodeArray = flatNodes.get(flatNodes.size() - 1); + final int bufferSize = lastNodeArray.mCachedAddressAfterUpdate + lastNodeArray.mCachedSize; final byte[] buffer = new byte[bufferSize]; int index = 0; MakedictLog.i("Writing file..."); int dataEndOffset = 0; - for (Node n : flatNodes) { - dataEndOffset = writePlacedNode(dict, buffer, n, formatOptions); + for (PtNodeArray nodeArray : flatNodes) { + dataEndOffset = writePlacedNode(dict, buffer, nodeArray, formatOptions); } if (DBG) showStatistics(flatNodes); diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java index c013013e6..476d51b8e 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java @@ -59,7 +59,7 @@ public final class BinaryDictIOUtils { } /** - * Tours all node without recursive call. + * Retrieves all node arrays without recursive call. */ private static void readUnigramsAndBigramsBinaryInner( final FusionDictionaryBufferInterface buffer, final int headerSize, @@ -116,7 +116,7 @@ public final class BinaryDictIOUtils { if (formatOptions.mSupportsDynamicUpdate) { final int forwardLinkAddress = buffer.readUnsignedInt24(); if (forwardLinkAddress != FormatSpec.NO_FORWARD_LINK_ADDRESS) { - // the node has a forward link. + // The node array has a forward link. p.mNumOfCharGroup = Position.NOT_READ_GROUPCOUNT; p.mAddress = forwardLinkAddress; } else { @@ -126,7 +126,7 @@ public final class BinaryDictIOUtils { stack.pop(); } } else { - // the node has more groups. + // The node array has more groups. p.mAddress = buffer.position(); } @@ -139,14 +139,14 @@ public final class BinaryDictIOUtils { /** * Reads unigrams and bigrams from the binary file. - * Doesn't make the memory representation of the dictionary. + * Doesn't store a full memory representation of the dictionary. * * @param reader the reader. * @param words the map to store the address as a key and the word as a value. * @param frequencies the map to store the address as a key and the frequency as a value. * @param bigrams the map to store the address as a key and the list of address as a value. - * @throws IOException - * @throws UnsupportedFormatException + * @throws IOException if the file can't be read. + * @throws UnsupportedFormatException if the format of the file is not recognized. */ public static void readUnigramsAndBigramsBinary(final BinaryDictReader reader, final Map words, final Map frequencies, @@ -165,8 +165,8 @@ public final class BinaryDictIOUtils { * @param buffer the buffer to read. * @param word the word we search for. * @return the address of the terminal node. - * @throws IOException - * @throws UnsupportedFormatException + * @throws IOException if the file can't be read. + * @throws UnsupportedFormatException if the format of the file is not recognized. */ @UsedForTesting public static int getTerminalPosition(final FusionDictionaryBufferInterface buffer, @@ -224,9 +224,9 @@ public final class BinaryDictIOUtils { } // If we found the next char group, it is under the file pointer. - // But if not, we are at the end of this node so we expect to have + // But if not, we are at the end of this node array so we expect to have // a forward link address that we need to consult and possibly resume - // search on the next node in the linked list. + // search on the next node array in the linked list. if (foundNextCharGroup) break; if (!header.mFormatOptions.mSupportsDynamicUpdate) { return FormatSpec.NOT_VALID_WORD; @@ -365,9 +365,10 @@ public final class BinaryDictIOUtils { } /** - * Write a char group to an output stream. - * A char group is an in-memory representation of a node in trie. - * A char group info is an on-disk representation of a node. + * Write a char group to an output stream from a CharGroupInfo. + * A char group is an in-memory representation of a node in the patricia trie. + * A char group info is a container for low-level information about how the + * char group is stored in the binary format. * * @param destination the stream to write. * @param info the char group info to be written. @@ -427,7 +428,7 @@ public final class BinaryDictIOUtils { if (info.mBigrams != null) { // TODO: Consolidate this code with the code that computes the size of the bigram list - // in BinaryDictEncoder#computeActualNodeSize + // in BinaryDictEncoder#computeActualNodeArraySize for (int i = 0; i < info.mBigrams.size(); ++i) { final int bigramFrequency = info.mBigrams.get(i).mFrequency; @@ -479,14 +480,14 @@ public final class BinaryDictIOUtils { } /** - * Write a node to the stream. + * Write a node array to the stream. * * @param destination the stream to write. - * @param infos groups to be written. + * @param infos an array of CharGroupInfo to be written. * @return the size written, in bytes. * @throws IOException */ - static int writeNode(final OutputStream destination, final CharGroupInfo[] infos) + static int writeNodes(final OutputStream destination, final CharGroupInfo[] infos) throws IOException { int size = getGroupCountSize(infos.length); switch (getGroupCountSize(infos.length)) { @@ -604,12 +605,12 @@ public final class BinaryDictIOUtils { public static int getGroupCountSize(final int count) { if (FormatSpec.MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT >= count) { return 1; - } else if (FormatSpec.MAX_CHARGROUPS_IN_A_NODE >= count) { + } else if (FormatSpec.MAX_CHARGROUPS_IN_A_PT_NODE_ARRAY >= count) { return 2; } else { throw new RuntimeException("Can't have more than " - + FormatSpec.MAX_CHARGROUPS_IN_A_NODE + " groups in a node (found " + count - + ")"); + + FormatSpec.MAX_CHARGROUPS_IN_A_PT_NODE_ARRAY + " groups in a node (found " + + count + ")"); } } diff --git a/java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java b/java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java index 4dbfcb77d..5d116d79c 100644 --- a/java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java +++ b/java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java @@ -86,7 +86,7 @@ public final class DynamicBinaryDictIOUtils { } final int flags = buffer.readUnsignedByte(); if (BinaryDictIOUtils.isMovedGroup(flags, formatOptions)) { - // if the group is moved, the parent address is stored in the destination group. + // If the group is moved, the parent address is stored in the destination group. // We are guaranteed to process the destination group later, so there is no need to // update anything here. buffer.position(originalPosition); @@ -101,10 +101,10 @@ public final class DynamicBinaryDictIOUtils { } /** - * Update parent addresses in a Node that is referred to by nodeOriginAddress. + * Update parent addresses in a node array stored at nodeOriginAddress. * * @param buffer the buffer to be modified. - * @param nodeOriginAddress the address of a modified Node. + * @param nodeOriginAddress the address of the node array to update. * @param newParentAddress the address to be written. * @param formatOptions file format options. */ @@ -154,7 +154,7 @@ public final class DynamicBinaryDictIOUtils { */ private static int moveCharGroup(final OutputStream destination, final FusionDictionaryBufferInterface buffer, final CharGroupInfo info, - final int nodeOriginAddress, final int oldGroupAddress, + final int nodeArrayOriginAddress, final int oldGroupAddress, final FormatOptions formatOptions) throws IOException { updateParentAddress(buffer, oldGroupAddress, buffer.limit() + 1, formatOptions); buffer.position(oldGroupAddress); @@ -163,15 +163,16 @@ public final class DynamicBinaryDictIOUtils { buffer.put((byte)(FormatSpec.FLAG_IS_MOVED | (currentFlags & (~FormatSpec.MASK_MOVE_AND_DELETE_FLAG)))); int size = FormatSpec.GROUP_FLAGS_SIZE; - updateForwardLink(buffer, nodeOriginAddress, buffer.limit(), formatOptions); - size += BinaryDictIOUtils.writeNode(destination, new CharGroupInfo[] { info }); + updateForwardLink(buffer, nodeArrayOriginAddress, buffer.limit(), formatOptions); + size += BinaryDictIOUtils.writeNodes(destination, new CharGroupInfo[] { info }); return size; } + @SuppressWarnings("unused") private static void updateForwardLink(final FusionDictionaryBufferInterface buffer, - final int nodeOriginAddress, final int newNodeAddress, + final int nodeArrayOriginAddress, final int newNodeArrayAddress, final FormatOptions formatOptions) { - buffer.position(nodeOriginAddress); + buffer.position(nodeArrayOriginAddress); int jumpCount = 0; while (jumpCount++ < MAX_JUMPS) { final int count = BinaryDictDecoder.readCharGroupCount(buffer); @@ -179,7 +180,7 @@ public final class DynamicBinaryDictIOUtils { final int forwardLinkAddress = buffer.readUnsignedInt24(); if (forwardLinkAddress == FormatSpec.NO_FORWARD_LINK_ADDRESS) { buffer.position(buffer.position() - FormatSpec.FORWARD_LINK_ADDRESS_SIZE); - BinaryDictIOUtils.writeSInt24ToBuffer(buffer, newNodeAddress); + BinaryDictIOUtils.writeSInt24ToBuffer(buffer, newNodeArrayAddress); return; } buffer.position(forwardLinkAddress); @@ -190,57 +191,59 @@ public final class DynamicBinaryDictIOUtils { } /** - * Move a group that is referred to by oldGroupOrigin to the tail of the file. - * And set the children address to the byte after the group. + * Move a group that is referred to by oldGroupOrigin to the tail of the file, and set the + * children address to the byte after the group * - * @param nodeOrigin the address of the tail of the file. - * @param characters - * @param length - * @param flags - * @param frequency - * @param parentAddress - * @param shortcutTargets - * @param bigrams + * @param fileEndAddress the address of the tail of the file. + * @param codePoints the characters to put inside the group. + * @param length how many code points to read from codePoints. + * @param flags the flags for this group. + * @param frequency the frequency of this terminal. + * @param parentAddress the address of the parent group of this group. + * @param shortcutTargets the shortcut targets for this group. + * @param bigrams the bigrams for this group. * @param destination the stream representing the tail of the file. * @param buffer the buffer representing the (constant-size) body of the file. - * @param oldNodeOrigin - * @param oldGroupOrigin - * @param formatOptions + * @param oldNodeArrayOrigin the origin of the old node array this group was a part of. + * @param oldGroupOrigin the old origin where this group used to be stored. + * @param formatOptions format options for this dictionary. * @return the size written, in bytes. - * @throws IOException + * @throws IOException if the file can't be accessed */ - private static int moveGroup(final int nodeOrigin, final int[] characters, final int length, - final int flags, final int frequency, final int parentAddress, + private static int moveGroup(final int fileEndAddress, final int[] codePoints, + final int length, final int flags, final int frequency, final int parentAddress, final ArrayList shortcutTargets, final ArrayList bigrams, final OutputStream destination, - final FusionDictionaryBufferInterface buffer, final int oldNodeOrigin, + final FusionDictionaryBufferInterface buffer, final int oldNodeArrayOrigin, final int oldGroupOrigin, final FormatOptions formatOptions) throws IOException { int size = 0; - final int newGroupOrigin = nodeOrigin + 1; - final int[] writtenCharacters = Arrays.copyOfRange(characters, 0, length); + final int newGroupOrigin = fileEndAddress + 1; + final int[] writtenCharacters = Arrays.copyOfRange(codePoints, 0, length); final CharGroupInfo tmpInfo = new CharGroupInfo(newGroupOrigin, -1 /* endAddress */, flags, writtenCharacters, frequency, parentAddress, FormatSpec.NO_CHILDREN_ADDRESS, shortcutTargets, bigrams); size = BinaryDictIOUtils.computeGroupSize(tmpInfo, formatOptions); final CharGroupInfo newInfo = new CharGroupInfo(newGroupOrigin, newGroupOrigin + size, flags, writtenCharacters, frequency, parentAddress, - nodeOrigin + 1 + size + FormatSpec.FORWARD_LINK_ADDRESS_SIZE, shortcutTargets, + fileEndAddress + 1 + size + FormatSpec.FORWARD_LINK_ADDRESS_SIZE, shortcutTargets, bigrams); - moveCharGroup(destination, buffer, newInfo, oldNodeOrigin, oldGroupOrigin, formatOptions); + moveCharGroup(destination, buffer, newInfo, oldNodeArrayOrigin, oldGroupOrigin, + formatOptions); return 1 + size + FormatSpec.FORWARD_LINK_ADDRESS_SIZE; } /** * Insert a word into a binary dictionary. * - * @param buffer - * @param destination - * @param word - * @param frequency - * @param bigramStrings - * @param shortcuts - * @throws IOException - * @throws UnsupportedFormatException + * @param buffer the buffer containing the existing dictionary. + * @param destination a stream to the underlying file, with the pointer at the end of the file. + * @param word the word to insert. + * @param frequency the frequency of the new word. + * @param bigramStrings bigram list, or null if none. + * @param shortcuts shortcut list, or null if none. + * @param isBlackListEntry whether this should be a blacklist entry. + * @throws IOException if the file can't be accessed. + * @throws UnsupportedFormatException if the existing dictionary is in an unexpected format. */ // TODO: Support batch insertion. // TODO: Remove @UsedForTesting once UserHistoryDictionary is implemented by BinaryDictionary. @@ -323,7 +326,7 @@ public final class DynamicBinaryDictIOUtils { currentInfo.mFlags, characters2, currentInfo.mFrequency, newNodeAddress + 1, currentInfo.mChildrenAddress, currentInfo.mShortcutTargets, currentInfo.mBigrams); - BinaryDictIOUtils.writeNode(destination, new CharGroupInfo[] { newInfo2 }); + BinaryDictIOUtils.writeNodes(destination, new CharGroupInfo[] { newInfo2 }); return; } else if (codePoints[wordPos + p] != currentInfo.mCharacters[p]) { if (p > 0) { @@ -386,7 +389,7 @@ public final class DynamicBinaryDictIOUtils { newNodeAddress + written, -1 /* endAddress */, flags, newCharacters, frequency, newNodeAddress + 1, FormatSpec.NO_CHILDREN_ADDRESS, shortcuts, bigrams); - BinaryDictIOUtils.writeNode(destination, + BinaryDictIOUtils.writeNodes(destination, new CharGroupInfo[] { suffixInfo, newInfo }); return; } @@ -438,7 +441,7 @@ public final class DynamicBinaryDictIOUtils { final CharGroupInfo newInfo = new CharGroupInfo(newGroupAddress, -1, flags, characters, frequency, address, FormatSpec.NO_CHILDREN_ADDRESS, shortcuts, bigrams); - BinaryDictIOUtils.writeNode(destination, new CharGroupInfo[] { newInfo }); + BinaryDictIOUtils.writeNodes(destination, new CharGroupInfo[] { newInfo }); return; } buffer.position(currentInfo.mChildrenAddress); @@ -482,7 +485,7 @@ public final class DynamicBinaryDictIOUtils { final CharGroupInfo newInfo = new CharGroupInfo(newNodeAddress + 1, -1 /* endAddress */, flags, characters, frequency, nodeParentAddress, FormatSpec.NO_CHILDREN_ADDRESS, shortcuts, bigrams); - BinaryDictIOUtils.writeNode(destination, new CharGroupInfo[]{ newInfo }); + BinaryDictIOUtils.writeNodes(destination, new CharGroupInfo[]{ newInfo }); return; } else { depth--; diff --git a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java index 9af66ed4c..5e331219c 100644 --- a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java +++ b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java @@ -60,7 +60,7 @@ public final class FormatSpec { */ /* - * Array of Node(FusionDictionary.Node) layout is as follows: + * Node array (FusionDictionary.PtNodeArray) layout is as follows: * * g | * r | the number of groups, 1 or 2 bytes. @@ -86,7 +86,7 @@ public final class FormatSpec { * linkaddress */ - /* Node(CharGroup) layout is as follows: + /* Node (FusionDictionary.CharGroup) layout is as follows: * | IF !SUPPORTS_DYNAMIC_UPDATE * | addressType xx : mask with MASK_GROUP_ADDRESS_TYPE * | 2 bits, 00 = no children : FLAG_GROUP_ADDRESS_TYPE_NOADDRESS @@ -251,7 +251,7 @@ public final class FormatSpec { static final int INVALID_CHARACTER = -1; static final int MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT = 0x7F; // 127 - static final int MAX_CHARGROUPS_IN_A_NODE = 0x7FFF; // 32767 + static final int MAX_CHARGROUPS_IN_A_PT_NODE_ARRAY = 0x7FFF; // 32767 static final int MAX_BIGRAMS_IN_A_GROUP = 10000; static final int MAX_TERMINAL_FREQUENCY = 255; diff --git a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java index 118dc22b8..fce1c5cdd 100644 --- a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java +++ b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java @@ -37,14 +37,14 @@ public final class FusionDictionary implements Iterable { private static int CHARACTER_NOT_FOUND_INDEX = -1; /** - * A node of the dictionary, containing several CharGroups. + * A node array of the dictionary, containing several CharGroups. * - * A node is but an ordered array of CharGroups, which essentially contain all the + * A PtNodeArray is but an ordered array of CharGroups, which essentially contain all the * real information. * This class also contains fields to cache size and address, to help with binary * generation. */ - public static final class Node { + public static final class PtNodeArray { ArrayList mData; // To help with binary generation int mCachedSize = Integer.MIN_VALUE; @@ -57,10 +57,10 @@ public final class FusionDictionary implements Iterable { int mCachedAddressAfterUpdate = Integer.MIN_VALUE; int mCachedParentAddress = 0; - public Node() { + public PtNodeArray() { mData = new ArrayList(); } - public Node(ArrayList data) { + public PtNodeArray(ArrayList data) { mData = data; } } @@ -98,7 +98,7 @@ public final class FusionDictionary implements Iterable { * This is the central class of the in-memory representation. A CharGroup is what can * be seen as a traditional "trie node", except it can hold several characters at the * same time. A CharGroup essentially represents one or several characters in the middle - * of the trie trie; as such, it can be a terminal, and it can have children. + * of the trie tree; as such, it can be a terminal, and it can have children. * In this in-memory representation, whether the CharGroup is a terminal or not is represented * in the frequency, where NOT_A_TERMINAL (= -1) means this is not a terminal and any other * value is the frequency of this terminal. A terminal may have non-null shortcuts and/or @@ -110,7 +110,7 @@ public final class FusionDictionary implements Iterable { ArrayList mShortcutTargets; ArrayList mBigrams; int mFrequency; // NOT_A_TERMINAL == mFrequency indicates this is not a terminal. - Node mChildren; + PtNodeArray mChildren; boolean mIsNotAWord; // Only a shortcut boolean mIsBlacklistEntry; // mCachedSize and mCachedAddressBefore/AfterUpdate are helpers for binary dictionary @@ -137,7 +137,8 @@ public final class FusionDictionary implements Iterable { public CharGroup(final int[] chars, final ArrayList shortcutTargets, final ArrayList bigrams, final int frequency, - final boolean isNotAWord, final boolean isBlacklistEntry, final Node children) { + final boolean isNotAWord, final boolean isBlacklistEntry, + final PtNodeArray children) { mChars = chars; mFrequency = frequency; mShortcutTargets = shortcutTargets; @@ -149,7 +150,7 @@ public final class FusionDictionary implements Iterable { public void addChild(CharGroup n) { if (null == mChildren) { - mChildren = new Node(); + mChildren = new PtNodeArray(); } mChildren.mData.add(n); } @@ -344,10 +345,10 @@ public final class FusionDictionary implements Iterable { } public final DictionaryOptions mOptions; - public final Node mRoot; + public final PtNodeArray mRootNodeArray; - public FusionDictionary(final Node root, final DictionaryOptions options) { - mRoot = root; + public FusionDictionary(final PtNodeArray rootNodeArray, final DictionaryOptions options) { + mRootNodeArray = rootNodeArray; mOptions = options; } @@ -406,13 +407,13 @@ public final class FusionDictionary implements Iterable { } /** - * Sanity check for a node. + * Sanity check for a node array. * - * This method checks that all CharGroups in a node are ordered as expected. + * This method checks that all CharGroups in a node array are ordered as expected. * If they are, nothing happens. If they aren't, an exception is thrown. */ - private void checkStack(Node node) { - ArrayList stack = node.mData; + private void checkStack(PtNodeArray nodeArray) { + ArrayList stack = nodeArray.mData; int lastValue = -1; for (int i = 0; i < stack.size(); ++i) { int currentValue = stack.get(i).mChars[0]; @@ -431,16 +432,16 @@ public final class FusionDictionary implements Iterable { * @param frequency the bigram frequency */ public void setBigram(final String word1, final String word2, final int frequency) { - CharGroup charGroup = findWordInTree(mRoot, word1); + CharGroup charGroup = findWordInTree(mRootNodeArray, word1); if (charGroup != null) { - final CharGroup charGroup2 = findWordInTree(mRoot, word2); + final CharGroup charGroup2 = findWordInTree(mRootNodeArray, word2); if (charGroup2 == null) { add(getCodePoints(word2), 0, null, false /* isNotAWord */, false /* isBlacklistEntry */); // The chargroup for the first word may have moved by the above insertion, // if word1 and word2 share a common stem that happens not to have been // a cutting point until now. In this case, we need to refresh charGroup. - charGroup = findWordInTree(mRoot, word1); + charGroup = findWordInTree(mRootNodeArray, word1); } charGroup.addBigram(word2, frequency); } else { @@ -469,38 +470,38 @@ public final class FusionDictionary implements Iterable { return; } - Node currentNode = mRoot; + PtNodeArray currentNodeArray = mRootNodeArray; int charIndex = 0; CharGroup currentGroup = null; int differentCharIndex = 0; // Set by the loop to the index of the char that differs - int nodeIndex = findIndexOfChar(mRoot, word[charIndex]); + int nodeIndex = findIndexOfChar(mRootNodeArray, word[charIndex]); while (CHARACTER_NOT_FOUND_INDEX != nodeIndex) { - currentGroup = currentNode.mData.get(nodeIndex); - differentCharIndex = compareArrays(currentGroup.mChars, word, charIndex); + currentGroup = currentNodeArray.mData.get(nodeIndex); + differentCharIndex = compareCharArrays(currentGroup.mChars, word, charIndex); if (ARRAYS_ARE_EQUAL != differentCharIndex && differentCharIndex < currentGroup.mChars.length) break; if (null == currentGroup.mChildren) break; charIndex += currentGroup.mChars.length; if (charIndex >= word.length) break; - currentNode = currentGroup.mChildren; - nodeIndex = findIndexOfChar(currentNode, word[charIndex]); + currentNodeArray = currentGroup.mChildren; + nodeIndex = findIndexOfChar(currentNodeArray, word[charIndex]); } if (CHARACTER_NOT_FOUND_INDEX == nodeIndex) { // No node at this point to accept the word. Create one. - final int insertionIndex = findInsertionIndex(currentNode, word[charIndex]); + final int insertionIndex = findInsertionIndex(currentNodeArray, word[charIndex]); final CharGroup newGroup = new CharGroup( Arrays.copyOfRange(word, charIndex, word.length), shortcutTargets, null /* bigrams */, frequency, isNotAWord, isBlacklistEntry); - currentNode.mData.add(insertionIndex, newGroup); - if (DBG) checkStack(currentNode); + currentNodeArray.mData.add(insertionIndex, newGroup); + if (DBG) checkStack(currentNodeArray); } else { // There is a word with a common prefix. if (differentCharIndex == currentGroup.mChars.length) { if (charIndex + differentCharIndex >= word.length) { // The new word is a prefix of an existing word, but the node on which it - // should end already exists as is. Since the old CharNode was not a terminal, + // should end already exists as is. Since the old CharGroup was not a terminal, // make it one by filling in its frequency and other attributes currentGroup.update(frequency, shortcutTargets, null, isNotAWord, isBlacklistEntry); @@ -511,7 +512,7 @@ public final class FusionDictionary implements Iterable { Arrays.copyOfRange(word, charIndex + differentCharIndex, word.length), shortcutTargets, null /* bigrams */, frequency, isNotAWord, isBlacklistEntry); - currentGroup.mChildren = new Node(); + currentGroup.mChildren = new PtNodeArray(); currentGroup.mChildren.mData.add(newNode); } } else { @@ -524,7 +525,7 @@ public final class FusionDictionary implements Iterable { } else { // Partial prefix match only. We have to replace the current node with a node // containing the current prefix and create two new ones for the tails. - Node newChildren = new Node(); + PtNodeArray newChildren = new PtNodeArray(); final CharGroup newOldWord = new CharGroup( Arrays.copyOfRange(currentGroup.mChars, differentCharIndex, currentGroup.mChars.length), currentGroup.mShortcutTargets, @@ -552,9 +553,9 @@ public final class FusionDictionary implements Iterable { > currentGroup.mChars[differentCharIndex] ? 1 : 0; newChildren.mData.add(addIndex, newWord); } - currentNode.mData.set(nodeIndex, newParent); + currentNodeArray.mData.set(nodeIndex, newParent); } - if (DBG) checkStack(currentNode); + if (DBG) checkStack(currentNodeArray); } } } @@ -576,7 +577,7 @@ public final class FusionDictionary implements Iterable { * @param dstOffset the offset in the right-hand side string. * @return the index at which the strings differ, or ARRAYS_ARE_EQUAL = 0 if they don't. */ - private static int compareArrays(final int[] src, final int[] dst, int dstOffset) { + private static int compareCharArrays(final int[] src, final int[] dst, int dstOffset) { // We do NOT test the first char, because we come from a method that already // tested it. for (int i = 1; i < src.length; ++i) { @@ -603,10 +604,10 @@ public final class FusionDictionary implements Iterable { final static private CharGroupComparator CHARGROUP_COMPARATOR = new CharGroupComparator(); /** - * Finds the insertion index of a character within a node. + * Finds the insertion index of a character within a node array. */ - private static int findInsertionIndex(final Node node, int character) { - final ArrayList data = node.mData; + private static int findInsertionIndex(final PtNodeArray nodeArray, int character) { + final ArrayList data = nodeArray.mData; final CharGroup reference = new CharGroup(new int[] { character }, null /* shortcutTargets */, null /* bigrams */, 0, false /* isNotAWord */, false /* isBlacklistEntry */); @@ -615,16 +616,16 @@ public final class FusionDictionary implements Iterable { } /** - * Find the index of a char in a node, if it exists. + * Find the index of a char in a node array, if it exists. * - * @param node the node to search in. + * @param nodeArray the node array to search in. * @param character the character to search for. * @return the position of the character if it's there, or CHARACTER_NOT_FOUND_INDEX = -1 else. */ - private static int findIndexOfChar(final Node node, int character) { - final int insertionIndex = findInsertionIndex(node, character); - if (node.mData.size() <= insertionIndex) return CHARACTER_NOT_FOUND_INDEX; - return character == node.mData.get(insertionIndex).mChars[0] ? insertionIndex + private static int findIndexOfChar(final PtNodeArray nodeArray, int character) { + final int insertionIndex = findInsertionIndex(nodeArray, character); + if (nodeArray.mData.size() <= insertionIndex) return CHARACTER_NOT_FOUND_INDEX; + return character == nodeArray.mData.get(insertionIndex).mChars[0] ? insertionIndex : CHARACTER_NOT_FOUND_INDEX; } @@ -632,16 +633,16 @@ public final class FusionDictionary implements Iterable { * Helper method to find a word in a given branch. */ @SuppressWarnings("unused") - public static CharGroup findWordInTree(Node node, final String string) { + public static CharGroup findWordInTree(PtNodeArray nodeArray, final String string) { int index = 0; final StringBuilder checker = DBG ? new StringBuilder() : null; final int[] codePoints = getCodePoints(string); CharGroup currentGroup; do { - int indexOfGroup = findIndexOfChar(node, codePoints[index]); + int indexOfGroup = findIndexOfChar(nodeArray, codePoints[index]); if (CHARACTER_NOT_FOUND_INDEX == indexOfGroup) return null; - currentGroup = node.mData.get(indexOfGroup); + currentGroup = nodeArray.mData.get(indexOfGroup); if (codePoints.length - index < currentGroup.mChars.length) return null; int newIndex = index; @@ -653,9 +654,9 @@ public final class FusionDictionary implements Iterable { if (DBG) checker.append(new String(currentGroup.mChars, 0, currentGroup.mChars.length)); if (index < codePoints.length) { - node = currentGroup.mChildren; + nodeArray = currentGroup.mChildren; } - } while (null != node && index < codePoints.length); + } while (null != nodeArray && index < codePoints.length); if (index < codePoints.length) return null; if (!currentGroup.isTerminal()) return null; @@ -670,20 +671,20 @@ public final class FusionDictionary implements Iterable { if (null == s || "".equals(s)) { throw new RuntimeException("Can't search for a null or empty string"); } - return null != findWordInTree(mRoot, s); + return null != findWordInTree(mRootNodeArray, s); } /** * Recursively count the number of character groups in a given branch of the trie. * - * @param node the parent node. + * @param nodeArray the parent node. * @return the number of char groups in all the branch under this node. */ - public static int countCharGroups(final Node node) { - final int nodeSize = node.mData.size(); + public static int countCharGroups(final PtNodeArray nodeArray) { + final int nodeSize = nodeArray.mData.size(); int size = nodeSize; for (int i = nodeSize - 1; i >= 0; --i) { - CharGroup group = node.mData.get(i); + CharGroup group = nodeArray.mData.get(i); if (null != group.mChildren) size += countCharGroups(group.mChildren); } @@ -693,15 +694,15 @@ public final class FusionDictionary implements Iterable { /** * Recursively count the number of nodes in a given branch of the trie. * - * @param node the node to count. + * @param nodeArray the node array to count. * @return the number of nodes in this branch. */ - public static int countNodes(final Node node) { + public static int countNodeArrays(final PtNodeArray nodeArray) { int size = 1; - for (int i = node.mData.size() - 1; i >= 0; --i) { - CharGroup group = node.mData.get(i); + for (int i = nodeArray.mData.size() - 1; i >= 0; --i) { + CharGroup group = nodeArray.mData.get(i); if (null != group.mChildren) - size += countNodes(group.mChildren); + size += countNodeArrays(group.mChildren); } return size; } @@ -709,10 +710,10 @@ public final class FusionDictionary implements Iterable { // Recursively find out whether there are any bigrams. // This can be pretty expensive especially if there aren't any (we return as soon // as we find one, so it's much cheaper if there are bigrams) - private static boolean hasBigramsInternal(final Node node) { - if (null == node) return false; - for (int i = node.mData.size() - 1; i >= 0; --i) { - CharGroup group = node.mData.get(i); + private static boolean hasBigramsInternal(final PtNodeArray nodeArray) { + if (null == nodeArray) return false; + for (int i = nodeArray.mData.size() - 1; i >= 0; --i) { + CharGroup group = nodeArray.mData.get(i); if (null != group.mBigrams) return true; if (hasBigramsInternal(group.mChildren)) return true; } @@ -729,7 +730,7 @@ public final class FusionDictionary implements Iterable { // find a more efficient way of doing this, without compromising too much on memory // and ease of use. public boolean hasBigrams() { - return hasBigramsInternal(mRoot); + return hasBigramsInternal(mRootNodeArray); } // Historically, the tails of the words were going to be merged to save space. @@ -750,13 +751,13 @@ public final class FusionDictionary implements Iterable { // MakedictLog.i("Merging nodes. Number of nodes : " + countNodes(root)); // MakedictLog.i("Number of groups : " + countCharGroups(root)); // -// final HashMap> repository = -// new HashMap>(); +// final HashMap> repository = +// new HashMap>(); // mergeTailsInner(repository, root); // // MakedictLog.i("Number of different pseudohashes : " + repository.size()); // int size = 0; -// for (ArrayList a : repository.values()) { +// for (ArrayList a : repository.values()) { // size += a.size(); // } // MakedictLog.i("Number of nodes after merge : " + (1 + size)); @@ -764,7 +765,7 @@ public final class FusionDictionary implements Iterable { } // The following methods are used by the deactivated mergeTails() -// private static boolean isEqual(Node a, Node b) { +// private static boolean isEqual(PtNodeArray a, PtNodeArray b) { // if (null == a && null == b) return true; // if (null == a || null == b) return false; // if (a.data.size() != b.data.size()) return false; @@ -781,21 +782,21 @@ public final class FusionDictionary implements Iterable { // return true; // } -// static private HashMap> mergeTailsInner( -// final HashMap> map, final Node node) { -// final ArrayList branches = node.data; +// static private HashMap> mergeTailsInner( +// final HashMap> map, final PtNodeArray nodeArray) { +// final ArrayList branches = nodeArray.data; // final int nodeSize = branches.size(); // for (int i = 0; i < nodeSize; ++i) { // CharGroup group = branches.get(i); // if (null != group.children) { // String pseudoHash = getPseudoHash(group.children); -// ArrayList similarList = map.get(pseudoHash); +// ArrayList similarList = map.get(pseudoHash); // if (null == similarList) { -// similarList = new ArrayList(); +// similarList = new ArrayList(); // map.put(pseudoHash, similarList); // } // boolean merged = false; -// for (Node similar : similarList) { +// for (PtNodeArray similar : similarList) { // if (isEqual(group.children, similar)) { // group.children = similar; // merged = true; @@ -811,9 +812,9 @@ public final class FusionDictionary implements Iterable { // return map; // } -// private static String getPseudoHash(final Node node) { +// private static String getPseudoHash(final PtNodeArray nodeArray) { // StringBuilder s = new StringBuilder(); -// for (CharGroup g : node.data) { +// for (CharGroup g : nodeArray.data) { // s.append(g.frequency); // for (int ch : g.chars) { // s.append(Character.toChars(ch)); @@ -901,6 +902,6 @@ public final class FusionDictionary implements Iterable { */ @Override public Iterator iterator() { - return new DictionaryIterator(mRoot.mData); + return new DictionaryIterator(mRootNodeArray.mData); } } diff --git a/java/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtils.java b/java/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtils.java index aed07fd16..cd03b3822 100644 --- a/java/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtils.java +++ b/java/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtils.java @@ -25,7 +25,7 @@ import com.android.inputmethod.latin.makedict.BinaryDictIOUtils; import com.android.inputmethod.latin.makedict.BinaryDictReader; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FusionDictionary; -import com.android.inputmethod.latin.makedict.FusionDictionary.Node; +import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; import com.android.inputmethod.latin.makedict.PendingAttribute; import com.android.inputmethod.latin.makedict.UnsupportedFormatException; import com.android.inputmethod.latin.personalization.UserHistoryDictionaryBigramList; @@ -78,7 +78,7 @@ public final class UserHistoryDictIOUtils { @UsedForTesting static FusionDictionary constructFusionDictionary( final BigramDictionaryInterface dict, final UserHistoryDictionaryBigramList bigrams) { - final FusionDictionary fusionDict = new FusionDictionary(new Node(), + final FusionDictionary fusionDict = new FusionDictionary(new PtNodeArray(), new FusionDictionary.DictionaryOptions(new HashMap(), false, false)); int profTotal = 0; @@ -102,7 +102,7 @@ public final class UserHistoryDictIOUtils { if (word1 == null) { // unigram fusionDict.add(word2, freq, null, false /* isNotAWord */); } else { // bigram - if (FusionDictionary.findWordInTree(fusionDict.mRoot, word1) == null) { + if (FusionDictionary.findWordInTree(fusionDict.mRootNodeArray, word1) == null) { fusionDict.add(word1, 2, null, false /* isNotAWord */); } fusionDict.setBigram(word1, word2, freq); diff --git a/tests/src/com/android/inputmethod/latin/FusionDictionaryTests.java b/tests/src/com/android/inputmethod/latin/FusionDictionaryTests.java index 65dfd2dde..cadd0f8f3 100644 --- a/tests/src/com/android/inputmethod/latin/FusionDictionaryTests.java +++ b/tests/src/com/android/inputmethod/latin/FusionDictionaryTests.java @@ -20,7 +20,7 @@ import android.test.AndroidTestCase; import android.test.suitebuilder.annotation.SmallTest; import com.android.inputmethod.latin.makedict.FusionDictionary; -import com.android.inputmethod.latin.makedict.FusionDictionary.Node; +import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; import java.util.HashMap; @@ -30,21 +30,21 @@ import java.util.HashMap; @SmallTest public class FusionDictionaryTests extends AndroidTestCase { public void testFindWordInTree() { - FusionDictionary dict = new FusionDictionary(new Node(), + FusionDictionary dict = new FusionDictionary(new PtNodeArray(), new FusionDictionary.DictionaryOptions(new HashMap(), false, false)); dict.add("abc", 10, null, false /* isNotAWord */); - assertNull(FusionDictionary.findWordInTree(dict.mRoot, "aaa")); - assertNotNull(FusionDictionary.findWordInTree(dict.mRoot, "abc")); + assertNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "aaa")); + assertNotNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "abc")); dict.add("aa", 10, null, false /* isNotAWord */); - assertNull(FusionDictionary.findWordInTree(dict.mRoot, "aaa")); - assertNotNull(FusionDictionary.findWordInTree(dict.mRoot, "aa")); + assertNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "aaa")); + assertNotNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "aa")); dict.add("babcd", 10, null, false /* isNotAWord */); dict.add("bacde", 10, null, false /* isNotAWord */); - assertNull(FusionDictionary.findWordInTree(dict.mRoot, "ba")); - assertNotNull(FusionDictionary.findWordInTree(dict.mRoot, "babcd")); - assertNotNull(FusionDictionary.findWordInTree(dict.mRoot, "bacde")); + assertNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "ba")); + assertNotNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "babcd")); + assertNotNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "bacde")); } } diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java index 9664779f0..6d37466b7 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java +++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java @@ -25,7 +25,7 @@ import android.util.SparseArray; import com.android.inputmethod.latin.makedict.BinaryDictDecoder.FusionDictionaryBufferInterface; import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup; -import com.android.inputmethod.latin.makedict.FusionDictionary.Node; +import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import com.android.inputmethod.latin.utils.CollectionUtils; @@ -226,7 +226,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { // check unigram for (final String word : words) { - final CharGroup cg = FusionDictionary.findWordInTree(dict.mRoot, word); + final CharGroup cg = FusionDictionary.findWordInTree(dict.mRootNodeArray, word); assertNotNull(cg); } @@ -234,7 +234,8 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { for (int i = 0; i < bigrams.size(); ++i) { final int w1 = bigrams.keyAt(i); for (final int w2 : bigrams.valueAt(i)) { - final CharGroup cg = FusionDictionary.findWordInTree(dict.mRoot, words.get(w1)); + final CharGroup cg = FusionDictionary.findWordInTree(dict.mRootNodeArray, + words.get(w1)); assertNotNull(words.get(w1) + "," + words.get(w2), cg.getBigram(words.get(w2))); } } @@ -242,7 +243,8 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { // check shortcut if (shortcutMap != null) { for (final Map.Entry> entry : shortcutMap.entrySet()) { - final CharGroup group = FusionDictionary.findWordInTree(dict.mRoot, entry.getKey()); + final CharGroup group = FusionDictionary.findWordInTree(dict.mRootNodeArray, + entry.getKey()); for (final String word : entry.getValue()) { assertNotNull("shortcut not found: " + entry.getKey() + ", " + word, group.getShortcut(word)); @@ -297,7 +299,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { } assertNotNull(file); - final FusionDictionary dict = new FusionDictionary(new Node(), + final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), new FusionDictionary.DictionaryOptions(new HashMap(), false, false)); addUnigrams(words.size(), dict, words, shortcuts); addBigrams(dict, words, bigrams); @@ -440,7 +442,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { assertNotNull(file); // making the dictionary from lists of words. - final FusionDictionary dict = new FusionDictionary(new Node(), + final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), new FusionDictionary.DictionaryOptions( new HashMap(), false, false)); addUnigrams(words.size(), dict, words, null /* shortcutMap */); @@ -538,7 +540,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { } assertNotNull(file); - final FusionDictionary dict = new FusionDictionary(new Node(), + final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), new FusionDictionary.DictionaryOptions( new HashMap(), false, false)); addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */); @@ -599,7 +601,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { } assertNotNull(file); - final FusionDictionary dict = new FusionDictionary(new Node(), + final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), new FusionDictionary.DictionaryOptions( new HashMap(), false, false)); addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */); diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtilsTests.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtilsTests.java index f2476b2e6..011d711de 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtilsTests.java +++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtilsTests.java @@ -24,7 +24,7 @@ import android.util.Log; import com.android.inputmethod.latin.makedict.BinaryDictDecoder.ByteBufferWrapper; import com.android.inputmethod.latin.makedict.BinaryDictDecoder.FusionDictionaryBufferInterface; import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; -import com.android.inputmethod.latin.makedict.FusionDictionary.Node; +import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import com.android.inputmethod.latin.utils.CollectionUtils; @@ -277,7 +277,7 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase { } // set an initial dictionary. - final FusionDictionary dict = new FusionDictionary(new Node(), + final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), new FusionDictionary.DictionaryOptions(new HashMap(), false, false)); dict.add("abcd", 10, null, false); @@ -328,7 +328,7 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase { } // set an initial dictionary. - final FusionDictionary dict = new FusionDictionary(new Node(), + final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), new FusionDictionary.DictionaryOptions(new HashMap(), false, false)); dict.add("abcd", 10, null, false); dict.add("efgh", 15, null, false); @@ -365,7 +365,7 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase { assertNotNull(file); // set an initial dictionary. - final FusionDictionary dict = new FusionDictionary(new Node(), + final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), new FusionDictionary.DictionaryOptions(new HashMap(), false, false)); dict.add("initial", 10, null, false); diff --git a/tests/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtilsTests.java b/tests/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtilsTests.java index ce62bf21a..fd5517665 100644 --- a/tests/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtilsTests.java +++ b/tests/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtilsTests.java @@ -86,7 +86,7 @@ public class UserHistoryDictIOUtilsTests extends AndroidTestCase private void checkWordInFusionDict(final FusionDictionary dict, final String word, final ArrayList expectedBigrams) { - final CharGroup group = FusionDictionary.findWordInTree(dict.mRoot, word); + final CharGroup group = FusionDictionary.findWordInTree(dict.mRootNodeArray, word); assertNotNull(group); assertTrue(group.isTerminal()); diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java index 092ee767f..4b6716936 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java @@ -19,7 +19,7 @@ package com.android.inputmethod.latin.dicttool; import com.android.inputmethod.latin.makedict.FormatSpec; import com.android.inputmethod.latin.makedict.FusionDictionary; import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions; -import com.android.inputmethod.latin.makedict.FusionDictionary.Node; +import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import com.android.inputmethod.latin.makedict.Word; @@ -117,7 +117,7 @@ public class CombinedInputOutput { final boolean processLigatures = FRENCH_LIGATURE_PROCESSING_OPTION.equals(attributes.get(OPTIONS_TAG)); attributes.remove(OPTIONS_TAG); - final FusionDictionary dict = new FusionDictionary(new Node(), new DictionaryOptions( + final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), new DictionaryOptions( attributes, processUmlauts, processLigatures)); String line; diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java index 5c3e87e10..d790d0652 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java @@ -121,7 +121,8 @@ public class Diff extends Dicttool.Command { private static void diffWords(final FusionDictionary dict0, final FusionDictionary dict1) { boolean hasDifferences = false; for (final Word word0 : dict0) { - final CharGroup word1 = FusionDictionary.findWordInTree(dict1.mRoot, word0.mWord); + final CharGroup word1 = FusionDictionary.findWordInTree(dict1.mRootNodeArray, + word0.mWord); if (null == word1) { // This word is not in dict1 System.out.println("Deleted: " + word0.mWord + " " + word0.mFrequency); @@ -150,7 +151,8 @@ public class Diff extends Dicttool.Command { } } for (final Word word1 : dict1) { - final CharGroup word0 = FusionDictionary.findWordInTree(dict0.mRoot, word1.mWord); + final CharGroup word0 = FusionDictionary.findWordInTree(dict0.mRootNodeArray, + word1.mWord); if (null == word0) { // This word is not in dict0 System.out.println("Added: " + word1.mWord + " " + word1.mFrequency); diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java index f2894544f..fa8c5f776 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java @@ -65,7 +65,7 @@ public class Info extends Dicttool.Command { private static void showWordInfo(final FusionDictionary dict, final String word, final boolean plumbing) { - final CharGroup group = FusionDictionary.findWordInTree(dict.mRoot, word); + final CharGroup group = FusionDictionary.findWordInTree(dict.mRootNodeArray, word); if (null == group) { System.out.println(word + " is not in the dictionary"); return; diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java index 1fd2cba7a..4e99bf979 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java @@ -18,7 +18,7 @@ package com.android.inputmethod.latin.dicttool; import com.android.inputmethod.latin.makedict.FusionDictionary; import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions; -import com.android.inputmethod.latin.makedict.FusionDictionary.Node; +import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import com.android.inputmethod.latin.makedict.Word; @@ -124,8 +124,8 @@ public class XmlDictInputOutput { GERMAN_UMLAUT_PROCESSING_OPTION.equals(optionsString); final boolean processLigatures = FRENCH_LIGATURE_PROCESSING_OPTION.equals(optionsString); - mDictionary = new FusionDictionary(new Node(), new DictionaryOptions(attributes, - processUmlauts, processLigatures)); + mDictionary = new FusionDictionary(new PtNodeArray(), + new DictionaryOptions(attributes, processUmlauts, processLigatures)); } else { mState = UNKNOWN; } diff --git a/tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java b/tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java index 01fadfc82..3bda77fe9 100644 --- a/tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java +++ b/tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java @@ -22,7 +22,7 @@ import com.android.inputmethod.latin.makedict.BinaryDictReader; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FusionDictionary; import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions; -import com.android.inputmethod.latin.makedict.FusionDictionary.Node; +import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; import com.android.inputmethod.latin.makedict.UnsupportedFormatException; import junit.framework.TestCase; @@ -42,7 +42,7 @@ public class BinaryDictOffdeviceUtilsTests extends TestCase { public void testGetRawDictWorks() throws IOException, UnsupportedFormatException { // Create a thrice-compressed dictionary file. - final FusionDictionary dict = new FusionDictionary(new Node(), + final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), new DictionaryOptions(new HashMap(), false /* germanUmlautProcessing */, false /* frenchLigatureProcessing */)); dict.add("foo", TEST_FREQ, null, false /* isNotAWord */); @@ -72,7 +72,8 @@ public class BinaryDictOffdeviceUtilsTests extends TestCase { final FusionDictionary resultDict = BinaryDictDecoder.readDictionaryBinary(reader, null /* dict : an optional dictionary to add words to, or null */); assertEquals("Dictionary can't be read back correctly", - FusionDictionary.findWordInTree(resultDict.mRoot, "foo").getFrequency(), TEST_FREQ); + FusionDictionary.findWordInTree(resultDict.mRootNodeArray, "foo").getFrequency(), + TEST_FREQ); } public void testGetRawDictFails() throws IOException { diff --git a/tools/dicttool/tests/com/android/inputmethod/latin/makedict/BinaryDictEncoderFlattenTreeTests.java b/tools/dicttool/tests/com/android/inputmethod/latin/makedict/BinaryDictEncoderFlattenTreeTests.java index 2fcfb5e15..fe6738303 100644 --- a/tools/dicttool/tests/com/android/inputmethod/latin/makedict/BinaryDictEncoderFlattenTreeTests.java +++ b/tools/dicttool/tests/com/android/inputmethod/latin/makedict/BinaryDictEncoderFlattenTreeTests.java @@ -17,7 +17,7 @@ package com.android.inputmethod.latin.makedict; import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions; -import com.android.inputmethod.latin.makedict.FusionDictionary.Node; +import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; import junit.framework.TestCase; @@ -31,7 +31,7 @@ public class BinaryDictEncoderFlattenTreeTests extends TestCase { // Test the flattened array contains the expected number of nodes, and // that it does not contain any duplicates. public void testFlattenNodes() { - final FusionDictionary dict = new FusionDictionary(new Node(), + final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), new DictionaryOptions(new HashMap(), false /* germanUmlautProcessing */, false /* frenchLigatureProcessing */)); dict.add("foo", 1, null, false /* isNotAWord */); @@ -39,10 +39,10 @@ public class BinaryDictEncoderFlattenTreeTests extends TestCase { dict.add("ftb", 1, null, false /* isNotAWord */); dict.add("bar", 1, null, false /* isNotAWord */); dict.add("fool", 1, null, false /* isNotAWord */); - final ArrayList result = BinaryDictEncoder.flattenTree(dict.mRoot); + final ArrayList result = BinaryDictEncoder.flattenTree(dict.mRootNodeArray); assertEquals(4, result.size()); while (!result.isEmpty()) { - final Node n = result.remove(0); + final PtNodeArray n = result.remove(0); assertFalse("Flattened array contained the same node twice", result.contains(n)); } } diff --git a/tools/dicttool/tests/com/android/inputmethod/latin/makedict/FusionDictionaryTest.java b/tools/dicttool/tests/com/android/inputmethod/latin/makedict/FusionDictionaryTest.java index 8efb4a4b9..22c0ceb4c 100644 --- a/tools/dicttool/tests/com/android/inputmethod/latin/makedict/FusionDictionaryTest.java +++ b/tools/dicttool/tests/com/android/inputmethod/latin/makedict/FusionDictionaryTest.java @@ -19,7 +19,7 @@ package com.android.inputmethod.latin.makedict; import com.android.inputmethod.latin.makedict.FusionDictionary; import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup; import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions; -import com.android.inputmethod.latin.makedict.FusionDictionary.Node; +import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; import com.android.inputmethod.latin.makedict.Word; import junit.framework.TestCase; @@ -72,7 +72,7 @@ public class FusionDictionaryTest extends TestCase { assertNotNull(dict); for (final String word : words) { if (--limit < 0) return; - final CharGroup cg = FusionDictionary.findWordInTree(dict.mRoot, word); + final CharGroup cg = FusionDictionary.findWordInTree(dict.mRootNodeArray, word); assertNotNull(cg); } } @@ -95,7 +95,7 @@ public class FusionDictionaryTest extends TestCase { // Test the flattened array contains the expected number of nodes, and // that it does not contain any duplicates. public void testFusion() { - final FusionDictionary dict = new FusionDictionary(new Node(), + final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), new DictionaryOptions(new HashMap(), false /* germanUmlautProcessing */, false /* frenchLigatureProcessing */)); final long time = System.currentTimeMillis();