Merge "Rename Node to PtNodeArray"

main
Jean Chalard 2013-08-16 08:00:54 +00:00 committed by Android (Google) Code Review
commit 21dddb1462
19 changed files with 400 additions and 379 deletions

View File

@ -23,7 +23,7 @@ import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
import com.android.inputmethod.latin.makedict.BinaryDictEncoder;
import com.android.inputmethod.latin.makedict.FormatSpec;
import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
import com.android.inputmethod.latin.utils.CollectionUtils;
@ -51,7 +51,7 @@ public class DictionaryWriter extends AbstractDictionaryWriter {
@Override
public void clear() {
final HashMap<String, String> attributes = CollectionUtils.newHashMap();
mFusionDictionary = new FusionDictionary(new Node(),
mFusionDictionary = new FusionDictionary(new PtNodeArray(),
new FusionDictionary.DictionaryOptions(attributes, false, false));
}

View File

@ -20,7 +20,7 @@ import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import com.android.inputmethod.latin.utils.JniUtils;
@ -548,31 +548,31 @@ public final class BinaryDictDecoder {
}
/**
* Reads a single node from a buffer.
* Reads a single node array from a buffer.
*
* This methods reads the file at the current position. A node is fully expected to start at
* the current position.
* This will recursively read other nodes into the structure, populating the reverse
* This method reads the file at the current position. A node array is fully expected to start
* at the current position.
* This will recursively read other node arrays into the structure, populating the reverse
* maps on the fly and using them to keep track of already read nodes.
*
* @param buffer the buffer, correctly positioned at the start of a node.
* @param buffer the buffer, correctly positioned at the start of a node array.
* @param headerSize the size, in bytes, of the file header.
* @param reverseNodeMap a mapping from addresses to already read nodes.
* @param reverseNodeArrayMap a mapping from addresses to already read node arrays.
* @param reverseGroupMap a mapping from addresses to already read character groups.
* @param options file format options.
* @return the read node with all his children already read.
* @return the read node array with all its children already read.
*/
private static Node readNode(final FusionDictionaryBufferInterface buffer, final int headerSize,
final Map<Integer, Node> reverseNodeMap, final Map<Integer, CharGroup> reverseGroupMap,
final FormatOptions options)
private static PtNodeArray readNodeArray(final FusionDictionaryBufferInterface buffer,
final int headerSize, final Map<Integer, PtNodeArray> reverseNodeArrayMap,
final Map<Integer, CharGroup> reverseGroupMap, final FormatOptions options)
throws IOException {
final ArrayList<CharGroup> nodeContents = new ArrayList<CharGroup>();
final int nodeOrigin = buffer.position() - headerSize;
final ArrayList<CharGroup> nodeArrayContents = new ArrayList<CharGroup>();
final int nodeArrayOrigin = buffer.position() - headerSize;
do { // Scan the linked-list node.
final int nodeHeadPosition = buffer.position() - headerSize;
final int nodeArrayHeadPosition = buffer.position() - headerSize;
final int count = readCharGroupCount(buffer);
int groupOffset = nodeHeadPosition + BinaryDictIOUtils.getGroupCountSize(count);
int groupOffset = nodeArrayHeadPosition + BinaryDictIOUtils.getGroupCountSize(count);
for (int i = count; i > 0; --i) { // Scan the array of CharGroup.
CharGroupInfo info = readCharGroup(buffer, groupOffset, options);
if (BinaryDictIOUtils.isMovedGroup(info.mFlags, options)) continue;
@ -589,21 +589,21 @@ public final class BinaryDictDecoder {
}
}
if (BinaryDictIOUtils.hasChildrenAddress(info.mChildrenAddress)) {
Node children = reverseNodeMap.get(info.mChildrenAddress);
PtNodeArray children = reverseNodeArrayMap.get(info.mChildrenAddress);
if (null == children) {
final int currentPosition = buffer.position();
buffer.position(info.mChildrenAddress + headerSize);
children = readNode(
buffer, headerSize, reverseNodeMap, reverseGroupMap, options);
children = readNodeArray(
buffer, headerSize, reverseNodeArrayMap, reverseGroupMap, options);
buffer.position(currentPosition);
}
nodeContents.add(
nodeArrayContents.add(
new CharGroup(info.mCharacters, shortcutTargets, bigrams,
info.mFrequency,
0 != (info.mFlags & FormatSpec.FLAG_IS_NOT_A_WORD),
0 != (info.mFlags & FormatSpec.FLAG_IS_BLACKLISTED), children));
} else {
nodeContents.add(
nodeArrayContents.add(
new CharGroup(info.mCharacters, shortcutTargets, bigrams,
info.mFrequency,
0 != (info.mFlags & FormatSpec.FLAG_IS_NOT_A_WORD),
@ -624,11 +624,11 @@ public final class BinaryDictDecoder {
} while (options.mSupportsDynamicUpdate &&
buffer.position() != FormatSpec.NO_FORWARD_LINK_ADDRESS);
final Node node = new Node(nodeContents);
node.mCachedAddressBeforeUpdate = nodeOrigin;
node.mCachedAddressAfterUpdate = nodeOrigin;
reverseNodeMap.put(node.mCachedAddressAfterUpdate, node);
return node;
final PtNodeArray nodeArray = new PtNodeArray(nodeArrayContents);
nodeArray.mCachedAddressBeforeUpdate = nodeArrayOrigin;
nodeArray.mCachedAddressAfterUpdate = nodeArrayOrigin;
reverseNodeArrayMap.put(nodeArray.mCachedAddressAfterUpdate, nodeArray);
return nodeArray;
}
/**
@ -733,10 +733,10 @@ public final class BinaryDictDecoder {
// Read header
final FileHeader header = readHeader(reader.getBuffer());
Map<Integer, Node> reverseNodeMapping = new TreeMap<Integer, Node>();
Map<Integer, PtNodeArray> reverseNodeArrayMapping = new TreeMap<Integer, PtNodeArray>();
Map<Integer, CharGroup> reverseGroupMapping = new TreeMap<Integer, CharGroup>();
final Node root = readNode(reader.getBuffer(), header.mHeaderSize, reverseNodeMapping,
reverseGroupMapping, header.mFormatOptions);
final PtNodeArray root = readNodeArray(reader.getBuffer(), header.mHeaderSize,
reverseNodeArrayMapping, reverseGroupMapping, header.mFormatOptions);
FusionDictionary newDict = new FusionDictionary(root, header.mDictionaryOptions);
if (null != dict) {
@ -803,8 +803,6 @@ public final class BinaryDictDecoder {
/**
* Calculate bigram frequency from compressed value
*
* @see #makeBigramFlags
*
* @param unigramFrequency
* @param bigramFrequency compressed frequency
* @return approximate bigram frequency

View File

@ -20,7 +20,7 @@ import com.android.inputmethod.latin.makedict.BinaryDictDecoder.CharEncoding;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import java.io.ByteArrayOutputStream;
@ -78,12 +78,12 @@ public class BinaryDictEncoder {
}
/**
* Compute the binary size of the group count for a node
* @param node the node
* Compute the binary size of the group count for a node array.
* @param nodeArray the nodeArray
* @return the size of the group count, either 1 or 2 bytes.
*/
private static int getGroupCountSize(final Node node) {
return BinaryDictIOUtils.getGroupCountSize(node.mData.size());
private static int getGroupCountSize(final PtNodeArray nodeArray) {
return BinaryDictIOUtils.getGroupCountSize(nodeArray.mData.size());
}
/**
@ -138,15 +138,17 @@ public class BinaryDictEncoder {
}
/**
* Compute the maximum size of a node, assuming 3-byte addresses for everything, and caches
* it in the 'actualSize' member of the node.
* Compute the maximum size of each node of a node array, assuming 3-byte addresses for
* everything, and caches it in the 'mCachedSize' member of the nodes; deduce the size of
* the containing node array, and cache it in its 'mCachedSize' member.
*
* @param node the node to compute the maximum size of.
* @param nodeArray the node array to compute the maximum size of.
* @param options file format options.
*/
private static void calculateNodeMaximumSize(final Node node, final FormatOptions options) {
int size = getGroupCountSize(node);
for (CharGroup g : node.mData) {
private static void calculateNodeArrayMaximumSize(final PtNodeArray nodeArray,
final FormatOptions options) {
int size = getGroupCountSize(nodeArray);
for (CharGroup g : nodeArray.mData) {
final int groupSize = getCharGroupMaximumSize(g, options);
g.mCachedSize = groupSize;
size += groupSize;
@ -154,7 +156,7 @@ public class BinaryDictEncoder {
if (options.mSupportsDynamicUpdate) {
size += FormatSpec.FORWARD_LINK_ADDRESS_SIZE;
}
node.mCachedSize = size;
nodeArray.mCachedSize = size;
}
/**
@ -199,14 +201,16 @@ public class BinaryDictEncoder {
// This method is responsible for finding a nice ordering of the nodes that favors run-time
// cache performance and dictionary size.
/* package for tests */ static ArrayList<Node> flattenTree(final Node root) {
final int treeSize = FusionDictionary.countCharGroups(root);
/* package for tests */ static ArrayList<PtNodeArray> flattenTree(
final PtNodeArray rootNodeArray) {
final int treeSize = FusionDictionary.countCharGroups(rootNodeArray);
MakedictLog.i("Counted nodes : " + treeSize);
final ArrayList<Node> flatTree = new ArrayList<Node>(treeSize);
return flattenTreeInner(flatTree, root);
final ArrayList<PtNodeArray> flatTree = new ArrayList<PtNodeArray>(treeSize);
return flattenTreeInner(flatTree, rootNodeArray);
}
private static ArrayList<Node> flattenTreeInner(final ArrayList<Node> list, final Node node) {
private static ArrayList<PtNodeArray> flattenTreeInner(final ArrayList<PtNodeArray> list,
final PtNodeArray nodeArray) {
// Removing the node is necessary if the tails are merged, because we would then
// add the same node several times when we only want it once. A number of places in
// the code also depends on any node being only once in the list.
@ -224,8 +228,8 @@ public class BinaryDictEncoder {
// this simple list.remove operation O(n*n) overall. On Android this overhead is very
// high.
// For future reference, the code to remove duplicate is a simple : list.remove(node);
list.add(node);
final ArrayList<CharGroup> branches = node.mData;
list.add(nodeArray);
final ArrayList<CharGroup> branches = nodeArray.mData;
final int nodeSize = branches.size();
for (CharGroup group : branches) {
if (null != group.mChildren) flattenTreeInner(list, group.mChildren);
@ -234,52 +238,60 @@ public class BinaryDictEncoder {
}
/**
* Get the offset from a position inside a current node to a target node, during update.
* Get the offset from a position inside a current node array to a target node array, during
* update.
*
* If the current node is before the target node, the target node has not been updated yet,
* so we should return the offset from the old position of the current node to the old position
* of the target node. If on the other hand the target is before the current node, it already
* has been updated, so we should return the offset from the new position in the current node
* to the new position in the target node.
* @param currentNode the node containing the CharGroup where the offset will be written
* @param offsetFromStartOfCurrentNode the offset, in bytes, from the start of currentNode
* @param targetNode the target node to get the offset to
* @return the offset to the target node
* If the current node array is before the target node array, the target node array has not
* been updated yet, so we should return the offset from the old position of the current node
* array to the old position of the target node array. If on the other hand the target is
* before the current node array, it already has been updated, so we should return the offset
* from the new position in the current node array to the new position in the target node
* array.
*
* @param currentNodeArray node array containing the CharGroup where the offset will be written
* @param offsetFromStartOfCurrentNodeArray offset, in bytes, from the start of currentNodeArray
* @param targetNodeArray the target node array to get the offset to
* @return the offset to the target node array
*/
private static int getOffsetToTargetNodeDuringUpdate(final Node currentNode,
final int offsetFromStartOfCurrentNode, final Node targetNode) {
final boolean isTargetBeforeCurrent = (targetNode.mCachedAddressBeforeUpdate
< currentNode.mCachedAddressBeforeUpdate);
private static int getOffsetToTargetNodeArrayDuringUpdate(final PtNodeArray currentNodeArray,
final int offsetFromStartOfCurrentNodeArray, final PtNodeArray targetNodeArray) {
final boolean isTargetBeforeCurrent = (targetNodeArray.mCachedAddressBeforeUpdate
< currentNodeArray.mCachedAddressBeforeUpdate);
if (isTargetBeforeCurrent) {
return targetNode.mCachedAddressAfterUpdate
- (currentNode.mCachedAddressAfterUpdate + offsetFromStartOfCurrentNode);
return targetNodeArray.mCachedAddressAfterUpdate
- (currentNodeArray.mCachedAddressAfterUpdate
+ offsetFromStartOfCurrentNodeArray);
} else {
return targetNode.mCachedAddressBeforeUpdate
- (currentNode.mCachedAddressBeforeUpdate + offsetFromStartOfCurrentNode);
return targetNodeArray.mCachedAddressBeforeUpdate
- (currentNodeArray.mCachedAddressBeforeUpdate
+ offsetFromStartOfCurrentNodeArray);
}
}
/**
* Get the offset from a position inside a current node to a target CharGroup, during update.
* @param currentNode the node containing the CharGroup where the offset will be written
* @param offsetFromStartOfCurrentNode the offset, in bytes, from the start of currentNode
* Get the offset from a position inside a current node array to a target CharGroup, during
* update.
*
* @param currentNodeArray node array containing the CharGroup where the offset will be written
* @param offsetFromStartOfCurrentNodeArray offset, in bytes, from the start of currentNodeArray
* @param targetCharGroup the target CharGroup to get the offset to
* @return the offset to the target CharGroup
*/
// TODO: is there any way to factorize this method with the one above?
private static int getOffsetToTargetCharGroupDuringUpdate(final Node currentNode,
final int offsetFromStartOfCurrentNode, final CharGroup targetCharGroup) {
final int oldOffsetBasePoint = currentNode.mCachedAddressBeforeUpdate
+ offsetFromStartOfCurrentNode;
private static int getOffsetToTargetCharGroupDuringUpdate(final PtNodeArray currentNodeArray,
final int offsetFromStartOfCurrentNodeArray, final CharGroup targetCharGroup) {
final int oldOffsetBasePoint = currentNodeArray.mCachedAddressBeforeUpdate
+ offsetFromStartOfCurrentNodeArray;
final boolean isTargetBeforeCurrent = (targetCharGroup.mCachedAddressBeforeUpdate
< oldOffsetBasePoint);
// If the target is before the current node, then its address has already been updated.
// We can use the AfterUpdate member, and compare it to our own member after update.
// Otherwise, the AfterUpdate member is not updated yet, so we need to use the BeforeUpdate
// member, and of course we have to compare this to our own address before update.
// If the target is before the current node array, then its address has already been
// updated. We can use the AfterUpdate member, and compare it to our own member after
// update. Otherwise, the AfterUpdate member is not updated yet, so we need to use the
// BeforeUpdate member, and of course we have to compare this to our own address before
// update.
if (isTargetBeforeCurrent) {
final int newOffsetBasePoint = currentNode.mCachedAddressAfterUpdate
+ offsetFromStartOfCurrentNode;
final int newOffsetBasePoint = currentNodeArray.mCachedAddressAfterUpdate
+ offsetFromStartOfCurrentNodeArray;
return targetCharGroup.mCachedAddressAfterUpdate - newOffsetBasePoint;
} else {
return targetCharGroup.mCachedAddressBeforeUpdate - oldOffsetBasePoint;
@ -287,26 +299,26 @@ public class BinaryDictEncoder {
}
/**
* Computes the actual node size, based on the cached addresses of the children nodes.
* Computes the actual node array size, based on the cached addresses of the children nodes.
*
* Each node stores its tentative address. During dictionary address computing, these
* are not final, but they can be used to compute the node size (the node size depends
* on the address of the children because the number of bytes necessary to store an
* address depends on its numeric value. The return value indicates whether the node
* Each node array stores its tentative address. During dictionary address computing, these
* are not final, but they can be used to compute the node array size (the node array size
* depends on the address of the children because the number of bytes necessary to store an
* address depends on its numeric value). The return value indicates whether the node array
* contents (as in, any of the addresses stored in the cache fields) have changed with
* respect to their previous value.
*
* @param node the node to compute the size of.
* @param nodeArray the node array to compute the size of.
* @param dict the dictionary in which the word/attributes are to be found.
* @param formatOptions file format options.
* @return false if none of the cached addresses inside the node changed, true otherwise.
* @return false if none of the cached addresses inside the node array changed, true otherwise.
*/
private static boolean computeActualNodeSize(final Node node, final FusionDictionary dict,
final FormatOptions formatOptions) {
private static boolean computeActualNodeArraySize(final PtNodeArray nodeArray,
final FusionDictionary dict, final FormatOptions formatOptions) {
boolean changed = false;
int size = getGroupCountSize(node);
for (CharGroup group : node.mData) {
group.mCachedAddressAfterUpdate = node.mCachedAddressAfterUpdate + size;
int size = getGroupCountSize(nodeArray);
for (CharGroup group : nodeArray.mData) {
group.mCachedAddressAfterUpdate = nodeArray.mCachedAddressAfterUpdate + size;
if (group.mCachedAddressAfterUpdate != group.mCachedAddressBeforeUpdate) {
changed = true;
}
@ -318,16 +330,16 @@ public class BinaryDictEncoder {
if (formatOptions.mSupportsDynamicUpdate) {
groupSize += FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE;
} else {
groupSize += getByteSize(getOffsetToTargetNodeDuringUpdate(node,
groupSize += getByteSize(getOffsetToTargetNodeArrayDuringUpdate(nodeArray,
groupSize + size, group.mChildren));
}
}
groupSize += getShortcutListSize(group.mShortcutTargets);
if (null != group.mBigrams) {
for (WeightedString bigram : group.mBigrams) {
final int offset = getOffsetToTargetCharGroupDuringUpdate(node,
final int offset = getOffsetToTargetCharGroupDuringUpdate(nodeArray,
groupSize + size + FormatSpec.GROUP_FLAGS_SIZE,
FusionDictionary.findWordInTree(dict.mRoot, bigram.mWord));
FusionDictionary.findWordInTree(dict.mRootNodeArray, bigram.mWord));
groupSize += getByteSize(offset) + FormatSpec.GROUP_FLAGS_SIZE;
}
}
@ -337,49 +349,49 @@ public class BinaryDictEncoder {
if (formatOptions.mSupportsDynamicUpdate) {
size += FormatSpec.FORWARD_LINK_ADDRESS_SIZE;
}
if (node.mCachedSize != size) {
node.mCachedSize = size;
if (nodeArray.mCachedSize != size) {
nodeArray.mCachedSize = size;
changed = true;
}
return changed;
}
/**
* Initializes the cached addresses of nodes from their size.
* Initializes the cached addresses of node arrays and their containing nodes from their size.
*
* @param flatNodes the array of nodes.
* @param flatNodes the list of node arrays.
* @param formatOptions file format options.
* @return the byte size of the entire stack.
*/
private static int initializeNodesCachedAddresses(final ArrayList<Node> flatNodes,
private static int initializeNodeArraysCachedAddresses(final ArrayList<PtNodeArray> flatNodes,
final FormatOptions formatOptions) {
int nodeOffset = 0;
for (final Node n : flatNodes) {
n.mCachedAddressBeforeUpdate = nodeOffset;
int groupCountSize = getGroupCountSize(n);
int nodeArrayOffset = 0;
for (final PtNodeArray nodeArray : flatNodes) {
nodeArray.mCachedAddressBeforeUpdate = nodeArrayOffset;
int groupCountSize = getGroupCountSize(nodeArray);
int groupOffset = 0;
for (final CharGroup g : n.mData) {
for (final CharGroup g : nodeArray.mData) {
g.mCachedAddressBeforeUpdate = g.mCachedAddressAfterUpdate =
groupCountSize + nodeOffset + groupOffset;
groupCountSize + nodeArrayOffset + groupOffset;
groupOffset += g.mCachedSize;
}
final int nodeSize = groupCountSize + groupOffset
+ (formatOptions.mSupportsDynamicUpdate
? FormatSpec.FORWARD_LINK_ADDRESS_SIZE : 0);
nodeOffset += n.mCachedSize;
nodeArrayOffset += nodeArray.mCachedSize;
}
return nodeOffset;
return nodeArrayOffset;
}
/**
* Updates the cached addresses of nodes after recomputing their new positions.
* Updates the cached addresses of node arrays after recomputing their new positions.
*
* @param flatNodes the array of nodes.
* @param flatNodes the list of node arrays.
*/
private static void updateNodeCachedAddresses(final ArrayList<Node> flatNodes) {
for (final Node n : flatNodes) {
n.mCachedAddressBeforeUpdate = n.mCachedAddressAfterUpdate;
for (final CharGroup g : n.mData) {
private static void updateNodeArraysCachedAddresses(final ArrayList<PtNodeArray> flatNodes) {
for (final PtNodeArray nodeArray : flatNodes) {
nodeArray.mCachedAddressBeforeUpdate = nodeArray.mCachedAddressAfterUpdate;
for (final CharGroup g : nodeArray.mData) {
g.mCachedAddressBeforeUpdate = g.mCachedAddressAfterUpdate;
}
}
@ -391,11 +403,11 @@ public class BinaryDictEncoder {
* The parent addresses are used by some binary formats at write-to-disk time. Not all formats
* need them. In particular, version 2 does not need them, and version 3 does.
*
* @param flatNodes the flat array of nodes to fill in
* @param flatNodes the flat array of node arrays to fill in
*/
private static void computeParentAddresses(final ArrayList<Node> flatNodes) {
for (final Node node : flatNodes) {
for (final CharGroup group : node.mData) {
private static void computeParentAddresses(final ArrayList<PtNodeArray> flatNodes) {
for (final PtNodeArray nodeArray : flatNodes) {
for (final CharGroup group : nodeArray.mData) {
if (null != group.mChildren) {
// Assign my address to children's parent address
// Here BeforeUpdate and AfterUpdate addresses have the same value, so it
@ -408,25 +420,25 @@ public class BinaryDictEncoder {
}
/**
* Compute the addresses and sizes of an ordered node array.
* Compute the addresses and sizes of an ordered list of node arrays.
*
* This method takes a node array and will update its cached address and size values
* so that they can be written into a file. It determines the smallest size each of the
* nodes can be given the addresses of its children and attributes, and store that into
* This method takes a list of node arrays and will update their cached address and size
* values so that they can be written into a file. It determines the smallest size each of the
* node arrays can be given the addresses of its children and attributes, and stores that into
* each node.
* The order of the node is given by the order of the array. This method makes no effort
* to find a good order; it only mechanically computes the size this order results in.
*
* @param dict the dictionary
* @param flatNodes the ordered array of nodes
* @param flatNodes the ordered list of node arrays
* @param formatOptions file format options.
* @return the same array it was passed. The nodes have been updated for address and size.
*/
private static ArrayList<Node> computeAddresses(final FusionDictionary dict,
final ArrayList<Node> flatNodes, final FormatOptions formatOptions) {
private static ArrayList<PtNodeArray> computeAddresses(final FusionDictionary dict,
final ArrayList<PtNodeArray> flatNodes, final FormatOptions formatOptions) {
// First get the worst possible sizes and offsets
for (final Node n : flatNodes) calculateNodeMaximumSize(n, formatOptions);
final int offset = initializeNodesCachedAddresses(flatNodes, formatOptions);
for (final PtNodeArray n : flatNodes) calculateNodeArrayMaximumSize(n, formatOptions);
final int offset = initializeNodeArraysCachedAddresses(flatNodes, formatOptions);
MakedictLog.i("Compressing the array addresses. Original size : " + offset);
MakedictLog.i("(Recursively seen size : " + offset + ")");
@ -435,17 +447,19 @@ public class BinaryDictEncoder {
boolean changesDone = false;
do {
changesDone = false;
int nodeStartOffset = 0;
for (final Node n : flatNodes) {
n.mCachedAddressAfterUpdate = nodeStartOffset;
final int oldNodeSize = n.mCachedSize;
final boolean changed = computeActualNodeSize(n, dict, formatOptions);
final int newNodeSize = n.mCachedSize;
if (oldNodeSize < newNodeSize) throw new RuntimeException("Increased size ?!");
nodeStartOffset += newNodeSize;
int nodeArrayStartOffset = 0;
for (final PtNodeArray nodeArray : flatNodes) {
nodeArray.mCachedAddressAfterUpdate = nodeArrayStartOffset;
final int oldNodeArraySize = nodeArray.mCachedSize;
final boolean changed = computeActualNodeArraySize(nodeArray, dict, formatOptions);
final int newNodeArraySize = nodeArray.mCachedSize;
if (oldNodeArraySize < newNodeArraySize) {
throw new RuntimeException("Increased size ?!");
}
nodeArrayStartOffset += newNodeArraySize;
changesDone |= changed;
}
updateNodeCachedAddresses(flatNodes);
updateNodeArraysCachedAddresses(flatNodes);
++passes;
if (passes > MAX_PASSES) throw new RuntimeException("Too many passes - probably a bug");
} while (changesDone);
@ -453,10 +467,10 @@ public class BinaryDictEncoder {
if (formatOptions.mSupportsDynamicUpdate) {
computeParentAddresses(flatNodes);
}
final Node lastNode = flatNodes.get(flatNodes.size() - 1);
final PtNodeArray lastNodeArray = flatNodes.get(flatNodes.size() - 1);
MakedictLog.i("Compression complete in " + passes + " passes.");
MakedictLog.i("After address compression : "
+ (lastNode.mCachedAddressAfterUpdate + lastNode.mCachedSize));
+ (lastNodeArray.mCachedAddressAfterUpdate + lastNodeArray.mCachedSize));
return flatNodes;
}
@ -464,25 +478,25 @@ public class BinaryDictEncoder {
/**
* Sanity-checking method.
*
* This method checks an array of node for juxtaposition, that is, it will do
* nothing if each node's cached address is actually the previous node's address
* This method checks a list of node arrays for juxtaposition, that is, it will do
* nothing if each node array's cached address is actually the previous node array's address
* plus the previous node's size.
* If this is not the case, it will throw an exception.
*
* @param array the array node to check
* @param arrays the list of node arrays to check
*/
private static void checkFlatNodeArray(final ArrayList<Node> array) {
private static void checkFlatNodeArrayList(final ArrayList<PtNodeArray> arrays) {
int offset = 0;
int index = 0;
for (final Node n : array) {
for (final PtNodeArray nodeArray : arrays) {
// BeforeUpdate and AfterUpdate addresses are the same here, so it does not matter
// which we use.
if (n.mCachedAddressAfterUpdate != offset) {
if (nodeArray.mCachedAddressAfterUpdate != offset) {
throw new RuntimeException("Wrong address for node " + index
+ " : expected " + offset + ", got " + n.mCachedAddressAfterUpdate);
+ " : expected " + offset + ", got " + nodeArray.mCachedAddressAfterUpdate);
}
++index;
offset += n.mCachedSize;
offset += nodeArray.mCachedSize;
}
}
@ -707,26 +721,23 @@ public class BinaryDictEncoder {
}
/**
* Write a node to memory. The node is expected to have its final position cached.
* Write a node array to memory. The node array is expected to have its final position cached.
*
* This can be an empty map, but the more is inside the faster the lookups will be. It can
* be carried on as long as nodes do not move.
*
* @param dict the dictionary the node is a part of (for relative offsets).
* @param dict the dictionary the node array is a part of (for relative offsets).
* @param buffer the memory buffer to write to.
* @param node the node to write.
* @param nodeArray the node array to write.
* @param formatOptions file format options.
* @return the address of the END of the node.
*/
@SuppressWarnings("unused")
private static int writePlacedNode(final FusionDictionary dict, byte[] buffer,
final Node node, final FormatOptions formatOptions) {
final PtNodeArray nodeArray, final FormatOptions formatOptions) {
// TODO: Make the code in common with BinaryDictIOUtils#writeCharGroup
int index = node.mCachedAddressAfterUpdate;
int index = nodeArray.mCachedAddressAfterUpdate;
final int groupCount = node.mData.size();
final int countSize = getGroupCountSize(node);
final int parentAddress = node.mCachedParentAddress;
final int groupCount = nodeArray.mData.size();
final int countSize = getGroupCountSize(nodeArray);
final int parentAddress = nodeArray.mCachedParentAddress;
if (1 == countSize) {
buffer[index++] = (byte)groupCount;
} else if (2 == countSize) {
@ -739,7 +750,7 @@ public class BinaryDictEncoder {
}
int groupAddress = index;
for (int i = 0; i < groupCount; ++i) {
final CharGroup group = node.mData.get(i);
final CharGroup group = nodeArray.mData.get(i);
if (index != group.mCachedAddressAfterUpdate) {
throw new RuntimeException("Bug: write index is not the same as the cached address "
+ "of the group : " + index + " <> " + group.mCachedAddressAfterUpdate);
@ -762,7 +773,7 @@ public class BinaryDictEncoder {
index = writeParentAddress(buffer, index, parentAddress, formatOptions);
} else {
index = writeParentAddress(buffer, index, parentAddress
+ (node.mCachedAddressAfterUpdate - group.mCachedAddressAfterUpdate),
+ (nodeArray.mCachedAddressAfterUpdate - group.mCachedAddressAfterUpdate),
formatOptions);
}
@ -812,7 +823,7 @@ public class BinaryDictEncoder {
while (bigramIterator.hasNext()) {
final WeightedString bigram = bigramIterator.next();
final CharGroup target =
FusionDictionary.findWordInTree(dict.mRoot, bigram.mWord);
FusionDictionary.findWordInTree(dict.mRootNodeArray, bigram.mWord);
final int addressOfBigram = target.mCachedAddressAfterUpdate;
final int unigramFrequencyForThisWord = target.mFrequency;
++groupAddress;
@ -832,57 +843,58 @@ public class BinaryDictEncoder {
= FormatSpec.NO_FORWARD_LINK_ADDRESS;
index += FormatSpec.FORWARD_LINK_ADDRESS_SIZE;
}
if (index != node.mCachedAddressAfterUpdate + node.mCachedSize) throw new RuntimeException(
"Not the same size : written "
+ (index - node.mCachedAddressAfterUpdate) + " bytes from a node that should have "
+ node.mCachedSize + " bytes");
if (index != nodeArray.mCachedAddressAfterUpdate + nodeArray.mCachedSize) {
throw new RuntimeException(
"Not the same size : written " + (index - nodeArray.mCachedAddressAfterUpdate)
+ " bytes from a node that should have " + nodeArray.mCachedSize + " bytes");
}
return index;
}
/**
* Dumps a collection of useful statistics about a node array.
* Dumps a collection of useful statistics about a list of node arrays.
*
* This prints purely informative stuff, like the total estimated file size, the
* number of nodes, of character groups, the repartition of each address size, etc
* number of node arrays, of character groups, the repartition of each address size, etc
*
* @param nodes the node array.
* @param nodeArrays the list of node arrays.
*/
private static void showStatistics(ArrayList<Node> nodes) {
private static void showStatistics(ArrayList<PtNodeArray> nodeArrays) {
int firstTerminalAddress = Integer.MAX_VALUE;
int lastTerminalAddress = Integer.MIN_VALUE;
int size = 0;
int charGroups = 0;
int maxGroups = 0;
int maxRuns = 0;
for (final Node n : nodes) {
if (maxGroups < n.mData.size()) maxGroups = n.mData.size();
for (final CharGroup cg : n.mData) {
for (final PtNodeArray nodeArray : nodeArrays) {
if (maxGroups < nodeArray.mData.size()) maxGroups = nodeArray.mData.size();
for (final CharGroup cg : nodeArray.mData) {
++charGroups;
if (cg.mChars.length > maxRuns) maxRuns = cg.mChars.length;
if (cg.mFrequency >= 0) {
if (n.mCachedAddressAfterUpdate < firstTerminalAddress)
firstTerminalAddress = n.mCachedAddressAfterUpdate;
if (n.mCachedAddressAfterUpdate > lastTerminalAddress)
lastTerminalAddress = n.mCachedAddressAfterUpdate;
if (nodeArray.mCachedAddressAfterUpdate < firstTerminalAddress)
firstTerminalAddress = nodeArray.mCachedAddressAfterUpdate;
if (nodeArray.mCachedAddressAfterUpdate > lastTerminalAddress)
lastTerminalAddress = nodeArray.mCachedAddressAfterUpdate;
}
}
if (n.mCachedAddressAfterUpdate + n.mCachedSize > size) {
size = n.mCachedAddressAfterUpdate + n.mCachedSize;
if (nodeArray.mCachedAddressAfterUpdate + nodeArray.mCachedSize > size) {
size = nodeArray.mCachedAddressAfterUpdate + nodeArray.mCachedSize;
}
}
final int[] groupCounts = new int[maxGroups + 1];
final int[] runCounts = new int[maxRuns + 1];
for (final Node n : nodes) {
++groupCounts[n.mData.size()];
for (final CharGroup cg : n.mData) {
for (final PtNodeArray nodeArray : nodeArrays) {
++groupCounts[nodeArray.mData.size()];
for (final CharGroup cg : nodeArray.mData) {
++runCounts[cg.mChars.length];
}
}
MakedictLog.i("Statistics:\n"
+ " total file size " + size + "\n"
+ " " + nodes.size() + " nodes\n"
+ " " + charGroups + " groups (" + ((float)charGroups / nodes.size())
+ " " + nodeArrays.size() + " node arrays\n"
+ " " + charGroups + " groups (" + ((float)charGroups / nodeArrays.size())
+ " groups per node)\n"
+ " first terminal at " + firstTerminalAddress + "\n"
+ " last terminal at " + lastTerminalAddress + "\n"
@ -909,11 +921,12 @@ public class BinaryDictEncoder {
final FusionDictionary dict, final FormatOptions formatOptions)
throws IOException, UnsupportedFormatException {
// Addresses are limited to 3 bytes, but since addresses can be relative to each node, the
// structure itself is not limited to 16MB. However, if it is over 16MB deciding the order
// of the nodes becomes a quite complicated problem, because though the dictionary itself
// does not have a size limit, each node must still be within 16MB of all its children and
// parents. As long as this is ensured, the dictionary file may grow to any size.
// Addresses are limited to 3 bytes, but since addresses can be relative to each node
// array, the structure itself is not limited to 16MB. However, if it is over 16MB deciding
// the order of the node arrays becomes a quite complicated problem, because though the
// dictionary itself does not have a size limit, each node array must still be within 16MB
// of all its children and parents. As long as this is ensured, the dictionary file may
// grow to any size.
final int version = formatOptions.mVersion;
if (version < FormatSpec.MINIMUM_SUPPORTED_VERSION
@ -964,23 +977,23 @@ public class BinaryDictEncoder {
// Leave the choice of the optimal node order to the flattenTree function.
MakedictLog.i("Flattening the tree...");
ArrayList<Node> flatNodes = flattenTree(dict.mRoot);
ArrayList<PtNodeArray> flatNodes = flattenTree(dict.mRootNodeArray);
MakedictLog.i("Computing addresses...");
computeAddresses(dict, flatNodes, formatOptions);
MakedictLog.i("Checking array...");
if (DBG) checkFlatNodeArray(flatNodes);
if (DBG) checkFlatNodeArrayList(flatNodes);
// Create a buffer that matches the final dictionary size.
final Node lastNode = flatNodes.get(flatNodes.size() - 1);
final int bufferSize = lastNode.mCachedAddressAfterUpdate + lastNode.mCachedSize;
final PtNodeArray lastNodeArray = flatNodes.get(flatNodes.size() - 1);
final int bufferSize = lastNodeArray.mCachedAddressAfterUpdate + lastNodeArray.mCachedSize;
final byte[] buffer = new byte[bufferSize];
int index = 0;
MakedictLog.i("Writing file...");
int dataEndOffset = 0;
for (Node n : flatNodes) {
dataEndOffset = writePlacedNode(dict, buffer, n, formatOptions);
for (PtNodeArray nodeArray : flatNodes) {
dataEndOffset = writePlacedNode(dict, buffer, nodeArray, formatOptions);
}
if (DBG) showStatistics(flatNodes);

View File

@ -59,7 +59,7 @@ public final class BinaryDictIOUtils {
}
/**
* Tours all node without recursive call.
* Retrieves all node arrays without recursive call.
*/
private static void readUnigramsAndBigramsBinaryInner(
final FusionDictionaryBufferInterface buffer, final int headerSize,
@ -116,7 +116,7 @@ public final class BinaryDictIOUtils {
if (formatOptions.mSupportsDynamicUpdate) {
final int forwardLinkAddress = buffer.readUnsignedInt24();
if (forwardLinkAddress != FormatSpec.NO_FORWARD_LINK_ADDRESS) {
// the node has a forward link.
// The node array has a forward link.
p.mNumOfCharGroup = Position.NOT_READ_GROUPCOUNT;
p.mAddress = forwardLinkAddress;
} else {
@ -126,7 +126,7 @@ public final class BinaryDictIOUtils {
stack.pop();
}
} else {
// the node has more groups.
// The node array has more groups.
p.mAddress = buffer.position();
}
@ -139,14 +139,14 @@ public final class BinaryDictIOUtils {
/**
* Reads unigrams and bigrams from the binary file.
* Doesn't make the memory representation of the dictionary.
* Doesn't store a full memory representation of the dictionary.
*
* @param reader the reader.
* @param words the map to store the address as a key and the word as a value.
* @param frequencies the map to store the address as a key and the frequency as a value.
* @param bigrams the map to store the address as a key and the list of address as a value.
* @throws IOException
* @throws UnsupportedFormatException
* @throws IOException if the file can't be read.
* @throws UnsupportedFormatException if the format of the file is not recognized.
*/
public static void readUnigramsAndBigramsBinary(final BinaryDictReader reader,
final Map<Integer, String> words, final Map<Integer, Integer> frequencies,
@ -165,8 +165,8 @@ public final class BinaryDictIOUtils {
* @param buffer the buffer to read.
* @param word the word we search for.
* @return the address of the terminal node.
* @throws IOException
* @throws UnsupportedFormatException
* @throws IOException if the file can't be read.
* @throws UnsupportedFormatException if the format of the file is not recognized.
*/
@UsedForTesting
public static int getTerminalPosition(final FusionDictionaryBufferInterface buffer,
@ -224,9 +224,9 @@ public final class BinaryDictIOUtils {
}
// If we found the next char group, it is under the file pointer.
// But if not, we are at the end of this node so we expect to have
// But if not, we are at the end of this node array so we expect to have
// a forward link address that we need to consult and possibly resume
// search on the next node in the linked list.
// search on the next node array in the linked list.
if (foundNextCharGroup) break;
if (!header.mFormatOptions.mSupportsDynamicUpdate) {
return FormatSpec.NOT_VALID_WORD;
@ -365,9 +365,10 @@ public final class BinaryDictIOUtils {
}
/**
* Write a char group to an output stream.
* A char group is an in-memory representation of a node in trie.
* A char group info is an on-disk representation of a node.
* Write a char group to an output stream from a CharGroupInfo.
* A char group is an in-memory representation of a node in the patricia trie.
* A char group info is a container for low-level information about how the
* char group is stored in the binary format.
*
* @param destination the stream to write.
* @param info the char group info to be written.
@ -427,7 +428,7 @@ public final class BinaryDictIOUtils {
if (info.mBigrams != null) {
// TODO: Consolidate this code with the code that computes the size of the bigram list
// in BinaryDictEncoder#computeActualNodeSize
// in BinaryDictEncoder#computeActualNodeArraySize
for (int i = 0; i < info.mBigrams.size(); ++i) {
final int bigramFrequency = info.mBigrams.get(i).mFrequency;
@ -479,14 +480,14 @@ public final class BinaryDictIOUtils {
}
/**
* Write a node to the stream.
* Write a node array to the stream.
*
* @param destination the stream to write.
* @param infos groups to be written.
* @param infos an array of CharGroupInfo to be written.
* @return the size written, in bytes.
* @throws IOException
*/
static int writeNode(final OutputStream destination, final CharGroupInfo[] infos)
static int writeNodes(final OutputStream destination, final CharGroupInfo[] infos)
throws IOException {
int size = getGroupCountSize(infos.length);
switch (getGroupCountSize(infos.length)) {
@ -604,12 +605,12 @@ public final class BinaryDictIOUtils {
public static int getGroupCountSize(final int count) {
if (FormatSpec.MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT >= count) {
return 1;
} else if (FormatSpec.MAX_CHARGROUPS_IN_A_NODE >= count) {
} else if (FormatSpec.MAX_CHARGROUPS_IN_A_PT_NODE_ARRAY >= count) {
return 2;
} else {
throw new RuntimeException("Can't have more than "
+ FormatSpec.MAX_CHARGROUPS_IN_A_NODE + " groups in a node (found " + count
+ ")");
+ FormatSpec.MAX_CHARGROUPS_IN_A_PT_NODE_ARRAY + " groups in a node (found "
+ count + ")");
}
}

View File

@ -86,7 +86,7 @@ public final class DynamicBinaryDictIOUtils {
}
final int flags = buffer.readUnsignedByte();
if (BinaryDictIOUtils.isMovedGroup(flags, formatOptions)) {
// if the group is moved, the parent address is stored in the destination group.
// If the group is moved, the parent address is stored in the destination group.
// We are guaranteed to process the destination group later, so there is no need to
// update anything here.
buffer.position(originalPosition);
@ -101,10 +101,10 @@ public final class DynamicBinaryDictIOUtils {
}
/**
* Update parent addresses in a Node that is referred to by nodeOriginAddress.
* Update parent addresses in a node array stored at nodeOriginAddress.
*
* @param buffer the buffer to be modified.
* @param nodeOriginAddress the address of a modified Node.
* @param nodeOriginAddress the address of the node array to update.
* @param newParentAddress the address to be written.
* @param formatOptions file format options.
*/
@ -154,7 +154,7 @@ public final class DynamicBinaryDictIOUtils {
*/
private static int moveCharGroup(final OutputStream destination,
final FusionDictionaryBufferInterface buffer, final CharGroupInfo info,
final int nodeOriginAddress, final int oldGroupAddress,
final int nodeArrayOriginAddress, final int oldGroupAddress,
final FormatOptions formatOptions) throws IOException {
updateParentAddress(buffer, oldGroupAddress, buffer.limit() + 1, formatOptions);
buffer.position(oldGroupAddress);
@ -163,15 +163,16 @@ public final class DynamicBinaryDictIOUtils {
buffer.put((byte)(FormatSpec.FLAG_IS_MOVED | (currentFlags
& (~FormatSpec.MASK_MOVE_AND_DELETE_FLAG))));
int size = FormatSpec.GROUP_FLAGS_SIZE;
updateForwardLink(buffer, nodeOriginAddress, buffer.limit(), formatOptions);
size += BinaryDictIOUtils.writeNode(destination, new CharGroupInfo[] { info });
updateForwardLink(buffer, nodeArrayOriginAddress, buffer.limit(), formatOptions);
size += BinaryDictIOUtils.writeNodes(destination, new CharGroupInfo[] { info });
return size;
}
@SuppressWarnings("unused")
private static void updateForwardLink(final FusionDictionaryBufferInterface buffer,
final int nodeOriginAddress, final int newNodeAddress,
final int nodeArrayOriginAddress, final int newNodeArrayAddress,
final FormatOptions formatOptions) {
buffer.position(nodeOriginAddress);
buffer.position(nodeArrayOriginAddress);
int jumpCount = 0;
while (jumpCount++ < MAX_JUMPS) {
final int count = BinaryDictDecoder.readCharGroupCount(buffer);
@ -179,7 +180,7 @@ public final class DynamicBinaryDictIOUtils {
final int forwardLinkAddress = buffer.readUnsignedInt24();
if (forwardLinkAddress == FormatSpec.NO_FORWARD_LINK_ADDRESS) {
buffer.position(buffer.position() - FormatSpec.FORWARD_LINK_ADDRESS_SIZE);
BinaryDictIOUtils.writeSInt24ToBuffer(buffer, newNodeAddress);
BinaryDictIOUtils.writeSInt24ToBuffer(buffer, newNodeArrayAddress);
return;
}
buffer.position(forwardLinkAddress);
@ -190,57 +191,59 @@ public final class DynamicBinaryDictIOUtils {
}
/**
* Move a group that is referred to by oldGroupOrigin to the tail of the file.
* And set the children address to the byte after the group.
* Move a group that is referred to by oldGroupOrigin to the tail of the file, and set the
* children address to the byte after the group.
*
* @param nodeOrigin the address of the tail of the file.
* @param characters
* @param length
* @param flags
* @param frequency
* @param parentAddress
* @param shortcutTargets
* @param bigrams
* @param fileEndAddress the address of the tail of the file.
* @param codePoints the characters to put inside the group.
* @param length how many code points to read from codePoints.
* @param flags the flags for this group.
* @param frequency the frequency of this terminal.
* @param parentAddress the address of the parent group of this group.
* @param shortcutTargets the shortcut targets for this group.
* @param bigrams the bigrams for this group.
* @param destination the stream representing the tail of the file.
* @param buffer the buffer representing the (constant-size) body of the file.
* @param oldNodeOrigin
* @param oldGroupOrigin
* @param formatOptions
* @param oldNodeArrayOrigin the origin of the old node array this group was a part of.
* @param oldGroupOrigin the old origin where this group used to be stored.
* @param formatOptions format options for this dictionary.
* @return the size written, in bytes.
* @throws IOException
* @throws IOException if the file can't be accessed.
*/
private static int moveGroup(final int nodeOrigin, final int[] characters, final int length,
final int flags, final int frequency, final int parentAddress,
private static int moveGroup(final int fileEndAddress, final int[] codePoints,
final int length, final int flags, final int frequency, final int parentAddress,
final ArrayList<WeightedString> shortcutTargets,
final ArrayList<PendingAttribute> bigrams, final OutputStream destination,
final FusionDictionaryBufferInterface buffer, final int oldNodeOrigin,
final FusionDictionaryBufferInterface buffer, final int oldNodeArrayOrigin,
final int oldGroupOrigin, final FormatOptions formatOptions) throws IOException {
int size = 0;
final int newGroupOrigin = nodeOrigin + 1;
final int[] writtenCharacters = Arrays.copyOfRange(characters, 0, length);
final int newGroupOrigin = fileEndAddress + 1;
final int[] writtenCharacters = Arrays.copyOfRange(codePoints, 0, length);
final CharGroupInfo tmpInfo = new CharGroupInfo(newGroupOrigin, -1 /* endAddress */,
flags, writtenCharacters, frequency, parentAddress, FormatSpec.NO_CHILDREN_ADDRESS,
shortcutTargets, bigrams);
size = BinaryDictIOUtils.computeGroupSize(tmpInfo, formatOptions);
final CharGroupInfo newInfo = new CharGroupInfo(newGroupOrigin, newGroupOrigin + size,
flags, writtenCharacters, frequency, parentAddress,
nodeOrigin + 1 + size + FormatSpec.FORWARD_LINK_ADDRESS_SIZE, shortcutTargets,
fileEndAddress + 1 + size + FormatSpec.FORWARD_LINK_ADDRESS_SIZE, shortcutTargets,
bigrams);
moveCharGroup(destination, buffer, newInfo, oldNodeOrigin, oldGroupOrigin, formatOptions);
moveCharGroup(destination, buffer, newInfo, oldNodeArrayOrigin, oldGroupOrigin,
formatOptions);
return 1 + size + FormatSpec.FORWARD_LINK_ADDRESS_SIZE;
}
/**
* Insert a word into a binary dictionary.
*
* @param buffer
* @param destination
* @param word
* @param frequency
* @param bigramStrings
* @param shortcuts
* @throws IOException
* @throws UnsupportedFormatException
* @param buffer the buffer containing the existing dictionary.
* @param destination a stream to the underlying file, with the pointer at the end of the file.
* @param word the word to insert.
* @param frequency the frequency of the new word.
* @param bigramStrings bigram list, or null if none.
* @param shortcuts shortcut list, or null if none.
* @param isBlackListEntry whether this should be a blacklist entry.
* @throws IOException if the file can't be accessed.
* @throws UnsupportedFormatException if the existing dictionary is in an unexpected format.
*/
// TODO: Support batch insertion.
// TODO: Remove @UsedForTesting once UserHistoryDictionary is implemented by BinaryDictionary.
@ -323,7 +326,7 @@ public final class DynamicBinaryDictIOUtils {
currentInfo.mFlags, characters2, currentInfo.mFrequency,
newNodeAddress + 1, currentInfo.mChildrenAddress,
currentInfo.mShortcutTargets, currentInfo.mBigrams);
BinaryDictIOUtils.writeNode(destination, new CharGroupInfo[] { newInfo2 });
BinaryDictIOUtils.writeNodes(destination, new CharGroupInfo[] { newInfo2 });
return;
} else if (codePoints[wordPos + p] != currentInfo.mCharacters[p]) {
if (p > 0) {
@ -386,7 +389,7 @@ public final class DynamicBinaryDictIOUtils {
newNodeAddress + written, -1 /* endAddress */, flags,
newCharacters, frequency, newNodeAddress + 1,
FormatSpec.NO_CHILDREN_ADDRESS, shortcuts, bigrams);
BinaryDictIOUtils.writeNode(destination,
BinaryDictIOUtils.writeNodes(destination,
new CharGroupInfo[] { suffixInfo, newInfo });
return;
}
@ -438,7 +441,7 @@ public final class DynamicBinaryDictIOUtils {
final CharGroupInfo newInfo = new CharGroupInfo(newGroupAddress, -1, flags,
characters, frequency, address, FormatSpec.NO_CHILDREN_ADDRESS,
shortcuts, bigrams);
BinaryDictIOUtils.writeNode(destination, new CharGroupInfo[] { newInfo });
BinaryDictIOUtils.writeNodes(destination, new CharGroupInfo[] { newInfo });
return;
}
buffer.position(currentInfo.mChildrenAddress);
@ -482,7 +485,7 @@ public final class DynamicBinaryDictIOUtils {
final CharGroupInfo newInfo = new CharGroupInfo(newNodeAddress + 1,
-1 /* endAddress */, flags, characters, frequency, nodeParentAddress,
FormatSpec.NO_CHILDREN_ADDRESS, shortcuts, bigrams);
BinaryDictIOUtils.writeNode(destination, new CharGroupInfo[]{ newInfo });
BinaryDictIOUtils.writeNodes(destination, new CharGroupInfo[]{ newInfo });
return;
} else {
depth--;

View File

@ -60,7 +60,7 @@ public final class FormatSpec {
*/
/*
* Array of Node(FusionDictionary.Node) layout is as follows:
* Node array (FusionDictionary.PtNodeArray) layout is as follows:
*
* g |
* r | the number of groups, 1 or 2 bytes.
@ -86,7 +86,7 @@ public final class FormatSpec {
* linkaddress
*/
/* Node(CharGroup) layout is as follows:
/* Node (FusionDictionary.CharGroup) layout is as follows:
* | IF !SUPPORTS_DYNAMIC_UPDATE
* | addressType xx : mask with MASK_GROUP_ADDRESS_TYPE
* | 2 bits, 00 = no children : FLAG_GROUP_ADDRESS_TYPE_NOADDRESS
@ -251,7 +251,7 @@ public final class FormatSpec {
static final int INVALID_CHARACTER = -1;
static final int MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT = 0x7F; // 127
static final int MAX_CHARGROUPS_IN_A_NODE = 0x7FFF; // 32767
static final int MAX_CHARGROUPS_IN_A_PT_NODE_ARRAY = 0x7FFF; // 32767
static final int MAX_BIGRAMS_IN_A_GROUP = 10000;
static final int MAX_TERMINAL_FREQUENCY = 255;

View File

@ -37,14 +37,14 @@ public final class FusionDictionary implements Iterable<Word> {
private static int CHARACTER_NOT_FOUND_INDEX = -1;
/**
* A node of the dictionary, containing several CharGroups.
* A node array of the dictionary, containing several CharGroups.
*
* A node is but an ordered array of CharGroups, which essentially contain all the
* A PtNodeArray is but an ordered array of CharGroups, which essentially contain all the
* real information.
* This class also contains fields to cache size and address, to help with binary
* generation.
*/
public static final class Node {
public static final class PtNodeArray {
ArrayList<CharGroup> mData;
// To help with binary generation
int mCachedSize = Integer.MIN_VALUE;
@ -57,10 +57,10 @@ public final class FusionDictionary implements Iterable<Word> {
int mCachedAddressAfterUpdate = Integer.MIN_VALUE;
int mCachedParentAddress = 0;
public Node() {
public PtNodeArray() {
mData = new ArrayList<CharGroup>();
}
public Node(ArrayList<CharGroup> data) {
public PtNodeArray(ArrayList<CharGroup> data) {
mData = data;
}
}
@ -98,7 +98,7 @@ public final class FusionDictionary implements Iterable<Word> {
* This is the central class of the in-memory representation. A CharGroup is what can
* be seen as a traditional "trie node", except it can hold several characters at the
* same time. A CharGroup essentially represents one or several characters in the middle
* of the trie trie; as such, it can be a terminal, and it can have children.
* of the trie tree; as such, it can be a terminal, and it can have children.
* In this in-memory representation, whether the CharGroup is a terminal or not is represented
* in the frequency, where NOT_A_TERMINAL (= -1) means this is not a terminal and any other
* value is the frequency of this terminal. A terminal may have non-null shortcuts and/or
@ -110,7 +110,7 @@ public final class FusionDictionary implements Iterable<Word> {
ArrayList<WeightedString> mShortcutTargets;
ArrayList<WeightedString> mBigrams;
int mFrequency; // NOT_A_TERMINAL == mFrequency indicates this is not a terminal.
Node mChildren;
PtNodeArray mChildren;
boolean mIsNotAWord; // Only a shortcut
boolean mIsBlacklistEntry;
// mCachedSize and mCachedAddressBefore/AfterUpdate are helpers for binary dictionary
@ -137,7 +137,8 @@ public final class FusionDictionary implements Iterable<Word> {
public CharGroup(final int[] chars, final ArrayList<WeightedString> shortcutTargets,
final ArrayList<WeightedString> bigrams, final int frequency,
final boolean isNotAWord, final boolean isBlacklistEntry, final Node children) {
final boolean isNotAWord, final boolean isBlacklistEntry,
final PtNodeArray children) {
mChars = chars;
mFrequency = frequency;
mShortcutTargets = shortcutTargets;
@ -149,7 +150,7 @@ public final class FusionDictionary implements Iterable<Word> {
public void addChild(CharGroup n) {
if (null == mChildren) {
mChildren = new Node();
mChildren = new PtNodeArray();
}
mChildren.mData.add(n);
}
@ -344,10 +345,10 @@ public final class FusionDictionary implements Iterable<Word> {
}
public final DictionaryOptions mOptions;
public final Node mRoot;
public final PtNodeArray mRootNodeArray;
public FusionDictionary(final Node root, final DictionaryOptions options) {
mRoot = root;
public FusionDictionary(final PtNodeArray rootNodeArray, final DictionaryOptions options) {
mRootNodeArray = rootNodeArray;
mOptions = options;
}
@ -406,13 +407,13 @@ public final class FusionDictionary implements Iterable<Word> {
}
/**
* Sanity check for a node.
* Sanity check for a node array.
*
* This method checks that all CharGroups in a node are ordered as expected.
* This method checks that all CharGroups in a node array are ordered as expected.
* If they are, nothing happens. If they aren't, an exception is thrown.
*/
private void checkStack(Node node) {
ArrayList<CharGroup> stack = node.mData;
private void checkStack(PtNodeArray nodeArray) {
ArrayList<CharGroup> stack = nodeArray.mData;
int lastValue = -1;
for (int i = 0; i < stack.size(); ++i) {
int currentValue = stack.get(i).mChars[0];
@ -431,16 +432,16 @@ public final class FusionDictionary implements Iterable<Word> {
* @param frequency the bigram frequency
*/
public void setBigram(final String word1, final String word2, final int frequency) {
CharGroup charGroup = findWordInTree(mRoot, word1);
CharGroup charGroup = findWordInTree(mRootNodeArray, word1);
if (charGroup != null) {
final CharGroup charGroup2 = findWordInTree(mRoot, word2);
final CharGroup charGroup2 = findWordInTree(mRootNodeArray, word2);
if (charGroup2 == null) {
add(getCodePoints(word2), 0, null, false /* isNotAWord */,
false /* isBlacklistEntry */);
// The chargroup for the first word may have moved by the above insertion,
// if word1 and word2 share a common stem that happens not to have been
// a cutting point until now. In this case, we need to refresh charGroup.
charGroup = findWordInTree(mRoot, word1);
charGroup = findWordInTree(mRootNodeArray, word1);
}
charGroup.addBigram(word2, frequency);
} else {
@ -469,38 +470,38 @@ public final class FusionDictionary implements Iterable<Word> {
return;
}
Node currentNode = mRoot;
PtNodeArray currentNodeArray = mRootNodeArray;
int charIndex = 0;
CharGroup currentGroup = null;
int differentCharIndex = 0; // Set by the loop to the index of the char that differs
int nodeIndex = findIndexOfChar(mRoot, word[charIndex]);
int nodeIndex = findIndexOfChar(mRootNodeArray, word[charIndex]);
while (CHARACTER_NOT_FOUND_INDEX != nodeIndex) {
currentGroup = currentNode.mData.get(nodeIndex);
differentCharIndex = compareArrays(currentGroup.mChars, word, charIndex);
currentGroup = currentNodeArray.mData.get(nodeIndex);
differentCharIndex = compareCharArrays(currentGroup.mChars, word, charIndex);
if (ARRAYS_ARE_EQUAL != differentCharIndex
&& differentCharIndex < currentGroup.mChars.length) break;
if (null == currentGroup.mChildren) break;
charIndex += currentGroup.mChars.length;
if (charIndex >= word.length) break;
currentNode = currentGroup.mChildren;
nodeIndex = findIndexOfChar(currentNode, word[charIndex]);
currentNodeArray = currentGroup.mChildren;
nodeIndex = findIndexOfChar(currentNodeArray, word[charIndex]);
}
if (CHARACTER_NOT_FOUND_INDEX == nodeIndex) {
// No node at this point to accept the word. Create one.
final int insertionIndex = findInsertionIndex(currentNode, word[charIndex]);
final int insertionIndex = findInsertionIndex(currentNodeArray, word[charIndex]);
final CharGroup newGroup = new CharGroup(
Arrays.copyOfRange(word, charIndex, word.length),
shortcutTargets, null /* bigrams */, frequency, isNotAWord, isBlacklistEntry);
currentNode.mData.add(insertionIndex, newGroup);
if (DBG) checkStack(currentNode);
currentNodeArray.mData.add(insertionIndex, newGroup);
if (DBG) checkStack(currentNodeArray);
} else {
// There is a word with a common prefix.
if (differentCharIndex == currentGroup.mChars.length) {
if (charIndex + differentCharIndex >= word.length) {
// The new word is a prefix of an existing word, but the node on which it
// should end already exists as is. Since the old CharNode was not a terminal,
// should end already exists as is. Since the old CharGroup was not a terminal,
// make it one by filling in its frequency and other attributes
currentGroup.update(frequency, shortcutTargets, null, isNotAWord,
isBlacklistEntry);
@ -511,7 +512,7 @@ public final class FusionDictionary implements Iterable<Word> {
Arrays.copyOfRange(word, charIndex + differentCharIndex, word.length),
shortcutTargets, null /* bigrams */, frequency, isNotAWord,
isBlacklistEntry);
currentGroup.mChildren = new Node();
currentGroup.mChildren = new PtNodeArray();
currentGroup.mChildren.mData.add(newNode);
}
} else {
@ -524,7 +525,7 @@ public final class FusionDictionary implements Iterable<Word> {
} else {
// Partial prefix match only. We have to replace the current node with a node
// containing the current prefix and create two new ones for the tails.
Node newChildren = new Node();
PtNodeArray newChildren = new PtNodeArray();
final CharGroup newOldWord = new CharGroup(
Arrays.copyOfRange(currentGroup.mChars, differentCharIndex,
currentGroup.mChars.length), currentGroup.mShortcutTargets,
@ -552,9 +553,9 @@ public final class FusionDictionary implements Iterable<Word> {
> currentGroup.mChars[differentCharIndex] ? 1 : 0;
newChildren.mData.add(addIndex, newWord);
}
currentNode.mData.set(nodeIndex, newParent);
currentNodeArray.mData.set(nodeIndex, newParent);
}
if (DBG) checkStack(currentNode);
if (DBG) checkStack(currentNodeArray);
}
}
}
@ -576,7 +577,7 @@ public final class FusionDictionary implements Iterable<Word> {
* @param dstOffset the offset in the right-hand side string.
* @return the index at which the strings differ, or ARRAYS_ARE_EQUAL = 0 if they don't.
*/
private static int compareArrays(final int[] src, final int[] dst, int dstOffset) {
private static int compareCharArrays(final int[] src, final int[] dst, int dstOffset) {
// We do NOT test the first char, because we come from a method that already
// tested it.
for (int i = 1; i < src.length; ++i) {
@ -603,10 +604,10 @@ public final class FusionDictionary implements Iterable<Word> {
final static private CharGroupComparator CHARGROUP_COMPARATOR = new CharGroupComparator();
/**
* Finds the insertion index of a character within a node.
* Finds the insertion index of a character within a node array.
*/
private static int findInsertionIndex(final Node node, int character) {
final ArrayList<CharGroup> data = node.mData;
private static int findInsertionIndex(final PtNodeArray nodeArray, int character) {
final ArrayList<CharGroup> data = nodeArray.mData;
final CharGroup reference = new CharGroup(new int[] { character },
null /* shortcutTargets */, null /* bigrams */, 0, false /* isNotAWord */,
false /* isBlacklistEntry */);
@ -615,16 +616,16 @@ public final class FusionDictionary implements Iterable<Word> {
}
/**
* Find the index of a char in a node, if it exists.
* Find the index of a char in a node array, if it exists.
*
* @param node the node to search in.
* @param nodeArray the node array to search in.
* @param character the character to search for.
* @return the position of the character if it's there, or CHARACTER_NOT_FOUND_INDEX = -1 otherwise.
*/
private static int findIndexOfChar(final Node node, int character) {
final int insertionIndex = findInsertionIndex(node, character);
if (node.mData.size() <= insertionIndex) return CHARACTER_NOT_FOUND_INDEX;
return character == node.mData.get(insertionIndex).mChars[0] ? insertionIndex
private static int findIndexOfChar(final PtNodeArray nodeArray, int character) {
final int insertionIndex = findInsertionIndex(nodeArray, character);
if (nodeArray.mData.size() <= insertionIndex) return CHARACTER_NOT_FOUND_INDEX;
return character == nodeArray.mData.get(insertionIndex).mChars[0] ? insertionIndex
: CHARACTER_NOT_FOUND_INDEX;
}
@ -632,16 +633,16 @@ public final class FusionDictionary implements Iterable<Word> {
* Helper method to find a word in a given branch.
*/
@SuppressWarnings("unused")
public static CharGroup findWordInTree(Node node, final String string) {
public static CharGroup findWordInTree(PtNodeArray nodeArray, final String string) {
int index = 0;
final StringBuilder checker = DBG ? new StringBuilder() : null;
final int[] codePoints = getCodePoints(string);
CharGroup currentGroup;
do {
int indexOfGroup = findIndexOfChar(node, codePoints[index]);
int indexOfGroup = findIndexOfChar(nodeArray, codePoints[index]);
if (CHARACTER_NOT_FOUND_INDEX == indexOfGroup) return null;
currentGroup = node.mData.get(indexOfGroup);
currentGroup = nodeArray.mData.get(indexOfGroup);
if (codePoints.length - index < currentGroup.mChars.length) return null;
int newIndex = index;
@ -653,9 +654,9 @@ public final class FusionDictionary implements Iterable<Word> {
if (DBG) checker.append(new String(currentGroup.mChars, 0, currentGroup.mChars.length));
if (index < codePoints.length) {
node = currentGroup.mChildren;
nodeArray = currentGroup.mChildren;
}
} while (null != node && index < codePoints.length);
} while (null != nodeArray && index < codePoints.length);
if (index < codePoints.length) return null;
if (!currentGroup.isTerminal()) return null;
@ -670,20 +671,20 @@ public final class FusionDictionary implements Iterable<Word> {
if (null == s || "".equals(s)) {
throw new RuntimeException("Can't search for a null or empty string");
}
return null != findWordInTree(mRoot, s);
return null != findWordInTree(mRootNodeArray, s);
}
/**
* Recursively count the number of character groups in a given branch of the trie.
*
* @param node the parent node.
* @param nodeArray the parent node array.
* @return the number of char groups in the whole branch under this node array.
*/
public static int countCharGroups(final Node node) {
final int nodeSize = node.mData.size();
public static int countCharGroups(final PtNodeArray nodeArray) {
final int nodeSize = nodeArray.mData.size();
int size = nodeSize;
for (int i = nodeSize - 1; i >= 0; --i) {
CharGroup group = node.mData.get(i);
CharGroup group = nodeArray.mData.get(i);
if (null != group.mChildren)
size += countCharGroups(group.mChildren);
}
@ -693,15 +694,15 @@ public final class FusionDictionary implements Iterable<Word> {
/**
* Recursively count the number of node arrays in a given branch of the trie.
*
* @param node the node to count.
* @param nodeArray the node array to count.
* @return the number of node arrays in this branch.
*/
public static int countNodes(final Node node) {
public static int countNodeArrays(final PtNodeArray nodeArray) {
int size = 1;
for (int i = node.mData.size() - 1; i >= 0; --i) {
CharGroup group = node.mData.get(i);
for (int i = nodeArray.mData.size() - 1; i >= 0; --i) {
CharGroup group = nodeArray.mData.get(i);
if (null != group.mChildren)
size += countNodes(group.mChildren);
size += countNodeArrays(group.mChildren);
}
return size;
}
@ -709,10 +710,10 @@ public final class FusionDictionary implements Iterable<Word> {
// Recursively find out whether there are any bigrams.
// This can be pretty expensive especially if there aren't any (we return as soon
// as we find one, so it's much cheaper if there are bigrams)
private static boolean hasBigramsInternal(final Node node) {
if (null == node) return false;
for (int i = node.mData.size() - 1; i >= 0; --i) {
CharGroup group = node.mData.get(i);
private static boolean hasBigramsInternal(final PtNodeArray nodeArray) {
if (null == nodeArray) return false;
for (int i = nodeArray.mData.size() - 1; i >= 0; --i) {
CharGroup group = nodeArray.mData.get(i);
if (null != group.mBigrams) return true;
if (hasBigramsInternal(group.mChildren)) return true;
}
@ -729,7 +730,7 @@ public final class FusionDictionary implements Iterable<Word> {
// find a more efficient way of doing this, without compromising too much on memory
// and ease of use.
public boolean hasBigrams() {
return hasBigramsInternal(mRoot);
return hasBigramsInternal(mRootNodeArray);
}
// Historically, the tails of the words were going to be merged to save space.
@ -750,13 +751,13 @@ public final class FusionDictionary implements Iterable<Word> {
// MakedictLog.i("Merging nodes. Number of nodes : " + countNodes(root));
// MakedictLog.i("Number of groups : " + countCharGroups(root));
//
// final HashMap<String, ArrayList<Node>> repository =
// new HashMap<String, ArrayList<Node>>();
// final HashMap<String, ArrayList<PtNodeArray>> repository =
// new HashMap<String, ArrayList<PtNodeArray>>();
// mergeTailsInner(repository, root);
//
// MakedictLog.i("Number of different pseudohashes : " + repository.size());
// int size = 0;
// for (ArrayList<Node> a : repository.values()) {
// for (ArrayList<PtNodeArray> a : repository.values()) {
// size += a.size();
// }
// MakedictLog.i("Number of nodes after merge : " + (1 + size));
@ -764,7 +765,7 @@ public final class FusionDictionary implements Iterable<Word> {
}
// The following methods are used by the deactivated mergeTails()
// private static boolean isEqual(Node a, Node b) {
// private static boolean isEqual(PtNodeArray a, PtNodeArray b) {
// if (null == a && null == b) return true;
// if (null == a || null == b) return false;
// if (a.data.size() != b.data.size()) return false;
@ -781,21 +782,21 @@ public final class FusionDictionary implements Iterable<Word> {
// return true;
// }
// static private HashMap<String, ArrayList<Node>> mergeTailsInner(
// final HashMap<String, ArrayList<Node>> map, final Node node) {
// final ArrayList<CharGroup> branches = node.data;
// static private HashMap<String, ArrayList<PtNodeArray>> mergeTailsInner(
// final HashMap<String, ArrayList<PtNodeArray>> map, final PtNodeArray nodeArray) {
// final ArrayList<CharGroup> branches = nodeArray.data;
// final int nodeSize = branches.size();
// for (int i = 0; i < nodeSize; ++i) {
// CharGroup group = branches.get(i);
// if (null != group.children) {
// String pseudoHash = getPseudoHash(group.children);
// ArrayList<Node> similarList = map.get(pseudoHash);
// ArrayList<PtNodeArray> similarList = map.get(pseudoHash);
// if (null == similarList) {
// similarList = new ArrayList<Node>();
// similarList = new ArrayList<PtNodeArray>();
// map.put(pseudoHash, similarList);
// }
// boolean merged = false;
// for (Node similar : similarList) {
// for (PtNodeArray similar : similarList) {
// if (isEqual(group.children, similar)) {
// group.children = similar;
// merged = true;
@ -811,9 +812,9 @@ public final class FusionDictionary implements Iterable<Word> {
// return map;
// }
// private static String getPseudoHash(final Node node) {
// private static String getPseudoHash(final PtNodeArray nodeArray) {
// StringBuilder s = new StringBuilder();
// for (CharGroup g : node.data) {
// for (CharGroup g : nodeArray.data) {
// s.append(g.frequency);
// for (int ch : g.chars) {
// s.append(Character.toChars(ch));
@ -901,6 +902,6 @@ public final class FusionDictionary implements Iterable<Word> {
*/
@Override
public Iterator<Word> iterator() {
return new DictionaryIterator(mRoot.mData);
return new DictionaryIterator(mRootNodeArray.mData);
}
}

View File

@ -25,7 +25,7 @@ import com.android.inputmethod.latin.makedict.BinaryDictIOUtils;
import com.android.inputmethod.latin.makedict.BinaryDictReader;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.PendingAttribute;
import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
import com.android.inputmethod.latin.personalization.UserHistoryDictionaryBigramList;
@ -78,7 +78,7 @@ public final class UserHistoryDictIOUtils {
@UsedForTesting
static FusionDictionary constructFusionDictionary(
final BigramDictionaryInterface dict, final UserHistoryDictionaryBigramList bigrams) {
final FusionDictionary fusionDict = new FusionDictionary(new Node(),
final FusionDictionary fusionDict = new FusionDictionary(new PtNodeArray(),
new FusionDictionary.DictionaryOptions(new HashMap<String, String>(), false,
false));
int profTotal = 0;
@ -102,7 +102,7 @@ public final class UserHistoryDictIOUtils {
if (word1 == null) { // unigram
fusionDict.add(word2, freq, null, false /* isNotAWord */);
} else { // bigram
if (FusionDictionary.findWordInTree(fusionDict.mRoot, word1) == null) {
if (FusionDictionary.findWordInTree(fusionDict.mRootNodeArray, word1) == null) {
fusionDict.add(word1, 2, null, false /* isNotAWord */);
}
fusionDict.setBigram(word1, word2, freq);

View File

@ -20,7 +20,7 @@ import android.test.AndroidTestCase;
import android.test.suitebuilder.annotation.SmallTest;
import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import java.util.HashMap;
@ -30,21 +30,21 @@ import java.util.HashMap;
@SmallTest
public class FusionDictionaryTests extends AndroidTestCase {
public void testFindWordInTree() {
FusionDictionary dict = new FusionDictionary(new Node(),
FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
new FusionDictionary.DictionaryOptions(new HashMap<String,String>(), false, false));
dict.add("abc", 10, null, false /* isNotAWord */);
assertNull(FusionDictionary.findWordInTree(dict.mRoot, "aaa"));
assertNotNull(FusionDictionary.findWordInTree(dict.mRoot, "abc"));
assertNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "aaa"));
assertNotNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "abc"));
dict.add("aa", 10, null, false /* isNotAWord */);
assertNull(FusionDictionary.findWordInTree(dict.mRoot, "aaa"));
assertNotNull(FusionDictionary.findWordInTree(dict.mRoot, "aa"));
assertNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "aaa"));
assertNotNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "aa"));
dict.add("babcd", 10, null, false /* isNotAWord */);
dict.add("bacde", 10, null, false /* isNotAWord */);
assertNull(FusionDictionary.findWordInTree(dict.mRoot, "ba"));
assertNotNull(FusionDictionary.findWordInTree(dict.mRoot, "babcd"));
assertNotNull(FusionDictionary.findWordInTree(dict.mRoot, "bacde"));
assertNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "ba"));
assertNotNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "babcd"));
assertNotNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "bacde"));
}
}

View File

@ -25,7 +25,7 @@ import android.util.SparseArray;
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.FusionDictionaryBufferInterface;
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import com.android.inputmethod.latin.utils.CollectionUtils;
@ -226,7 +226,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
// check unigram
for (final String word : words) {
final CharGroup cg = FusionDictionary.findWordInTree(dict.mRoot, word);
final CharGroup cg = FusionDictionary.findWordInTree(dict.mRootNodeArray, word);
assertNotNull(cg);
}
@ -234,7 +234,8 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
for (int i = 0; i < bigrams.size(); ++i) {
final int w1 = bigrams.keyAt(i);
for (final int w2 : bigrams.valueAt(i)) {
final CharGroup cg = FusionDictionary.findWordInTree(dict.mRoot, words.get(w1));
final CharGroup cg = FusionDictionary.findWordInTree(dict.mRootNodeArray,
words.get(w1));
assertNotNull(words.get(w1) + "," + words.get(w2), cg.getBigram(words.get(w2)));
}
}
@ -242,7 +243,8 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
// check shortcut
if (shortcutMap != null) {
for (final Map.Entry<String, List<String>> entry : shortcutMap.entrySet()) {
final CharGroup group = FusionDictionary.findWordInTree(dict.mRoot, entry.getKey());
final CharGroup group = FusionDictionary.findWordInTree(dict.mRootNodeArray,
entry.getKey());
for (final String word : entry.getValue()) {
assertNotNull("shortcut not found: " + entry.getKey() + ", " + word,
group.getShortcut(word));
@ -297,7 +299,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
}
assertNotNull(file);
final FusionDictionary dict = new FusionDictionary(new Node(),
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
new FusionDictionary.DictionaryOptions(new HashMap<String,String>(), false, false));
addUnigrams(words.size(), dict, words, shortcuts);
addBigrams(dict, words, bigrams);
@ -440,7 +442,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
assertNotNull(file);
// making the dictionary from lists of words.
final FusionDictionary dict = new FusionDictionary(new Node(),
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
new FusionDictionary.DictionaryOptions(
new HashMap<String, String>(), false, false));
addUnigrams(words.size(), dict, words, null /* shortcutMap */);
@ -538,7 +540,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
}
assertNotNull(file);
final FusionDictionary dict = new FusionDictionary(new Node(),
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
new FusionDictionary.DictionaryOptions(
new HashMap<String, String>(), false, false));
addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */);
@ -599,7 +601,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
}
assertNotNull(file);
final FusionDictionary dict = new FusionDictionary(new Node(),
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
new FusionDictionary.DictionaryOptions(
new HashMap<String, String>(), false, false));
addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */);

View File

@ -24,7 +24,7 @@ import android.util.Log;
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.ByteBufferWrapper;
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.FusionDictionaryBufferInterface;
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import com.android.inputmethod.latin.utils.CollectionUtils;
@ -277,7 +277,7 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
}
// set an initial dictionary.
final FusionDictionary dict = new FusionDictionary(new Node(),
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
new FusionDictionary.DictionaryOptions(new HashMap<String,String>(), false, false));
dict.add("abcd", 10, null, false);
@ -328,7 +328,7 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
}
// set an initial dictionary.
final FusionDictionary dict = new FusionDictionary(new Node(),
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
new FusionDictionary.DictionaryOptions(new HashMap<String,String>(), false, false));
dict.add("abcd", 10, null, false);
dict.add("efgh", 15, null, false);
@ -365,7 +365,7 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
assertNotNull(file);
// set an initial dictionary.
final FusionDictionary dict = new FusionDictionary(new Node(),
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
new FusionDictionary.DictionaryOptions(new HashMap<String, String>(), false,
false));
dict.add("initial", 10, null, false);

View File

@ -86,7 +86,7 @@ public class UserHistoryDictIOUtilsTests extends AndroidTestCase
private void checkWordInFusionDict(final FusionDictionary dict, final String word,
final ArrayList<String> expectedBigrams) {
final CharGroup group = FusionDictionary.findWordInTree(dict.mRoot, word);
final CharGroup group = FusionDictionary.findWordInTree(dict.mRootNodeArray, word);
assertNotNull(group);
assertTrue(group.isTerminal());

View File

@ -19,7 +19,7 @@ package com.android.inputmethod.latin.dicttool;
import com.android.inputmethod.latin.makedict.FormatSpec;
import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import com.android.inputmethod.latin.makedict.Word;
@ -117,7 +117,7 @@ public class CombinedInputOutput {
final boolean processLigatures =
FRENCH_LIGATURE_PROCESSING_OPTION.equals(attributes.get(OPTIONS_TAG));
attributes.remove(OPTIONS_TAG);
final FusionDictionary dict = new FusionDictionary(new Node(), new DictionaryOptions(
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), new DictionaryOptions(
attributes, processUmlauts, processLigatures));
String line;

View File

@ -121,7 +121,8 @@ public class Diff extends Dicttool.Command {
private static void diffWords(final FusionDictionary dict0, final FusionDictionary dict1) {
boolean hasDifferences = false;
for (final Word word0 : dict0) {
final CharGroup word1 = FusionDictionary.findWordInTree(dict1.mRoot, word0.mWord);
final CharGroup word1 = FusionDictionary.findWordInTree(dict1.mRootNodeArray,
word0.mWord);
if (null == word1) {
// This word is not in dict1
System.out.println("Deleted: " + word0.mWord + " " + word0.mFrequency);
@ -150,7 +151,8 @@ public class Diff extends Dicttool.Command {
}
}
for (final Word word1 : dict1) {
final CharGroup word0 = FusionDictionary.findWordInTree(dict0.mRoot, word1.mWord);
final CharGroup word0 = FusionDictionary.findWordInTree(dict0.mRootNodeArray,
word1.mWord);
if (null == word0) {
// This word is not in dict0
System.out.println("Added: " + word1.mWord + " " + word1.mFrequency);

View File

@ -65,7 +65,7 @@ public class Info extends Dicttool.Command {
private static void showWordInfo(final FusionDictionary dict, final String word,
final boolean plumbing) {
final CharGroup group = FusionDictionary.findWordInTree(dict.mRoot, word);
final CharGroup group = FusionDictionary.findWordInTree(dict.mRootNodeArray, word);
if (null == group) {
System.out.println(word + " is not in the dictionary");
return;

View File

@ -18,7 +18,7 @@ package com.android.inputmethod.latin.dicttool;
import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import com.android.inputmethod.latin.makedict.Word;
@ -124,8 +124,8 @@ public class XmlDictInputOutput {
GERMAN_UMLAUT_PROCESSING_OPTION.equals(optionsString);
final boolean processLigatures =
FRENCH_LIGATURE_PROCESSING_OPTION.equals(optionsString);
mDictionary = new FusionDictionary(new Node(), new DictionaryOptions(attributes,
processUmlauts, processLigatures));
mDictionary = new FusionDictionary(new PtNodeArray(),
new DictionaryOptions(attributes, processUmlauts, processLigatures));
} else {
mState = UNKNOWN;
}

View File

@ -22,7 +22,7 @@ import com.android.inputmethod.latin.makedict.BinaryDictReader;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
import junit.framework.TestCase;
@ -42,7 +42,7 @@ public class BinaryDictOffdeviceUtilsTests extends TestCase {
public void testGetRawDictWorks() throws IOException, UnsupportedFormatException {
// Create a thrice-compressed dictionary file.
final FusionDictionary dict = new FusionDictionary(new Node(),
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
new DictionaryOptions(new HashMap<String, String>(),
false /* germanUmlautProcessing */, false /* frenchLigatureProcessing */));
dict.add("foo", TEST_FREQ, null, false /* isNotAWord */);
@ -72,7 +72,8 @@ public class BinaryDictOffdeviceUtilsTests extends TestCase {
final FusionDictionary resultDict = BinaryDictDecoder.readDictionaryBinary(reader,
null /* dict : an optional dictionary to add words to, or null */);
assertEquals("Dictionary can't be read back correctly",
FusionDictionary.findWordInTree(resultDict.mRoot, "foo").getFrequency(), TEST_FREQ);
FusionDictionary.findWordInTree(resultDict.mRootNodeArray, "foo").getFrequency(),
TEST_FREQ);
}
public void testGetRawDictFails() throws IOException {

View File

@ -17,7 +17,7 @@
package com.android.inputmethod.latin.makedict;
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import junit.framework.TestCase;
@ -31,7 +31,7 @@ public class BinaryDictEncoderFlattenTreeTests extends TestCase {
// Test the flattened array contains the expected number of node arrays, and
// that it does not contain any duplicates.
public void testFlattenNodes() {
final FusionDictionary dict = new FusionDictionary(new Node(),
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
new DictionaryOptions(new HashMap<String, String>(),
false /* germanUmlautProcessing */, false /* frenchLigatureProcessing */));
dict.add("foo", 1, null, false /* isNotAWord */);
@ -39,10 +39,10 @@ public class BinaryDictEncoderFlattenTreeTests extends TestCase {
dict.add("ftb", 1, null, false /* isNotAWord */);
dict.add("bar", 1, null, false /* isNotAWord */);
dict.add("fool", 1, null, false /* isNotAWord */);
final ArrayList<Node> result = BinaryDictEncoder.flattenTree(dict.mRoot);
final ArrayList<PtNodeArray> result = BinaryDictEncoder.flattenTree(dict.mRootNodeArray);
assertEquals(4, result.size());
while (!result.isEmpty()) {
final Node n = result.remove(0);
final PtNodeArray n = result.remove(0);
assertFalse("Flattened array contained the same node twice", result.contains(n));
}
}

View File

@ -19,7 +19,7 @@ package com.android.inputmethod.latin.makedict;
import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.Word;
import junit.framework.TestCase;
@ -72,7 +72,7 @@ public class FusionDictionaryTest extends TestCase {
assertNotNull(dict);
for (final String word : words) {
if (--limit < 0) return;
final CharGroup cg = FusionDictionary.findWordInTree(dict.mRoot, word);
final CharGroup cg = FusionDictionary.findWordInTree(dict.mRootNodeArray, word);
assertNotNull(cg);
}
}
@ -95,7 +95,7 @@ public class FusionDictionaryTest extends TestCase {
// Test the flattened array contains the expected number of nodes, and
// that it does not contain any duplicates.
public void testFusion() {
final FusionDictionary dict = new FusionDictionary(new Node(),
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
new DictionaryOptions(new HashMap<String, String>(),
false /* germanUmlautProcessing */, false /* frenchLigatureProcessing */));
final long time = System.currentTimeMillis();