Merge "Rename Node to PtNodeArray"
This commit is contained in:
commit
21dddb1462
19 changed files with 400 additions and 379 deletions
|
@ -23,7 +23,7 @@ import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
|
|||
import com.android.inputmethod.latin.makedict.BinaryDictEncoder;
|
||||
import com.android.inputmethod.latin.makedict.FormatSpec;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
||||
import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
|
||||
import com.android.inputmethod.latin.utils.CollectionUtils;
|
||||
|
@ -51,7 +51,7 @@ public class DictionaryWriter extends AbstractDictionaryWriter {
|
|||
@Override
|
||||
public void clear() {
|
||||
final HashMap<String, String> attributes = CollectionUtils.newHashMap();
|
||||
mFusionDictionary = new FusionDictionary(new Node(),
|
||||
mFusionDictionary = new FusionDictionary(new PtNodeArray(),
|
||||
new FusionDictionary.DictionaryOptions(attributes, false, false));
|
||||
}
|
||||
|
||||
|
|
|
@ -20,7 +20,7 @@ import com.android.inputmethod.annotations.UsedForTesting;
|
|||
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
|
||||
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
||||
import com.android.inputmethod.latin.utils.JniUtils;
|
||||
|
||||
|
@ -548,31 +548,31 @@ public final class BinaryDictDecoder {
|
|||
}
|
||||
|
||||
/**
|
||||
* Reads a single node from a buffer.
|
||||
* Reads a single node array from a buffer.
|
||||
*
|
||||
* This methods reads the file at the current position. A node is fully expected to start at
|
||||
* the current position.
|
||||
* This will recursively read other nodes into the structure, populating the reverse
|
||||
* This method reads the file at the current position. A node array is fully expected to start
|
||||
* at the current position.
|
||||
* This will recursively read other node arrays into the structure, populating the reverse
|
||||
* maps on the fly and using them to keep track of already read nodes.
|
||||
*
|
||||
* @param buffer the buffer, correctly positioned at the start of a node.
|
||||
* @param buffer the buffer, correctly positioned at the start of a node array.
|
||||
* @param headerSize the size, in bytes, of the file header.
|
||||
* @param reverseNodeMap a mapping from addresses to already read nodes.
|
||||
* @param reverseNodeArrayMap a mapping from addresses to already read node arrays.
|
||||
* @param reverseGroupMap a mapping from addresses to already read character groups.
|
||||
* @param options file format options.
|
||||
* @return the read node with all his children already read.
|
||||
* @return the read node array with all its children already read.
|
||||
*/
|
||||
private static Node readNode(final FusionDictionaryBufferInterface buffer, final int headerSize,
|
||||
final Map<Integer, Node> reverseNodeMap, final Map<Integer, CharGroup> reverseGroupMap,
|
||||
final FormatOptions options)
|
||||
private static PtNodeArray readNodeArray(final FusionDictionaryBufferInterface buffer,
|
||||
final int headerSize, final Map<Integer, PtNodeArray> reverseNodeArrayMap,
|
||||
final Map<Integer, CharGroup> reverseGroupMap, final FormatOptions options)
|
||||
throws IOException {
|
||||
final ArrayList<CharGroup> nodeContents = new ArrayList<CharGroup>();
|
||||
final int nodeOrigin = buffer.position() - headerSize;
|
||||
final ArrayList<CharGroup> nodeArrayContents = new ArrayList<CharGroup>();
|
||||
final int nodeArrayOrigin = buffer.position() - headerSize;
|
||||
|
||||
do { // Scan the linked-list node.
|
||||
final int nodeHeadPosition = buffer.position() - headerSize;
|
||||
final int nodeArrayHeadPosition = buffer.position() - headerSize;
|
||||
final int count = readCharGroupCount(buffer);
|
||||
int groupOffset = nodeHeadPosition + BinaryDictIOUtils.getGroupCountSize(count);
|
||||
int groupOffset = nodeArrayHeadPosition + BinaryDictIOUtils.getGroupCountSize(count);
|
||||
for (int i = count; i > 0; --i) { // Scan the array of CharGroup.
|
||||
CharGroupInfo info = readCharGroup(buffer, groupOffset, options);
|
||||
if (BinaryDictIOUtils.isMovedGroup(info.mFlags, options)) continue;
|
||||
|
@ -589,21 +589,21 @@ public final class BinaryDictDecoder {
|
|||
}
|
||||
}
|
||||
if (BinaryDictIOUtils.hasChildrenAddress(info.mChildrenAddress)) {
|
||||
Node children = reverseNodeMap.get(info.mChildrenAddress);
|
||||
PtNodeArray children = reverseNodeArrayMap.get(info.mChildrenAddress);
|
||||
if (null == children) {
|
||||
final int currentPosition = buffer.position();
|
||||
buffer.position(info.mChildrenAddress + headerSize);
|
||||
children = readNode(
|
||||
buffer, headerSize, reverseNodeMap, reverseGroupMap, options);
|
||||
children = readNodeArray(
|
||||
buffer, headerSize, reverseNodeArrayMap, reverseGroupMap, options);
|
||||
buffer.position(currentPosition);
|
||||
}
|
||||
nodeContents.add(
|
||||
nodeArrayContents.add(
|
||||
new CharGroup(info.mCharacters, shortcutTargets, bigrams,
|
||||
info.mFrequency,
|
||||
0 != (info.mFlags & FormatSpec.FLAG_IS_NOT_A_WORD),
|
||||
0 != (info.mFlags & FormatSpec.FLAG_IS_BLACKLISTED), children));
|
||||
} else {
|
||||
nodeContents.add(
|
||||
nodeArrayContents.add(
|
||||
new CharGroup(info.mCharacters, shortcutTargets, bigrams,
|
||||
info.mFrequency,
|
||||
0 != (info.mFlags & FormatSpec.FLAG_IS_NOT_A_WORD),
|
||||
|
@ -624,11 +624,11 @@ public final class BinaryDictDecoder {
|
|||
} while (options.mSupportsDynamicUpdate &&
|
||||
buffer.position() != FormatSpec.NO_FORWARD_LINK_ADDRESS);
|
||||
|
||||
final Node node = new Node(nodeContents);
|
||||
node.mCachedAddressBeforeUpdate = nodeOrigin;
|
||||
node.mCachedAddressAfterUpdate = nodeOrigin;
|
||||
reverseNodeMap.put(node.mCachedAddressAfterUpdate, node);
|
||||
return node;
|
||||
final PtNodeArray nodeArray = new PtNodeArray(nodeArrayContents);
|
||||
nodeArray.mCachedAddressBeforeUpdate = nodeArrayOrigin;
|
||||
nodeArray.mCachedAddressAfterUpdate = nodeArrayOrigin;
|
||||
reverseNodeArrayMap.put(nodeArray.mCachedAddressAfterUpdate, nodeArray);
|
||||
return nodeArray;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -733,10 +733,10 @@ public final class BinaryDictDecoder {
|
|||
// Read header
|
||||
final FileHeader header = readHeader(reader.getBuffer());
|
||||
|
||||
Map<Integer, Node> reverseNodeMapping = new TreeMap<Integer, Node>();
|
||||
Map<Integer, PtNodeArray> reverseNodeArrayMapping = new TreeMap<Integer, PtNodeArray>();
|
||||
Map<Integer, CharGroup> reverseGroupMapping = new TreeMap<Integer, CharGroup>();
|
||||
final Node root = readNode(reader.getBuffer(), header.mHeaderSize, reverseNodeMapping,
|
||||
reverseGroupMapping, header.mFormatOptions);
|
||||
final PtNodeArray root = readNodeArray(reader.getBuffer(), header.mHeaderSize,
|
||||
reverseNodeArrayMapping, reverseGroupMapping, header.mFormatOptions);
|
||||
|
||||
FusionDictionary newDict = new FusionDictionary(root, header.mDictionaryOptions);
|
||||
if (null != dict) {
|
||||
|
@ -803,8 +803,6 @@ public final class BinaryDictDecoder {
|
|||
/**
|
||||
* Calculate bigram frequency from compressed value
|
||||
*
|
||||
* @see #makeBigramFlags
|
||||
*
|
||||
* @param unigramFrequency
|
||||
* @param bigramFrequency compressed frequency
|
||||
* @return approximate bigram frequency
|
||||
|
|
|
@ -20,7 +20,7 @@ import com.android.inputmethod.latin.makedict.BinaryDictDecoder.CharEncoding;
|
|||
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
|
@ -78,12 +78,12 @@ public class BinaryDictEncoder {
|
|||
}
|
||||
|
||||
/**
|
||||
* Compute the binary size of the group count for a node
|
||||
* @param node the node
|
||||
* Compute the binary size of the group count for a node array.
|
||||
* @param nodeArray the nodeArray
|
||||
* @return the size of the group count, either 1 or 2 bytes.
|
||||
*/
|
||||
private static int getGroupCountSize(final Node node) {
|
||||
return BinaryDictIOUtils.getGroupCountSize(node.mData.size());
|
||||
private static int getGroupCountSize(final PtNodeArray nodeArray) {
|
||||
return BinaryDictIOUtils.getGroupCountSize(nodeArray.mData.size());
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -138,15 +138,17 @@ public class BinaryDictEncoder {
|
|||
}
|
||||
|
||||
/**
|
||||
* Compute the maximum size of a node, assuming 3-byte addresses for everything, and caches
|
||||
* it in the 'actualSize' member of the node.
|
||||
* Compute the maximum size of each node of a node array, assuming 3-byte addresses for
|
||||
* everything, and cache it in the 'mCachedSize' member of the nodes; deduce the size of
|
||||
* the containing node array, and cache it in its 'mCachedSize' member.
|
||||
*
|
||||
* @param node the node to compute the maximum size of.
|
||||
* @param nodeArray the node array to compute the maximum size of.
|
||||
* @param options file format options.
|
||||
*/
|
||||
private static void calculateNodeMaximumSize(final Node node, final FormatOptions options) {
|
||||
int size = getGroupCountSize(node);
|
||||
for (CharGroup g : node.mData) {
|
||||
private static void calculateNodeArrayMaximumSize(final PtNodeArray nodeArray,
|
||||
final FormatOptions options) {
|
||||
int size = getGroupCountSize(nodeArray);
|
||||
for (CharGroup g : nodeArray.mData) {
|
||||
final int groupSize = getCharGroupMaximumSize(g, options);
|
||||
g.mCachedSize = groupSize;
|
||||
size += groupSize;
|
||||
|
@ -154,7 +156,7 @@ public class BinaryDictEncoder {
|
|||
if (options.mSupportsDynamicUpdate) {
|
||||
size += FormatSpec.FORWARD_LINK_ADDRESS_SIZE;
|
||||
}
|
||||
node.mCachedSize = size;
|
||||
nodeArray.mCachedSize = size;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -199,14 +201,16 @@ public class BinaryDictEncoder {
|
|||
|
||||
// This method is responsible for finding a nice ordering of the nodes that favors run-time
|
||||
// cache performance and dictionary size.
|
||||
/* package for tests */ static ArrayList<Node> flattenTree(final Node root) {
|
||||
final int treeSize = FusionDictionary.countCharGroups(root);
|
||||
/* package for tests */ static ArrayList<PtNodeArray> flattenTree(
|
||||
final PtNodeArray rootNodeArray) {
|
||||
final int treeSize = FusionDictionary.countCharGroups(rootNodeArray);
|
||||
MakedictLog.i("Counted nodes : " + treeSize);
|
||||
final ArrayList<Node> flatTree = new ArrayList<Node>(treeSize);
|
||||
return flattenTreeInner(flatTree, root);
|
||||
final ArrayList<PtNodeArray> flatTree = new ArrayList<PtNodeArray>(treeSize);
|
||||
return flattenTreeInner(flatTree, rootNodeArray);
|
||||
}
|
||||
|
||||
private static ArrayList<Node> flattenTreeInner(final ArrayList<Node> list, final Node node) {
|
||||
private static ArrayList<PtNodeArray> flattenTreeInner(final ArrayList<PtNodeArray> list,
|
||||
final PtNodeArray nodeArray) {
|
||||
// Removing the node is necessary if the tails are merged, because we would then
|
||||
// add the same node several times when we only want it once. A number of places in
|
||||
// the code also depends on any node being only once in the list.
|
||||
|
@ -224,8 +228,8 @@ public class BinaryDictEncoder {
|
|||
// this simple list.remove operation O(n*n) overall. On Android this overhead is very
|
||||
// high.
|
||||
// For future reference, the code to remove duplicate is a simple : list.remove(node);
|
||||
list.add(node);
|
||||
final ArrayList<CharGroup> branches = node.mData;
|
||||
list.add(nodeArray);
|
||||
final ArrayList<CharGroup> branches = nodeArray.mData;
|
||||
final int nodeSize = branches.size();
|
||||
for (CharGroup group : branches) {
|
||||
if (null != group.mChildren) flattenTreeInner(list, group.mChildren);
|
||||
|
@ -234,52 +238,60 @@ public class BinaryDictEncoder {
|
|||
}
|
||||
|
||||
/**
|
||||
* Get the offset from a position inside a current node to a target node, during update.
|
||||
* Get the offset from a position inside a current node array to a target node array, during
|
||||
* update.
|
||||
*
|
||||
* If the current node is before the target node, the target node has not been updated yet,
|
||||
* so we should return the offset from the old position of the current node to the old position
|
||||
* of the target node. If on the other hand the target is before the current node, it already
|
||||
* has been updated, so we should return the offset from the new position in the current node
|
||||
* to the new position in the target node.
|
||||
* @param currentNode the node containing the CharGroup where the offset will be written
|
||||
* @param offsetFromStartOfCurrentNode the offset, in bytes, from the start of currentNode
|
||||
* @param targetNode the target node to get the offset to
|
||||
* @return the offset to the target node
|
||||
* If the current node array is before the target node array, the target node array has not
|
||||
* been updated yet, so we should return the offset from the old position of the current node
|
||||
* array to the old position of the target node array. If on the other hand the target is
|
||||
* before the current node array, it already has been updated, so we should return the offset
|
||||
* from the new position in the current node array to the new position in the target node
|
||||
* array.
|
||||
*
|
||||
* @param currentNodeArray node array containing the CharGroup where the offset will be written
|
||||
* @param offsetFromStartOfCurrentNodeArray offset, in bytes, from the start of currentNodeArray
|
||||
* @param targetNodeArray the target node array to get the offset to
|
||||
* @return the offset to the target node array
|
||||
*/
|
||||
private static int getOffsetToTargetNodeDuringUpdate(final Node currentNode,
|
||||
final int offsetFromStartOfCurrentNode, final Node targetNode) {
|
||||
final boolean isTargetBeforeCurrent = (targetNode.mCachedAddressBeforeUpdate
|
||||
< currentNode.mCachedAddressBeforeUpdate);
|
||||
private static int getOffsetToTargetNodeArrayDuringUpdate(final PtNodeArray currentNodeArray,
|
||||
final int offsetFromStartOfCurrentNodeArray, final PtNodeArray targetNodeArray) {
|
||||
final boolean isTargetBeforeCurrent = (targetNodeArray.mCachedAddressBeforeUpdate
|
||||
< currentNodeArray.mCachedAddressBeforeUpdate);
|
||||
if (isTargetBeforeCurrent) {
|
||||
return targetNode.mCachedAddressAfterUpdate
|
||||
- (currentNode.mCachedAddressAfterUpdate + offsetFromStartOfCurrentNode);
|
||||
return targetNodeArray.mCachedAddressAfterUpdate
|
||||
- (currentNodeArray.mCachedAddressAfterUpdate
|
||||
+ offsetFromStartOfCurrentNodeArray);
|
||||
} else {
|
||||
return targetNode.mCachedAddressBeforeUpdate
|
||||
- (currentNode.mCachedAddressBeforeUpdate + offsetFromStartOfCurrentNode);
|
||||
return targetNodeArray.mCachedAddressBeforeUpdate
|
||||
- (currentNodeArray.mCachedAddressBeforeUpdate
|
||||
+ offsetFromStartOfCurrentNodeArray);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the offset from a position inside a current node to a target CharGroup, during update.
|
||||
* @param currentNode the node containing the CharGroup where the offset will be written
|
||||
* @param offsetFromStartOfCurrentNode the offset, in bytes, from the start of currentNode
|
||||
* Get the offset from a position inside a current node array to a target CharGroup, during
|
||||
* update.
|
||||
*
|
||||
* @param currentNodeArray node array containing the CharGroup where the offset will be written
|
||||
* @param offsetFromStartOfCurrentNodeArray offset, in bytes, from the start of currentNodeArray
|
||||
* @param targetCharGroup the target CharGroup to get the offset to
|
||||
* @return the offset to the target CharGroup
|
||||
*/
|
||||
// TODO: is there any way to factorize this method with the one above?
|
||||
private static int getOffsetToTargetCharGroupDuringUpdate(final Node currentNode,
|
||||
final int offsetFromStartOfCurrentNode, final CharGroup targetCharGroup) {
|
||||
final int oldOffsetBasePoint = currentNode.mCachedAddressBeforeUpdate
|
||||
+ offsetFromStartOfCurrentNode;
|
||||
private static int getOffsetToTargetCharGroupDuringUpdate(final PtNodeArray currentNodeArray,
|
||||
final int offsetFromStartOfCurrentNodeArray, final CharGroup targetCharGroup) {
|
||||
final int oldOffsetBasePoint = currentNodeArray.mCachedAddressBeforeUpdate
|
||||
+ offsetFromStartOfCurrentNodeArray;
|
||||
final boolean isTargetBeforeCurrent = (targetCharGroup.mCachedAddressBeforeUpdate
|
||||
< oldOffsetBasePoint);
|
||||
// If the target is before the current node, then its address has already been updated.
|
||||
// We can use the AfterUpdate member, and compare it to our own member after update.
|
||||
// Otherwise, the AfterUpdate member is not updated yet, so we need to use the BeforeUpdate
|
||||
// member, and of course we have to compare this to our own address before update.
|
||||
// If the target is before the current node array, then its address has already been
|
||||
// updated. We can use the AfterUpdate member, and compare it to our own member after
|
||||
// update. Otherwise, the AfterUpdate member is not updated yet, so we need to use the
|
||||
// BeforeUpdate member, and of course we have to compare this to our own address before
|
||||
// update.
|
||||
if (isTargetBeforeCurrent) {
|
||||
final int newOffsetBasePoint = currentNode.mCachedAddressAfterUpdate
|
||||
+ offsetFromStartOfCurrentNode;
|
||||
final int newOffsetBasePoint = currentNodeArray.mCachedAddressAfterUpdate
|
||||
+ offsetFromStartOfCurrentNodeArray;
|
||||
return targetCharGroup.mCachedAddressAfterUpdate - newOffsetBasePoint;
|
||||
} else {
|
||||
return targetCharGroup.mCachedAddressBeforeUpdate - oldOffsetBasePoint;
|
||||
|
@ -287,26 +299,26 @@ public class BinaryDictEncoder {
|
|||
}
|
||||
|
||||
/**
|
||||
* Computes the actual node size, based on the cached addresses of the children nodes.
|
||||
* Computes the actual node array size, based on the cached addresses of the children nodes.
|
||||
*
|
||||
* Each node stores its tentative address. During dictionary address computing, these
|
||||
* are not final, but they can be used to compute the node size (the node size depends
|
||||
* on the address of the children because the number of bytes necessary to store an
|
||||
* address depends on its numeric value. The return value indicates whether the node
|
||||
* Each node array stores its tentative address. During dictionary address computing, these
|
||||
* are not final, but they can be used to compute the node array size (the node array size
|
||||
* depends on the address of the children because the number of bytes necessary to store an
|
||||
* address depends on its numeric value. The return value indicates whether the node array
|
||||
* contents (as in, any of the addresses stored in the cache fields) have changed with
|
||||
* respect to their previous value.
|
||||
*
|
||||
* @param node the node to compute the size of.
|
||||
* @param nodeArray the node array to compute the size of.
|
||||
* @param dict the dictionary in which the word/attributes are to be found.
|
||||
* @param formatOptions file format options.
|
||||
* @return false if none of the cached addresses inside the node changed, true otherwise.
|
||||
* @return false if none of the cached addresses inside the node array changed, true otherwise.
|
||||
*/
|
||||
private static boolean computeActualNodeSize(final Node node, final FusionDictionary dict,
|
||||
final FormatOptions formatOptions) {
|
||||
private static boolean computeActualNodeArraySize(final PtNodeArray nodeArray,
|
||||
final FusionDictionary dict, final FormatOptions formatOptions) {
|
||||
boolean changed = false;
|
||||
int size = getGroupCountSize(node);
|
||||
for (CharGroup group : node.mData) {
|
||||
group.mCachedAddressAfterUpdate = node.mCachedAddressAfterUpdate + size;
|
||||
int size = getGroupCountSize(nodeArray);
|
||||
for (CharGroup group : nodeArray.mData) {
|
||||
group.mCachedAddressAfterUpdate = nodeArray.mCachedAddressAfterUpdate + size;
|
||||
if (group.mCachedAddressAfterUpdate != group.mCachedAddressBeforeUpdate) {
|
||||
changed = true;
|
||||
}
|
||||
|
@ -318,16 +330,16 @@ public class BinaryDictEncoder {
|
|||
if (formatOptions.mSupportsDynamicUpdate) {
|
||||
groupSize += FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE;
|
||||
} else {
|
||||
groupSize += getByteSize(getOffsetToTargetNodeDuringUpdate(node,
|
||||
groupSize += getByteSize(getOffsetToTargetNodeArrayDuringUpdate(nodeArray,
|
||||
groupSize + size, group.mChildren));
|
||||
}
|
||||
}
|
||||
groupSize += getShortcutListSize(group.mShortcutTargets);
|
||||
if (null != group.mBigrams) {
|
||||
for (WeightedString bigram : group.mBigrams) {
|
||||
final int offset = getOffsetToTargetCharGroupDuringUpdate(node,
|
||||
final int offset = getOffsetToTargetCharGroupDuringUpdate(nodeArray,
|
||||
groupSize + size + FormatSpec.GROUP_FLAGS_SIZE,
|
||||
FusionDictionary.findWordInTree(dict.mRoot, bigram.mWord));
|
||||
FusionDictionary.findWordInTree(dict.mRootNodeArray, bigram.mWord));
|
||||
groupSize += getByteSize(offset) + FormatSpec.GROUP_FLAGS_SIZE;
|
||||
}
|
||||
}
|
||||
|
@ -337,49 +349,49 @@ public class BinaryDictEncoder {
|
|||
if (formatOptions.mSupportsDynamicUpdate) {
|
||||
size += FormatSpec.FORWARD_LINK_ADDRESS_SIZE;
|
||||
}
|
||||
if (node.mCachedSize != size) {
|
||||
node.mCachedSize = size;
|
||||
if (nodeArray.mCachedSize != size) {
|
||||
nodeArray.mCachedSize = size;
|
||||
changed = true;
|
||||
}
|
||||
return changed;
|
||||
}
|
||||
|
||||
/**
|
||||
* Initializes the cached addresses of nodes from their size.
|
||||
* Initializes the cached addresses of node arrays and the nodes they contain from their size.
|
||||
*
|
||||
* @param flatNodes the array of nodes.
|
||||
* @param flatNodes the list of node arrays.
|
||||
* @param formatOptions file format options.
|
||||
* @return the byte size of the entire stack.
|
||||
*/
|
||||
private static int initializeNodesCachedAddresses(final ArrayList<Node> flatNodes,
|
||||
private static int initializeNodeArraysCachedAddresses(final ArrayList<PtNodeArray> flatNodes,
|
||||
final FormatOptions formatOptions) {
|
||||
int nodeOffset = 0;
|
||||
for (final Node n : flatNodes) {
|
||||
n.mCachedAddressBeforeUpdate = nodeOffset;
|
||||
int groupCountSize = getGroupCountSize(n);
|
||||
int nodeArrayOffset = 0;
|
||||
for (final PtNodeArray nodeArray : flatNodes) {
|
||||
nodeArray.mCachedAddressBeforeUpdate = nodeArrayOffset;
|
||||
int groupCountSize = getGroupCountSize(nodeArray);
|
||||
int groupOffset = 0;
|
||||
for (final CharGroup g : n.mData) {
|
||||
for (final CharGroup g : nodeArray.mData) {
|
||||
g.mCachedAddressBeforeUpdate = g.mCachedAddressAfterUpdate =
|
||||
groupCountSize + nodeOffset + groupOffset;
|
||||
groupCountSize + nodeArrayOffset + groupOffset;
|
||||
groupOffset += g.mCachedSize;
|
||||
}
|
||||
final int nodeSize = groupCountSize + groupOffset
|
||||
+ (formatOptions.mSupportsDynamicUpdate
|
||||
? FormatSpec.FORWARD_LINK_ADDRESS_SIZE : 0);
|
||||
nodeOffset += n.mCachedSize;
|
||||
nodeArrayOffset += nodeArray.mCachedSize;
|
||||
}
|
||||
return nodeOffset;
|
||||
return nodeArrayOffset;
|
||||
}
|
||||
|
||||
/**
|
||||
* Updates the cached addresses of nodes after recomputing their new positions.
|
||||
* Updates the cached addresses of node arrays after recomputing their new positions.
|
||||
*
|
||||
* @param flatNodes the array of nodes.
|
||||
* @param flatNodes the list of node arrays.
|
||||
*/
|
||||
private static void updateNodeCachedAddresses(final ArrayList<Node> flatNodes) {
|
||||
for (final Node n : flatNodes) {
|
||||
n.mCachedAddressBeforeUpdate = n.mCachedAddressAfterUpdate;
|
||||
for (final CharGroup g : n.mData) {
|
||||
private static void updateNodeArraysCachedAddresses(final ArrayList<PtNodeArray> flatNodes) {
|
||||
for (final PtNodeArray nodeArray : flatNodes) {
|
||||
nodeArray.mCachedAddressBeforeUpdate = nodeArray.mCachedAddressAfterUpdate;
|
||||
for (final CharGroup g : nodeArray.mData) {
|
||||
g.mCachedAddressBeforeUpdate = g.mCachedAddressAfterUpdate;
|
||||
}
|
||||
}
|
||||
|
@ -391,11 +403,11 @@ public class BinaryDictEncoder {
|
|||
* The parent addresses are used by some binary formats at write-to-disk time. Not all formats
|
||||
* need them. In particular, version 2 does not need them, and version 3 does.
|
||||
*
|
||||
* @param flatNodes the flat array of nodes to fill in
|
||||
* @param flatNodes the flat array of node arrays to fill in
|
||||
*/
|
||||
private static void computeParentAddresses(final ArrayList<Node> flatNodes) {
|
||||
for (final Node node : flatNodes) {
|
||||
for (final CharGroup group : node.mData) {
|
||||
private static void computeParentAddresses(final ArrayList<PtNodeArray> flatNodes) {
|
||||
for (final PtNodeArray nodeArray : flatNodes) {
|
||||
for (final CharGroup group : nodeArray.mData) {
|
||||
if (null != group.mChildren) {
|
||||
// Assign my address to children's parent address
|
||||
// Here BeforeUpdate and AfterUpdate addresses have the same value, so it
|
||||
|
@ -408,25 +420,25 @@ public class BinaryDictEncoder {
|
|||
}
|
||||
|
||||
/**
|
||||
* Compute the addresses and sizes of an ordered node array.
|
||||
* Compute the addresses and sizes of an ordered list of node arrays.
|
||||
*
|
||||
* This method takes a node array and will update its cached address and size values
|
||||
* so that they can be written into a file. It determines the smallest size each of the
|
||||
* nodes can be given the addresses of its children and attributes, and store that into
|
||||
* This method takes a list of node arrays and will update their cached address and size
|
||||
* values so that they can be written into a file. It determines the smallest size each of the
|
||||
* node arrays can be given the addresses of its children and attributes, and store that into
|
||||
* each node.
|
||||
* The order of the node is given by the order of the array. This method makes no effort
|
||||
* to find a good order; it only mechanically computes the size this order results in.
|
||||
*
|
||||
* @param dict the dictionary
|
||||
* @param flatNodes the ordered array of nodes
|
||||
* @param flatNodes the ordered list of node arrays
|
||||
* @param formatOptions file format options.
|
||||
* @return the same array it was passed. The nodes have been updated for address and size.
|
||||
*/
|
||||
private static ArrayList<Node> computeAddresses(final FusionDictionary dict,
|
||||
final ArrayList<Node> flatNodes, final FormatOptions formatOptions) {
|
||||
private static ArrayList<PtNodeArray> computeAddresses(final FusionDictionary dict,
|
||||
final ArrayList<PtNodeArray> flatNodes, final FormatOptions formatOptions) {
|
||||
// First get the worst possible sizes and offsets
|
||||
for (final Node n : flatNodes) calculateNodeMaximumSize(n, formatOptions);
|
||||
final int offset = initializeNodesCachedAddresses(flatNodes, formatOptions);
|
||||
for (final PtNodeArray n : flatNodes) calculateNodeArrayMaximumSize(n, formatOptions);
|
||||
final int offset = initializeNodeArraysCachedAddresses(flatNodes, formatOptions);
|
||||
|
||||
MakedictLog.i("Compressing the array addresses. Original size : " + offset);
|
||||
MakedictLog.i("(Recursively seen size : " + offset + ")");
|
||||
|
@ -435,17 +447,19 @@ public class BinaryDictEncoder {
|
|||
boolean changesDone = false;
|
||||
do {
|
||||
changesDone = false;
|
||||
int nodeStartOffset = 0;
|
||||
for (final Node n : flatNodes) {
|
||||
n.mCachedAddressAfterUpdate = nodeStartOffset;
|
||||
final int oldNodeSize = n.mCachedSize;
|
||||
final boolean changed = computeActualNodeSize(n, dict, formatOptions);
|
||||
final int newNodeSize = n.mCachedSize;
|
||||
if (oldNodeSize < newNodeSize) throw new RuntimeException("Increased size ?!");
|
||||
nodeStartOffset += newNodeSize;
|
||||
int nodeArrayStartOffset = 0;
|
||||
for (final PtNodeArray nodeArray : flatNodes) {
|
||||
nodeArray.mCachedAddressAfterUpdate = nodeArrayStartOffset;
|
||||
final int oldNodeArraySize = nodeArray.mCachedSize;
|
||||
final boolean changed = computeActualNodeArraySize(nodeArray, dict, formatOptions);
|
||||
final int newNodeArraySize = nodeArray.mCachedSize;
|
||||
if (oldNodeArraySize < newNodeArraySize) {
|
||||
throw new RuntimeException("Increased size ?!");
|
||||
}
|
||||
nodeArrayStartOffset += newNodeArraySize;
|
||||
changesDone |= changed;
|
||||
}
|
||||
updateNodeCachedAddresses(flatNodes);
|
||||
updateNodeArraysCachedAddresses(flatNodes);
|
||||
++passes;
|
||||
if (passes > MAX_PASSES) throw new RuntimeException("Too many passes - probably a bug");
|
||||
} while (changesDone);
|
||||
|
@ -453,10 +467,10 @@ public class BinaryDictEncoder {
|
|||
if (formatOptions.mSupportsDynamicUpdate) {
|
||||
computeParentAddresses(flatNodes);
|
||||
}
|
||||
final Node lastNode = flatNodes.get(flatNodes.size() - 1);
|
||||
final PtNodeArray lastNodeArray = flatNodes.get(flatNodes.size() - 1);
|
||||
MakedictLog.i("Compression complete in " + passes + " passes.");
|
||||
MakedictLog.i("After address compression : "
|
||||
+ (lastNode.mCachedAddressAfterUpdate + lastNode.mCachedSize));
|
||||
+ (lastNodeArray.mCachedAddressAfterUpdate + lastNodeArray.mCachedSize));
|
||||
|
||||
return flatNodes;
|
||||
}
|
||||
|
@ -464,25 +478,25 @@ public class BinaryDictEncoder {
|
|||
/**
|
||||
* Sanity-checking method.
|
||||
*
|
||||
* This method checks an array of node for juxtaposition, that is, it will do
|
||||
* nothing if each node's cached address is actually the previous node's address
|
||||
* This method checks a list of node arrays for juxtaposition, that is, it will do
|
||||
* nothing if each node array's cached address is actually the previous node array's address
|
||||
* plus the previous node's size.
|
||||
* If this is not the case, it will throw an exception.
|
||||
*
|
||||
* @param array the array node to check
|
||||
* @param arrays the list of node arrays to check
|
||||
*/
|
||||
private static void checkFlatNodeArray(final ArrayList<Node> array) {
|
||||
private static void checkFlatNodeArrayList(final ArrayList<PtNodeArray> arrays) {
|
||||
int offset = 0;
|
||||
int index = 0;
|
||||
for (final Node n : array) {
|
||||
for (final PtNodeArray nodeArray : arrays) {
|
||||
// BeforeUpdate and AfterUpdate addresses are the same here, so it does not matter
|
||||
// which we use.
|
||||
if (n.mCachedAddressAfterUpdate != offset) {
|
||||
if (nodeArray.mCachedAddressAfterUpdate != offset) {
|
||||
throw new RuntimeException("Wrong address for node " + index
|
||||
+ " : expected " + offset + ", got " + n.mCachedAddressAfterUpdate);
|
||||
+ " : expected " + offset + ", got " + nodeArray.mCachedAddressAfterUpdate);
|
||||
}
|
||||
++index;
|
||||
offset += n.mCachedSize;
|
||||
offset += nodeArray.mCachedSize;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -707,26 +721,23 @@ public class BinaryDictEncoder {
|
|||
}
|
||||
|
||||
/**
|
||||
* Write a node to memory. The node is expected to have its final position cached.
|
||||
* Write a node array to memory. The node array is expected to have its final position cached.
|
||||
*
|
||||
* This can be an empty map, but the more is inside the faster the lookups will be. It can
|
||||
* be carried on as long as nodes do not move.
|
||||
*
|
||||
* @param dict the dictionary the node is a part of (for relative offsets).
|
||||
* @param dict the dictionary the node array is a part of (for relative offsets).
|
||||
* @param buffer the memory buffer to write to.
|
||||
* @param node the node to write.
|
||||
* @param nodeArray the node array to write.
|
||||
* @param formatOptions file format options.
|
||||
* @return the address of the END of the node array.
|
||||
*/
|
||||
@SuppressWarnings("unused")
|
||||
private static int writePlacedNode(final FusionDictionary dict, byte[] buffer,
|
||||
final Node node, final FormatOptions formatOptions) {
|
||||
final PtNodeArray nodeArray, final FormatOptions formatOptions) {
|
||||
// TODO: Make the code in common with BinaryDictIOUtils#writeCharGroup
|
||||
int index = node.mCachedAddressAfterUpdate;
|
||||
int index = nodeArray.mCachedAddressAfterUpdate;
|
||||
|
||||
final int groupCount = node.mData.size();
|
||||
final int countSize = getGroupCountSize(node);
|
||||
final int parentAddress = node.mCachedParentAddress;
|
||||
final int groupCount = nodeArray.mData.size();
|
||||
final int countSize = getGroupCountSize(nodeArray);
|
||||
final int parentAddress = nodeArray.mCachedParentAddress;
|
||||
if (1 == countSize) {
|
||||
buffer[index++] = (byte)groupCount;
|
||||
} else if (2 == countSize) {
|
||||
|
@ -739,7 +750,7 @@ public class BinaryDictEncoder {
|
|||
}
|
||||
int groupAddress = index;
|
||||
for (int i = 0; i < groupCount; ++i) {
|
||||
final CharGroup group = node.mData.get(i);
|
||||
final CharGroup group = nodeArray.mData.get(i);
|
||||
if (index != group.mCachedAddressAfterUpdate) {
|
||||
throw new RuntimeException("Bug: write index is not the same as the cached address "
|
||||
+ "of the group : " + index + " <> " + group.mCachedAddressAfterUpdate);
|
||||
|
@ -762,7 +773,7 @@ public class BinaryDictEncoder {
|
|||
index = writeParentAddress(buffer, index, parentAddress, formatOptions);
|
||||
} else {
|
||||
index = writeParentAddress(buffer, index, parentAddress
|
||||
+ (node.mCachedAddressAfterUpdate - group.mCachedAddressAfterUpdate),
|
||||
+ (nodeArray.mCachedAddressAfterUpdate - group.mCachedAddressAfterUpdate),
|
||||
formatOptions);
|
||||
}
|
||||
|
||||
|
@ -812,7 +823,7 @@ public class BinaryDictEncoder {
|
|||
while (bigramIterator.hasNext()) {
|
||||
final WeightedString bigram = bigramIterator.next();
|
||||
final CharGroup target =
|
||||
FusionDictionary.findWordInTree(dict.mRoot, bigram.mWord);
|
||||
FusionDictionary.findWordInTree(dict.mRootNodeArray, bigram.mWord);
|
||||
final int addressOfBigram = target.mCachedAddressAfterUpdate;
|
||||
final int unigramFrequencyForThisWord = target.mFrequency;
|
||||
++groupAddress;
|
||||
|
@ -832,57 +843,58 @@ public class BinaryDictEncoder {
|
|||
= FormatSpec.NO_FORWARD_LINK_ADDRESS;
|
||||
index += FormatSpec.FORWARD_LINK_ADDRESS_SIZE;
|
||||
}
|
||||
if (index != node.mCachedAddressAfterUpdate + node.mCachedSize) throw new RuntimeException(
|
||||
"Not the same size : written "
|
||||
+ (index - node.mCachedAddressAfterUpdate) + " bytes from a node that should have "
|
||||
+ node.mCachedSize + " bytes");
|
||||
if (index != nodeArray.mCachedAddressAfterUpdate + nodeArray.mCachedSize) {
|
||||
throw new RuntimeException(
|
||||
"Not the same size : written " + (index - nodeArray.mCachedAddressAfterUpdate)
|
||||
+ " bytes from a node that should have " + nodeArray.mCachedSize + " bytes");
|
||||
}
|
||||
return index;
|
||||
}
|
||||
|
||||
/**
|
||||
* Dumps a collection of useful statistics about a node array.
|
||||
* Dumps a collection of useful statistics about a list of node arrays.
|
||||
*
|
||||
* This prints purely informative stuff, like the total estimated file size, the
|
||||
* number of nodes, of character groups, the repartition of each address size, etc
|
||||
* number of node arrays, of character groups, the repartition of each address size, etc
|
||||
*
|
||||
* @param nodes the node array.
|
||||
* @param nodeArrays the list of node arrays.
|
||||
*/
|
||||
private static void showStatistics(ArrayList<Node> nodes) {
|
||||
private static void showStatistics(ArrayList<PtNodeArray> nodeArrays) {
|
||||
int firstTerminalAddress = Integer.MAX_VALUE;
|
||||
int lastTerminalAddress = Integer.MIN_VALUE;
|
||||
int size = 0;
|
||||
int charGroups = 0;
|
||||
int maxGroups = 0;
|
||||
int maxRuns = 0;
|
||||
for (final Node n : nodes) {
|
||||
if (maxGroups < n.mData.size()) maxGroups = n.mData.size();
|
||||
for (final CharGroup cg : n.mData) {
|
||||
for (final PtNodeArray nodeArray : nodeArrays) {
|
||||
if (maxGroups < nodeArray.mData.size()) maxGroups = nodeArray.mData.size();
|
||||
for (final CharGroup cg : nodeArray.mData) {
|
||||
++charGroups;
|
||||
if (cg.mChars.length > maxRuns) maxRuns = cg.mChars.length;
|
||||
if (cg.mFrequency >= 0) {
|
||||
if (n.mCachedAddressAfterUpdate < firstTerminalAddress)
|
||||
firstTerminalAddress = n.mCachedAddressAfterUpdate;
|
||||
if (n.mCachedAddressAfterUpdate > lastTerminalAddress)
|
||||
lastTerminalAddress = n.mCachedAddressAfterUpdate;
|
||||
if (nodeArray.mCachedAddressAfterUpdate < firstTerminalAddress)
|
||||
firstTerminalAddress = nodeArray.mCachedAddressAfterUpdate;
|
||||
if (nodeArray.mCachedAddressAfterUpdate > lastTerminalAddress)
|
||||
lastTerminalAddress = nodeArray.mCachedAddressAfterUpdate;
|
||||
}
|
||||
}
|
||||
if (n.mCachedAddressAfterUpdate + n.mCachedSize > size) {
|
||||
size = n.mCachedAddressAfterUpdate + n.mCachedSize;
|
||||
if (nodeArray.mCachedAddressAfterUpdate + nodeArray.mCachedSize > size) {
|
||||
size = nodeArray.mCachedAddressAfterUpdate + nodeArray.mCachedSize;
|
||||
}
|
||||
}
|
||||
final int[] groupCounts = new int[maxGroups + 1];
|
||||
final int[] runCounts = new int[maxRuns + 1];
|
||||
for (final Node n : nodes) {
|
||||
++groupCounts[n.mData.size()];
|
||||
for (final CharGroup cg : n.mData) {
|
||||
for (final PtNodeArray nodeArray : nodeArrays) {
|
||||
++groupCounts[nodeArray.mData.size()];
|
||||
for (final CharGroup cg : nodeArray.mData) {
|
||||
++runCounts[cg.mChars.length];
|
||||
}
|
||||
}
|
||||
|
||||
MakedictLog.i("Statistics:\n"
|
||||
+ " total file size " + size + "\n"
|
||||
+ " " + nodes.size() + " nodes\n"
|
||||
+ " " + charGroups + " groups (" + ((float)charGroups / nodes.size())
|
||||
+ " " + nodeArrays.size() + " node arrays\n"
|
||||
+ " " + charGroups + " groups (" + ((float)charGroups / nodeArrays.size())
|
||||
+ " groups per node)\n"
|
||||
+ " first terminal at " + firstTerminalAddress + "\n"
|
||||
+ " last terminal at " + lastTerminalAddress + "\n"
|
||||
|
@ -909,11 +921,12 @@ public class BinaryDictEncoder {
|
|||
final FusionDictionary dict, final FormatOptions formatOptions)
|
||||
throws IOException, UnsupportedFormatException {
|
||||
|
||||
// Addresses are limited to 3 bytes, but since addresses can be relative to each node, the
|
||||
// structure itself is not limited to 16MB. However, if it is over 16MB deciding the order
|
||||
// of the nodes becomes a quite complicated problem, because though the dictionary itself
|
||||
// does not have a size limit, each node must still be within 16MB of all its children and
|
||||
// parents. As long as this is ensured, the dictionary file may grow to any size.
|
||||
// Addresses are limited to 3 bytes, but since addresses can be relative to each node
|
||||
// array, the structure itself is not limited to 16MB. However, if it is over 16MB deciding
|
||||
// the order of the node arrays becomes a quite complicated problem, because though the
|
||||
// dictionary itself does not have a size limit, each node array must still be within 16MB
|
||||
// of all its children and parents. As long as this is ensured, the dictionary file may
|
||||
// grow to any size.
|
||||
|
||||
final int version = formatOptions.mVersion;
|
||||
if (version < FormatSpec.MINIMUM_SUPPORTED_VERSION
|
||||
|
@ -964,23 +977,23 @@ public class BinaryDictEncoder {
|
|||
|
||||
// Leave the choice of the optimal node order to the flattenTree function.
|
||||
MakedictLog.i("Flattening the tree...");
|
||||
ArrayList<Node> flatNodes = flattenTree(dict.mRoot);
|
||||
ArrayList<PtNodeArray> flatNodes = flattenTree(dict.mRootNodeArray);
|
||||
|
||||
MakedictLog.i("Computing addresses...");
|
||||
computeAddresses(dict, flatNodes, formatOptions);
|
||||
MakedictLog.i("Checking array...");
|
||||
if (DBG) checkFlatNodeArray(flatNodes);
|
||||
if (DBG) checkFlatNodeArrayList(flatNodes);
|
||||
|
||||
// Create a buffer that matches the final dictionary size.
|
||||
final Node lastNode = flatNodes.get(flatNodes.size() - 1);
|
||||
final int bufferSize = lastNode.mCachedAddressAfterUpdate + lastNode.mCachedSize;
|
||||
final PtNodeArray lastNodeArray = flatNodes.get(flatNodes.size() - 1);
|
||||
final int bufferSize = lastNodeArray.mCachedAddressAfterUpdate + lastNodeArray.mCachedSize;
|
||||
final byte[] buffer = new byte[bufferSize];
|
||||
int index = 0;
|
||||
|
||||
MakedictLog.i("Writing file...");
|
||||
int dataEndOffset = 0;
|
||||
for (Node n : flatNodes) {
|
||||
dataEndOffset = writePlacedNode(dict, buffer, n, formatOptions);
|
||||
for (PtNodeArray nodeArray : flatNodes) {
|
||||
dataEndOffset = writePlacedNode(dict, buffer, nodeArray, formatOptions);
|
||||
}
|
||||
|
||||
if (DBG) showStatistics(flatNodes);
|
||||
|
|
|
@ -59,7 +59,7 @@ public final class BinaryDictIOUtils {
|
|||
}
|
||||
|
||||
/**
|
||||
* Tours all node without recursive call.
|
||||
* Retrieves all node arrays without recursive call.
|
||||
*/
|
||||
private static void readUnigramsAndBigramsBinaryInner(
|
||||
final FusionDictionaryBufferInterface buffer, final int headerSize,
|
||||
|
@ -116,7 +116,7 @@ public final class BinaryDictIOUtils {
|
|||
if (formatOptions.mSupportsDynamicUpdate) {
|
||||
final int forwardLinkAddress = buffer.readUnsignedInt24();
|
||||
if (forwardLinkAddress != FormatSpec.NO_FORWARD_LINK_ADDRESS) {
|
||||
// the node has a forward link.
|
||||
// The node array has a forward link.
|
||||
p.mNumOfCharGroup = Position.NOT_READ_GROUPCOUNT;
|
||||
p.mAddress = forwardLinkAddress;
|
||||
} else {
|
||||
|
@ -126,7 +126,7 @@ public final class BinaryDictIOUtils {
|
|||
stack.pop();
|
||||
}
|
||||
} else {
|
||||
// the node has more groups.
|
||||
// The node array has more groups.
|
||||
p.mAddress = buffer.position();
|
||||
}
|
||||
|
||||
|
@ -139,14 +139,14 @@ public final class BinaryDictIOUtils {
|
|||
|
||||
/**
|
||||
* Reads unigrams and bigrams from the binary file.
|
||||
* Doesn't make the memory representation of the dictionary.
|
||||
* Doesn't store a full memory representation of the dictionary.
|
||||
*
|
||||
* @param reader the reader.
|
||||
* @param words the map to store the address as a key and the word as a value.
|
||||
* @param frequencies the map to store the address as a key and the frequency as a value.
|
||||
* @param bigrams the map to store the address as a key and the list of address as a value.
|
||||
* @throws IOException
|
||||
* @throws UnsupportedFormatException
|
||||
* @throws IOException if the file can't be read.
|
||||
* @throws UnsupportedFormatException if the format of the file is not recognized.
|
||||
*/
|
||||
public static void readUnigramsAndBigramsBinary(final BinaryDictReader reader,
|
||||
final Map<Integer, String> words, final Map<Integer, Integer> frequencies,
|
||||
|
@ -165,8 +165,8 @@ public final class BinaryDictIOUtils {
|
|||
* @param buffer the buffer to read.
|
||||
* @param word the word we search for.
|
||||
* @return the address of the terminal node.
|
||||
* @throws IOException
|
||||
* @throws UnsupportedFormatException
|
||||
* @throws IOException if the file can't be read.
|
||||
* @throws UnsupportedFormatException if the format of the file is not recognized.
|
||||
*/
|
||||
@UsedForTesting
|
||||
public static int getTerminalPosition(final FusionDictionaryBufferInterface buffer,
|
||||
|
@ -224,9 +224,9 @@ public final class BinaryDictIOUtils {
|
|||
}
|
||||
|
||||
// If we found the next char group, it is under the file pointer.
|
||||
// But if not, we are at the end of this node so we expect to have
|
||||
// But if not, we are at the end of this node array so we expect to have
|
||||
// a forward link address that we need to consult and possibly resume
|
||||
// search on the next node in the linked list.
|
||||
// search on the next node array in the linked list.
|
||||
if (foundNextCharGroup) break;
|
||||
if (!header.mFormatOptions.mSupportsDynamicUpdate) {
|
||||
return FormatSpec.NOT_VALID_WORD;
|
||||
|
@ -365,9 +365,10 @@ public final class BinaryDictIOUtils {
|
|||
}
|
||||
|
||||
/**
|
||||
* Write a char group to an output stream.
|
||||
* A char group is an in-memory representation of a node in trie.
|
||||
* A char group info is an on-disk representation of a node.
|
||||
* Write a char group to an output stream from a CharGroupInfo.
|
||||
* A char group is an in-memory representation of a node in the patricia trie.
|
||||
* A char group info is a container for low-level information about how the
|
||||
* char group is stored in the binary format.
|
||||
*
|
||||
* @param destination the stream to write.
|
||||
* @param info the char group info to be written.
|
||||
|
@ -427,7 +428,7 @@ public final class BinaryDictIOUtils {
|
|||
|
||||
if (info.mBigrams != null) {
|
||||
// TODO: Consolidate this code with the code that computes the size of the bigram list
|
||||
// in BinaryDictEncoder#computeActualNodeSize
|
||||
// in BinaryDictEncoder#computeActualNodeArraySize
|
||||
for (int i = 0; i < info.mBigrams.size(); ++i) {
|
||||
|
||||
final int bigramFrequency = info.mBigrams.get(i).mFrequency;
|
||||
|
@ -479,14 +480,14 @@ public final class BinaryDictIOUtils {
|
|||
}
|
||||
|
||||
/**
|
||||
* Write a node to the stream.
|
||||
* Write a node array to the stream.
|
||||
*
|
||||
* @param destination the stream to write.
|
||||
* @param infos groups to be written.
|
||||
* @param infos an array of CharGroupInfo to be written.
|
||||
* @return the size written, in bytes.
|
||||
* @throws IOException
|
||||
*/
|
||||
static int writeNode(final OutputStream destination, final CharGroupInfo[] infos)
|
||||
static int writeNodes(final OutputStream destination, final CharGroupInfo[] infos)
|
||||
throws IOException {
|
||||
int size = getGroupCountSize(infos.length);
|
||||
switch (getGroupCountSize(infos.length)) {
|
||||
|
@ -604,12 +605,12 @@ public final class BinaryDictIOUtils {
|
|||
public static int getGroupCountSize(final int count) {
|
||||
if (FormatSpec.MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT >= count) {
|
||||
return 1;
|
||||
} else if (FormatSpec.MAX_CHARGROUPS_IN_A_NODE >= count) {
|
||||
} else if (FormatSpec.MAX_CHARGROUPS_IN_A_PT_NODE_ARRAY >= count) {
|
||||
return 2;
|
||||
} else {
|
||||
throw new RuntimeException("Can't have more than "
|
||||
+ FormatSpec.MAX_CHARGROUPS_IN_A_NODE + " groups in a node (found " + count
|
||||
+ ")");
|
||||
+ FormatSpec.MAX_CHARGROUPS_IN_A_PT_NODE_ARRAY + " groups in a node (found "
|
||||
+ count + ")");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -86,7 +86,7 @@ public final class DynamicBinaryDictIOUtils {
|
|||
}
|
||||
final int flags = buffer.readUnsignedByte();
|
||||
if (BinaryDictIOUtils.isMovedGroup(flags, formatOptions)) {
|
||||
// if the group is moved, the parent address is stored in the destination group.
|
||||
// If the group is moved, the parent address is stored in the destination group.
|
||||
// We are guaranteed to process the destination group later, so there is no need to
|
||||
// update anything here.
|
||||
buffer.position(originalPosition);
|
||||
|
@ -101,10 +101,10 @@ public final class DynamicBinaryDictIOUtils {
|
|||
}
|
||||
|
||||
/**
|
||||
* Update parent addresses in a Node that is referred to by nodeOriginAddress.
|
||||
* Update parent addresses in a node array stored at nodeOriginAddress.
|
||||
*
|
||||
* @param buffer the buffer to be modified.
|
||||
* @param nodeOriginAddress the address of a modified Node.
|
||||
* @param nodeOriginAddress the address of the node array to update.
|
||||
* @param newParentAddress the address to be written.
|
||||
* @param formatOptions file format options.
|
||||
*/
|
||||
|
@ -154,7 +154,7 @@ public final class DynamicBinaryDictIOUtils {
|
|||
*/
|
||||
private static int moveCharGroup(final OutputStream destination,
|
||||
final FusionDictionaryBufferInterface buffer, final CharGroupInfo info,
|
||||
final int nodeOriginAddress, final int oldGroupAddress,
|
||||
final int nodeArrayOriginAddress, final int oldGroupAddress,
|
||||
final FormatOptions formatOptions) throws IOException {
|
||||
updateParentAddress(buffer, oldGroupAddress, buffer.limit() + 1, formatOptions);
|
||||
buffer.position(oldGroupAddress);
|
||||
|
@ -163,15 +163,16 @@ public final class DynamicBinaryDictIOUtils {
|
|||
buffer.put((byte)(FormatSpec.FLAG_IS_MOVED | (currentFlags
|
||||
& (~FormatSpec.MASK_MOVE_AND_DELETE_FLAG))));
|
||||
int size = FormatSpec.GROUP_FLAGS_SIZE;
|
||||
updateForwardLink(buffer, nodeOriginAddress, buffer.limit(), formatOptions);
|
||||
size += BinaryDictIOUtils.writeNode(destination, new CharGroupInfo[] { info });
|
||||
updateForwardLink(buffer, nodeArrayOriginAddress, buffer.limit(), formatOptions);
|
||||
size += BinaryDictIOUtils.writeNodes(destination, new CharGroupInfo[] { info });
|
||||
return size;
|
||||
}
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
private static void updateForwardLink(final FusionDictionaryBufferInterface buffer,
|
||||
final int nodeOriginAddress, final int newNodeAddress,
|
||||
final int nodeArrayOriginAddress, final int newNodeArrayAddress,
|
||||
final FormatOptions formatOptions) {
|
||||
buffer.position(nodeOriginAddress);
|
||||
buffer.position(nodeArrayOriginAddress);
|
||||
int jumpCount = 0;
|
||||
while (jumpCount++ < MAX_JUMPS) {
|
||||
final int count = BinaryDictDecoder.readCharGroupCount(buffer);
|
||||
|
@ -179,7 +180,7 @@ public final class DynamicBinaryDictIOUtils {
|
|||
final int forwardLinkAddress = buffer.readUnsignedInt24();
|
||||
if (forwardLinkAddress == FormatSpec.NO_FORWARD_LINK_ADDRESS) {
|
||||
buffer.position(buffer.position() - FormatSpec.FORWARD_LINK_ADDRESS_SIZE);
|
||||
BinaryDictIOUtils.writeSInt24ToBuffer(buffer, newNodeAddress);
|
||||
BinaryDictIOUtils.writeSInt24ToBuffer(buffer, newNodeArrayAddress);
|
||||
return;
|
||||
}
|
||||
buffer.position(forwardLinkAddress);
|
||||
|
@ -190,57 +191,59 @@ public final class DynamicBinaryDictIOUtils {
|
|||
}
|
||||
|
||||
/**
|
||||
* Move a group that is referred to by oldGroupOrigin to the tail of the file.
|
||||
* And set the children address to the byte after the group.
|
||||
* Move a group that is referred to by oldGroupOrigin to the tail of the file, and set the
|
||||
* children address to the byte after the group.
|
||||
*
|
||||
* @param nodeOrigin the address of the tail of the file.
|
||||
* @param characters
|
||||
* @param length
|
||||
* @param flags
|
||||
* @param frequency
|
||||
* @param parentAddress
|
||||
* @param shortcutTargets
|
||||
* @param bigrams
|
||||
* @param fileEndAddress the address of the tail of the file.
|
||||
* @param codePoints the characters to put inside the group.
|
||||
* @param length how many code points to read from codePoints.
|
||||
* @param flags the flags for this group.
|
||||
* @param frequency the frequency of this terminal.
|
||||
* @param parentAddress the address of the parent group of this group.
|
||||
* @param shortcutTargets the shortcut targets for this group.
|
||||
* @param bigrams the bigrams for this group.
|
||||
* @param destination the stream representing the tail of the file.
|
||||
* @param buffer the buffer representing the (constant-size) body of the file.
|
||||
* @param oldNodeOrigin
|
||||
* @param oldGroupOrigin
|
||||
* @param formatOptions
|
||||
* @param oldNodeArrayOrigin the origin of the old node array this group was a part of.
|
||||
* @param oldGroupOrigin the old origin where this group used to be stored.
|
||||
* @param formatOptions format options for this dictionary.
|
||||
* @return the size written, in bytes.
|
||||
* @throws IOException
|
||||
* @throws IOException if the file can't be accessed.
|
||||
*/
|
||||
private static int moveGroup(final int nodeOrigin, final int[] characters, final int length,
|
||||
final int flags, final int frequency, final int parentAddress,
|
||||
private static int moveGroup(final int fileEndAddress, final int[] codePoints,
|
||||
final int length, final int flags, final int frequency, final int parentAddress,
|
||||
final ArrayList<WeightedString> shortcutTargets,
|
||||
final ArrayList<PendingAttribute> bigrams, final OutputStream destination,
|
||||
final FusionDictionaryBufferInterface buffer, final int oldNodeOrigin,
|
||||
final FusionDictionaryBufferInterface buffer, final int oldNodeArrayOrigin,
|
||||
final int oldGroupOrigin, final FormatOptions formatOptions) throws IOException {
|
||||
int size = 0;
|
||||
final int newGroupOrigin = nodeOrigin + 1;
|
||||
final int[] writtenCharacters = Arrays.copyOfRange(characters, 0, length);
|
||||
final int newGroupOrigin = fileEndAddress + 1;
|
||||
final int[] writtenCharacters = Arrays.copyOfRange(codePoints, 0, length);
|
||||
final CharGroupInfo tmpInfo = new CharGroupInfo(newGroupOrigin, -1 /* endAddress */,
|
||||
flags, writtenCharacters, frequency, parentAddress, FormatSpec.NO_CHILDREN_ADDRESS,
|
||||
shortcutTargets, bigrams);
|
||||
size = BinaryDictIOUtils.computeGroupSize(tmpInfo, formatOptions);
|
||||
final CharGroupInfo newInfo = new CharGroupInfo(newGroupOrigin, newGroupOrigin + size,
|
||||
flags, writtenCharacters, frequency, parentAddress,
|
||||
nodeOrigin + 1 + size + FormatSpec.FORWARD_LINK_ADDRESS_SIZE, shortcutTargets,
|
||||
fileEndAddress + 1 + size + FormatSpec.FORWARD_LINK_ADDRESS_SIZE, shortcutTargets,
|
||||
bigrams);
|
||||
moveCharGroup(destination, buffer, newInfo, oldNodeOrigin, oldGroupOrigin, formatOptions);
|
||||
moveCharGroup(destination, buffer, newInfo, oldNodeArrayOrigin, oldGroupOrigin,
|
||||
formatOptions);
|
||||
return 1 + size + FormatSpec.FORWARD_LINK_ADDRESS_SIZE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Insert a word into a binary dictionary.
|
||||
*
|
||||
* @param buffer
|
||||
* @param destination
|
||||
* @param word
|
||||
* @param frequency
|
||||
* @param bigramStrings
|
||||
* @param shortcuts
|
||||
* @throws IOException
|
||||
* @throws UnsupportedFormatException
|
||||
* @param buffer the buffer containing the existing dictionary.
|
||||
* @param destination a stream to the underlying file, with the pointer at the end of the file.
|
||||
* @param word the word to insert.
|
||||
* @param frequency the frequency of the new word.
|
||||
* @param bigramStrings bigram list, or null if none.
|
||||
* @param shortcuts shortcut list, or null if none.
|
||||
* @param isBlackListEntry whether this should be a blacklist entry.
|
||||
* @throws IOException if the file can't be accessed.
|
||||
* @throws UnsupportedFormatException if the existing dictionary is in an unexpected format.
|
||||
*/
|
||||
// TODO: Support batch insertion.
|
||||
// TODO: Remove @UsedForTesting once UserHistoryDictionary is implemented by BinaryDictionary.
|
||||
|
@ -323,7 +326,7 @@ public final class DynamicBinaryDictIOUtils {
|
|||
currentInfo.mFlags, characters2, currentInfo.mFrequency,
|
||||
newNodeAddress + 1, currentInfo.mChildrenAddress,
|
||||
currentInfo.mShortcutTargets, currentInfo.mBigrams);
|
||||
BinaryDictIOUtils.writeNode(destination, new CharGroupInfo[] { newInfo2 });
|
||||
BinaryDictIOUtils.writeNodes(destination, new CharGroupInfo[] { newInfo2 });
|
||||
return;
|
||||
} else if (codePoints[wordPos + p] != currentInfo.mCharacters[p]) {
|
||||
if (p > 0) {
|
||||
|
@ -386,7 +389,7 @@ public final class DynamicBinaryDictIOUtils {
|
|||
newNodeAddress + written, -1 /* endAddress */, flags,
|
||||
newCharacters, frequency, newNodeAddress + 1,
|
||||
FormatSpec.NO_CHILDREN_ADDRESS, shortcuts, bigrams);
|
||||
BinaryDictIOUtils.writeNode(destination,
|
||||
BinaryDictIOUtils.writeNodes(destination,
|
||||
new CharGroupInfo[] { suffixInfo, newInfo });
|
||||
return;
|
||||
}
|
||||
|
@ -438,7 +441,7 @@ public final class DynamicBinaryDictIOUtils {
|
|||
final CharGroupInfo newInfo = new CharGroupInfo(newGroupAddress, -1, flags,
|
||||
characters, frequency, address, FormatSpec.NO_CHILDREN_ADDRESS,
|
||||
shortcuts, bigrams);
|
||||
BinaryDictIOUtils.writeNode(destination, new CharGroupInfo[] { newInfo });
|
||||
BinaryDictIOUtils.writeNodes(destination, new CharGroupInfo[] { newInfo });
|
||||
return;
|
||||
}
|
||||
buffer.position(currentInfo.mChildrenAddress);
|
||||
|
@ -482,7 +485,7 @@ public final class DynamicBinaryDictIOUtils {
|
|||
final CharGroupInfo newInfo = new CharGroupInfo(newNodeAddress + 1,
|
||||
-1 /* endAddress */, flags, characters, frequency, nodeParentAddress,
|
||||
FormatSpec.NO_CHILDREN_ADDRESS, shortcuts, bigrams);
|
||||
BinaryDictIOUtils.writeNode(destination, new CharGroupInfo[]{ newInfo });
|
||||
BinaryDictIOUtils.writeNodes(destination, new CharGroupInfo[]{ newInfo });
|
||||
return;
|
||||
} else {
|
||||
depth--;
|
||||
|
|
|
@ -60,7 +60,7 @@ public final class FormatSpec {
|
|||
*/
|
||||
|
||||
/*
|
||||
* Array of Node(FusionDictionary.Node) layout is as follows:
|
||||
* Node array (FusionDictionary.PtNodeArray) layout is as follows:
|
||||
*
|
||||
* g |
|
||||
* r | the number of groups, 1 or 2 bytes.
|
||||
|
@ -86,7 +86,7 @@ public final class FormatSpec {
|
|||
* linkaddress
|
||||
*/
|
||||
|
||||
/* Node(CharGroup) layout is as follows:
|
||||
/* Node (FusionDictionary.CharGroup) layout is as follows:
|
||||
* | IF !SUPPORTS_DYNAMIC_UPDATE
|
||||
* | addressType xx : mask with MASK_GROUP_ADDRESS_TYPE
|
||||
* | 2 bits, 00 = no children : FLAG_GROUP_ADDRESS_TYPE_NOADDRESS
|
||||
|
@ -251,7 +251,7 @@ public final class FormatSpec {
|
|||
static final int INVALID_CHARACTER = -1;
|
||||
|
||||
static final int MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT = 0x7F; // 127
|
||||
static final int MAX_CHARGROUPS_IN_A_NODE = 0x7FFF; // 32767
|
||||
static final int MAX_CHARGROUPS_IN_A_PT_NODE_ARRAY = 0x7FFF; // 32767
|
||||
static final int MAX_BIGRAMS_IN_A_GROUP = 10000;
|
||||
|
||||
static final int MAX_TERMINAL_FREQUENCY = 255;
|
||||
|
|
|
@ -37,14 +37,14 @@ public final class FusionDictionary implements Iterable<Word> {
|
|||
private static int CHARACTER_NOT_FOUND_INDEX = -1;
|
||||
|
||||
/**
|
||||
* A node of the dictionary, containing several CharGroups.
|
||||
* A node array of the dictionary, containing several CharGroups.
|
||||
*
|
||||
* A node is but an ordered array of CharGroups, which essentially contain all the
|
||||
* A PtNodeArray is but an ordered array of CharGroups, which essentially contain all the
|
||||
* real information.
|
||||
* This class also contains fields to cache size and address, to help with binary
|
||||
* generation.
|
||||
*/
|
||||
public static final class Node {
|
||||
public static final class PtNodeArray {
|
||||
ArrayList<CharGroup> mData;
|
||||
// To help with binary generation
|
||||
int mCachedSize = Integer.MIN_VALUE;
|
||||
|
@ -57,10 +57,10 @@ public final class FusionDictionary implements Iterable<Word> {
|
|||
int mCachedAddressAfterUpdate = Integer.MIN_VALUE;
|
||||
int mCachedParentAddress = 0;
|
||||
|
||||
public Node() {
|
||||
public PtNodeArray() {
|
||||
mData = new ArrayList<CharGroup>();
|
||||
}
|
||||
public Node(ArrayList<CharGroup> data) {
|
||||
public PtNodeArray(ArrayList<CharGroup> data) {
|
||||
mData = data;
|
||||
}
|
||||
}
|
||||
|
@ -98,7 +98,7 @@ public final class FusionDictionary implements Iterable<Word> {
|
|||
* This is the central class of the in-memory representation. A CharGroup is what can
|
||||
* be seen as a traditional "trie node", except it can hold several characters at the
|
||||
* same time. A CharGroup essentially represents one or several characters in the middle
|
||||
* of the trie trie; as such, it can be a terminal, and it can have children.
|
||||
* of the trie tree; as such, it can be a terminal, and it can have children.
|
||||
* In this in-memory representation, whether the CharGroup is a terminal or not is represented
|
||||
* in the frequency, where NOT_A_TERMINAL (= -1) means this is not a terminal and any other
|
||||
* value is the frequency of this terminal. A terminal may have non-null shortcuts and/or
|
||||
|
@ -110,7 +110,7 @@ public final class FusionDictionary implements Iterable<Word> {
|
|||
ArrayList<WeightedString> mShortcutTargets;
|
||||
ArrayList<WeightedString> mBigrams;
|
||||
int mFrequency; // NOT_A_TERMINAL == mFrequency indicates this is not a terminal.
|
||||
Node mChildren;
|
||||
PtNodeArray mChildren;
|
||||
boolean mIsNotAWord; // Only a shortcut
|
||||
boolean mIsBlacklistEntry;
|
||||
// mCachedSize and mCachedAddressBefore/AfterUpdate are helpers for binary dictionary
|
||||
|
@ -137,7 +137,8 @@ public final class FusionDictionary implements Iterable<Word> {
|
|||
|
||||
public CharGroup(final int[] chars, final ArrayList<WeightedString> shortcutTargets,
|
||||
final ArrayList<WeightedString> bigrams, final int frequency,
|
||||
final boolean isNotAWord, final boolean isBlacklistEntry, final Node children) {
|
||||
final boolean isNotAWord, final boolean isBlacklistEntry,
|
||||
final PtNodeArray children) {
|
||||
mChars = chars;
|
||||
mFrequency = frequency;
|
||||
mShortcutTargets = shortcutTargets;
|
||||
|
@ -149,7 +150,7 @@ public final class FusionDictionary implements Iterable<Word> {
|
|||
|
||||
public void addChild(CharGroup n) {
|
||||
if (null == mChildren) {
|
||||
mChildren = new Node();
|
||||
mChildren = new PtNodeArray();
|
||||
}
|
||||
mChildren.mData.add(n);
|
||||
}
|
||||
|
@ -344,10 +345,10 @@ public final class FusionDictionary implements Iterable<Word> {
|
|||
}
|
||||
|
||||
public final DictionaryOptions mOptions;
|
||||
public final Node mRoot;
|
||||
public final PtNodeArray mRootNodeArray;
|
||||
|
||||
public FusionDictionary(final Node root, final DictionaryOptions options) {
|
||||
mRoot = root;
|
||||
public FusionDictionary(final PtNodeArray rootNodeArray, final DictionaryOptions options) {
|
||||
mRootNodeArray = rootNodeArray;
|
||||
mOptions = options;
|
||||
}
|
||||
|
||||
|
@ -406,13 +407,13 @@ public final class FusionDictionary implements Iterable<Word> {
|
|||
}
|
||||
|
||||
/**
|
||||
* Sanity check for a node.
|
||||
* Sanity check for a node array.
|
||||
*
|
||||
* This method checks that all CharGroups in a node are ordered as expected.
|
||||
* This method checks that all CharGroups in a node array are ordered as expected.
|
||||
* If they are, nothing happens. If they aren't, an exception is thrown.
|
||||
*/
|
||||
private void checkStack(Node node) {
|
||||
ArrayList<CharGroup> stack = node.mData;
|
||||
private void checkStack(PtNodeArray nodeArray) {
|
||||
ArrayList<CharGroup> stack = nodeArray.mData;
|
||||
int lastValue = -1;
|
||||
for (int i = 0; i < stack.size(); ++i) {
|
||||
int currentValue = stack.get(i).mChars[0];
|
||||
|
@ -431,16 +432,16 @@ public final class FusionDictionary implements Iterable<Word> {
|
|||
* @param frequency the bigram frequency
|
||||
*/
|
||||
public void setBigram(final String word1, final String word2, final int frequency) {
|
||||
CharGroup charGroup = findWordInTree(mRoot, word1);
|
||||
CharGroup charGroup = findWordInTree(mRootNodeArray, word1);
|
||||
if (charGroup != null) {
|
||||
final CharGroup charGroup2 = findWordInTree(mRoot, word2);
|
||||
final CharGroup charGroup2 = findWordInTree(mRootNodeArray, word2);
|
||||
if (charGroup2 == null) {
|
||||
add(getCodePoints(word2), 0, null, false /* isNotAWord */,
|
||||
false /* isBlacklistEntry */);
|
||||
// The chargroup for the first word may have moved by the above insertion,
|
||||
// if word1 and word2 share a common stem that happens not to have been
|
||||
// a cutting point until now. In this case, we need to refresh charGroup.
|
||||
charGroup = findWordInTree(mRoot, word1);
|
||||
charGroup = findWordInTree(mRootNodeArray, word1);
|
||||
}
|
||||
charGroup.addBigram(word2, frequency);
|
||||
} else {
|
||||
|
@ -469,38 +470,38 @@ public final class FusionDictionary implements Iterable<Word> {
|
|||
return;
|
||||
}
|
||||
|
||||
Node currentNode = mRoot;
|
||||
PtNodeArray currentNodeArray = mRootNodeArray;
|
||||
int charIndex = 0;
|
||||
|
||||
CharGroup currentGroup = null;
|
||||
int differentCharIndex = 0; // Set by the loop to the index of the char that differs
|
||||
int nodeIndex = findIndexOfChar(mRoot, word[charIndex]);
|
||||
int nodeIndex = findIndexOfChar(mRootNodeArray, word[charIndex]);
|
||||
while (CHARACTER_NOT_FOUND_INDEX != nodeIndex) {
|
||||
currentGroup = currentNode.mData.get(nodeIndex);
|
||||
differentCharIndex = compareArrays(currentGroup.mChars, word, charIndex);
|
||||
currentGroup = currentNodeArray.mData.get(nodeIndex);
|
||||
differentCharIndex = compareCharArrays(currentGroup.mChars, word, charIndex);
|
||||
if (ARRAYS_ARE_EQUAL != differentCharIndex
|
||||
&& differentCharIndex < currentGroup.mChars.length) break;
|
||||
if (null == currentGroup.mChildren) break;
|
||||
charIndex += currentGroup.mChars.length;
|
||||
if (charIndex >= word.length) break;
|
||||
currentNode = currentGroup.mChildren;
|
||||
nodeIndex = findIndexOfChar(currentNode, word[charIndex]);
|
||||
currentNodeArray = currentGroup.mChildren;
|
||||
nodeIndex = findIndexOfChar(currentNodeArray, word[charIndex]);
|
||||
}
|
||||
|
||||
if (CHARACTER_NOT_FOUND_INDEX == nodeIndex) {
|
||||
// No node at this point to accept the word. Create one.
|
||||
final int insertionIndex = findInsertionIndex(currentNode, word[charIndex]);
|
||||
final int insertionIndex = findInsertionIndex(currentNodeArray, word[charIndex]);
|
||||
final CharGroup newGroup = new CharGroup(
|
||||
Arrays.copyOfRange(word, charIndex, word.length),
|
||||
shortcutTargets, null /* bigrams */, frequency, isNotAWord, isBlacklistEntry);
|
||||
currentNode.mData.add(insertionIndex, newGroup);
|
||||
if (DBG) checkStack(currentNode);
|
||||
currentNodeArray.mData.add(insertionIndex, newGroup);
|
||||
if (DBG) checkStack(currentNodeArray);
|
||||
} else {
|
||||
// There is a word with a common prefix.
|
||||
if (differentCharIndex == currentGroup.mChars.length) {
|
||||
if (charIndex + differentCharIndex >= word.length) {
|
||||
// The new word is a prefix of an existing word, but the node on which it
|
||||
// should end already exists as is. Since the old CharNode was not a terminal,
|
||||
// should end already exists as is. Since the old CharGroup was not a terminal,
|
||||
// make it one by filling in its frequency and other attributes
|
||||
currentGroup.update(frequency, shortcutTargets, null, isNotAWord,
|
||||
isBlacklistEntry);
|
||||
|
@ -511,7 +512,7 @@ public final class FusionDictionary implements Iterable<Word> {
|
|||
Arrays.copyOfRange(word, charIndex + differentCharIndex, word.length),
|
||||
shortcutTargets, null /* bigrams */, frequency, isNotAWord,
|
||||
isBlacklistEntry);
|
||||
currentGroup.mChildren = new Node();
|
||||
currentGroup.mChildren = new PtNodeArray();
|
||||
currentGroup.mChildren.mData.add(newNode);
|
||||
}
|
||||
} else {
|
||||
|
@ -524,7 +525,7 @@ public final class FusionDictionary implements Iterable<Word> {
|
|||
} else {
|
||||
// Partial prefix match only. We have to replace the current node with a node
|
||||
// containing the current prefix and create two new ones for the tails.
|
||||
Node newChildren = new Node();
|
||||
PtNodeArray newChildren = new PtNodeArray();
|
||||
final CharGroup newOldWord = new CharGroup(
|
||||
Arrays.copyOfRange(currentGroup.mChars, differentCharIndex,
|
||||
currentGroup.mChars.length), currentGroup.mShortcutTargets,
|
||||
|
@ -552,9 +553,9 @@ public final class FusionDictionary implements Iterable<Word> {
|
|||
> currentGroup.mChars[differentCharIndex] ? 1 : 0;
|
||||
newChildren.mData.add(addIndex, newWord);
|
||||
}
|
||||
currentNode.mData.set(nodeIndex, newParent);
|
||||
currentNodeArray.mData.set(nodeIndex, newParent);
|
||||
}
|
||||
if (DBG) checkStack(currentNode);
|
||||
if (DBG) checkStack(currentNodeArray);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -576,7 +577,7 @@ public final class FusionDictionary implements Iterable<Word> {
|
|||
* @param dstOffset the offset in the right-hand side string.
|
||||
* @return the index at which the strings differ, or ARRAYS_ARE_EQUAL = 0 if they don't.
|
||||
*/
|
||||
private static int compareArrays(final int[] src, final int[] dst, int dstOffset) {
|
||||
private static int compareCharArrays(final int[] src, final int[] dst, int dstOffset) {
|
||||
// We do NOT test the first char, because we come from a method that already
|
||||
// tested it.
|
||||
for (int i = 1; i < src.length; ++i) {
|
||||
|
@ -603,10 +604,10 @@ public final class FusionDictionary implements Iterable<Word> {
|
|||
final static private CharGroupComparator CHARGROUP_COMPARATOR = new CharGroupComparator();
|
||||
|
||||
/**
|
||||
* Finds the insertion index of a character within a node.
|
||||
* Finds the insertion index of a character within a node array.
|
||||
*/
|
||||
private static int findInsertionIndex(final Node node, int character) {
|
||||
final ArrayList<CharGroup> data = node.mData;
|
||||
private static int findInsertionIndex(final PtNodeArray nodeArray, int character) {
|
||||
final ArrayList<CharGroup> data = nodeArray.mData;
|
||||
final CharGroup reference = new CharGroup(new int[] { character },
|
||||
null /* shortcutTargets */, null /* bigrams */, 0, false /* isNotAWord */,
|
||||
false /* isBlacklistEntry */);
|
||||
|
@ -615,16 +616,16 @@ public final class FusionDictionary implements Iterable<Word> {
|
|||
}
|
||||
|
||||
/**
|
||||
* Find the index of a char in a node, if it exists.
|
||||
* Find the index of a char in a node array, if it exists.
|
||||
*
|
||||
* @param node the node to search in.
|
||||
* @param nodeArray the node array to search in.
|
||||
* @param character the character to search for.
|
||||
* @return the position of the character if it's there, or CHARACTER_NOT_FOUND_INDEX = -1 else.
|
||||
*/
|
||||
private static int findIndexOfChar(final Node node, int character) {
|
||||
final int insertionIndex = findInsertionIndex(node, character);
|
||||
if (node.mData.size() <= insertionIndex) return CHARACTER_NOT_FOUND_INDEX;
|
||||
return character == node.mData.get(insertionIndex).mChars[0] ? insertionIndex
|
||||
private static int findIndexOfChar(final PtNodeArray nodeArray, int character) {
|
||||
final int insertionIndex = findInsertionIndex(nodeArray, character);
|
||||
if (nodeArray.mData.size() <= insertionIndex) return CHARACTER_NOT_FOUND_INDEX;
|
||||
return character == nodeArray.mData.get(insertionIndex).mChars[0] ? insertionIndex
|
||||
: CHARACTER_NOT_FOUND_INDEX;
|
||||
}
|
||||
|
||||
|
@ -632,16 +633,16 @@ public final class FusionDictionary implements Iterable<Word> {
|
|||
* Helper method to find a word in a given branch.
|
||||
*/
|
||||
@SuppressWarnings("unused")
|
||||
public static CharGroup findWordInTree(Node node, final String string) {
|
||||
public static CharGroup findWordInTree(PtNodeArray nodeArray, final String string) {
|
||||
int index = 0;
|
||||
final StringBuilder checker = DBG ? new StringBuilder() : null;
|
||||
final int[] codePoints = getCodePoints(string);
|
||||
|
||||
CharGroup currentGroup;
|
||||
do {
|
||||
int indexOfGroup = findIndexOfChar(node, codePoints[index]);
|
||||
int indexOfGroup = findIndexOfChar(nodeArray, codePoints[index]);
|
||||
if (CHARACTER_NOT_FOUND_INDEX == indexOfGroup) return null;
|
||||
currentGroup = node.mData.get(indexOfGroup);
|
||||
currentGroup = nodeArray.mData.get(indexOfGroup);
|
||||
|
||||
if (codePoints.length - index < currentGroup.mChars.length) return null;
|
||||
int newIndex = index;
|
||||
|
@ -653,9 +654,9 @@ public final class FusionDictionary implements Iterable<Word> {
|
|||
|
||||
if (DBG) checker.append(new String(currentGroup.mChars, 0, currentGroup.mChars.length));
|
||||
if (index < codePoints.length) {
|
||||
node = currentGroup.mChildren;
|
||||
nodeArray = currentGroup.mChildren;
|
||||
}
|
||||
} while (null != node && index < codePoints.length);
|
||||
} while (null != nodeArray && index < codePoints.length);
|
||||
|
||||
if (index < codePoints.length) return null;
|
||||
if (!currentGroup.isTerminal()) return null;
|
||||
|
@ -670,20 +671,20 @@ public final class FusionDictionary implements Iterable<Word> {
|
|||
if (null == s || "".equals(s)) {
|
||||
throw new RuntimeException("Can't search for a null or empty string");
|
||||
}
|
||||
return null != findWordInTree(mRoot, s);
|
||||
return null != findWordInTree(mRootNodeArray, s);
|
||||
}
|
||||
|
||||
/**
|
||||
* Recursively count the number of character groups in a given branch of the trie.
|
||||
*
|
||||
* @param node the parent node.
|
||||
* @param nodeArray the parent node.
|
||||
* @return the number of char groups in all the branch under this node.
|
||||
*/
|
||||
public static int countCharGroups(final Node node) {
|
||||
final int nodeSize = node.mData.size();
|
||||
public static int countCharGroups(final PtNodeArray nodeArray) {
|
||||
final int nodeSize = nodeArray.mData.size();
|
||||
int size = nodeSize;
|
||||
for (int i = nodeSize - 1; i >= 0; --i) {
|
||||
CharGroup group = node.mData.get(i);
|
||||
CharGroup group = nodeArray.mData.get(i);
|
||||
if (null != group.mChildren)
|
||||
size += countCharGroups(group.mChildren);
|
||||
}
|
||||
|
@ -693,15 +694,15 @@ public final class FusionDictionary implements Iterable<Word> {
|
|||
/**
|
||||
* Recursively count the number of nodes in a given branch of the trie.
|
||||
*
|
||||
* @param node the node to count.
|
||||
* @param nodeArray the node array to count.
|
||||
* @return the number of nodes in this branch.
|
||||
*/
|
||||
public static int countNodes(final Node node) {
|
||||
public static int countNodeArrays(final PtNodeArray nodeArray) {
|
||||
int size = 1;
|
||||
for (int i = node.mData.size() - 1; i >= 0; --i) {
|
||||
CharGroup group = node.mData.get(i);
|
||||
for (int i = nodeArray.mData.size() - 1; i >= 0; --i) {
|
||||
CharGroup group = nodeArray.mData.get(i);
|
||||
if (null != group.mChildren)
|
||||
size += countNodes(group.mChildren);
|
||||
size += countNodeArrays(group.mChildren);
|
||||
}
|
||||
return size;
|
||||
}
|
||||
|
@ -709,10 +710,10 @@ public final class FusionDictionary implements Iterable<Word> {
|
|||
// Recursively find out whether there are any bigrams.
|
||||
// This can be pretty expensive especially if there aren't any (we return as soon
|
||||
// as we find one, so it's much cheaper if there are bigrams)
|
||||
private static boolean hasBigramsInternal(final Node node) {
|
||||
if (null == node) return false;
|
||||
for (int i = node.mData.size() - 1; i >= 0; --i) {
|
||||
CharGroup group = node.mData.get(i);
|
||||
private static boolean hasBigramsInternal(final PtNodeArray nodeArray) {
|
||||
if (null == nodeArray) return false;
|
||||
for (int i = nodeArray.mData.size() - 1; i >= 0; --i) {
|
||||
CharGroup group = nodeArray.mData.get(i);
|
||||
if (null != group.mBigrams) return true;
|
||||
if (hasBigramsInternal(group.mChildren)) return true;
|
||||
}
|
||||
|
@ -729,7 +730,7 @@ public final class FusionDictionary implements Iterable<Word> {
|
|||
// find a more efficient way of doing this, without compromising too much on memory
|
||||
// and ease of use.
|
||||
public boolean hasBigrams() {
|
||||
return hasBigramsInternal(mRoot);
|
||||
return hasBigramsInternal(mRootNodeArray);
|
||||
}
|
||||
|
||||
// Historically, the tails of the words were going to be merged to save space.
|
||||
|
@ -750,13 +751,13 @@ public final class FusionDictionary implements Iterable<Word> {
|
|||
// MakedictLog.i("Merging nodes. Number of nodes : " + countNodes(root));
|
||||
// MakedictLog.i("Number of groups : " + countCharGroups(root));
|
||||
//
|
||||
// final HashMap<String, ArrayList<Node>> repository =
|
||||
// new HashMap<String, ArrayList<Node>>();
|
||||
// final HashMap<String, ArrayList<PtNodeArray>> repository =
|
||||
// new HashMap<String, ArrayList<PtNodeArray>>();
|
||||
// mergeTailsInner(repository, root);
|
||||
//
|
||||
// MakedictLog.i("Number of different pseudohashes : " + repository.size());
|
||||
// int size = 0;
|
||||
// for (ArrayList<Node> a : repository.values()) {
|
||||
// for (ArrayList<PtNodeArray> a : repository.values()) {
|
||||
// size += a.size();
|
||||
// }
|
||||
// MakedictLog.i("Number of nodes after merge : " + (1 + size));
|
||||
|
@ -764,7 +765,7 @@ public final class FusionDictionary implements Iterable<Word> {
|
|||
}
|
||||
|
||||
// The following methods are used by the deactivated mergeTails()
|
||||
// private static boolean isEqual(Node a, Node b) {
|
||||
// private static boolean isEqual(PtNodeArray a, PtNodeArray b) {
|
||||
// if (null == a && null == b) return true;
|
||||
// if (null == a || null == b) return false;
|
||||
// if (a.data.size() != b.data.size()) return false;
|
||||
|
@ -781,21 +782,21 @@ public final class FusionDictionary implements Iterable<Word> {
|
|||
// return true;
|
||||
// }
|
||||
|
||||
// static private HashMap<String, ArrayList<Node>> mergeTailsInner(
|
||||
// final HashMap<String, ArrayList<Node>> map, final Node node) {
|
||||
// final ArrayList<CharGroup> branches = node.data;
|
||||
// static private HashMap<String, ArrayList<PtNodeArray>> mergeTailsInner(
|
||||
// final HashMap<String, ArrayList<PtNodeArray>> map, final PtNodeArray nodeArray) {
|
||||
// final ArrayList<CharGroup> branches = nodeArray.data;
|
||||
// final int nodeSize = branches.size();
|
||||
// for (int i = 0; i < nodeSize; ++i) {
|
||||
// CharGroup group = branches.get(i);
|
||||
// if (null != group.children) {
|
||||
// String pseudoHash = getPseudoHash(group.children);
|
||||
// ArrayList<Node> similarList = map.get(pseudoHash);
|
||||
// ArrayList<PtNodeArray> similarList = map.get(pseudoHash);
|
||||
// if (null == similarList) {
|
||||
// similarList = new ArrayList<Node>();
|
||||
// similarList = new ArrayList<PtNodeArray>();
|
||||
// map.put(pseudoHash, similarList);
|
||||
// }
|
||||
// boolean merged = false;
|
||||
// for (Node similar : similarList) {
|
||||
// for (PtNodeArray similar : similarList) {
|
||||
// if (isEqual(group.children, similar)) {
|
||||
// group.children = similar;
|
||||
// merged = true;
|
||||
|
@ -811,9 +812,9 @@ public final class FusionDictionary implements Iterable<Word> {
|
|||
// return map;
|
||||
// }
|
||||
|
||||
// private static String getPseudoHash(final Node node) {
|
||||
// private static String getPseudoHash(final PtNodeArray nodeArray) {
|
||||
// StringBuilder s = new StringBuilder();
|
||||
// for (CharGroup g : node.data) {
|
||||
// for (CharGroup g : nodeArray.data) {
|
||||
// s.append(g.frequency);
|
||||
// for (int ch : g.chars) {
|
||||
// s.append(Character.toChars(ch));
|
||||
|
@ -901,6 +902,6 @@ public final class FusionDictionary implements Iterable<Word> {
|
|||
*/
|
||||
@Override
|
||||
public Iterator<Word> iterator() {
|
||||
return new DictionaryIterator(mRoot.mData);
|
||||
return new DictionaryIterator(mRootNodeArray.mData);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -25,7 +25,7 @@ import com.android.inputmethod.latin.makedict.BinaryDictIOUtils;
|
|||
import com.android.inputmethod.latin.makedict.BinaryDictReader;
|
||||
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
|
||||
import com.android.inputmethod.latin.makedict.PendingAttribute;
|
||||
import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
|
||||
import com.android.inputmethod.latin.personalization.UserHistoryDictionaryBigramList;
|
||||
|
@ -78,7 +78,7 @@ public final class UserHistoryDictIOUtils {
|
|||
@UsedForTesting
|
||||
static FusionDictionary constructFusionDictionary(
|
||||
final BigramDictionaryInterface dict, final UserHistoryDictionaryBigramList bigrams) {
|
||||
final FusionDictionary fusionDict = new FusionDictionary(new Node(),
|
||||
final FusionDictionary fusionDict = new FusionDictionary(new PtNodeArray(),
|
||||
new FusionDictionary.DictionaryOptions(new HashMap<String, String>(), false,
|
||||
false));
|
||||
int profTotal = 0;
|
||||
|
@ -102,7 +102,7 @@ public final class UserHistoryDictIOUtils {
|
|||
if (word1 == null) { // unigram
|
||||
fusionDict.add(word2, freq, null, false /* isNotAWord */);
|
||||
} else { // bigram
|
||||
if (FusionDictionary.findWordInTree(fusionDict.mRoot, word1) == null) {
|
||||
if (FusionDictionary.findWordInTree(fusionDict.mRootNodeArray, word1) == null) {
|
||||
fusionDict.add(word1, 2, null, false /* isNotAWord */);
|
||||
}
|
||||
fusionDict.setBigram(word1, word2, freq);
|
||||
|
|
|
@ -20,7 +20,7 @@ import android.test.AndroidTestCase;
|
|||
import android.test.suitebuilder.annotation.SmallTest;
|
||||
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
|
||||
|
||||
import java.util.HashMap;
|
||||
|
||||
|
@ -30,21 +30,21 @@ import java.util.HashMap;
|
|||
@SmallTest
|
||||
public class FusionDictionaryTests extends AndroidTestCase {
|
||||
public void testFindWordInTree() {
|
||||
FusionDictionary dict = new FusionDictionary(new Node(),
|
||||
FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
|
||||
new FusionDictionary.DictionaryOptions(new HashMap<String,String>(), false, false));
|
||||
|
||||
dict.add("abc", 10, null, false /* isNotAWord */);
|
||||
assertNull(FusionDictionary.findWordInTree(dict.mRoot, "aaa"));
|
||||
assertNotNull(FusionDictionary.findWordInTree(dict.mRoot, "abc"));
|
||||
assertNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "aaa"));
|
||||
assertNotNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "abc"));
|
||||
|
||||
dict.add("aa", 10, null, false /* isNotAWord */);
|
||||
assertNull(FusionDictionary.findWordInTree(dict.mRoot, "aaa"));
|
||||
assertNotNull(FusionDictionary.findWordInTree(dict.mRoot, "aa"));
|
||||
assertNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "aaa"));
|
||||
assertNotNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "aa"));
|
||||
|
||||
dict.add("babcd", 10, null, false /* isNotAWord */);
|
||||
dict.add("bacde", 10, null, false /* isNotAWord */);
|
||||
assertNull(FusionDictionary.findWordInTree(dict.mRoot, "ba"));
|
||||
assertNotNull(FusionDictionary.findWordInTree(dict.mRoot, "babcd"));
|
||||
assertNotNull(FusionDictionary.findWordInTree(dict.mRoot, "bacde"));
|
||||
assertNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "ba"));
|
||||
assertNotNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "babcd"));
|
||||
assertNotNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "bacde"));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -25,7 +25,7 @@ import android.util.SparseArray;
|
|||
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.FusionDictionaryBufferInterface;
|
||||
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
||||
import com.android.inputmethod.latin.utils.CollectionUtils;
|
||||
|
||||
|
@ -226,7 +226,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
|||
|
||||
// check unigram
|
||||
for (final String word : words) {
|
||||
final CharGroup cg = FusionDictionary.findWordInTree(dict.mRoot, word);
|
||||
final CharGroup cg = FusionDictionary.findWordInTree(dict.mRootNodeArray, word);
|
||||
assertNotNull(cg);
|
||||
}
|
||||
|
||||
|
@ -234,7 +234,8 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
|||
for (int i = 0; i < bigrams.size(); ++i) {
|
||||
final int w1 = bigrams.keyAt(i);
|
||||
for (final int w2 : bigrams.valueAt(i)) {
|
||||
final CharGroup cg = FusionDictionary.findWordInTree(dict.mRoot, words.get(w1));
|
||||
final CharGroup cg = FusionDictionary.findWordInTree(dict.mRootNodeArray,
|
||||
words.get(w1));
|
||||
assertNotNull(words.get(w1) + "," + words.get(w2), cg.getBigram(words.get(w2)));
|
||||
}
|
||||
}
|
||||
|
@ -242,7 +243,8 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
|||
// check shortcut
|
||||
if (shortcutMap != null) {
|
||||
for (final Map.Entry<String, List<String>> entry : shortcutMap.entrySet()) {
|
||||
final CharGroup group = FusionDictionary.findWordInTree(dict.mRoot, entry.getKey());
|
||||
final CharGroup group = FusionDictionary.findWordInTree(dict.mRootNodeArray,
|
||||
entry.getKey());
|
||||
for (final String word : entry.getValue()) {
|
||||
assertNotNull("shortcut not found: " + entry.getKey() + ", " + word,
|
||||
group.getShortcut(word));
|
||||
|
@ -297,7 +299,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
|||
}
|
||||
assertNotNull(file);
|
||||
|
||||
final FusionDictionary dict = new FusionDictionary(new Node(),
|
||||
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
|
||||
new FusionDictionary.DictionaryOptions(new HashMap<String,String>(), false, false));
|
||||
addUnigrams(words.size(), dict, words, shortcuts);
|
||||
addBigrams(dict, words, bigrams);
|
||||
|
@ -440,7 +442,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
|||
assertNotNull(file);
|
||||
|
||||
// making the dictionary from lists of words.
|
||||
final FusionDictionary dict = new FusionDictionary(new Node(),
|
||||
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
|
||||
new FusionDictionary.DictionaryOptions(
|
||||
new HashMap<String, String>(), false, false));
|
||||
addUnigrams(words.size(), dict, words, null /* shortcutMap */);
|
||||
|
@ -538,7 +540,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
|||
}
|
||||
assertNotNull(file);
|
||||
|
||||
final FusionDictionary dict = new FusionDictionary(new Node(),
|
||||
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
|
||||
new FusionDictionary.DictionaryOptions(
|
||||
new HashMap<String, String>(), false, false));
|
||||
addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */);
|
||||
|
@ -599,7 +601,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
|||
}
|
||||
assertNotNull(file);
|
||||
|
||||
final FusionDictionary dict = new FusionDictionary(new Node(),
|
||||
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
|
||||
new FusionDictionary.DictionaryOptions(
|
||||
new HashMap<String, String>(), false, false));
|
||||
addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */);
|
||||
|
|
|
@ -24,7 +24,7 @@ import android.util.Log;
|
|||
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.ByteBufferWrapper;
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.FusionDictionaryBufferInterface;
|
||||
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
||||
import com.android.inputmethod.latin.utils.CollectionUtils;
|
||||
|
||||
|
@ -277,7 +277,7 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
|
|||
}
|
||||
|
||||
// set an initial dictionary.
|
||||
final FusionDictionary dict = new FusionDictionary(new Node(),
|
||||
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
|
||||
new FusionDictionary.DictionaryOptions(new HashMap<String,String>(), false, false));
|
||||
dict.add("abcd", 10, null, false);
|
||||
|
||||
|
@ -328,7 +328,7 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
|
|||
}
|
||||
|
||||
// set an initial dictionary.
|
||||
final FusionDictionary dict = new FusionDictionary(new Node(),
|
||||
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
|
||||
new FusionDictionary.DictionaryOptions(new HashMap<String,String>(), false, false));
|
||||
dict.add("abcd", 10, null, false);
|
||||
dict.add("efgh", 15, null, false);
|
||||
|
@ -365,7 +365,7 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
|
|||
assertNotNull(file);
|
||||
|
||||
// set an initial dictionary.
|
||||
final FusionDictionary dict = new FusionDictionary(new Node(),
|
||||
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
|
||||
new FusionDictionary.DictionaryOptions(new HashMap<String, String>(), false,
|
||||
false));
|
||||
dict.add("initial", 10, null, false);
|
||||
|
|
|
@ -86,7 +86,7 @@ public class UserHistoryDictIOUtilsTests extends AndroidTestCase
|
|||
|
||||
private void checkWordInFusionDict(final FusionDictionary dict, final String word,
|
||||
final ArrayList<String> expectedBigrams) {
|
||||
final CharGroup group = FusionDictionary.findWordInTree(dict.mRoot, word);
|
||||
final CharGroup group = FusionDictionary.findWordInTree(dict.mRootNodeArray, word);
|
||||
assertNotNull(group);
|
||||
assertTrue(group.isTerminal());
|
||||
|
||||
|
|
|
@ -19,7 +19,7 @@ package com.android.inputmethod.latin.dicttool;
|
|||
import com.android.inputmethod.latin.makedict.FormatSpec;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
||||
import com.android.inputmethod.latin.makedict.Word;
|
||||
|
||||
|
@ -117,7 +117,7 @@ public class CombinedInputOutput {
|
|||
final boolean processLigatures =
|
||||
FRENCH_LIGATURE_PROCESSING_OPTION.equals(attributes.get(OPTIONS_TAG));
|
||||
attributes.remove(OPTIONS_TAG);
|
||||
final FusionDictionary dict = new FusionDictionary(new Node(), new DictionaryOptions(
|
||||
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), new DictionaryOptions(
|
||||
attributes, processUmlauts, processLigatures));
|
||||
|
||||
String line;
|
||||
|
|
|
@ -121,7 +121,8 @@ public class Diff extends Dicttool.Command {
|
|||
private static void diffWords(final FusionDictionary dict0, final FusionDictionary dict1) {
|
||||
boolean hasDifferences = false;
|
||||
for (final Word word0 : dict0) {
|
||||
final CharGroup word1 = FusionDictionary.findWordInTree(dict1.mRoot, word0.mWord);
|
||||
final CharGroup word1 = FusionDictionary.findWordInTree(dict1.mRootNodeArray,
|
||||
word0.mWord);
|
||||
if (null == word1) {
|
||||
// This word is not in dict1
|
||||
System.out.println("Deleted: " + word0.mWord + " " + word0.mFrequency);
|
||||
|
@ -150,7 +151,8 @@ public class Diff extends Dicttool.Command {
|
|||
}
|
||||
}
|
||||
for (final Word word1 : dict1) {
|
||||
final CharGroup word0 = FusionDictionary.findWordInTree(dict0.mRoot, word1.mWord);
|
||||
final CharGroup word0 = FusionDictionary.findWordInTree(dict0.mRootNodeArray,
|
||||
word1.mWord);
|
||||
if (null == word0) {
|
||||
// This word is not in dict0
|
||||
System.out.println("Added: " + word1.mWord + " " + word1.mFrequency);
|
||||
|
|
|
@ -65,7 +65,7 @@ public class Info extends Dicttool.Command {
|
|||
|
||||
private static void showWordInfo(final FusionDictionary dict, final String word,
|
||||
final boolean plumbing) {
|
||||
final CharGroup group = FusionDictionary.findWordInTree(dict.mRoot, word);
|
||||
final CharGroup group = FusionDictionary.findWordInTree(dict.mRootNodeArray, word);
|
||||
if (null == group) {
|
||||
System.out.println(word + " is not in the dictionary");
|
||||
return;
|
||||
|
|
|
@ -18,7 +18,7 @@ package com.android.inputmethod.latin.dicttool;
|
|||
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
||||
import com.android.inputmethod.latin.makedict.Word;
|
||||
|
||||
|
@ -124,8 +124,8 @@ public class XmlDictInputOutput {
|
|||
GERMAN_UMLAUT_PROCESSING_OPTION.equals(optionsString);
|
||||
final boolean processLigatures =
|
||||
FRENCH_LIGATURE_PROCESSING_OPTION.equals(optionsString);
|
||||
mDictionary = new FusionDictionary(new Node(), new DictionaryOptions(attributes,
|
||||
processUmlauts, processLigatures));
|
||||
mDictionary = new FusionDictionary(new PtNodeArray(),
|
||||
new DictionaryOptions(attributes, processUmlauts, processLigatures));
|
||||
} else {
|
||||
mState = UNKNOWN;
|
||||
}
|
||||
|
|
|
@ -22,7 +22,7 @@ import com.android.inputmethod.latin.makedict.BinaryDictReader;
|
|||
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
|
||||
import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
@ -42,7 +42,7 @@ public class BinaryDictOffdeviceUtilsTests extends TestCase {
|
|||
|
||||
public void testGetRawDictWorks() throws IOException, UnsupportedFormatException {
|
||||
// Create a thrice-compressed dictionary file.
|
||||
final FusionDictionary dict = new FusionDictionary(new Node(),
|
||||
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
|
||||
new DictionaryOptions(new HashMap<String, String>(),
|
||||
false /* germanUmlautProcessing */, false /* frenchLigatureProcessing */));
|
||||
dict.add("foo", TEST_FREQ, null, false /* isNotAWord */);
|
||||
|
@ -72,7 +72,8 @@ public class BinaryDictOffdeviceUtilsTests extends TestCase {
|
|||
final FusionDictionary resultDict = BinaryDictDecoder.readDictionaryBinary(reader,
|
||||
null /* dict : an optional dictionary to add words to, or null */);
|
||||
assertEquals("Dictionary can't be read back correctly",
|
||||
FusionDictionary.findWordInTree(resultDict.mRoot, "foo").getFrequency(), TEST_FREQ);
|
||||
FusionDictionary.findWordInTree(resultDict.mRootNodeArray, "foo").getFrequency(),
|
||||
TEST_FREQ);
|
||||
}
|
||||
|
||||
public void testGetRawDictFails() throws IOException {
|
||||
|
|
|
@ -17,7 +17,7 @@
|
|||
package com.android.inputmethod.latin.makedict;
|
||||
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
||||
|
@ -31,7 +31,7 @@ public class BinaryDictEncoderFlattenTreeTests extends TestCase {
|
|||
// Test the flattened array contains the expected number of nodes, and
|
||||
// that it does not contain any duplicates.
|
||||
public void testFlattenNodes() {
|
||||
final FusionDictionary dict = new FusionDictionary(new Node(),
|
||||
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
|
||||
new DictionaryOptions(new HashMap<String, String>(),
|
||||
false /* germanUmlautProcessing */, false /* frenchLigatureProcessing */));
|
||||
dict.add("foo", 1, null, false /* isNotAWord */);
|
||||
|
@ -39,10 +39,10 @@ public class BinaryDictEncoderFlattenTreeTests extends TestCase {
|
|||
dict.add("ftb", 1, null, false /* isNotAWord */);
|
||||
dict.add("bar", 1, null, false /* isNotAWord */);
|
||||
dict.add("fool", 1, null, false /* isNotAWord */);
|
||||
final ArrayList<Node> result = BinaryDictEncoder.flattenTree(dict.mRoot);
|
||||
final ArrayList<PtNodeArray> result = BinaryDictEncoder.flattenTree(dict.mRootNodeArray);
|
||||
assertEquals(4, result.size());
|
||||
while (!result.isEmpty()) {
|
||||
final Node n = result.remove(0);
|
||||
final PtNodeArray n = result.remove(0);
|
||||
assertFalse("Flattened array contained the same node twice", result.contains(n));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -19,7 +19,7 @@ package com.android.inputmethod.latin.makedict;
|
|||
import com.android.inputmethod.latin.makedict.FusionDictionary;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
|
||||
import com.android.inputmethod.latin.makedict.Word;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
@ -72,7 +72,7 @@ public class FusionDictionaryTest extends TestCase {
|
|||
assertNotNull(dict);
|
||||
for (final String word : words) {
|
||||
if (--limit < 0) return;
|
||||
final CharGroup cg = FusionDictionary.findWordInTree(dict.mRoot, word);
|
||||
final CharGroup cg = FusionDictionary.findWordInTree(dict.mRootNodeArray, word);
|
||||
assertNotNull(cg);
|
||||
}
|
||||
}
|
||||
|
@ -95,7 +95,7 @@ public class FusionDictionaryTest extends TestCase {
|
|||
// Test the flattened array contains the expected number of nodes, and
|
||||
// that it does not contain any duplicates.
|
||||
public void testFusion() {
|
||||
final FusionDictionary dict = new FusionDictionary(new Node(),
|
||||
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
|
||||
new DictionaryOptions(new HashMap<String, String>(),
|
||||
false /* germanUmlautProcessing */, false /* frenchLigatureProcessing */));
|
||||
final long time = System.currentTimeMillis();
|
||||
|
|
Loading…
Reference in a new issue