Rename Node to PtNodeArray
Bug: 10247660
Change-Id: I1a0ac19f58f96adb5efac5fd35c6404831618c99
main
parent
a440aa391c
commit
af30cbf0ee
|
@ -23,7 +23,7 @@ import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
|
||||||
import com.android.inputmethod.latin.makedict.BinaryDictEncoder;
|
import com.android.inputmethod.latin.makedict.BinaryDictEncoder;
|
||||||
import com.android.inputmethod.latin.makedict.FormatSpec;
|
import com.android.inputmethod.latin.makedict.FormatSpec;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary;
|
import com.android.inputmethod.latin.makedict.FusionDictionary;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
||||||
import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
|
import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
|
||||||
import com.android.inputmethod.latin.utils.CollectionUtils;
|
import com.android.inputmethod.latin.utils.CollectionUtils;
|
||||||
|
@ -51,7 +51,7 @@ public class DictionaryWriter extends AbstractDictionaryWriter {
|
||||||
@Override
|
@Override
|
||||||
public void clear() {
|
public void clear() {
|
||||||
final HashMap<String, String> attributes = CollectionUtils.newHashMap();
|
final HashMap<String, String> attributes = CollectionUtils.newHashMap();
|
||||||
mFusionDictionary = new FusionDictionary(new Node(),
|
mFusionDictionary = new FusionDictionary(new PtNodeArray(),
|
||||||
new FusionDictionary.DictionaryOptions(attributes, false, false));
|
new FusionDictionary.DictionaryOptions(attributes, false, false));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -20,7 +20,7 @@ import com.android.inputmethod.annotations.UsedForTesting;
|
||||||
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
|
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
|
||||||
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
|
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
||||||
import com.android.inputmethod.latin.utils.JniUtils;
|
import com.android.inputmethod.latin.utils.JniUtils;
|
||||||
|
|
||||||
|
@ -548,31 +548,31 @@ public final class BinaryDictDecoder {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reads a single node from a buffer.
|
* Reads a single node array from a buffer.
|
||||||
*
|
*
|
||||||
* This method reads the file at the current position. A node is fully expected to start at
|
* This method reads the file at the current position. A node array is fully expected to start
|
||||||
* the current position.
|
* at the current position.
|
||||||
* This will recursively read other nodes into the structure, populating the reverse
|
* This will recursively read other node arrays into the structure, populating the reverse
|
||||||
* maps on the fly and using them to keep track of already read nodes.
|
* maps on the fly and using them to keep track of already read nodes.
|
||||||
*
|
*
|
||||||
* @param buffer the buffer, correctly positioned at the start of a node.
|
* @param buffer the buffer, correctly positioned at the start of a node array.
|
||||||
* @param headerSize the size, in bytes, of the file header.
|
* @param headerSize the size, in bytes, of the file header.
|
||||||
* @param reverseNodeMap a mapping from addresses to already read nodes.
|
* @param reverseNodeArrayMap a mapping from addresses to already read node arrays.
|
||||||
* @param reverseGroupMap a mapping from addresses to already read character groups.
|
* @param reverseGroupMap a mapping from addresses to already read character groups.
|
||||||
* @param options file format options.
|
* @param options file format options.
|
||||||
* @return the read node with all its children already read.
|
* @return the read node array with all its children already read.
|
||||||
*/
|
*/
|
||||||
private static Node readNode(final FusionDictionaryBufferInterface buffer, final int headerSize,
|
private static PtNodeArray readNodeArray(final FusionDictionaryBufferInterface buffer,
|
||||||
final Map<Integer, Node> reverseNodeMap, final Map<Integer, CharGroup> reverseGroupMap,
|
final int headerSize, final Map<Integer, PtNodeArray> reverseNodeArrayMap,
|
||||||
final FormatOptions options)
|
final Map<Integer, CharGroup> reverseGroupMap, final FormatOptions options)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
final ArrayList<CharGroup> nodeContents = new ArrayList<CharGroup>();
|
final ArrayList<CharGroup> nodeArrayContents = new ArrayList<CharGroup>();
|
||||||
final int nodeOrigin = buffer.position() - headerSize;
|
final int nodeArrayOrigin = buffer.position() - headerSize;
|
||||||
|
|
||||||
do { // Scan the linked-list node.
|
do { // Scan the linked-list node.
|
||||||
final int nodeHeadPosition = buffer.position() - headerSize;
|
final int nodeArrayHeadPosition = buffer.position() - headerSize;
|
||||||
final int count = readCharGroupCount(buffer);
|
final int count = readCharGroupCount(buffer);
|
||||||
int groupOffset = nodeHeadPosition + BinaryDictIOUtils.getGroupCountSize(count);
|
int groupOffset = nodeArrayHeadPosition + BinaryDictIOUtils.getGroupCountSize(count);
|
||||||
for (int i = count; i > 0; --i) { // Scan the array of CharGroup.
|
for (int i = count; i > 0; --i) { // Scan the array of CharGroup.
|
||||||
CharGroupInfo info = readCharGroup(buffer, groupOffset, options);
|
CharGroupInfo info = readCharGroup(buffer, groupOffset, options);
|
||||||
if (BinaryDictIOUtils.isMovedGroup(info.mFlags, options)) continue;
|
if (BinaryDictIOUtils.isMovedGroup(info.mFlags, options)) continue;
|
||||||
|
@ -589,21 +589,21 @@ public final class BinaryDictDecoder {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (BinaryDictIOUtils.hasChildrenAddress(info.mChildrenAddress)) {
|
if (BinaryDictIOUtils.hasChildrenAddress(info.mChildrenAddress)) {
|
||||||
Node children = reverseNodeMap.get(info.mChildrenAddress);
|
PtNodeArray children = reverseNodeArrayMap.get(info.mChildrenAddress);
|
||||||
if (null == children) {
|
if (null == children) {
|
||||||
final int currentPosition = buffer.position();
|
final int currentPosition = buffer.position();
|
||||||
buffer.position(info.mChildrenAddress + headerSize);
|
buffer.position(info.mChildrenAddress + headerSize);
|
||||||
children = readNode(
|
children = readNodeArray(
|
||||||
buffer, headerSize, reverseNodeMap, reverseGroupMap, options);
|
buffer, headerSize, reverseNodeArrayMap, reverseGroupMap, options);
|
||||||
buffer.position(currentPosition);
|
buffer.position(currentPosition);
|
||||||
}
|
}
|
||||||
nodeContents.add(
|
nodeArrayContents.add(
|
||||||
new CharGroup(info.mCharacters, shortcutTargets, bigrams,
|
new CharGroup(info.mCharacters, shortcutTargets, bigrams,
|
||||||
info.mFrequency,
|
info.mFrequency,
|
||||||
0 != (info.mFlags & FormatSpec.FLAG_IS_NOT_A_WORD),
|
0 != (info.mFlags & FormatSpec.FLAG_IS_NOT_A_WORD),
|
||||||
0 != (info.mFlags & FormatSpec.FLAG_IS_BLACKLISTED), children));
|
0 != (info.mFlags & FormatSpec.FLAG_IS_BLACKLISTED), children));
|
||||||
} else {
|
} else {
|
||||||
nodeContents.add(
|
nodeArrayContents.add(
|
||||||
new CharGroup(info.mCharacters, shortcutTargets, bigrams,
|
new CharGroup(info.mCharacters, shortcutTargets, bigrams,
|
||||||
info.mFrequency,
|
info.mFrequency,
|
||||||
0 != (info.mFlags & FormatSpec.FLAG_IS_NOT_A_WORD),
|
0 != (info.mFlags & FormatSpec.FLAG_IS_NOT_A_WORD),
|
||||||
|
@ -624,11 +624,11 @@ public final class BinaryDictDecoder {
|
||||||
} while (options.mSupportsDynamicUpdate &&
|
} while (options.mSupportsDynamicUpdate &&
|
||||||
buffer.position() != FormatSpec.NO_FORWARD_LINK_ADDRESS);
|
buffer.position() != FormatSpec.NO_FORWARD_LINK_ADDRESS);
|
||||||
|
|
||||||
final Node node = new Node(nodeContents);
|
final PtNodeArray nodeArray = new PtNodeArray(nodeArrayContents);
|
||||||
node.mCachedAddressBeforeUpdate = nodeOrigin;
|
nodeArray.mCachedAddressBeforeUpdate = nodeArrayOrigin;
|
||||||
node.mCachedAddressAfterUpdate = nodeOrigin;
|
nodeArray.mCachedAddressAfterUpdate = nodeArrayOrigin;
|
||||||
reverseNodeMap.put(node.mCachedAddressAfterUpdate, node);
|
reverseNodeArrayMap.put(nodeArray.mCachedAddressAfterUpdate, nodeArray);
|
||||||
return node;
|
return nodeArray;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -733,10 +733,10 @@ public final class BinaryDictDecoder {
|
||||||
// Read header
|
// Read header
|
||||||
final FileHeader header = readHeader(reader.getBuffer());
|
final FileHeader header = readHeader(reader.getBuffer());
|
||||||
|
|
||||||
Map<Integer, Node> reverseNodeMapping = new TreeMap<Integer, Node>();
|
Map<Integer, PtNodeArray> reverseNodeArrayMapping = new TreeMap<Integer, PtNodeArray>();
|
||||||
Map<Integer, CharGroup> reverseGroupMapping = new TreeMap<Integer, CharGroup>();
|
Map<Integer, CharGroup> reverseGroupMapping = new TreeMap<Integer, CharGroup>();
|
||||||
final Node root = readNode(reader.getBuffer(), header.mHeaderSize, reverseNodeMapping,
|
final PtNodeArray root = readNodeArray(reader.getBuffer(), header.mHeaderSize,
|
||||||
reverseGroupMapping, header.mFormatOptions);
|
reverseNodeArrayMapping, reverseGroupMapping, header.mFormatOptions);
|
||||||
|
|
||||||
FusionDictionary newDict = new FusionDictionary(root, header.mDictionaryOptions);
|
FusionDictionary newDict = new FusionDictionary(root, header.mDictionaryOptions);
|
||||||
if (null != dict) {
|
if (null != dict) {
|
||||||
|
@ -803,8 +803,6 @@ public final class BinaryDictDecoder {
|
||||||
/**
|
/**
|
||||||
* Calculate bigram frequency from compressed value
|
* Calculate bigram frequency from compressed value
|
||||||
*
|
*
|
||||||
* @see #makeBigramFlags
|
|
||||||
*
|
|
||||||
* @param unigramFrequency
|
* @param unigramFrequency
|
||||||
* @param bigramFrequency compressed frequency
|
* @param bigramFrequency compressed frequency
|
||||||
* @return approximate bigram frequency
|
* @return approximate bigram frequency
|
||||||
|
|
|
@ -20,7 +20,7 @@ import com.android.inputmethod.latin.makedict.BinaryDictDecoder.CharEncoding;
|
||||||
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
|
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
||||||
|
|
||||||
import java.io.ByteArrayOutputStream;
|
import java.io.ByteArrayOutputStream;
|
||||||
|
@ -78,12 +78,12 @@ public class BinaryDictEncoder {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Compute the binary size of the group count for a node
|
* Compute the binary size of the group count for a node array.
|
||||||
* @param node the node
|
* @param nodeArray the nodeArray
|
||||||
* @return the size of the group count, either 1 or 2 bytes.
|
* @return the size of the group count, either 1 or 2 bytes.
|
||||||
*/
|
*/
|
||||||
private static int getGroupCountSize(final Node node) {
|
private static int getGroupCountSize(final PtNodeArray nodeArray) {
|
||||||
return BinaryDictIOUtils.getGroupCountSize(node.mData.size());
|
return BinaryDictIOUtils.getGroupCountSize(nodeArray.mData.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -138,15 +138,17 @@ public class BinaryDictEncoder {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Compute the maximum size of a node, assuming 3-byte addresses for everything, and caches
|
* Compute the maximum size of each node of a node array, assuming 3-byte addresses for
|
||||||
* it in the 'actualSize' member of the node.
|
* everything, and caches it in the 'mCachedSize' member of the nodes; deduce the size of
|
||||||
|
* the containing node array, and cache it in its 'mCachedSize' member.
|
||||||
*
|
*
|
||||||
* @param node the node to compute the maximum size of.
|
* @param nodeArray the node array to compute the maximum size of.
|
||||||
* @param options file format options.
|
* @param options file format options.
|
||||||
*/
|
*/
|
||||||
private static void calculateNodeMaximumSize(final Node node, final FormatOptions options) {
|
private static void calculateNodeArrayMaximumSize(final PtNodeArray nodeArray,
|
||||||
int size = getGroupCountSize(node);
|
final FormatOptions options) {
|
||||||
for (CharGroup g : node.mData) {
|
int size = getGroupCountSize(nodeArray);
|
||||||
|
for (CharGroup g : nodeArray.mData) {
|
||||||
final int groupSize = getCharGroupMaximumSize(g, options);
|
final int groupSize = getCharGroupMaximumSize(g, options);
|
||||||
g.mCachedSize = groupSize;
|
g.mCachedSize = groupSize;
|
||||||
size += groupSize;
|
size += groupSize;
|
||||||
|
@ -154,7 +156,7 @@ public class BinaryDictEncoder {
|
||||||
if (options.mSupportsDynamicUpdate) {
|
if (options.mSupportsDynamicUpdate) {
|
||||||
size += FormatSpec.FORWARD_LINK_ADDRESS_SIZE;
|
size += FormatSpec.FORWARD_LINK_ADDRESS_SIZE;
|
||||||
}
|
}
|
||||||
node.mCachedSize = size;
|
nodeArray.mCachedSize = size;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -199,14 +201,16 @@ public class BinaryDictEncoder {
|
||||||
|
|
||||||
// This method is responsible for finding a nice ordering of the nodes that favors run-time
|
// This method is responsible for finding a nice ordering of the nodes that favors run-time
|
||||||
// cache performance and dictionary size.
|
// cache performance and dictionary size.
|
||||||
/* package for tests */ static ArrayList<Node> flattenTree(final Node root) {
|
/* package for tests */ static ArrayList<PtNodeArray> flattenTree(
|
||||||
final int treeSize = FusionDictionary.countCharGroups(root);
|
final PtNodeArray rootNodeArray) {
|
||||||
|
final int treeSize = FusionDictionary.countCharGroups(rootNodeArray);
|
||||||
MakedictLog.i("Counted nodes : " + treeSize);
|
MakedictLog.i("Counted nodes : " + treeSize);
|
||||||
final ArrayList<Node> flatTree = new ArrayList<Node>(treeSize);
|
final ArrayList<PtNodeArray> flatTree = new ArrayList<PtNodeArray>(treeSize);
|
||||||
return flattenTreeInner(flatTree, root);
|
return flattenTreeInner(flatTree, rootNodeArray);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static ArrayList<Node> flattenTreeInner(final ArrayList<Node> list, final Node node) {
|
private static ArrayList<PtNodeArray> flattenTreeInner(final ArrayList<PtNodeArray> list,
|
||||||
|
final PtNodeArray nodeArray) {
|
||||||
// Removing the node is necessary if the tails are merged, because we would then
|
// Removing the node is necessary if the tails are merged, because we would then
|
||||||
// add the same node several times when we only want it once. A number of places in
|
// add the same node several times when we only want it once. A number of places in
|
||||||
// the code also depends on any node being only once in the list.
|
// the code also depends on any node being only once in the list.
|
||||||
|
@ -224,8 +228,8 @@ public class BinaryDictEncoder {
|
||||||
// this simple list.remove operation O(n*n) overall. On Android this overhead is very
|
// this simple list.remove operation O(n*n) overall. On Android this overhead is very
|
||||||
// high.
|
// high.
|
||||||
// For future reference, the code to remove duplicate is a simple : list.remove(node);
|
// For future reference, the code to remove duplicate is a simple : list.remove(node);
|
||||||
list.add(node);
|
list.add(nodeArray);
|
||||||
final ArrayList<CharGroup> branches = node.mData;
|
final ArrayList<CharGroup> branches = nodeArray.mData;
|
||||||
final int nodeSize = branches.size();
|
final int nodeSize = branches.size();
|
||||||
for (CharGroup group : branches) {
|
for (CharGroup group : branches) {
|
||||||
if (null != group.mChildren) flattenTreeInner(list, group.mChildren);
|
if (null != group.mChildren) flattenTreeInner(list, group.mChildren);
|
||||||
|
@ -234,52 +238,60 @@ public class BinaryDictEncoder {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the offset from a position inside a current node to a target node, during update.
|
* Get the offset from a position inside a current node array to a target node array, during
|
||||||
|
* update.
|
||||||
*
|
*
|
||||||
* If the current node is before the target node, the target node has not been updated yet,
|
* If the current node array is before the target node array, the target node array has not
|
||||||
* so we should return the offset from the old position of the current node to the old position
|
* been updated yet, so we should return the offset from the old position of the current node
|
||||||
* of the target node. If on the other hand the target is before the current node, it already
|
* array to the old position of the target node array. If on the other hand the target is
|
||||||
* has been updated, so we should return the offset from the new position in the current node
|
* before the current node array, it already has been updated, so we should return the offset
|
||||||
* to the new position in the target node.
|
* from the new position in the current node array to the new position in the target node
|
||||||
* @param currentNode the node containing the CharGroup where the offset will be written
|
* array.
|
||||||
* @param offsetFromStartOfCurrentNode the offset, in bytes, from the start of currentNode
|
*
|
||||||
* @param targetNode the target node to get the offset to
|
* @param currentNodeArray node array containing the CharGroup where the offset will be written
|
||||||
* @return the offset to the target node
|
* @param offsetFromStartOfCurrentNodeArray offset, in bytes, from the start of currentNodeArray
|
||||||
|
* @param targetNodeArray the target node array to get the offset to
|
||||||
|
* @return the offset to the target node array
|
||||||
*/
|
*/
|
||||||
private static int getOffsetToTargetNodeDuringUpdate(final Node currentNode,
|
private static int getOffsetToTargetNodeArrayDuringUpdate(final PtNodeArray currentNodeArray,
|
||||||
final int offsetFromStartOfCurrentNode, final Node targetNode) {
|
final int offsetFromStartOfCurrentNodeArray, final PtNodeArray targetNodeArray) {
|
||||||
final boolean isTargetBeforeCurrent = (targetNode.mCachedAddressBeforeUpdate
|
final boolean isTargetBeforeCurrent = (targetNodeArray.mCachedAddressBeforeUpdate
|
||||||
< currentNode.mCachedAddressBeforeUpdate);
|
< currentNodeArray.mCachedAddressBeforeUpdate);
|
||||||
if (isTargetBeforeCurrent) {
|
if (isTargetBeforeCurrent) {
|
||||||
return targetNode.mCachedAddressAfterUpdate
|
return targetNodeArray.mCachedAddressAfterUpdate
|
||||||
- (currentNode.mCachedAddressAfterUpdate + offsetFromStartOfCurrentNode);
|
- (currentNodeArray.mCachedAddressAfterUpdate
|
||||||
|
+ offsetFromStartOfCurrentNodeArray);
|
||||||
} else {
|
} else {
|
||||||
return targetNode.mCachedAddressBeforeUpdate
|
return targetNodeArray.mCachedAddressBeforeUpdate
|
||||||
- (currentNode.mCachedAddressBeforeUpdate + offsetFromStartOfCurrentNode);
|
- (currentNodeArray.mCachedAddressBeforeUpdate
|
||||||
|
+ offsetFromStartOfCurrentNodeArray);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the offset from a position inside a current node to a target CharGroup, during update.
|
* Get the offset from a position inside a current node array to a target CharGroup, during
|
||||||
* @param currentNode the node containing the CharGroup where the offset will be written
|
* update.
|
||||||
* @param offsetFromStartOfCurrentNode the offset, in bytes, from the start of currentNode
|
*
|
||||||
|
* @param currentNodeArray node array containing the CharGroup where the offset will be written
|
||||||
|
* @param offsetFromStartOfCurrentNodeArray offset, in bytes, from the start of currentNodeArray
|
||||||
* @param targetCharGroup the target CharGroup to get the offset to
|
* @param targetCharGroup the target CharGroup to get the offset to
|
||||||
* @return the offset to the target CharGroup
|
* @return the offset to the target CharGroup
|
||||||
*/
|
*/
|
||||||
// TODO: is there any way to factorize this method with the one above?
|
// TODO: is there any way to factorize this method with the one above?
|
||||||
private static int getOffsetToTargetCharGroupDuringUpdate(final Node currentNode,
|
private static int getOffsetToTargetCharGroupDuringUpdate(final PtNodeArray currentNodeArray,
|
||||||
final int offsetFromStartOfCurrentNode, final CharGroup targetCharGroup) {
|
final int offsetFromStartOfCurrentNodeArray, final CharGroup targetCharGroup) {
|
||||||
final int oldOffsetBasePoint = currentNode.mCachedAddressBeforeUpdate
|
final int oldOffsetBasePoint = currentNodeArray.mCachedAddressBeforeUpdate
|
||||||
+ offsetFromStartOfCurrentNode;
|
+ offsetFromStartOfCurrentNodeArray;
|
||||||
final boolean isTargetBeforeCurrent = (targetCharGroup.mCachedAddressBeforeUpdate
|
final boolean isTargetBeforeCurrent = (targetCharGroup.mCachedAddressBeforeUpdate
|
||||||
< oldOffsetBasePoint);
|
< oldOffsetBasePoint);
|
||||||
// If the target is before the current node, then its address has already been updated.
|
// If the target is before the current node array, then its address has already been
|
||||||
// We can use the AfterUpdate member, and compare it to our own member after update.
|
// updated. We can use the AfterUpdate member, and compare it to our own member after
|
||||||
// Otherwise, the AfterUpdate member is not updated yet, so we need to use the BeforeUpdate
|
// update. Otherwise, the AfterUpdate member is not updated yet, so we need to use the
|
||||||
// member, and of course we have to compare this to our own address before update.
|
// BeforeUpdate member, and of course we have to compare this to our own address before
|
||||||
|
// update.
|
||||||
if (isTargetBeforeCurrent) {
|
if (isTargetBeforeCurrent) {
|
||||||
final int newOffsetBasePoint = currentNode.mCachedAddressAfterUpdate
|
final int newOffsetBasePoint = currentNodeArray.mCachedAddressAfterUpdate
|
||||||
+ offsetFromStartOfCurrentNode;
|
+ offsetFromStartOfCurrentNodeArray;
|
||||||
return targetCharGroup.mCachedAddressAfterUpdate - newOffsetBasePoint;
|
return targetCharGroup.mCachedAddressAfterUpdate - newOffsetBasePoint;
|
||||||
} else {
|
} else {
|
||||||
return targetCharGroup.mCachedAddressBeforeUpdate - oldOffsetBasePoint;
|
return targetCharGroup.mCachedAddressBeforeUpdate - oldOffsetBasePoint;
|
||||||
|
@ -287,26 +299,26 @@ public class BinaryDictEncoder {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Computes the actual node size, based on the cached addresses of the children nodes.
|
* Computes the actual node array size, based on the cached addresses of the children nodes.
|
||||||
*
|
*
|
||||||
* Each node stores its tentative address. During dictionary address computing, these
|
* Each node array stores its tentative address. During dictionary address computing, these
|
||||||
* are not final, but they can be used to compute the node size (the node size depends
|
* are not final, but they can be used to compute the node array size (the node array size
|
||||||
* on the address of the children because the number of bytes necessary to store an
|
* depends on the address of the children because the number of bytes necessary to store an
|
||||||
* address depends on its numeric value. The return value indicates whether the node
|
* address depends on its numeric value. The return value indicates whether the node array
|
||||||
* contents (as in, any of the addresses stored in the cache fields) have changed with
|
* contents (as in, any of the addresses stored in the cache fields) have changed with
|
||||||
* respect to their previous value.
|
* respect to their previous value.
|
||||||
*
|
*
|
||||||
* @param node the node to compute the size of.
|
* @param nodeArray the node array to compute the size of.
|
||||||
* @param dict the dictionary in which the word/attributes are to be found.
|
* @param dict the dictionary in which the word/attributes are to be found.
|
||||||
* @param formatOptions file format options.
|
* @param formatOptions file format options.
|
||||||
* @return false if none of the cached addresses inside the node changed, true otherwise.
|
* @return false if none of the cached addresses inside the node array changed, true otherwise.
|
||||||
*/
|
*/
|
||||||
private static boolean computeActualNodeSize(final Node node, final FusionDictionary dict,
|
private static boolean computeActualNodeArraySize(final PtNodeArray nodeArray,
|
||||||
final FormatOptions formatOptions) {
|
final FusionDictionary dict, final FormatOptions formatOptions) {
|
||||||
boolean changed = false;
|
boolean changed = false;
|
||||||
int size = getGroupCountSize(node);
|
int size = getGroupCountSize(nodeArray);
|
||||||
for (CharGroup group : node.mData) {
|
for (CharGroup group : nodeArray.mData) {
|
||||||
group.mCachedAddressAfterUpdate = node.mCachedAddressAfterUpdate + size;
|
group.mCachedAddressAfterUpdate = nodeArray.mCachedAddressAfterUpdate + size;
|
||||||
if (group.mCachedAddressAfterUpdate != group.mCachedAddressBeforeUpdate) {
|
if (group.mCachedAddressAfterUpdate != group.mCachedAddressBeforeUpdate) {
|
||||||
changed = true;
|
changed = true;
|
||||||
}
|
}
|
||||||
|
@ -318,16 +330,16 @@ public class BinaryDictEncoder {
|
||||||
if (formatOptions.mSupportsDynamicUpdate) {
|
if (formatOptions.mSupportsDynamicUpdate) {
|
||||||
groupSize += FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE;
|
groupSize += FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE;
|
||||||
} else {
|
} else {
|
||||||
groupSize += getByteSize(getOffsetToTargetNodeDuringUpdate(node,
|
groupSize += getByteSize(getOffsetToTargetNodeArrayDuringUpdate(nodeArray,
|
||||||
groupSize + size, group.mChildren));
|
groupSize + size, group.mChildren));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
groupSize += getShortcutListSize(group.mShortcutTargets);
|
groupSize += getShortcutListSize(group.mShortcutTargets);
|
||||||
if (null != group.mBigrams) {
|
if (null != group.mBigrams) {
|
||||||
for (WeightedString bigram : group.mBigrams) {
|
for (WeightedString bigram : group.mBigrams) {
|
||||||
final int offset = getOffsetToTargetCharGroupDuringUpdate(node,
|
final int offset = getOffsetToTargetCharGroupDuringUpdate(nodeArray,
|
||||||
groupSize + size + FormatSpec.GROUP_FLAGS_SIZE,
|
groupSize + size + FormatSpec.GROUP_FLAGS_SIZE,
|
||||||
FusionDictionary.findWordInTree(dict.mRoot, bigram.mWord));
|
FusionDictionary.findWordInTree(dict.mRootNodeArray, bigram.mWord));
|
||||||
groupSize += getByteSize(offset) + FormatSpec.GROUP_FLAGS_SIZE;
|
groupSize += getByteSize(offset) + FormatSpec.GROUP_FLAGS_SIZE;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -337,49 +349,49 @@ public class BinaryDictEncoder {
|
||||||
if (formatOptions.mSupportsDynamicUpdate) {
|
if (formatOptions.mSupportsDynamicUpdate) {
|
||||||
size += FormatSpec.FORWARD_LINK_ADDRESS_SIZE;
|
size += FormatSpec.FORWARD_LINK_ADDRESS_SIZE;
|
||||||
}
|
}
|
||||||
if (node.mCachedSize != size) {
|
if (nodeArray.mCachedSize != size) {
|
||||||
node.mCachedSize = size;
|
nodeArray.mCachedSize = size;
|
||||||
changed = true;
|
changed = true;
|
||||||
}
|
}
|
||||||
return changed;
|
return changed;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Initializes the cached addresses of nodes from their size.
|
* Initializes the cached addresses of node arrays and their containing nodes from their size.
|
||||||
*
|
*
|
||||||
* @param flatNodes the array of nodes.
|
* @param flatNodes the list of node arrays.
|
||||||
* @param formatOptions file format options.
|
* @param formatOptions file format options.
|
||||||
* @return the byte size of the entire stack.
|
* @return the byte size of the entire stack.
|
||||||
*/
|
*/
|
||||||
private static int initializeNodesCachedAddresses(final ArrayList<Node> flatNodes,
|
private static int initializeNodeArraysCachedAddresses(final ArrayList<PtNodeArray> flatNodes,
|
||||||
final FormatOptions formatOptions) {
|
final FormatOptions formatOptions) {
|
||||||
int nodeOffset = 0;
|
int nodeArrayOffset = 0;
|
||||||
for (final Node n : flatNodes) {
|
for (final PtNodeArray nodeArray : flatNodes) {
|
||||||
n.mCachedAddressBeforeUpdate = nodeOffset;
|
nodeArray.mCachedAddressBeforeUpdate = nodeArrayOffset;
|
||||||
int groupCountSize = getGroupCountSize(n);
|
int groupCountSize = getGroupCountSize(nodeArray);
|
||||||
int groupOffset = 0;
|
int groupOffset = 0;
|
||||||
for (final CharGroup g : n.mData) {
|
for (final CharGroup g : nodeArray.mData) {
|
||||||
g.mCachedAddressBeforeUpdate = g.mCachedAddressAfterUpdate =
|
g.mCachedAddressBeforeUpdate = g.mCachedAddressAfterUpdate =
|
||||||
groupCountSize + nodeOffset + groupOffset;
|
groupCountSize + nodeArrayOffset + groupOffset;
|
||||||
groupOffset += g.mCachedSize;
|
groupOffset += g.mCachedSize;
|
||||||
}
|
}
|
||||||
final int nodeSize = groupCountSize + groupOffset
|
final int nodeSize = groupCountSize + groupOffset
|
||||||
+ (formatOptions.mSupportsDynamicUpdate
|
+ (formatOptions.mSupportsDynamicUpdate
|
||||||
? FormatSpec.FORWARD_LINK_ADDRESS_SIZE : 0);
|
? FormatSpec.FORWARD_LINK_ADDRESS_SIZE : 0);
|
||||||
nodeOffset += n.mCachedSize;
|
nodeArrayOffset += nodeArray.mCachedSize;
|
||||||
}
|
}
|
||||||
return nodeOffset;
|
return nodeArrayOffset;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Updates the cached addresses of nodes after recomputing their new positions.
|
* Updates the cached addresses of node arrays after recomputing their new positions.
|
||||||
*
|
*
|
||||||
* @param flatNodes the array of nodes.
|
* @param flatNodes the list of node arrays.
|
||||||
*/
|
*/
|
||||||
private static void updateNodeCachedAddresses(final ArrayList<Node> flatNodes) {
|
private static void updateNodeArraysCachedAddresses(final ArrayList<PtNodeArray> flatNodes) {
|
||||||
for (final Node n : flatNodes) {
|
for (final PtNodeArray nodeArray : flatNodes) {
|
||||||
n.mCachedAddressBeforeUpdate = n.mCachedAddressAfterUpdate;
|
nodeArray.mCachedAddressBeforeUpdate = nodeArray.mCachedAddressAfterUpdate;
|
||||||
for (final CharGroup g : n.mData) {
|
for (final CharGroup g : nodeArray.mData) {
|
||||||
g.mCachedAddressBeforeUpdate = g.mCachedAddressAfterUpdate;
|
g.mCachedAddressBeforeUpdate = g.mCachedAddressAfterUpdate;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -391,11 +403,11 @@ public class BinaryDictEncoder {
|
||||||
* The parent addresses are used by some binary formats at write-to-disk time. Not all formats
|
* The parent addresses are used by some binary formats at write-to-disk time. Not all formats
|
||||||
* need them. In particular, version 2 does not need them, and version 3 does.
|
* need them. In particular, version 2 does not need them, and version 3 does.
|
||||||
*
|
*
|
||||||
* @param flatNodes the flat array of nodes to fill in
|
* @param flatNodes the flat array of node arrays to fill in
|
||||||
*/
|
*/
|
||||||
private static void computeParentAddresses(final ArrayList<Node> flatNodes) {
|
private static void computeParentAddresses(final ArrayList<PtNodeArray> flatNodes) {
|
||||||
for (final Node node : flatNodes) {
|
for (final PtNodeArray nodeArray : flatNodes) {
|
||||||
for (final CharGroup group : node.mData) {
|
for (final CharGroup group : nodeArray.mData) {
|
||||||
if (null != group.mChildren) {
|
if (null != group.mChildren) {
|
||||||
// Assign my address to children's parent address
|
// Assign my address to children's parent address
|
||||||
// Here BeforeUpdate and AfterUpdate addresses have the same value, so it
|
// Here BeforeUpdate and AfterUpdate addresses have the same value, so it
|
||||||
|
@ -408,25 +420,25 @@ public class BinaryDictEncoder {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Compute the addresses and sizes of an ordered node array.
|
* Compute the addresses and sizes of an ordered list of node arrays.
|
||||||
*
|
*
|
||||||
* This method takes a node array and will update its cached address and size values
|
* This method takes a list of node arrays and will update their cached address and size
|
||||||
* so that they can be written into a file. It determines the smallest size each of the
|
* values so that they can be written into a file. It determines the smallest size each of the
|
||||||
* nodes can be given the addresses of its children and attributes, and store that into
|
* nodes arrays can be given the addresses of its children and attributes, and store that into
|
||||||
* each node.
|
* each node.
|
||||||
* The order of the node is given by the order of the array. This method makes no effort
|
* The order of the node is given by the order of the array. This method makes no effort
|
||||||
* to find a good order; it only mechanically computes the size this order results in.
|
* to find a good order; it only mechanically computes the size this order results in.
|
||||||
*
|
*
|
||||||
* @param dict the dictionary
|
* @param dict the dictionary
|
||||||
* @param flatNodes the ordered array of nodes
|
* @param flatNodes the ordered list of nodes arrays
|
||||||
* @param formatOptions file format options.
|
* @param formatOptions file format options.
|
||||||
* @return the same array it was passed. The nodes have been updated for address and size.
|
* @return the same array it was passed. The nodes have been updated for address and size.
|
||||||
*/
|
*/
|
||||||
private static ArrayList<Node> computeAddresses(final FusionDictionary dict,
|
private static ArrayList<PtNodeArray> computeAddresses(final FusionDictionary dict,
|
||||||
final ArrayList<Node> flatNodes, final FormatOptions formatOptions) {
|
final ArrayList<PtNodeArray> flatNodes, final FormatOptions formatOptions) {
|
||||||
// First get the worst possible sizes and offsets
|
// First get the worst possible sizes and offsets
|
||||||
for (final Node n : flatNodes) calculateNodeMaximumSize(n, formatOptions);
|
for (final PtNodeArray n : flatNodes) calculateNodeArrayMaximumSize(n, formatOptions);
|
||||||
final int offset = initializeNodesCachedAddresses(flatNodes, formatOptions);
|
final int offset = initializeNodeArraysCachedAddresses(flatNodes, formatOptions);
|
||||||
|
|
||||||
MakedictLog.i("Compressing the array addresses. Original size : " + offset);
|
MakedictLog.i("Compressing the array addresses. Original size : " + offset);
|
||||||
MakedictLog.i("(Recursively seen size : " + offset + ")");
|
MakedictLog.i("(Recursively seen size : " + offset + ")");
|
||||||
|
@ -435,17 +447,19 @@ public class BinaryDictEncoder {
|
||||||
boolean changesDone = false;
|
boolean changesDone = false;
|
||||||
do {
|
do {
|
||||||
changesDone = false;
|
changesDone = false;
|
||||||
int nodeStartOffset = 0;
|
int nodeArrayStartOffset = 0;
|
||||||
for (final Node n : flatNodes) {
|
for (final PtNodeArray nodeArray : flatNodes) {
|
||||||
n.mCachedAddressAfterUpdate = nodeStartOffset;
|
nodeArray.mCachedAddressAfterUpdate = nodeArrayStartOffset;
|
||||||
final int oldNodeSize = n.mCachedSize;
|
final int oldNodeArraySize = nodeArray.mCachedSize;
|
||||||
final boolean changed = computeActualNodeSize(n, dict, formatOptions);
|
final boolean changed = computeActualNodeArraySize(nodeArray, dict, formatOptions);
|
||||||
final int newNodeSize = n.mCachedSize;
|
final int newNodeArraySize = nodeArray.mCachedSize;
|
||||||
if (oldNodeSize < newNodeSize) throw new RuntimeException("Increased size ?!");
|
if (oldNodeArraySize < newNodeArraySize) {
|
||||||
nodeStartOffset += newNodeSize;
|
throw new RuntimeException("Increased size ?!");
|
||||||
|
}
|
||||||
|
nodeArrayStartOffset += newNodeArraySize;
|
||||||
changesDone |= changed;
|
changesDone |= changed;
|
||||||
}
|
}
|
||||||
updateNodeCachedAddresses(flatNodes);
|
updateNodeArraysCachedAddresses(flatNodes);
|
||||||
++passes;
|
++passes;
|
||||||
if (passes > MAX_PASSES) throw new RuntimeException("Too many passes - probably a bug");
|
if (passes > MAX_PASSES) throw new RuntimeException("Too many passes - probably a bug");
|
||||||
} while (changesDone);
|
} while (changesDone);
|
||||||
|
@ -453,10 +467,10 @@ public class BinaryDictEncoder {
|
||||||
if (formatOptions.mSupportsDynamicUpdate) {
|
if (formatOptions.mSupportsDynamicUpdate) {
|
||||||
computeParentAddresses(flatNodes);
|
computeParentAddresses(flatNodes);
|
||||||
}
|
}
|
||||||
final Node lastNode = flatNodes.get(flatNodes.size() - 1);
|
final PtNodeArray lastNodeArray = flatNodes.get(flatNodes.size() - 1);
|
||||||
MakedictLog.i("Compression complete in " + passes + " passes.");
|
MakedictLog.i("Compression complete in " + passes + " passes.");
|
||||||
MakedictLog.i("After address compression : "
|
MakedictLog.i("After address compression : "
|
||||||
+ (lastNode.mCachedAddressAfterUpdate + lastNode.mCachedSize));
|
+ (lastNodeArray.mCachedAddressAfterUpdate + lastNodeArray.mCachedSize));
|
||||||
|
|
||||||
return flatNodes;
|
return flatNodes;
|
||||||
}
|
}
|
||||||
|
@ -464,25 +478,25 @@ public class BinaryDictEncoder {
|
||||||
/**
|
/**
|
||||||
* Sanity-checking method.
|
* Sanity-checking method.
|
||||||
*
|
*
|
||||||
* This method checks an array of node for juxtaposition, that is, it will do
|
* This method checks a list of node arrays for juxtaposition, that is, it will do
|
||||||
* nothing if each node's cached address is actually the previous node's address
|
* nothing if each node array's cached address is actually the previous node array's address
|
||||||
* plus the previous node's size.
|
* plus the previous node's size.
|
||||||
* If this is not the case, it will throw an exception.
|
* If this is not the case, it will throw an exception.
|
||||||
*
|
*
|
||||||
* @param array the array node to check
|
* @param arrays the list of node arrays to check
|
||||||
*/
|
*/
|
||||||
private static void checkFlatNodeArray(final ArrayList<Node> array) {
|
private static void checkFlatNodeArrayList(final ArrayList<PtNodeArray> arrays) {
|
||||||
int offset = 0;
|
int offset = 0;
|
||||||
int index = 0;
|
int index = 0;
|
||||||
for (final Node n : array) {
|
for (final PtNodeArray nodeArray : arrays) {
|
||||||
// BeforeUpdate and AfterUpdate addresses are the same here, so it does not matter
|
// BeforeUpdate and AfterUpdate addresses are the same here, so it does not matter
|
||||||
// which we use.
|
// which we use.
|
||||||
if (n.mCachedAddressAfterUpdate != offset) {
|
if (nodeArray.mCachedAddressAfterUpdate != offset) {
|
||||||
throw new RuntimeException("Wrong address for node " + index
|
throw new RuntimeException("Wrong address for node " + index
|
||||||
+ " : expected " + offset + ", got " + n.mCachedAddressAfterUpdate);
|
+ " : expected " + offset + ", got " + nodeArray.mCachedAddressAfterUpdate);
|
||||||
}
|
}
|
||||||
++index;
|
++index;
|
||||||
offset += n.mCachedSize;
|
offset += nodeArray.mCachedSize;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -707,26 +721,23 @@ public class BinaryDictEncoder {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Write a node to memory. The node is expected to have its final position cached.
|
* Write a node array to memory. The node array is expected to have its final position cached.
|
||||||
*
|
*
|
||||||
* This can be an empty map, but the more is inside the faster the lookups will be. It can
|
* @param dict the dictionary the node array is a part of (for relative offsets).
|
||||||
* be carried on as long as nodes do not move.
|
|
||||||
*
|
|
||||||
* @param dict the dictionary the node is a part of (for relative offsets).
|
|
||||||
* @param buffer the memory buffer to write to.
|
* @param buffer the memory buffer to write to.
|
||||||
* @param node the node to write.
|
* @param nodeArray the node array to write.
|
||||||
* @param formatOptions file format options.
|
* @param formatOptions file format options.
|
||||||
* @return the address of the END of the node.
|
* @return the address of the END of the node.
|
||||||
*/
|
*/
|
||||||
@SuppressWarnings("unused")
|
@SuppressWarnings("unused")
|
||||||
private static int writePlacedNode(final FusionDictionary dict, byte[] buffer,
|
private static int writePlacedNode(final FusionDictionary dict, byte[] buffer,
|
||||||
final Node node, final FormatOptions formatOptions) {
|
final PtNodeArray nodeArray, final FormatOptions formatOptions) {
|
||||||
// TODO: Make the code in common with BinaryDictIOUtils#writeCharGroup
|
// TODO: Make the code in common with BinaryDictIOUtils#writeCharGroup
|
||||||
int index = node.mCachedAddressAfterUpdate;
|
int index = nodeArray.mCachedAddressAfterUpdate;
|
||||||
|
|
||||||
final int groupCount = node.mData.size();
|
final int groupCount = nodeArray.mData.size();
|
||||||
final int countSize = getGroupCountSize(node);
|
final int countSize = getGroupCountSize(nodeArray);
|
||||||
final int parentAddress = node.mCachedParentAddress;
|
final int parentAddress = nodeArray.mCachedParentAddress;
|
||||||
if (1 == countSize) {
|
if (1 == countSize) {
|
||||||
buffer[index++] = (byte)groupCount;
|
buffer[index++] = (byte)groupCount;
|
||||||
} else if (2 == countSize) {
|
} else if (2 == countSize) {
|
||||||
|
@ -739,7 +750,7 @@ public class BinaryDictEncoder {
|
||||||
}
|
}
|
||||||
int groupAddress = index;
|
int groupAddress = index;
|
||||||
for (int i = 0; i < groupCount; ++i) {
|
for (int i = 0; i < groupCount; ++i) {
|
||||||
final CharGroup group = node.mData.get(i);
|
final CharGroup group = nodeArray.mData.get(i);
|
||||||
if (index != group.mCachedAddressAfterUpdate) {
|
if (index != group.mCachedAddressAfterUpdate) {
|
||||||
throw new RuntimeException("Bug: write index is not the same as the cached address "
|
throw new RuntimeException("Bug: write index is not the same as the cached address "
|
||||||
+ "of the group : " + index + " <> " + group.mCachedAddressAfterUpdate);
|
+ "of the group : " + index + " <> " + group.mCachedAddressAfterUpdate);
|
||||||
|
@ -762,7 +773,7 @@ public class BinaryDictEncoder {
|
||||||
index = writeParentAddress(buffer, index, parentAddress, formatOptions);
|
index = writeParentAddress(buffer, index, parentAddress, formatOptions);
|
||||||
} else {
|
} else {
|
||||||
index = writeParentAddress(buffer, index, parentAddress
|
index = writeParentAddress(buffer, index, parentAddress
|
||||||
+ (node.mCachedAddressAfterUpdate - group.mCachedAddressAfterUpdate),
|
+ (nodeArray.mCachedAddressAfterUpdate - group.mCachedAddressAfterUpdate),
|
||||||
formatOptions);
|
formatOptions);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -812,7 +823,7 @@ public class BinaryDictEncoder {
|
||||||
while (bigramIterator.hasNext()) {
|
while (bigramIterator.hasNext()) {
|
||||||
final WeightedString bigram = bigramIterator.next();
|
final WeightedString bigram = bigramIterator.next();
|
||||||
final CharGroup target =
|
final CharGroup target =
|
||||||
FusionDictionary.findWordInTree(dict.mRoot, bigram.mWord);
|
FusionDictionary.findWordInTree(dict.mRootNodeArray, bigram.mWord);
|
||||||
final int addressOfBigram = target.mCachedAddressAfterUpdate;
|
final int addressOfBigram = target.mCachedAddressAfterUpdate;
|
||||||
final int unigramFrequencyForThisWord = target.mFrequency;
|
final int unigramFrequencyForThisWord = target.mFrequency;
|
||||||
++groupAddress;
|
++groupAddress;
|
||||||
|
@ -832,57 +843,58 @@ public class BinaryDictEncoder {
|
||||||
= FormatSpec.NO_FORWARD_LINK_ADDRESS;
|
= FormatSpec.NO_FORWARD_LINK_ADDRESS;
|
||||||
index += FormatSpec.FORWARD_LINK_ADDRESS_SIZE;
|
index += FormatSpec.FORWARD_LINK_ADDRESS_SIZE;
|
||||||
}
|
}
|
||||||
if (index != node.mCachedAddressAfterUpdate + node.mCachedSize) throw new RuntimeException(
|
if (index != nodeArray.mCachedAddressAfterUpdate + nodeArray.mCachedSize) {
|
||||||
"Not the same size : written "
|
throw new RuntimeException(
|
||||||
+ (index - node.mCachedAddressAfterUpdate) + " bytes from a node that should have "
|
"Not the same size : written " + (index - nodeArray.mCachedAddressAfterUpdate)
|
||||||
+ node.mCachedSize + " bytes");
|
+ " bytes from a node that should have " + nodeArray.mCachedSize + " bytes");
|
||||||
|
}
|
||||||
return index;
|
return index;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Dumps a collection of useful statistics about a node array.
|
* Dumps a collection of useful statistics about a list of node arrays.
|
||||||
*
|
*
|
||||||
* This prints purely informative stuff, like the total estimated file size, the
|
* This prints purely informative stuff, like the total estimated file size, the
|
||||||
* number of nodes, of character groups, the repartition of each address size, etc
|
* number of node arrays, of character groups, the repartition of each address size, etc
|
||||||
*
|
*
|
||||||
* @param nodes the node array.
|
* @param nodeArrays the list of node arrays.
|
||||||
*/
|
*/
|
||||||
private static void showStatistics(ArrayList<Node> nodes) {
|
private static void showStatistics(ArrayList<PtNodeArray> nodeArrays) {
|
||||||
int firstTerminalAddress = Integer.MAX_VALUE;
|
int firstTerminalAddress = Integer.MAX_VALUE;
|
||||||
int lastTerminalAddress = Integer.MIN_VALUE;
|
int lastTerminalAddress = Integer.MIN_VALUE;
|
||||||
int size = 0;
|
int size = 0;
|
||||||
int charGroups = 0;
|
int charGroups = 0;
|
||||||
int maxGroups = 0;
|
int maxGroups = 0;
|
||||||
int maxRuns = 0;
|
int maxRuns = 0;
|
||||||
for (final Node n : nodes) {
|
for (final PtNodeArray nodeArray : nodeArrays) {
|
||||||
if (maxGroups < n.mData.size()) maxGroups = n.mData.size();
|
if (maxGroups < nodeArray.mData.size()) maxGroups = nodeArray.mData.size();
|
||||||
for (final CharGroup cg : n.mData) {
|
for (final CharGroup cg : nodeArray.mData) {
|
||||||
++charGroups;
|
++charGroups;
|
||||||
if (cg.mChars.length > maxRuns) maxRuns = cg.mChars.length;
|
if (cg.mChars.length > maxRuns) maxRuns = cg.mChars.length;
|
||||||
if (cg.mFrequency >= 0) {
|
if (cg.mFrequency >= 0) {
|
||||||
if (n.mCachedAddressAfterUpdate < firstTerminalAddress)
|
if (nodeArray.mCachedAddressAfterUpdate < firstTerminalAddress)
|
||||||
firstTerminalAddress = n.mCachedAddressAfterUpdate;
|
firstTerminalAddress = nodeArray.mCachedAddressAfterUpdate;
|
||||||
if (n.mCachedAddressAfterUpdate > lastTerminalAddress)
|
if (nodeArray.mCachedAddressAfterUpdate > lastTerminalAddress)
|
||||||
lastTerminalAddress = n.mCachedAddressAfterUpdate;
|
lastTerminalAddress = nodeArray.mCachedAddressAfterUpdate;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (n.mCachedAddressAfterUpdate + n.mCachedSize > size) {
|
if (nodeArray.mCachedAddressAfterUpdate + nodeArray.mCachedSize > size) {
|
||||||
size = n.mCachedAddressAfterUpdate + n.mCachedSize;
|
size = nodeArray.mCachedAddressAfterUpdate + nodeArray.mCachedSize;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
final int[] groupCounts = new int[maxGroups + 1];
|
final int[] groupCounts = new int[maxGroups + 1];
|
||||||
final int[] runCounts = new int[maxRuns + 1];
|
final int[] runCounts = new int[maxRuns + 1];
|
||||||
for (final Node n : nodes) {
|
for (final PtNodeArray nodeArray : nodeArrays) {
|
||||||
++groupCounts[n.mData.size()];
|
++groupCounts[nodeArray.mData.size()];
|
||||||
for (final CharGroup cg : n.mData) {
|
for (final CharGroup cg : nodeArray.mData) {
|
||||||
++runCounts[cg.mChars.length];
|
++runCounts[cg.mChars.length];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
MakedictLog.i("Statistics:\n"
|
MakedictLog.i("Statistics:\n"
|
||||||
+ " total file size " + size + "\n"
|
+ " total file size " + size + "\n"
|
||||||
+ " " + nodes.size() + " nodes\n"
|
+ " " + nodeArrays.size() + " node arrays\n"
|
||||||
+ " " + charGroups + " groups (" + ((float)charGroups / nodes.size())
|
+ " " + charGroups + " groups (" + ((float)charGroups / nodeArrays.size())
|
||||||
+ " groups per node)\n"
|
+ " groups per node)\n"
|
||||||
+ " first terminal at " + firstTerminalAddress + "\n"
|
+ " first terminal at " + firstTerminalAddress + "\n"
|
||||||
+ " last terminal at " + lastTerminalAddress + "\n"
|
+ " last terminal at " + lastTerminalAddress + "\n"
|
||||||
|
@ -909,11 +921,12 @@ public class BinaryDictEncoder {
|
||||||
final FusionDictionary dict, final FormatOptions formatOptions)
|
final FusionDictionary dict, final FormatOptions formatOptions)
|
||||||
throws IOException, UnsupportedFormatException {
|
throws IOException, UnsupportedFormatException {
|
||||||
|
|
||||||
// Addresses are limited to 3 bytes, but since addresses can be relative to each node, the
|
// Addresses are limited to 3 bytes, but since addresses can be relative to each node
|
||||||
// structure itself is not limited to 16MB. However, if it is over 16MB deciding the order
|
// array, the structure itself is not limited to 16MB. However, if it is over 16MB deciding
|
||||||
// of the nodes becomes a quite complicated problem, because though the dictionary itself
|
// the order of the node arrays becomes a quite complicated problem, because though the
|
||||||
// does not have a size limit, each node must still be within 16MB of all its children and
|
// dictionary itself does not have a size limit, each node array must still be within 16MB
|
||||||
// parents. As long as this is ensured, the dictionary file may grow to any size.
|
// of all its children and parents. As long as this is ensured, the dictionary file may
|
||||||
|
// grow to any size.
|
||||||
|
|
||||||
final int version = formatOptions.mVersion;
|
final int version = formatOptions.mVersion;
|
||||||
if (version < FormatSpec.MINIMUM_SUPPORTED_VERSION
|
if (version < FormatSpec.MINIMUM_SUPPORTED_VERSION
|
||||||
|
@ -964,23 +977,23 @@ public class BinaryDictEncoder {
|
||||||
|
|
||||||
// Leave the choice of the optimal node order to the flattenTree function.
|
// Leave the choice of the optimal node order to the flattenTree function.
|
||||||
MakedictLog.i("Flattening the tree...");
|
MakedictLog.i("Flattening the tree...");
|
||||||
ArrayList<Node> flatNodes = flattenTree(dict.mRoot);
|
ArrayList<PtNodeArray> flatNodes = flattenTree(dict.mRootNodeArray);
|
||||||
|
|
||||||
MakedictLog.i("Computing addresses...");
|
MakedictLog.i("Computing addresses...");
|
||||||
computeAddresses(dict, flatNodes, formatOptions);
|
computeAddresses(dict, flatNodes, formatOptions);
|
||||||
MakedictLog.i("Checking array...");
|
MakedictLog.i("Checking array...");
|
||||||
if (DBG) checkFlatNodeArray(flatNodes);
|
if (DBG) checkFlatNodeArrayList(flatNodes);
|
||||||
|
|
||||||
// Create a buffer that matches the final dictionary size.
|
// Create a buffer that matches the final dictionary size.
|
||||||
final Node lastNode = flatNodes.get(flatNodes.size() - 1);
|
final PtNodeArray lastNodeArray = flatNodes.get(flatNodes.size() - 1);
|
||||||
final int bufferSize = lastNode.mCachedAddressAfterUpdate + lastNode.mCachedSize;
|
final int bufferSize = lastNodeArray.mCachedAddressAfterUpdate + lastNodeArray.mCachedSize;
|
||||||
final byte[] buffer = new byte[bufferSize];
|
final byte[] buffer = new byte[bufferSize];
|
||||||
int index = 0;
|
int index = 0;
|
||||||
|
|
||||||
MakedictLog.i("Writing file...");
|
MakedictLog.i("Writing file...");
|
||||||
int dataEndOffset = 0;
|
int dataEndOffset = 0;
|
||||||
for (Node n : flatNodes) {
|
for (PtNodeArray nodeArray : flatNodes) {
|
||||||
dataEndOffset = writePlacedNode(dict, buffer, n, formatOptions);
|
dataEndOffset = writePlacedNode(dict, buffer, nodeArray, formatOptions);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (DBG) showStatistics(flatNodes);
|
if (DBG) showStatistics(flatNodes);
|
||||||
|
|
|
@ -59,7 +59,7 @@ public final class BinaryDictIOUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tours all node without recursive call.
|
* Retrieves all node arrays without recursive call.
|
||||||
*/
|
*/
|
||||||
private static void readUnigramsAndBigramsBinaryInner(
|
private static void readUnigramsAndBigramsBinaryInner(
|
||||||
final FusionDictionaryBufferInterface buffer, final int headerSize,
|
final FusionDictionaryBufferInterface buffer, final int headerSize,
|
||||||
|
@ -116,7 +116,7 @@ public final class BinaryDictIOUtils {
|
||||||
if (formatOptions.mSupportsDynamicUpdate) {
|
if (formatOptions.mSupportsDynamicUpdate) {
|
||||||
final int forwardLinkAddress = buffer.readUnsignedInt24();
|
final int forwardLinkAddress = buffer.readUnsignedInt24();
|
||||||
if (forwardLinkAddress != FormatSpec.NO_FORWARD_LINK_ADDRESS) {
|
if (forwardLinkAddress != FormatSpec.NO_FORWARD_LINK_ADDRESS) {
|
||||||
// the node has a forward link.
|
// The node array has a forward link.
|
||||||
p.mNumOfCharGroup = Position.NOT_READ_GROUPCOUNT;
|
p.mNumOfCharGroup = Position.NOT_READ_GROUPCOUNT;
|
||||||
p.mAddress = forwardLinkAddress;
|
p.mAddress = forwardLinkAddress;
|
||||||
} else {
|
} else {
|
||||||
|
@ -126,7 +126,7 @@ public final class BinaryDictIOUtils {
|
||||||
stack.pop();
|
stack.pop();
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// the node has more groups.
|
// The node array has more groups.
|
||||||
p.mAddress = buffer.position();
|
p.mAddress = buffer.position();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -139,14 +139,14 @@ public final class BinaryDictIOUtils {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reads unigrams and bigrams from the binary file.
|
* Reads unigrams and bigrams from the binary file.
|
||||||
* Doesn't make the memory representation of the dictionary.
|
* Doesn't store a full memory representation of the dictionary.
|
||||||
*
|
*
|
||||||
* @param reader the reader.
|
* @param reader the reader.
|
||||||
* @param words the map to store the address as a key and the word as a value.
|
* @param words the map to store the address as a key and the word as a value.
|
||||||
* @param frequencies the map to store the address as a key and the frequency as a value.
|
* @param frequencies the map to store the address as a key and the frequency as a value.
|
||||||
* @param bigrams the map to store the address as a key and the list of address as a value.
|
* @param bigrams the map to store the address as a key and the list of address as a value.
|
||||||
* @throws IOException
|
* @throws IOException if the file can't be read.
|
||||||
* @throws UnsupportedFormatException
|
* @throws UnsupportedFormatException if the format of the file is not recognized.
|
||||||
*/
|
*/
|
||||||
public static void readUnigramsAndBigramsBinary(final BinaryDictReader reader,
|
public static void readUnigramsAndBigramsBinary(final BinaryDictReader reader,
|
||||||
final Map<Integer, String> words, final Map<Integer, Integer> frequencies,
|
final Map<Integer, String> words, final Map<Integer, Integer> frequencies,
|
||||||
|
@ -165,8 +165,8 @@ public final class BinaryDictIOUtils {
|
||||||
* @param buffer the buffer to read.
|
* @param buffer the buffer to read.
|
||||||
* @param word the word we search for.
|
* @param word the word we search for.
|
||||||
* @return the address of the terminal node.
|
* @return the address of the terminal node.
|
||||||
* @throws IOException
|
* @throws IOException if the file can't be read.
|
||||||
* @throws UnsupportedFormatException
|
* @throws UnsupportedFormatException if the format of the file is not recognized.
|
||||||
*/
|
*/
|
||||||
@UsedForTesting
|
@UsedForTesting
|
||||||
public static int getTerminalPosition(final FusionDictionaryBufferInterface buffer,
|
public static int getTerminalPosition(final FusionDictionaryBufferInterface buffer,
|
||||||
|
@ -224,9 +224,9 @@ public final class BinaryDictIOUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
// If we found the next char group, it is under the file pointer.
|
// If we found the next char group, it is under the file pointer.
|
||||||
// But if not, we are at the end of this node so we expect to have
|
// But if not, we are at the end of this node array so we expect to have
|
||||||
// a forward link address that we need to consult and possibly resume
|
// a forward link address that we need to consult and possibly resume
|
||||||
// search on the next node in the linked list.
|
// search on the next node array in the linked list.
|
||||||
if (foundNextCharGroup) break;
|
if (foundNextCharGroup) break;
|
||||||
if (!header.mFormatOptions.mSupportsDynamicUpdate) {
|
if (!header.mFormatOptions.mSupportsDynamicUpdate) {
|
||||||
return FormatSpec.NOT_VALID_WORD;
|
return FormatSpec.NOT_VALID_WORD;
|
||||||
|
@ -365,9 +365,10 @@ public final class BinaryDictIOUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Write a char group to an output stream.
|
* Write a char group to an output stream from a CharGroupInfo.
|
||||||
* A char group is an in-memory representation of a node in trie.
|
* A char group is an in-memory representation of a node in the patricia trie.
|
||||||
* A char group info is an on-disk representation of a node.
|
* A char group info is a container for low-level information about how the
|
||||||
|
* char group is stored in the binary format.
|
||||||
*
|
*
|
||||||
* @param destination the stream to write.
|
* @param destination the stream to write.
|
||||||
* @param info the char group info to be written.
|
* @param info the char group info to be written.
|
||||||
|
@ -427,7 +428,7 @@ public final class BinaryDictIOUtils {
|
||||||
|
|
||||||
if (info.mBigrams != null) {
|
if (info.mBigrams != null) {
|
||||||
// TODO: Consolidate this code with the code that computes the size of the bigram list
|
// TODO: Consolidate this code with the code that computes the size of the bigram list
|
||||||
// in BinaryDictEncoder#computeActualNodeSize
|
// in BinaryDictEncoder#computeActualNodeArraySize
|
||||||
for (int i = 0; i < info.mBigrams.size(); ++i) {
|
for (int i = 0; i < info.mBigrams.size(); ++i) {
|
||||||
|
|
||||||
final int bigramFrequency = info.mBigrams.get(i).mFrequency;
|
final int bigramFrequency = info.mBigrams.get(i).mFrequency;
|
||||||
|
@ -479,14 +480,14 @@ public final class BinaryDictIOUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Write a node to the stream.
|
* Write a node array to the stream.
|
||||||
*
|
*
|
||||||
* @param destination the stream to write.
|
* @param destination the stream to write.
|
||||||
* @param infos groups to be written.
|
* @param infos an array of CharGroupInfo to be written.
|
||||||
* @return the size written, in bytes.
|
* @return the size written, in bytes.
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
static int writeNode(final OutputStream destination, final CharGroupInfo[] infos)
|
static int writeNodes(final OutputStream destination, final CharGroupInfo[] infos)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
int size = getGroupCountSize(infos.length);
|
int size = getGroupCountSize(infos.length);
|
||||||
switch (getGroupCountSize(infos.length)) {
|
switch (getGroupCountSize(infos.length)) {
|
||||||
|
@ -604,12 +605,12 @@ public final class BinaryDictIOUtils {
|
||||||
public static int getGroupCountSize(final int count) {
|
public static int getGroupCountSize(final int count) {
|
||||||
if (FormatSpec.MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT >= count) {
|
if (FormatSpec.MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT >= count) {
|
||||||
return 1;
|
return 1;
|
||||||
} else if (FormatSpec.MAX_CHARGROUPS_IN_A_NODE >= count) {
|
} else if (FormatSpec.MAX_CHARGROUPS_IN_A_PT_NODE_ARRAY >= count) {
|
||||||
return 2;
|
return 2;
|
||||||
} else {
|
} else {
|
||||||
throw new RuntimeException("Can't have more than "
|
throw new RuntimeException("Can't have more than "
|
||||||
+ FormatSpec.MAX_CHARGROUPS_IN_A_NODE + " groups in a node (found " + count
|
+ FormatSpec.MAX_CHARGROUPS_IN_A_PT_NODE_ARRAY + " groups in a node (found "
|
||||||
+ ")");
|
+ count + ")");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -86,7 +86,7 @@ public final class DynamicBinaryDictIOUtils {
|
||||||
}
|
}
|
||||||
final int flags = buffer.readUnsignedByte();
|
final int flags = buffer.readUnsignedByte();
|
||||||
if (BinaryDictIOUtils.isMovedGroup(flags, formatOptions)) {
|
if (BinaryDictIOUtils.isMovedGroup(flags, formatOptions)) {
|
||||||
// if the group is moved, the parent address is stored in the destination group.
|
// If the group is moved, the parent address is stored in the destination group.
|
||||||
// We are guaranteed to process the destination group later, so there is no need to
|
// We are guaranteed to process the destination group later, so there is no need to
|
||||||
// update anything here.
|
// update anything here.
|
||||||
buffer.position(originalPosition);
|
buffer.position(originalPosition);
|
||||||
|
@ -101,10 +101,10 @@ public final class DynamicBinaryDictIOUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Update parent addresses in a Node that is referred to by nodeOriginAddress.
|
* Update parent addresses in a node array stored at nodeOriginAddress.
|
||||||
*
|
*
|
||||||
* @param buffer the buffer to be modified.
|
* @param buffer the buffer to be modified.
|
||||||
* @param nodeOriginAddress the address of a modified Node.
|
* @param nodeOriginAddress the address of the node array to update.
|
||||||
* @param newParentAddress the address to be written.
|
* @param newParentAddress the address to be written.
|
||||||
* @param formatOptions file format options.
|
* @param formatOptions file format options.
|
||||||
*/
|
*/
|
||||||
|
@ -154,7 +154,7 @@ public final class DynamicBinaryDictIOUtils {
|
||||||
*/
|
*/
|
||||||
private static int moveCharGroup(final OutputStream destination,
|
private static int moveCharGroup(final OutputStream destination,
|
||||||
final FusionDictionaryBufferInterface buffer, final CharGroupInfo info,
|
final FusionDictionaryBufferInterface buffer, final CharGroupInfo info,
|
||||||
final int nodeOriginAddress, final int oldGroupAddress,
|
final int nodeArrayOriginAddress, final int oldGroupAddress,
|
||||||
final FormatOptions formatOptions) throws IOException {
|
final FormatOptions formatOptions) throws IOException {
|
||||||
updateParentAddress(buffer, oldGroupAddress, buffer.limit() + 1, formatOptions);
|
updateParentAddress(buffer, oldGroupAddress, buffer.limit() + 1, formatOptions);
|
||||||
buffer.position(oldGroupAddress);
|
buffer.position(oldGroupAddress);
|
||||||
|
@ -163,15 +163,16 @@ public final class DynamicBinaryDictIOUtils {
|
||||||
buffer.put((byte)(FormatSpec.FLAG_IS_MOVED | (currentFlags
|
buffer.put((byte)(FormatSpec.FLAG_IS_MOVED | (currentFlags
|
||||||
& (~FormatSpec.MASK_MOVE_AND_DELETE_FLAG))));
|
& (~FormatSpec.MASK_MOVE_AND_DELETE_FLAG))));
|
||||||
int size = FormatSpec.GROUP_FLAGS_SIZE;
|
int size = FormatSpec.GROUP_FLAGS_SIZE;
|
||||||
updateForwardLink(buffer, nodeOriginAddress, buffer.limit(), formatOptions);
|
updateForwardLink(buffer, nodeArrayOriginAddress, buffer.limit(), formatOptions);
|
||||||
size += BinaryDictIOUtils.writeNode(destination, new CharGroupInfo[] { info });
|
size += BinaryDictIOUtils.writeNodes(destination, new CharGroupInfo[] { info });
|
||||||
return size;
|
return size;
|
||||||
}
|
}
|
||||||
|
|
||||||
@SuppressWarnings("unused")
|
@SuppressWarnings("unused")
|
||||||
private static void updateForwardLink(final FusionDictionaryBufferInterface buffer,
|
private static void updateForwardLink(final FusionDictionaryBufferInterface buffer,
|
||||||
final int nodeOriginAddress, final int newNodeAddress,
|
final int nodeArrayOriginAddress, final int newNodeArrayAddress,
|
||||||
final FormatOptions formatOptions) {
|
final FormatOptions formatOptions) {
|
||||||
buffer.position(nodeOriginAddress);
|
buffer.position(nodeArrayOriginAddress);
|
||||||
int jumpCount = 0;
|
int jumpCount = 0;
|
||||||
while (jumpCount++ < MAX_JUMPS) {
|
while (jumpCount++ < MAX_JUMPS) {
|
||||||
final int count = BinaryDictDecoder.readCharGroupCount(buffer);
|
final int count = BinaryDictDecoder.readCharGroupCount(buffer);
|
||||||
|
@ -179,7 +180,7 @@ public final class DynamicBinaryDictIOUtils {
|
||||||
final int forwardLinkAddress = buffer.readUnsignedInt24();
|
final int forwardLinkAddress = buffer.readUnsignedInt24();
|
||||||
if (forwardLinkAddress == FormatSpec.NO_FORWARD_LINK_ADDRESS) {
|
if (forwardLinkAddress == FormatSpec.NO_FORWARD_LINK_ADDRESS) {
|
||||||
buffer.position(buffer.position() - FormatSpec.FORWARD_LINK_ADDRESS_SIZE);
|
buffer.position(buffer.position() - FormatSpec.FORWARD_LINK_ADDRESS_SIZE);
|
||||||
BinaryDictIOUtils.writeSInt24ToBuffer(buffer, newNodeAddress);
|
BinaryDictIOUtils.writeSInt24ToBuffer(buffer, newNodeArrayAddress);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
buffer.position(forwardLinkAddress);
|
buffer.position(forwardLinkAddress);
|
||||||
|
@ -190,57 +191,59 @@ public final class DynamicBinaryDictIOUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Move a group that is referred to by oldGroupOrigin to the tail of the file.
|
* Move a group that is referred to by oldGroupOrigin to the tail of the file, and set the
|
||||||
* And set the children address to the byte after the group.
|
* children address to the byte after the group.
|
||||||
*
|
*
|
||||||
* @param nodeOrigin the address of the tail of the file.
|
* @param fileEndAddress the address of the tail of the file.
|
||||||
* @param characters
|
* @param codePoints the characters to put inside the group.
|
||||||
* @param length
|
* @param length how many code points to read from codePoints.
|
||||||
* @param flags
|
* @param flags the flags for this group.
|
||||||
* @param frequency
|
* @param frequency the frequency of this terminal.
|
||||||
* @param parentAddress
|
* @param parentAddress the address of the parent group of this group.
|
||||||
* @param shortcutTargets
|
* @param shortcutTargets the shortcut targets for this group.
|
||||||
* @param bigrams
|
* @param bigrams the bigrams for this group.
|
||||||
* @param destination the stream representing the tail of the file.
|
* @param destination the stream representing the tail of the file.
|
||||||
* @param buffer the buffer representing the (constant-size) body of the file.
|
* @param buffer the buffer representing the (constant-size) body of the file.
|
||||||
* @param oldNodeOrigin
|
* @param oldNodeArrayOrigin the origin of the old node array this group was a part of.
|
||||||
* @param oldGroupOrigin
|
* @param oldGroupOrigin the old origin where this group used to be stored.
|
||||||
* @param formatOptions
|
* @param formatOptions format options for this dictionary.
|
||||||
* @return the size written, in bytes.
|
* @return the size written, in bytes.
|
||||||
* @throws IOException
|
* @throws IOException if the file can't be accessed.
|
||||||
*/
|
*/
|
||||||
private static int moveGroup(final int nodeOrigin, final int[] characters, final int length,
|
private static int moveGroup(final int fileEndAddress, final int[] codePoints,
|
||||||
final int flags, final int frequency, final int parentAddress,
|
final int length, final int flags, final int frequency, final int parentAddress,
|
||||||
final ArrayList<WeightedString> shortcutTargets,
|
final ArrayList<WeightedString> shortcutTargets,
|
||||||
final ArrayList<PendingAttribute> bigrams, final OutputStream destination,
|
final ArrayList<PendingAttribute> bigrams, final OutputStream destination,
|
||||||
final FusionDictionaryBufferInterface buffer, final int oldNodeOrigin,
|
final FusionDictionaryBufferInterface buffer, final int oldNodeArrayOrigin,
|
||||||
final int oldGroupOrigin, final FormatOptions formatOptions) throws IOException {
|
final int oldGroupOrigin, final FormatOptions formatOptions) throws IOException {
|
||||||
int size = 0;
|
int size = 0;
|
||||||
final int newGroupOrigin = nodeOrigin + 1;
|
final int newGroupOrigin = fileEndAddress + 1;
|
||||||
final int[] writtenCharacters = Arrays.copyOfRange(characters, 0, length);
|
final int[] writtenCharacters = Arrays.copyOfRange(codePoints, 0, length);
|
||||||
final CharGroupInfo tmpInfo = new CharGroupInfo(newGroupOrigin, -1 /* endAddress */,
|
final CharGroupInfo tmpInfo = new CharGroupInfo(newGroupOrigin, -1 /* endAddress */,
|
||||||
flags, writtenCharacters, frequency, parentAddress, FormatSpec.NO_CHILDREN_ADDRESS,
|
flags, writtenCharacters, frequency, parentAddress, FormatSpec.NO_CHILDREN_ADDRESS,
|
||||||
shortcutTargets, bigrams);
|
shortcutTargets, bigrams);
|
||||||
size = BinaryDictIOUtils.computeGroupSize(tmpInfo, formatOptions);
|
size = BinaryDictIOUtils.computeGroupSize(tmpInfo, formatOptions);
|
||||||
final CharGroupInfo newInfo = new CharGroupInfo(newGroupOrigin, newGroupOrigin + size,
|
final CharGroupInfo newInfo = new CharGroupInfo(newGroupOrigin, newGroupOrigin + size,
|
||||||
flags, writtenCharacters, frequency, parentAddress,
|
flags, writtenCharacters, frequency, parentAddress,
|
||||||
nodeOrigin + 1 + size + FormatSpec.FORWARD_LINK_ADDRESS_SIZE, shortcutTargets,
|
fileEndAddress + 1 + size + FormatSpec.FORWARD_LINK_ADDRESS_SIZE, shortcutTargets,
|
||||||
bigrams);
|
bigrams);
|
||||||
moveCharGroup(destination, buffer, newInfo, oldNodeOrigin, oldGroupOrigin, formatOptions);
|
moveCharGroup(destination, buffer, newInfo, oldNodeArrayOrigin, oldGroupOrigin,
|
||||||
|
formatOptions);
|
||||||
return 1 + size + FormatSpec.FORWARD_LINK_ADDRESS_SIZE;
|
return 1 + size + FormatSpec.FORWARD_LINK_ADDRESS_SIZE;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Insert a word into a binary dictionary.
|
* Insert a word into a binary dictionary.
|
||||||
*
|
*
|
||||||
* @param buffer
|
* @param buffer the buffer containing the existing dictionary.
|
||||||
* @param destination
|
* @param destination a stream to the underlying file, with the pointer at the end of the file.
|
||||||
* @param word
|
* @param word the word to insert.
|
||||||
* @param frequency
|
* @param frequency the frequency of the new word.
|
||||||
* @param bigramStrings
|
* @param bigramStrings bigram list, or null if none.
|
||||||
* @param shortcuts
|
* @param shortcuts shortcut list, or null if none.
|
||||||
* @throws IOException
|
* @param isBlackListEntry whether this should be a blacklist entry.
|
||||||
* @throws UnsupportedFormatException
|
* @throws IOException if the file can't be accessed.
|
||||||
|
* @throws UnsupportedFormatException if the existing dictionary is in an unexpected format.
|
||||||
*/
|
*/
|
||||||
// TODO: Support batch insertion.
|
// TODO: Support batch insertion.
|
||||||
// TODO: Remove @UsedForTesting once UserHistoryDictionary is implemented by BinaryDictionary.
|
// TODO: Remove @UsedForTesting once UserHistoryDictionary is implemented by BinaryDictionary.
|
||||||
|
@ -323,7 +326,7 @@ public final class DynamicBinaryDictIOUtils {
|
||||||
currentInfo.mFlags, characters2, currentInfo.mFrequency,
|
currentInfo.mFlags, characters2, currentInfo.mFrequency,
|
||||||
newNodeAddress + 1, currentInfo.mChildrenAddress,
|
newNodeAddress + 1, currentInfo.mChildrenAddress,
|
||||||
currentInfo.mShortcutTargets, currentInfo.mBigrams);
|
currentInfo.mShortcutTargets, currentInfo.mBigrams);
|
||||||
BinaryDictIOUtils.writeNode(destination, new CharGroupInfo[] { newInfo2 });
|
BinaryDictIOUtils.writeNodes(destination, new CharGroupInfo[] { newInfo2 });
|
||||||
return;
|
return;
|
||||||
} else if (codePoints[wordPos + p] != currentInfo.mCharacters[p]) {
|
} else if (codePoints[wordPos + p] != currentInfo.mCharacters[p]) {
|
||||||
if (p > 0) {
|
if (p > 0) {
|
||||||
|
@ -386,7 +389,7 @@ public final class DynamicBinaryDictIOUtils {
|
||||||
newNodeAddress + written, -1 /* endAddress */, flags,
|
newNodeAddress + written, -1 /* endAddress */, flags,
|
||||||
newCharacters, frequency, newNodeAddress + 1,
|
newCharacters, frequency, newNodeAddress + 1,
|
||||||
FormatSpec.NO_CHILDREN_ADDRESS, shortcuts, bigrams);
|
FormatSpec.NO_CHILDREN_ADDRESS, shortcuts, bigrams);
|
||||||
BinaryDictIOUtils.writeNode(destination,
|
BinaryDictIOUtils.writeNodes(destination,
|
||||||
new CharGroupInfo[] { suffixInfo, newInfo });
|
new CharGroupInfo[] { suffixInfo, newInfo });
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -438,7 +441,7 @@ public final class DynamicBinaryDictIOUtils {
|
||||||
final CharGroupInfo newInfo = new CharGroupInfo(newGroupAddress, -1, flags,
|
final CharGroupInfo newInfo = new CharGroupInfo(newGroupAddress, -1, flags,
|
||||||
characters, frequency, address, FormatSpec.NO_CHILDREN_ADDRESS,
|
characters, frequency, address, FormatSpec.NO_CHILDREN_ADDRESS,
|
||||||
shortcuts, bigrams);
|
shortcuts, bigrams);
|
||||||
BinaryDictIOUtils.writeNode(destination, new CharGroupInfo[] { newInfo });
|
BinaryDictIOUtils.writeNodes(destination, new CharGroupInfo[] { newInfo });
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
buffer.position(currentInfo.mChildrenAddress);
|
buffer.position(currentInfo.mChildrenAddress);
|
||||||
|
@ -482,7 +485,7 @@ public final class DynamicBinaryDictIOUtils {
|
||||||
final CharGroupInfo newInfo = new CharGroupInfo(newNodeAddress + 1,
|
final CharGroupInfo newInfo = new CharGroupInfo(newNodeAddress + 1,
|
||||||
-1 /* endAddress */, flags, characters, frequency, nodeParentAddress,
|
-1 /* endAddress */, flags, characters, frequency, nodeParentAddress,
|
||||||
FormatSpec.NO_CHILDREN_ADDRESS, shortcuts, bigrams);
|
FormatSpec.NO_CHILDREN_ADDRESS, shortcuts, bigrams);
|
||||||
BinaryDictIOUtils.writeNode(destination, new CharGroupInfo[]{ newInfo });
|
BinaryDictIOUtils.writeNodes(destination, new CharGroupInfo[]{ newInfo });
|
||||||
return;
|
return;
|
||||||
} else {
|
} else {
|
||||||
depth--;
|
depth--;
|
||||||
|
|
|
@ -60,7 +60,7 @@ public final class FormatSpec {
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Array of Node(FusionDictionary.Node) layout is as follows:
|
* Node array (FusionDictionary.PtNodeArray) layout is as follows:
|
||||||
*
|
*
|
||||||
* g |
|
* g |
|
||||||
* r | the number of groups, 1 or 2 bytes.
|
* r | the number of groups, 1 or 2 bytes.
|
||||||
|
@ -86,7 +86,7 @@ public final class FormatSpec {
|
||||||
* linkaddress
|
* linkaddress
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* Node(CharGroup) layout is as follows:
|
/* Node (FusionDictionary.CharGroup) layout is as follows:
|
||||||
* | IF !SUPPORTS_DYNAMIC_UPDATE
|
* | IF !SUPPORTS_DYNAMIC_UPDATE
|
||||||
* | addressType xx : mask with MASK_GROUP_ADDRESS_TYPE
|
* | addressType xx : mask with MASK_GROUP_ADDRESS_TYPE
|
||||||
* | 2 bits, 00 = no children : FLAG_GROUP_ADDRESS_TYPE_NOADDRESS
|
* | 2 bits, 00 = no children : FLAG_GROUP_ADDRESS_TYPE_NOADDRESS
|
||||||
|
@ -251,7 +251,7 @@ public final class FormatSpec {
|
||||||
static final int INVALID_CHARACTER = -1;
|
static final int INVALID_CHARACTER = -1;
|
||||||
|
|
||||||
static final int MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT = 0x7F; // 127
|
static final int MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT = 0x7F; // 127
|
||||||
static final int MAX_CHARGROUPS_IN_A_NODE = 0x7FFF; // 32767
|
static final int MAX_CHARGROUPS_IN_A_PT_NODE_ARRAY = 0x7FFF; // 32767
|
||||||
static final int MAX_BIGRAMS_IN_A_GROUP = 10000;
|
static final int MAX_BIGRAMS_IN_A_GROUP = 10000;
|
||||||
|
|
||||||
static final int MAX_TERMINAL_FREQUENCY = 255;
|
static final int MAX_TERMINAL_FREQUENCY = 255;
|
||||||
|
|
|
@ -37,14 +37,14 @@ public final class FusionDictionary implements Iterable<Word> {
|
||||||
private static int CHARACTER_NOT_FOUND_INDEX = -1;
|
private static int CHARACTER_NOT_FOUND_INDEX = -1;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A node of the dictionary, containing several CharGroups.
|
* A node array of the dictionary, containing several CharGroups.
|
||||||
*
|
*
|
||||||
* A node is but an ordered array of CharGroups, which essentially contain all the
|
* A PtNodeArray is but an ordered array of CharGroups, which essentially contain all the
|
||||||
* real information.
|
* real information.
|
||||||
* This class also contains fields to cache size and address, to help with binary
|
* This class also contains fields to cache size and address, to help with binary
|
||||||
* generation.
|
* generation.
|
||||||
*/
|
*/
|
||||||
public static final class Node {
|
public static final class PtNodeArray {
|
||||||
ArrayList<CharGroup> mData;
|
ArrayList<CharGroup> mData;
|
||||||
// To help with binary generation
|
// To help with binary generation
|
||||||
int mCachedSize = Integer.MIN_VALUE;
|
int mCachedSize = Integer.MIN_VALUE;
|
||||||
|
@ -57,10 +57,10 @@ public final class FusionDictionary implements Iterable<Word> {
|
||||||
int mCachedAddressAfterUpdate = Integer.MIN_VALUE;
|
int mCachedAddressAfterUpdate = Integer.MIN_VALUE;
|
||||||
int mCachedParentAddress = 0;
|
int mCachedParentAddress = 0;
|
||||||
|
|
||||||
public Node() {
|
public PtNodeArray() {
|
||||||
mData = new ArrayList<CharGroup>();
|
mData = new ArrayList<CharGroup>();
|
||||||
}
|
}
|
||||||
public Node(ArrayList<CharGroup> data) {
|
public PtNodeArray(ArrayList<CharGroup> data) {
|
||||||
mData = data;
|
mData = data;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -98,7 +98,7 @@ public final class FusionDictionary implements Iterable<Word> {
|
||||||
* This is the central class of the in-memory representation. A CharGroup is what can
|
* This is the central class of the in-memory representation. A CharGroup is what can
|
||||||
* be seen as a traditional "trie node", except it can hold several characters at the
|
* be seen as a traditional "trie node", except it can hold several characters at the
|
||||||
* same time. A CharGroup essentially represents one or several characters in the middle
|
* same time. A CharGroup essentially represents one or several characters in the middle
|
||||||
* of the trie trie; as such, it can be a terminal, and it can have children.
|
* of the trie tree; as such, it can be a terminal, and it can have children.
|
||||||
* In this in-memory representation, whether the CharGroup is a terminal or not is represented
|
* In this in-memory representation, whether the CharGroup is a terminal or not is represented
|
||||||
* in the frequency, where NOT_A_TERMINAL (= -1) means this is not a terminal and any other
|
* in the frequency, where NOT_A_TERMINAL (= -1) means this is not a terminal and any other
|
||||||
* value is the frequency of this terminal. A terminal may have non-null shortcuts and/or
|
* value is the frequency of this terminal. A terminal may have non-null shortcuts and/or
|
||||||
|
@ -110,7 +110,7 @@ public final class FusionDictionary implements Iterable<Word> {
|
||||||
ArrayList<WeightedString> mShortcutTargets;
|
ArrayList<WeightedString> mShortcutTargets;
|
||||||
ArrayList<WeightedString> mBigrams;
|
ArrayList<WeightedString> mBigrams;
|
||||||
int mFrequency; // NOT_A_TERMINAL == mFrequency indicates this is not a terminal.
|
int mFrequency; // NOT_A_TERMINAL == mFrequency indicates this is not a terminal.
|
||||||
Node mChildren;
|
PtNodeArray mChildren;
|
||||||
boolean mIsNotAWord; // Only a shortcut
|
boolean mIsNotAWord; // Only a shortcut
|
||||||
boolean mIsBlacklistEntry;
|
boolean mIsBlacklistEntry;
|
||||||
// mCachedSize and mCachedAddressBefore/AfterUpdate are helpers for binary dictionary
|
// mCachedSize and mCachedAddressBefore/AfterUpdate are helpers for binary dictionary
|
||||||
|
@ -137,7 +137,8 @@ public final class FusionDictionary implements Iterable<Word> {
|
||||||
|
|
||||||
public CharGroup(final int[] chars, final ArrayList<WeightedString> shortcutTargets,
|
public CharGroup(final int[] chars, final ArrayList<WeightedString> shortcutTargets,
|
||||||
final ArrayList<WeightedString> bigrams, final int frequency,
|
final ArrayList<WeightedString> bigrams, final int frequency,
|
||||||
final boolean isNotAWord, final boolean isBlacklistEntry, final Node children) {
|
final boolean isNotAWord, final boolean isBlacklistEntry,
|
||||||
|
final PtNodeArray children) {
|
||||||
mChars = chars;
|
mChars = chars;
|
||||||
mFrequency = frequency;
|
mFrequency = frequency;
|
||||||
mShortcutTargets = shortcutTargets;
|
mShortcutTargets = shortcutTargets;
|
||||||
|
@ -149,7 +150,7 @@ public final class FusionDictionary implements Iterable<Word> {
|
||||||
|
|
||||||
public void addChild(CharGroup n) {
|
public void addChild(CharGroup n) {
|
||||||
if (null == mChildren) {
|
if (null == mChildren) {
|
||||||
mChildren = new Node();
|
mChildren = new PtNodeArray();
|
||||||
}
|
}
|
||||||
mChildren.mData.add(n);
|
mChildren.mData.add(n);
|
||||||
}
|
}
|
||||||
|
@ -344,10 +345,10 @@ public final class FusionDictionary implements Iterable<Word> {
|
||||||
}
|
}
|
||||||
|
|
||||||
public final DictionaryOptions mOptions;
|
public final DictionaryOptions mOptions;
|
||||||
public final Node mRoot;
|
public final PtNodeArray mRootNodeArray;
|
||||||
|
|
||||||
public FusionDictionary(final Node root, final DictionaryOptions options) {
|
public FusionDictionary(final PtNodeArray rootNodeArray, final DictionaryOptions options) {
|
||||||
mRoot = root;
|
mRootNodeArray = rootNodeArray;
|
||||||
mOptions = options;
|
mOptions = options;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -406,13 +407,13 @@ public final class FusionDictionary implements Iterable<Word> {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sanity check for a node.
|
* Sanity check for a node array.
|
||||||
*
|
*
|
||||||
* This method checks that all CharGroups in a node are ordered as expected.
|
* This method checks that all CharGroups in a node array are ordered as expected.
|
||||||
* If they are, nothing happens. If they aren't, an exception is thrown.
|
* If they are, nothing happens. If they aren't, an exception is thrown.
|
||||||
*/
|
*/
|
||||||
private void checkStack(Node node) {
|
private void checkStack(PtNodeArray nodeArray) {
|
||||||
ArrayList<CharGroup> stack = node.mData;
|
ArrayList<CharGroup> stack = nodeArray.mData;
|
||||||
int lastValue = -1;
|
int lastValue = -1;
|
||||||
for (int i = 0; i < stack.size(); ++i) {
|
for (int i = 0; i < stack.size(); ++i) {
|
||||||
int currentValue = stack.get(i).mChars[0];
|
int currentValue = stack.get(i).mChars[0];
|
||||||
|
@ -431,16 +432,16 @@ public final class FusionDictionary implements Iterable<Word> {
|
||||||
* @param frequency the bigram frequency
|
* @param frequency the bigram frequency
|
||||||
*/
|
*/
|
||||||
public void setBigram(final String word1, final String word2, final int frequency) {
|
public void setBigram(final String word1, final String word2, final int frequency) {
|
||||||
CharGroup charGroup = findWordInTree(mRoot, word1);
|
CharGroup charGroup = findWordInTree(mRootNodeArray, word1);
|
||||||
if (charGroup != null) {
|
if (charGroup != null) {
|
||||||
final CharGroup charGroup2 = findWordInTree(mRoot, word2);
|
final CharGroup charGroup2 = findWordInTree(mRootNodeArray, word2);
|
||||||
if (charGroup2 == null) {
|
if (charGroup2 == null) {
|
||||||
add(getCodePoints(word2), 0, null, false /* isNotAWord */,
|
add(getCodePoints(word2), 0, null, false /* isNotAWord */,
|
||||||
false /* isBlacklistEntry */);
|
false /* isBlacklistEntry */);
|
||||||
// The chargroup for the first word may have moved by the above insertion,
|
// The chargroup for the first word may have moved by the above insertion,
|
||||||
// if word1 and word2 share a common stem that happens not to have been
|
// if word1 and word2 share a common stem that happens not to have been
|
||||||
// a cutting point until now. In this case, we need to refresh charGroup.
|
// a cutting point until now. In this case, we need to refresh charGroup.
|
||||||
charGroup = findWordInTree(mRoot, word1);
|
charGroup = findWordInTree(mRootNodeArray, word1);
|
||||||
}
|
}
|
||||||
charGroup.addBigram(word2, frequency);
|
charGroup.addBigram(word2, frequency);
|
||||||
} else {
|
} else {
|
||||||
|
@ -469,38 +470,38 @@ public final class FusionDictionary implements Iterable<Word> {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
Node currentNode = mRoot;
|
PtNodeArray currentNodeArray = mRootNodeArray;
|
||||||
int charIndex = 0;
|
int charIndex = 0;
|
||||||
|
|
||||||
CharGroup currentGroup = null;
|
CharGroup currentGroup = null;
|
||||||
int differentCharIndex = 0; // Set by the loop to the index of the char that differs
|
int differentCharIndex = 0; // Set by the loop to the index of the char that differs
|
||||||
int nodeIndex = findIndexOfChar(mRoot, word[charIndex]);
|
int nodeIndex = findIndexOfChar(mRootNodeArray, word[charIndex]);
|
||||||
while (CHARACTER_NOT_FOUND_INDEX != nodeIndex) {
|
while (CHARACTER_NOT_FOUND_INDEX != nodeIndex) {
|
||||||
currentGroup = currentNode.mData.get(nodeIndex);
|
currentGroup = currentNodeArray.mData.get(nodeIndex);
|
||||||
differentCharIndex = compareArrays(currentGroup.mChars, word, charIndex);
|
differentCharIndex = compareCharArrays(currentGroup.mChars, word, charIndex);
|
||||||
if (ARRAYS_ARE_EQUAL != differentCharIndex
|
if (ARRAYS_ARE_EQUAL != differentCharIndex
|
||||||
&& differentCharIndex < currentGroup.mChars.length) break;
|
&& differentCharIndex < currentGroup.mChars.length) break;
|
||||||
if (null == currentGroup.mChildren) break;
|
if (null == currentGroup.mChildren) break;
|
||||||
charIndex += currentGroup.mChars.length;
|
charIndex += currentGroup.mChars.length;
|
||||||
if (charIndex >= word.length) break;
|
if (charIndex >= word.length) break;
|
||||||
currentNode = currentGroup.mChildren;
|
currentNodeArray = currentGroup.mChildren;
|
||||||
nodeIndex = findIndexOfChar(currentNode, word[charIndex]);
|
nodeIndex = findIndexOfChar(currentNodeArray, word[charIndex]);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (CHARACTER_NOT_FOUND_INDEX == nodeIndex) {
|
if (CHARACTER_NOT_FOUND_INDEX == nodeIndex) {
|
||||||
// No node at this point to accept the word. Create one.
|
// No node at this point to accept the word. Create one.
|
||||||
final int insertionIndex = findInsertionIndex(currentNode, word[charIndex]);
|
final int insertionIndex = findInsertionIndex(currentNodeArray, word[charIndex]);
|
||||||
final CharGroup newGroup = new CharGroup(
|
final CharGroup newGroup = new CharGroup(
|
||||||
Arrays.copyOfRange(word, charIndex, word.length),
|
Arrays.copyOfRange(word, charIndex, word.length),
|
||||||
shortcutTargets, null /* bigrams */, frequency, isNotAWord, isBlacklistEntry);
|
shortcutTargets, null /* bigrams */, frequency, isNotAWord, isBlacklistEntry);
|
||||||
currentNode.mData.add(insertionIndex, newGroup);
|
currentNodeArray.mData.add(insertionIndex, newGroup);
|
||||||
if (DBG) checkStack(currentNode);
|
if (DBG) checkStack(currentNodeArray);
|
||||||
} else {
|
} else {
|
||||||
// There is a word with a common prefix.
|
// There is a word with a common prefix.
|
||||||
if (differentCharIndex == currentGroup.mChars.length) {
|
if (differentCharIndex == currentGroup.mChars.length) {
|
||||||
if (charIndex + differentCharIndex >= word.length) {
|
if (charIndex + differentCharIndex >= word.length) {
|
||||||
// The new word is a prefix of an existing word, but the node on which it
|
// The new word is a prefix of an existing word, but the node on which it
|
||||||
// should end already exists as is. Since the old CharNode was not a terminal,
|
// should end already exists as is. Since the old CharGroup was not a terminal,
|
||||||
// make it one by filling in its frequency and other attributes
|
// make it one by filling in its frequency and other attributes
|
||||||
currentGroup.update(frequency, shortcutTargets, null, isNotAWord,
|
currentGroup.update(frequency, shortcutTargets, null, isNotAWord,
|
||||||
isBlacklistEntry);
|
isBlacklistEntry);
|
||||||
|
@ -511,7 +512,7 @@ public final class FusionDictionary implements Iterable<Word> {
|
||||||
Arrays.copyOfRange(word, charIndex + differentCharIndex, word.length),
|
Arrays.copyOfRange(word, charIndex + differentCharIndex, word.length),
|
||||||
shortcutTargets, null /* bigrams */, frequency, isNotAWord,
|
shortcutTargets, null /* bigrams */, frequency, isNotAWord,
|
||||||
isBlacklistEntry);
|
isBlacklistEntry);
|
||||||
currentGroup.mChildren = new Node();
|
currentGroup.mChildren = new PtNodeArray();
|
||||||
currentGroup.mChildren.mData.add(newNode);
|
currentGroup.mChildren.mData.add(newNode);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -524,7 +525,7 @@ public final class FusionDictionary implements Iterable<Word> {
|
||||||
} else {
|
} else {
|
||||||
// Partial prefix match only. We have to replace the current node with a node
|
// Partial prefix match only. We have to replace the current node with a node
|
||||||
// containing the current prefix and create two new ones for the tails.
|
// containing the current prefix and create two new ones for the tails.
|
||||||
Node newChildren = new Node();
|
PtNodeArray newChildren = new PtNodeArray();
|
||||||
final CharGroup newOldWord = new CharGroup(
|
final CharGroup newOldWord = new CharGroup(
|
||||||
Arrays.copyOfRange(currentGroup.mChars, differentCharIndex,
|
Arrays.copyOfRange(currentGroup.mChars, differentCharIndex,
|
||||||
currentGroup.mChars.length), currentGroup.mShortcutTargets,
|
currentGroup.mChars.length), currentGroup.mShortcutTargets,
|
||||||
|
@ -552,9 +553,9 @@ public final class FusionDictionary implements Iterable<Word> {
|
||||||
> currentGroup.mChars[differentCharIndex] ? 1 : 0;
|
> currentGroup.mChars[differentCharIndex] ? 1 : 0;
|
||||||
newChildren.mData.add(addIndex, newWord);
|
newChildren.mData.add(addIndex, newWord);
|
||||||
}
|
}
|
||||||
currentNode.mData.set(nodeIndex, newParent);
|
currentNodeArray.mData.set(nodeIndex, newParent);
|
||||||
}
|
}
|
||||||
if (DBG) checkStack(currentNode);
|
if (DBG) checkStack(currentNodeArray);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -576,7 +577,7 @@ public final class FusionDictionary implements Iterable<Word> {
|
||||||
* @param dstOffset the offset in the right-hand side string.
|
* @param dstOffset the offset in the right-hand side string.
|
||||||
* @return the index at which the strings differ, or ARRAYS_ARE_EQUAL = 0 if they don't.
|
* @return the index at which the strings differ, or ARRAYS_ARE_EQUAL = 0 if they don't.
|
||||||
*/
|
*/
|
||||||
private static int compareArrays(final int[] src, final int[] dst, int dstOffset) {
|
private static int compareCharArrays(final int[] src, final int[] dst, int dstOffset) {
|
||||||
// We do NOT test the first char, because we come from a method that already
|
// We do NOT test the first char, because we come from a method that already
|
||||||
// tested it.
|
// tested it.
|
||||||
for (int i = 1; i < src.length; ++i) {
|
for (int i = 1; i < src.length; ++i) {
|
||||||
|
@ -603,10 +604,10 @@ public final class FusionDictionary implements Iterable<Word> {
|
||||||
final static private CharGroupComparator CHARGROUP_COMPARATOR = new CharGroupComparator();
|
final static private CharGroupComparator CHARGROUP_COMPARATOR = new CharGroupComparator();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Finds the insertion index of a character within a node.
|
* Finds the insertion index of a character within a node array.
|
||||||
*/
|
*/
|
||||||
private static int findInsertionIndex(final Node node, int character) {
|
private static int findInsertionIndex(final PtNodeArray nodeArray, int character) {
|
||||||
final ArrayList<CharGroup> data = node.mData;
|
final ArrayList<CharGroup> data = nodeArray.mData;
|
||||||
final CharGroup reference = new CharGroup(new int[] { character },
|
final CharGroup reference = new CharGroup(new int[] { character },
|
||||||
null /* shortcutTargets */, null /* bigrams */, 0, false /* isNotAWord */,
|
null /* shortcutTargets */, null /* bigrams */, 0, false /* isNotAWord */,
|
||||||
false /* isBlacklistEntry */);
|
false /* isBlacklistEntry */);
|
||||||
|
@ -615,16 +616,16 @@ public final class FusionDictionary implements Iterable<Word> {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Find the index of a char in a node, if it exists.
|
* Find the index of a char in a node array, if it exists.
|
||||||
*
|
*
|
||||||
* @param node the node to search in.
|
* @param nodeArray the node array to search in.
|
||||||
* @param character the character to search for.
|
* @param character the character to search for.
|
||||||
* @return the position of the character if it's there, or CHARACTER_NOT_FOUND_INDEX = -1 else.
|
* @return the position of the character if it's there, or CHARACTER_NOT_FOUND_INDEX = -1 else.
|
||||||
*/
|
*/
|
||||||
private static int findIndexOfChar(final Node node, int character) {
|
private static int findIndexOfChar(final PtNodeArray nodeArray, int character) {
|
||||||
final int insertionIndex = findInsertionIndex(node, character);
|
final int insertionIndex = findInsertionIndex(nodeArray, character);
|
||||||
if (node.mData.size() <= insertionIndex) return CHARACTER_NOT_FOUND_INDEX;
|
if (nodeArray.mData.size() <= insertionIndex) return CHARACTER_NOT_FOUND_INDEX;
|
||||||
return character == node.mData.get(insertionIndex).mChars[0] ? insertionIndex
|
return character == nodeArray.mData.get(insertionIndex).mChars[0] ? insertionIndex
|
||||||
: CHARACTER_NOT_FOUND_INDEX;
|
: CHARACTER_NOT_FOUND_INDEX;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -632,16 +633,16 @@ public final class FusionDictionary implements Iterable<Word> {
|
||||||
* Helper method to find a word in a given branch.
|
* Helper method to find a word in a given branch.
|
||||||
*/
|
*/
|
||||||
@SuppressWarnings("unused")
|
@SuppressWarnings("unused")
|
||||||
public static CharGroup findWordInTree(Node node, final String string) {
|
public static CharGroup findWordInTree(PtNodeArray nodeArray, final String string) {
|
||||||
int index = 0;
|
int index = 0;
|
||||||
final StringBuilder checker = DBG ? new StringBuilder() : null;
|
final StringBuilder checker = DBG ? new StringBuilder() : null;
|
||||||
final int[] codePoints = getCodePoints(string);
|
final int[] codePoints = getCodePoints(string);
|
||||||
|
|
||||||
CharGroup currentGroup;
|
CharGroup currentGroup;
|
||||||
do {
|
do {
|
||||||
int indexOfGroup = findIndexOfChar(node, codePoints[index]);
|
int indexOfGroup = findIndexOfChar(nodeArray, codePoints[index]);
|
||||||
if (CHARACTER_NOT_FOUND_INDEX == indexOfGroup) return null;
|
if (CHARACTER_NOT_FOUND_INDEX == indexOfGroup) return null;
|
||||||
currentGroup = node.mData.get(indexOfGroup);
|
currentGroup = nodeArray.mData.get(indexOfGroup);
|
||||||
|
|
||||||
if (codePoints.length - index < currentGroup.mChars.length) return null;
|
if (codePoints.length - index < currentGroup.mChars.length) return null;
|
||||||
int newIndex = index;
|
int newIndex = index;
|
||||||
|
@ -653,9 +654,9 @@ public final class FusionDictionary implements Iterable<Word> {
|
||||||
|
|
||||||
if (DBG) checker.append(new String(currentGroup.mChars, 0, currentGroup.mChars.length));
|
if (DBG) checker.append(new String(currentGroup.mChars, 0, currentGroup.mChars.length));
|
||||||
if (index < codePoints.length) {
|
if (index < codePoints.length) {
|
||||||
node = currentGroup.mChildren;
|
nodeArray = currentGroup.mChildren;
|
||||||
}
|
}
|
||||||
} while (null != node && index < codePoints.length);
|
} while (null != nodeArray && index < codePoints.length);
|
||||||
|
|
||||||
if (index < codePoints.length) return null;
|
if (index < codePoints.length) return null;
|
||||||
if (!currentGroup.isTerminal()) return null;
|
if (!currentGroup.isTerminal()) return null;
|
||||||
|
@ -670,20 +671,20 @@ public final class FusionDictionary implements Iterable<Word> {
|
||||||
if (null == s || "".equals(s)) {
|
if (null == s || "".equals(s)) {
|
||||||
throw new RuntimeException("Can't search for a null or empty string");
|
throw new RuntimeException("Can't search for a null or empty string");
|
||||||
}
|
}
|
||||||
return null != findWordInTree(mRoot, s);
|
return null != findWordInTree(mRootNodeArray, s);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Recursively count the number of character groups in a given branch of the trie.
|
* Recursively count the number of character groups in a given branch of the trie.
|
||||||
*
|
*
|
||||||
* @param node the parent node.
|
* @param nodeArray the parent node.
|
||||||
* @return the number of char groups in all the branch under this node.
|
* @return the number of char groups in all the branch under this node.
|
||||||
*/
|
*/
|
||||||
public static int countCharGroups(final Node node) {
|
public static int countCharGroups(final PtNodeArray nodeArray) {
|
||||||
final int nodeSize = node.mData.size();
|
final int nodeSize = nodeArray.mData.size();
|
||||||
int size = nodeSize;
|
int size = nodeSize;
|
||||||
for (int i = nodeSize - 1; i >= 0; --i) {
|
for (int i = nodeSize - 1; i >= 0; --i) {
|
||||||
CharGroup group = node.mData.get(i);
|
CharGroup group = nodeArray.mData.get(i);
|
||||||
if (null != group.mChildren)
|
if (null != group.mChildren)
|
||||||
size += countCharGroups(group.mChildren);
|
size += countCharGroups(group.mChildren);
|
||||||
}
|
}
|
||||||
|
@ -693,15 +694,15 @@ public final class FusionDictionary implements Iterable<Word> {
|
||||||
/**
|
/**
|
||||||
* Recursively count the number of nodes in a given branch of the trie.
|
* Recursively count the number of nodes in a given branch of the trie.
|
||||||
*
|
*
|
||||||
* @param node the node to count.
|
* @param nodeArray the node array to count.
|
||||||
* @return the number of nodes in this branch.
|
* @return the number of nodes in this branch.
|
||||||
*/
|
*/
|
||||||
public static int countNodes(final Node node) {
|
public static int countNodeArrays(final PtNodeArray nodeArray) {
|
||||||
int size = 1;
|
int size = 1;
|
||||||
for (int i = node.mData.size() - 1; i >= 0; --i) {
|
for (int i = nodeArray.mData.size() - 1; i >= 0; --i) {
|
||||||
CharGroup group = node.mData.get(i);
|
CharGroup group = nodeArray.mData.get(i);
|
||||||
if (null != group.mChildren)
|
if (null != group.mChildren)
|
||||||
size += countNodes(group.mChildren);
|
size += countNodeArrays(group.mChildren);
|
||||||
}
|
}
|
||||||
return size;
|
return size;
|
||||||
}
|
}
|
||||||
|
@ -709,10 +710,10 @@ public final class FusionDictionary implements Iterable<Word> {
|
||||||
// Recursively find out whether there are any bigrams.
|
// Recursively find out whether there are any bigrams.
|
||||||
// This can be pretty expensive especially if there aren't any (we return as soon
|
// This can be pretty expensive especially if there aren't any (we return as soon
|
||||||
// as we find one, so it's much cheaper if there are bigrams)
|
// as we find one, so it's much cheaper if there are bigrams)
|
||||||
private static boolean hasBigramsInternal(final Node node) {
|
private static boolean hasBigramsInternal(final PtNodeArray nodeArray) {
|
||||||
if (null == node) return false;
|
if (null == nodeArray) return false;
|
||||||
for (int i = node.mData.size() - 1; i >= 0; --i) {
|
for (int i = nodeArray.mData.size() - 1; i >= 0; --i) {
|
||||||
CharGroup group = node.mData.get(i);
|
CharGroup group = nodeArray.mData.get(i);
|
||||||
if (null != group.mBigrams) return true;
|
if (null != group.mBigrams) return true;
|
||||||
if (hasBigramsInternal(group.mChildren)) return true;
|
if (hasBigramsInternal(group.mChildren)) return true;
|
||||||
}
|
}
|
||||||
|
@ -729,7 +730,7 @@ public final class FusionDictionary implements Iterable<Word> {
|
||||||
// find a more efficient way of doing this, without compromising too much on memory
|
// find a more efficient way of doing this, without compromising too much on memory
|
||||||
// and ease of use.
|
// and ease of use.
|
||||||
public boolean hasBigrams() {
|
public boolean hasBigrams() {
|
||||||
return hasBigramsInternal(mRoot);
|
return hasBigramsInternal(mRootNodeArray);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Historically, the tails of the words were going to be merged to save space.
|
// Historically, the tails of the words were going to be merged to save space.
|
||||||
|
@ -750,13 +751,13 @@ public final class FusionDictionary implements Iterable<Word> {
|
||||||
// MakedictLog.i("Merging nodes. Number of nodes : " + countNodes(root));
|
// MakedictLog.i("Merging nodes. Number of nodes : " + countNodes(root));
|
||||||
// MakedictLog.i("Number of groups : " + countCharGroups(root));
|
// MakedictLog.i("Number of groups : " + countCharGroups(root));
|
||||||
//
|
//
|
||||||
// final HashMap<String, ArrayList<Node>> repository =
|
// final HashMap<String, ArrayList<PtNodeArray>> repository =
|
||||||
// new HashMap<String, ArrayList<Node>>();
|
// new HashMap<String, ArrayList<PtNodeArray>>();
|
||||||
// mergeTailsInner(repository, root);
|
// mergeTailsInner(repository, root);
|
||||||
//
|
//
|
||||||
// MakedictLog.i("Number of different pseudohashes : " + repository.size());
|
// MakedictLog.i("Number of different pseudohashes : " + repository.size());
|
||||||
// int size = 0;
|
// int size = 0;
|
||||||
// for (ArrayList<Node> a : repository.values()) {
|
// for (ArrayList<PtNodeArray> a : repository.values()) {
|
||||||
// size += a.size();
|
// size += a.size();
|
||||||
// }
|
// }
|
||||||
// MakedictLog.i("Number of nodes after merge : " + (1 + size));
|
// MakedictLog.i("Number of nodes after merge : " + (1 + size));
|
||||||
|
@ -764,7 +765,7 @@ public final class FusionDictionary implements Iterable<Word> {
|
||||||
}
|
}
|
||||||
|
|
||||||
// The following methods are used by the deactivated mergeTails()
|
// The following methods are used by the deactivated mergeTails()
|
||||||
// private static boolean isEqual(Node a, Node b) {
|
// private static boolean isEqual(PtNodeArray a, PtNodeArray b) {
|
||||||
// if (null == a && null == b) return true;
|
// if (null == a && null == b) return true;
|
||||||
// if (null == a || null == b) return false;
|
// if (null == a || null == b) return false;
|
||||||
// if (a.data.size() != b.data.size()) return false;
|
// if (a.data.size() != b.data.size()) return false;
|
||||||
|
@ -781,21 +782,21 @@ public final class FusionDictionary implements Iterable<Word> {
|
||||||
// return true;
|
// return true;
|
||||||
// }
|
// }
|
||||||
|
|
||||||
// static private HashMap<String, ArrayList<Node>> mergeTailsInner(
|
// static private HashMap<String, ArrayList<PtNodeArray>> mergeTailsInner(
|
||||||
// final HashMap<String, ArrayList<Node>> map, final Node node) {
|
// final HashMap<String, ArrayList<PtNodeArray>> map, final PtNodeArray nodeArray) {
|
||||||
// final ArrayList<CharGroup> branches = node.data;
|
// final ArrayList<CharGroup> branches = nodeArray.data;
|
||||||
// final int nodeSize = branches.size();
|
// final int nodeSize = branches.size();
|
||||||
// for (int i = 0; i < nodeSize; ++i) {
|
// for (int i = 0; i < nodeSize; ++i) {
|
||||||
// CharGroup group = branches.get(i);
|
// CharGroup group = branches.get(i);
|
||||||
// if (null != group.children) {
|
// if (null != group.children) {
|
||||||
// String pseudoHash = getPseudoHash(group.children);
|
// String pseudoHash = getPseudoHash(group.children);
|
||||||
// ArrayList<Node> similarList = map.get(pseudoHash);
|
// ArrayList<PtNodeArray> similarList = map.get(pseudoHash);
|
||||||
// if (null == similarList) {
|
// if (null == similarList) {
|
||||||
// similarList = new ArrayList<Node>();
|
// similarList = new ArrayList<PtNodeArray>();
|
||||||
// map.put(pseudoHash, similarList);
|
// map.put(pseudoHash, similarList);
|
||||||
// }
|
// }
|
||||||
// boolean merged = false;
|
// boolean merged = false;
|
||||||
// for (Node similar : similarList) {
|
// for (PtNodeArray similar : similarList) {
|
||||||
// if (isEqual(group.children, similar)) {
|
// if (isEqual(group.children, similar)) {
|
||||||
// group.children = similar;
|
// group.children = similar;
|
||||||
// merged = true;
|
// merged = true;
|
||||||
|
@ -811,9 +812,9 @@ public final class FusionDictionary implements Iterable<Word> {
|
||||||
// return map;
|
// return map;
|
||||||
// }
|
// }
|
||||||
|
|
||||||
// private static String getPseudoHash(final Node node) {
|
// private static String getPseudoHash(final PtNodeArray nodeArray) {
|
||||||
// StringBuilder s = new StringBuilder();
|
// StringBuilder s = new StringBuilder();
|
||||||
// for (CharGroup g : node.data) {
|
// for (CharGroup g : nodeArray.data) {
|
||||||
// s.append(g.frequency);
|
// s.append(g.frequency);
|
||||||
// for (int ch : g.chars) {
|
// for (int ch : g.chars) {
|
||||||
// s.append(Character.toChars(ch));
|
// s.append(Character.toChars(ch));
|
||||||
|
@ -901,6 +902,6 @@ public final class FusionDictionary implements Iterable<Word> {
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public Iterator<Word> iterator() {
|
public Iterator<Word> iterator() {
|
||||||
return new DictionaryIterator(mRoot.mData);
|
return new DictionaryIterator(mRootNodeArray.mData);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -25,7 +25,7 @@ import com.android.inputmethod.latin.makedict.BinaryDictIOUtils;
|
||||||
import com.android.inputmethod.latin.makedict.BinaryDictReader;
|
import com.android.inputmethod.latin.makedict.BinaryDictReader;
|
||||||
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
|
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary;
|
import com.android.inputmethod.latin.makedict.FusionDictionary;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
|
||||||
import com.android.inputmethod.latin.makedict.PendingAttribute;
|
import com.android.inputmethod.latin.makedict.PendingAttribute;
|
||||||
import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
|
import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
|
||||||
import com.android.inputmethod.latin.personalization.UserHistoryDictionaryBigramList;
|
import com.android.inputmethod.latin.personalization.UserHistoryDictionaryBigramList;
|
||||||
|
@ -78,7 +78,7 @@ public final class UserHistoryDictIOUtils {
|
||||||
@UsedForTesting
|
@UsedForTesting
|
||||||
static FusionDictionary constructFusionDictionary(
|
static FusionDictionary constructFusionDictionary(
|
||||||
final BigramDictionaryInterface dict, final UserHistoryDictionaryBigramList bigrams) {
|
final BigramDictionaryInterface dict, final UserHistoryDictionaryBigramList bigrams) {
|
||||||
final FusionDictionary fusionDict = new FusionDictionary(new Node(),
|
final FusionDictionary fusionDict = new FusionDictionary(new PtNodeArray(),
|
||||||
new FusionDictionary.DictionaryOptions(new HashMap<String, String>(), false,
|
new FusionDictionary.DictionaryOptions(new HashMap<String, String>(), false,
|
||||||
false));
|
false));
|
||||||
int profTotal = 0;
|
int profTotal = 0;
|
||||||
|
@ -102,7 +102,7 @@ public final class UserHistoryDictIOUtils {
|
||||||
if (word1 == null) { // unigram
|
if (word1 == null) { // unigram
|
||||||
fusionDict.add(word2, freq, null, false /* isNotAWord */);
|
fusionDict.add(word2, freq, null, false /* isNotAWord */);
|
||||||
} else { // bigram
|
} else { // bigram
|
||||||
if (FusionDictionary.findWordInTree(fusionDict.mRoot, word1) == null) {
|
if (FusionDictionary.findWordInTree(fusionDict.mRootNodeArray, word1) == null) {
|
||||||
fusionDict.add(word1, 2, null, false /* isNotAWord */);
|
fusionDict.add(word1, 2, null, false /* isNotAWord */);
|
||||||
}
|
}
|
||||||
fusionDict.setBigram(word1, word2, freq);
|
fusionDict.setBigram(word1, word2, freq);
|
||||||
|
|
|
@ -20,7 +20,7 @@ import android.test.AndroidTestCase;
|
||||||
import android.test.suitebuilder.annotation.SmallTest;
|
import android.test.suitebuilder.annotation.SmallTest;
|
||||||
|
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary;
|
import com.android.inputmethod.latin.makedict.FusionDictionary;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
|
||||||
|
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
|
||||||
|
@ -30,21 +30,21 @@ import java.util.HashMap;
|
||||||
@SmallTest
|
@SmallTest
|
||||||
public class FusionDictionaryTests extends AndroidTestCase {
|
public class FusionDictionaryTests extends AndroidTestCase {
|
||||||
public void testFindWordInTree() {
|
public void testFindWordInTree() {
|
||||||
FusionDictionary dict = new FusionDictionary(new Node(),
|
FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
|
||||||
new FusionDictionary.DictionaryOptions(new HashMap<String,String>(), false, false));
|
new FusionDictionary.DictionaryOptions(new HashMap<String,String>(), false, false));
|
||||||
|
|
||||||
dict.add("abc", 10, null, false /* isNotAWord */);
|
dict.add("abc", 10, null, false /* isNotAWord */);
|
||||||
assertNull(FusionDictionary.findWordInTree(dict.mRoot, "aaa"));
|
assertNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "aaa"));
|
||||||
assertNotNull(FusionDictionary.findWordInTree(dict.mRoot, "abc"));
|
assertNotNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "abc"));
|
||||||
|
|
||||||
dict.add("aa", 10, null, false /* isNotAWord */);
|
dict.add("aa", 10, null, false /* isNotAWord */);
|
||||||
assertNull(FusionDictionary.findWordInTree(dict.mRoot, "aaa"));
|
assertNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "aaa"));
|
||||||
assertNotNull(FusionDictionary.findWordInTree(dict.mRoot, "aa"));
|
assertNotNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "aa"));
|
||||||
|
|
||||||
dict.add("babcd", 10, null, false /* isNotAWord */);
|
dict.add("babcd", 10, null, false /* isNotAWord */);
|
||||||
dict.add("bacde", 10, null, false /* isNotAWord */);
|
dict.add("bacde", 10, null, false /* isNotAWord */);
|
||||||
assertNull(FusionDictionary.findWordInTree(dict.mRoot, "ba"));
|
assertNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "ba"));
|
||||||
assertNotNull(FusionDictionary.findWordInTree(dict.mRoot, "babcd"));
|
assertNotNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "babcd"));
|
||||||
assertNotNull(FusionDictionary.findWordInTree(dict.mRoot, "bacde"));
|
assertNotNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "bacde"));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -25,7 +25,7 @@ import android.util.SparseArray;
|
||||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.FusionDictionaryBufferInterface;
|
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.FusionDictionaryBufferInterface;
|
||||||
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
|
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
||||||
import com.android.inputmethod.latin.utils.CollectionUtils;
|
import com.android.inputmethod.latin.utils.CollectionUtils;
|
||||||
|
|
||||||
|
@ -226,7 +226,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
||||||
|
|
||||||
// check unigram
|
// check unigram
|
||||||
for (final String word : words) {
|
for (final String word : words) {
|
||||||
final CharGroup cg = FusionDictionary.findWordInTree(dict.mRoot, word);
|
final CharGroup cg = FusionDictionary.findWordInTree(dict.mRootNodeArray, word);
|
||||||
assertNotNull(cg);
|
assertNotNull(cg);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -234,7 +234,8 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
||||||
for (int i = 0; i < bigrams.size(); ++i) {
|
for (int i = 0; i < bigrams.size(); ++i) {
|
||||||
final int w1 = bigrams.keyAt(i);
|
final int w1 = bigrams.keyAt(i);
|
||||||
for (final int w2 : bigrams.valueAt(i)) {
|
for (final int w2 : bigrams.valueAt(i)) {
|
||||||
final CharGroup cg = FusionDictionary.findWordInTree(dict.mRoot, words.get(w1));
|
final CharGroup cg = FusionDictionary.findWordInTree(dict.mRootNodeArray,
|
||||||
|
words.get(w1));
|
||||||
assertNotNull(words.get(w1) + "," + words.get(w2), cg.getBigram(words.get(w2)));
|
assertNotNull(words.get(w1) + "," + words.get(w2), cg.getBigram(words.get(w2)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -242,7 +243,8 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
||||||
// check shortcut
|
// check shortcut
|
||||||
if (shortcutMap != null) {
|
if (shortcutMap != null) {
|
||||||
for (final Map.Entry<String, List<String>> entry : shortcutMap.entrySet()) {
|
for (final Map.Entry<String, List<String>> entry : shortcutMap.entrySet()) {
|
||||||
final CharGroup group = FusionDictionary.findWordInTree(dict.mRoot, entry.getKey());
|
final CharGroup group = FusionDictionary.findWordInTree(dict.mRootNodeArray,
|
||||||
|
entry.getKey());
|
||||||
for (final String word : entry.getValue()) {
|
for (final String word : entry.getValue()) {
|
||||||
assertNotNull("shortcut not found: " + entry.getKey() + ", " + word,
|
assertNotNull("shortcut not found: " + entry.getKey() + ", " + word,
|
||||||
group.getShortcut(word));
|
group.getShortcut(word));
|
||||||
|
@ -297,7 +299,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
||||||
}
|
}
|
||||||
assertNotNull(file);
|
assertNotNull(file);
|
||||||
|
|
||||||
final FusionDictionary dict = new FusionDictionary(new Node(),
|
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
|
||||||
new FusionDictionary.DictionaryOptions(new HashMap<String,String>(), false, false));
|
new FusionDictionary.DictionaryOptions(new HashMap<String,String>(), false, false));
|
||||||
addUnigrams(words.size(), dict, words, shortcuts);
|
addUnigrams(words.size(), dict, words, shortcuts);
|
||||||
addBigrams(dict, words, bigrams);
|
addBigrams(dict, words, bigrams);
|
||||||
|
@ -440,7 +442,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
||||||
assertNotNull(file);
|
assertNotNull(file);
|
||||||
|
|
||||||
// making the dictionary from lists of words.
|
// making the dictionary from lists of words.
|
||||||
final FusionDictionary dict = new FusionDictionary(new Node(),
|
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
|
||||||
new FusionDictionary.DictionaryOptions(
|
new FusionDictionary.DictionaryOptions(
|
||||||
new HashMap<String, String>(), false, false));
|
new HashMap<String, String>(), false, false));
|
||||||
addUnigrams(words.size(), dict, words, null /* shortcutMap */);
|
addUnigrams(words.size(), dict, words, null /* shortcutMap */);
|
||||||
|
@ -538,7 +540,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
||||||
}
|
}
|
||||||
assertNotNull(file);
|
assertNotNull(file);
|
||||||
|
|
||||||
final FusionDictionary dict = new FusionDictionary(new Node(),
|
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
|
||||||
new FusionDictionary.DictionaryOptions(
|
new FusionDictionary.DictionaryOptions(
|
||||||
new HashMap<String, String>(), false, false));
|
new HashMap<String, String>(), false, false));
|
||||||
addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */);
|
addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */);
|
||||||
|
@ -599,7 +601,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
||||||
}
|
}
|
||||||
assertNotNull(file);
|
assertNotNull(file);
|
||||||
|
|
||||||
final FusionDictionary dict = new FusionDictionary(new Node(),
|
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
|
||||||
new FusionDictionary.DictionaryOptions(
|
new FusionDictionary.DictionaryOptions(
|
||||||
new HashMap<String, String>(), false, false));
|
new HashMap<String, String>(), false, false));
|
||||||
addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */);
|
addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */);
|
||||||
|
|
|
@ -24,7 +24,7 @@ import android.util.Log;
|
||||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.ByteBufferWrapper;
|
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.ByteBufferWrapper;
|
||||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.FusionDictionaryBufferInterface;
|
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.FusionDictionaryBufferInterface;
|
||||||
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
|
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
||||||
import com.android.inputmethod.latin.utils.CollectionUtils;
|
import com.android.inputmethod.latin.utils.CollectionUtils;
|
||||||
|
|
||||||
|
@ -277,7 +277,7 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
// set an initial dictionary.
|
// set an initial dictionary.
|
||||||
final FusionDictionary dict = new FusionDictionary(new Node(),
|
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
|
||||||
new FusionDictionary.DictionaryOptions(new HashMap<String,String>(), false, false));
|
new FusionDictionary.DictionaryOptions(new HashMap<String,String>(), false, false));
|
||||||
dict.add("abcd", 10, null, false);
|
dict.add("abcd", 10, null, false);
|
||||||
|
|
||||||
|
@ -328,7 +328,7 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
// set an initial dictionary.
|
// set an initial dictionary.
|
||||||
final FusionDictionary dict = new FusionDictionary(new Node(),
|
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
|
||||||
new FusionDictionary.DictionaryOptions(new HashMap<String,String>(), false, false));
|
new FusionDictionary.DictionaryOptions(new HashMap<String,String>(), false, false));
|
||||||
dict.add("abcd", 10, null, false);
|
dict.add("abcd", 10, null, false);
|
||||||
dict.add("efgh", 15, null, false);
|
dict.add("efgh", 15, null, false);
|
||||||
|
@ -365,7 +365,7 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
|
||||||
assertNotNull(file);
|
assertNotNull(file);
|
||||||
|
|
||||||
// set an initial dictionary.
|
// set an initial dictionary.
|
||||||
final FusionDictionary dict = new FusionDictionary(new Node(),
|
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
|
||||||
new FusionDictionary.DictionaryOptions(new HashMap<String, String>(), false,
|
new FusionDictionary.DictionaryOptions(new HashMap<String, String>(), false,
|
||||||
false));
|
false));
|
||||||
dict.add("initial", 10, null, false);
|
dict.add("initial", 10, null, false);
|
||||||
|
|
|
@ -86,7 +86,7 @@ public class UserHistoryDictIOUtilsTests extends AndroidTestCase
|
||||||
|
|
||||||
private void checkWordInFusionDict(final FusionDictionary dict, final String word,
|
private void checkWordInFusionDict(final FusionDictionary dict, final String word,
|
||||||
final ArrayList<String> expectedBigrams) {
|
final ArrayList<String> expectedBigrams) {
|
||||||
final CharGroup group = FusionDictionary.findWordInTree(dict.mRoot, word);
|
final CharGroup group = FusionDictionary.findWordInTree(dict.mRootNodeArray, word);
|
||||||
assertNotNull(group);
|
assertNotNull(group);
|
||||||
assertTrue(group.isTerminal());
|
assertTrue(group.isTerminal());
|
||||||
|
|
||||||
|
|
|
@ -19,7 +19,7 @@ package com.android.inputmethod.latin.dicttool;
|
||||||
import com.android.inputmethod.latin.makedict.FormatSpec;
|
import com.android.inputmethod.latin.makedict.FormatSpec;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary;
|
import com.android.inputmethod.latin.makedict.FusionDictionary;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
||||||
import com.android.inputmethod.latin.makedict.Word;
|
import com.android.inputmethod.latin.makedict.Word;
|
||||||
|
|
||||||
|
@ -117,7 +117,7 @@ public class CombinedInputOutput {
|
||||||
final boolean processLigatures =
|
final boolean processLigatures =
|
||||||
FRENCH_LIGATURE_PROCESSING_OPTION.equals(attributes.get(OPTIONS_TAG));
|
FRENCH_LIGATURE_PROCESSING_OPTION.equals(attributes.get(OPTIONS_TAG));
|
||||||
attributes.remove(OPTIONS_TAG);
|
attributes.remove(OPTIONS_TAG);
|
||||||
final FusionDictionary dict = new FusionDictionary(new Node(), new DictionaryOptions(
|
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), new DictionaryOptions(
|
||||||
attributes, processUmlauts, processLigatures));
|
attributes, processUmlauts, processLigatures));
|
||||||
|
|
||||||
String line;
|
String line;
|
||||||
|
|
|
@ -121,7 +121,8 @@ public class Diff extends Dicttool.Command {
|
||||||
private static void diffWords(final FusionDictionary dict0, final FusionDictionary dict1) {
|
private static void diffWords(final FusionDictionary dict0, final FusionDictionary dict1) {
|
||||||
boolean hasDifferences = false;
|
boolean hasDifferences = false;
|
||||||
for (final Word word0 : dict0) {
|
for (final Word word0 : dict0) {
|
||||||
final CharGroup word1 = FusionDictionary.findWordInTree(dict1.mRoot, word0.mWord);
|
final CharGroup word1 = FusionDictionary.findWordInTree(dict1.mRootNodeArray,
|
||||||
|
word0.mWord);
|
||||||
if (null == word1) {
|
if (null == word1) {
|
||||||
// This word is not in dict1
|
// This word is not in dict1
|
||||||
System.out.println("Deleted: " + word0.mWord + " " + word0.mFrequency);
|
System.out.println("Deleted: " + word0.mWord + " " + word0.mFrequency);
|
||||||
|
@ -150,7 +151,8 @@ public class Diff extends Dicttool.Command {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (final Word word1 : dict1) {
|
for (final Word word1 : dict1) {
|
||||||
final CharGroup word0 = FusionDictionary.findWordInTree(dict0.mRoot, word1.mWord);
|
final CharGroup word0 = FusionDictionary.findWordInTree(dict0.mRootNodeArray,
|
||||||
|
word1.mWord);
|
||||||
if (null == word0) {
|
if (null == word0) {
|
||||||
// This word is not in dict0
|
// This word is not in dict0
|
||||||
System.out.println("Added: " + word1.mWord + " " + word1.mFrequency);
|
System.out.println("Added: " + word1.mWord + " " + word1.mFrequency);
|
||||||
|
|
|
@ -65,7 +65,7 @@ public class Info extends Dicttool.Command {
|
||||||
|
|
||||||
private static void showWordInfo(final FusionDictionary dict, final String word,
|
private static void showWordInfo(final FusionDictionary dict, final String word,
|
||||||
final boolean plumbing) {
|
final boolean plumbing) {
|
||||||
final CharGroup group = FusionDictionary.findWordInTree(dict.mRoot, word);
|
final CharGroup group = FusionDictionary.findWordInTree(dict.mRootNodeArray, word);
|
||||||
if (null == group) {
|
if (null == group) {
|
||||||
System.out.println(word + " is not in the dictionary");
|
System.out.println(word + " is not in the dictionary");
|
||||||
return;
|
return;
|
||||||
|
|
|
@ -18,7 +18,7 @@ package com.android.inputmethod.latin.dicttool;
|
||||||
|
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary;
|
import com.android.inputmethod.latin.makedict.FusionDictionary;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
||||||
import com.android.inputmethod.latin.makedict.Word;
|
import com.android.inputmethod.latin.makedict.Word;
|
||||||
|
|
||||||
|
@ -124,8 +124,8 @@ public class XmlDictInputOutput {
|
||||||
GERMAN_UMLAUT_PROCESSING_OPTION.equals(optionsString);
|
GERMAN_UMLAUT_PROCESSING_OPTION.equals(optionsString);
|
||||||
final boolean processLigatures =
|
final boolean processLigatures =
|
||||||
FRENCH_LIGATURE_PROCESSING_OPTION.equals(optionsString);
|
FRENCH_LIGATURE_PROCESSING_OPTION.equals(optionsString);
|
||||||
mDictionary = new FusionDictionary(new Node(), new DictionaryOptions(attributes,
|
mDictionary = new FusionDictionary(new PtNodeArray(),
|
||||||
processUmlauts, processLigatures));
|
new DictionaryOptions(attributes, processUmlauts, processLigatures));
|
||||||
} else {
|
} else {
|
||||||
mState = UNKNOWN;
|
mState = UNKNOWN;
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,7 +22,7 @@ import com.android.inputmethod.latin.makedict.BinaryDictReader;
|
||||||
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
|
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary;
|
import com.android.inputmethod.latin.makedict.FusionDictionary;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
|
||||||
import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
|
import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
|
||||||
|
|
||||||
import junit.framework.TestCase;
|
import junit.framework.TestCase;
|
||||||
|
@ -42,7 +42,7 @@ public class BinaryDictOffdeviceUtilsTests extends TestCase {
|
||||||
|
|
||||||
public void testGetRawDictWorks() throws IOException, UnsupportedFormatException {
|
public void testGetRawDictWorks() throws IOException, UnsupportedFormatException {
|
||||||
// Create a thrice-compressed dictionary file.
|
// Create a thrice-compressed dictionary file.
|
||||||
final FusionDictionary dict = new FusionDictionary(new Node(),
|
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
|
||||||
new DictionaryOptions(new HashMap<String, String>(),
|
new DictionaryOptions(new HashMap<String, String>(),
|
||||||
false /* germanUmlautProcessing */, false /* frenchLigatureProcessing */));
|
false /* germanUmlautProcessing */, false /* frenchLigatureProcessing */));
|
||||||
dict.add("foo", TEST_FREQ, null, false /* isNotAWord */);
|
dict.add("foo", TEST_FREQ, null, false /* isNotAWord */);
|
||||||
|
@ -72,7 +72,8 @@ public class BinaryDictOffdeviceUtilsTests extends TestCase {
|
||||||
final FusionDictionary resultDict = BinaryDictDecoder.readDictionaryBinary(reader,
|
final FusionDictionary resultDict = BinaryDictDecoder.readDictionaryBinary(reader,
|
||||||
null /* dict : an optional dictionary to add words to, or null */);
|
null /* dict : an optional dictionary to add words to, or null */);
|
||||||
assertEquals("Dictionary can't be read back correctly",
|
assertEquals("Dictionary can't be read back correctly",
|
||||||
FusionDictionary.findWordInTree(resultDict.mRoot, "foo").getFrequency(), TEST_FREQ);
|
FusionDictionary.findWordInTree(resultDict.mRootNodeArray, "foo").getFrequency(),
|
||||||
|
TEST_FREQ);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testGetRawDictFails() throws IOException {
|
public void testGetRawDictFails() throws IOException {
|
||||||
|
|
|
@ -17,7 +17,7 @@
|
||||||
package com.android.inputmethod.latin.makedict;
|
package com.android.inputmethod.latin.makedict;
|
||||||
|
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
|
||||||
|
|
||||||
import junit.framework.TestCase;
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
|
@ -31,7 +31,7 @@ public class BinaryDictEncoderFlattenTreeTests extends TestCase {
|
||||||
// Test the flattened array contains the expected number of nodes, and
|
// Test the flattened array contains the expected number of nodes, and
|
||||||
// that it does not contain any duplicates.
|
// that it does not contain any duplicates.
|
||||||
public void testFlattenNodes() {
|
public void testFlattenNodes() {
|
||||||
final FusionDictionary dict = new FusionDictionary(new Node(),
|
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
|
||||||
new DictionaryOptions(new HashMap<String, String>(),
|
new DictionaryOptions(new HashMap<String, String>(),
|
||||||
false /* germanUmlautProcessing */, false /* frenchLigatureProcessing */));
|
false /* germanUmlautProcessing */, false /* frenchLigatureProcessing */));
|
||||||
dict.add("foo", 1, null, false /* isNotAWord */);
|
dict.add("foo", 1, null, false /* isNotAWord */);
|
||||||
|
@ -39,10 +39,10 @@ public class BinaryDictEncoderFlattenTreeTests extends TestCase {
|
||||||
dict.add("ftb", 1, null, false /* isNotAWord */);
|
dict.add("ftb", 1, null, false /* isNotAWord */);
|
||||||
dict.add("bar", 1, null, false /* isNotAWord */);
|
dict.add("bar", 1, null, false /* isNotAWord */);
|
||||||
dict.add("fool", 1, null, false /* isNotAWord */);
|
dict.add("fool", 1, null, false /* isNotAWord */);
|
||||||
final ArrayList<Node> result = BinaryDictEncoder.flattenTree(dict.mRoot);
|
final ArrayList<PtNodeArray> result = BinaryDictEncoder.flattenTree(dict.mRootNodeArray);
|
||||||
assertEquals(4, result.size());
|
assertEquals(4, result.size());
|
||||||
while (!result.isEmpty()) {
|
while (!result.isEmpty()) {
|
||||||
final Node n = result.remove(0);
|
final PtNodeArray n = result.remove(0);
|
||||||
assertFalse("Flattened array contained the same node twice", result.contains(n));
|
assertFalse("Flattened array contained the same node twice", result.contains(n));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,7 +19,7 @@ package com.android.inputmethod.latin.makedict;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary;
|
import com.android.inputmethod.latin.makedict.FusionDictionary;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
|
||||||
import com.android.inputmethod.latin.makedict.Word;
|
import com.android.inputmethod.latin.makedict.Word;
|
||||||
|
|
||||||
import junit.framework.TestCase;
|
import junit.framework.TestCase;
|
||||||
|
@ -72,7 +72,7 @@ public class FusionDictionaryTest extends TestCase {
|
||||||
assertNotNull(dict);
|
assertNotNull(dict);
|
||||||
for (final String word : words) {
|
for (final String word : words) {
|
||||||
if (--limit < 0) return;
|
if (--limit < 0) return;
|
||||||
final CharGroup cg = FusionDictionary.findWordInTree(dict.mRoot, word);
|
final CharGroup cg = FusionDictionary.findWordInTree(dict.mRootNodeArray, word);
|
||||||
assertNotNull(cg);
|
assertNotNull(cg);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -95,7 +95,7 @@ public class FusionDictionaryTest extends TestCase {
|
||||||
// Test the flattened array contains the expected number of nodes, and
|
// Test the flattened array contains the expected number of nodes, and
|
||||||
// that it does not contain any duplicates.
|
// that it does not contain any duplicates.
|
||||||
public void testFusion() {
|
public void testFusion() {
|
||||||
final FusionDictionary dict = new FusionDictionary(new Node(),
|
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
|
||||||
new DictionaryOptions(new HashMap<String, String>(),
|
new DictionaryOptions(new HashMap<String, String>(),
|
||||||
false /* germanUmlautProcessing */, false /* frenchLigatureProcessing */));
|
false /* germanUmlautProcessing */, false /* frenchLigatureProcessing */));
|
||||||
final long time = System.currentTimeMillis();
|
final long time = System.currentTimeMillis();
|
||||||
|
|
Loading…
Reference in New Issue