am 800225e0: Merge "Rename CharGroup to PtNode."

* commit '800225e0b19c8a368fe74f6b73b40a01340a0c0f':
  Rename CharGroup to PtNode.
main
Ken Wakasa 2013-08-26 00:12:37 -07:00 committed by Android Git Automerger
commit 69f9cfe212
15 changed files with 710 additions and 697 deletions

View File

@ -19,7 +19,7 @@ package com.android.inputmethod.latin.makedict;
import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
@ -41,6 +41,7 @@ import java.util.TreeMap;
* *
* TODO: Remove calls from classes except Ver3DictDecoder * TODO: Remove calls from classes except Ver3DictDecoder
* TODO: Move this file to makedict/internal. * TODO: Move this file to makedict/internal.
* TODO: Rename this class to DictDecoderUtils.
*/ */
public final class BinaryDictDecoderUtils { public final class BinaryDictDecoderUtils {
@ -213,7 +214,7 @@ public final class BinaryDictDecoderUtils {
buffer[index++] = (byte)(0xFF & codePoint); buffer[index++] = (byte)(0xFF & codePoint);
} }
} }
buffer[index++] = FormatSpec.GROUP_CHARACTERS_TERMINATOR; buffer[index++] = FormatSpec.PTNODE_CHARACTERS_TERMINATOR;
return index - origin; return index - origin;
} }
@ -237,7 +238,7 @@ public final class BinaryDictDecoderUtils {
buffer.write((byte) (0xFF & codePoint)); buffer.write((byte) (0xFF & codePoint));
} }
} }
buffer.write(FormatSpec.GROUP_CHARACTERS_TERMINATOR); buffer.write(FormatSpec.PTNODE_CHARACTERS_TERMINATOR);
} }
/** /**
@ -264,7 +265,7 @@ public final class BinaryDictDecoderUtils {
static int readChar(final DictBuffer dictBuffer) { static int readChar(final DictBuffer dictBuffer) {
int character = dictBuffer.readUnsignedByte(); int character = dictBuffer.readUnsignedByte();
if (!fitsOnOneByte(character)) { if (!fitsOnOneByte(character)) {
if (FormatSpec.GROUP_CHARACTERS_TERMINATOR == character) { if (FormatSpec.PTNODE_CHARACTERS_TERMINATOR == character) {
return FormatSpec.INVALID_CHARACTER; return FormatSpec.INVALID_CHARACTER;
} }
character <<= 16; character <<= 16;
@ -295,14 +296,14 @@ public final class BinaryDictDecoderUtils {
} }
} }
int address; int address;
switch (optionFlags & FormatSpec.MASK_GROUP_ADDRESS_TYPE) { switch (optionFlags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) {
case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_ONEBYTE: case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_ONEBYTE:
return dictBuffer.readUnsignedByte(); return dictBuffer.readUnsignedByte();
case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_TWOBYTES: case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_TWOBYTES:
return dictBuffer.readUnsignedShort(); return dictBuffer.readUnsignedShort();
case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_THREEBYTES: case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_THREEBYTES:
return dictBuffer.readUnsignedInt24(); return dictBuffer.readUnsignedInt24();
case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_NOADDRESS: case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_NOADDRESS:
default: default:
return FormatSpec.NO_CHILDREN_ADDRESS; return FormatSpec.NO_CHILDREN_ADDRESS;
} }
@ -320,14 +321,14 @@ public final class BinaryDictDecoderUtils {
} }
/** /**
* Reads and returns the char group count out of a buffer and forwards the pointer. * Reads and returns the PtNode count out of a buffer and forwards the pointer.
*/ */
public static int readCharGroupCount(final DictBuffer dictBuffer) { public static int readPtNodeCount(final DictBuffer dictBuffer) {
final int msb = dictBuffer.readUnsignedByte(); final int msb = dictBuffer.readUnsignedByte();
if (FormatSpec.MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT >= msb) { if (FormatSpec.MAX_PTNODES_FOR_ONE_BYTE_PTNODE_COUNT >= msb) {
return msb; return msb;
} else { } else {
return ((FormatSpec.MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT & msb) << 8) return ((FormatSpec.MAX_PTNODES_FOR_ONE_BYTE_PTNODE_COUNT & msb) << 8)
+ dictBuffer.readUnsignedByte(); + dictBuffer.readUnsignedByte();
} }
} }
@ -369,18 +370,18 @@ public final class BinaryDictDecoderUtils {
final StringBuilder builder = new StringBuilder(); final StringBuilder builder = new StringBuilder();
// the length of the path from the root to the leaf is limited by MAX_WORD_LENGTH // the length of the path from the root to the leaf is limited by MAX_WORD_LENGTH
for (int count = 0; count < FormatSpec.MAX_WORD_LENGTH; ++count) { for (int count = 0; count < FormatSpec.MAX_WORD_LENGTH; ++count) {
CharGroupInfo currentInfo; PtNodeInfo currentInfo;
int loopCounter = 0; int loopCounter = 0;
do { do {
dictBuffer.position(currentPos); dictBuffer.position(currentPos);
currentInfo = dictDecoder.readPtNode(currentPos, options); currentInfo = dictDecoder.readPtNode(currentPos, options);
if (BinaryDictIOUtils.isMovedGroup(currentInfo.mFlags, options)) { if (BinaryDictIOUtils.isMovedPtNode(currentInfo.mFlags, options)) {
currentPos = currentInfo.mParentAddress + currentInfo.mOriginalAddress; currentPos = currentInfo.mParentAddress + currentInfo.mOriginalAddress;
} }
if (DBG && loopCounter++ > MAX_JUMPS) { if (DBG && loopCounter++ > MAX_JUMPS) {
MakedictLog.d("Too many jumps - probably a bug"); MakedictLog.d("Too many jumps - probably a bug");
} }
} while (BinaryDictIOUtils.isMovedGroup(currentInfo.mFlags, options)); } while (BinaryDictIOUtils.isMovedPtNode(currentInfo.mFlags, options));
if (Integer.MIN_VALUE == frequency) frequency = currentInfo.mFrequency; if (Integer.MIN_VALUE == frequency) frequency = currentInfo.mFrequency;
builder.insert(0, builder.insert(0,
new String(currentInfo.mCharacters, 0, currentInfo.mCharacters.length)); new String(currentInfo.mCharacters, 0, currentInfo.mCharacters.length));
@ -395,14 +396,14 @@ public final class BinaryDictDecoderUtils {
final FormatOptions options) { final FormatOptions options) {
final DictBuffer dictBuffer = dictDecoder.getDictBuffer(); final DictBuffer dictBuffer = dictDecoder.getDictBuffer();
dictBuffer.position(headerSize); dictBuffer.position(headerSize);
final int count = readCharGroupCount(dictBuffer); final int count = readPtNodeCount(dictBuffer);
int groupPos = headerSize + BinaryDictIOUtils.getGroupCountSize(count); int groupPos = headerSize + BinaryDictIOUtils.getPtNodeCountSize(count);
final StringBuilder builder = new StringBuilder(); final StringBuilder builder = new StringBuilder();
WeightedString result = null; WeightedString result = null;
CharGroupInfo last = null; PtNodeInfo last = null;
for (int i = count - 1; i >= 0; --i) { for (int i = count - 1; i >= 0; --i) {
CharGroupInfo info = dictDecoder.readPtNode(groupPos, options); PtNodeInfo info = dictDecoder.readPtNode(groupPos, options);
groupPos = info.mEndAddress; groupPos = info.mEndAddress;
if (info.mOriginalAddress == pos) { if (info.mOriginalAddress == pos) {
builder.append(new String(info.mCharacters, 0, info.mCharacters.length)); builder.append(new String(info.mCharacters, 0, info.mCharacters.length));
@ -414,8 +415,8 @@ public final class BinaryDictDecoderUtils {
if (null == last) continue; if (null == last) continue;
builder.append(new String(last.mCharacters, 0, last.mCharacters.length)); builder.append(new String(last.mCharacters, 0, last.mCharacters.length));
dictBuffer.position(last.mChildrenAddress); dictBuffer.position(last.mChildrenAddress);
i = readCharGroupCount(dictBuffer); i = readPtNodeCount(dictBuffer);
groupPos = last.mChildrenAddress + BinaryDictIOUtils.getGroupCountSize(i); groupPos = last.mChildrenAddress + BinaryDictIOUtils.getPtNodeCountSize(i);
last = null; last = null;
continue; continue;
} }
@ -424,8 +425,8 @@ public final class BinaryDictDecoderUtils {
if (0 == i && BinaryDictIOUtils.hasChildrenAddress(last.mChildrenAddress)) { if (0 == i && BinaryDictIOUtils.hasChildrenAddress(last.mChildrenAddress)) {
builder.append(new String(last.mCharacters, 0, last.mCharacters.length)); builder.append(new String(last.mCharacters, 0, last.mCharacters.length));
dictBuffer.position(last.mChildrenAddress); dictBuffer.position(last.mChildrenAddress);
i = readCharGroupCount(dictBuffer); i = readPtNodeCount(dictBuffer);
groupPos = last.mChildrenAddress + BinaryDictIOUtils.getGroupCountSize(i); groupPos = last.mChildrenAddress + BinaryDictIOUtils.getPtNodeCountSize(i);
last = null; last = null;
continue; continue;
} }
@ -444,25 +445,25 @@ public final class BinaryDictDecoderUtils {
* @param dictDecoder the dict decoder, correctly positioned at the start of a node array. * @param dictDecoder the dict decoder, correctly positioned at the start of a node array.
* @param headerSize the size, in bytes, of the file header. * @param headerSize the size, in bytes, of the file header.
* @param reverseNodeArrayMap a mapping from addresses to already read node arrays. * @param reverseNodeArrayMap a mapping from addresses to already read node arrays.
* @param reverseGroupMap a mapping from addresses to already read character groups. * @param reversePtNodeMap a mapping from addresses to already read PtNodes.
* @param options file format options. * @param options file format options.
* @return the read node array with all its children already read. * @return the read node array with all its children already read.
*/ */
private static PtNodeArray readNodeArray(final Ver3DictDecoder dictDecoder, private static PtNodeArray readNodeArray(final Ver3DictDecoder dictDecoder,
final int headerSize, final Map<Integer, PtNodeArray> reverseNodeArrayMap, final int headerSize, final Map<Integer, PtNodeArray> reverseNodeArrayMap,
final Map<Integer, CharGroup> reverseGroupMap, final FormatOptions options) final Map<Integer, PtNode> reversePtNodeMap, final FormatOptions options)
throws IOException { throws IOException {
final DictBuffer dictBuffer = dictDecoder.getDictBuffer(); final DictBuffer dictBuffer = dictDecoder.getDictBuffer();
final ArrayList<CharGroup> nodeArrayContents = new ArrayList<CharGroup>(); final ArrayList<PtNode> nodeArrayContents = new ArrayList<PtNode>();
final int nodeArrayOriginPos = dictBuffer.position(); final int nodeArrayOriginPos = dictBuffer.position();
do { // Scan the linked-list node. do { // Scan the linked-list node.
final int nodeArrayHeadPos = dictBuffer.position(); final int nodeArrayHeadPos = dictBuffer.position();
final int count = readCharGroupCount(dictBuffer); final int count = readPtNodeCount(dictBuffer);
int groupOffsetPos = nodeArrayHeadPos + BinaryDictIOUtils.getGroupCountSize(count); int groupOffsetPos = nodeArrayHeadPos + BinaryDictIOUtils.getPtNodeCountSize(count);
for (int i = count; i > 0; --i) { // Scan the array of CharGroup. for (int i = count; i > 0; --i) { // Scan the array of PtNode.
CharGroupInfo info = dictDecoder.readPtNode(groupOffsetPos, options); PtNodeInfo info = dictDecoder.readPtNode(groupOffsetPos, options);
if (BinaryDictIOUtils.isMovedGroup(info.mFlags, options)) continue; if (BinaryDictIOUtils.isMovedPtNode(info.mFlags, options)) continue;
ArrayList<WeightedString> shortcutTargets = info.mShortcutTargets; ArrayList<WeightedString> shortcutTargets = info.mShortcutTargets;
ArrayList<WeightedString> bigrams = null; ArrayList<WeightedString> bigrams = null;
if (null != info.mBigrams) { if (null != info.mBigrams) {
@ -482,17 +483,17 @@ public final class BinaryDictDecoderUtils {
final int currentPosition = dictBuffer.position(); final int currentPosition = dictBuffer.position();
dictBuffer.position(info.mChildrenAddress); dictBuffer.position(info.mChildrenAddress);
children = readNodeArray(dictDecoder, headerSize, reverseNodeArrayMap, children = readNodeArray(dictDecoder, headerSize, reverseNodeArrayMap,
reverseGroupMap, options); reversePtNodeMap, options);
dictBuffer.position(currentPosition); dictBuffer.position(currentPosition);
} }
nodeArrayContents.add( nodeArrayContents.add(
new CharGroup(info.mCharacters, shortcutTargets, bigrams, new PtNode(info.mCharacters, shortcutTargets, bigrams,
info.mFrequency, info.mFrequency,
0 != (info.mFlags & FormatSpec.FLAG_IS_NOT_A_WORD), 0 != (info.mFlags & FormatSpec.FLAG_IS_NOT_A_WORD),
0 != (info.mFlags & FormatSpec.FLAG_IS_BLACKLISTED), children)); 0 != (info.mFlags & FormatSpec.FLAG_IS_BLACKLISTED), children));
} else { } else {
nodeArrayContents.add( nodeArrayContents.add(
new CharGroup(info.mCharacters, shortcutTargets, bigrams, new PtNode(info.mCharacters, shortcutTargets, bigrams,
info.mFrequency, info.mFrequency,
0 != (info.mFlags & FormatSpec.FLAG_IS_NOT_A_WORD), 0 != (info.mFlags & FormatSpec.FLAG_IS_NOT_A_WORD),
0 != (info.mFlags & FormatSpec.FLAG_IS_BLACKLISTED))); 0 != (info.mFlags & FormatSpec.FLAG_IS_BLACKLISTED)));
@ -566,9 +567,9 @@ public final class BinaryDictDecoderUtils {
final FileHeader fileHeader = dictDecoder.readHeader(); final FileHeader fileHeader = dictDecoder.readHeader();
Map<Integer, PtNodeArray> reverseNodeArrayMapping = new TreeMap<Integer, PtNodeArray>(); Map<Integer, PtNodeArray> reverseNodeArrayMapping = new TreeMap<Integer, PtNodeArray>();
Map<Integer, CharGroup> reverseGroupMapping = new TreeMap<Integer, CharGroup>(); Map<Integer, PtNode> reversePtNodeMapping = new TreeMap<Integer, PtNode>();
final PtNodeArray root = readNodeArray(dictDecoder, fileHeader.mHeaderSize, final PtNodeArray root = readNodeArray(dictDecoder, fileHeader.mHeaderSize,
reverseNodeArrayMapping, reverseGroupMapping, fileHeader.mFormatOptions); reverseNodeArrayMapping, reversePtNodeMapping, fileHeader.mFormatOptions);
FusionDictionary newDict = new FusionDictionary(root, fileHeader.mDictionaryOptions); FusionDictionary newDict = new FusionDictionary(root, fileHeader.mDictionaryOptions);
if (null != dict) { if (null != dict) {

View File

@ -18,7 +18,7 @@ package com.android.inputmethod.latin.makedict;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
@ -33,6 +33,8 @@ import java.util.Iterator;
* Encodes binary files for a FusionDictionary. * Encodes binary files for a FusionDictionary.
* *
* All the methods in this class are static. * All the methods in this class are static.
*
* TODO: Rename this class to DictEncoderUtils.
*/ */
public class BinaryDictEncoderUtils { public class BinaryDictEncoderUtils {
@ -58,46 +60,46 @@ public class BinaryDictEncoderUtils {
* @param characters the character array * @param characters the character array
* @return the size of the char array, including the terminator if any * @return the size of the char array, including the terminator if any
*/ */
static int getGroupCharactersSize(final int[] characters) { static int getPtNodeCharactersSize(final int[] characters) {
int size = CharEncoding.getCharArraySize(characters); int size = CharEncoding.getCharArraySize(characters);
if (characters.length > 1) size += FormatSpec.GROUP_TERMINATOR_SIZE; if (characters.length > 1) size += FormatSpec.PTNODE_TERMINATOR_SIZE;
return size; return size;
} }
/** /**
* Compute the binary size of the character array in a group * Compute the binary size of the character array in a PtNode
* *
* If only one character, this is the size of this character. If many, it's the sum of their * If only one character, this is the size of this character. If many, it's the sum of their
* sizes + 1 byte for the terminator. * sizes + 1 byte for the terminator.
* *
* @param group the group * @param ptNode the PtNode
* @return the size of the char array, including the terminator if any * @return the size of the char array, including the terminator if any
*/ */
private static int getGroupCharactersSize(final CharGroup group) { private static int getPtNodeCharactersSize(final PtNode ptNode) {
return getGroupCharactersSize(group.mChars); return getPtNodeCharactersSize(ptNode.mChars);
} }
/** /**
* Compute the binary size of the group count for a node array. * Compute the binary size of the PtNode count for a node array.
* @param nodeArray the nodeArray * @param nodeArray the nodeArray
* @return the size of the group count, either 1 or 2 bytes. * @return the size of the PtNode count, either 1 or 2 bytes.
*/ */
private static int getGroupCountSize(final PtNodeArray nodeArray) { private static int getPtNodeCountSize(final PtNodeArray nodeArray) {
return BinaryDictIOUtils.getGroupCountSize(nodeArray.mData.size()); return BinaryDictIOUtils.getPtNodeCountSize(nodeArray.mData.size());
} }
/** /**
* Compute the size of a shortcut in bytes. * Compute the size of a shortcut in bytes.
*/ */
private static int getShortcutSize(final WeightedString shortcut) { private static int getShortcutSize(final WeightedString shortcut) {
int size = FormatSpec.GROUP_ATTRIBUTE_FLAGS_SIZE; int size = FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE;
final String word = shortcut.mWord; final String word = shortcut.mWord;
final int length = word.length(); final int length = word.length();
for (int i = 0; i < length; i = word.offsetByCodePoints(i, 1)) { for (int i = 0; i < length; i = word.offsetByCodePoints(i, 1)) {
final int codePoint = word.codePointAt(i); final int codePoint = word.codePointAt(i);
size += CharEncoding.getCharSize(codePoint); size += CharEncoding.getCharSize(codePoint);
} }
size += FormatSpec.GROUP_TERMINATOR_SIZE; size += FormatSpec.PTNODE_TERMINATOR_SIZE;
return size; return size;
} }
@ -109,7 +111,7 @@ public class BinaryDictEncoderUtils {
*/ */
static int getShortcutListSize(final ArrayList<WeightedString> shortcutList) { static int getShortcutListSize(final ArrayList<WeightedString> shortcutList) {
if (null == shortcutList || shortcutList.isEmpty()) return 0; if (null == shortcutList || shortcutList.isEmpty()) return 0;
int size = FormatSpec.GROUP_SHORTCUT_LIST_SIZE_SIZE; int size = FormatSpec.PTNODE_SHORTCUT_LIST_SIZE_SIZE;
for (final WeightedString shortcut : shortcutList) { for (final WeightedString shortcut : shortcutList) {
size += getShortcutSize(shortcut); size += getShortcutSize(shortcut);
} }
@ -117,60 +119,60 @@ public class BinaryDictEncoderUtils {
} }
/** /**
* Compute the maximum size of a CharGroup, assuming 3-byte addresses for everything. * Compute the maximum size of a PtNode, assuming 3-byte addresses for everything.
* *
* @param group the CharGroup to compute the size of. * @param ptNode the PtNode to compute the size of.
* @param options file format options. * @param options file format options.
* @return the maximum size of the group. * @return the maximum size of the PtNode.
*/ */
private static int getCharGroupMaximumSize(final CharGroup group, final FormatOptions options) { private static int getPtNodeMaximumSize(final PtNode ptNode, final FormatOptions options) {
int size = getGroupHeaderSize(group, options); int size = getNodeHeaderSize(ptNode, options);
// If terminal, one byte for the frequency // If terminal, one byte for the frequency
if (group.isTerminal()) size += FormatSpec.GROUP_FREQUENCY_SIZE; if (ptNode.isTerminal()) size += FormatSpec.PTNODE_FREQUENCY_SIZE;
size += FormatSpec.GROUP_MAX_ADDRESS_SIZE; // For children address size += FormatSpec.PTNODE_MAX_ADDRESS_SIZE; // For children address
size += getShortcutListSize(group.mShortcutTargets); size += getShortcutListSize(ptNode.mShortcutTargets);
if (null != group.mBigrams) { if (null != ptNode.mBigrams) {
size += (FormatSpec.GROUP_ATTRIBUTE_FLAGS_SIZE size += (FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE
+ FormatSpec.GROUP_ATTRIBUTE_MAX_ADDRESS_SIZE) + FormatSpec.PTNODE_ATTRIBUTE_MAX_ADDRESS_SIZE)
* group.mBigrams.size(); * ptNode.mBigrams.size();
} }
return size; return size;
} }
/** /**
* Compute the maximum size of each node of a node array, assuming 3-byte addresses for * Compute the maximum size of each PtNode of a PtNode array, assuming 3-byte addresses for
* everything, and caches it in the `mCachedSize' member of the nodes; deduce the size of * everything, and caches it in the `mCachedSize' member of the nodes; deduce the size of
* the containing node array, and cache it in its 'mCachedSize' member. * the containing node array, and cache it in its 'mCachedSize' member.
* *
* @param nodeArray the node array to compute the maximum size of. * @param ptNodeArray the node array to compute the maximum size of.
* @param options file format options. * @param options file format options.
*/ */
private static void calculateNodeArrayMaximumSize(final PtNodeArray nodeArray, private static void calculatePtNodeArrayMaximumSize(final PtNodeArray ptNodeArray,
final FormatOptions options) { final FormatOptions options) {
int size = getGroupCountSize(nodeArray); int size = getPtNodeCountSize(ptNodeArray);
for (CharGroup g : nodeArray.mData) { for (PtNode node : ptNodeArray.mData) {
final int groupSize = getCharGroupMaximumSize(g, options); final int nodeSize = getPtNodeMaximumSize(node, options);
g.mCachedSize = groupSize; node.mCachedSize = nodeSize;
size += groupSize; size += nodeSize;
} }
if (options.mSupportsDynamicUpdate) { if (options.mSupportsDynamicUpdate) {
size += FormatSpec.FORWARD_LINK_ADDRESS_SIZE; size += FormatSpec.FORWARD_LINK_ADDRESS_SIZE;
} }
nodeArray.mCachedSize = size; ptNodeArray.mCachedSize = size;
} }
/** /**
* Compute the size of the header (flag + [parent address] + characters size) of a CharGroup. * Compute the size of the header (flag + [parent address] + characters size) of a PtNode.
* *
* @param group the group of which to compute the size of the header * @param ptNode the PtNode of which to compute the size of the header
* @param options file format options. * @param options file format options.
*/ */
private static int getGroupHeaderSize(final CharGroup group, final FormatOptions options) { private static int getNodeHeaderSize(final PtNode ptNode, final FormatOptions options) {
if (BinaryDictIOUtils.supportsDynamicUpdate(options)) { if (BinaryDictIOUtils.supportsDynamicUpdate(options)) {
return FormatSpec.GROUP_FLAGS_SIZE + FormatSpec.PARENT_ADDRESS_SIZE return FormatSpec.PTNODE_FLAGS_SIZE + FormatSpec.PARENT_ADDRESS_SIZE
+ getGroupCharactersSize(group); + getPtNodeCharactersSize(ptNode);
} else { } else {
return FormatSpec.GROUP_FLAGS_SIZE + getGroupCharactersSize(group); return FormatSpec.PTNODE_FLAGS_SIZE + getPtNodeCharactersSize(ptNode);
} }
} }
@ -203,14 +205,14 @@ public class BinaryDictEncoderUtils {
// cache performance and dictionary size. // cache performance and dictionary size.
/* package for tests */ static ArrayList<PtNodeArray> flattenTree( /* package for tests */ static ArrayList<PtNodeArray> flattenTree(
final PtNodeArray rootNodeArray) { final PtNodeArray rootNodeArray) {
final int treeSize = FusionDictionary.countCharGroups(rootNodeArray); final int treeSize = FusionDictionary.countPtNodes(rootNodeArray);
MakedictLog.i("Counted nodes : " + treeSize); MakedictLog.i("Counted nodes : " + treeSize);
final ArrayList<PtNodeArray> flatTree = new ArrayList<PtNodeArray>(treeSize); final ArrayList<PtNodeArray> flatTree = new ArrayList<PtNodeArray>(treeSize);
return flattenTreeInner(flatTree, rootNodeArray); return flattenTreeInner(flatTree, rootNodeArray);
} }
private static ArrayList<PtNodeArray> flattenTreeInner(final ArrayList<PtNodeArray> list, private static ArrayList<PtNodeArray> flattenTreeInner(final ArrayList<PtNodeArray> list,
final PtNodeArray nodeArray) { final PtNodeArray ptNodeArray) {
// Removing the node is necessary if the tails are merged, because we would then // Removing the node is necessary if the tails are merged, because we would then
// add the same node several times when we only want it once. A number of places in // add the same node several times when we only want it once. A number of places in
// the code also depends on any node being only once in the list. // the code also depends on any node being only once in the list.
@ -228,11 +230,11 @@ public class BinaryDictEncoderUtils {
// this simple list.remove operation O(n*n) overall. On Android this overhead is very // this simple list.remove operation O(n*n) overall. On Android this overhead is very
// high. // high.
// For future reference, the code to remove duplicate is a simple : list.remove(node); // For future reference, the code to remove duplicate is a simple : list.remove(node);
list.add(nodeArray); list.add(ptNodeArray);
final ArrayList<CharGroup> branches = nodeArray.mData; final ArrayList<PtNode> branches = ptNodeArray.mData;
final int nodeSize = branches.size(); final int nodeSize = branches.size();
for (CharGroup group : branches) { for (PtNode ptNode : branches) {
if (null != group.mChildren) flattenTreeInner(list, group.mChildren); if (null != ptNode.mChildren) flattenTreeInner(list, ptNode.mChildren);
} }
return list; return list;
} }
@ -248,7 +250,7 @@ public class BinaryDictEncoderUtils {
* from the new position in the current node array to the new position in the target node * from the new position in the current node array to the new position in the target node
* array. * array.
* *
* @param currentNodeArray node array containing the CharGroup where the offset will be written * @param currentNodeArray node array containing the PtNode where the offset will be written
* @param offsetFromStartOfCurrentNodeArray offset, in bytes, from the start of currentNodeArray * @param offsetFromStartOfCurrentNodeArray offset, in bytes, from the start of currentNodeArray
* @param targetNodeArray the target node array to get the offset to * @param targetNodeArray the target node array to get the offset to
* @return the offset to the target node array * @return the offset to the target node array
@ -269,20 +271,20 @@ public class BinaryDictEncoderUtils {
} }
/** /**
* Get the offset from a position inside a current node array to a target CharGroup, during * Get the offset from a position inside a current node array to a target PtNode, during
* update. * update.
* *
* @param currentNodeArray node array containing the CharGroup where the offset will be written * @param currentNodeArray node array containing the PtNode where the offset will be written
* @param offsetFromStartOfCurrentNodeArray offset, in bytes, from the start of currentNodeArray * @param offsetFromStartOfCurrentNodeArray offset, in bytes, from the start of currentNodeArray
* @param targetCharGroup the target CharGroup to get the offset to * @param targetPtNode the target PtNode to get the offset to
* @return the offset to the target CharGroup * @return the offset to the target PtNode
*/ */
// TODO: is there any way to factorize this method with the one above? // TODO: is there any way to factorize this method with the one above?
private static int getOffsetToTargetCharGroupDuringUpdate(final PtNodeArray currentNodeArray, private static int getOffsetToTargetPtNodeDuringUpdate(final PtNodeArray currentNodeArray,
final int offsetFromStartOfCurrentNodeArray, final CharGroup targetCharGroup) { final int offsetFromStartOfCurrentNodeArray, final PtNode targetPtNode) {
final int oldOffsetBasePoint = currentNodeArray.mCachedAddressBeforeUpdate final int oldOffsetBasePoint = currentNodeArray.mCachedAddressBeforeUpdate
+ offsetFromStartOfCurrentNodeArray; + offsetFromStartOfCurrentNodeArray;
final boolean isTargetBeforeCurrent = (targetCharGroup.mCachedAddressBeforeUpdate final boolean isTargetBeforeCurrent = (targetPtNode.mCachedAddressBeforeUpdate
< oldOffsetBasePoint); < oldOffsetBasePoint);
// If the target is before the current node array, then its address has already been // If the target is before the current node array, then its address has already been
// updated. We can use the AfterUpdate member, and compare it to our own member after // updated. We can use the AfterUpdate member, and compare it to our own member after
@ -292,9 +294,9 @@ public class BinaryDictEncoderUtils {
if (isTargetBeforeCurrent) { if (isTargetBeforeCurrent) {
final int newOffsetBasePoint = currentNodeArray.mCachedAddressAfterUpdate final int newOffsetBasePoint = currentNodeArray.mCachedAddressAfterUpdate
+ offsetFromStartOfCurrentNodeArray; + offsetFromStartOfCurrentNodeArray;
return targetCharGroup.mCachedAddressAfterUpdate - newOffsetBasePoint; return targetPtNode.mCachedAddressAfterUpdate - newOffsetBasePoint;
} else { } else {
return targetCharGroup.mCachedAddressBeforeUpdate - oldOffsetBasePoint; return targetPtNode.mCachedAddressBeforeUpdate - oldOffsetBasePoint;
} }
} }
@ -308,49 +310,49 @@ public class BinaryDictEncoderUtils {
* contents (as in, any of the addresses stored in the cache fields) have changed with * contents (as in, any of the addresses stored in the cache fields) have changed with
* respect to their previous value. * respect to their previous value.
* *
* @param nodeArray the node array to compute the size of. * @param ptNodeArray the node array to compute the size of.
* @param dict the dictionary in which the word/attributes are to be found. * @param dict the dictionary in which the word/attributes are to be found.
* @param formatOptions file format options. * @param formatOptions file format options.
* @return false if none of the cached addresses inside the node array changed, true otherwise. * @return false if none of the cached addresses inside the node array changed, true otherwise.
*/ */
private static boolean computeActualNodeArraySize(final PtNodeArray nodeArray, private static boolean computeActualPtNodeArraySize(final PtNodeArray ptNodeArray,
final FusionDictionary dict, final FormatOptions formatOptions) { final FusionDictionary dict, final FormatOptions formatOptions) {
boolean changed = false; boolean changed = false;
int size = getGroupCountSize(nodeArray); int size = getPtNodeCountSize(ptNodeArray);
for (CharGroup group : nodeArray.mData) { for (PtNode ptNode : ptNodeArray.mData) {
group.mCachedAddressAfterUpdate = nodeArray.mCachedAddressAfterUpdate + size; ptNode.mCachedAddressAfterUpdate = ptNodeArray.mCachedAddressAfterUpdate + size;
if (group.mCachedAddressAfterUpdate != group.mCachedAddressBeforeUpdate) { if (ptNode.mCachedAddressAfterUpdate != ptNode.mCachedAddressBeforeUpdate) {
changed = true; changed = true;
} }
int groupSize = getGroupHeaderSize(group, formatOptions); int nodeSize = getNodeHeaderSize(ptNode, formatOptions);
if (group.isTerminal()) groupSize += FormatSpec.GROUP_FREQUENCY_SIZE; if (ptNode.isTerminal()) nodeSize += FormatSpec.PTNODE_FREQUENCY_SIZE;
if (null == group.mChildren && formatOptions.mSupportsDynamicUpdate) { if (null == ptNode.mChildren && formatOptions.mSupportsDynamicUpdate) {
groupSize += FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE; nodeSize += FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE;
} else if (null != group.mChildren) { } else if (null != ptNode.mChildren) {
if (formatOptions.mSupportsDynamicUpdate) { if (formatOptions.mSupportsDynamicUpdate) {
groupSize += FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE; nodeSize += FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE;
} else { } else {
groupSize += getByteSize(getOffsetToTargetNodeArrayDuringUpdate(nodeArray, nodeSize += getByteSize(getOffsetToTargetNodeArrayDuringUpdate(ptNodeArray,
groupSize + size, group.mChildren)); nodeSize + size, ptNode.mChildren));
} }
} }
groupSize += getShortcutListSize(group.mShortcutTargets); nodeSize += getShortcutListSize(ptNode.mShortcutTargets);
if (null != group.mBigrams) { if (null != ptNode.mBigrams) {
for (WeightedString bigram : group.mBigrams) { for (WeightedString bigram : ptNode.mBigrams) {
final int offset = getOffsetToTargetCharGroupDuringUpdate(nodeArray, final int offset = getOffsetToTargetPtNodeDuringUpdate(ptNodeArray,
groupSize + size + FormatSpec.GROUP_FLAGS_SIZE, nodeSize + size + FormatSpec.PTNODE_FLAGS_SIZE,
FusionDictionary.findWordInTree(dict.mRootNodeArray, bigram.mWord)); FusionDictionary.findWordInTree(dict.mRootNodeArray, bigram.mWord));
groupSize += getByteSize(offset) + FormatSpec.GROUP_FLAGS_SIZE; nodeSize += getByteSize(offset) + FormatSpec.PTNODE_FLAGS_SIZE;
} }
} }
group.mCachedSize = groupSize; ptNode.mCachedSize = nodeSize;
size += groupSize; size += nodeSize;
} }
if (formatOptions.mSupportsDynamicUpdate) { if (formatOptions.mSupportsDynamicUpdate) {
size += FormatSpec.FORWARD_LINK_ADDRESS_SIZE; size += FormatSpec.FORWARD_LINK_ADDRESS_SIZE;
} }
if (nodeArray.mCachedSize != size) { if (ptNodeArray.mCachedSize != size) {
nodeArray.mCachedSize = size; ptNodeArray.mCachedSize = size;
changed = true; changed = true;
} }
return changed; return changed;
@ -363,19 +365,19 @@ public class BinaryDictEncoderUtils {
* @param formatOptions file format options. * @param formatOptions file format options.
* @return the byte size of the entire stack. * @return the byte size of the entire stack.
*/ */
private static int initializeNodeArraysCachedAddresses(final ArrayList<PtNodeArray> flatNodes, private static int initializePtNodeArraysCachedAddresses(final ArrayList<PtNodeArray> flatNodes,
final FormatOptions formatOptions) { final FormatOptions formatOptions) {
int nodeArrayOffset = 0; int nodeArrayOffset = 0;
for (final PtNodeArray nodeArray : flatNodes) { for (final PtNodeArray nodeArray : flatNodes) {
nodeArray.mCachedAddressBeforeUpdate = nodeArrayOffset; nodeArray.mCachedAddressBeforeUpdate = nodeArrayOffset;
int groupCountSize = getGroupCountSize(nodeArray); int nodeCountSize = getPtNodeCountSize(nodeArray);
int groupOffset = 0; int nodeffset = 0;
for (final CharGroup g : nodeArray.mData) { for (final PtNode ptNode : nodeArray.mData) {
g.mCachedAddressBeforeUpdate = g.mCachedAddressAfterUpdate = ptNode.mCachedAddressBeforeUpdate = ptNode.mCachedAddressAfterUpdate =
groupCountSize + nodeArrayOffset + groupOffset; nodeCountSize + nodeArrayOffset + nodeffset;
groupOffset += g.mCachedSize; nodeffset += ptNode.mCachedSize;
} }
final int nodeSize = groupCountSize + groupOffset final int nodeSize = nodeCountSize + nodeffset
+ (formatOptions.mSupportsDynamicUpdate + (formatOptions.mSupportsDynamicUpdate
? FormatSpec.FORWARD_LINK_ADDRESS_SIZE : 0); ? FormatSpec.FORWARD_LINK_ADDRESS_SIZE : 0);
nodeArrayOffset += nodeArray.mCachedSize; nodeArrayOffset += nodeArray.mCachedSize;
@ -388,11 +390,11 @@ public class BinaryDictEncoderUtils {
* *
* @param flatNodes the list of node arrays. * @param flatNodes the list of node arrays.
*/ */
private static void updateNodeArraysCachedAddresses(final ArrayList<PtNodeArray> flatNodes) { private static void updatePtNodeArraysCachedAddresses(final ArrayList<PtNodeArray> flatNodes) {
for (final PtNodeArray nodeArray : flatNodes) { for (final PtNodeArray nodeArray : flatNodes) {
nodeArray.mCachedAddressBeforeUpdate = nodeArray.mCachedAddressAfterUpdate; nodeArray.mCachedAddressBeforeUpdate = nodeArray.mCachedAddressAfterUpdate;
for (final CharGroup g : nodeArray.mData) { for (final PtNode ptNode : nodeArray.mData) {
g.mCachedAddressBeforeUpdate = g.mCachedAddressAfterUpdate; ptNode.mCachedAddressBeforeUpdate = ptNode.mCachedAddressAfterUpdate;
} }
} }
} }
@ -407,38 +409,38 @@ public class BinaryDictEncoderUtils {
*/ */
private static void computeParentAddresses(final ArrayList<PtNodeArray> flatNodes) { private static void computeParentAddresses(final ArrayList<PtNodeArray> flatNodes) {
for (final PtNodeArray nodeArray : flatNodes) { for (final PtNodeArray nodeArray : flatNodes) {
for (final CharGroup group : nodeArray.mData) { for (final PtNode ptNode : nodeArray.mData) {
if (null != group.mChildren) { if (null != ptNode.mChildren) {
// Assign my address to children's parent address // Assign my address to children's parent address
// Here BeforeUpdate and AfterUpdate addresses have the same value, so it // Here BeforeUpdate and AfterUpdate addresses have the same value, so it
// does not matter which we use. // does not matter which we use.
group.mChildren.mCachedParentAddress = group.mCachedAddressAfterUpdate ptNode.mChildren.mCachedParentAddress = ptNode.mCachedAddressAfterUpdate
- group.mChildren.mCachedAddressAfterUpdate; - ptNode.mChildren.mCachedAddressAfterUpdate;
} }
} }
} }
} }
/** /**
* Compute the addresses and sizes of an ordered list of node arrays. * Compute the addresses and sizes of an ordered list of PtNode arrays.
* *
* This method takes a list of node arrays and will update their cached address and size * This method takes a list of PtNode arrays and will update their cached address and size
* values so that they can be written into a file. It determines the smallest size each of the * values so that they can be written into a file. It determines the smallest size each of the
* nodes arrays can be given the addresses of its children and attributes, and store that into * PtNode arrays can be given the addresses of its children and attributes, and store that into
* each node. * each PtNode.
* The order of the node is given by the order of the array. This method makes no effort * The order of the PtNode is given by the order of the array. This method makes no effort
* to find a good order; it only mechanically computes the size this order results in. * to find a good order; it only mechanically computes the size this order results in.
* *
* @param dict the dictionary * @param dict the dictionary
* @param flatNodes the ordered list of nodes arrays * @param flatNodes the ordered list of PtNode arrays
* @param formatOptions file format options. * @param formatOptions file format options.
* @return the same array it was passed. The nodes have been updated for address and size. * @return the same array it was passed. The nodes have been updated for address and size.
*/ */
private static ArrayList<PtNodeArray> computeAddresses(final FusionDictionary dict, private static ArrayList<PtNodeArray> computeAddresses(final FusionDictionary dict,
final ArrayList<PtNodeArray> flatNodes, final FormatOptions formatOptions) { final ArrayList<PtNodeArray> flatNodes, final FormatOptions formatOptions) {
// First get the worst possible sizes and offsets // First get the worst possible sizes and offsets
for (final PtNodeArray n : flatNodes) calculateNodeArrayMaximumSize(n, formatOptions); for (final PtNodeArray n : flatNodes) calculatePtNodeArrayMaximumSize(n, formatOptions);
final int offset = initializeNodeArraysCachedAddresses(flatNodes, formatOptions); final int offset = initializePtNodeArraysCachedAddresses(flatNodes, formatOptions);
MakedictLog.i("Compressing the array addresses. Original size : " + offset); MakedictLog.i("Compressing the array addresses. Original size : " + offset);
MakedictLog.i("(Recursively seen size : " + offset + ")"); MakedictLog.i("(Recursively seen size : " + offset + ")");
@ -447,19 +449,20 @@ public class BinaryDictEncoderUtils {
boolean changesDone = false; boolean changesDone = false;
do { do {
changesDone = false; changesDone = false;
int nodeArrayStartOffset = 0; int ptNodeArrayStartOffset = 0;
for (final PtNodeArray nodeArray : flatNodes) { for (final PtNodeArray ptNodeArray : flatNodes) {
nodeArray.mCachedAddressAfterUpdate = nodeArrayStartOffset; ptNodeArray.mCachedAddressAfterUpdate = ptNodeArrayStartOffset;
final int oldNodeArraySize = nodeArray.mCachedSize; final int oldNodeArraySize = ptNodeArray.mCachedSize;
final boolean changed = computeActualNodeArraySize(nodeArray, dict, formatOptions); final boolean changed =
final int newNodeArraySize = nodeArray.mCachedSize; computeActualPtNodeArraySize(ptNodeArray, dict, formatOptions);
final int newNodeArraySize = ptNodeArray.mCachedSize;
if (oldNodeArraySize < newNodeArraySize) { if (oldNodeArraySize < newNodeArraySize) {
throw new RuntimeException("Increased size ?!"); throw new RuntimeException("Increased size ?!");
} }
nodeArrayStartOffset += newNodeArraySize; ptNodeArrayStartOffset += newNodeArraySize;
changesDone |= changed; changesDone |= changed;
} }
updateNodeArraysCachedAddresses(flatNodes); updatePtNodeArraysCachedAddresses(flatNodes);
++passes; ++passes;
if (passes > MAX_PASSES) throw new RuntimeException("Too many passes - probably a bug"); if (passes > MAX_PASSES) throw new RuntimeException("Too many passes - probably a bug");
} while (changesDone); } while (changesDone);
@ -467,10 +470,10 @@ public class BinaryDictEncoderUtils {
if (formatOptions.mSupportsDynamicUpdate) { if (formatOptions.mSupportsDynamicUpdate) {
computeParentAddresses(flatNodes); computeParentAddresses(flatNodes);
} }
final PtNodeArray lastNodeArray = flatNodes.get(flatNodes.size() - 1); final PtNodeArray lastPtNodeArray = flatNodes.get(flatNodes.size() - 1);
MakedictLog.i("Compression complete in " + passes + " passes."); MakedictLog.i("Compression complete in " + passes + " passes.");
MakedictLog.i("After address compression : " MakedictLog.i("After address compression : "
+ (lastNodeArray.mCachedAddressAfterUpdate + lastNodeArray.mCachedSize)); + (lastPtNodeArray.mCachedAddressAfterUpdate + lastPtNodeArray.mCachedSize));
return flatNodes; return flatNodes;
} }
@ -478,25 +481,26 @@ public class BinaryDictEncoderUtils {
/** /**
* Sanity-checking method. * Sanity-checking method.
* *
* This method checks a list of node arrays for juxtaposition, that is, it will do * This method checks a list of PtNode arrays for juxtaposition, that is, it will do
* nothing if each node array's cached address is actually the previous node array's address * nothing if each node array's cached address is actually the previous node array's address
* plus the previous node's size. * plus the previous node's size.
* If this is not the case, it will throw an exception. * If this is not the case, it will throw an exception.
* *
* @param arrays the list of node arrays to check * @param arrays the list of node arrays to check
*/ */
private static void checkFlatNodeArrayList(final ArrayList<PtNodeArray> arrays) { private static void checkFlatPtNodeArrayList(final ArrayList<PtNodeArray> arrays) {
int offset = 0; int offset = 0;
int index = 0; int index = 0;
for (final PtNodeArray nodeArray : arrays) { for (final PtNodeArray ptNodeArray : arrays) {
// BeforeUpdate and AfterUpdate addresses are the same here, so it does not matter // BeforeUpdate and AfterUpdate addresses are the same here, so it does not matter
// which we use. // which we use.
if (nodeArray.mCachedAddressAfterUpdate != offset) { if (ptNodeArray.mCachedAddressAfterUpdate != offset) {
throw new RuntimeException("Wrong address for node " + index throw new RuntimeException("Wrong address for node " + index
+ " : expected " + offset + ", got " + nodeArray.mCachedAddressAfterUpdate); + " : expected " + offset + ", got " +
ptNodeArray.mCachedAddressAfterUpdate);
} }
++index; ++index;
offset += nodeArray.mCachedSize; offset += ptNodeArray.mCachedSize;
} }
} }
@ -552,19 +556,19 @@ public class BinaryDictEncoderUtils {
} }
/** /**
* Makes the flag value for a char group. * Makes the flag value for a PtNode.
* *
* @param hasMultipleChars whether the group has multiple chars. * @param hasMultipleChars whether the PtNode has multiple chars.
* @param isTerminal whether the group is terminal. * @param isTerminal whether the PtNode is terminal.
* @param childrenAddressSize the size of a children address. * @param childrenAddressSize the size of a children address.
* @param hasShortcuts whether the group has shortcuts. * @param hasShortcuts whether the PtNode has shortcuts.
* @param hasBigrams whether the group has bigrams. * @param hasBigrams whether the PtNode has bigrams.
* @param isNotAWord whether the group is not a word. * @param isNotAWord whether the PtNode is not a word.
* @param isBlackListEntry whether the group is a blacklist entry. * @param isBlackListEntry whether the PtNode is a blacklist entry.
* @param formatOptions file format options. * @param formatOptions file format options.
* @return the flags * @return the flags
*/ */
static int makeCharGroupFlags(final boolean hasMultipleChars, final boolean isTerminal, static int makePtNodeFlags(final boolean hasMultipleChars, final boolean isTerminal,
final int childrenAddressSize, final boolean hasShortcuts, final boolean hasBigrams, final int childrenAddressSize, final boolean hasShortcuts, final boolean hasBigrams,
final boolean isNotAWord, final boolean isBlackListEntry, final boolean isNotAWord, final boolean isBlackListEntry,
final FormatOptions formatOptions) { final FormatOptions formatOptions) {
@ -576,16 +580,16 @@ public class BinaryDictEncoderUtils {
} else if (true) { } else if (true) {
switch (childrenAddressSize) { switch (childrenAddressSize) {
case 1: case 1:
flags |= FormatSpec.FLAG_GROUP_ADDRESS_TYPE_ONEBYTE; flags |= FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_ONEBYTE;
break; break;
case 2: case 2:
flags |= FormatSpec.FLAG_GROUP_ADDRESS_TYPE_TWOBYTES; flags |= FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_TWOBYTES;
break; break;
case 3: case 3:
flags |= FormatSpec.FLAG_GROUP_ADDRESS_TYPE_THREEBYTES; flags |= FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_THREEBYTES;
break; break;
case 0: case 0:
flags |= FormatSpec.FLAG_GROUP_ADDRESS_TYPE_NOADDRESS; flags |= FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_NOADDRESS;
break; break;
default: default:
throw new RuntimeException("Node with a strange address"); throw new RuntimeException("Node with a strange address");
@ -598,12 +602,12 @@ public class BinaryDictEncoderUtils {
return flags; return flags;
} }
private static byte makeCharGroupFlags(final CharGroup group, final int groupAddress, private static byte makePtNodeFlags(final PtNode node, final int ptNodeAddress,
final int childrenOffset, final FormatOptions formatOptions) { final int childrenOffset, final FormatOptions formatOptions) {
return (byte) makeCharGroupFlags(group.mChars.length > 1, group.mFrequency >= 0, return (byte) makePtNodeFlags(node.mChars.length > 1, node.mFrequency >= 0,
getByteSize(childrenOffset), getByteSize(childrenOffset),
group.mShortcutTargets != null && !group.mShortcutTargets.isEmpty(), node.mShortcutTargets != null && !node.mShortcutTargets.isEmpty(),
group.mBigrams != null, group.mIsNotAWord, group.mIsBlacklistEntry, formatOptions); node.mBigrams != null, node.mIsNotAWord, node.mIsBlacklistEntry, formatOptions);
} }
/** /**
@ -618,17 +622,17 @@ public class BinaryDictEncoderUtils {
*/ */
private static final int makeBigramFlags(final boolean more, final int offset, private static final int makeBigramFlags(final boolean more, final int offset,
int bigramFrequency, final int unigramFrequency, final String word) { int bigramFrequency, final int unigramFrequency, final String word) {
int bigramFlags = (more ? FormatSpec.FLAG_ATTRIBUTE_HAS_NEXT : 0) int bigramFlags = (more ? FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT : 0)
+ (offset < 0 ? FormatSpec.FLAG_ATTRIBUTE_OFFSET_NEGATIVE : 0); + (offset < 0 ? FormatSpec.FLAG_BIGRAM_ATTR_OFFSET_NEGATIVE : 0);
switch (getByteSize(offset)) { switch (getByteSize(offset)) {
case 1: case 1:
bigramFlags |= FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE; bigramFlags |= FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_ONEBYTE;
break; break;
case 2: case 2:
bigramFlags |= FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES; bigramFlags |= FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_TWOBYTES;
break; break;
case 3: case 3:
bigramFlags |= FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES; bigramFlags |= FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_THREEBYTES;
break; break;
default: default:
throw new RuntimeException("Strange offset size"); throw new RuntimeException("Strange offset size");
@ -673,7 +677,7 @@ public class BinaryDictEncoderUtils {
// small over-estimation that we get in this case. TODO: actually remove this bigram // small over-estimation that we get in this case. TODO: actually remove this bigram
// if discretizedFrequency < 0. // if discretizedFrequency < 0.
final int finalBigramFrequency = discretizedFrequency > 0 ? discretizedFrequency : 0; final int finalBigramFrequency = discretizedFrequency > 0 ? discretizedFrequency : 0;
bigramFlags += finalBigramFrequency & FormatSpec.FLAG_ATTRIBUTE_FREQUENCY; bigramFlags += finalBigramFrequency & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY;
return bigramFlags; return bigramFlags;
} }
@ -698,8 +702,8 @@ public class BinaryDictEncoderUtils {
* @return the flags * @return the flags
*/ */
static final int makeShortcutFlags(final boolean more, final int frequency) { static final int makeShortcutFlags(final boolean more, final int frequency) {
return (more ? FormatSpec.FLAG_ATTRIBUTE_HAS_NEXT : 0) return (more ? FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT : 0)
+ (frequency & FormatSpec.FLAG_ATTRIBUTE_FREQUENCY); + (frequency & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY);
} }
private static final int writeParentAddress(final byte[] buffer, final int index, private static final int writeParentAddress(final byte[] buffer, final int index,
@ -722,68 +726,69 @@ public class BinaryDictEncoderUtils {
} }
/** /**
* Write a node array to memory. The node array is expected to have its final position cached. * Write a PtNodeArray to memory. The PtNodeArray is expected to have its final position cached.
* *
* @param dict the dictionary the node array is a part of (for relative offsets). * @param dict the dictionary the node array is a part of (for relative offsets).
* @param buffer the memory buffer to write to. * @param buffer the memory buffer to write to.
* @param nodeArray the node array to write. * @param ptNodeArray the node array to write.
* @param formatOptions file format options. * @param formatOptions file format options.
* @return the address of the END of the node. * @return the address of the END of the node.
*/ */
@SuppressWarnings("unused") @SuppressWarnings("unused")
private static int writePlacedNode(final FusionDictionary dict, byte[] buffer, private static int writePlacedNode(final FusionDictionary dict, byte[] buffer,
final PtNodeArray nodeArray, final FormatOptions formatOptions) { final PtNodeArray ptNodeArray, final FormatOptions formatOptions) {
// TODO: Make the code in common with BinaryDictIOUtils#writeCharGroup // TODO: Make the code in common with BinaryDictIOUtils#writePtNode
int index = nodeArray.mCachedAddressAfterUpdate; int index = ptNodeArray.mCachedAddressAfterUpdate;
final int groupCount = nodeArray.mData.size(); final int ptNodeCount = ptNodeArray.mData.size();
final int countSize = getGroupCountSize(nodeArray); final int countSize = getPtNodeCountSize(ptNodeArray);
final int parentAddress = nodeArray.mCachedParentAddress; final int parentAddress = ptNodeArray.mCachedParentAddress;
if (1 == countSize) { if (1 == countSize) {
buffer[index++] = (byte)groupCount; buffer[index++] = (byte)ptNodeCount;
} else if (2 == countSize) { } else if (2 == countSize) {
// We need to signal 2-byte size by setting the top bit of the MSB to 1, so // We need to signal 2-byte size by setting the top bit of the MSB to 1, so
// we | 0x80 to do this. // we | 0x80 to do this.
buffer[index++] = (byte)((groupCount >> 8) | 0x80); buffer[index++] = (byte)((ptNodeCount >> 8) | 0x80);
buffer[index++] = (byte)(groupCount & 0xFF); buffer[index++] = (byte)(ptNodeCount & 0xFF);
} else { } else {
throw new RuntimeException("Strange size from getGroupCountSize : " + countSize); throw new RuntimeException("Strange size from getGroupCountSize : " + countSize);
} }
int groupAddress = index; int ptNodeAddress = index;
for (int i = 0; i < groupCount; ++i) { for (int i = 0; i < ptNodeCount; ++i) {
final CharGroup group = nodeArray.mData.get(i); final PtNode ptNode = ptNodeArray.mData.get(i);
if (index != group.mCachedAddressAfterUpdate) { if (index != ptNode.mCachedAddressAfterUpdate) {
throw new RuntimeException("Bug: write index is not the same as the cached address " throw new RuntimeException("Bug: write index is not the same as the cached address "
+ "of the group : " + index + " <> " + group.mCachedAddressAfterUpdate); + "of the node : " + index + " <> " + ptNode.mCachedAddressAfterUpdate);
} }
groupAddress += getGroupHeaderSize(group, formatOptions); ptNodeAddress += getNodeHeaderSize(ptNode, formatOptions);
// Sanity checks. // Sanity checks.
if (DBG && group.mFrequency > FormatSpec.MAX_TERMINAL_FREQUENCY) { if (DBG && ptNode.mFrequency > FormatSpec.MAX_TERMINAL_FREQUENCY) {
throw new RuntimeException("A node has a frequency > " throw new RuntimeException("A node has a frequency > "
+ FormatSpec.MAX_TERMINAL_FREQUENCY + FormatSpec.MAX_TERMINAL_FREQUENCY
+ " : " + group.mFrequency); + " : " + ptNode.mFrequency);
} }
if (group.mFrequency >= 0) groupAddress += FormatSpec.GROUP_FREQUENCY_SIZE; if (ptNode.mFrequency >= 0) ptNodeAddress += FormatSpec.PTNODE_FREQUENCY_SIZE;
final int childrenOffset = null == group.mChildren final int childrenOffset = null == ptNode.mChildren
? FormatSpec.NO_CHILDREN_ADDRESS ? FormatSpec.NO_CHILDREN_ADDRESS
: group.mChildren.mCachedAddressAfterUpdate - groupAddress; : ptNode.mChildren.mCachedAddressAfterUpdate - ptNodeAddress;
buffer[index++] = buffer[index++] =
makeCharGroupFlags(group, groupAddress, childrenOffset, formatOptions); makePtNodeFlags(ptNode, ptNodeAddress, childrenOffset, formatOptions);
if (parentAddress == FormatSpec.NO_PARENT_ADDRESS) { if (parentAddress == FormatSpec.NO_PARENT_ADDRESS) {
index = writeParentAddress(buffer, index, parentAddress, formatOptions); index = writeParentAddress(buffer, index, parentAddress, formatOptions);
} else { } else {
index = writeParentAddress(buffer, index, parentAddress index = writeParentAddress(buffer, index, parentAddress
+ (nodeArray.mCachedAddressAfterUpdate - group.mCachedAddressAfterUpdate), + (ptNodeArray.mCachedAddressAfterUpdate
- ptNode.mCachedAddressAfterUpdate),
formatOptions); formatOptions);
} }
index = CharEncoding.writeCharArray(group.mChars, buffer, index); index = CharEncoding.writeCharArray(ptNode.mChars, buffer, index);
if (group.hasSeveralChars()) { if (ptNode.hasSeveralChars()) {
buffer[index++] = FormatSpec.GROUP_CHARACTERS_TERMINATOR; buffer[index++] = FormatSpec.PTNODE_CHARACTERS_TERMINATOR;
} }
if (group.mFrequency >= 0) { if (ptNode.mFrequency >= 0) {
buffer[index++] = (byte) group.mFrequency; buffer[index++] = (byte) ptNode.mFrequency;
} }
final int shift; final int shift;
@ -793,23 +798,24 @@ public class BinaryDictEncoderUtils {
shift = writeVariableAddress(buffer, index, childrenOffset); shift = writeVariableAddress(buffer, index, childrenOffset);
} }
index += shift; index += shift;
groupAddress += shift; ptNodeAddress += shift;
// Write shortcuts // Write shortcuts
if (null != group.mShortcutTargets && !group.mShortcutTargets.isEmpty()) { if (null != ptNode.mShortcutTargets && !ptNode.mShortcutTargets.isEmpty()) {
final int indexOfShortcutByteSize = index; final int indexOfShortcutByteSize = index;
index += FormatSpec.GROUP_SHORTCUT_LIST_SIZE_SIZE; index += FormatSpec.PTNODE_SHORTCUT_LIST_SIZE_SIZE;
groupAddress += FormatSpec.GROUP_SHORTCUT_LIST_SIZE_SIZE; ptNodeAddress += FormatSpec.PTNODE_SHORTCUT_LIST_SIZE_SIZE;
final Iterator<WeightedString> shortcutIterator = group.mShortcutTargets.iterator(); final Iterator<WeightedString> shortcutIterator =
ptNode.mShortcutTargets.iterator();
while (shortcutIterator.hasNext()) { while (shortcutIterator.hasNext()) {
final WeightedString target = shortcutIterator.next(); final WeightedString target = shortcutIterator.next();
++groupAddress; ++ptNodeAddress;
int shortcutFlags = makeShortcutFlags(shortcutIterator.hasNext(), int shortcutFlags = makeShortcutFlags(shortcutIterator.hasNext(),
target.mFrequency); target.mFrequency);
buffer[index++] = (byte)shortcutFlags; buffer[index++] = (byte)shortcutFlags;
final int shortcutShift = CharEncoding.writeString(buffer, index, target.mWord); final int shortcutShift = CharEncoding.writeString(buffer, index, target.mWord);
index += shortcutShift; index += shortcutShift;
groupAddress += shortcutShift; ptNodeAddress += shortcutShift;
} }
final int shortcutByteSize = index - indexOfShortcutByteSize; final int shortcutByteSize = index - indexOfShortcutByteSize;
if (shortcutByteSize > 0xFFFF) { if (shortcutByteSize > 0xFFFF) {
@ -819,22 +825,22 @@ public class BinaryDictEncoderUtils {
buffer[indexOfShortcutByteSize + 1] = (byte)(shortcutByteSize & 0xFF); buffer[indexOfShortcutByteSize + 1] = (byte)(shortcutByteSize & 0xFF);
} }
// Write bigrams // Write bigrams
if (null != group.mBigrams) { if (null != ptNode.mBigrams) {
final Iterator<WeightedString> bigramIterator = group.mBigrams.iterator(); final Iterator<WeightedString> bigramIterator = ptNode.mBigrams.iterator();
while (bigramIterator.hasNext()) { while (bigramIterator.hasNext()) {
final WeightedString bigram = bigramIterator.next(); final WeightedString bigram = bigramIterator.next();
final CharGroup target = final PtNode target =
FusionDictionary.findWordInTree(dict.mRootNodeArray, bigram.mWord); FusionDictionary.findWordInTree(dict.mRootNodeArray, bigram.mWord);
final int addressOfBigram = target.mCachedAddressAfterUpdate; final int addressOfBigram = target.mCachedAddressAfterUpdate;
final int unigramFrequencyForThisWord = target.mFrequency; final int unigramFrequencyForThisWord = target.mFrequency;
++groupAddress; ++ptNodeAddress;
final int offset = addressOfBigram - groupAddress; final int offset = addressOfBigram - ptNodeAddress;
int bigramFlags = makeBigramFlags(bigramIterator.hasNext(), offset, int bigramFlags = makeBigramFlags(bigramIterator.hasNext(), offset,
bigram.mFrequency, unigramFrequencyForThisWord, bigram.mWord); bigram.mFrequency, unigramFrequencyForThisWord, bigram.mWord);
buffer[index++] = (byte)bigramFlags; buffer[index++] = (byte)bigramFlags;
final int bigramShift = writeVariableAddress(buffer, index, Math.abs(offset)); final int bigramShift = writeVariableAddress(buffer, index, Math.abs(offset));
index += bigramShift; index += bigramShift;
groupAddress += bigramShift; ptNodeAddress += bigramShift;
} }
} }
@ -844,64 +850,64 @@ public class BinaryDictEncoderUtils {
= FormatSpec.NO_FORWARD_LINK_ADDRESS; = FormatSpec.NO_FORWARD_LINK_ADDRESS;
index += FormatSpec.FORWARD_LINK_ADDRESS_SIZE; index += FormatSpec.FORWARD_LINK_ADDRESS_SIZE;
} }
if (index != nodeArray.mCachedAddressAfterUpdate + nodeArray.mCachedSize) { if (index != ptNodeArray.mCachedAddressAfterUpdate + ptNodeArray.mCachedSize) {
throw new RuntimeException( throw new RuntimeException(
"Not the same size : written " + (index - nodeArray.mCachedAddressAfterUpdate) "Not the same size : written " + (index - ptNodeArray.mCachedAddressAfterUpdate)
+ " bytes from a node that should have " + nodeArray.mCachedSize + " bytes"); + " bytes from a node that should have " + ptNodeArray.mCachedSize + " bytes");
} }
return index; return index;
} }
/** /**
* Dumps a collection of useful statistics about a list of node arrays. * Dumps a collection of useful statistics about a list of PtNode arrays.
* *
* This prints purely informative stuff, like the total estimated file size, the * This prints purely informative stuff, like the total estimated file size, the
* number of node arrays, of character groups, the repartition of each address size, etc * number of PtNode arrays, of PtNodes, the repartition of each address size, etc
* *
* @param nodeArrays the list of node arrays. * @param ptNodeArrays the list of PtNode arrays.
*/ */
private static void showStatistics(ArrayList<PtNodeArray> nodeArrays) { private static void showStatistics(ArrayList<PtNodeArray> ptNodeArrays) {
int firstTerminalAddress = Integer.MAX_VALUE; int firstTerminalAddress = Integer.MAX_VALUE;
int lastTerminalAddress = Integer.MIN_VALUE; int lastTerminalAddress = Integer.MIN_VALUE;
int size = 0; int size = 0;
int charGroups = 0; int ptNodes = 0;
int maxGroups = 0; int maxNodes = 0;
int maxRuns = 0; int maxRuns = 0;
for (final PtNodeArray nodeArray : nodeArrays) { for (final PtNodeArray ptNodeArray : ptNodeArrays) {
if (maxGroups < nodeArray.mData.size()) maxGroups = nodeArray.mData.size(); if (maxNodes < ptNodeArray.mData.size()) maxNodes = ptNodeArray.mData.size();
for (final CharGroup cg : nodeArray.mData) { for (final PtNode ptNode : ptNodeArray.mData) {
++charGroups; ++ptNodes;
if (cg.mChars.length > maxRuns) maxRuns = cg.mChars.length; if (ptNode.mChars.length > maxRuns) maxRuns = ptNode.mChars.length;
if (cg.mFrequency >= 0) { if (ptNode.mFrequency >= 0) {
if (nodeArray.mCachedAddressAfterUpdate < firstTerminalAddress) if (ptNodeArray.mCachedAddressAfterUpdate < firstTerminalAddress)
firstTerminalAddress = nodeArray.mCachedAddressAfterUpdate; firstTerminalAddress = ptNodeArray.mCachedAddressAfterUpdate;
if (nodeArray.mCachedAddressAfterUpdate > lastTerminalAddress) if (ptNodeArray.mCachedAddressAfterUpdate > lastTerminalAddress)
lastTerminalAddress = nodeArray.mCachedAddressAfterUpdate; lastTerminalAddress = ptNodeArray.mCachedAddressAfterUpdate;
} }
} }
if (nodeArray.mCachedAddressAfterUpdate + nodeArray.mCachedSize > size) { if (ptNodeArray.mCachedAddressAfterUpdate + ptNodeArray.mCachedSize > size) {
size = nodeArray.mCachedAddressAfterUpdate + nodeArray.mCachedSize; size = ptNodeArray.mCachedAddressAfterUpdate + ptNodeArray.mCachedSize;
} }
} }
final int[] groupCounts = new int[maxGroups + 1]; final int[] ptNodeCounts = new int[maxNodes + 1];
final int[] runCounts = new int[maxRuns + 1]; final int[] runCounts = new int[maxRuns + 1];
for (final PtNodeArray nodeArray : nodeArrays) { for (final PtNodeArray ptNodeArray : ptNodeArrays) {
++groupCounts[nodeArray.mData.size()]; ++ptNodeCounts[ptNodeArray.mData.size()];
for (final CharGroup cg : nodeArray.mData) { for (final PtNode ptNode : ptNodeArray.mData) {
++runCounts[cg.mChars.length]; ++runCounts[ptNode.mChars.length];
} }
} }
MakedictLog.i("Statistics:\n" MakedictLog.i("Statistics:\n"
+ " total file size " + size + "\n" + " total file size " + size + "\n"
+ " " + nodeArrays.size() + " node arrays\n" + " " + ptNodeArrays.size() + " node arrays\n"
+ " " + charGroups + " groups (" + ((float)charGroups / nodeArrays.size()) + " " + ptNodes + " PtNodes (" + ((float)ptNodes / ptNodeArrays.size())
+ " groups per node)\n" + " PtNodes per node)\n"
+ " first terminal at " + firstTerminalAddress + "\n" + " first terminal at " + firstTerminalAddress + "\n"
+ " last terminal at " + lastTerminalAddress + "\n" + " last terminal at " + lastTerminalAddress + "\n"
+ " Group stats : max = " + maxGroups); + " PtNode stats : max = " + maxNodes);
for (int i = 0; i < groupCounts.length; ++i) { for (int i = 0; i < ptNodeCounts.length; ++i) {
MakedictLog.i(" " + i + " : " + groupCounts[i]); MakedictLog.i(" " + i + " : " + ptNodeCounts[i]);
} }
MakedictLog.i(" Character run stats : max = " + maxRuns); MakedictLog.i(" Character run stats : max = " + maxRuns);
for (int i = 0; i < runCounts.length; ++i) { for (int i = 0; i < runCounts.length; ++i) {
@ -922,7 +928,7 @@ public class BinaryDictEncoderUtils {
// Addresses are limited to 3 bytes, but since addresses can be relative to each node // Addresses are limited to 3 bytes, but since addresses can be relative to each node
// array, the structure itself is not limited to 16MB. However, if it is over 16MB deciding // array, the structure itself is not limited to 16MB. However, if it is over 16MB deciding
// the order of the node arrays becomes a quite complicated problem, because though the // the order of the PtNode arrays becomes a quite complicated problem, because though the
// dictionary itself does not have a size limit, each node array must still be within 16MB // dictionary itself does not have a size limit, each node array must still be within 16MB
// of all its children and parents. As long as this is ensured, the dictionary file may // of all its children and parents. As long as this is ensured, the dictionary file may
// grow to any size. // grow to any size.
@ -980,8 +986,8 @@ public class BinaryDictEncoderUtils {
MakedictLog.i("Computing addresses..."); MakedictLog.i("Computing addresses...");
computeAddresses(dict, flatNodes, formatOptions); computeAddresses(dict, flatNodes, formatOptions);
MakedictLog.i("Checking array..."); MakedictLog.i("Checking PtNode array...");
if (DBG) checkFlatNodeArrayList(flatNodes); if (DBG) checkFlatPtNodeArrayList(flatNodes);
// Create a buffer that matches the final dictionary size. // Create a buffer that matches the final dictionary size.
final PtNodeArray lastNodeArray = flatNodes.get(flatNodes.size() - 1); final PtNodeArray lastNodeArray = flatNodes.get(flatNodes.size() - 1);

View File

@ -22,7 +22,7 @@ import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncodin
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import com.android.inputmethod.latin.utils.ByteArrayDictBuffer; import com.android.inputmethod.latin.utils.ByteArrayDictBuffer;
@ -44,17 +44,17 @@ public final class BinaryDictIOUtils {
} }
private static final class Position { private static final class Position {
public static final int NOT_READ_GROUPCOUNT = -1; public static final int NOT_READ_PTNODE_COUNT = -1;
public int mAddress; public int mAddress;
public int mNumOfCharGroup; public int mNumOfPtNode;
public int mPosition; public int mPosition;
public int mLength; public int mLength;
public Position(int address, int length) { public Position(int address, int length) {
mAddress = address; mAddress = address;
mLength = length; mLength = length;
mNumOfCharGroup = NOT_READ_GROUPCOUNT; mNumOfPtNode = NOT_READ_PTNODE_COUNT;
} }
} }
@ -79,45 +79,45 @@ public final class BinaryDictIOUtils {
Position p = stack.peek(); Position p = stack.peek();
if (DBG) { if (DBG) {
MakedictLog.d("read: address=" + p.mAddress + ", numOfCharGroup=" + MakedictLog.d("read: address=" + p.mAddress + ", numOfPtNode=" +
p.mNumOfCharGroup + ", position=" + p.mPosition + ", length=" + p.mLength); p.mNumOfPtNode + ", position=" + p.mPosition + ", length=" + p.mLength);
} }
if (dictBuffer.position() != p.mAddress) dictBuffer.position(p.mAddress); if (dictBuffer.position() != p.mAddress) dictBuffer.position(p.mAddress);
if (index != p.mLength) index = p.mLength; if (index != p.mLength) index = p.mLength;
if (p.mNumOfCharGroup == Position.NOT_READ_GROUPCOUNT) { if (p.mNumOfPtNode == Position.NOT_READ_PTNODE_COUNT) {
p.mNumOfCharGroup = BinaryDictDecoderUtils.readCharGroupCount(dictBuffer); p.mNumOfPtNode = BinaryDictDecoderUtils.readPtNodeCount(dictBuffer);
p.mAddress += getGroupCountSize(p.mNumOfCharGroup); p.mAddress += getPtNodeCountSize(p.mNumOfPtNode);
p.mPosition = 0; p.mPosition = 0;
} }
if (p.mNumOfCharGroup == 0) { if (p.mNumOfPtNode == 0) {
stack.pop(); stack.pop();
continue; continue;
} }
CharGroupInfo info = dictDecoder.readPtNode(p.mAddress, formatOptions); PtNodeInfo info = dictDecoder.readPtNode(p.mAddress, formatOptions);
for (int i = 0; i < info.mCharacters.length; ++i) { for (int i = 0; i < info.mCharacters.length; ++i) {
pushedChars[index++] = info.mCharacters[i]; pushedChars[index++] = info.mCharacters[i];
} }
p.mPosition++; p.mPosition++;
final boolean isMovedGroup = isMovedGroup(info.mFlags, final boolean isMovedPtNode = isMovedPtNode(info.mFlags,
formatOptions); formatOptions);
final boolean isDeletedGroup = isDeletedGroup(info.mFlags, final boolean isDeletedPtNode = isDeletedPtNode(info.mFlags,
formatOptions); formatOptions);
if (!isMovedGroup && !isDeletedGroup if (!isMovedPtNode && !isDeletedPtNode
&& info.mFrequency != FusionDictionary.CharGroup.NOT_A_TERMINAL) {// found word && info.mFrequency != FusionDictionary.PtNode.NOT_A_TERMINAL) {// found word
words.put(info.mOriginalAddress, new String(pushedChars, 0, index)); words.put(info.mOriginalAddress, new String(pushedChars, 0, index));
frequencies.put(info.mOriginalAddress, info.mFrequency); frequencies.put(info.mOriginalAddress, info.mFrequency);
if (info.mBigrams != null) bigrams.put(info.mOriginalAddress, info.mBigrams); if (info.mBigrams != null) bigrams.put(info.mOriginalAddress, info.mBigrams);
} }
if (p.mPosition == p.mNumOfCharGroup) { if (p.mPosition == p.mNumOfPtNode) {
if (formatOptions.mSupportsDynamicUpdate) { if (formatOptions.mSupportsDynamicUpdate) {
final int forwardLinkAddress = dictBuffer.readUnsignedInt24(); final int forwardLinkAddress = dictBuffer.readUnsignedInt24();
if (forwardLinkAddress != FormatSpec.NO_FORWARD_LINK_ADDRESS) { if (forwardLinkAddress != FormatSpec.NO_FORWARD_LINK_ADDRESS) {
// The node array has a forward link. // The node array has a forward link.
p.mNumOfCharGroup = Position.NOT_READ_GROUPCOUNT; p.mNumOfPtNode = Position.NOT_READ_PTNODE_COUNT;
p.mAddress = forwardLinkAddress; p.mAddress = forwardLinkAddress;
} else { } else {
stack.pop(); stack.pop();
@ -126,11 +126,11 @@ public final class BinaryDictIOUtils {
stack.pop(); stack.pop();
} }
} else { } else {
// The node array has more groups. // The PtNode array has more PtNodes.
p.mAddress = dictBuffer.position(); p.mAddress = dictBuffer.position();
} }
if (!isMovedGroup && hasChildrenAddress(info.mChildrenAddress)) { if (!isMovedPtNode && hasChildrenAddress(info.mChildrenAddress)) {
final Position childrenPos = new Position(info.mChildrenAddress, index); final Position childrenPos = new Position(info.mChildrenAddress, index);
stack.push(childrenPos); stack.push(childrenPos);
} }
@ -159,7 +159,7 @@ public final class BinaryDictIOUtils {
} }
/** /**
* Gets the address of the last CharGroup of the exact matching word in the dictionary. * Gets the address of the last PtNode of the exact matching word in the dictionary.
* If no match is found, returns NOT_VALID_WORD. * If no match is found, returns NOT_VALID_WORD.
* *
* @param dictDecoder the dict decoder. * @param dictDecoder the dict decoder.
@ -182,17 +182,17 @@ public final class BinaryDictIOUtils {
if (wordPos >= wordLen) return FormatSpec.NOT_VALID_WORD; if (wordPos >= wordLen) return FormatSpec.NOT_VALID_WORD;
do { do {
final int charGroupCount = BinaryDictDecoderUtils.readCharGroupCount(dictBuffer); final int ptNodeCount = BinaryDictDecoderUtils.readPtNodeCount(dictBuffer);
boolean foundNextCharGroup = false; boolean foundNextPtNode = false;
for (int i = 0; i < charGroupCount; ++i) { for (int i = 0; i < ptNodeCount; ++i) {
final int charGroupPos = dictBuffer.position(); final int ptNodePos = dictBuffer.position();
final CharGroupInfo currentInfo = dictDecoder.readPtNode(charGroupPos, final PtNodeInfo currentInfo = dictDecoder.readPtNode(ptNodePos,
header.mFormatOptions); header.mFormatOptions);
final boolean isMovedGroup = isMovedGroup(currentInfo.mFlags, final boolean isMovedNode = isMovedPtNode(currentInfo.mFlags,
header.mFormatOptions); header.mFormatOptions);
final boolean isDeletedGroup = isDeletedGroup(currentInfo.mFlags, final boolean isDeletedNode = isDeletedPtNode(currentInfo.mFlags,
header.mFormatOptions); header.mFormatOptions);
if (isMovedGroup) continue; if (isMovedNode) continue;
boolean same = true; boolean same = true;
for (int p = 0, j = word.offsetByCodePoints(0, wordPos); for (int p = 0, j = word.offsetByCodePoints(0, wordPos);
p < currentInfo.mCharacters.length; p < currentInfo.mCharacters.length;
@ -205,30 +205,30 @@ public final class BinaryDictIOUtils {
} }
if (same) { if (same) {
// found the group matches the word. // found the PtNode that matches the word.
if (wordPos + currentInfo.mCharacters.length == wordLen) { if (wordPos + currentInfo.mCharacters.length == wordLen) {
if (currentInfo.mFrequency == CharGroup.NOT_A_TERMINAL if (currentInfo.mFrequency == PtNode.NOT_A_TERMINAL
|| isDeletedGroup) { || isDeletedNode) {
return FormatSpec.NOT_VALID_WORD; return FormatSpec.NOT_VALID_WORD;
} else { } else {
return charGroupPos; return ptNodePos;
} }
} }
wordPos += currentInfo.mCharacters.length; wordPos += currentInfo.mCharacters.length;
if (currentInfo.mChildrenAddress == FormatSpec.NO_CHILDREN_ADDRESS) { if (currentInfo.mChildrenAddress == FormatSpec.NO_CHILDREN_ADDRESS) {
return FormatSpec.NOT_VALID_WORD; return FormatSpec.NOT_VALID_WORD;
} }
foundNextCharGroup = true; foundNextPtNode = true;
dictBuffer.position(currentInfo.mChildrenAddress); dictBuffer.position(currentInfo.mChildrenAddress);
break; break;
} }
} }
// If we found the next char group, it is under the file pointer. // If we found the next PtNode, it is under the file pointer.
// But if not, we are at the end of this node array so we expect to have // But if not, we are at the end of this node array so we expect to have
// a forward link address that we need to consult and possibly resume // a forward link address that we need to consult and possibly resume
// search on the next node array in the linked list. // search on the next node array in the linked list.
if (foundNextCharGroup) break; if (foundNextPtNode) break;
if (!header.mFormatOptions.mSupportsDynamicUpdate) { if (!header.mFormatOptions.mSupportsDynamicUpdate) {
return FormatSpec.NOT_VALID_WORD; return FormatSpec.NOT_VALID_WORD;
} }
@ -289,8 +289,7 @@ public final class BinaryDictIOUtils {
return BinaryDictEncoderUtils.getByteSize(value); return BinaryDictEncoderUtils.getByteSize(value);
} }
static void skipCharGroup(final DictBuffer dictBuffer, static void skipPtNode(final DictBuffer dictBuffer, final FormatOptions formatOptions) {
final FormatOptions formatOptions) {
final int flags = dictBuffer.readUnsignedByte(); final int flags = dictBuffer.readUnsignedByte();
BinaryDictDecoderUtils.readParentAddress(dictBuffer, formatOptions); BinaryDictDecoderUtils.readParentAddress(dictBuffer, formatOptions);
skipString(dictBuffer, (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS) != 0); skipString(dictBuffer, (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS) != 0);
@ -299,27 +298,27 @@ public final class BinaryDictIOUtils {
if ((flags & FormatSpec.FLAG_HAS_SHORTCUT_TARGETS) != 0) { if ((flags & FormatSpec.FLAG_HAS_SHORTCUT_TARGETS) != 0) {
final int shortcutsSize = dictBuffer.readUnsignedShort(); final int shortcutsSize = dictBuffer.readUnsignedShort();
dictBuffer.position(dictBuffer.position() + shortcutsSize dictBuffer.position(dictBuffer.position() + shortcutsSize
- FormatSpec.GROUP_SHORTCUT_LIST_SIZE_SIZE); - FormatSpec.PTNODE_SHORTCUT_LIST_SIZE_SIZE);
} }
if ((flags & FormatSpec.FLAG_HAS_BIGRAMS) != 0) { if ((flags & FormatSpec.FLAG_HAS_BIGRAMS) != 0) {
int bigramCount = 0; int bigramCount = 0;
while (bigramCount++ < FormatSpec.MAX_BIGRAMS_IN_A_GROUP) { while (bigramCount++ < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
final int bigramFlags = dictBuffer.readUnsignedByte(); final int bigramFlags = dictBuffer.readUnsignedByte();
switch (bigramFlags & FormatSpec.MASK_ATTRIBUTE_ADDRESS_TYPE) { switch (bigramFlags & FormatSpec.MASK_BIGRAM_ATTR_ADDRESS_TYPE) {
case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE: case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_ONEBYTE:
dictBuffer.readUnsignedByte(); dictBuffer.readUnsignedByte();
break; break;
case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES: case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_TWOBYTES:
dictBuffer.readUnsignedShort(); dictBuffer.readUnsignedShort();
break; break;
case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES: case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_THREEBYTES:
dictBuffer.readUnsignedInt24(); dictBuffer.readUnsignedInt24();
break; break;
} }
if ((bigramFlags & FormatSpec.FLAG_ATTRIBUTE_HAS_NEXT) == 0) break; if ((bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT) == 0) break;
} }
if (bigramCount >= FormatSpec.MAX_BIGRAMS_IN_A_GROUP) { if (bigramCount >= FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
throw new RuntimeException("Too many bigrams in a group."); throw new RuntimeException("Too many bigrams in a PtNode.");
} }
} }
} }
@ -360,24 +359,24 @@ public final class BinaryDictIOUtils {
size += 3; size += 3;
} }
} }
destination.write((byte)FormatSpec.GROUP_CHARACTERS_TERMINATOR); destination.write((byte)FormatSpec.PTNODE_CHARACTERS_TERMINATOR);
size += FormatSpec.GROUP_TERMINATOR_SIZE; size += FormatSpec.PTNODE_TERMINATOR_SIZE;
return size; return size;
} }
/** /**
* Write a char group to an output stream from a CharGroupInfo. * Write a PtNode to an output stream from a PtNodeInfo.
* A char group is an in-memory representation of a node in the patricia trie. * A PtNode is an in-memory representation of a node in the patricia trie.
* A char group info is a container for low-level information about how the * A PtNode info is a container for low-level information about how the
* char group is stored in the binary format. * PtNode is stored in the binary format.
* *
* @param destination the stream to write. * @param destination the stream to write.
* @param info the char group info to be written. * @param info the PtNode info to be written.
* @return the size written, in bytes. * @return the size written, in bytes.
*/ */
private static int writeCharGroup(final OutputStream destination, final CharGroupInfo info) private static int writePtNode(final OutputStream destination, final PtNodeInfo info)
throws IOException { throws IOException {
int size = FormatSpec.GROUP_FLAGS_SIZE; int size = FormatSpec.PTNODE_FLAGS_SIZE;
destination.write((byte)info.mFlags); destination.write((byte)info.mFlags);
final int parentOffset = info.mParentAddress == FormatSpec.NO_PARENT_ADDRESS ? final int parentOffset = info.mParentAddress == FormatSpec.NO_PARENT_ADDRESS ?
FormatSpec.NO_PARENT_ADDRESS : info.mParentAddress - info.mOriginalAddress; FormatSpec.NO_PARENT_ADDRESS : info.mParentAddress - info.mOriginalAddress;
@ -392,7 +391,7 @@ public final class BinaryDictIOUtils {
} }
} }
if (info.mCharacters.length > 1) { if (info.mCharacters.length > 1) {
destination.write((byte)FormatSpec.GROUP_CHARACTERS_TERMINATOR); destination.write((byte)FormatSpec.PTNODE_CHARACTERS_TERMINATOR);
size++; size++;
} }
@ -402,7 +401,7 @@ public final class BinaryDictIOUtils {
} }
if (DBG) { if (DBG) {
MakedictLog.d("writeCharGroup origin=" + info.mOriginalAddress + ", size=" + size MakedictLog.d("writePtNode origin=" + info.mOriginalAddress + ", size=" + size
+ ", child=" + info.mChildrenAddress + ", characters =" + ", child=" + info.mChildrenAddress + ", characters ="
+ new String(info.mCharacters, 0, info.mCharacters.length)); + new String(info.mCharacters, 0, info.mCharacters.length));
} }
@ -434,23 +433,23 @@ public final class BinaryDictIOUtils {
final int bigramFrequency = info.mBigrams.get(i).mFrequency; final int bigramFrequency = info.mBigrams.get(i).mFrequency;
int bigramFlags = (i < info.mBigrams.size() - 1) int bigramFlags = (i < info.mBigrams.size() - 1)
? FormatSpec.FLAG_ATTRIBUTE_HAS_NEXT : 0; ? FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT : 0;
size++; size++;
final int bigramOffset = info.mBigrams.get(i).mAddress - (info.mOriginalAddress final int bigramOffset = info.mBigrams.get(i).mAddress - (info.mOriginalAddress
+ size); + size);
bigramFlags |= (bigramOffset < 0) ? FormatSpec.FLAG_ATTRIBUTE_OFFSET_NEGATIVE : 0; bigramFlags |= (bigramOffset < 0) ? FormatSpec.FLAG_BIGRAM_ATTR_OFFSET_NEGATIVE : 0;
switch (BinaryDictEncoderUtils.getByteSize(bigramOffset)) { switch (BinaryDictEncoderUtils.getByteSize(bigramOffset)) {
case 1: case 1:
bigramFlags |= FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE; bigramFlags |= FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_ONEBYTE;
break; break;
case 2: case 2:
bigramFlags |= FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES; bigramFlags |= FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_TWOBYTES;
break; break;
case 3: case 3:
bigramFlags |= FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES; bigramFlags |= FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_THREEBYTES;
break; break;
} }
bigramFlags |= bigramFrequency & FormatSpec.FLAG_ATTRIBUTE_FREQUENCY; bigramFlags |= bigramFrequency & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY;
destination.write((byte)bigramFlags); destination.write((byte)bigramFlags);
size += writeVariableAddress(destination, Math.abs(bigramOffset)); size += writeVariableAddress(destination, Math.abs(bigramOffset));
} }
@ -459,21 +458,21 @@ public final class BinaryDictIOUtils {
} }
/** /**
* Compute the size of the char group. * Compute the size of the PtNode.
*/ */
static int computeGroupSize(final CharGroupInfo info, final FormatOptions formatOptions) { static int computePtNodeSize(final PtNodeInfo info, final FormatOptions formatOptions) {
int size = FormatSpec.GROUP_FLAGS_SIZE + FormatSpec.PARENT_ADDRESS_SIZE int size = FormatSpec.PTNODE_FLAGS_SIZE + FormatSpec.PARENT_ADDRESS_SIZE
+ BinaryDictEncoderUtils.getGroupCharactersSize(info.mCharacters) + BinaryDictEncoderUtils.getPtNodeCharactersSize(info.mCharacters)
+ getChildrenAddressSize(info.mFlags, formatOptions); + getChildrenAddressSize(info.mFlags, formatOptions);
if ((info.mFlags & FormatSpec.FLAG_IS_TERMINAL) != 0) { if ((info.mFlags & FormatSpec.FLAG_IS_TERMINAL) != 0) {
size += FormatSpec.GROUP_FREQUENCY_SIZE; size += FormatSpec.PTNODE_FREQUENCY_SIZE;
} }
if (info.mShortcutTargets != null && !info.mShortcutTargets.isEmpty()) { if (info.mShortcutTargets != null && !info.mShortcutTargets.isEmpty()) {
size += BinaryDictEncoderUtils.getShortcutListSize(info.mShortcutTargets); size += BinaryDictEncoderUtils.getShortcutListSize(info.mShortcutTargets);
} }
if (info.mBigrams != null) { if (info.mBigrams != null) {
for (final PendingAttribute attr : info.mBigrams) { for (final PendingAttribute attr : info.mBigrams) {
size += FormatSpec.GROUP_FLAGS_SIZE; size += FormatSpec.PTNODE_FLAGS_SIZE;
size += BinaryDictEncoderUtils.getByteSize(attr.mAddress); size += BinaryDictEncoderUtils.getByteSize(attr.mAddress);
} }
} }
@ -484,14 +483,14 @@ public final class BinaryDictIOUtils {
* Write a node array to the stream. * Write a node array to the stream.
* *
* @param destination the stream to write. * @param destination the stream to write.
* @param infos an array of CharGroupInfo to be written. * @param infos an array of PtNodeInfo to be written.
* @return the size written, in bytes. * @return the size written, in bytes.
* @throws IOException * @throws IOException
*/ */
static int writeNodes(final OutputStream destination, final CharGroupInfo[] infos) static int writeNodes(final OutputStream destination, final PtNodeInfo[] infos)
throws IOException { throws IOException {
int size = getGroupCountSize(infos.length); int size = getPtNodeCountSize(infos.length);
switch (getGroupCountSize(infos.length)) { switch (getPtNodeCountSize(infos.length)) {
case 1: case 1:
destination.write((byte)infos.length); destination.write((byte)infos.length);
break; break;
@ -500,9 +499,9 @@ public final class BinaryDictIOUtils {
destination.write((byte)(infos.length & 0xFF)); destination.write((byte)(infos.length & 0xFF));
break; break;
default: default:
throw new RuntimeException("Invalid group count size."); throw new RuntimeException("Invalid node count size.");
} }
for (final CharGroupInfo info : infos) size += writeCharGroup(destination, info); for (final PtNodeInfo info : infos) size += writePtNode(destination, info);
writeSInt24ToStream(destination, FormatSpec.NO_FORWARD_LINK_ADDRESS); writeSInt24ToStream(destination, FormatSpec.NO_FORWARD_LINK_ADDRESS);
return size + FormatSpec.FORWARD_LINK_ADDRESS_SIZE; return size + FormatSpec.FORWARD_LINK_ADDRESS_SIZE;
} }
@ -560,11 +559,11 @@ public final class BinaryDictIOUtils {
} }
/** /**
* Helper method to check whether the group is moved. * Helper method to check whether the node is moved.
*/ */
public static boolean isMovedGroup(final int flags, final FormatOptions options) { public static boolean isMovedPtNode(final int flags, final FormatOptions options) {
return options.mSupportsDynamicUpdate return options.mSupportsDynamicUpdate
&& ((flags & FormatSpec.MASK_GROUP_ADDRESS_TYPE) == FormatSpec.FLAG_IS_MOVED); && ((flags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) == FormatSpec.FLAG_IS_MOVED);
} }
/** /**
@ -576,26 +575,26 @@ public final class BinaryDictIOUtils {
} }
/** /**
* Helper method to check whether the group is deleted. * Helper method to check whether the node is deleted.
*/ */
public static boolean isDeletedGroup(final int flags, final FormatOptions formatOptions) { public static boolean isDeletedPtNode(final int flags, final FormatOptions formatOptions) {
return formatOptions.mSupportsDynamicUpdate return formatOptions.mSupportsDynamicUpdate
&& ((flags & FormatSpec.MASK_GROUP_ADDRESS_TYPE) == FormatSpec.FLAG_IS_DELETED); && ((flags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) == FormatSpec.FLAG_IS_DELETED);
} }
/** /**
* Compute the binary size of the group count * Compute the binary size of the node count
* @param count the group count * @param count the node count
* @return the size of the group count, either 1 or 2 bytes. * @return the size of the node count, either 1 or 2 bytes.
*/ */
public static int getGroupCountSize(final int count) { public static int getPtNodeCountSize(final int count) {
if (FormatSpec.MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT >= count) { if (FormatSpec.MAX_PTNODES_FOR_ONE_BYTE_PTNODE_COUNT >= count) {
return 1; return 1;
} else if (FormatSpec.MAX_CHARGROUPS_IN_A_PT_NODE_ARRAY >= count) { } else if (FormatSpec.MAX_PTNODES_IN_A_PT_NODE_ARRAY >= count) {
return 2; return 2;
} else { } else {
throw new RuntimeException("Can't have more than " throw new RuntimeException("Can't have more than "
+ FormatSpec.MAX_CHARGROUPS_IN_A_PT_NODE_ARRAY + " groups in a node (found " + FormatSpec.MAX_PTNODES_IN_A_PT_NODE_ARRAY + " PtNode in a PtNodeArray (found "
+ count + ")"); + count + ")");
} }
} }
@ -603,14 +602,14 @@ public final class BinaryDictIOUtils {
static int getChildrenAddressSize(final int optionFlags, static int getChildrenAddressSize(final int optionFlags,
final FormatOptions formatOptions) { final FormatOptions formatOptions) {
if (formatOptions.mSupportsDynamicUpdate) return FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE; if (formatOptions.mSupportsDynamicUpdate) return FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE;
switch (optionFlags & FormatSpec.MASK_GROUP_ADDRESS_TYPE) { switch (optionFlags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) {
case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_ONEBYTE: case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_ONEBYTE:
return 1; return 1;
case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_TWOBYTES: case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_TWOBYTES:
return 2; return 2;
case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_THREEBYTES: case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_THREEBYTES:
return 3; return 3;
case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_NOADDRESS: case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_NOADDRESS:
default: default:
return 0; return 0;
} }

View File

@ -40,9 +40,9 @@ public interface DictDecoder {
* Reads PtNode from nodeAddress. * Reads PtNode from nodeAddress.
* @param ptNodePos the position of PtNode. * @param ptNodePos the position of PtNode.
* @param formatOptions the format options. * @param formatOptions the format options.
* @return CharGroupInfo. * @return PtNodeInfo.
*/ */
public CharGroupInfo readPtNode(final int ptNodePos, final FormatOptions formatOptions); public PtNodeInfo readPtNode(final int ptNodePos, final FormatOptions formatOptions);
/** /**
* Reads a buffer and returns the memory representation of the dictionary. * Reads a buffer and returns the memory representation of the dictionary.

View File

@ -43,7 +43,7 @@ public final class DynamicBinaryDictIOUtils {
} }
private static int markAsDeleted(final int flags) { private static int markAsDeleted(final int flags) {
return (flags & (~FormatSpec.MASK_GROUP_ADDRESS_TYPE)) | FormatSpec.FLAG_IS_DELETED; return (flags & (~FormatSpec.MASK_CHILDREN_ADDRESS_TYPE)) | FormatSpec.FLAG_IS_DELETED;
} }
/** /**
@ -70,56 +70,56 @@ public final class DynamicBinaryDictIOUtils {
} }
/** /**
* Update a parent address in a CharGroup that is referred to by groupOriginAddress. * Update a parent address in a PtNode that is referred to by ptNodeOriginAddress.
* *
* @param dictBuffer the DictBuffer to write. * @param dictBuffer the DictBuffer to write.
* @param groupOriginAddress the address of the group. * @param ptNodeOriginAddress the address of the PtNode.
* @param newParentAddress the absolute address of the parent. * @param newParentAddress the absolute address of the parent.
* @param formatOptions file format options. * @param formatOptions file format options.
*/ */
public static void updateParentAddress(final DictBuffer dictBuffer, public static void updateParentAddress(final DictBuffer dictBuffer,
final int groupOriginAddress, final int newParentAddress, final int ptNodeOriginAddress, final int newParentAddress,
final FormatOptions formatOptions) { final FormatOptions formatOptions) {
final int originalPosition = dictBuffer.position(); final int originalPosition = dictBuffer.position();
dictBuffer.position(groupOriginAddress); dictBuffer.position(ptNodeOriginAddress);
if (!formatOptions.mSupportsDynamicUpdate) { if (!formatOptions.mSupportsDynamicUpdate) {
throw new RuntimeException("this file format does not support parent addresses"); throw new RuntimeException("this file format does not support parent addresses");
} }
final int flags = dictBuffer.readUnsignedByte(); final int flags = dictBuffer.readUnsignedByte();
if (BinaryDictIOUtils.isMovedGroup(flags, formatOptions)) { if (BinaryDictIOUtils.isMovedPtNode(flags, formatOptions)) {
// If the group is moved, the parent address is stored in the destination group. // If the node is moved, the parent address is stored in the destination node.
// We are guaranteed to process the destination group later, so there is no need to // We are guaranteed to process the destination node later, so there is no need to
// update anything here. // update anything here.
dictBuffer.position(originalPosition); dictBuffer.position(originalPosition);
return; return;
} }
if (DBG) { if (DBG) {
MakedictLog.d("update parent address flags=" + flags + ", " + groupOriginAddress); MakedictLog.d("update parent address flags=" + flags + ", " + ptNodeOriginAddress);
} }
final int parentOffset = newParentAddress - groupOriginAddress; final int parentOffset = newParentAddress - ptNodeOriginAddress;
BinaryDictIOUtils.writeSInt24ToBuffer(dictBuffer, parentOffset); BinaryDictIOUtils.writeSInt24ToBuffer(dictBuffer, parentOffset);
dictBuffer.position(originalPosition); dictBuffer.position(originalPosition);
} }
/** /**
* Update parent addresses in a node array stored at nodeOriginAddress. * Update parent addresses in a node array stored at ptNodeOriginAddress.
* *
* @param dictBuffer the DictBuffer to be modified. * @param dictBuffer the DictBuffer to be modified.
* @param nodeOriginAddress the address of the node array to update. * @param ptNodeOriginAddress the address of the node array to update.
* @param newParentAddress the address to be written. * @param newParentAddress the address to be written.
* @param formatOptions file format options. * @param formatOptions file format options.
*/ */
public static void updateParentAddresses(final DictBuffer dictBuffer, public static void updateParentAddresses(final DictBuffer dictBuffer,
final int nodeOriginAddress, final int newParentAddress, final int ptNodeOriginAddress, final int newParentAddress,
final FormatOptions formatOptions) { final FormatOptions formatOptions) {
final int originalPosition = dictBuffer.position(); final int originalPosition = dictBuffer.position();
dictBuffer.position(nodeOriginAddress); dictBuffer.position(ptNodeOriginAddress);
do { do {
final int count = BinaryDictDecoderUtils.readCharGroupCount(dictBuffer); final int count = BinaryDictDecoderUtils.readPtNodeCount(dictBuffer);
for (int i = 0; i < count; ++i) { for (int i = 0; i < count; ++i) {
updateParentAddress(dictBuffer, dictBuffer.position(), newParentAddress, updateParentAddress(dictBuffer, dictBuffer.position(), newParentAddress,
formatOptions); formatOptions);
BinaryDictIOUtils.skipCharGroup(dictBuffer, formatOptions); BinaryDictIOUtils.skipPtNode(dictBuffer, formatOptions);
} }
final int forwardLinkAddress = dictBuffer.readUnsignedInt24(); final int forwardLinkAddress = dictBuffer.readUnsignedInt24();
dictBuffer.position(forwardLinkAddress); dictBuffer.position(forwardLinkAddress);
@ -129,18 +129,18 @@ public final class DynamicBinaryDictIOUtils {
} }
/** /**
* Update a children address in a CharGroup that is addressed by groupOriginAddress. * Update a children address in a PtNode that is addressed by ptNodeOriginAddress.
* *
* @param dictBuffer the DictBuffer to write. * @param dictBuffer the DictBuffer to write.
* @param groupOriginAddress the address of the group. * @param ptNodeOriginAddress the address of the PtNode.
* @param newChildrenAddress the absolute address of the child. * @param newChildrenAddress the absolute address of the child.
* @param formatOptions file format options. * @param formatOptions file format options.
*/ */
public static void updateChildrenAddress(final DictBuffer dictBuffer, public static void updateChildrenAddress(final DictBuffer dictBuffer,
final int groupOriginAddress, final int newChildrenAddress, final int ptNodeOriginAddress, final int newChildrenAddress,
final FormatOptions formatOptions) { final FormatOptions formatOptions) {
final int originalPosition = dictBuffer.position(); final int originalPosition = dictBuffer.position();
dictBuffer.position(groupOriginAddress); dictBuffer.position(ptNodeOriginAddress);
final int flags = dictBuffer.readUnsignedByte(); final int flags = dictBuffer.readUnsignedByte();
final int parentAddress = BinaryDictDecoderUtils.readParentAddress(dictBuffer, final int parentAddress = BinaryDictDecoderUtils.readParentAddress(dictBuffer,
formatOptions); formatOptions);
@ -153,21 +153,21 @@ public final class DynamicBinaryDictIOUtils {
} }
/** /**
* Helper method to move a char group to the tail of the file. * Helper method to move a PtNode to the tail of the file.
*/ */
private static int moveCharGroup(final OutputStream destination, private static int movePtNode(final OutputStream destination,
final DictBuffer dictBuffer, final CharGroupInfo info, final DictBuffer dictBuffer, final PtNodeInfo info,
final int nodeArrayOriginAddress, final int oldGroupAddress, final int nodeArrayOriginAddress, final int oldNodeAddress,
final FormatOptions formatOptions) throws IOException { final FormatOptions formatOptions) throws IOException {
updateParentAddress(dictBuffer, oldGroupAddress, dictBuffer.limit() + 1, formatOptions); updateParentAddress(dictBuffer, oldNodeAddress, dictBuffer.limit() + 1, formatOptions);
dictBuffer.position(oldGroupAddress); dictBuffer.position(oldNodeAddress);
final int currentFlags = dictBuffer.readUnsignedByte(); final int currentFlags = dictBuffer.readUnsignedByte();
dictBuffer.position(oldGroupAddress); dictBuffer.position(oldNodeAddress);
dictBuffer.put((byte)(FormatSpec.FLAG_IS_MOVED | (currentFlags dictBuffer.put((byte)(FormatSpec.FLAG_IS_MOVED | (currentFlags
& (~FormatSpec.MASK_MOVE_AND_DELETE_FLAG)))); & (~FormatSpec.MASK_MOVE_AND_DELETE_FLAG))));
int size = FormatSpec.GROUP_FLAGS_SIZE; int size = FormatSpec.PTNODE_FLAGS_SIZE;
updateForwardLink(dictBuffer, nodeArrayOriginAddress, dictBuffer.limit(), formatOptions); updateForwardLink(dictBuffer, nodeArrayOriginAddress, dictBuffer.limit(), formatOptions);
size += BinaryDictIOUtils.writeNodes(destination, new CharGroupInfo[] { info }); size += BinaryDictIOUtils.writeNodes(destination, new PtNodeInfo[] { info });
return size; return size;
} }
@ -178,9 +178,9 @@ public final class DynamicBinaryDictIOUtils {
dictBuffer.position(nodeArrayOriginAddress); dictBuffer.position(nodeArrayOriginAddress);
int jumpCount = 0; int jumpCount = 0;
while (jumpCount++ < MAX_JUMPS) { while (jumpCount++ < MAX_JUMPS) {
final int count = BinaryDictDecoderUtils.readCharGroupCount(dictBuffer); final int count = BinaryDictDecoderUtils.readPtNodeCount(dictBuffer);
for (int i = 0; i < count; ++i) { for (int i = 0; i < count; ++i) {
BinaryDictIOUtils.skipCharGroup(dictBuffer, formatOptions); BinaryDictIOUtils.skipPtNode(dictBuffer, formatOptions);
} }
final int forwardLinkAddress = dictBuffer.readUnsignedInt24(); final int forwardLinkAddress = dictBuffer.readUnsignedInt24();
if (forwardLinkAddress == FormatSpec.NO_FORWARD_LINK_ADDRESS) { if (forwardLinkAddress == FormatSpec.NO_FORWARD_LINK_ADDRESS) {
@ -196,43 +196,43 @@ public final class DynamicBinaryDictIOUtils {
} }
/** /**
* Move a group that is referred to by oldGroupOrigin to the tail of the file, and set the * Move a PtNode that is referred to by oldPtNodeOrigin to the tail of the file, and set the
* children address to the byte after the group * children address to the byte after the PtNode.
* *
* @param fileEndAddress the address of the tail of the file. * @param fileEndAddress the address of the tail of the file.
* @param codePoints the characters to put inside the group. * @param codePoints the characters to put inside the PtNode.
* @param length how many code points to read from codePoints. * @param length how many code points to read from codePoints.
* @param flags the flags for this group. * @param flags the flags for this PtNode.
* @param frequency the frequency of this terminal. * @param frequency the frequency of this terminal.
* @param parentAddress the address of the parent group of this group. * @param parentAddress the address of the parent PtNode of this PtNode.
* @param shortcutTargets the shortcut targets for this group. * @param shortcutTargets the shortcut targets for this PtNode.
* @param bigrams the bigrams for this group. * @param bigrams the bigrams for this PtNode.
* @param destination the stream representing the tail of the file. * @param destination the stream representing the tail of the file.
* @param dictBuffer the DictBuffer representing the (constant-size) body of the file. * @param dictBuffer the DictBuffer representing the (constant-size) body of the file.
* @param oldNodeArrayOrigin the origin of the old node array this group was a part of. * @param oldPtNodeArrayOrigin the origin of the old PtNode array this PtNode was a part of.
* @param oldGroupOrigin the old origin where this group used to be stored. * @param oldPtNodeOrigin the old origin where this PtNode used to be stored.
* @param formatOptions format options for this dictionary. * @param formatOptions format options for this dictionary.
* @return the size written, in bytes. * @return the size written, in bytes.
* @throws IOException if the file can't be accessed * @throws IOException if the file can't be accessed
*/ */
private static int moveGroup(final int fileEndAddress, final int[] codePoints, private static int movePtNode(final int fileEndAddress, final int[] codePoints,
final int length, final int flags, final int frequency, final int parentAddress, final int length, final int flags, final int frequency, final int parentAddress,
final ArrayList<WeightedString> shortcutTargets, final ArrayList<WeightedString> shortcutTargets,
final ArrayList<PendingAttribute> bigrams, final OutputStream destination, final ArrayList<PendingAttribute> bigrams, final OutputStream destination,
final DictBuffer dictBuffer, final int oldNodeArrayOrigin, final DictBuffer dictBuffer, final int oldPtNodeArrayOrigin,
final int oldGroupOrigin, final FormatOptions formatOptions) throws IOException { final int oldPtNodeOrigin, final FormatOptions formatOptions) throws IOException {
int size = 0; int size = 0;
final int newGroupOrigin = fileEndAddress + 1; final int newPtNodeOrigin = fileEndAddress + 1;
final int[] writtenCharacters = Arrays.copyOfRange(codePoints, 0, length); final int[] writtenCharacters = Arrays.copyOfRange(codePoints, 0, length);
final CharGroupInfo tmpInfo = new CharGroupInfo(newGroupOrigin, -1 /* endAddress */, final PtNodeInfo tmpInfo = new PtNodeInfo(newPtNodeOrigin, -1 /* endAddress */,
flags, writtenCharacters, frequency, parentAddress, FormatSpec.NO_CHILDREN_ADDRESS, flags, writtenCharacters, frequency, parentAddress, FormatSpec.NO_CHILDREN_ADDRESS,
shortcutTargets, bigrams); shortcutTargets, bigrams);
size = BinaryDictIOUtils.computeGroupSize(tmpInfo, formatOptions); size = BinaryDictIOUtils.computePtNodeSize(tmpInfo, formatOptions);
final CharGroupInfo newInfo = new CharGroupInfo(newGroupOrigin, newGroupOrigin + size, final PtNodeInfo newInfo = new PtNodeInfo(newPtNodeOrigin, newPtNodeOrigin + size,
flags, writtenCharacters, frequency, parentAddress, flags, writtenCharacters, frequency, parentAddress,
fileEndAddress + 1 + size + FormatSpec.FORWARD_LINK_ADDRESS_SIZE, shortcutTargets, fileEndAddress + 1 + size + FormatSpec.FORWARD_LINK_ADDRESS_SIZE, shortcutTargets,
bigrams); bigrams);
moveCharGroup(destination, dictBuffer, newInfo, oldNodeArrayOrigin, oldGroupOrigin, movePtNode(destination, dictBuffer, newInfo, oldPtNodeArrayOrigin, oldPtNodeOrigin,
formatOptions); formatOptions);
return 1 + size + FormatSpec.FORWARD_LINK_ADDRESS_SIZE; return 1 + size + FormatSpec.FORWARD_LINK_ADDRESS_SIZE;
} }
@ -288,16 +288,16 @@ public final class DynamicBinaryDictIOUtils {
if (wordPos >= wordLen) break; if (wordPos >= wordLen) break;
nodeOriginAddress = dictBuffer.position(); nodeOriginAddress = dictBuffer.position();
int nodeParentAddress = -1; int nodeParentAddress = -1;
final int charGroupCount = BinaryDictDecoderUtils.readCharGroupCount(dictBuffer); final int ptNodeCount = BinaryDictDecoderUtils.readPtNodeCount(dictBuffer);
boolean foundNextGroup = false; boolean foundNextNode = false;
for (int i = 0; i < charGroupCount; ++i) { for (int i = 0; i < ptNodeCount; ++i) {
address = dictBuffer.position(); address = dictBuffer.position();
final CharGroupInfo currentInfo = dictDecoder.readPtNode(address, final PtNodeInfo currentInfo = dictDecoder.readPtNode(address,
fileHeader.mFormatOptions); fileHeader.mFormatOptions);
final boolean isMovedGroup = BinaryDictIOUtils.isMovedGroup(currentInfo.mFlags, final boolean isMovedNode = BinaryDictIOUtils.isMovedPtNode(currentInfo.mFlags,
fileHeader.mFormatOptions); fileHeader.mFormatOptions);
if (isMovedGroup) continue; if (isMovedNode) continue;
nodeParentAddress = (currentInfo.mParentAddress == FormatSpec.NO_PARENT_ADDRESS) nodeParentAddress = (currentInfo.mParentAddress == FormatSpec.NO_PARENT_ADDRESS)
? FormatSpec.NO_PARENT_ADDRESS : currentInfo.mParentAddress + address; ? FormatSpec.NO_PARENT_ADDRESS : currentInfo.mParentAddress + address;
boolean matched = true; boolean matched = true;
@ -314,10 +314,10 @@ public final class DynamicBinaryDictIOUtils {
* abc - d - ef * abc - d - ef
*/ */
final int newNodeAddress = dictBuffer.limit(); final int newNodeAddress = dictBuffer.limit();
final int flags = BinaryDictEncoderUtils.makeCharGroupFlags(p > 1, final int flags = BinaryDictEncoderUtils.makePtNodeFlags(p > 1,
isTerminal, 0, hasShortcuts, hasBigrams, false /* isNotAWord */, isTerminal, 0, hasShortcuts, hasBigrams, false /* isNotAWord */,
false /* isBlackListEntry */, fileHeader.mFormatOptions); false /* isBlackListEntry */, fileHeader.mFormatOptions);
int written = moveGroup(newNodeAddress, currentInfo.mCharacters, p, flags, int written = movePtNode(newNodeAddress, currentInfo.mCharacters, p, flags,
frequency, nodeParentAddress, shortcuts, bigrams, destination, frequency, nodeParentAddress, shortcuts, bigrams, destination,
dictBuffer, nodeOriginAddress, address, fileHeader.mFormatOptions); dictBuffer, nodeOriginAddress, address, fileHeader.mFormatOptions);
@ -327,12 +327,12 @@ public final class DynamicBinaryDictIOUtils {
updateParentAddresses(dictBuffer, currentInfo.mChildrenAddress, updateParentAddresses(dictBuffer, currentInfo.mChildrenAddress,
newNodeAddress + written + 1, fileHeader.mFormatOptions); newNodeAddress + written + 1, fileHeader.mFormatOptions);
} }
final CharGroupInfo newInfo2 = new CharGroupInfo( final PtNodeInfo newInfo2 = new PtNodeInfo(
newNodeAddress + written + 1, -1 /* endAddress */, newNodeAddress + written + 1, -1 /* endAddress */,
currentInfo.mFlags, characters2, currentInfo.mFrequency, currentInfo.mFlags, characters2, currentInfo.mFrequency,
newNodeAddress + 1, currentInfo.mChildrenAddress, newNodeAddress + 1, currentInfo.mChildrenAddress,
currentInfo.mShortcutTargets, currentInfo.mBigrams); currentInfo.mShortcutTargets, currentInfo.mBigrams);
BinaryDictIOUtils.writeNodes(destination, new CharGroupInfo[] { newInfo2 }); BinaryDictIOUtils.writeNodes(destination, new PtNodeInfo[] { newInfo2 });
return; return;
} else if (codePoints[wordPos + p] != currentInfo.mCharacters[p]) { } else if (codePoints[wordPos + p] != currentInfo.mCharacters[p]) {
if (p > 0) { if (p > 0) {
@ -353,12 +353,12 @@ public final class DynamicBinaryDictIOUtils {
final int childrenAddress = currentInfo.mChildrenAddress; final int childrenAddress = currentInfo.mChildrenAddress;
// move prefix // move prefix
final int prefixFlags = BinaryDictEncoderUtils.makeCharGroupFlags(p > 1, final int prefixFlags = BinaryDictEncoderUtils.makePtNodeFlags(p > 1,
false /* isTerminal */, 0 /* childrenAddressSize*/, false /* isTerminal */, 0 /* childrenAddressSize*/,
false /* hasShortcut */, false /* hasBigrams */, false /* hasShortcut */, false /* hasBigrams */,
false /* isNotAWord */, false /* isBlackListEntry */, false /* isNotAWord */, false /* isBlackListEntry */,
fileHeader.mFormatOptions); fileHeader.mFormatOptions);
int written = moveGroup(newNodeAddress, currentInfo.mCharacters, p, int written = movePtNode(newNodeAddress, currentInfo.mCharacters, p,
prefixFlags, -1 /* frequency */, nodeParentAddress, null, null, prefixFlags, -1 /* frequency */, nodeParentAddress, null, null,
destination, dictBuffer, nodeOriginAddress, address, destination, dictBuffer, nodeOriginAddress, address,
fileHeader.mFormatOptions); fileHeader.mFormatOptions);
@ -369,7 +369,7 @@ public final class DynamicBinaryDictIOUtils {
updateParentAddresses(dictBuffer, currentInfo.mChildrenAddress, updateParentAddresses(dictBuffer, currentInfo.mChildrenAddress,
newNodeAddress + written + 1, fileHeader.mFormatOptions); newNodeAddress + written + 1, fileHeader.mFormatOptions);
} }
final int suffixFlags = BinaryDictEncoderUtils.makeCharGroupFlags( final int suffixFlags = BinaryDictEncoderUtils.makePtNodeFlags(
suffixCharacters.length > 1, suffixCharacters.length > 1,
(currentInfo.mFlags & FormatSpec.FLAG_IS_TERMINAL) != 0, (currentInfo.mFlags & FormatSpec.FLAG_IS_TERMINAL) != 0,
0 /* childrenAddressSize */, 0 /* childrenAddressSize */,
@ -377,26 +377,26 @@ public final class DynamicBinaryDictIOUtils {
!= 0, != 0,
(currentInfo.mFlags & FormatSpec.FLAG_HAS_BIGRAMS) != 0, (currentInfo.mFlags & FormatSpec.FLAG_HAS_BIGRAMS) != 0,
isNotAWord, isBlackListEntry, fileHeader.mFormatOptions); isNotAWord, isBlackListEntry, fileHeader.mFormatOptions);
final CharGroupInfo suffixInfo = new CharGroupInfo( final PtNodeInfo suffixInfo = new PtNodeInfo(
newNodeAddress + written + 1, -1 /* endAddress */, suffixFlags, newNodeAddress + written + 1, -1 /* endAddress */, suffixFlags,
suffixCharacters, currentInfo.mFrequency, newNodeAddress + 1, suffixCharacters, currentInfo.mFrequency, newNodeAddress + 1,
currentInfo.mChildrenAddress, currentInfo.mShortcutTargets, currentInfo.mChildrenAddress, currentInfo.mShortcutTargets,
currentInfo.mBigrams); currentInfo.mBigrams);
written += BinaryDictIOUtils.computeGroupSize(suffixInfo, written += BinaryDictIOUtils.computePtNodeSize(suffixInfo,
fileHeader.mFormatOptions) + 1; fileHeader.mFormatOptions) + 1;
final int[] newCharacters = Arrays.copyOfRange(codePoints, wordPos + p, final int[] newCharacters = Arrays.copyOfRange(codePoints, wordPos + p,
codePoints.length); codePoints.length);
final int flags = BinaryDictEncoderUtils.makeCharGroupFlags( final int flags = BinaryDictEncoderUtils.makePtNodeFlags(
newCharacters.length > 1, isTerminal, newCharacters.length > 1, isTerminal,
0 /* childrenAddressSize */, hasShortcuts, hasBigrams, 0 /* childrenAddressSize */, hasShortcuts, hasBigrams,
isNotAWord, isBlackListEntry, fileHeader.mFormatOptions); isNotAWord, isBlackListEntry, fileHeader.mFormatOptions);
final CharGroupInfo newInfo = new CharGroupInfo( final PtNodeInfo newInfo = new PtNodeInfo(
newNodeAddress + written, -1 /* endAddress */, flags, newNodeAddress + written, -1 /* endAddress */, flags,
newCharacters, frequency, newNodeAddress + 1, newCharacters, frequency, newNodeAddress + 1,
FormatSpec.NO_CHILDREN_ADDRESS, shortcuts, bigrams); FormatSpec.NO_CHILDREN_ADDRESS, shortcuts, bigrams);
BinaryDictIOUtils.writeNodes(destination, BinaryDictIOUtils.writeNodes(destination,
new CharGroupInfo[] { suffixInfo, newInfo }); new PtNodeInfo[] { suffixInfo, newInfo });
return; return;
} }
matched = false; matched = false;
@ -407,17 +407,17 @@ public final class DynamicBinaryDictIOUtils {
if (matched) { if (matched) {
if (wordPos + currentInfo.mCharacters.length == wordLen) { if (wordPos + currentInfo.mCharacters.length == wordLen) {
// the word exists in the dictionary. // the word exists in the dictionary.
// only update group. // only update the PtNode.
final int newNodeAddress = dictBuffer.limit(); final int newNodeAddress = dictBuffer.limit();
final boolean hasMultipleChars = currentInfo.mCharacters.length > 1; final boolean hasMultipleChars = currentInfo.mCharacters.length > 1;
final int flags = BinaryDictEncoderUtils.makeCharGroupFlags(hasMultipleChars, final int flags = BinaryDictEncoderUtils.makePtNodeFlags(hasMultipleChars,
isTerminal, 0 /* childrenAddressSize */, hasShortcuts, hasBigrams, isTerminal, 0 /* childrenAddressSize */, hasShortcuts, hasBigrams,
isNotAWord, isBlackListEntry, fileHeader.mFormatOptions); isNotAWord, isBlackListEntry, fileHeader.mFormatOptions);
final CharGroupInfo newInfo = new CharGroupInfo(newNodeAddress + 1, final PtNodeInfo newInfo = new PtNodeInfo(newNodeAddress + 1,
-1 /* endAddress */, flags, currentInfo.mCharacters, frequency, -1 /* endAddress */, flags, currentInfo.mCharacters, frequency,
nodeParentAddress, currentInfo.mChildrenAddress, shortcuts, nodeParentAddress, currentInfo.mChildrenAddress, shortcuts,
bigrams); bigrams);
moveCharGroup(destination, dictBuffer, newInfo, nodeOriginAddress, address, movePtNode(destination, dictBuffer, newInfo, nodeOriginAddress, address,
fileHeader.mFormatOptions); fileHeader.mFormatOptions);
return; return;
} }
@ -425,7 +425,7 @@ public final class DynamicBinaryDictIOUtils {
if (currentInfo.mChildrenAddress == FormatSpec.NO_CHILDREN_ADDRESS) { if (currentInfo.mChildrenAddress == FormatSpec.NO_CHILDREN_ADDRESS) {
/* /*
* found the prefix of the word. * found the prefix of the word.
* make new node and link to the node from this group. * make new PtNode and link to the PtNode from this PtNode.
* *
* before * before
* ab - cd * ab - cd
@ -435,28 +435,28 @@ public final class DynamicBinaryDictIOUtils {
* after * after
* ab - cd - e * ab - cd - e
*/ */
final int newNodeAddress = dictBuffer.limit(); final int newNodeArrayAddress = dictBuffer.limit();
updateChildrenAddress(dictBuffer, address, newNodeAddress, updateChildrenAddress(dictBuffer, address, newNodeArrayAddress,
fileHeader.mFormatOptions); fileHeader.mFormatOptions);
final int newGroupAddress = newNodeAddress + 1; final int newNodeAddress = newNodeArrayAddress + 1;
final boolean hasMultipleChars = (wordLen - wordPos) > 1; final boolean hasMultipleChars = (wordLen - wordPos) > 1;
final int flags = BinaryDictEncoderUtils.makeCharGroupFlags(hasMultipleChars, final int flags = BinaryDictEncoderUtils.makePtNodeFlags(hasMultipleChars,
isTerminal, 0 /* childrenAddressSize */, hasShortcuts, hasBigrams, isTerminal, 0 /* childrenAddressSize */, hasShortcuts, hasBigrams,
isNotAWord, isBlackListEntry, fileHeader.mFormatOptions); isNotAWord, isBlackListEntry, fileHeader.mFormatOptions);
final int[] characters = Arrays.copyOfRange(codePoints, wordPos, wordLen); final int[] characters = Arrays.copyOfRange(codePoints, wordPos, wordLen);
final CharGroupInfo newInfo = new CharGroupInfo(newGroupAddress, -1, flags, final PtNodeInfo newInfo = new PtNodeInfo(newNodeAddress, -1, flags,
characters, frequency, address, FormatSpec.NO_CHILDREN_ADDRESS, characters, frequency, address, FormatSpec.NO_CHILDREN_ADDRESS,
shortcuts, bigrams); shortcuts, bigrams);
BinaryDictIOUtils.writeNodes(destination, new CharGroupInfo[] { newInfo }); BinaryDictIOUtils.writeNodes(destination, new PtNodeInfo[] { newInfo });
return; return;
} }
dictBuffer.position(currentInfo.mChildrenAddress); dictBuffer.position(currentInfo.mChildrenAddress);
foundNextGroup = true; foundNextNode = true;
break; break;
} }
} }
if (foundNextGroup) continue; if (foundNextNode) continue;
// reached the end of the array. // reached the end of the array.
final int linkAddressPosition = dictBuffer.position(); final int linkAddressPosition = dictBuffer.position();
@ -485,13 +485,13 @@ public final class DynamicBinaryDictIOUtils {
BinaryDictIOUtils.writeSInt24ToBuffer(dictBuffer, newNodeAddress); BinaryDictIOUtils.writeSInt24ToBuffer(dictBuffer, newNodeAddress);
final int[] characters = Arrays.copyOfRange(codePoints, wordPos, wordLen); final int[] characters = Arrays.copyOfRange(codePoints, wordPos, wordLen);
final int flags = BinaryDictEncoderUtils.makeCharGroupFlags(characters.length > 1, final int flags = BinaryDictEncoderUtils.makePtNodeFlags(characters.length > 1,
isTerminal, 0 /* childrenAddressSize */, hasShortcuts, hasBigrams, isTerminal, 0 /* childrenAddressSize */, hasShortcuts, hasBigrams,
isNotAWord, isBlackListEntry, fileHeader.mFormatOptions); isNotAWord, isBlackListEntry, fileHeader.mFormatOptions);
final CharGroupInfo newInfo = new CharGroupInfo(newNodeAddress + 1, final PtNodeInfo newInfo = new PtNodeInfo(newNodeAddress + 1,
-1 /* endAddress */, flags, characters, frequency, nodeParentAddress, -1 /* endAddress */, flags, characters, frequency, nodeParentAddress,
FormatSpec.NO_CHILDREN_ADDRESS, shortcuts, bigrams); FormatSpec.NO_CHILDREN_ADDRESS, shortcuts, bigrams);
BinaryDictIOUtils.writeNodes(destination, new CharGroupInfo[]{ newInfo }); BinaryDictIOUtils.writeNodes(destination, new PtNodeInfo[]{ newInfo });
return; return;
} else { } else {
depth--; depth--;

View File

@ -62,19 +62,19 @@ public final class FormatSpec {
/* /*
* Node array (FusionDictionary.PtNodeArray) layout is as follows: * Node array (FusionDictionary.PtNodeArray) layout is as follows:
* *
* g | * n |
* r | the number of groups, 1 or 2 bytes. * o | the number of PtNodes, 1 or 2 bytes.
* o | 1 byte = bbbbbbbb match * d | 1 byte = bbbbbbbb match
* u | case 1xxxxxxx => xxxxxxx << 8 + next byte * e | case 1xxxxxxx => xxxxxxx << 8 + next byte
* p | otherwise => bbbbbbbb * c | otherwise => bbbbbbbb
* c | * o |
* ount * unt
* *
* g | * n |
* r | sequence of groups, * o | sequence of PtNodes,
* o | the layout of each group is described below. * d | the layout of each PtNode is described below.
* u | * e |
* ps * s
* *
* f | * f |
* o | IF SUPPORTS_DYNAMIC_UPDATE (defined in the file header) * o | IF SUPPORTS_DYNAMIC_UPDATE (defined in the file header)
@ -86,16 +86,16 @@ public final class FormatSpec {
* linkaddress * linkaddress
*/ */
/* Node (FusionDictionary.CharGroup) layout is as follows: /* Node (FusionDictionary.PtNode) layout is as follows:
* | IF !SUPPORTS_DYNAMIC_UPDATE * | IF !SUPPORTS_DYNAMIC_UPDATE
* | addressType xx : mask with MASK_GROUP_ADDRESS_TYPE * | addressType xx : mask with MASK_CHILDREN_ADDRESS_TYPE
* | 2 bits, 00 = no children : FLAG_GROUP_ADDRESS_TYPE_NOADDRESS * | 2 bits, 00 = no children : FLAG_CHILDREN_ADDRESS_TYPE_NOADDRESS
* f | 01 = 1 byte : FLAG_GROUP_ADDRESS_TYPE_ONEBYTE * f | 01 = 1 byte : FLAG_CHILDREN_ADDRESS_TYPE_ONEBYTE
* l | 10 = 2 bytes : FLAG_GROUP_ADDRESS_TYPE_TWOBYTES * l | 10 = 2 bytes : FLAG_CHILDREN_ADDRESS_TYPE_TWOBYTES
* a | 11 = 3 bytes : FLAG_GROUP_ADDRESS_TYPE_THREEBYTES * a | 11 = 3 bytes : FLAG_CHILDREN_ADDRESS_TYPE_THREEBYTES
* g | ELSE * g | ELSE
* s | is moved ? 2 bits, 11 = no : FLAG_IS_NOT_MOVED * s | is moved ? 2 bits, 11 = no : FLAG_IS_NOT_MOVED
* | This must be the same as FLAG_GROUP_ADDRESS_TYPE_THREEBYTES * | This must be the same as FLAG_CHILDREN_ADDRESS_TYPE_THREEBYTES
* | 01 = yes : FLAG_IS_MOVED * | 01 = yes : FLAG_IS_MOVED
* | the new address is stored in the same place as the parent address * | the new address is stored in the same place as the parent address
* | is deleted? 10 = yes : FLAG_IS_DELETED * | is deleted? 10 = yes : FLAG_IS_DELETED
@ -116,7 +116,7 @@ public final class FormatSpec {
* ddress * ddress
* *
* c | IF FLAG_HAS_MULTIPLE_CHARS * c | IF FLAG_HAS_MULTIPLE_CHARS
* h | char, char, char, char n * (1 or 3 bytes) : use CharGroupInfo for i/o helpers * h | char, char, char, char n * (1 or 3 bytes) : use PtNodeInfo for i/o helpers
* a | end 1 byte, = 0 * a | end 1 byte, = 0
* r | ELSE * r | ELSE
* s | char 1 or 3 bytes * s | char 1 or 3 bytes
@ -127,17 +127,22 @@ public final class FormatSpec {
* e | frequency 1 byte * e | frequency 1 byte
* q | * q |
* *
* c | IF 00 = FLAG_GROUP_ADDRESS_TYPE_NOADDRESS = addressType * c | IF SUPPORTS_DYNAMIC_UPDATE
* h | // nothing * h | children address, 3 bytes
* i | ELSIF 01 = FLAG_GROUP_ADDRESS_TYPE_ONEBYTE == addressType * i | 1 byte = bbbbbbbb match
* l | children address, 1 byte * l | case 1xxxxxxx => -((0xxxxxxx << 16) + (next byte << 8) + next byte)
* d | ELSIF 10 = FLAG_GROUP_ADDRESS_TYPE_TWOBYTES == addressType * d | otherwise => (bbbbbbbb<<16) + (next byte << 8) + next byte
* r | children address, 2 bytes * r | ELSIF 00 = FLAG_CHILDREN_ADDRESS_TYPE_NOADDRESS == addressType
* e | ELSE // 11 = FLAG_GROUP_ADDRESS_TYPE_THREEBYTES = addressType * e | // nothing
* n | children address, 3 bytes * n | ELSIF 01 = FLAG_CHILDREN_ADDRESS_TYPE_ONEBYTE == addressType
* A | END * A | children address, 1 byte
* d * d | ELSIF 10 = FLAG_CHILDREN_ADDRESS_TYPE_TWOBYTES == addressType
* dress * d | children address, 2 bytes
* r | ELSE // 11 = FLAG_CHILDREN_ADDRESS_TYPE_THREEBYTES == addressType
* e | children address, 3 bytes
* s | END
* s
* ress
* *
* | IF FLAG_IS_TERMINAL && FLAG_HAS_SHORTCUT_TARGETS * | IF FLAG_IS_TERMINAL && FLAG_HAS_SHORTCUT_TARGETS
* | shortcut string list * | shortcut string list
@ -156,33 +161,33 @@ public final class FormatSpec {
* characters which should never happen anyway (and still work, but take 3 bytes). * characters which should never happen anyway (and still work, but take 3 bytes).
* *
* bigram address list is: * bigram address list is:
* <flags> = | hasNext = 1 bit, 1 = yes, 0 = no : FLAG_ATTRIBUTE_HAS_NEXT * <flags> = | hasNext = 1 bit, 1 = yes, 0 = no : FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT
* | addressSign = 1 bit, : FLAG_ATTRIBUTE_OFFSET_NEGATIVE * | addressSign = 1 bit, : FLAG_BIGRAM_ATTR_OFFSET_NEGATIVE
* | 1 = must take -address, 0 = must take +address * | 1 = must take -address, 0 = must take +address
* | xx : mask with MASK_ATTRIBUTE_ADDRESS_TYPE * | xx : mask with MASK_BIGRAM_ATTR_ADDRESS_TYPE
* | addressFormat = 2 bits, 00 = unused : FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE * | addressFormat = 2 bits, 00 = unused : FLAG_BIGRAM_ATTR_ADDRESS_TYPE_ONEBYTE
* | 01 = 1 byte : FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE * | 01 = 1 byte : FLAG_BIGRAM_ATTR_ADDRESS_TYPE_ONEBYTE
* | 10 = 2 bytes : FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES * | 10 = 2 bytes : FLAG_BIGRAM_ATTR_ADDRESS_TYPE_TWOBYTES
* | 11 = 3 bytes : FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES * | 11 = 3 bytes : FLAG_BIGRAM_ATTR_ADDRESS_TYPE_THREEBYTES
* | 4 bits : frequency : mask with FLAG_ATTRIBUTE_FREQUENCY * | 4 bits : frequency : mask with FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY
* <address> | IF (01 == FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE == addressFormat) * <address> | IF (01 == FLAG_BIGRAM_ATTR_ADDRESS_TYPE_ONEBYTE == addressFormat)
* | read 1 byte, add top 4 bits * | read 1 byte, add top 4 bits
* | ELSIF (10 == FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES == addressFormat) * | ELSIF (10 == FLAG_BIGRAM_ATTR_ADDRESS_TYPE_TWOBYTES == addressFormat)
* | read 2 bytes, add top 4 bits * | read 2 bytes, add top 4 bits
* | ELSE // 11 == FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES == addressFormat * | ELSE // 11 == FLAG_BIGRAM_ATTR_ADDRESS_TYPE_THREEBYTES == addressFormat
* | read 3 bytes, add top 4 bits * | read 3 bytes, add top 4 bits
* | END * | END
* | if (FLAG_ATTRIBUTE_OFFSET_NEGATIVE) then address = -address * | if (FLAG_BIGRAM_ATTR_OFFSET_NEGATIVE) then address = -address
* if (FLAG_ATTRIBUTE_HAS_NEXT) goto bigram_and_shortcut_address_list_is * if (FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT) goto bigram_and_shortcut_address_list_is
* *
* shortcut string list is: * shortcut string list is:
* <byte size> = GROUP_SHORTCUT_LIST_SIZE_SIZE bytes, big-endian: size of the list, in bytes. * <byte size> = PTNODE_SHORTCUT_LIST_SIZE_SIZE bytes, big-endian: size of the list, in bytes.
* <flags> = | hasNext = 1 bit, 1 = yes, 0 = no : FLAG_ATTRIBUTE_HAS_NEXT * <flags> = | hasNext = 1 bit, 1 = yes, 0 = no : FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT
* | reserved = 3 bits, must be 0 * | reserved = 3 bits, must be 0
* | 4 bits : frequency : mask with FLAG_ATTRIBUTE_FREQUENCY * | 4 bits : frequency : mask with FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY
* <shortcut> = | string of characters at the char format described above, with the terminator * <shortcut> = | string of characters at the char format described above, with the terminator
* | used to signal the end of the string. * | used to signal the end of the string.
* if (FLAG_ATTRIBUTE_HAS_NEXT) goto flags * if (FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT) goto flags
*/ */
public static final int MAGIC_NUMBER = 0x9BC13AFE; public static final int MAGIC_NUMBER = 0x9BC13AFE;
@ -206,11 +211,11 @@ public final class FormatSpec {
static final int FORWARD_LINK_ADDRESS_SIZE = 3; static final int FORWARD_LINK_ADDRESS_SIZE = 3;
// These flags are used only in the static dictionary. // These flags are used only in the static dictionary.
static final int MASK_GROUP_ADDRESS_TYPE = 0xC0; static final int MASK_CHILDREN_ADDRESS_TYPE = 0xC0;
static final int FLAG_GROUP_ADDRESS_TYPE_NOADDRESS = 0x00; static final int FLAG_CHILDREN_ADDRESS_TYPE_NOADDRESS = 0x00;
static final int FLAG_GROUP_ADDRESS_TYPE_ONEBYTE = 0x40; static final int FLAG_CHILDREN_ADDRESS_TYPE_ONEBYTE = 0x40;
static final int FLAG_GROUP_ADDRESS_TYPE_TWOBYTES = 0x80; static final int FLAG_CHILDREN_ADDRESS_TYPE_TWOBYTES = 0x80;
static final int FLAG_GROUP_ADDRESS_TYPE_THREEBYTES = 0xC0; static final int FLAG_CHILDREN_ADDRESS_TYPE_THREEBYTES = 0xC0;
static final int FLAG_HAS_MULTIPLE_CHARS = 0x20; static final int FLAG_HAS_MULTIPLE_CHARS = 0x20;
@ -227,32 +232,32 @@ public final class FormatSpec {
static final int FLAG_IS_NOT_MOVED = 0x80 | FIXED_BIT_OF_DYNAMIC_UPDATE_MOVE; static final int FLAG_IS_NOT_MOVED = 0x80 | FIXED_BIT_OF_DYNAMIC_UPDATE_MOVE;
static final int FLAG_IS_DELETED = 0x80; static final int FLAG_IS_DELETED = 0x80;
static final int FLAG_ATTRIBUTE_HAS_NEXT = 0x80; static final int FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT = 0x80;
static final int FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40; static final int FLAG_BIGRAM_ATTR_OFFSET_NEGATIVE = 0x40;
static final int MASK_ATTRIBUTE_ADDRESS_TYPE = 0x30; static final int MASK_BIGRAM_ATTR_ADDRESS_TYPE = 0x30;
static final int FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE = 0x10; static final int FLAG_BIGRAM_ATTR_ADDRESS_TYPE_ONEBYTE = 0x10;
static final int FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20; static final int FLAG_BIGRAM_ATTR_ADDRESS_TYPE_TWOBYTES = 0x20;
static final int FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30; static final int FLAG_BIGRAM_ATTR_ADDRESS_TYPE_THREEBYTES = 0x30;
static final int FLAG_ATTRIBUTE_FREQUENCY = 0x0F; static final int FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY = 0x0F;
static final int GROUP_CHARACTERS_TERMINATOR = 0x1F; static final int PTNODE_CHARACTERS_TERMINATOR = 0x1F;
static final int GROUP_TERMINATOR_SIZE = 1; static final int PTNODE_TERMINATOR_SIZE = 1;
static final int GROUP_FLAGS_SIZE = 1; static final int PTNODE_FLAGS_SIZE = 1;
static final int GROUP_FREQUENCY_SIZE = 1; static final int PTNODE_FREQUENCY_SIZE = 1;
static final int GROUP_MAX_ADDRESS_SIZE = 3; static final int PTNODE_MAX_ADDRESS_SIZE = 3;
static final int GROUP_ATTRIBUTE_FLAGS_SIZE = 1; static final int PTNODE_ATTRIBUTE_FLAGS_SIZE = 1;
static final int GROUP_ATTRIBUTE_MAX_ADDRESS_SIZE = 3; static final int PTNODE_ATTRIBUTE_MAX_ADDRESS_SIZE = 3;
static final int GROUP_SHORTCUT_LIST_SIZE_SIZE = 2; static final int PTNODE_SHORTCUT_LIST_SIZE_SIZE = 2;
static final int NO_CHILDREN_ADDRESS = Integer.MIN_VALUE; static final int NO_CHILDREN_ADDRESS = Integer.MIN_VALUE;
static final int NO_PARENT_ADDRESS = 0; static final int NO_PARENT_ADDRESS = 0;
static final int NO_FORWARD_LINK_ADDRESS = 0; static final int NO_FORWARD_LINK_ADDRESS = 0;
static final int INVALID_CHARACTER = -1; static final int INVALID_CHARACTER = -1;
static final int MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT = 0x7F; // 127 static final int MAX_PTNODES_FOR_ONE_BYTE_PTNODE_COUNT = 0x7F; // 127
static final int MAX_CHARGROUPS_IN_A_PT_NODE_ARRAY = 0x7FFF; // 32767 static final int MAX_PTNODES_IN_A_PT_NODE_ARRAY = 0x7FFF; // 32767
static final int MAX_BIGRAMS_IN_A_GROUP = 10000; static final int MAX_BIGRAMS_IN_A_PTNODE = 10000;
static final int MAX_TERMINAL_FREQUENCY = 255; static final int MAX_TERMINAL_FREQUENCY = 255;
static final int MAX_BIGRAM_FREQUENCY = 15; static final int MAX_BIGRAM_FREQUENCY = 15;

View File

@ -37,15 +37,15 @@ public final class FusionDictionary implements Iterable<Word> {
private static int CHARACTER_NOT_FOUND_INDEX = -1; private static int CHARACTER_NOT_FOUND_INDEX = -1;
/** /**
* A node array of the dictionary, containing several CharGroups. * A node array of the dictionary, containing several PtNodes.
* *
* A PtNodeArray is but an ordered array of CharGroups, which essentially contain all the * A PtNodeArray is but an ordered array of PtNodes, which essentially contain all the
* real information. * real information.
* This class also contains fields to cache size and address, to help with binary * This class also contains fields to cache size and address, to help with binary
* generation. * generation.
*/ */
public static final class PtNodeArray { public static final class PtNodeArray {
ArrayList<CharGroup> mData; ArrayList<PtNode> mData;
// To help with binary generation // To help with binary generation
int mCachedSize = Integer.MIN_VALUE; int mCachedSize = Integer.MIN_VALUE;
// mCachedAddressBefore/AfterUpdate are helpers for binary dictionary generation. They // mCachedAddressBefore/AfterUpdate are helpers for binary dictionary generation. They
@ -58,9 +58,9 @@ public final class FusionDictionary implements Iterable<Word> {
int mCachedParentAddress = 0; int mCachedParentAddress = 0;
public PtNodeArray() { public PtNodeArray() {
mData = new ArrayList<CharGroup>(); mData = new ArrayList<PtNode>();
} }
public PtNodeArray(ArrayList<CharGroup> data) { public PtNodeArray(ArrayList<PtNode> data) {
mData = data; mData = data;
} }
} }
@ -93,18 +93,19 @@ public final class FusionDictionary implements Iterable<Word> {
} }
/** /**
* A group of characters, with a frequency, shortcut targets, bigrams, and children. * PtNode is a group of characters, with a frequency, shortcut targets, bigrams, and children
* (Pt means Patricia Trie).
* *
* This is the central class of the in-memory representation. A CharGroup is what can * This is the central class of the in-memory representation. A PtNode is what can
* be seen as a traditional "trie node", except it can hold several characters at the * be seen as a traditional "trie node", except it can hold several characters at the
* same time. A CharGroup essentially represents one or several characters in the middle * same time. A PtNode essentially represents one or several characters in the middle
* of the trie tree; as such, it can be a terminal, and it can have children. * of the trie tree; as such, it can be a terminal, and it can have children.
* In this in-memory representation, whether the CharGroup is a terminal or not is represented * In this in-memory representation, whether the PtNode is a terminal or not is represented
* in the frequency, where NOT_A_TERMINAL (= -1) means this is not a terminal and any other * in the frequency, where NOT_A_TERMINAL (= -1) means this is not a terminal and any other
* value is the frequency of this terminal. A terminal may have non-null shortcuts and/or * value is the frequency of this terminal. A terminal may have non-null shortcuts and/or
* bigrams, but a non-terminal may not. Moreover, children, if present, are null. * bigrams, but a non-terminal may not. Moreover, children, if present, are null.
*/ */
public static final class CharGroup { public static final class PtNode {
public static final int NOT_A_TERMINAL = -1; public static final int NOT_A_TERMINAL = -1;
final int mChars[]; final int mChars[];
ArrayList<WeightedString> mShortcutTargets; ArrayList<WeightedString> mShortcutTargets;
@ -119,11 +120,11 @@ public final class FusionDictionary implements Iterable<Word> {
// same time. Updating will update the AfterUpdate value, and the code will move them // same time. Updating will update the AfterUpdate value, and the code will move them
// to BeforeUpdate before the next update pass. // to BeforeUpdate before the next update pass.
// The update process does not need two versions of mCachedSize. // The update process does not need two versions of mCachedSize.
int mCachedSize; // The size, in bytes, of this char group. int mCachedSize; // The size, in bytes, of this PtNode.
int mCachedAddressBeforeUpdate; // The address of this char group (before update) int mCachedAddressBeforeUpdate; // The address of this PtNode (before update)
int mCachedAddressAfterUpdate; // The address of this char group (after update) int mCachedAddressAfterUpdate; // The address of this PtNode (after update)
public CharGroup(final int[] chars, final ArrayList<WeightedString> shortcutTargets, public PtNode(final int[] chars, final ArrayList<WeightedString> shortcutTargets,
final ArrayList<WeightedString> bigrams, final int frequency, final ArrayList<WeightedString> bigrams, final int frequency,
final boolean isNotAWord, final boolean isBlacklistEntry) { final boolean isNotAWord, final boolean isBlacklistEntry) {
mChars = chars; mChars = chars;
@ -135,7 +136,7 @@ public final class FusionDictionary implements Iterable<Word> {
mIsBlacklistEntry = isBlacklistEntry; mIsBlacklistEntry = isBlacklistEntry;
} }
public CharGroup(final int[] chars, final ArrayList<WeightedString> shortcutTargets, public PtNode(final int[] chars, final ArrayList<WeightedString> shortcutTargets,
final ArrayList<WeightedString> bigrams, final int frequency, final ArrayList<WeightedString> bigrams, final int frequency,
final boolean isNotAWord, final boolean isBlacklistEntry, final boolean isNotAWord, final boolean isBlacklistEntry,
final PtNodeArray children) { final PtNodeArray children) {
@ -148,7 +149,7 @@ public final class FusionDictionary implements Iterable<Word> {
mIsBlacklistEntry = isBlacklistEntry; mIsBlacklistEntry = isBlacklistEntry;
} }
public void addChild(CharGroup n) { public void addChild(PtNode n) {
if (null == mChildren) { if (null == mChildren) {
mChildren = new PtNodeArray(); mChildren = new PtNodeArray();
} }
@ -245,7 +246,7 @@ public final class FusionDictionary implements Iterable<Word> {
} }
/** /**
* Updates the CharGroup with the given properties. Adds the shortcut and bigram lists to * Updates the PtNode with the given properties. Adds the shortcut and bigram lists to
* the existing ones if any. Note: unigram, bigram, and shortcut frequencies are only * the existing ones if any. Note: unigram, bigram, and shortcut frequencies are only
* updated if they are higher than the existing ones. * updated if they are higher than the existing ones.
*/ */
@ -407,13 +408,13 @@ public final class FusionDictionary implements Iterable<Word> {
} }
/** /**
* Sanity check for a node array. * Sanity check for a PtNode array.
* *
* This method checks that all CharGroups in a node array are ordered as expected. * This method checks that all PtNodes in a node array are ordered as expected.
* If they are, nothing happens. If they aren't, an exception is thrown. * If they are, nothing happens. If they aren't, an exception is thrown.
*/ */
private void checkStack(PtNodeArray nodeArray) { private void checkStack(PtNodeArray ptNodeArray) {
ArrayList<CharGroup> stack = nodeArray.mData; ArrayList<PtNode> stack = ptNodeArray.mData;
int lastValue = -1; int lastValue = -1;
for (int i = 0; i < stack.size(); ++i) { for (int i = 0; i < stack.size(); ++i) {
int currentValue = stack.get(i).mChars[0]; int currentValue = stack.get(i).mChars[0];
@ -432,18 +433,18 @@ public final class FusionDictionary implements Iterable<Word> {
* @param frequency the bigram frequency * @param frequency the bigram frequency
*/ */
public void setBigram(final String word1, final String word2, final int frequency) { public void setBigram(final String word1, final String word2, final int frequency) {
CharGroup charGroup = findWordInTree(mRootNodeArray, word1); PtNode ptNode = findWordInTree(mRootNodeArray, word1);
if (charGroup != null) { if (ptNode != null) {
final CharGroup charGroup2 = findWordInTree(mRootNodeArray, word2); final PtNode ptNode2 = findWordInTree(mRootNodeArray, word2);
if (charGroup2 == null) { if (ptNode2 == null) {
add(getCodePoints(word2), 0, null, false /* isNotAWord */, add(getCodePoints(word2), 0, null, false /* isNotAWord */,
false /* isBlacklistEntry */); false /* isBlacklistEntry */);
// The chargroup for the first word may have moved by the above insertion, // The PtNode for the first word may have moved by the above insertion,
// if word1 and word2 share a common stem that happens not to have been // if word1 and word2 share a common stem that happens not to have been
// a cutting point until now. In this case, we need to refresh charGroup. // a cutting point until now. In this case, we need to refresh ptNode.
charGroup = findWordInTree(mRootNodeArray, word1); ptNode = findWordInTree(mRootNodeArray, word1);
} }
charGroup.addBigram(word2, frequency); ptNode.addBigram(word2, frequency);
} else { } else {
throw new RuntimeException("First word of bigram not found"); throw new RuntimeException("First word of bigram not found");
} }
@ -473,84 +474,83 @@ public final class FusionDictionary implements Iterable<Word> {
PtNodeArray currentNodeArray = mRootNodeArray; PtNodeArray currentNodeArray = mRootNodeArray;
int charIndex = 0; int charIndex = 0;
CharGroup currentGroup = null; PtNode currentPtNode = null;
int differentCharIndex = 0; // Set by the loop to the index of the char that differs int differentCharIndex = 0; // Set by the loop to the index of the char that differs
int nodeIndex = findIndexOfChar(mRootNodeArray, word[charIndex]); int nodeIndex = findIndexOfChar(mRootNodeArray, word[charIndex]);
while (CHARACTER_NOT_FOUND_INDEX != nodeIndex) { while (CHARACTER_NOT_FOUND_INDEX != nodeIndex) {
currentGroup = currentNodeArray.mData.get(nodeIndex); currentPtNode = currentNodeArray.mData.get(nodeIndex);
differentCharIndex = compareCharArrays(currentGroup.mChars, word, charIndex); differentCharIndex = compareCharArrays(currentPtNode.mChars, word, charIndex);
if (ARRAYS_ARE_EQUAL != differentCharIndex if (ARRAYS_ARE_EQUAL != differentCharIndex
&& differentCharIndex < currentGroup.mChars.length) break; && differentCharIndex < currentPtNode.mChars.length) break;
if (null == currentGroup.mChildren) break; if (null == currentPtNode.mChildren) break;
charIndex += currentGroup.mChars.length; charIndex += currentPtNode.mChars.length;
if (charIndex >= word.length) break; if (charIndex >= word.length) break;
currentNodeArray = currentGroup.mChildren; currentNodeArray = currentPtNode.mChildren;
nodeIndex = findIndexOfChar(currentNodeArray, word[charIndex]); nodeIndex = findIndexOfChar(currentNodeArray, word[charIndex]);
} }
if (CHARACTER_NOT_FOUND_INDEX == nodeIndex) { if (CHARACTER_NOT_FOUND_INDEX == nodeIndex) {
// No node at this point to accept the word. Create one. // No node at this point to accept the word. Create one.
final int insertionIndex = findInsertionIndex(currentNodeArray, word[charIndex]); final int insertionIndex = findInsertionIndex(currentNodeArray, word[charIndex]);
final CharGroup newGroup = new CharGroup( final PtNode newPtNode = new PtNode(Arrays.copyOfRange(word, charIndex, word.length),
Arrays.copyOfRange(word, charIndex, word.length),
shortcutTargets, null /* bigrams */, frequency, isNotAWord, isBlacklistEntry); shortcutTargets, null /* bigrams */, frequency, isNotAWord, isBlacklistEntry);
currentNodeArray.mData.add(insertionIndex, newGroup); currentNodeArray.mData.add(insertionIndex, newPtNode);
if (DBG) checkStack(currentNodeArray); if (DBG) checkStack(currentNodeArray);
} else { } else {
// There is a word with a common prefix. // There is a word with a common prefix.
if (differentCharIndex == currentGroup.mChars.length) { if (differentCharIndex == currentPtNode.mChars.length) {
if (charIndex + differentCharIndex >= word.length) { if (charIndex + differentCharIndex >= word.length) {
// The new word is a prefix of an existing word, but the node on which it // The new word is a prefix of an existing word, but the node on which it
// should end already exists as is. Since the old CharGroup was not a terminal, // should end already exists as is. Since the old PtNode was not a terminal,
// make it one by filling in its frequency and other attributes // make it one by filling in its frequency and other attributes
currentGroup.update(frequency, shortcutTargets, null, isNotAWord, currentPtNode.update(frequency, shortcutTargets, null, isNotAWord,
isBlacklistEntry); isBlacklistEntry);
} else { } else {
// The new word matches the full old word and extends past it. // The new word matches the full old word and extends past it.
// We only have to create a new node and add it to the end of this. // We only have to create a new node and add it to the end of this.
final CharGroup newNode = new CharGroup( final PtNode newNode = new PtNode(
Arrays.copyOfRange(word, charIndex + differentCharIndex, word.length), Arrays.copyOfRange(word, charIndex + differentCharIndex, word.length),
shortcutTargets, null /* bigrams */, frequency, isNotAWord, shortcutTargets, null /* bigrams */, frequency, isNotAWord,
isBlacklistEntry); isBlacklistEntry);
currentGroup.mChildren = new PtNodeArray(); currentPtNode.mChildren = new PtNodeArray();
currentGroup.mChildren.mData.add(newNode); currentPtNode.mChildren.mData.add(newNode);
} }
} else { } else {
if (0 == differentCharIndex) { if (0 == differentCharIndex) {
// Exact same word. Update the frequency if higher. This will also add the // Exact same word. Update the frequency if higher. This will also add the
// new shortcuts to the existing shortcut list if it already exists. // new shortcuts to the existing shortcut list if it already exists.
currentGroup.update(frequency, shortcutTargets, null, currentPtNode.update(frequency, shortcutTargets, null,
currentGroup.mIsNotAWord && isNotAWord, currentPtNode.mIsNotAWord && isNotAWord,
currentGroup.mIsBlacklistEntry || isBlacklistEntry); currentPtNode.mIsBlacklistEntry || isBlacklistEntry);
} else { } else {
// Partial prefix match only. We have to replace the current node with a node // Partial prefix match only. We have to replace the current node with a node
// containing the current prefix and create two new ones for the tails. // containing the current prefix and create two new ones for the tails.
PtNodeArray newChildren = new PtNodeArray(); PtNodeArray newChildren = new PtNodeArray();
final CharGroup newOldWord = new CharGroup( final PtNode newOldWord = new PtNode(
Arrays.copyOfRange(currentGroup.mChars, differentCharIndex, Arrays.copyOfRange(currentPtNode.mChars, differentCharIndex,
currentGroup.mChars.length), currentGroup.mShortcutTargets, currentPtNode.mChars.length), currentPtNode.mShortcutTargets,
currentGroup.mBigrams, currentGroup.mFrequency, currentPtNode.mBigrams, currentPtNode.mFrequency,
currentGroup.mIsNotAWord, currentGroup.mIsBlacklistEntry, currentPtNode.mIsNotAWord, currentPtNode.mIsBlacklistEntry,
currentGroup.mChildren); currentPtNode.mChildren);
newChildren.mData.add(newOldWord); newChildren.mData.add(newOldWord);
final CharGroup newParent; final PtNode newParent;
if (charIndex + differentCharIndex >= word.length) { if (charIndex + differentCharIndex >= word.length) {
newParent = new CharGroup( newParent = new PtNode(
Arrays.copyOfRange(currentGroup.mChars, 0, differentCharIndex), Arrays.copyOfRange(currentPtNode.mChars, 0, differentCharIndex),
shortcutTargets, null /* bigrams */, frequency, shortcutTargets, null /* bigrams */, frequency,
isNotAWord, isBlacklistEntry, newChildren); isNotAWord, isBlacklistEntry, newChildren);
} else { } else {
newParent = new CharGroup( newParent = new PtNode(
Arrays.copyOfRange(currentGroup.mChars, 0, differentCharIndex), Arrays.copyOfRange(currentPtNode.mChars, 0, differentCharIndex),
null /* shortcutTargets */, null /* bigrams */, -1, null /* shortcutTargets */, null /* bigrams */, -1,
false /* isNotAWord */, false /* isBlacklistEntry */, newChildren); false /* isNotAWord */, false /* isBlacklistEntry */, newChildren);
final CharGroup newWord = new CharGroup(Arrays.copyOfRange(word, final PtNode newWord = new PtNode(Arrays.copyOfRange(word,
charIndex + differentCharIndex, word.length), charIndex + differentCharIndex, word.length),
shortcutTargets, null /* bigrams */, frequency, shortcutTargets, null /* bigrams */, frequency,
isNotAWord, isBlacklistEntry); isNotAWord, isBlacklistEntry);
final int addIndex = word[charIndex + differentCharIndex] final int addIndex = word[charIndex + differentCharIndex]
> currentGroup.mChars[differentCharIndex] ? 1 : 0; > currentPtNode.mChars[differentCharIndex] ? 1 : 0;
newChildren.mData.add(addIndex, newWord); newChildren.mData.add(addIndex, newWord);
} }
currentNodeArray.mData.set(nodeIndex, newParent); currentNodeArray.mData.set(nodeIndex, newParent);
@ -589,29 +589,29 @@ public final class FusionDictionary implements Iterable<Word> {
} }
/** /**
* Helper class that compares and sorts two chargroups according to their * Helper class that compares and sorts two PtNodes according to their
* first element only. I repeat: ONLY the first element is considered, the rest * first element only. I repeat: ONLY the first element is considered, the rest
* is ignored. * is ignored.
* This comparator imposes orderings that are inconsistent with equals. * This comparator imposes orderings that are inconsistent with equals.
*/ */
static private final class CharGroupComparator implements java.util.Comparator<CharGroup> { static private final class PtNodeComparator implements java.util.Comparator<PtNode> {
@Override @Override
public int compare(CharGroup c1, CharGroup c2) { public int compare(PtNode p1, PtNode p2) {
if (c1.mChars[0] == c2.mChars[0]) return 0; if (p1.mChars[0] == p2.mChars[0]) return 0;
return c1.mChars[0] < c2.mChars[0] ? -1 : 1; return p1.mChars[0] < p2.mChars[0] ? -1 : 1;
} }
} }
final static private CharGroupComparator CHARGROUP_COMPARATOR = new CharGroupComparator(); final static private PtNodeComparator PTNODE_COMPARATOR = new PtNodeComparator();
/** /**
* Finds the insertion index of a character within a node array. * Finds the insertion index of a character within a node array.
*/ */
private static int findInsertionIndex(final PtNodeArray nodeArray, int character) { private static int findInsertionIndex(final PtNodeArray nodeArray, int character) {
final ArrayList<CharGroup> data = nodeArray.mData; final ArrayList<PtNode> data = nodeArray.mData;
final CharGroup reference = new CharGroup(new int[] { character }, final PtNode reference = new PtNode(new int[] { character },
null /* shortcutTargets */, null /* bigrams */, 0, false /* isNotAWord */, null /* shortcutTargets */, null /* bigrams */, 0, false /* isNotAWord */,
false /* isBlacklistEntry */); false /* isBlacklistEntry */);
int result = Collections.binarySearch(data, reference, CHARGROUP_COMPARATOR); int result = Collections.binarySearch(data, reference, PTNODE_COMPARATOR);
return result >= 0 ? result : -result - 1; return result >= 0 ? result : -result - 1;
} }
@ -633,35 +633,37 @@ public final class FusionDictionary implements Iterable<Word> {
* Helper method to find a word in a given branch. * Helper method to find a word in a given branch.
*/ */
@SuppressWarnings("unused") @SuppressWarnings("unused")
public static CharGroup findWordInTree(PtNodeArray nodeArray, final String string) { public static PtNode findWordInTree(PtNodeArray nodeArray, final String string) {
int index = 0; int index = 0;
final StringBuilder checker = DBG ? new StringBuilder() : null; final StringBuilder checker = DBG ? new StringBuilder() : null;
final int[] codePoints = getCodePoints(string); final int[] codePoints = getCodePoints(string);
CharGroup currentGroup; PtNode currentPtNode;
do { do {
int indexOfGroup = findIndexOfChar(nodeArray, codePoints[index]); int indexOfGroup = findIndexOfChar(nodeArray, codePoints[index]);
if (CHARACTER_NOT_FOUND_INDEX == indexOfGroup) return null; if (CHARACTER_NOT_FOUND_INDEX == indexOfGroup) return null;
currentGroup = nodeArray.mData.get(indexOfGroup); currentPtNode = nodeArray.mData.get(indexOfGroup);
if (codePoints.length - index < currentGroup.mChars.length) return null; if (codePoints.length - index < currentPtNode.mChars.length) return null;
int newIndex = index; int newIndex = index;
while (newIndex < codePoints.length && newIndex - index < currentGroup.mChars.length) { while (newIndex < codePoints.length && newIndex - index < currentPtNode.mChars.length) {
if (currentGroup.mChars[newIndex - index] != codePoints[newIndex]) return null; if (currentPtNode.mChars[newIndex - index] != codePoints[newIndex]) return null;
newIndex++; newIndex++;
} }
index = newIndex; index = newIndex;
if (DBG) checker.append(new String(currentGroup.mChars, 0, currentGroup.mChars.length)); if (DBG) {
checker.append(new String(currentPtNode.mChars, 0, currentPtNode.mChars.length));
}
if (index < codePoints.length) { if (index < codePoints.length) {
nodeArray = currentGroup.mChildren; nodeArray = currentPtNode.mChildren;
} }
} while (null != nodeArray && index < codePoints.length); } while (null != nodeArray && index < codePoints.length);
if (index < codePoints.length) return null; if (index < codePoints.length) return null;
if (!currentGroup.isTerminal()) return null; if (!currentPtNode.isTerminal()) return null;
if (DBG && !string.equals(checker.toString())) return null; if (DBG && !string.equals(checker.toString())) return null;
return currentGroup; return currentPtNode;
} }
/** /**
@ -675,18 +677,18 @@ public final class FusionDictionary implements Iterable<Word> {
} }
/** /**
* Recursively count the number of character groups in a given branch of the trie. * Recursively count the number of PtNodes in a given branch of the trie.
* *
* @param nodeArray the parent node. * @param nodeArray the parent node.
* @return the number of char groups in all the branch under this node. * @return the number of PtNodes in all the branch under this node.
*/ */
public static int countCharGroups(final PtNodeArray nodeArray) { public static int countPtNodes(final PtNodeArray nodeArray) {
final int nodeSize = nodeArray.mData.size(); final int nodeSize = nodeArray.mData.size();
int size = nodeSize; int size = nodeSize;
for (int i = nodeSize - 1; i >= 0; --i) { for (int i = nodeSize - 1; i >= 0; --i) {
CharGroup group = nodeArray.mData.get(i); PtNode ptNode = nodeArray.mData.get(i);
if (null != group.mChildren) if (null != ptNode.mChildren)
size += countCharGroups(group.mChildren); size += countPtNodes(ptNode.mChildren);
} }
return size; return size;
} }
@ -700,9 +702,9 @@ public final class FusionDictionary implements Iterable<Word> {
public static int countNodeArrays(final PtNodeArray nodeArray) { public static int countNodeArrays(final PtNodeArray nodeArray) {
int size = 1; int size = 1;
for (int i = nodeArray.mData.size() - 1; i >= 0; --i) { for (int i = nodeArray.mData.size() - 1; i >= 0; --i) {
CharGroup group = nodeArray.mData.get(i); PtNode ptNode = nodeArray.mData.get(i);
if (null != group.mChildren) if (null != ptNode.mChildren)
size += countNodeArrays(group.mChildren); size += countNodeArrays(ptNode.mChildren);
} }
return size; return size;
} }
@ -713,9 +715,9 @@ public final class FusionDictionary implements Iterable<Word> {
private static boolean hasBigramsInternal(final PtNodeArray nodeArray) { private static boolean hasBigramsInternal(final PtNodeArray nodeArray) {
if (null == nodeArray) return false; if (null == nodeArray) return false;
for (int i = nodeArray.mData.size() - 1; i >= 0; --i) { for (int i = nodeArray.mData.size() - 1; i >= 0; --i) {
CharGroup group = nodeArray.mData.get(i); PtNode ptNode = nodeArray.mData.get(i);
if (null != group.mBigrams) return true; if (null != ptNode.mBigrams) return true;
if (hasBigramsInternal(group.mChildren)) return true; if (hasBigramsInternal(ptNode.mChildren)) return true;
} }
return false; return false;
} }
@ -748,8 +750,8 @@ public final class FusionDictionary implements Iterable<Word> {
MakedictLog.i("Do not merge tails"); MakedictLog.i("Do not merge tails");
return; return;
// MakedictLog.i("Merging nodes. Number of nodes : " + countNodes(root)); // MakedictLog.i("Merging PtNodes. Number of PtNodes : " + countPtNodes(root));
// MakedictLog.i("Number of groups : " + countCharGroups(root)); // MakedictLog.i("Number of PtNodes : " + countPtNodes(root));
// //
// final HashMap<String, ArrayList<PtNodeArray>> repository = // final HashMap<String, ArrayList<PtNodeArray>> repository =
// new HashMap<String, ArrayList<PtNodeArray>>(); // new HashMap<String, ArrayList<PtNodeArray>>();
@ -771,25 +773,25 @@ public final class FusionDictionary implements Iterable<Word> {
// if (a.data.size() != b.data.size()) return false; // if (a.data.size() != b.data.size()) return false;
// final int size = a.data.size(); // final int size = a.data.size();
// for (int i = size - 1; i >= 0; --i) { // for (int i = size - 1; i >= 0; --i) {
// CharGroup aGroup = a.data.get(i); // PtNode aPtNode = a.data.get(i);
// CharGroup bGroup = b.data.get(i); // PtNode bPtNode = b.data.get(i);
// if (aGroup.frequency != bGroup.frequency) return false; // if (aPtNode.frequency != bPtNode.frequency) return false;
// if (aGroup.alternates == null && bGroup.alternates != null) return false; // if (aPtNode.alternates == null && bPtNode.alternates != null) return false;
// if (aGroup.alternates != null && !aGroup.equals(bGroup.alternates)) return false; // if (aPtNode.alternates != null && !aPtNode.equals(bPtNode.alternates)) return false;
// if (!Arrays.equals(aGroup.chars, bGroup.chars)) return false; // if (!Arrays.equals(aPtNode.chars, bPtNode.chars)) return false;
// if (!isEqual(aGroup.children, bGroup.children)) return false; // if (!isEqual(aPtNode.children, bPtNode.children)) return false;
// } // }
// return true; // return true;
// } // }
// static private HashMap<String, ArrayList<PtNodeArray>> mergeTailsInner( // static private HashMap<String, ArrayList<PtNodeArray>> mergeTailsInner(
// final HashMap<String, ArrayList<PtNodeArray>> map, final PtNodeArray nodeArray) { // final HashMap<String, ArrayList<PtNodeArray>> map, final PtNodeArray nodeArray) {
// final ArrayList<CharGroup> branches = nodeArray.data; // final ArrayList<PtNode> branches = nodeArray.data;
// final int nodeSize = branches.size(); // final int nodeSize = branches.size();
// for (int i = 0; i < nodeSize; ++i) { // for (int i = 0; i < nodeSize; ++i) {
// CharGroup group = branches.get(i); // PtNode ptNode = branches.get(i);
// if (null != group.children) { // if (null != ptNode.children) {
// String pseudoHash = getPseudoHash(group.children); // String pseudoHash = getPseudoHash(ptNode.children);
// ArrayList<PtNodeArray> similarList = map.get(pseudoHash); // ArrayList<PtNodeArray> similarList = map.get(pseudoHash);
// if (null == similarList) { // if (null == similarList) {
// similarList = new ArrayList<PtNodeArray>(); // similarList = new ArrayList<PtNodeArray>();
@ -797,16 +799,16 @@ public final class FusionDictionary implements Iterable<Word> {
// } // }
// boolean merged = false; // boolean merged = false;
// for (PtNodeArray similar : similarList) { // for (PtNodeArray similar : similarList) {
// if (isEqual(group.children, similar)) { // if (isEqual(ptNode.children, similar)) {
// group.children = similar; // ptNode.children = similar;
// merged = true; // merged = true;
// break; // break;
// } // }
// } // }
// if (!merged) { // if (!merged) {
// similarList.add(group.children); // similarList.add(ptNode.children);
// } // }
// mergeTailsInner(map, group.children); // mergeTailsInner(map, ptNode.children);
// } // }
// } // }
// return map; // return map;
@ -814,9 +816,9 @@ public final class FusionDictionary implements Iterable<Word> {
// private static String getPseudoHash(final PtNodeArray nodeArray) { // private static String getPseudoHash(final PtNodeArray nodeArray) {
// StringBuilder s = new StringBuilder(); // StringBuilder s = new StringBuilder();
// for (CharGroup g : nodeArray.data) { // for (PtNode ptNode : nodeArray.data) {
// s.append(g.frequency); // s.append(ptNode.frequency);
// for (int ch : g.chars) { // for (int ch : ptNode.chars) {
// s.append(Character.toChars(ch)); // s.append(Character.toChars(ch));
// } // }
// } // }
@ -830,20 +832,20 @@ public final class FusionDictionary implements Iterable<Word> {
*/ */
public static final class DictionaryIterator implements Iterator<Word> { public static final class DictionaryIterator implements Iterator<Word> {
private static final class Position { private static final class Position {
public Iterator<CharGroup> pos; public Iterator<PtNode> pos;
public int length; public int length;
public Position(ArrayList<CharGroup> groups) { public Position(ArrayList<PtNode> ptNodes) {
pos = groups.iterator(); pos = ptNodes.iterator();
length = 0; length = 0;
} }
} }
final StringBuilder mCurrentString; final StringBuilder mCurrentString;
final LinkedList<Position> mPositions; final LinkedList<Position> mPositions;
public DictionaryIterator(ArrayList<CharGroup> root) { public DictionaryIterator(ArrayList<PtNode> ptRoot) {
mCurrentString = new StringBuilder(); mCurrentString = new StringBuilder();
mPositions = new LinkedList<Position>(); mPositions = new LinkedList<Position>();
final Position rootPos = new Position(root); final Position rootPos = new Position(ptRoot);
mPositions.add(rootPos); mPositions.add(rootPos);
} }
@ -864,20 +866,20 @@ public final class FusionDictionary implements Iterable<Word> {
do { do {
if (currentPos.pos.hasNext()) { if (currentPos.pos.hasNext()) {
final CharGroup currentGroup = currentPos.pos.next(); final PtNode currentPtNode = currentPos.pos.next();
currentPos.length = mCurrentString.length(); currentPos.length = mCurrentString.length();
for (int i : currentGroup.mChars) { for (int i : currentPtNode.mChars) {
mCurrentString.append(Character.toChars(i)); mCurrentString.append(Character.toChars(i));
} }
if (null != currentGroup.mChildren) { if (null != currentPtNode.mChildren) {
currentPos = new Position(currentGroup.mChildren.mData); currentPos = new Position(currentPtNode.mChildren.mData);
currentPos.length = mCurrentString.length(); currentPos.length = mCurrentString.length();
mPositions.addLast(currentPos); mPositions.addLast(currentPos);
} }
if (currentGroup.mFrequency >= 0) { if (currentPtNode.mFrequency >= 0) {
return new Word(mCurrentString.toString(), currentGroup.mFrequency, return new Word(mCurrentString.toString(), currentPtNode.mFrequency,
currentGroup.mShortcutTargets, currentGroup.mBigrams, currentPtNode.mShortcutTargets, currentPtNode.mBigrams,
currentGroup.mIsNotAWord, currentGroup.mIsBlacklistEntry); currentPtNode.mIsNotAWord, currentPtNode.mIsBlacklistEntry);
} }
} else { } else {
mPositions.removeLast(); mPositions.removeLast();

View File

@ -21,9 +21,9 @@ import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import java.util.ArrayList; import java.util.ArrayList;
/** /**
* Raw char group info straight out of a file. This will contain numbers for addresses. * Raw PtNode info straight out of a file. This will contain numbers for addresses.
*/ */
public final class CharGroupInfo { public final class PtNodeInfo {
public final int mOriginalAddress; public final int mOriginalAddress;
public final int mEndAddress; public final int mEndAddress;
@ -35,7 +35,7 @@ public final class CharGroupInfo {
public final ArrayList<WeightedString> mShortcutTargets; public final ArrayList<WeightedString> mShortcutTargets;
public final ArrayList<PendingAttribute> mBigrams; public final ArrayList<PendingAttribute> mBigrams;
public CharGroupInfo(final int originalAddress, final int endAddress, final int flags, public PtNodeInfo(final int originalAddress, final int endAddress, final int flags,
final int[] characters, final int frequency, final int parentAddress, final int[] characters, final int frequency, final int parentAddress,
final int childrenAddress, final ArrayList<WeightedString> shortcutTargets, final int childrenAddress, final ArrayList<WeightedString> shortcutTargets,
final ArrayList<PendingAttribute> bigrams) { final ArrayList<PendingAttribute> bigrams) {

View File

@ -21,7 +21,7 @@ import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncodin
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import com.android.inputmethod.latin.utils.JniUtils; import com.android.inputmethod.latin.utils.JniUtils;
@ -99,14 +99,14 @@ public class Ver3DictDecoder implements DictDecoder {
if (address == 0) return FormatSpec.NO_CHILDREN_ADDRESS; if (address == 0) return FormatSpec.NO_CHILDREN_ADDRESS;
return address; return address;
} else { } else {
switch (optionFlags & FormatSpec.MASK_GROUP_ADDRESS_TYPE) { switch (optionFlags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) {
case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_ONEBYTE: case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_ONEBYTE:
return dictBuffer.readUnsignedByte(); return dictBuffer.readUnsignedByte();
case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_TWOBYTES: case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_TWOBYTES:
return dictBuffer.readUnsignedShort(); return dictBuffer.readUnsignedShort();
case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_THREEBYTES: case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_THREEBYTES:
return dictBuffer.readUnsignedInt24(); return dictBuffer.readUnsignedInt24();
case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_NOADDRESS: case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_NOADDRESS:
default: default:
return FormatSpec.NO_CHILDREN_ADDRESS; return FormatSpec.NO_CHILDREN_ADDRESS;
} }
@ -122,8 +122,8 @@ public class Ver3DictDecoder implements DictDecoder {
final int targetFlags = dictBuffer.readUnsignedByte(); final int targetFlags = dictBuffer.readUnsignedByte();
final String word = CharEncoding.readString(dictBuffer); final String word = CharEncoding.readString(dictBuffer);
shortcutTargets.add(new WeightedString(word, shortcutTargets.add(new WeightedString(word,
targetFlags & FormatSpec.FLAG_ATTRIBUTE_FREQUENCY)); targetFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY));
if (0 == (targetFlags & FormatSpec.FLAG_ATTRIBUTE_HAS_NEXT)) break; if (0 == (targetFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break;
} }
return dictBuffer.position() - pointerBefore; return dictBuffer.position() - pointerBefore;
} }
@ -132,22 +132,22 @@ public class Ver3DictDecoder implements DictDecoder {
final ArrayList<PendingAttribute> bigrams, final int baseAddress) { final ArrayList<PendingAttribute> bigrams, final int baseAddress) {
int readLength = 0; int readLength = 0;
int bigramCount = 0; int bigramCount = 0;
while (bigramCount++ < FormatSpec.MAX_BIGRAMS_IN_A_GROUP) { while (bigramCount++ < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
final int bigramFlags = dictBuffer.readUnsignedByte(); final int bigramFlags = dictBuffer.readUnsignedByte();
++readLength; ++readLength;
final int sign = 0 == (bigramFlags & FormatSpec.FLAG_ATTRIBUTE_OFFSET_NEGATIVE) final int sign = 0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_ATTR_OFFSET_NEGATIVE)
? 1 : -1; ? 1 : -1;
int bigramAddress = baseAddress + readLength; int bigramAddress = baseAddress + readLength;
switch (bigramFlags & FormatSpec.MASK_ATTRIBUTE_ADDRESS_TYPE) { switch (bigramFlags & FormatSpec.MASK_BIGRAM_ATTR_ADDRESS_TYPE) {
case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE: case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_ONEBYTE:
bigramAddress += sign * dictBuffer.readUnsignedByte(); bigramAddress += sign * dictBuffer.readUnsignedByte();
readLength += 1; readLength += 1;
break; break;
case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES: case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_TWOBYTES:
bigramAddress += sign * dictBuffer.readUnsignedShort(); bigramAddress += sign * dictBuffer.readUnsignedShort();
readLength += 2; readLength += 2;
break; break;
case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES: case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_THREEBYTES:
final int offset = (dictBuffer.readUnsignedByte() << 16) final int offset = (dictBuffer.readUnsignedByte() << 16)
+ dictBuffer.readUnsignedShort(); + dictBuffer.readUnsignedShort();
bigramAddress += sign * offset; bigramAddress += sign * offset;
@ -156,9 +156,10 @@ public class Ver3DictDecoder implements DictDecoder {
default: default:
throw new RuntimeException("Has bigrams with no address"); throw new RuntimeException("Has bigrams with no address");
} }
bigrams.add(new PendingAttribute(bigramFlags & FormatSpec.FLAG_ATTRIBUTE_FREQUENCY, bigrams.add(new PendingAttribute(
bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY,
bigramAddress)); bigramAddress));
if (0 == (bigramFlags & FormatSpec.FLAG_ATTRIBUTE_HAS_NEXT)) break; if (0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break;
} }
return readLength; return readLength;
} }
@ -236,7 +237,7 @@ public class Ver3DictDecoder implements DictDecoder {
// TODO: Make this buffer multi thread safe. // TODO: Make this buffer multi thread safe.
private final int[] mCharacterBuffer = new int[FormatSpec.MAX_WORD_LENGTH]; private final int[] mCharacterBuffer = new int[FormatSpec.MAX_WORD_LENGTH];
@Override @Override
public CharGroupInfo readPtNode(final int ptNodePos, final FormatOptions options) { public PtNodeInfo readPtNode(final int ptNodePos, final FormatOptions options) {
int addressPointer = ptNodePos; int addressPointer = ptNodePos;
final int flags = PtNodeReader.readPtNodeOptionFlags(mDictBuffer); final int flags = PtNodeReader.readPtNodeOptionFlags(mDictBuffer);
++addressPointer; ++addressPointer;
@ -270,7 +271,7 @@ public class Ver3DictDecoder implements DictDecoder {
++addressPointer; ++addressPointer;
frequency = PtNodeReader.readFrequency(mDictBuffer); frequency = PtNodeReader.readFrequency(mDictBuffer);
} else { } else {
frequency = CharGroup.NOT_A_TERMINAL; frequency = PtNode.NOT_A_TERMINAL;
} }
int childrenAddress = PtNodeReader.readChildrenAddress(mDictBuffer, flags, options); int childrenAddress = PtNodeReader.readChildrenAddress(mDictBuffer, flags, options);
if (childrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) { if (childrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) {
@ -290,14 +291,13 @@ public class Ver3DictDecoder implements DictDecoder {
if (0 != (flags & FormatSpec.FLAG_HAS_BIGRAMS)) { if (0 != (flags & FormatSpec.FLAG_HAS_BIGRAMS)) {
bigrams = new ArrayList<PendingAttribute>(); bigrams = new ArrayList<PendingAttribute>();
addressPointer += PtNodeReader.readBigrams(mDictBuffer, bigrams, addressPointer); addressPointer += PtNodeReader.readBigrams(mDictBuffer, bigrams, addressPointer);
if (bigrams.size() >= FormatSpec.MAX_BIGRAMS_IN_A_GROUP) { if (bigrams.size() >= FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
MakedictLog.d("too many bigrams in a group."); MakedictLog.d("too many bigrams in a PtNode.");
} }
} else { } else {
bigrams = null; bigrams = null;
} }
return new PtNodeInfo(ptNodePos, addressPointer, flags, characters, frequency,
return new CharGroupInfo(ptNodePos, addressPointer, flags, characters, frequency,
parentAddress, childrenAddress, shortcutTargets, bigrams); parentAddress, childrenAddress, shortcutTargets, bigrams);
} }

View File

@ -25,7 +25,7 @@ import android.util.SparseArray;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import com.android.inputmethod.latin.utils.ByteArrayDictBuffer; import com.android.inputmethod.latin.utils.ByteArrayDictBuffer;
@ -239,17 +239,17 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
// check unigram // check unigram
for (final String word : words) { for (final String word : words) {
final CharGroup cg = FusionDictionary.findWordInTree(dict.mRootNodeArray, word); final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray, word);
assertNotNull(cg); assertNotNull(ptNode);
} }
// check bigram // check bigram
for (int i = 0; i < bigrams.size(); ++i) { for (int i = 0; i < bigrams.size(); ++i) {
final int w1 = bigrams.keyAt(i); final int w1 = bigrams.keyAt(i);
for (final int w2 : bigrams.valueAt(i)) { for (final int w2 : bigrams.valueAt(i)) {
final CharGroup cg = FusionDictionary.findWordInTree(dict.mRootNodeArray, final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray,
words.get(w1)); words.get(w1));
assertNotNull(words.get(w1) + "," + words.get(w2), cg.getBigram(words.get(w2))); assertNotNull(words.get(w1) + "," + words.get(w2), ptNode.getBigram(words.get(w2)));
} }
} }
@ -257,11 +257,11 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
if (shortcutMap != null) { if (shortcutMap != null) {
for (final Map.Entry<String, List<String>> entry : shortcutMap.entrySet()) { for (final Map.Entry<String, List<String>> entry : shortcutMap.entrySet()) {
assertTrue(words.contains(entry.getKey())); assertTrue(words.contains(entry.getKey()));
final CharGroup group = FusionDictionary.findWordInTree(dict.mRootNodeArray, final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray,
entry.getKey()); entry.getKey());
for (final String word : entry.getValue()) { for (final String word : entry.getValue()) {
assertNotNull("shortcut not found: " + entry.getKey() + ", " + word, assertNotNull("shortcut not found: " + entry.getKey() + ", " + word,
group.getShortcut(word)); ptNode.getShortcut(word));
} }
} }
} }

View File

@ -86,8 +86,8 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
return builder.toString(); return builder.toString();
} }
private static void printCharGroup(final CharGroupInfo info) { private static void printPtNode(final PtNodeInfo info) {
Log.d(TAG, " CharGroup at " + info.mOriginalAddress); Log.d(TAG, " PtNode at " + info.mOriginalAddress);
Log.d(TAG, " flags = " + info.mFlags); Log.d(TAG, " flags = " + info.mFlags);
Log.d(TAG, " parentAddress = " + info.mParentAddress); Log.d(TAG, " parentAddress = " + info.mParentAddress);
Log.d(TAG, " characters = " + new String(info.mCharacters, 0, Log.d(TAG, " characters = " + new String(info.mCharacters, 0,
@ -115,12 +115,12 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
final FormatSpec.FormatOptions formatOptions) { final FormatSpec.FormatOptions formatOptions) {
final DictBuffer dictBuffer = dictDecoder.getDictBuffer(); final DictBuffer dictBuffer = dictDecoder.getDictBuffer();
Log.d(TAG, "Node at " + dictBuffer.position()); Log.d(TAG, "Node at " + dictBuffer.position());
final int count = BinaryDictDecoderUtils.readCharGroupCount(dictBuffer); final int count = BinaryDictDecoderUtils.readPtNodeCount(dictBuffer);
Log.d(TAG, " charGroupCount = " + count); Log.d(TAG, " ptNodeCount = " + count);
for (int i = 0; i < count; ++i) { for (int i = 0; i < count; ++i) {
final CharGroupInfo currentInfo = dictDecoder.readPtNode(dictBuffer.position(), final PtNodeInfo currentInfo = dictDecoder.readPtNode(dictBuffer.position(),
formatOptions); formatOptions);
printCharGroup(currentInfo); printPtNode(currentInfo);
} }
if (formatOptions.mSupportsDynamicUpdate) { if (formatOptions.mSupportsDynamicUpdate) {
final int forwardLinkAddress = dictBuffer.readUnsignedInt24(); final int forwardLinkAddress = dictBuffer.readUnsignedInt24();
@ -155,11 +155,11 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
* *
* @param dictDecoder the dict decoder * @param dictDecoder the dict decoder
* @param word the word searched * @param word the word searched
* @return the found group * @return the found ptNodeInfo
* @throws IOException * @throws IOException
* @throws UnsupportedFormatException * @throws UnsupportedFormatException
*/ */
private static CharGroupInfo findWordByBinaryDictReader(final Ver3DictDecoder dictDecoder, private static PtNodeInfo findWordByBinaryDictReader(final Ver3DictDecoder dictDecoder,
final String word) throws IOException, UnsupportedFormatException { final String word) throws IOException, UnsupportedFormatException {
int position = BinaryDictIOUtils.getTerminalPosition(dictDecoder, word); int position = BinaryDictIOUtils.getTerminalPosition(dictDecoder, word);
final DictBuffer dictBuffer = dictDecoder.getDictBuffer(); final DictBuffer dictBuffer = dictDecoder.getDictBuffer();
@ -172,10 +172,10 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
return null; return null;
} }
private CharGroupInfo findWordFromFile(final File file, final String word) { private PtNodeInfo findWordFromFile(final File file, final String word) {
CharGroupInfo info = null;
try {
final Ver3DictDecoder dictDecoder = new Ver3DictDecoder(file); final Ver3DictDecoder dictDecoder = new Ver3DictDecoder(file);
PtNodeInfo info = null;
try {
dictDecoder.openDictBuffer(); dictDecoder.openDictBuffer();
info = findWordByBinaryDictReader(dictDecoder, word); info = findWordByBinaryDictReader(dictDecoder, word);
} catch (IOException e) { } catch (IOException e) {
@ -328,7 +328,7 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
insertAndCheckWord(file, "banana", 0, false, null, null); insertAndCheckWord(file, "banana", 0, false, null, null);
insertAndCheckWord(file, "recursive", 60, true, banana, null); insertAndCheckWord(file, "recursive", 60, true, banana, null);
final CharGroupInfo info = findWordFromFile(file, "recursive"); final PtNodeInfo info = findWordFromFile(file, "recursive");
int bananaPos = getWordPosition(file, "banana"); int bananaPos = getWordPosition(file, "banana");
assertNotNull(info.mBigrams); assertNotNull(info.mBigrams);
assertEquals(info.mBigrams.size(), 1); assertEquals(info.mBigrams.size(), 1);

View File

@ -25,7 +25,7 @@ import com.android.inputmethod.latin.makedict.DictDecoder;
import com.android.inputmethod.latin.makedict.DictEncoder; import com.android.inputmethod.latin.makedict.DictEncoder;
import com.android.inputmethod.latin.makedict.FormatSpec; import com.android.inputmethod.latin.makedict.FormatSpec;
import com.android.inputmethod.latin.makedict.FusionDictionary; import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
import com.android.inputmethod.latin.makedict.Ver3DictDecoder; import com.android.inputmethod.latin.makedict.Ver3DictDecoder;
import com.android.inputmethod.latin.makedict.Ver3DictEncoder; import com.android.inputmethod.latin.makedict.Ver3DictEncoder;
import com.android.inputmethod.latin.personalization.UserHistoryDictionaryBigramList; import com.android.inputmethod.latin.personalization.UserHistoryDictionaryBigramList;
@ -89,12 +89,12 @@ public class UserHistoryDictIOUtilsTests extends AndroidTestCase
private void checkWordInFusionDict(final FusionDictionary dict, final String word, private void checkWordInFusionDict(final FusionDictionary dict, final String word,
final ArrayList<String> expectedBigrams) { final ArrayList<String> expectedBigrams) {
final CharGroup group = FusionDictionary.findWordInTree(dict.mRootNodeArray, word); final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray, word);
assertNotNull(group); assertNotNull(ptNode);
assertTrue(group.isTerminal()); assertTrue(ptNode.isTerminal());
for (final String bigram : expectedBigrams) { for (final String bigram : expectedBigrams) {
assertNotNull(group.getBigram(bigram)); assertNotNull(ptNode.getBigram(bigram));
} }
} }

View File

@ -17,7 +17,7 @@
package com.android.inputmethod.latin.dicttool; package com.android.inputmethod.latin.dicttool;
import com.android.inputmethod.latin.makedict.FusionDictionary; import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import com.android.inputmethod.latin.makedict.Word; import com.android.inputmethod.latin.makedict.Word;
@ -121,7 +121,7 @@ public class Diff extends Dicttool.Command {
private static void diffWords(final FusionDictionary dict0, final FusionDictionary dict1) { private static void diffWords(final FusionDictionary dict0, final FusionDictionary dict1) {
boolean hasDifferences = false; boolean hasDifferences = false;
for (final Word word0 : dict0) { for (final Word word0 : dict0) {
final CharGroup word1 = FusionDictionary.findWordInTree(dict1.mRootNodeArray, final PtNode word1 = FusionDictionary.findWordInTree(dict1.mRootNodeArray,
word0.mWord); word0.mWord);
if (null == word1) { if (null == word1) {
// This word is not in dict1 // This word is not in dict1
@ -151,7 +151,7 @@ public class Diff extends Dicttool.Command {
} }
} }
for (final Word word1 : dict1) { for (final Word word1 : dict1) {
final CharGroup word0 = FusionDictionary.findWordInTree(dict0.mRootNodeArray, final PtNode word0 = FusionDictionary.findWordInTree(dict0.mRootNodeArray,
word1.mWord); word1.mWord);
if (null == word0) { if (null == word0) {
// This word is not in dict0 // This word is not in dict0

View File

@ -18,7 +18,7 @@ package com.android.inputmethod.latin.dicttool;
import com.android.inputmethod.latin.makedict.FormatSpec; import com.android.inputmethod.latin.makedict.FormatSpec;
import com.android.inputmethod.latin.makedict.FusionDictionary; import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import com.android.inputmethod.latin.makedict.Word; import com.android.inputmethod.latin.makedict.Word;
@ -65,20 +65,20 @@ public class Info extends Dicttool.Command {
private static void showWordInfo(final FusionDictionary dict, final String word, private static void showWordInfo(final FusionDictionary dict, final String word,
final boolean plumbing) { final boolean plumbing) {
final CharGroup group = FusionDictionary.findWordInTree(dict.mRootNodeArray, word); final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray, word);
if (null == group) { if (null == ptNode) {
System.out.println(word + " is not in the dictionary"); System.out.println(word + " is not in the dictionary");
return; return;
} }
System.out.println("Word: " + word); System.out.println("Word: " + word);
System.out.println(" Freq: " + group.getFrequency()); System.out.println(" Freq: " + ptNode.getFrequency());
if (group.getIsNotAWord()) { if (ptNode.getIsNotAWord()) {
System.out.println(" Is not a word"); System.out.println(" Is not a word");
} }
if (group.getIsBlacklistEntry()) { if (ptNode.getIsBlacklistEntry()) {
System.out.println(" Is a blacklist entry"); System.out.println(" Is a blacklist entry");
} }
final ArrayList<WeightedString> shortcutTargets = group.getShortcutTargets(); final ArrayList<WeightedString> shortcutTargets = ptNode.getShortcutTargets();
if (null == shortcutTargets || shortcutTargets.isEmpty()) { if (null == shortcutTargets || shortcutTargets.isEmpty()) {
System.out.println(" No shortcuts"); System.out.println(" No shortcuts");
} else { } else {
@ -88,7 +88,7 @@ public class Info extends Dicttool.Command {
? "whitelist" : shortcutTarget.mFrequency) + ")"); ? "whitelist" : shortcutTarget.mFrequency) + ")");
} }
} }
final ArrayList<WeightedString> bigrams = group.getBigrams(); final ArrayList<WeightedString> bigrams = ptNode.getBigrams();
if (null == bigrams || bigrams.isEmpty()) { if (null == bigrams || bigrams.isEmpty()) {
System.out.println(" No bigrams"); System.out.println(" No bigrams");
} else { } else {

View File

@ -17,7 +17,7 @@
package com.android.inputmethod.latin.makedict; package com.android.inputmethod.latin.makedict;
import com.android.inputmethod.latin.makedict.FusionDictionary; import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.Word; import com.android.inputmethod.latin.makedict.Word;
@ -72,8 +72,8 @@ public class FusionDictionaryTest extends TestCase {
assertNotNull(dict); assertNotNull(dict);
for (final String word : words) { for (final String word : words) {
if (--limit < 0) return; if (--limit < 0) return;
final CharGroup cg = FusionDictionary.findWordInTree(dict.mRootNodeArray, word); final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray, word);
assertNotNull(cg); assertNotNull(ptNode);
} }
} }