Add writePtNode to DictEncoder.

Bug: 9618601
Change-Id: Ie4529444780edee8d0f0d5f23c5619c1a3344286
This commit is contained in:
Yuichiro Hanada 2013-09-10 17:33:42 +09:00
parent 95bc256f41
commit 7547a7042c
3 changed files with 76 additions and 65 deletions

View file

@ -198,6 +198,27 @@ public class BinaryDictEncoderUtils {
} }
} }
/**
 * Write the low-order {@code size} bytes of an integer into a buffer, most significant
 * byte first.
 *
 * Sizes outside the range 1..4 write nothing and leave the position untouched.
 *
 * @param buffer the buffer to write to.
 * @param position the index in the buffer at which to write the first byte.
 * @param value the value to write; only its lowest {@code size} bytes are emitted.
 * @param size the number of bytes to write.
 * @return the position right after the last byte written.
 */
static int writeUIntToBuffer(final byte[] buffer, int position, final int value,
        final int size) {
    // Unsupported widths are deliberately ignored rather than rejected.
    if (size < 1 || size > 4) {
        return position;
    }
    // Walk from the most significant requested byte down to the least significant one.
    for (int shift = (size - 1) * 8; shift >= 0; shift -= 8) {
        buffer[position++] = (byte) ((value >> shift) & 0xFF);
    }
    return position;
}
// End utility methods // End utility methods
// This method is responsible for finding a nice ordering of the nodes that favors run-time // This method is responsible for finding a nice ordering of the nodes that favors run-time
@ -733,7 +754,7 @@ public class BinaryDictEncoderUtils {
} }
/** /**
* Write a PtNodeArray to memory. The PtNodeArray is expected to have its final position cached. * Write a PtNodeArray. The PtNodeArray is expected to have its final position cached.
* *
* @param dict the dictionary the node array is a part of (for relative offsets). * @param dict the dictionary the node array is a part of (for relative offsets).
* @param dictEncoder the dictionary encoder. * @param dictEncoder the dictionary encoder.
@ -741,7 +762,7 @@ public class BinaryDictEncoderUtils {
* @param formatOptions file format options. * @param formatOptions file format options.
*/ */
@SuppressWarnings("unused") @SuppressWarnings("unused")
/* package */ static void writePlacedNode(final FusionDictionary dict, /* package */ static void writePlacedPtNodeArray(final FusionDictionary dict,
final DictEncoder dictEncoder, final PtNodeArray ptNodeArray, final DictEncoder dictEncoder, final PtNodeArray ptNodeArray,
final FormatOptions formatOptions) { final FormatOptions formatOptions) {
// TODO: Make the code in common with BinaryDictIOUtils#writePtNode // TODO: Make the code in common with BinaryDictIOUtils#writePtNode
@ -767,13 +788,7 @@ public class BinaryDictEncoderUtils {
+ " : " + ptNode.mFrequency); + " : " + ptNode.mFrequency);
} }
dictEncoder.writePtNodeFlags(ptNode, parentPosition, formatOptions); dictEncoder.writePtNode(ptNode, parentPosition, formatOptions, dict);
dictEncoder.writeParentPosition(parentPosition, ptNode, formatOptions);
dictEncoder.writeCharacters(ptNode.mChars, ptNode.hasSeveralChars());
dictEncoder.writeFrequency(ptNode.mFrequency);
dictEncoder.writeChildrenPosition(ptNode, formatOptions);
dictEncoder.writeShortcuts(ptNode.mShortcutTargets);
dictEncoder.writeBigrams(ptNode.mBigrams, dict);
} }
if (formatOptions.mSupportsDynamicUpdate) { if (formatOptions.mSupportsDynamicUpdate) {
dictEncoder.writeForwardLinkAddress(FormatSpec.NO_FORWARD_LINK_ADDRESS); dictEncoder.writeForwardLinkAddress(FormatSpec.NO_FORWARD_LINK_ADDRESS);

View file

@ -18,10 +18,8 @@ package com.android.inputmethod.latin.makedict;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList;
/** /**
* An interface of binary dictionary encoder. * An interface of binary dictionary encoder.
@ -33,28 +31,8 @@ public interface DictEncoder {
public void setPosition(final int position); public void setPosition(final int position);
public int getPosition(); public int getPosition();
public void writePtNodeCount(final int ptNodeCount); public void writePtNodeCount(final int ptNodeCount);
public void writePtNodeFlags(final PtNode ptNode, final int parentAddress,
final FormatOptions formatOptions);
public void writeParentPosition(final int parentPosition, final PtNode ptNode,
final FormatOptions formatOptions);
public void writeCharacters(final int[] characters, final boolean hasSeveralChars);
public void writeFrequency(final int frequency);
public void writeChildrenPosition(final PtNode ptNode, final FormatOptions formatOptions);
/**
* Write a shortcut attributes list to memory.
*
* @param shortcuts the shortcut attributes list.
*/
public void writeShortcuts(final ArrayList<WeightedString> shortcuts);
/**
* Write a bigram attributes list to memory.
*
* @param bigrams the bigram attributes list.
* @param dict the dictionary the node array is a part of (for relative offsets).
*/
public void writeBigrams(final ArrayList<WeightedString> bigrams, final FusionDictionary dict);
public void writeForwardLinkAddress(final int forwardLinkAddress); public void writeForwardLinkAddress(final int forwardLinkAddress);
public void writePtNode(final PtNode ptNode, final int parentPosition,
final FormatOptions formatOptions, final FusionDictionary dict);
} }

View file

@ -103,7 +103,7 @@ public class Ver3DictEncoder implements DictEncoder {
MakedictLog.i("Writing file..."); MakedictLog.i("Writing file...");
for (PtNodeArray nodeArray : flatNodes) { for (PtNodeArray nodeArray : flatNodes) {
BinaryDictEncoderUtils.writePlacedNode(dict, this, nodeArray, formatOptions); BinaryDictEncoderUtils.writePlacedPtNodeArray(dict, this, nodeArray, formatOptions);
} }
if (MakedictLog.DBG) BinaryDictEncoderUtils.showStatistics(flatNodes); if (MakedictLog.DBG) BinaryDictEncoderUtils.showStatistics(flatNodes);
mOutStream.write(mBuffer, 0, mPosition); mOutStream.write(mBuffer, 0, mPosition);
@ -126,26 +126,23 @@ public class Ver3DictEncoder implements DictEncoder {
@Override @Override
public void writePtNodeCount(final int ptNodeCount) { public void writePtNodeCount(final int ptNodeCount) {
final int countSize = BinaryDictIOUtils.getPtNodeCountSize(ptNodeCount); final int countSize = BinaryDictIOUtils.getPtNodeCountSize(ptNodeCount);
if (1 == countSize) { if (countSize != 1 && countSize != 2) {
mBuffer[mPosition++] = (byte) ptNodeCount;
} else if (2 == countSize) {
mBuffer[mPosition++] = (byte) ((ptNodeCount >> 8) & 0xFF);
mBuffer[mPosition++] = (byte) (ptNodeCount & 0xFF);
} else {
throw new RuntimeException("Strange size from getGroupCountSize : " + countSize); throw new RuntimeException("Strange size from getGroupCountSize : " + countSize);
} }
mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition, ptNodeCount,
countSize);
} }
@Override private void writePtNodeFlags(final PtNode ptNode, final int parentAddress,
public void writePtNodeFlags(final PtNode ptNode, final int parentAddress,
final FormatOptions formatOptions) { final FormatOptions formatOptions) {
final int childrenPos = BinaryDictEncoderUtils.getChildrenPosition(ptNode, formatOptions); final int childrenPos = BinaryDictEncoderUtils.getChildrenPosition(ptNode, formatOptions);
mBuffer[mPosition++] = BinaryDictEncoderUtils.makePtNodeFlags(ptNode, mPosition, mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition,
childrenPos, formatOptions); BinaryDictEncoderUtils.makePtNodeFlags(ptNode, mPosition, childrenPos,
formatOptions),
FormatSpec.PTNODE_FLAGS_SIZE);
} }
@Override private void writeParentPosition(final int parentPosition, final PtNode ptNode,
public void writeParentPosition(final int parentPosition, final PtNode ptNode,
final FormatOptions formatOptions) { final FormatOptions formatOptions) {
if (parentPosition == FormatSpec.NO_PARENT_ADDRESS) { if (parentPosition == FormatSpec.NO_PARENT_ADDRESS) {
mPosition = BinaryDictEncoderUtils.writeParentAddress(mBuffer, mPosition, mPosition = BinaryDictEncoderUtils.writeParentAddress(mBuffer, mPosition,
@ -156,22 +153,20 @@ public class Ver3DictEncoder implements DictEncoder {
} }
} }
@Override private void writeCharacters(final int[] codePoints, final boolean hasSeveralChars) {
public void writeCharacters(final int[] codePoints, final boolean hasSeveralChars) {
mPosition = CharEncoding.writeCharArray(codePoints, mBuffer, mPosition); mPosition = CharEncoding.writeCharArray(codePoints, mBuffer, mPosition);
if (hasSeveralChars) { if (hasSeveralChars) {
mBuffer[mPosition++] = FormatSpec.PTNODE_CHARACTERS_TERMINATOR; mBuffer[mPosition++] = FormatSpec.PTNODE_CHARACTERS_TERMINATOR;
} }
} }
@Override private void writeFrequency(final int frequency) {
public void writeFrequency(final int frequency) {
if (frequency >= 0) { if (frequency >= 0) {
mBuffer[mPosition++] = (byte) frequency; mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition, frequency,
FormatSpec.PTNODE_FREQUENCY_SIZE);
} }
} }
@Override
public void writeChildrenPosition(final PtNode ptNode, final FormatOptions formatOptions) { public void writeChildrenPosition(final PtNode ptNode, final FormatOptions formatOptions) {
final int childrenPos = BinaryDictEncoderUtils.getChildrenPosition(ptNode, formatOptions); final int childrenPos = BinaryDictEncoderUtils.getChildrenPosition(ptNode, formatOptions);
if (formatOptions.mSupportsDynamicUpdate) { if (formatOptions.mSupportsDynamicUpdate) {
@ -183,8 +178,12 @@ public class Ver3DictEncoder implements DictEncoder {
} }
} }
@Override /**
public void writeShortcuts(final ArrayList<WeightedString> shortcuts) { * Write a shortcut attributes list to mBuffer.
*
* @param shortcuts the shortcut attributes list.
*/
private void writeShortcuts(final ArrayList<WeightedString> shortcuts) {
if (null == shortcuts || shortcuts.isEmpty()) return; if (null == shortcuts || shortcuts.isEmpty()) return;
final int indexOfShortcutByteSize = mPosition; final int indexOfShortcutByteSize = mPosition;
@ -195,7 +194,8 @@ public class Ver3DictEncoder implements DictEncoder {
final int shortcutFlags = BinaryDictEncoderUtils.makeShortcutFlags( final int shortcutFlags = BinaryDictEncoderUtils.makeShortcutFlags(
shortcutIterator.hasNext(), shortcutIterator.hasNext(),
target.mFrequency); target.mFrequency);
mBuffer[mPosition++] = (byte)shortcutFlags; mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition, shortcutFlags,
FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
final int shortcutShift = CharEncoding.writeString(mBuffer, mPosition, target.mWord); final int shortcutShift = CharEncoding.writeString(mBuffer, mPosition, target.mWord);
mPosition += shortcutShift; mPosition += shortcutShift;
} }
@ -203,12 +203,18 @@ public class Ver3DictEncoder implements DictEncoder {
if (shortcutByteSize > 0xFFFF) { if (shortcutByteSize > 0xFFFF) {
throw new RuntimeException("Shortcut list too large"); throw new RuntimeException("Shortcut list too large");
} }
mBuffer[indexOfShortcutByteSize] = (byte)((shortcutByteSize >> 8) & 0xFF); BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, indexOfShortcutByteSize, shortcutByteSize,
mBuffer[indexOfShortcutByteSize + 1] = (byte)(shortcutByteSize & 0xFF); FormatSpec.PTNODE_SHORTCUT_LIST_SIZE_SIZE);
} }
@Override /**
public void writeBigrams(final ArrayList<WeightedString> bigrams, final FusionDictionary dict) { * Write a bigram attributes list to mBuffer.
*
* @param bigrams the bigram attributes list.
* @param dict the dictionary the node array is a part of (for relative offsets).
*/
private void writeBigrams(final ArrayList<WeightedString> bigrams,
final FusionDictionary dict) {
if (bigrams == null) return; if (bigrams == null) return;
final Iterator<WeightedString> bigramIterator = bigrams.iterator(); final Iterator<WeightedString> bigramIterator = bigrams.iterator();
@ -220,9 +226,10 @@ public class Ver3DictEncoder implements DictEncoder {
final int unigramFrequencyForThisWord = target.mFrequency; final int unigramFrequencyForThisWord = target.mFrequency;
final int offset = addressOfBigram final int offset = addressOfBigram
- (mPosition + FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE); - (mPosition + FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
int bigramFlags = BinaryDictEncoderUtils.makeBigramFlags(bigramIterator.hasNext(), final int bigramFlags = BinaryDictEncoderUtils.makeBigramFlags(bigramIterator.hasNext(),
offset, bigram.mFrequency, unigramFrequencyForThisWord, bigram.mWord); offset, bigram.mFrequency, unigramFrequencyForThisWord, bigram.mWord);
mBuffer[mPosition++] = (byte) bigramFlags; mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition, bigramFlags,
FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
mPosition += BinaryDictEncoderUtils.writeChildrenPosition(mBuffer, mPosition, mPosition += BinaryDictEncoderUtils.writeChildrenPosition(mBuffer, mPosition,
Math.abs(offset)); Math.abs(offset));
} }
@ -230,8 +237,19 @@ public class Ver3DictEncoder implements DictEncoder {
@Override @Override
public void writeForwardLinkAddress(final int forwardLinkAddress) { public void writeForwardLinkAddress(final int forwardLinkAddress) {
mBuffer[mPosition++] = (byte) ((forwardLinkAddress >> 16) & 0xFF); mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition, forwardLinkAddress,
mBuffer[mPosition++] = (byte) ((forwardLinkAddress >> 8) & 0xFF); FormatSpec.FORWARD_LINK_ADDRESS_SIZE);
mBuffer[mPosition++] = (byte) (forwardLinkAddress & 0xFF); }
/**
 * Write a whole PtNode by delegating to the per-field writers of this encoder.
 *
 * The fields are emitted in the order of the calls below: flags, parent position,
 * characters, frequency, children position, shortcuts, then bigrams.
 *
 * @param ptNode the PtNode to write.
 * @param parentPosition the parent position, forwarded to the flags and parent writers.
 * @param formatOptions file format options.
 * @param dict the dictionary, forwarded to the bigram writer.
 */
@Override
public void writePtNode(final PtNode ptNode, final int parentPosition,
final FormatOptions formatOptions, final FusionDictionary dict) {
writePtNodeFlags(ptNode, parentPosition, formatOptions);
writeParentPosition(parentPosition, ptNode, formatOptions);
writeCharacters(ptNode.mChars, ptNode.hasSeveralChars());
writeFrequency(ptNode.mFrequency);
writeChildrenPosition(ptNode, formatOptions);
writeShortcuts(ptNode.mShortcutTargets);
writeBigrams(ptNode.mBigrams, dict);
} }
} }