am 95d16561: Remove unused code.

* commit '95d16561e0e6c38dbd99c893f09c5dbe9d4a465d':
  Remove unused code.
main
Keisuke Kuroyanagi 2014-02-15 01:40:02 -08:00 committed by Android Git Automerger
commit 37da7b4db8
12 changed files with 99 additions and 593 deletions

View File

@ -17,14 +17,10 @@
package com.android.inputmethod.latin.makedict; package com.android.inputmethod.latin.makedict;
import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashMap;
import java.util.TreeMap; import java.util.TreeMap;
/** /**
@ -35,34 +31,6 @@ public abstract class AbstractDictDecoder implements DictDecoder {
private static final int ERROR_CANNOT_READ = 1; private static final int ERROR_CANNOT_READ = 1;
private static final int ERROR_WRONG_FORMAT = 2; private static final int ERROR_WRONG_FORMAT = 2;
protected DictionaryHeader readHeader(final DictBuffer headerBuffer)
throws IOException, UnsupportedFormatException {
if (headerBuffer == null) {
openDictBuffer();
}
final int version = HeaderReader.readVersion(headerBuffer);
if (version < FormatSpec.MINIMUM_SUPPORTED_VERSION
|| version > FormatSpec.MAXIMUM_SUPPORTED_VERSION) {
throw new UnsupportedFormatException("Unsupported version : " + version);
}
// TODO: Remove this field.
HeaderReader.readOptionFlags(headerBuffer);
final int headerSize = HeaderReader.readHeaderSize(headerBuffer);
if (headerSize < 0) {
throw new UnsupportedFormatException("header size can't be negative.");
}
final HashMap<String, String> attributes = HeaderReader.readAttributes(headerBuffer,
headerSize);
final DictionaryHeader header = new DictionaryHeader(headerSize,
new FusionDictionary.DictionaryOptions(attributes),
new FormatOptions(version, DictionaryHeader.ATTRIBUTE_VALUE_TRUE.equals(
attributes.get(DictionaryHeader.HAS_HISTORICAL_INFO_KEY))));
return header;
}
@Override @UsedForTesting @Override @UsedForTesting
public int getTerminalPosition(final String word) public int getTerminalPosition(final String word)
throws IOException, UnsupportedFormatException { throws IOException, UnsupportedFormatException {
@ -83,38 +51,6 @@ public abstract class AbstractDictDecoder implements DictDecoder {
BinaryDictIOUtils.readUnigramsAndBigramsBinary(this, words, frequencies, bigrams); BinaryDictIOUtils.readUnigramsAndBigramsBinary(this, words, frequencies, bigrams);
} }
/**
* A utility class for reading a file header.
*/
protected static class HeaderReader {
protected static int readVersion(final DictBuffer dictBuffer)
throws IOException, UnsupportedFormatException {
return BinaryDictDecoderUtils.checkFormatVersion(dictBuffer);
}
protected static int readOptionFlags(final DictBuffer dictBuffer) {
return dictBuffer.readUnsignedShort();
}
protected static int readHeaderSize(final DictBuffer dictBuffer) {
return dictBuffer.readInt();
}
protected static HashMap<String, String> readAttributes(final DictBuffer dictBuffer,
final int headerSize) {
final HashMap<String, String> attributes = new HashMap<String, String>();
while (dictBuffer.position() < headerSize) {
// We can avoid an infinite loop here since dictBuffer.position() is always
// increased by calling CharEncoding.readString.
final String key = CharEncoding.readString(dictBuffer);
final String value = CharEncoding.readString(dictBuffer);
attributes.put(key, value);
}
dictBuffer.position(headerSize);
return attributes;
}
}
/** /**
* Check whether the header contains the expected information. This is a no-error method, * Check whether the header contains the expected information. This is a no-error method,
* that will return an error code and never throw a checked exception. * that will return an error code and never throw a checked exception.
@ -148,7 +84,7 @@ public abstract class AbstractDictDecoder implements DictDecoder {
} }
@Override @Override
public PtNodeInfo readPtNode(final int ptNodePos, final FormatOptions options) { public PtNodeInfo readPtNode(final int ptNodePos) {
return null; return null;
} }
@ -165,14 +101,4 @@ public abstract class AbstractDictDecoder implements DictDecoder {
public int readPtNodeCount() { public int readPtNodeCount() {
return 0; return 0;
} }
@Override
public boolean readAndFollowForwardLink() {
return false;
}
@Override
public boolean hasNextPtNodeArray() {
return false;
}
} }

View File

@ -17,18 +17,12 @@
package com.android.inputmethod.latin.makedict; package com.android.inputmethod.latin.makedict;
import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.io.OutputStream; import java.io.OutputStream;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Map;
import java.util.TreeMap;
/** /**
* Decodes binary files for a FusionDictionary. * Decodes binary files for a FusionDictionary.
@ -47,8 +41,6 @@ public final class BinaryDictDecoderUtils {
// This utility class is not publicly instantiable. // This utility class is not publicly instantiable.
} }
private static final int MAX_JUMPS = 12;
@UsedForTesting @UsedForTesting
public interface DictBuffer { public interface DictBuffer {
public int readUnsignedByte(); public int readUnsignedByte();
@ -296,60 +288,21 @@ public final class BinaryDictDecoderUtils {
* @param dictDecoder the dict decoder. * @param dictDecoder the dict decoder.
* @param headerSize the size of the header. * @param headerSize the size of the header.
* @param pos the position to seek. * @param pos the position to seek.
* @param formatOptions file format options.
* @return the word with its frequency, as a weighted string. * @return the word with its frequency, as a weighted string.
*/ */
@UsedForTesting
/* package for tests */ static WeightedString getWordAtPosition(final DictDecoder dictDecoder, /* package for tests */ static WeightedString getWordAtPosition(final DictDecoder dictDecoder,
final int headerSize, final int pos, final FormatOptions formatOptions) { final int headerSize, final int pos) {
final WeightedString result; final WeightedString result;
final int originalPos = dictDecoder.getPosition(); final int originalPos = dictDecoder.getPosition();
dictDecoder.setPosition(pos); dictDecoder.setPosition(pos);
result = getWordAtPositionWithoutParentAddress(dictDecoder, headerSize, pos);
if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) {
result = getWordAtPositionWithParentAddress(dictDecoder, pos, formatOptions);
} else {
result = getWordAtPositionWithoutParentAddress(dictDecoder, headerSize, pos,
formatOptions);
}
dictDecoder.setPosition(originalPos); dictDecoder.setPosition(originalPos);
return result; return result;
} }
@SuppressWarnings("unused")
private static WeightedString getWordAtPositionWithParentAddress(final DictDecoder dictDecoder,
final int pos, final FormatOptions options) {
int currentPos = pos;
ProbabilityInfo probabilityInfo = null;
final StringBuilder builder = new StringBuilder();
// the length of the path from the root to the leaf is limited by MAX_WORD_LENGTH
for (int count = 0; count < FormatSpec.MAX_WORD_LENGTH; ++count) {
PtNodeInfo currentInfo;
int loopCounter = 0;
do {
dictDecoder.setPosition(currentPos);
currentInfo = dictDecoder.readPtNode(currentPos, options);
if (BinaryDictIOUtils.isMovedPtNode(currentInfo.mFlags, options)) {
currentPos = currentInfo.mParentAddress + currentInfo.mOriginalAddress;
}
if (DBG && loopCounter++ > MAX_JUMPS) {
MakedictLog.d("Too many jumps - probably a bug");
}
} while (BinaryDictIOUtils.isMovedPtNode(currentInfo.mFlags, options));
if (probabilityInfo == null) {
probabilityInfo = currentInfo.mProbabilityInfo;
}
builder.insert(0,
new String(currentInfo.mCharacters, 0, currentInfo.mCharacters.length));
if (currentInfo.mParentAddress == FormatSpec.NO_PARENT_ADDRESS) break;
currentPos = currentInfo.mParentAddress + currentInfo.mOriginalAddress;
}
return new WeightedString(builder.toString(), probabilityInfo);
}
private static WeightedString getWordAtPositionWithoutParentAddress( private static WeightedString getWordAtPositionWithoutParentAddress(
final DictDecoder dictDecoder, final int headerSize, final int pos, final DictDecoder dictDecoder, final int headerSize, final int pos) {
final FormatOptions options) {
dictDecoder.setPosition(headerSize); dictDecoder.setPosition(headerSize);
final int count = dictDecoder.readPtNodeCount(); final int count = dictDecoder.readPtNodeCount();
int groupPos = dictDecoder.getPosition(); int groupPos = dictDecoder.getPosition();
@ -358,7 +311,7 @@ public final class BinaryDictDecoderUtils {
PtNodeInfo last = null; PtNodeInfo last = null;
for (int i = count - 1; i >= 0; --i) { for (int i = count - 1; i >= 0; --i) {
PtNodeInfo info = dictDecoder.readPtNode(groupPos, options); PtNodeInfo info = dictDecoder.readPtNode(groupPos);
groupPos = info.mEndAddress; groupPos = info.mEndAddress;
if (info.mOriginalAddress == pos) { if (info.mOriginalAddress == pos) {
builder.append(new String(info.mCharacters, 0, info.mCharacters.length)); builder.append(new String(info.mCharacters, 0, info.mCharacters.length));
@ -389,135 +342,6 @@ public final class BinaryDictDecoderUtils {
return result; return result;
} }
/**
* Reads a single node array from a buffer.
*
* This methods reads the file at the current position. A node array is fully expected to start
* at the current position.
* This will recursively read other node arrays into the structure, populating the reverse
* maps on the fly and using them to keep track of already read nodes.
*
* @param dictDecoder the dict decoder, correctly positioned at the start of a node array.
* @param headerSize the size, in bytes, of the file header.
* @param reverseNodeArrayMap a mapping from addresses to already read node arrays.
* @param reversePtNodeMap a mapping from addresses to already read PtNodes.
* @param options file format options.
* @return the read node array with all his children already read.
*/
private static PtNodeArray readNodeArray(final DictDecoder dictDecoder,
final int headerSize, final Map<Integer, PtNodeArray> reverseNodeArrayMap,
final Map<Integer, PtNode> reversePtNodeMap, final FormatOptions options)
throws IOException {
final ArrayList<PtNode> nodeArrayContents = new ArrayList<PtNode>();
final int nodeArrayOriginPos = dictDecoder.getPosition();
do { // Scan the linked-list node.
final int count = dictDecoder.readPtNodeCount();
int groupPos = dictDecoder.getPosition();
for (int i = count; i > 0; --i) { // Scan the array of PtNode.
PtNodeInfo info = dictDecoder.readPtNode(groupPos, options);
if (BinaryDictIOUtils.isMovedPtNode(info.mFlags, options)) continue;
ArrayList<WeightedString> shortcutTargets = info.mShortcutTargets;
ArrayList<WeightedString> bigrams = null;
if (null != info.mBigrams) {
bigrams = new ArrayList<WeightedString>();
for (PendingAttribute bigram : info.mBigrams) {
final WeightedString word = getWordAtPosition(dictDecoder, headerSize,
bigram.mAddress, options);
final int reconstructedFrequency =
BinaryDictIOUtils.reconstructBigramFrequency(word.getProbability(),
bigram.mFrequency);
bigrams.add(new WeightedString(word.mWord, reconstructedFrequency));
}
}
if (BinaryDictIOUtils.hasChildrenAddress(info.mChildrenAddress)) {
PtNodeArray children = reverseNodeArrayMap.get(info.mChildrenAddress);
if (null == children) {
final int currentPosition = dictDecoder.getPosition();
dictDecoder.setPosition(info.mChildrenAddress);
children = readNodeArray(dictDecoder, headerSize, reverseNodeArrayMap,
reversePtNodeMap, options);
dictDecoder.setPosition(currentPosition);
}
nodeArrayContents.add(
new PtNode(info.mCharacters, shortcutTargets, bigrams,
info.mProbabilityInfo,
0 != (info.mFlags & FormatSpec.FLAG_IS_NOT_A_WORD),
0 != (info.mFlags & FormatSpec.FLAG_IS_BLACKLISTED), children));
} else {
nodeArrayContents.add(
new PtNode(info.mCharacters, shortcutTargets, bigrams,
info.mProbabilityInfo,
0 != (info.mFlags & FormatSpec.FLAG_IS_NOT_A_WORD),
0 != (info.mFlags & FormatSpec.FLAG_IS_BLACKLISTED)));
}
groupPos = info.mEndAddress;
}
// reach the end of the array.
if (options.supportsDynamicUpdate()) {
final boolean hasValidForwardLink = dictDecoder.readAndFollowForwardLink();
if (!hasValidForwardLink) break;
}
} while (options.supportsDynamicUpdate() && dictDecoder.hasNextPtNodeArray());
final PtNodeArray nodeArray = new PtNodeArray(nodeArrayContents);
nodeArray.mCachedAddressBeforeUpdate = nodeArrayOriginPos;
nodeArray.mCachedAddressAfterUpdate = nodeArrayOriginPos;
reverseNodeArrayMap.put(nodeArray.mCachedAddressAfterUpdate, nodeArray);
return nodeArray;
}
/**
* Helper function to get the binary format version from the header.
* @throws IOException
*/
private static int getFormatVersion(final DictBuffer dictBuffer)
throws IOException {
final int magic = dictBuffer.readInt();
if (FormatSpec.MAGIC_NUMBER == magic) return dictBuffer.readUnsignedShort();
return FormatSpec.NOT_A_VERSION_NUMBER;
}
/**
* Helper function to get and validate the binary format version.
* @throws UnsupportedFormatException
* @throws IOException
*/
static int checkFormatVersion(final DictBuffer dictBuffer)
throws IOException, UnsupportedFormatException {
final int version = getFormatVersion(dictBuffer);
if (version < FormatSpec.MINIMUM_SUPPORTED_VERSION
|| version > FormatSpec.MAXIMUM_SUPPORTED_VERSION) {
throw new UnsupportedFormatException("This file has version " + version
+ ", but this implementation does not support versions above "
+ FormatSpec.MAXIMUM_SUPPORTED_VERSION);
}
return version;
}
/**
* Reads a buffer and returns the memory representation of the dictionary.
*
* This high-level method takes a buffer and reads its contents, populating a
* FusionDictionary structure.
*
* @param dictDecoder the dict decoder.
* @return the created dictionary.
*/
@UsedForTesting
/* package */ static FusionDictionary readDictionaryBinary(final DictDecoder dictDecoder)
throws IOException, UnsupportedFormatException {
// Read header
final DictionaryHeader fileHeader = dictDecoder.readHeader();
Map<Integer, PtNodeArray> reverseNodeArrayMapping = new TreeMap<Integer, PtNodeArray>();
Map<Integer, PtNode> reversePtNodeMapping = new TreeMap<Integer, PtNode>();
final PtNodeArray root = readNodeArray(dictDecoder, fileHeader.mBodyOffset,
reverseNodeArrayMapping, reversePtNodeMapping, fileHeader.mFormatOptions);
return new FusionDictionary(root, fileHeader.mDictionaryOptions);
}
/** /**
* Helper method to pass a file name instead of a File object to isBinaryDictionary. * Helper method to pass a file name instead of a File object to isBinaryDictionary.
*/ */

View File

@ -122,18 +122,13 @@ public class BinaryDictEncoderUtils {
* Compute the maximum size of a PtNode, assuming 3-byte addresses for everything. * Compute the maximum size of a PtNode, assuming 3-byte addresses for everything.
* *
* @param ptNode the PtNode to compute the size of. * @param ptNode the PtNode to compute the size of.
* @param options file format options.
* @return the maximum size of the PtNode. * @return the maximum size of the PtNode.
*/ */
private static int getPtNodeMaximumSize(final PtNode ptNode, final FormatOptions options) { private static int getPtNodeMaximumSize(final PtNode ptNode) {
int size = getNodeHeaderSize(ptNode, options); int size = getNodeHeaderSize(ptNode);
if (ptNode.isTerminal()) { if (ptNode.isTerminal()) {
// If terminal, one byte for the frequency or four bytes for the terminal id. // If terminal, one byte for the frequency.
if (options.mHasTerminalId) { size += FormatSpec.PTNODE_FREQUENCY_SIZE;
size += FormatSpec.PTNODE_TERMINAL_ID_SIZE;
} else {
size += FormatSpec.PTNODE_FREQUENCY_SIZE;
}
} }
size += FormatSpec.PTNODE_MAX_ADDRESS_SIZE; // For children address size += FormatSpec.PTNODE_MAX_ADDRESS_SIZE; // For children address
size += getShortcutListSize(ptNode.mShortcutTargets); size += getShortcutListSize(ptNode.mShortcutTargets);
@ -151,19 +146,14 @@ public class BinaryDictEncoderUtils {
* the containing node array, and cache it it its 'mCachedSize' member. * the containing node array, and cache it it its 'mCachedSize' member.
* *
* @param ptNodeArray the node array to compute the maximum size of. * @param ptNodeArray the node array to compute the maximum size of.
* @param options file format options.
*/ */
private static void calculatePtNodeArrayMaximumSize(final PtNodeArray ptNodeArray, private static void calculatePtNodeArrayMaximumSize(final PtNodeArray ptNodeArray) {
final FormatOptions options) {
int size = getPtNodeCountSize(ptNodeArray); int size = getPtNodeCountSize(ptNodeArray);
for (PtNode node : ptNodeArray.mData) { for (PtNode node : ptNodeArray.mData) {
final int nodeSize = getPtNodeMaximumSize(node, options); final int nodeSize = getPtNodeMaximumSize(node);
node.mCachedSize = nodeSize; node.mCachedSize = nodeSize;
size += nodeSize; size += nodeSize;
} }
if (options.supportsDynamicUpdate()) {
size += FormatSpec.FORWARD_LINK_ADDRESS_SIZE;
}
ptNodeArray.mCachedSize = size; ptNodeArray.mCachedSize = size;
} }
@ -171,15 +161,9 @@ public class BinaryDictEncoderUtils {
* Compute the size of the header (flag + [parent address] + characters size) of a PtNode. * Compute the size of the header (flag + [parent address] + characters size) of a PtNode.
* *
* @param ptNode the PtNode of which to compute the size of the header * @param ptNode the PtNode of which to compute the size of the header
* @param options file format options.
*/ */
private static int getNodeHeaderSize(final PtNode ptNode, final FormatOptions options) { private static int getNodeHeaderSize(final PtNode ptNode) {
if (BinaryDictIOUtils.supportsDynamicUpdate(options)) { return FormatSpec.PTNODE_FLAGS_SIZE + getPtNodeCharactersSize(ptNode);
return FormatSpec.PTNODE_FLAGS_SIZE + FormatSpec.PARENT_ADDRESS_SIZE
+ getPtNodeCharactersSize(ptNode);
} else {
return FormatSpec.PTNODE_FLAGS_SIZE + getPtNodeCharactersSize(ptNode);
}
} }
/** /**
@ -379,11 +363,10 @@ public class BinaryDictEncoderUtils {
* *
* @param ptNodeArray the node array to compute the size of. * @param ptNodeArray the node array to compute the size of.
* @param dict the dictionary in which the word/attributes are to be found. * @param dict the dictionary in which the word/attributes are to be found.
* @param formatOptions file format options.
* @return false if none of the cached addresses inside the node array changed, true otherwise. * @return false if none of the cached addresses inside the node array changed, true otherwise.
*/ */
private static boolean computeActualPtNodeArraySize(final PtNodeArray ptNodeArray, private static boolean computeActualPtNodeArraySize(final PtNodeArray ptNodeArray,
final FusionDictionary dict, final FormatOptions formatOptions) { final FusionDictionary dict) {
boolean changed = false; boolean changed = false;
int size = getPtNodeCountSize(ptNodeArray); int size = getPtNodeCountSize(ptNodeArray);
for (PtNode ptNode : ptNodeArray.mData) { for (PtNode ptNode : ptNodeArray.mData) {
@ -391,37 +374,26 @@ public class BinaryDictEncoderUtils {
if (ptNode.mCachedAddressAfterUpdate != ptNode.mCachedAddressBeforeUpdate) { if (ptNode.mCachedAddressAfterUpdate != ptNode.mCachedAddressBeforeUpdate) {
changed = true; changed = true;
} }
int nodeSize = getNodeHeaderSize(ptNode, formatOptions); int nodeSize = getNodeHeaderSize(ptNode);
if (ptNode.isTerminal()) { if (ptNode.isTerminal()) {
if (formatOptions.mHasTerminalId) { nodeSize += FormatSpec.PTNODE_FREQUENCY_SIZE;
nodeSize += FormatSpec.PTNODE_TERMINAL_ID_SIZE;
} else {
nodeSize += FormatSpec.PTNODE_FREQUENCY_SIZE;
}
} }
if (formatOptions.supportsDynamicUpdate()) { if (null != ptNode.mChildren) {
nodeSize += FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE;
} else if (null != ptNode.mChildren) {
nodeSize += getByteSize(getOffsetToTargetNodeArrayDuringUpdate(ptNodeArray, nodeSize += getByteSize(getOffsetToTargetNodeArrayDuringUpdate(ptNodeArray,
nodeSize + size, ptNode.mChildren)); nodeSize + size, ptNode.mChildren));
} }
if (formatOptions.mVersion < FormatSpec.FIRST_VERSION_WITH_TERMINAL_ID) { nodeSize += getShortcutListSize(ptNode.mShortcutTargets);
nodeSize += getShortcutListSize(ptNode.mShortcutTargets); if (null != ptNode.mBigrams) {
if (null != ptNode.mBigrams) { for (WeightedString bigram : ptNode.mBigrams) {
for (WeightedString bigram : ptNode.mBigrams) { final int offset = getOffsetToTargetPtNodeDuringUpdate(ptNodeArray,
final int offset = getOffsetToTargetPtNodeDuringUpdate(ptNodeArray, nodeSize + size + FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE,
nodeSize + size + FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE, FusionDictionary.findWordInTree(dict.mRootNodeArray, bigram.mWord));
FusionDictionary.findWordInTree(dict.mRootNodeArray, bigram.mWord)); nodeSize += getByteSize(offset) + FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE;
nodeSize += getByteSize(offset) + FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE;
}
} }
} }
ptNode.mCachedSize = nodeSize; ptNode.mCachedSize = nodeSize;
size += nodeSize; size += nodeSize;
} }
if (formatOptions.supportsDynamicUpdate()) {
size += FormatSpec.FORWARD_LINK_ADDRESS_SIZE;
}
if (ptNodeArray.mCachedSize != size) { if (ptNodeArray.mCachedSize != size) {
ptNodeArray.mCachedSize = size; ptNodeArray.mCachedSize = size;
changed = true; changed = true;
@ -433,11 +405,10 @@ public class BinaryDictEncoderUtils {
* Initializes the cached addresses of node arrays and their containing nodes from their size. * Initializes the cached addresses of node arrays and their containing nodes from their size.
* *
* @param flatNodes the list of node arrays. * @param flatNodes the list of node arrays.
* @param formatOptions file format options.
* @return the byte size of the entire stack. * @return the byte size of the entire stack.
*/ */
private static int initializePtNodeArraysCachedAddresses(final ArrayList<PtNodeArray> flatNodes, private static int initializePtNodeArraysCachedAddresses(
final FormatOptions formatOptions) { final ArrayList<PtNodeArray> flatNodes) {
int nodeArrayOffset = 0; int nodeArrayOffset = 0;
for (final PtNodeArray nodeArray : flatNodes) { for (final PtNodeArray nodeArray : flatNodes) {
nodeArray.mCachedAddressBeforeUpdate = nodeArrayOffset; nodeArray.mCachedAddressBeforeUpdate = nodeArrayOffset;
@ -467,28 +438,6 @@ public class BinaryDictEncoderUtils {
} }
} }
/**
* Compute the cached parent addresses after all has been updated.
*
* The parent addresses are used by some binary formats at write-to-disk time. Not all formats
* need them. In particular, version 2 does not need them, and version 3 does.
*
* @param flatNodes the flat array of node arrays to fill in
*/
private static void computeParentAddresses(final ArrayList<PtNodeArray> flatNodes) {
for (final PtNodeArray nodeArray : flatNodes) {
for (final PtNode ptNode : nodeArray.mData) {
if (null != ptNode.mChildren) {
// Assign my address to children's parent address
// Here BeforeUpdate and AfterUpdate addresses have the same value, so it
// does not matter which we use.
ptNode.mChildren.mCachedParentAddress = ptNode.mCachedAddressAfterUpdate
- ptNode.mChildren.mCachedAddressAfterUpdate;
}
}
}
}
/** /**
* Compute the addresses and sizes of an ordered list of PtNode arrays. * Compute the addresses and sizes of an ordered list of PtNode arrays.
* *
@ -501,14 +450,15 @@ public class BinaryDictEncoderUtils {
* *
* @param dict the dictionary * @param dict the dictionary
* @param flatNodes the ordered list of PtNode arrays * @param flatNodes the ordered list of PtNode arrays
* @param formatOptions file format options.
* @return the same array it was passed. The nodes have been updated for address and size. * @return the same array it was passed. The nodes have been updated for address and size.
*/ */
/* package */ static ArrayList<PtNodeArray> computeAddresses(final FusionDictionary dict, /* package */ static ArrayList<PtNodeArray> computeAddresses(final FusionDictionary dict,
final ArrayList<PtNodeArray> flatNodes, final FormatOptions formatOptions) { final ArrayList<PtNodeArray> flatNodes) {
// First get the worst possible sizes and offsets // First get the worst possible sizes and offsets
for (final PtNodeArray n : flatNodes) calculatePtNodeArrayMaximumSize(n, formatOptions); for (final PtNodeArray n : flatNodes) {
final int offset = initializePtNodeArraysCachedAddresses(flatNodes, formatOptions); calculatePtNodeArrayMaximumSize(n);
}
final int offset = initializePtNodeArraysCachedAddresses(flatNodes);
MakedictLog.i("Compressing the array addresses. Original size : " + offset); MakedictLog.i("Compressing the array addresses. Original size : " + offset);
MakedictLog.i("(Recursively seen size : " + offset + ")"); MakedictLog.i("(Recursively seen size : " + offset + ")");
@ -521,8 +471,7 @@ public class BinaryDictEncoderUtils {
for (final PtNodeArray ptNodeArray : flatNodes) { for (final PtNodeArray ptNodeArray : flatNodes) {
ptNodeArray.mCachedAddressAfterUpdate = ptNodeArrayStartOffset; ptNodeArray.mCachedAddressAfterUpdate = ptNodeArrayStartOffset;
final int oldNodeArraySize = ptNodeArray.mCachedSize; final int oldNodeArraySize = ptNodeArray.mCachedSize;
final boolean changed = final boolean changed = computeActualPtNodeArraySize(ptNodeArray, dict);
computeActualPtNodeArraySize(ptNodeArray, dict, formatOptions);
final int newNodeArraySize = ptNodeArray.mCachedSize; final int newNodeArraySize = ptNodeArray.mCachedSize;
if (oldNodeArraySize < newNodeArraySize) { if (oldNodeArraySize < newNodeArraySize) {
throw new RuntimeException("Increased size ?!"); throw new RuntimeException("Increased size ?!");
@ -535,9 +484,6 @@ public class BinaryDictEncoderUtils {
if (passes > MAX_PASSES) throw new RuntimeException("Too many passes - probably a bug"); if (passes > MAX_PASSES) throw new RuntimeException("Too many passes - probably a bug");
} while (changesDone); } while (changesDone);
if (formatOptions.supportsDynamicUpdate()) {
computeParentAddresses(flatNodes);
}
final PtNodeArray lastPtNodeArray = flatNodes.get(flatNodes.size() - 1); final PtNodeArray lastPtNodeArray = flatNodes.get(flatNodes.size() - 1);
MakedictLog.i("Compression complete in " + passes + " passes."); MakedictLog.i("Compression complete in " + passes + " passes.");
MakedictLog.i("After address compression : " MakedictLog.i("After address compression : "
@ -634,35 +580,29 @@ public class BinaryDictEncoderUtils {
* @param hasBigrams whether the PtNode has bigrams. * @param hasBigrams whether the PtNode has bigrams.
* @param isNotAWord whether the PtNode is not a word. * @param isNotAWord whether the PtNode is not a word.
* @param isBlackListEntry whether the PtNode is a blacklist entry. * @param isBlackListEntry whether the PtNode is a blacklist entry.
* @param formatOptions file format options.
* @return the flags * @return the flags
*/ */
static int makePtNodeFlags(final boolean hasMultipleChars, final boolean isTerminal, static int makePtNodeFlags(final boolean hasMultipleChars, final boolean isTerminal,
final int childrenAddressSize, final boolean hasShortcuts, final boolean hasBigrams, final int childrenAddressSize, final boolean hasShortcuts, final boolean hasBigrams,
final boolean isNotAWord, final boolean isBlackListEntry, final boolean isNotAWord, final boolean isBlackListEntry) {
final FormatOptions formatOptions) {
byte flags = 0; byte flags = 0;
if (hasMultipleChars) flags |= FormatSpec.FLAG_HAS_MULTIPLE_CHARS; if (hasMultipleChars) flags |= FormatSpec.FLAG_HAS_MULTIPLE_CHARS;
if (isTerminal) flags |= FormatSpec.FLAG_IS_TERMINAL; if (isTerminal) flags |= FormatSpec.FLAG_IS_TERMINAL;
if (formatOptions.supportsDynamicUpdate()) { switch (childrenAddressSize) {
flags |= FormatSpec.FLAG_IS_NOT_MOVED; case 1:
} else if (true) { flags |= FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_ONEBYTE;
switch (childrenAddressSize) { break;
case 1: case 2:
flags |= FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_ONEBYTE; flags |= FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_TWOBYTES;
break; break;
case 2: case 3:
flags |= FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_TWOBYTES; flags |= FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_THREEBYTES;
break; break;
case 3: case 0:
flags |= FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_THREEBYTES; flags |= FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_NOADDRESS;
break; break;
case 0: default:
flags |= FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_NOADDRESS; throw new RuntimeException("Node with a strange address");
break;
default:
throw new RuntimeException("Node with a strange address");
}
} }
if (hasShortcuts) flags |= FormatSpec.FLAG_HAS_SHORTCUT_TARGETS; if (hasShortcuts) flags |= FormatSpec.FLAG_HAS_SHORTCUT_TARGETS;
if (hasBigrams) flags |= FormatSpec.FLAG_HAS_BIGRAMS; if (hasBigrams) flags |= FormatSpec.FLAG_HAS_BIGRAMS;
@ -671,12 +611,12 @@ public class BinaryDictEncoderUtils {
return flags; return flags;
} }
/* package */ static byte makePtNodeFlags(final PtNode node, final int childrenOffset, /* package */ static byte makePtNodeFlags(final PtNode node, final int childrenOffset) {
final FormatOptions formatOptions) {
return (byte) makePtNodeFlags(node.mChars.length > 1, node.isTerminal(), return (byte) makePtNodeFlags(node.mChars.length > 1, node.isTerminal(),
getByteSize(childrenOffset), getByteSize(childrenOffset),
node.mShortcutTargets != null && !node.mShortcutTargets.isEmpty(), node.mShortcutTargets != null && !node.mShortcutTargets.isEmpty(),
node.mBigrams != null, node.mIsNotAWord, node.mIsBlacklistEntry, formatOptions); node.mBigrams != null && !node.mBigrams.isEmpty(),
node.mIsNotAWord, node.mIsBlacklistEntry);
} }
/** /**
@ -767,38 +707,14 @@ public class BinaryDictEncoderUtils {
+ (frequency & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY); + (frequency & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY);
} }
/* package */ static final int writeParentAddress(final byte[] buffer, final int index, /* package */ static final int getChildrenPosition(final PtNode ptNode) {
final int address, final FormatOptions formatOptions) {
if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) {
if (address == FormatSpec.NO_PARENT_ADDRESS) {
buffer[index] = buffer[index + 1] = buffer[index + 2] = 0;
} else {
final int absAddress = Math.abs(address);
assert(absAddress <= FormatSpec.SINT24_MAX);
buffer[index] = (byte)((address < 0 ? FormatSpec.MSB8 : 0)
| ((absAddress >> 16) & 0xFF));
buffer[index + 1] = (byte)((absAddress >> 8) & 0xFF);
buffer[index + 2] = (byte)(absAddress & 0xFF);
}
return index + 3;
} else {
return index;
}
}
/* package */ static final int getChildrenPosition(final PtNode ptNode,
final FormatOptions formatOptions) {
int positionOfChildrenPosField = ptNode.mCachedAddressAfterUpdate int positionOfChildrenPosField = ptNode.mCachedAddressAfterUpdate
+ getNodeHeaderSize(ptNode, formatOptions); + getNodeHeaderSize(ptNode);
if (ptNode.isTerminal()) { if (ptNode.isTerminal()) {
// A terminal node has either the terminal id or the frequency. // A terminal node has the frequency.
// If positionOfChildrenPosField is incorrect, we may crash when jumping to the children // If positionOfChildrenPosField is incorrect, we may crash when jumping to the children
// position. // position.
if (formatOptions.mHasTerminalId) { positionOfChildrenPosField += FormatSpec.PTNODE_FREQUENCY_SIZE;
positionOfChildrenPosField += FormatSpec.PTNODE_TERMINAL_ID_SIZE;
} else {
positionOfChildrenPosField += FormatSpec.PTNODE_FREQUENCY_SIZE;
}
} }
return null == ptNode.mChildren ? FormatSpec.NO_CHILDREN_ADDRESS return null == ptNode.mChildren ? FormatSpec.NO_CHILDREN_ADDRESS
: ptNode.mChildren.mCachedAddressAfterUpdate - positionOfChildrenPosField; : ptNode.mChildren.mCachedAddressAfterUpdate - positionOfChildrenPosField;
@ -810,12 +726,10 @@ public class BinaryDictEncoderUtils {
* @param dict the dictionary the node array is a part of (for relative offsets). * @param dict the dictionary the node array is a part of (for relative offsets).
* @param dictEncoder the dictionary encoder. * @param dictEncoder the dictionary encoder.
* @param ptNodeArray the node array to write. * @param ptNodeArray the node array to write.
* @param formatOptions file format options.
*/ */
@SuppressWarnings("unused") @SuppressWarnings("unused")
/* package */ static void writePlacedPtNodeArray(final FusionDictionary dict, /* package */ static void writePlacedPtNodeArray(final FusionDictionary dict,
final DictEncoder dictEncoder, final PtNodeArray ptNodeArray, final DictEncoder dictEncoder, final PtNodeArray ptNodeArray) {
final FormatOptions formatOptions) {
// TODO: Make the code in common with BinaryDictIOUtils#writePtNode // TODO: Make the code in common with BinaryDictIOUtils#writePtNode
dictEncoder.setPosition(ptNodeArray.mCachedAddressAfterUpdate); dictEncoder.setPosition(ptNodeArray.mCachedAddressAfterUpdate);
@ -838,10 +752,7 @@ public class BinaryDictEncoderUtils {
+ FormatSpec.MAX_TERMINAL_FREQUENCY + FormatSpec.MAX_TERMINAL_FREQUENCY
+ " : " + ptNode.mProbabilityInfo.toString()); + " : " + ptNode.mProbabilityInfo.toString());
} }
dictEncoder.writePtNode(ptNode, parentPosition, formatOptions, dict); dictEncoder.writePtNode(ptNode, dict);
}
if (formatOptions.supportsDynamicUpdate()) {
dictEncoder.writeForwardLinkAddress(FormatSpec.NO_FORWARD_LINK_ADDRESS);
} }
if (dictEncoder.getPosition() != ptNodeArray.mCachedAddressAfterUpdate if (dictEncoder.getPosition() != ptNodeArray.mCachedAddressAfterUpdate
+ ptNodeArray.mCachedSize) { + ptNodeArray.mCachedSize) {

View File

@ -18,7 +18,6 @@ package com.android.inputmethod.latin.makedict;
import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.Constants; import com.android.inputmethod.latin.Constants;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.utils.ByteArrayDictBuffer; import com.android.inputmethod.latin.utils.ByteArrayDictBuffer;
@ -60,8 +59,7 @@ public final class BinaryDictIOUtils {
private static void readUnigramsAndBigramsBinaryInner(final DictDecoder dictDecoder, private static void readUnigramsAndBigramsBinaryInner(final DictDecoder dictDecoder,
final int bodyOffset, final Map<Integer, String> words, final int bodyOffset, final Map<Integer, String> words,
final Map<Integer, Integer> frequencies, final Map<Integer, Integer> frequencies,
final Map<Integer, ArrayList<PendingAttribute>> bigrams, final Map<Integer, ArrayList<PendingAttribute>> bigrams) {
final FormatOptions formatOptions) {
int[] pushedChars = new int[FormatSpec.MAX_WORD_LENGTH + 1]; int[] pushedChars = new int[FormatSpec.MAX_WORD_LENGTH + 1];
Stack<Position> stack = new Stack<Position>(); Stack<Position> stack = new Stack<Position>();
@ -90,17 +88,12 @@ public final class BinaryDictIOUtils {
stack.pop(); stack.pop();
continue; continue;
} }
final PtNodeInfo ptNodeInfo = dictDecoder.readPtNode(p.mAddress, formatOptions); final PtNodeInfo ptNodeInfo = dictDecoder.readPtNode(p.mAddress);
for (int i = 0; i < ptNodeInfo.mCharacters.length; ++i) { for (int i = 0; i < ptNodeInfo.mCharacters.length; ++i) {
pushedChars[index++] = ptNodeInfo.mCharacters[i]; pushedChars[index++] = ptNodeInfo.mCharacters[i];
} }
p.mPosition++; p.mPosition++;
if (ptNodeInfo.isTerminal()) {// found word
final boolean isMovedPtNode = isMovedPtNode(ptNodeInfo.mFlags,
formatOptions);
final boolean isDeletedPtNode = isDeletedPtNode(ptNodeInfo.mFlags,
formatOptions);
if (!isMovedPtNode && !isDeletedPtNode && ptNodeInfo.isTerminal()) {// found word
words.put(ptNodeInfo.mOriginalAddress, new String(pushedChars, 0, index)); words.put(ptNodeInfo.mOriginalAddress, new String(pushedChars, 0, index));
frequencies.put( frequencies.put(
ptNodeInfo.mOriginalAddress, ptNodeInfo.mProbabilityInfo.mProbability); ptNodeInfo.mOriginalAddress, ptNodeInfo.mProbabilityInfo.mProbability);
@ -110,25 +103,13 @@ public final class BinaryDictIOUtils {
} }
if (p.mPosition == p.mNumOfPtNode) { if (p.mPosition == p.mNumOfPtNode) {
if (formatOptions.supportsDynamicUpdate()) { stack.pop();
final boolean hasValidForwardLinkAddress =
dictDecoder.readAndFollowForwardLink();
if (hasValidForwardLinkAddress && dictDecoder.hasNextPtNodeArray()) {
// The node array has a forward link.
p.mNumOfPtNode = Position.NOT_READ_PTNODE_COUNT;
p.mAddress = dictDecoder.getPosition();
} else {
stack.pop();
}
} else {
stack.pop();
}
} else { } else {
// The Ptnode array has more PtNodes. // The PtNode array has more PtNodes.
p.mAddress = dictDecoder.getPosition(); p.mAddress = dictDecoder.getPosition();
} }
if (!isMovedPtNode && hasChildrenAddress(ptNodeInfo.mChildrenAddress)) { if (hasChildrenAddress(ptNodeInfo.mChildrenAddress)) {
final Position childrenPos = new Position(ptNodeInfo.mChildrenAddress, index); final Position childrenPos = new Position(ptNodeInfo.mChildrenAddress, index);
stack.push(childrenPos); stack.push(childrenPos);
} }
@ -153,7 +134,7 @@ public final class BinaryDictIOUtils {
// Read header // Read header
final DictionaryHeader header = dictDecoder.readHeader(); final DictionaryHeader header = dictDecoder.readHeader();
readUnigramsAndBigramsBinaryInner(dictDecoder, header.mBodyOffset, words, readUnigramsAndBigramsBinaryInner(dictDecoder, header.mBodyOffset, words,
frequencies, bigrams, header.mFormatOptions); frequencies, bigrams);
} }
/** /**
@ -171,8 +152,7 @@ public final class BinaryDictIOUtils {
final String word) throws IOException, UnsupportedFormatException { final String word) throws IOException, UnsupportedFormatException {
if (word == null) return FormatSpec.NOT_VALID_WORD; if (word == null) return FormatSpec.NOT_VALID_WORD;
dictDecoder.setPosition(0); dictDecoder.setPosition(0);
dictDecoder.readHeader();
final DictionaryHeader header = dictDecoder.readHeader();
int wordPos = 0; int wordPos = 0;
final int wordLen = word.codePointCount(0, word.length()); final int wordLen = word.codePointCount(0, word.length());
for (int depth = 0; depth < Constants.DICTIONARY_MAX_WORD_LENGTH; ++depth) { for (int depth = 0; depth < Constants.DICTIONARY_MAX_WORD_LENGTH; ++depth) {
@ -183,13 +163,7 @@ public final class BinaryDictIOUtils {
boolean foundNextPtNode = false; boolean foundNextPtNode = false;
for (int i = 0; i < ptNodeCount; ++i) { for (int i = 0; i < ptNodeCount; ++i) {
final int ptNodePos = dictDecoder.getPosition(); final int ptNodePos = dictDecoder.getPosition();
final PtNodeInfo currentInfo = dictDecoder.readPtNode(ptNodePos, final PtNodeInfo currentInfo = dictDecoder.readPtNode(ptNodePos);
header.mFormatOptions);
final boolean isMovedNode = isMovedPtNode(currentInfo.mFlags,
header.mFormatOptions);
final boolean isDeletedNode = isDeletedPtNode(currentInfo.mFlags,
header.mFormatOptions);
if (isMovedNode) continue;
boolean same = true; boolean same = true;
for (int p = 0, j = word.offsetByCodePoints(0, wordPos); for (int p = 0, j = word.offsetByCodePoints(0, wordPos);
p < currentInfo.mCharacters.length; p < currentInfo.mCharacters.length;
@ -204,7 +178,7 @@ public final class BinaryDictIOUtils {
if (same) { if (same) {
// found the PtNode matches the word. // found the PtNode matches the word.
if (wordPos + currentInfo.mCharacters.length == wordLen) { if (wordPos + currentInfo.mCharacters.length == wordLen) {
if (!currentInfo.isTerminal() || isDeletedNode) { if (!currentInfo.isTerminal()) {
return FormatSpec.NOT_VALID_WORD; return FormatSpec.NOT_VALID_WORD;
} else { } else {
return ptNodePos; return ptNodePos;
@ -219,64 +193,13 @@ public final class BinaryDictIOUtils {
break; break;
} }
} }
// If we found the next PtNode, it is under the file pointer.
// But if not, we are at the end of this node array so we expect to have
// a forward link address that we need to consult and possibly resume
// search on the next node array in the linked list.
if (foundNextPtNode) break; if (foundNextPtNode) break;
if (!header.mFormatOptions.supportsDynamicUpdate()) { return FormatSpec.NOT_VALID_WORD;
return FormatSpec.NOT_VALID_WORD;
}
final boolean hasValidForwardLinkAddress =
dictDecoder.readAndFollowForwardLink();
if (!hasValidForwardLinkAddress || !dictDecoder.hasNextPtNodeArray()) {
return FormatSpec.NOT_VALID_WORD;
}
} while(true); } while(true);
} }
return FormatSpec.NOT_VALID_WORD; return FormatSpec.NOT_VALID_WORD;
} }
/**
* @return the size written, in bytes. Always 3 bytes.
*/
@UsedForTesting
static int writeSInt24ToBuffer(final DictBuffer dictBuffer, final int value) {
final int absValue = Math.abs(value);
dictBuffer.put((byte)(((value < 0 ? 0x80 : 0) | (absValue >> 16)) & 0xFF));
dictBuffer.put((byte)((absValue >> 8) & 0xFF));
dictBuffer.put((byte)(absValue & 0xFF));
return 3;
}
/**
* @return the size written, in bytes. Always 3 bytes.
*/
@UsedForTesting
static int writeSInt24ToStream(final OutputStream destination, final int value)
throws IOException {
final int absValue = Math.abs(value);
destination.write((byte)(((value < 0 ? 0x80 : 0) | (absValue >> 16)) & 0xFF));
destination.write((byte)((absValue >> 8) & 0xFF));
destination.write((byte)(absValue & 0xFF));
return 3;
}
@UsedForTesting
static void skipString(final DictBuffer dictBuffer,
final boolean hasMultipleChars) {
if (hasMultipleChars) {
int character = CharEncoding.readChar(dictBuffer);
while (character != FormatSpec.INVALID_CHARACTER) {
character = CharEncoding.readChar(dictBuffer);
}
} else {
CharEncoding.readChar(dictBuffer);
}
}
/** /**
* Writes a PtNodeCount to the stream. * Writes a PtNodeCount to the stream.
* *
@ -355,30 +278,6 @@ public final class BinaryDictIOUtils {
return FormatSpec.NO_CHILDREN_ADDRESS != address; return FormatSpec.NO_CHILDREN_ADDRESS != address;
} }
/**
* Helper method to check whether the node is moved.
*/
public static boolean isMovedPtNode(final int flags, final FormatOptions options) {
return options.supportsDynamicUpdate()
&& ((flags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) == FormatSpec.FLAG_IS_MOVED);
}
/**
* Helper method to check whether the dictionary can be updated dynamically.
*/
public static boolean supportsDynamicUpdate(final FormatOptions options) {
return options.mVersion >= FormatSpec.FIRST_VERSION_WITH_DYNAMIC_UPDATE
&& options.supportsDynamicUpdate();
}
/**
* Helper method to check whether the node is deleted.
*/
public static boolean isDeletedPtNode(final int flags, final FormatOptions formatOptions) {
return formatOptions.supportsDynamicUpdate()
&& ((flags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) == FormatSpec.FLAG_IS_DELETED);
}
/** /**
* Compute the binary size of the node count * Compute the binary size of the node count
* @param count the node count * @param count the node count
@ -396,9 +295,7 @@ public final class BinaryDictIOUtils {
} }
} }
static int getChildrenAddressSize(final int optionFlags, static int getChildrenAddressSize(final int optionFlags) {
final FormatOptions formatOptions) {
if (formatOptions.supportsDynamicUpdate()) return FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE;
switch (optionFlags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) { switch (optionFlags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) {
case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_ONEBYTE: case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_ONEBYTE:
return 1; return 1;
@ -419,6 +316,7 @@ public final class BinaryDictIOUtils {
* @param bigramFrequency compressed frequency * @param bigramFrequency compressed frequency
* @return approximate bigram frequency * @return approximate bigram frequency
*/ */
@UsedForTesting
public static int reconstructBigramFrequency(final int unigramFrequency, public static int reconstructBigramFrequency(final int unigramFrequency,
final int bigramFrequency) { final int bigramFrequency) {
final float stepSize = (FormatSpec.MAX_TERMINAL_FREQUENCY - unigramFrequency) final float stepSize = (FormatSpec.MAX_TERMINAL_FREQUENCY - unigramFrequency)

View File

@ -18,7 +18,6 @@ package com.android.inputmethod.latin.makedict;
import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.utils.ByteArrayDictBuffer; import com.android.inputmethod.latin.utils.ByteArrayDictBuffer;
import java.io.File; import java.io.File;
@ -45,10 +44,9 @@ public interface DictDecoder {
/** /**
* Reads PtNode from ptNodePos. * Reads PtNode from ptNodePos.
* @param ptNodePos the position of PtNode. * @param ptNodePos the position of PtNode.
* @param formatOptions the format options.
* @return PtNodeInfo. * @return PtNodeInfo.
*/ */
public PtNodeInfo readPtNode(final int ptNodePos, final FormatOptions formatOptions); public PtNodeInfo readPtNode(final int ptNodePos);
/** /**
* Reads a buffer and returns the memory representation of the dictionary. * Reads a buffer and returns the memory representation of the dictionary.
@ -112,14 +110,6 @@ public interface DictDecoder {
*/ */
public int readPtNodeCount(); public int readPtNodeCount();
/**
* Reads the forward link and advances the position.
*
* @return true if this method moves the file pointer, false otherwise.
*/
public boolean readAndFollowForwardLink();
public boolean hasNextPtNodeArray();
/** /**
* Opens the dictionary file and makes DictBuffer. * Opens the dictionary file and makes DictBuffer.
*/ */

View File

@ -32,7 +32,5 @@ public interface DictEncoder {
public int getPosition(); public int getPosition();
public void writePtNodeCount(final int ptNodeCount); public void writePtNodeCount(final int ptNodeCount);
public void writeForwardLinkAddress(final int forwardLinkAddress); public void writeForwardLinkAddress(final int forwardLinkAddress);
public void writePtNode(final PtNode ptNode, final FusionDictionary dict);
public void writePtNode(final PtNode ptNode, final int parentPosition,
final FormatOptions formatOptions, final FusionDictionary dict);
} }

View File

@ -309,7 +309,6 @@ public final class FormatSpec {
*/ */
public static final class FormatOptions { public static final class FormatOptions {
public final int mVersion; public final int mVersion;
public final boolean mHasTerminalId;
public final boolean mHasTimestamp; public final boolean mHasTimestamp;
@UsedForTesting @UsedForTesting
@ -319,13 +318,8 @@ public final class FormatSpec {
public FormatOptions(final int version, final boolean hasTimestamp) { public FormatOptions(final int version, final boolean hasTimestamp) {
mVersion = version; mVersion = version;
mHasTerminalId = (version >= FIRST_VERSION_WITH_TERMINAL_ID);
mHasTimestamp = hasTimestamp; mHasTimestamp = hasTimestamp;
} }
public boolean supportsDynamicUpdate() {
return mVersion >= FIRST_VERSION_WITH_DYNAMIC_UPDATE;
}
} }
/** /**

View File

@ -31,12 +31,11 @@ public final class PtNodeInfo {
public final int[] mCharacters; public final int[] mCharacters;
public final ProbabilityInfo mProbabilityInfo; public final ProbabilityInfo mProbabilityInfo;
public final int mChildrenAddress; public final int mChildrenAddress;
public final int mParentAddress;
public final ArrayList<WeightedString> mShortcutTargets; public final ArrayList<WeightedString> mShortcutTargets;
public final ArrayList<PendingAttribute> mBigrams; public final ArrayList<PendingAttribute> mBigrams;
public PtNodeInfo(final int originalAddress, final int endAddress, final int flags, public PtNodeInfo(final int originalAddress, final int endAddress, final int flags,
final int[] characters, final ProbabilityInfo probabilityInfo, final int parentAddress, final int[] characters, final ProbabilityInfo probabilityInfo,
final int childrenAddress, final ArrayList<WeightedString> shortcutTargets, final int childrenAddress, final ArrayList<WeightedString> shortcutTargets,
final ArrayList<PendingAttribute> bigrams) { final ArrayList<PendingAttribute> bigrams) {
mOriginalAddress = originalAddress; mOriginalAddress = originalAddress;
@ -44,7 +43,6 @@ public final class PtNodeInfo {
mFlags = flags; mFlags = flags;
mCharacters = characters; mCharacters = characters;
mProbabilityInfo = probabilityInfo; mProbabilityInfo = probabilityInfo;
mParentAddress = parentAddress;
mChildrenAddress = childrenAddress; mChildrenAddress = childrenAddress;
mShortcutTargets = shortcutTargets; mShortcutTargets = shortcutTargets;
mBigrams = bigrams; mBigrams = bigrams;

View File

@ -20,7 +20,6 @@ import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.BinaryDictionary; import com.android.inputmethod.latin.BinaryDictionary;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import com.android.inputmethod.latin.utils.CollectionUtils; import com.android.inputmethod.latin.utils.CollectionUtils;
@ -173,21 +172,23 @@ public class Ver2DictDecoder extends AbstractDictDecoder {
@Override @Override
public DictionaryHeader readHeader() throws IOException, UnsupportedFormatException { public DictionaryHeader readHeader() throws IOException, UnsupportedFormatException {
if (mDictBuffer == null) { final DictionaryHeader header = mBinaryDictionary.getHeader();
if (header.mFormatOptions.mVersion != FormatSpec.VERSION2) {
throw new UnsupportedFormatException("File header has a wrong version : "
+ header.mFormatOptions.mVersion);
}
if (!isDictBufferOpen()) {
openDictBuffer(); openDictBuffer();
} }
final DictionaryHeader header = super.readHeader(mDictBuffer); // Advance buffer reading position to the head of dictionary body.
final int version = header.mFormatOptions.mVersion; setPosition(header.mBodyOffset);
if (version != FormatSpec.VERSION2) {
throw new UnsupportedFormatException("File header has a wrong version : " + version);
}
return header; return header;
} }
// TODO: Make this buffer multi thread safe. // TODO: Make this buffer multi thread safe.
private final int[] mCharacterBuffer = new int[FormatSpec.MAX_WORD_LENGTH]; private final int[] mCharacterBuffer = new int[FormatSpec.MAX_WORD_LENGTH];
@Override @Override
public PtNodeInfo readPtNode(final int ptNodePos, final FormatOptions options) { public PtNodeInfo readPtNode(final int ptNodePos) {
int addressPointer = ptNodePos; int addressPointer = ptNodePos;
final int flags = PtNodeReader.readPtNodeOptionFlags(mDictBuffer); final int flags = PtNodeReader.readPtNodeOptionFlags(mDictBuffer);
addressPointer += FormatSpec.PTNODE_FLAGS_SIZE; addressPointer += FormatSpec.PTNODE_FLAGS_SIZE;
@ -221,7 +222,7 @@ public class Ver2DictDecoder extends AbstractDictDecoder {
if (childrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) { if (childrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) {
childrenAddress += addressPointer; childrenAddress += addressPointer;
} }
addressPointer += BinaryDictIOUtils.getChildrenAddressSize(flags, options); addressPointer += BinaryDictIOUtils.getChildrenAddressSize(flags);
final ArrayList<WeightedString> shortcutTargets; final ArrayList<WeightedString> shortcutTargets;
if (0 != (flags & FormatSpec.FLAG_HAS_SHORTCUT_TARGETS)) { if (0 != (flags & FormatSpec.FLAG_HAS_SHORTCUT_TARGETS)) {
// readShortcut will add shortcuts to shortcutTargets. // readShortcut will add shortcuts to shortcutTargets.
@ -244,7 +245,7 @@ public class Ver2DictDecoder extends AbstractDictDecoder {
bigrams = null; bigrams = null;
} }
return new PtNodeInfo(ptNodePos, addressPointer, flags, characters, probabilityInfo, return new PtNodeInfo(ptNodePos, addressPointer, flags, characters, probabilityInfo,
FormatSpec.NO_PARENT_ADDRESS, childrenAddress, shortcutTargets, bigrams); childrenAddress, shortcutTargets, bigrams);
} }
@Override @Override
@ -307,19 +308,4 @@ public class Ver2DictDecoder extends AbstractDictDecoder {
public int readPtNodeCount() { public int readPtNodeCount() {
return BinaryDictDecoderUtils.readPtNodeCount(mDictBuffer); return BinaryDictDecoderUtils.readPtNodeCount(mDictBuffer);
} }
@Override
public boolean readAndFollowForwardLink() {
final int nextAddress = mDictBuffer.readUnsignedInt24();
if (nextAddress >= 0 && nextAddress < mDictBuffer.limit()) {
mDictBuffer.position(nextAddress);
return true;
}
return false;
}
@Override
public boolean hasNextPtNodeArray() {
return mDictBuffer.position() != FormatSpec.NO_FORWARD_LINK_ADDRESS;
}
} }

View File

@ -95,7 +95,7 @@ public class Ver2DictEncoder implements DictEncoder {
ArrayList<PtNodeArray> flatNodes = BinaryDictEncoderUtils.flattenTree(dict.mRootNodeArray); ArrayList<PtNodeArray> flatNodes = BinaryDictEncoderUtils.flattenTree(dict.mRootNodeArray);
MakedictLog.i("Computing addresses..."); MakedictLog.i("Computing addresses...");
BinaryDictEncoderUtils.computeAddresses(dict, flatNodes, formatOptions); BinaryDictEncoderUtils.computeAddresses(dict, flatNodes);
MakedictLog.i("Checking PtNode array..."); MakedictLog.i("Checking PtNode array...");
if (MakedictLog.DBG) BinaryDictEncoderUtils.checkFlatPtNodeArrayList(flatNodes); if (MakedictLog.DBG) BinaryDictEncoderUtils.checkFlatPtNodeArrayList(flatNodes);
@ -107,7 +107,7 @@ public class Ver2DictEncoder implements DictEncoder {
MakedictLog.i("Writing file..."); MakedictLog.i("Writing file...");
for (PtNodeArray nodeArray : flatNodes) { for (PtNodeArray nodeArray : flatNodes) {
BinaryDictEncoderUtils.writePlacedPtNodeArray(dict, this, nodeArray, formatOptions); BinaryDictEncoderUtils.writePlacedPtNodeArray(dict, this, nodeArray);
} }
if (MakedictLog.DBG) BinaryDictEncoderUtils.showStatistics(flatNodes); if (MakedictLog.DBG) BinaryDictEncoderUtils.showStatistics(flatNodes);
mOutStream.write(mBuffer, 0, mPosition); mOutStream.write(mBuffer, 0, mPosition);
@ -139,24 +139,13 @@ public class Ver2DictEncoder implements DictEncoder {
countSize); countSize);
} }
private void writePtNodeFlags(final PtNode ptNode, final FormatOptions formatOptions) { private void writePtNodeFlags(final PtNode ptNode) {
final int childrenPos = BinaryDictEncoderUtils.getChildrenPosition(ptNode, formatOptions); final int childrenPos = BinaryDictEncoderUtils.getChildrenPosition(ptNode);
mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition, mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition,
BinaryDictEncoderUtils.makePtNodeFlags(ptNode, childrenPos, formatOptions), BinaryDictEncoderUtils.makePtNodeFlags(ptNode, childrenPos),
FormatSpec.PTNODE_FLAGS_SIZE); FormatSpec.PTNODE_FLAGS_SIZE);
} }
private void writeParentPosition(final int parentPosition, final PtNode ptNode,
final FormatOptions formatOptions) {
if (parentPosition == FormatSpec.NO_PARENT_ADDRESS) {
mPosition = BinaryDictEncoderUtils.writeParentAddress(mBuffer, mPosition,
parentPosition, formatOptions);
} else {
mPosition = BinaryDictEncoderUtils.writeParentAddress(mBuffer, mPosition,
parentPosition - ptNode.mCachedAddressAfterUpdate, formatOptions);
}
}
private void writeCharacters(final int[] codePoints, final boolean hasSeveralChars) { private void writeCharacters(final int[] codePoints, final boolean hasSeveralChars) {
mPosition = CharEncoding.writeCharArray(codePoints, mBuffer, mPosition); mPosition = CharEncoding.writeCharArray(codePoints, mBuffer, mPosition);
if (hasSeveralChars) { if (hasSeveralChars) {
@ -171,15 +160,10 @@ public class Ver2DictEncoder implements DictEncoder {
} }
} }
private void writeChildrenPosition(final PtNode ptNode, final FormatOptions formatOptions) { private void writeChildrenPosition(final PtNode ptNode) {
final int childrenPos = BinaryDictEncoderUtils.getChildrenPosition(ptNode, formatOptions); final int childrenPos = BinaryDictEncoderUtils.getChildrenPosition(ptNode);
if (formatOptions.supportsDynamicUpdate()) { mPosition += BinaryDictEncoderUtils.writeChildrenPosition(mBuffer, mPosition,
mPosition += BinaryDictEncoderUtils.writeSignedChildrenPosition(mBuffer, mPosition, childrenPos);
childrenPos);
} else {
mPosition += BinaryDictEncoderUtils.writeChildrenPosition(mBuffer, mPosition,
childrenPos);
}
} }
/** /**
@ -246,13 +230,11 @@ public class Ver2DictEncoder implements DictEncoder {
} }
@Override @Override
public void writePtNode(final PtNode ptNode, final int parentPosition, public void writePtNode(final PtNode ptNode, final FusionDictionary dict) {
final FormatOptions formatOptions, final FusionDictionary dict) { writePtNodeFlags(ptNode);
writePtNodeFlags(ptNode, formatOptions);
writeParentPosition(parentPosition, ptNode, formatOptions);
writeCharacters(ptNode.mChars, ptNode.hasSeveralChars()); writeCharacters(ptNode.mChars, ptNode.hasSeveralChars());
writeFrequency(ptNode.getProbability()); writeFrequency(ptNode.getProbability());
writeChildrenPosition(ptNode, formatOptions); writeChildrenPosition(ptNode);
writeShortcuts(ptNode.mShortcutTargets); writeShortcuts(ptNode.mShortcutTargets);
writeBigrams(ptNode.mBigrams, dict); writeBigrams(ptNode.mBigrams, dict);
} }

View File

@ -122,7 +122,6 @@ public class Ver4DictEncoder implements DictEncoder {
} }
@Override @Override
public void writePtNode( public void writePtNode(PtNode ptNode, FusionDictionary dict) {
PtNode ptNode, int parentPosition, FormatOptions formatOptions, FusionDictionary dict) {
} }
} }

View File

@ -501,7 +501,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
} }
if (fileHeader == null) return null; if (fileHeader == null) return null;
return BinaryDictDecoderUtils.getWordAtPosition(dictDecoder, fileHeader.mBodyOffset, return BinaryDictDecoderUtils.getWordAtPosition(dictDecoder, fileHeader.mBodyOffset,
address, fileHeader.mFormatOptions).mWord; address).mWord;
} }
private long checkGetTerminalPosition(final DictDecoder dictDecoder, final String word, private long checkGetTerminalPosition(final DictDecoder dictDecoder, final String word,