Add readVer4PtNodeInfo to Ver4DictDecoder.

Change-Id: Ibffaf64a9bda1fb0779a9b53558ca56ed31660d3
main
Yuichiro Hanada 2013-10-16 12:02:45 +09:00
parent 36dd126ab4
commit ad5b9bcec2
2 changed files with 68 additions and 27 deletions

View File

@ -43,7 +43,7 @@ public interface DictDecoder {
public FileHeader readHeader() throws IOException, UnsupportedFormatException; public FileHeader readHeader() throws IOException, UnsupportedFormatException;
/** /**
* Reads PtNode from nodeAddress. * Reads PtNode from ptNodePos.
* @param ptNodePos the position of PtNode. * @param ptNodePos the position of PtNode.
* @param formatOptions the format options. * @param formatOptions the format options.
* @return PtNodeInfo. * @return PtNodeInfo.

View File

@ -54,6 +54,28 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
private BigramContentReader mBigramReader; private BigramContentReader mBigramReader;
private ShortcutContentReader mShortcutReader; private ShortcutContentReader mShortcutReader;
/**
* Raw PtNode info straight out of a trie file in version 4 dictionary.
*/
protected static final class Ver4PtNodeInfo {
public final int mFlags;
public final int[] mCharacters;
public final int mTerminalId;
public final int mChildrenPos;
public final int mParentPos;
public final int mNodeSize;
public Ver4PtNodeInfo(final int flags, final int[] characters, final int terminalId,
final int childrenPos, final int parentPos, final int nodeSize) {
mFlags = flags;
mCharacters = characters;
mTerminalId = terminalId;
mChildrenPos = childrenPos;
mParentPos = parentPos;
mNodeSize = nodeSize;
}
}
@UsedForTesting @UsedForTesting
/* package */ Ver4DictDecoder(final File dictDirectory, final int factoryFlag) { /* package */ Ver4DictDecoder(final File dictDirectory, final int factoryFlag) {
mDictDirectory = dictDirectory; mDictDirectory = dictDirectory;
@ -255,63 +277,82 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
} }
} }
private final int[] mCharacterBufferForReadingVer4PtNodeInfo
= new int[FormatSpec.MAX_WORD_LENGTH];
/**
* Reads PtNode from ptNodePos in the trie file and returns Ver4PtNodeInfo.
*
* @param ptNodePos the position of PtNode.
* @param options the format options.
* @return Ver4PtNodeInfo.
*/
// TODO: Make this buffer thread safe. // TODO: Make this buffer thread safe.
// TODO: Support words longer than FormatSpec.MAX_WORD_LENGTH. // TODO: Support words longer than FormatSpec.MAX_WORD_LENGTH.
private final int[] mCharacterBuffer = new int[FormatSpec.MAX_WORD_LENGTH]; protected Ver4PtNodeInfo readVer4PtNodeInfo(final int ptNodePos, final FormatOptions options) {
@Override int readingPos = ptNodePos;
public PtNodeInfo readPtNode(int ptNodePos, FormatOptions options) {
int addressPointer = ptNodePos;
final int flags = PtNodeReader.readPtNodeOptionFlags(mDictBuffer); final int flags = PtNodeReader.readPtNodeOptionFlags(mDictBuffer);
addressPointer += FormatSpec.PTNODE_FLAGS_SIZE; readingPos += FormatSpec.PTNODE_FLAGS_SIZE;
final int parentAddress = PtNodeReader.readParentAddress(mDictBuffer, options); final int parentPos = PtNodeReader.readParentAddress(mDictBuffer, options);
if (BinaryDictIOUtils.supportsDynamicUpdate(options)) { if (BinaryDictIOUtils.supportsDynamicUpdate(options)) {
addressPointer += FormatSpec.PARENT_ADDRESS_SIZE; readingPos += FormatSpec.PARENT_ADDRESS_SIZE;
} }
final int characters[]; final int characters[];
if (0 != (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS)) { if (0 != (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS)) {
int index = 0; int index = 0;
int character = CharEncoding.readChar(mDictBuffer); int character = CharEncoding.readChar(mDictBuffer);
addressPointer += CharEncoding.getCharSize(character); readingPos += CharEncoding.getCharSize(character);
while (FormatSpec.INVALID_CHARACTER != character while (FormatSpec.INVALID_CHARACTER != character
&& index < FormatSpec.MAX_WORD_LENGTH) { && index < FormatSpec.MAX_WORD_LENGTH) {
mCharacterBuffer[index++] = character; mCharacterBufferForReadingVer4PtNodeInfo[index++] = character;
character = CharEncoding.readChar(mDictBuffer); character = CharEncoding.readChar(mDictBuffer);
addressPointer += CharEncoding.getCharSize(character); readingPos += CharEncoding.getCharSize(character);
} }
characters = Arrays.copyOfRange(mCharacterBuffer, 0, index); characters = Arrays.copyOfRange(mCharacterBufferForReadingVer4PtNodeInfo, 0, index);
} else { } else {
final int character = CharEncoding.readChar(mDictBuffer); final int character = CharEncoding.readChar(mDictBuffer);
addressPointer += CharEncoding.getCharSize(character); readingPos += CharEncoding.getCharSize(character);
characters = new int[] { character }; characters = new int[] { character };
} }
final int terminalId; final int terminalId;
if (0 != (FormatSpec.FLAG_IS_TERMINAL & flags)) { if (0 != (FormatSpec.FLAG_IS_TERMINAL & flags)) {
terminalId = PtNodeReader.readTerminalId(mDictBuffer); terminalId = PtNodeReader.readTerminalId(mDictBuffer);
addressPointer += FormatSpec.PTNODE_TERMINAL_ID_SIZE; readingPos += FormatSpec.PTNODE_TERMINAL_ID_SIZE;
} else { } else {
terminalId = PtNode.NOT_A_TERMINAL; terminalId = PtNode.NOT_A_TERMINAL;
} }
int childrenPos = PtNodeReader.readChildrenAddress(mDictBuffer, flags, options);
if (childrenPos != FormatSpec.NO_CHILDREN_ADDRESS) {
childrenPos += readingPos;
}
readingPos += BinaryDictIOUtils.getChildrenAddressSize(flags, options);
return new Ver4PtNodeInfo(flags, characters, terminalId, childrenPos, parentPos,
readingPos - ptNodePos);
}
@Override
public PtNodeInfo readPtNode(int ptNodePos, FormatOptions options) {
final Ver4PtNodeInfo nodeInfo = readVer4PtNodeInfo(ptNodePos, options);
final int frequency; final int frequency;
if (0 != (FormatSpec.FLAG_IS_TERMINAL & flags)) { if (0 != (FormatSpec.FLAG_IS_TERMINAL & nodeInfo.mFlags)) {
frequency = PtNodeReader.readFrequency(mFrequencyBuffer, terminalId); frequency = PtNodeReader.readFrequency(mFrequencyBuffer, nodeInfo.mTerminalId);
} else { } else {
frequency = PtNode.NOT_A_TERMINAL; frequency = PtNode.NOT_A_TERMINAL;
} }
int childrenAddress = PtNodeReader.readChildrenAddress(mDictBuffer, flags, options);
if (childrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) {
childrenAddress += addressPointer;
}
addressPointer += BinaryDictIOUtils.getChildrenAddressSize(flags, options);
final ArrayList<WeightedString> shortcutTargets = mShortcutReader.readShortcuts(terminalId);
final ArrayList<PendingAttribute> bigrams =
mBigramReader.readTargetsAndFrequencies(terminalId,
mTerminalAddressTableBuffer);
return new PtNodeInfo(ptNodePos, addressPointer, flags, characters, frequency, final ArrayList<WeightedString> shortcutTargets = mShortcutReader.readShortcuts(
parentAddress, childrenAddress, shortcutTargets, bigrams); nodeInfo.mTerminalId);
final ArrayList<PendingAttribute> bigrams = mBigramReader.readTargetsAndFrequencies(
nodeInfo.mTerminalId, mTerminalAddressTableBuffer);
return new PtNodeInfo(ptNodePos, ptNodePos + nodeInfo.mNodeSize, nodeInfo.mFlags,
nodeInfo.mCharacters, frequency, nodeInfo.mParentPos, nodeInfo.mChildrenPos,
shortcutTargets, bigrams);
} }
private void deleteDictFiles() { private void deleteDictFiles() {