Merge "Make readPtNode be called with the address from the beginning of the file."
commit
f9233e0b98
|
@ -31,7 +31,6 @@ import java.io.IOException;
|
||||||
import java.nio.ByteBuffer;
|
import java.nio.ByteBuffer;
|
||||||
import java.nio.channels.FileChannel;
|
import java.nio.channels.FileChannel;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.TreeMap;
|
import java.util.TreeMap;
|
||||||
|
|
||||||
|
@ -334,40 +333,38 @@ public final class BinaryDictDecoderUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Finds, as a string, the word at the address passed as an argument.
|
* Finds, as a string, the word at the position passed as an argument.
|
||||||
*
|
*
|
||||||
* @param dictDecoder the dict decoder.
|
* @param dictDecoder the dict decoder.
|
||||||
* @param headerSize the size of the header.
|
* @param headerSize the size of the header.
|
||||||
* @param address the address to seek.
|
* @param pos the position to seek.
|
||||||
* @param formatOptions file format options.
|
* @param formatOptions file format options.
|
||||||
* @return the word with its frequency, as a weighted string.
|
* @return the word with its frequency, as a weighted string.
|
||||||
*/
|
*/
|
||||||
/* package for tests */ static WeightedString getWordAtAddress(
|
/* package for tests */ static WeightedString getWordAtPosition(
|
||||||
final Ver3DictDecoder dictDecoder, final int headerSize, final int address,
|
final Ver3DictDecoder dictDecoder, final int headerSize, final int pos,
|
||||||
final FormatOptions formatOptions) {
|
final FormatOptions formatOptions) {
|
||||||
final DictBuffer dictBuffer = dictDecoder.getDictBuffer();
|
final DictBuffer dictBuffer = dictDecoder.getDictBuffer();
|
||||||
final WeightedString result;
|
final WeightedString result;
|
||||||
final int originalPointer = dictBuffer.position();
|
final int originalPos = dictBuffer.position();
|
||||||
dictBuffer.position(address);
|
dictBuffer.position(pos);
|
||||||
|
|
||||||
if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) {
|
if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) {
|
||||||
result = getWordAtAddressWithParentAddress(dictDecoder, headerSize, address,
|
result = getWordAtPositionWithParentAddress(dictDecoder, pos, formatOptions);
|
||||||
formatOptions);
|
|
||||||
} else {
|
} else {
|
||||||
result = getWordAtAddressWithoutParentAddress(dictDecoder, headerSize, address,
|
result = getWordAtPositionWithoutParentAddress(dictDecoder, headerSize, pos,
|
||||||
formatOptions);
|
formatOptions);
|
||||||
}
|
}
|
||||||
|
|
||||||
dictBuffer.position(originalPointer);
|
dictBuffer.position(originalPos);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
@SuppressWarnings("unused")
|
@SuppressWarnings("unused")
|
||||||
private static WeightedString getWordAtAddressWithParentAddress(
|
private static WeightedString getWordAtPositionWithParentAddress(
|
||||||
final Ver3DictDecoder dictDecoder, final int headerSize, final int address,
|
final Ver3DictDecoder dictDecoder, final int pos, final FormatOptions options) {
|
||||||
final FormatOptions options) {
|
|
||||||
final DictBuffer dictBuffer = dictDecoder.getDictBuffer();
|
final DictBuffer dictBuffer = dictDecoder.getDictBuffer();
|
||||||
int currentAddress = address;
|
int currentPos = pos;
|
||||||
int frequency = Integer.MIN_VALUE;
|
int frequency = Integer.MIN_VALUE;
|
||||||
final StringBuilder builder = new StringBuilder();
|
final StringBuilder builder = new StringBuilder();
|
||||||
// the length of the path from the root to the leaf is limited by MAX_WORD_LENGTH
|
// the length of the path from the root to the leaf is limited by MAX_WORD_LENGTH
|
||||||
|
@ -375,10 +372,10 @@ public final class BinaryDictDecoderUtils {
|
||||||
CharGroupInfo currentInfo;
|
CharGroupInfo currentInfo;
|
||||||
int loopCounter = 0;
|
int loopCounter = 0;
|
||||||
do {
|
do {
|
||||||
dictBuffer.position(currentAddress + headerSize);
|
dictBuffer.position(currentPos);
|
||||||
currentInfo = dictDecoder.readPtNode(currentAddress, options);
|
currentInfo = dictDecoder.readPtNode(currentPos, options);
|
||||||
if (BinaryDictIOUtils.isMovedGroup(currentInfo.mFlags, options)) {
|
if (BinaryDictIOUtils.isMovedGroup(currentInfo.mFlags, options)) {
|
||||||
currentAddress = currentInfo.mParentAddress + currentInfo.mOriginalAddress;
|
currentPos = currentInfo.mParentAddress + currentInfo.mOriginalAddress;
|
||||||
}
|
}
|
||||||
if (DBG && loopCounter++ > MAX_JUMPS) {
|
if (DBG && loopCounter++ > MAX_JUMPS) {
|
||||||
MakedictLog.d("Too many jumps - probably a bug");
|
MakedictLog.d("Too many jumps - probably a bug");
|
||||||
|
@ -388,37 +385,37 @@ public final class BinaryDictDecoderUtils {
|
||||||
builder.insert(0,
|
builder.insert(0,
|
||||||
new String(currentInfo.mCharacters, 0, currentInfo.mCharacters.length));
|
new String(currentInfo.mCharacters, 0, currentInfo.mCharacters.length));
|
||||||
if (currentInfo.mParentAddress == FormatSpec.NO_PARENT_ADDRESS) break;
|
if (currentInfo.mParentAddress == FormatSpec.NO_PARENT_ADDRESS) break;
|
||||||
currentAddress = currentInfo.mParentAddress + currentInfo.mOriginalAddress;
|
currentPos = currentInfo.mParentAddress + currentInfo.mOriginalAddress;
|
||||||
}
|
}
|
||||||
return new WeightedString(builder.toString(), frequency);
|
return new WeightedString(builder.toString(), frequency);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static WeightedString getWordAtAddressWithoutParentAddress(
|
private static WeightedString getWordAtPositionWithoutParentAddress(
|
||||||
final Ver3DictDecoder dictDecoder, final int headerSize, final int address,
|
final Ver3DictDecoder dictDecoder, final int headerSize, final int pos,
|
||||||
final FormatOptions options) {
|
final FormatOptions options) {
|
||||||
final DictBuffer dictBuffer = dictDecoder.getDictBuffer();
|
final DictBuffer dictBuffer = dictDecoder.getDictBuffer();
|
||||||
dictBuffer.position(headerSize);
|
dictBuffer.position(headerSize);
|
||||||
final int count = readCharGroupCount(dictBuffer);
|
final int count = readCharGroupCount(dictBuffer);
|
||||||
int groupOffset = BinaryDictIOUtils.getGroupCountSize(count);
|
int groupPos = headerSize + BinaryDictIOUtils.getGroupCountSize(count);
|
||||||
final StringBuilder builder = new StringBuilder();
|
final StringBuilder builder = new StringBuilder();
|
||||||
WeightedString result = null;
|
WeightedString result = null;
|
||||||
|
|
||||||
CharGroupInfo last = null;
|
CharGroupInfo last = null;
|
||||||
for (int i = count - 1; i >= 0; --i) {
|
for (int i = count - 1; i >= 0; --i) {
|
||||||
CharGroupInfo info = dictDecoder.readPtNode(groupOffset, options);
|
CharGroupInfo info = dictDecoder.readPtNode(groupPos, options);
|
||||||
groupOffset = info.mEndAddress;
|
groupPos = info.mEndAddress;
|
||||||
if (info.mOriginalAddress == address) {
|
if (info.mOriginalAddress == pos) {
|
||||||
builder.append(new String(info.mCharacters, 0, info.mCharacters.length));
|
builder.append(new String(info.mCharacters, 0, info.mCharacters.length));
|
||||||
result = new WeightedString(builder.toString(), info.mFrequency);
|
result = new WeightedString(builder.toString(), info.mFrequency);
|
||||||
break; // and return
|
break; // and return
|
||||||
}
|
}
|
||||||
if (BinaryDictIOUtils.hasChildrenAddress(info.mChildrenAddress)) {
|
if (BinaryDictIOUtils.hasChildrenAddress(info.mChildrenAddress)) {
|
||||||
if (info.mChildrenAddress > address) {
|
if (info.mChildrenAddress > pos) {
|
||||||
if (null == last) continue;
|
if (null == last) continue;
|
||||||
builder.append(new String(last.mCharacters, 0, last.mCharacters.length));
|
builder.append(new String(last.mCharacters, 0, last.mCharacters.length));
|
||||||
dictBuffer.position(last.mChildrenAddress + headerSize);
|
dictBuffer.position(last.mChildrenAddress);
|
||||||
i = readCharGroupCount(dictBuffer);
|
i = readCharGroupCount(dictBuffer);
|
||||||
groupOffset = last.mChildrenAddress + BinaryDictIOUtils.getGroupCountSize(i);
|
groupPos = last.mChildrenAddress + BinaryDictIOUtils.getGroupCountSize(i);
|
||||||
last = null;
|
last = null;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@ -426,9 +423,9 @@ public final class BinaryDictDecoderUtils {
|
||||||
}
|
}
|
||||||
if (0 == i && BinaryDictIOUtils.hasChildrenAddress(last.mChildrenAddress)) {
|
if (0 == i && BinaryDictIOUtils.hasChildrenAddress(last.mChildrenAddress)) {
|
||||||
builder.append(new String(last.mCharacters, 0, last.mCharacters.length));
|
builder.append(new String(last.mCharacters, 0, last.mCharacters.length));
|
||||||
dictBuffer.position(last.mChildrenAddress + headerSize);
|
dictBuffer.position(last.mChildrenAddress);
|
||||||
i = readCharGroupCount(dictBuffer);
|
i = readCharGroupCount(dictBuffer);
|
||||||
groupOffset = last.mChildrenAddress + BinaryDictIOUtils.getGroupCountSize(i);
|
groupPos = last.mChildrenAddress + BinaryDictIOUtils.getGroupCountSize(i);
|
||||||
last = null;
|
last = null;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@ -457,21 +454,21 @@ public final class BinaryDictDecoderUtils {
|
||||||
throws IOException {
|
throws IOException {
|
||||||
final DictBuffer dictBuffer = dictDecoder.getDictBuffer();
|
final DictBuffer dictBuffer = dictDecoder.getDictBuffer();
|
||||||
final ArrayList<CharGroup> nodeArrayContents = new ArrayList<CharGroup>();
|
final ArrayList<CharGroup> nodeArrayContents = new ArrayList<CharGroup>();
|
||||||
final int nodeArrayOrigin = dictBuffer.position() - headerSize;
|
final int nodeArrayOriginPos = dictBuffer.position();
|
||||||
|
|
||||||
do { // Scan the linked-list node.
|
do { // Scan the linked-list node.
|
||||||
final int nodeArrayHeadPosition = dictBuffer.position() - headerSize;
|
final int nodeArrayHeadPos = dictBuffer.position();
|
||||||
final int count = readCharGroupCount(dictBuffer);
|
final int count = readCharGroupCount(dictBuffer);
|
||||||
int groupOffset = nodeArrayHeadPosition + BinaryDictIOUtils.getGroupCountSize(count);
|
int groupOffsetPos = nodeArrayHeadPos + BinaryDictIOUtils.getGroupCountSize(count);
|
||||||
for (int i = count; i > 0; --i) { // Scan the array of CharGroup.
|
for (int i = count; i > 0; --i) { // Scan the array of CharGroup.
|
||||||
CharGroupInfo info = dictDecoder.readPtNode(groupOffset, options);
|
CharGroupInfo info = dictDecoder.readPtNode(groupOffsetPos, options);
|
||||||
if (BinaryDictIOUtils.isMovedGroup(info.mFlags, options)) continue;
|
if (BinaryDictIOUtils.isMovedGroup(info.mFlags, options)) continue;
|
||||||
ArrayList<WeightedString> shortcutTargets = info.mShortcutTargets;
|
ArrayList<WeightedString> shortcutTargets = info.mShortcutTargets;
|
||||||
ArrayList<WeightedString> bigrams = null;
|
ArrayList<WeightedString> bigrams = null;
|
||||||
if (null != info.mBigrams) {
|
if (null != info.mBigrams) {
|
||||||
bigrams = new ArrayList<WeightedString>();
|
bigrams = new ArrayList<WeightedString>();
|
||||||
for (PendingAttribute bigram : info.mBigrams) {
|
for (PendingAttribute bigram : info.mBigrams) {
|
||||||
final WeightedString word = getWordAtAddress(dictDecoder, headerSize,
|
final WeightedString word = getWordAtPosition(dictDecoder, headerSize,
|
||||||
bigram.mAddress, options);
|
bigram.mAddress, options);
|
||||||
final int reconstructedFrequency =
|
final int reconstructedFrequency =
|
||||||
BinaryDictIOUtils.reconstructBigramFrequency(word.mFrequency,
|
BinaryDictIOUtils.reconstructBigramFrequency(word.mFrequency,
|
||||||
|
@ -483,7 +480,7 @@ public final class BinaryDictDecoderUtils {
|
||||||
PtNodeArray children = reverseNodeArrayMap.get(info.mChildrenAddress);
|
PtNodeArray children = reverseNodeArrayMap.get(info.mChildrenAddress);
|
||||||
if (null == children) {
|
if (null == children) {
|
||||||
final int currentPosition = dictBuffer.position();
|
final int currentPosition = dictBuffer.position();
|
||||||
dictBuffer.position(info.mChildrenAddress + headerSize);
|
dictBuffer.position(info.mChildrenAddress);
|
||||||
children = readNodeArray(dictDecoder, headerSize, reverseNodeArrayMap,
|
children = readNodeArray(dictDecoder, headerSize, reverseNodeArrayMap,
|
||||||
reverseGroupMap, options);
|
reverseGroupMap, options);
|
||||||
dictBuffer.position(currentPosition);
|
dictBuffer.position(currentPosition);
|
||||||
|
@ -500,7 +497,7 @@ public final class BinaryDictDecoderUtils {
|
||||||
0 != (info.mFlags & FormatSpec.FLAG_IS_NOT_A_WORD),
|
0 != (info.mFlags & FormatSpec.FLAG_IS_NOT_A_WORD),
|
||||||
0 != (info.mFlags & FormatSpec.FLAG_IS_BLACKLISTED)));
|
0 != (info.mFlags & FormatSpec.FLAG_IS_BLACKLISTED)));
|
||||||
}
|
}
|
||||||
groupOffset = info.mEndAddress;
|
groupOffsetPos = info.mEndAddress;
|
||||||
}
|
}
|
||||||
|
|
||||||
// reach the end of the array.
|
// reach the end of the array.
|
||||||
|
@ -516,8 +513,8 @@ public final class BinaryDictDecoderUtils {
|
||||||
dictBuffer.position() != FormatSpec.NO_FORWARD_LINK_ADDRESS);
|
dictBuffer.position() != FormatSpec.NO_FORWARD_LINK_ADDRESS);
|
||||||
|
|
||||||
final PtNodeArray nodeArray = new PtNodeArray(nodeArrayContents);
|
final PtNodeArray nodeArray = new PtNodeArray(nodeArrayContents);
|
||||||
nodeArray.mCachedAddressBeforeUpdate = nodeArrayOrigin;
|
nodeArray.mCachedAddressBeforeUpdate = nodeArrayOriginPos;
|
||||||
nodeArray.mCachedAddressAfterUpdate = nodeArrayOrigin;
|
nodeArray.mCachedAddressAfterUpdate = nodeArrayOriginPos;
|
||||||
reverseNodeArrayMap.put(nodeArray.mCachedAddressAfterUpdate, nodeArray);
|
reverseNodeArrayMap.put(nodeArray.mCachedAddressAfterUpdate, nodeArray);
|
||||||
return nodeArray;
|
return nodeArray;
|
||||||
}
|
}
|
||||||
|
|
|
@ -95,7 +95,7 @@ public final class BinaryDictIOUtils {
|
||||||
stack.pop();
|
stack.pop();
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
CharGroupInfo info = dictDecoder.readPtNode(p.mAddress - headerSize, formatOptions);
|
CharGroupInfo info = dictDecoder.readPtNode(p.mAddress, formatOptions);
|
||||||
for (int i = 0; i < info.mCharacters.length; ++i) {
|
for (int i = 0; i < info.mCharacters.length; ++i) {
|
||||||
pushedChars[index++] = info.mCharacters[i];
|
pushedChars[index++] = info.mCharacters[i];
|
||||||
}
|
}
|
||||||
|
@ -131,7 +131,7 @@ public final class BinaryDictIOUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!isMovedGroup && hasChildrenAddress(info.mChildrenAddress)) {
|
if (!isMovedGroup && hasChildrenAddress(info.mChildrenAddress)) {
|
||||||
Position childrenPos = new Position(info.mChildrenAddress + headerSize, index);
|
final Position childrenPos = new Position(info.mChildrenAddress, index);
|
||||||
stack.push(childrenPos);
|
stack.push(childrenPos);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -35,11 +35,14 @@ import java.nio.channels.FileChannel;
|
||||||
*/
|
*/
|
||||||
public interface DictDecoder {
|
public interface DictDecoder {
|
||||||
public FileHeader readHeader() throws IOException, UnsupportedFormatException;
|
public FileHeader readHeader() throws IOException, UnsupportedFormatException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reads a PtNode and returns CharGroupInfo.
|
* Reads the PtNode located at ptNodePos.
|
||||||
|
* @param ptNodePos the position of PtNode.
|
||||||
|
* @param formatOptions the format options.
|
||||||
|
* @return CharGroupInfo.
|
||||||
*/
|
*/
|
||||||
public CharGroupInfo readPtNode(final int originalGroupAddress,
|
public CharGroupInfo readPtNode(final int ptNodePos, final FormatOptions formatOptions);
|
||||||
final FormatOptions formatOptions);
|
|
||||||
|
|
||||||
public interface DictionaryBufferFactory {
|
public interface DictionaryBufferFactory {
|
||||||
public DictBuffer getDictionaryBuffer(final File file)
|
public DictBuffer getDictionaryBuffer(final File file)
|
||||||
|
|
|
@ -214,9 +214,8 @@ public class Ver3DictDecoder implements DictDecoder {
|
||||||
// TODO: Make this buffer multi thread safe.
|
// TODO: Make this buffer multi thread safe.
|
||||||
private final int[] mCharacterBuffer = new int[FormatSpec.MAX_WORD_LENGTH];
|
private final int[] mCharacterBuffer = new int[FormatSpec.MAX_WORD_LENGTH];
|
||||||
@Override
|
@Override
|
||||||
public CharGroupInfo readPtNode(final int originalGroupAddress,
|
public CharGroupInfo readPtNode(final int ptNodePos, final FormatOptions options) {
|
||||||
final FormatOptions options) {
|
int addressPointer = ptNodePos;
|
||||||
int addressPointer = originalGroupAddress;
|
|
||||||
final int flags = PtNodeReader.readPtNodeOptionFlags(mDictBuffer);
|
final int flags = PtNodeReader.readPtNodeOptionFlags(mDictBuffer);
|
||||||
++addressPointer;
|
++addressPointer;
|
||||||
|
|
||||||
|
@ -268,7 +267,7 @@ public class Ver3DictDecoder implements DictDecoder {
|
||||||
MakedictLog.d("too many bigrams in a group.");
|
MakedictLog.d("too many bigrams in a group.");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return new CharGroupInfo(originalGroupAddress, addressPointer, flags, characters, frequency,
|
return new CharGroupInfo(ptNodePos, addressPointer, flags, characters, frequency,
|
||||||
parentAddress, childrenAddress, shortcutTargets, bigrams);
|
parentAddress, childrenAddress, shortcutTargets, bigrams);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -512,8 +512,8 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
if (fileHeader == null) return null;
|
if (fileHeader == null) return null;
|
||||||
return BinaryDictDecoderUtils.getWordAtAddress(dictDecoder, fileHeader.mHeaderSize,
|
return BinaryDictDecoderUtils.getWordAtPosition(dictDecoder, fileHeader.mHeaderSize,
|
||||||
address - fileHeader.mHeaderSize, fileHeader.mFormatOptions).mWord;
|
address, fileHeader.mFormatOptions).mWord;
|
||||||
}
|
}
|
||||||
|
|
||||||
private long runGetTerminalPosition(final Ver3DictDecoder dictDecoder, final String word,
|
private long runGetTerminalPosition(final Ver3DictDecoder dictDecoder, final String word,
|
||||||
|
|
|
@ -228,8 +228,8 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
|
||||||
new Ver3DictDecoder.DictionaryBufferFromReadOnlyByteBufferFactory());
|
new Ver3DictDecoder.DictionaryBufferFromReadOnlyByteBufferFactory());
|
||||||
final FileHeader fileHeader = dictDecoder.readHeader();
|
final FileHeader fileHeader = dictDecoder.readHeader();
|
||||||
assertEquals(word,
|
assertEquals(word,
|
||||||
BinaryDictDecoderUtils.getWordAtAddress(dictDecoder, fileHeader.mHeaderSize,
|
BinaryDictDecoderUtils.getWordAtPosition(dictDecoder, fileHeader.mHeaderSize,
|
||||||
position - fileHeader.mHeaderSize, fileHeader.mFormatOptions).mWord);
|
position, fileHeader.mFormatOptions).mWord);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
Log.e(TAG, "Raised an IOException while looking up a word", e);
|
Log.e(TAG, "Raised an IOException while looking up a word", e);
|
||||||
} catch (UnsupportedFormatException e) {
|
} catch (UnsupportedFormatException e) {
|
||||||
|
|
Loading…
Reference in New Issue