am 1f6b9d7f: am 10100b2c: am 14d31d46: Add AbstractDictDecoder.
* commit '1f6b9d7fefff6c94e63af5df8bc6e64879ad9316': Add AbstractDictDecoder.main
commit
07dcb6acab
|
@ -0,0 +1,206 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2013 The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package com.android.inputmethod.latin.makedict;
|
||||||
|
|
||||||
|
import com.android.inputmethod.annotations.UsedForTesting;
|
||||||
|
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
|
||||||
|
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
|
||||||
|
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
|
||||||
|
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
|
||||||
|
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.TreeMap;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A base class of the binary dictionary decoder.
|
||||||
|
*/
|
||||||
|
public abstract class AbstractDictDecoder implements DictDecoder {
|
||||||
|
protected FileHeader readHeader(final DictBuffer dictBuffer)
|
||||||
|
throws IOException, UnsupportedFormatException {
|
||||||
|
if (dictBuffer == null) {
|
||||||
|
openDictBuffer();
|
||||||
|
}
|
||||||
|
|
||||||
|
final int version = HeaderReader.readVersion(dictBuffer);
|
||||||
|
if (version < FormatSpec.MINIMUM_SUPPORTED_VERSION
|
||||||
|
|| version > FormatSpec.MAXIMUM_SUPPORTED_VERSION) {
|
||||||
|
throw new UnsupportedFormatException("Unsupported version : " + version);
|
||||||
|
}
|
||||||
|
// TODO: Remove this field.
|
||||||
|
final int optionsFlags = HeaderReader.readOptionFlags(dictBuffer);
|
||||||
|
|
||||||
|
final int headerSize = HeaderReader.readHeaderSize(dictBuffer);
|
||||||
|
|
||||||
|
if (headerSize < 0) {
|
||||||
|
throw new UnsupportedFormatException("header size can't be negative.");
|
||||||
|
}
|
||||||
|
|
||||||
|
final HashMap<String, String> attributes = HeaderReader.readAttributes(dictBuffer,
|
||||||
|
headerSize);
|
||||||
|
|
||||||
|
final FileHeader header = new FileHeader(headerSize,
|
||||||
|
new FusionDictionary.DictionaryOptions(attributes,
|
||||||
|
0 != (optionsFlags & FormatSpec.GERMAN_UMLAUT_PROCESSING_FLAG),
|
||||||
|
0 != (optionsFlags & FormatSpec.FRENCH_LIGATURE_PROCESSING_FLAG)),
|
||||||
|
new FormatOptions(version,
|
||||||
|
0 != (optionsFlags & FormatSpec.SUPPORTS_DYNAMIC_UPDATE)));
|
||||||
|
return header;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override @UsedForTesting
|
||||||
|
public int getTerminalPosition(final String word)
|
||||||
|
throws IOException, UnsupportedFormatException {
|
||||||
|
if (!isDictBufferOpen()) {
|
||||||
|
openDictBuffer();
|
||||||
|
}
|
||||||
|
return BinaryDictIOUtils.getTerminalPosition(this, word);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override @UsedForTesting
|
||||||
|
public void readUnigramsAndBigramsBinary(final TreeMap<Integer, String> words,
|
||||||
|
final TreeMap<Integer, Integer> frequencies,
|
||||||
|
final TreeMap<Integer, ArrayList<PendingAttribute>> bigrams)
|
||||||
|
throws IOException, UnsupportedFormatException {
|
||||||
|
if (!isDictBufferOpen()) {
|
||||||
|
openDictBuffer();
|
||||||
|
}
|
||||||
|
BinaryDictIOUtils.readUnigramsAndBigramsBinary(this, words, frequencies, bigrams);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A utility class for reading a file header.
|
||||||
|
*/
|
||||||
|
protected static class HeaderReader {
|
||||||
|
protected static int readVersion(final DictBuffer dictBuffer)
|
||||||
|
throws IOException, UnsupportedFormatException {
|
||||||
|
return BinaryDictDecoderUtils.checkFormatVersion(dictBuffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected static int readOptionFlags(final DictBuffer dictBuffer) {
|
||||||
|
return dictBuffer.readUnsignedShort();
|
||||||
|
}
|
||||||
|
|
||||||
|
protected static int readHeaderSize(final DictBuffer dictBuffer) {
|
||||||
|
return dictBuffer.readInt();
|
||||||
|
}
|
||||||
|
|
||||||
|
protected static HashMap<String, String> readAttributes(final DictBuffer dictBuffer,
|
||||||
|
final int headerSize) {
|
||||||
|
final HashMap<String, String> attributes = new HashMap<String, String>();
|
||||||
|
while (dictBuffer.position() < headerSize) {
|
||||||
|
// We can avoid an infinite loop here since dictBuffer.position() is always
|
||||||
|
// increased by calling CharEncoding.readString.
|
||||||
|
final String key = CharEncoding.readString(dictBuffer);
|
||||||
|
final String value = CharEncoding.readString(dictBuffer);
|
||||||
|
attributes.put(key, value);
|
||||||
|
}
|
||||||
|
dictBuffer.position(headerSize);
|
||||||
|
return attributes;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A utility class for reading a PtNode.
|
||||||
|
*/
|
||||||
|
protected static class PtNodeReader {
|
||||||
|
protected static int readPtNodeOptionFlags(final DictBuffer dictBuffer) {
|
||||||
|
return dictBuffer.readUnsignedByte();
|
||||||
|
}
|
||||||
|
|
||||||
|
protected static int readParentAddress(final DictBuffer dictBuffer,
|
||||||
|
final FormatOptions formatOptions) {
|
||||||
|
if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) {
|
||||||
|
return BinaryDictDecoderUtils.readSInt24(dictBuffer);
|
||||||
|
} else {
|
||||||
|
return FormatSpec.NO_PARENT_ADDRESS;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected static int readChildrenAddress(final DictBuffer dictBuffer, final int optionFlags,
|
||||||
|
final FormatOptions formatOptions) {
|
||||||
|
if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) {
|
||||||
|
final int address = BinaryDictDecoderUtils.readSInt24(dictBuffer);
|
||||||
|
if (address == 0) return FormatSpec.NO_CHILDREN_ADDRESS;
|
||||||
|
return address;
|
||||||
|
} else {
|
||||||
|
switch (optionFlags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) {
|
||||||
|
case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_ONEBYTE:
|
||||||
|
return dictBuffer.readUnsignedByte();
|
||||||
|
case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_TWOBYTES:
|
||||||
|
return dictBuffer.readUnsignedShort();
|
||||||
|
case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_THREEBYTES:
|
||||||
|
return dictBuffer.readUnsignedInt24();
|
||||||
|
case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_NOADDRESS:
|
||||||
|
default:
|
||||||
|
return FormatSpec.NO_CHILDREN_ADDRESS;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reads shortcuts and returns the read length.
|
||||||
|
protected static int readShortcut(final DictBuffer dictBuffer,
|
||||||
|
final ArrayList<WeightedString> shortcutTargets) {
|
||||||
|
final int pointerBefore = dictBuffer.position();
|
||||||
|
dictBuffer.readUnsignedShort(); // skip the size
|
||||||
|
while (true) {
|
||||||
|
final int targetFlags = dictBuffer.readUnsignedByte();
|
||||||
|
final String word = CharEncoding.readString(dictBuffer);
|
||||||
|
shortcutTargets.add(new WeightedString(word,
|
||||||
|
targetFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY));
|
||||||
|
if (0 == (targetFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break;
|
||||||
|
}
|
||||||
|
return dictBuffer.position() - pointerBefore;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected static int readBigramAddresses(final DictBuffer dictBuffer,
|
||||||
|
final ArrayList<PendingAttribute> bigrams, final int baseAddress) {
|
||||||
|
int readLength = 0;
|
||||||
|
int bigramCount = 0;
|
||||||
|
while (bigramCount++ < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
|
||||||
|
final int bigramFlags = dictBuffer.readUnsignedByte();
|
||||||
|
++readLength;
|
||||||
|
final int sign = 0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_ATTR_OFFSET_NEGATIVE)
|
||||||
|
? 1 : -1;
|
||||||
|
int bigramAddress = baseAddress + readLength;
|
||||||
|
switch (bigramFlags & FormatSpec.MASK_BIGRAM_ATTR_ADDRESS_TYPE) {
|
||||||
|
case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_ONEBYTE:
|
||||||
|
bigramAddress += sign * dictBuffer.readUnsignedByte();
|
||||||
|
readLength += 1;
|
||||||
|
break;
|
||||||
|
case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_TWOBYTES:
|
||||||
|
bigramAddress += sign * dictBuffer.readUnsignedShort();
|
||||||
|
readLength += 2;
|
||||||
|
break;
|
||||||
|
case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_THREEBYTES:
|
||||||
|
bigramAddress += sign * dictBuffer.readUnsignedInt24();
|
||||||
|
readLength += 3;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
throw new RuntimeException("Has bigrams with no address");
|
||||||
|
}
|
||||||
|
bigrams.add(new PendingAttribute(
|
||||||
|
bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY,
|
||||||
|
bigramAddress));
|
||||||
|
if (0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break;
|
||||||
|
}
|
||||||
|
return readLength;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -17,11 +17,9 @@
|
||||||
package com.android.inputmethod.latin.makedict;
|
package com.android.inputmethod.latin.makedict;
|
||||||
|
|
||||||
import com.android.inputmethod.annotations.UsedForTesting;
|
import com.android.inputmethod.annotations.UsedForTesting;
|
||||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
|
|
||||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
|
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
|
||||||
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
|
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
|
||||||
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
|
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
|
||||||
import com.android.inputmethod.latin.utils.ByteArrayDictBuffer;
|
import com.android.inputmethod.latin.utils.ByteArrayDictBuffer;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
@ -32,50 +30,17 @@ import java.io.RandomAccessFile;
|
||||||
import java.nio.ByteBuffer;
|
import java.nio.ByteBuffer;
|
||||||
import java.nio.channels.FileChannel;
|
import java.nio.channels.FileChannel;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.TreeMap;
|
import java.util.TreeMap;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The base class of binary dictionary decoders.
|
* An interface of binary dictionary decoders.
|
||||||
*/
|
*/
|
||||||
public abstract class DictDecoder {
|
public interface DictDecoder {
|
||||||
|
|
||||||
protected FileHeader readHeader(final DictBuffer dictBuffer)
|
|
||||||
throws IOException, UnsupportedFormatException {
|
|
||||||
if (dictBuffer == null) {
|
|
||||||
openDictBuffer();
|
|
||||||
}
|
|
||||||
|
|
||||||
final int version = HeaderReader.readVersion(dictBuffer);
|
|
||||||
if (version < FormatSpec.MINIMUM_SUPPORTED_VERSION
|
|
||||||
|| version > FormatSpec.MAXIMUM_SUPPORTED_VERSION) {
|
|
||||||
throw new UnsupportedFormatException("Unsupported version : " + version);
|
|
||||||
}
|
|
||||||
// TODO: Remove this field.
|
|
||||||
final int optionsFlags = HeaderReader.readOptionFlags(dictBuffer);
|
|
||||||
|
|
||||||
final int headerSize = HeaderReader.readHeaderSize(dictBuffer);
|
|
||||||
|
|
||||||
if (headerSize < 0) {
|
|
||||||
throw new UnsupportedFormatException("header size can't be negative.");
|
|
||||||
}
|
|
||||||
|
|
||||||
final HashMap<String, String> attributes = HeaderReader.readAttributes(dictBuffer,
|
|
||||||
headerSize);
|
|
||||||
|
|
||||||
final FileHeader header = new FileHeader(headerSize,
|
|
||||||
new FusionDictionary.DictionaryOptions(attributes,
|
|
||||||
0 != (optionsFlags & FormatSpec.GERMAN_UMLAUT_PROCESSING_FLAG),
|
|
||||||
0 != (optionsFlags & FormatSpec.FRENCH_LIGATURE_PROCESSING_FLAG)),
|
|
||||||
new FormatOptions(version,
|
|
||||||
0 != (optionsFlags & FormatSpec.SUPPORTS_DYNAMIC_UPDATE)));
|
|
||||||
return header;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reads and returns the file header.
|
* Reads and returns the file header.
|
||||||
*/
|
*/
|
||||||
public abstract FileHeader readHeader() throws IOException, UnsupportedFormatException;
|
public FileHeader readHeader() throws IOException, UnsupportedFormatException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reads PtNode from nodeAddress.
|
* Reads PtNode from nodeAddress.
|
||||||
|
@ -83,7 +48,7 @@ public abstract class DictDecoder {
|
||||||
* @param formatOptions the format options.
|
* @param formatOptions the format options.
|
||||||
* @return PtNodeInfo.
|
* @return PtNodeInfo.
|
||||||
*/
|
*/
|
||||||
public abstract PtNodeInfo readPtNode(final int ptNodePos, final FormatOptions formatOptions);
|
public PtNodeInfo readPtNode(final int ptNodePos, final FormatOptions formatOptions);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reads a buffer and returns the memory representation of the dictionary.
|
* Reads a buffer and returns the memory representation of the dictionary.
|
||||||
|
@ -98,7 +63,7 @@ public abstract class DictDecoder {
|
||||||
* @return the created (or merged) dictionary.
|
* @return the created (or merged) dictionary.
|
||||||
*/
|
*/
|
||||||
@UsedForTesting
|
@UsedForTesting
|
||||||
public abstract FusionDictionary readDictionaryBinary(final FusionDictionary dict,
|
public FusionDictionary readDictionaryBinary(final FusionDictionary dict,
|
||||||
final boolean deleteDictIfBroken)
|
final boolean deleteDictIfBroken)
|
||||||
throws FileNotFoundException, IOException, UnsupportedFormatException;
|
throws FileNotFoundException, IOException, UnsupportedFormatException;
|
||||||
|
|
||||||
|
@ -113,12 +78,7 @@ public abstract class DictDecoder {
|
||||||
*/
|
*/
|
||||||
@UsedForTesting
|
@UsedForTesting
|
||||||
public int getTerminalPosition(final String word)
|
public int getTerminalPosition(final String word)
|
||||||
throws IOException, UnsupportedFormatException {
|
throws IOException, UnsupportedFormatException;
|
||||||
if (!isDictBufferOpen()) {
|
|
||||||
openDictBuffer();
|
|
||||||
}
|
|
||||||
return BinaryDictIOUtils.getTerminalPosition(this, word);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reads unigrams and bigrams from the binary file.
|
* Reads unigrams and bigrams from the binary file.
|
||||||
|
@ -134,47 +94,42 @@ public abstract class DictDecoder {
|
||||||
public void readUnigramsAndBigramsBinary(final TreeMap<Integer, String> words,
|
public void readUnigramsAndBigramsBinary(final TreeMap<Integer, String> words,
|
||||||
final TreeMap<Integer, Integer> frequencies,
|
final TreeMap<Integer, Integer> frequencies,
|
||||||
final TreeMap<Integer, ArrayList<PendingAttribute>> bigrams)
|
final TreeMap<Integer, ArrayList<PendingAttribute>> bigrams)
|
||||||
throws IOException, UnsupportedFormatException {
|
throws IOException, UnsupportedFormatException;
|
||||||
if (!isDictBufferOpen()) {
|
|
||||||
openDictBuffer();
|
|
||||||
}
|
|
||||||
BinaryDictIOUtils.readUnigramsAndBigramsBinary(this, words, frequencies, bigrams);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sets the position of the buffer to the given value.
|
* Sets the position of the buffer to the given value.
|
||||||
*
|
*
|
||||||
* @param newPos the new position
|
* @param newPos the new position
|
||||||
*/
|
*/
|
||||||
public abstract void setPosition(final int newPos);
|
public void setPosition(final int newPos);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the position of the buffer.
|
* Gets the position of the buffer.
|
||||||
*
|
*
|
||||||
* @return the position
|
* @return the position
|
||||||
*/
|
*/
|
||||||
public abstract int getPosition();
|
public int getPosition();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reads and returns the PtNode count out of a buffer and forwards the pointer.
|
* Reads and returns the PtNode count out of a buffer and forwards the pointer.
|
||||||
*/
|
*/
|
||||||
public abstract int readPtNodeCount();
|
public int readPtNodeCount();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reads the forward link and advances the position.
|
* Reads the forward link and advances the position.
|
||||||
*
|
*
|
||||||
* @return true if this method moves the file pointer, false otherwise.
|
* @return true if this method moves the file pointer, false otherwise.
|
||||||
*/
|
*/
|
||||||
public abstract boolean readAndFollowForwardLink();
|
public boolean readAndFollowForwardLink();
|
||||||
public abstract boolean hasNextPtNodeArray();
|
public boolean hasNextPtNodeArray();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Opens the dictionary file and makes DictBuffer.
|
* Opens the dictionary file and makes DictBuffer.
|
||||||
*/
|
*/
|
||||||
@UsedForTesting
|
@UsedForTesting
|
||||||
public abstract void openDictBuffer() throws FileNotFoundException, IOException;
|
public void openDictBuffer() throws FileNotFoundException, IOException;
|
||||||
@UsedForTesting
|
@UsedForTesting
|
||||||
public abstract boolean isDictBufferOpen();
|
public boolean isDictBufferOpen();
|
||||||
|
|
||||||
// Constants for DictionaryBufferFactory.
|
// Constants for DictionaryBufferFactory.
|
||||||
public static final int USE_READONLY_BYTEBUFFER = 0x01000000;
|
public static final int USE_READONLY_BYTEBUFFER = 0x01000000;
|
||||||
|
@ -272,125 +227,5 @@ public abstract class DictDecoder {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
public void skipPtNode(final FormatOptions formatOptions);
|
||||||
* A utility class for reading a file header.
|
|
||||||
*/
|
|
||||||
protected static class HeaderReader {
|
|
||||||
protected static int readVersion(final DictBuffer dictBuffer)
|
|
||||||
throws IOException, UnsupportedFormatException {
|
|
||||||
return BinaryDictDecoderUtils.checkFormatVersion(dictBuffer);
|
|
||||||
}
|
|
||||||
|
|
||||||
protected static int readOptionFlags(final DictBuffer dictBuffer) {
|
|
||||||
return dictBuffer.readUnsignedShort();
|
|
||||||
}
|
|
||||||
|
|
||||||
protected static int readHeaderSize(final DictBuffer dictBuffer) {
|
|
||||||
return dictBuffer.readInt();
|
|
||||||
}
|
|
||||||
|
|
||||||
protected static HashMap<String, String> readAttributes(final DictBuffer dictBuffer,
|
|
||||||
final int headerSize) {
|
|
||||||
final HashMap<String, String> attributes = new HashMap<String, String>();
|
|
||||||
while (dictBuffer.position() < headerSize) {
|
|
||||||
// We can avoid an infinite loop here since dictBuffer.position() is always
|
|
||||||
// increased by calling CharEncoding.readString.
|
|
||||||
final String key = CharEncoding.readString(dictBuffer);
|
|
||||||
final String value = CharEncoding.readString(dictBuffer);
|
|
||||||
attributes.put(key, value);
|
|
||||||
}
|
|
||||||
dictBuffer.position(headerSize);
|
|
||||||
return attributes;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* A utility class for reading a PtNode.
|
|
||||||
*/
|
|
||||||
protected static class PtNodeReader {
|
|
||||||
protected static int readPtNodeOptionFlags(final DictBuffer dictBuffer) {
|
|
||||||
return dictBuffer.readUnsignedByte();
|
|
||||||
}
|
|
||||||
|
|
||||||
protected static int readParentAddress(final DictBuffer dictBuffer,
|
|
||||||
final FormatOptions formatOptions) {
|
|
||||||
if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) {
|
|
||||||
return BinaryDictDecoderUtils.readSInt24(dictBuffer);
|
|
||||||
} else {
|
|
||||||
return FormatSpec.NO_PARENT_ADDRESS;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
protected static int readChildrenAddress(final DictBuffer dictBuffer, final int optionFlags,
|
|
||||||
final FormatOptions formatOptions) {
|
|
||||||
if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) {
|
|
||||||
final int address = BinaryDictDecoderUtils.readSInt24(dictBuffer);
|
|
||||||
if (address == 0) return FormatSpec.NO_CHILDREN_ADDRESS;
|
|
||||||
return address;
|
|
||||||
} else {
|
|
||||||
switch (optionFlags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) {
|
|
||||||
case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_ONEBYTE:
|
|
||||||
return dictBuffer.readUnsignedByte();
|
|
||||||
case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_TWOBYTES:
|
|
||||||
return dictBuffer.readUnsignedShort();
|
|
||||||
case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_THREEBYTES:
|
|
||||||
return dictBuffer.readUnsignedInt24();
|
|
||||||
case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_NOADDRESS:
|
|
||||||
default:
|
|
||||||
return FormatSpec.NO_CHILDREN_ADDRESS;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Reads shortcuts and returns the read length.
|
|
||||||
protected static int readShortcut(final DictBuffer dictBuffer,
|
|
||||||
final ArrayList<WeightedString> shortcutTargets) {
|
|
||||||
final int pointerBefore = dictBuffer.position();
|
|
||||||
dictBuffer.readUnsignedShort(); // skip the size
|
|
||||||
while (true) {
|
|
||||||
final int targetFlags = dictBuffer.readUnsignedByte();
|
|
||||||
final String word = CharEncoding.readString(dictBuffer);
|
|
||||||
shortcutTargets.add(new WeightedString(word,
|
|
||||||
targetFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY));
|
|
||||||
if (0 == (targetFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break;
|
|
||||||
}
|
|
||||||
return dictBuffer.position() - pointerBefore;
|
|
||||||
}
|
|
||||||
|
|
||||||
protected static int readBigramAddresses(final DictBuffer dictBuffer,
|
|
||||||
final ArrayList<PendingAttribute> bigrams, final int baseAddress) {
|
|
||||||
int readLength = 0;
|
|
||||||
int bigramCount = 0;
|
|
||||||
while (bigramCount++ < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
|
|
||||||
final int bigramFlags = dictBuffer.readUnsignedByte();
|
|
||||||
++readLength;
|
|
||||||
final int sign = 0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_ATTR_OFFSET_NEGATIVE)
|
|
||||||
? 1 : -1;
|
|
||||||
int bigramAddress = baseAddress + readLength;
|
|
||||||
switch (bigramFlags & FormatSpec.MASK_BIGRAM_ATTR_ADDRESS_TYPE) {
|
|
||||||
case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_ONEBYTE:
|
|
||||||
bigramAddress += sign * dictBuffer.readUnsignedByte();
|
|
||||||
readLength += 1;
|
|
||||||
break;
|
|
||||||
case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_TWOBYTES:
|
|
||||||
bigramAddress += sign * dictBuffer.readUnsignedShort();
|
|
||||||
readLength += 2;
|
|
||||||
break;
|
|
||||||
case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_THREEBYTES:
|
|
||||||
bigramAddress += sign * dictBuffer.readUnsignedInt24();
|
|
||||||
readLength += 3;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
throw new RuntimeException("Has bigrams with no address");
|
|
||||||
}
|
|
||||||
bigrams.add(new PendingAttribute(
|
|
||||||
bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY,
|
|
||||||
bigramAddress));
|
|
||||||
if (0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break;
|
|
||||||
}
|
|
||||||
return readLength;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public abstract void skipPtNode(final FormatOptions formatOptions);
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -24,7 +24,7 @@ import java.util.ArrayList;
|
||||||
/**
|
/**
|
||||||
* An interface of a binary dictionary updater.
|
* An interface of a binary dictionary updater.
|
||||||
*/
|
*/
|
||||||
public interface DictUpdater {
|
public interface DictUpdater extends DictDecoder {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Deletes the word from the binary dictionary.
|
* Deletes the word from the binary dictionary.
|
||||||
|
|
|
@ -37,7 +37,7 @@ import java.util.Arrays;
|
||||||
* An implementation of DictDecoder for version 3 binary dictionary.
|
* An implementation of DictDecoder for version 3 binary dictionary.
|
||||||
*/
|
*/
|
||||||
@UsedForTesting
|
@UsedForTesting
|
||||||
public class Ver3DictDecoder extends DictDecoder {
|
public class Ver3DictDecoder extends AbstractDictDecoder {
|
||||||
private static final String TAG = Ver3DictDecoder.class.getSimpleName();
|
private static final String TAG = Ver3DictDecoder.class.getSimpleName();
|
||||||
|
|
||||||
static {
|
static {
|
||||||
|
@ -47,7 +47,7 @@ public class Ver3DictDecoder extends DictDecoder {
|
||||||
// TODO: implement something sensical instead of just a phony method
|
// TODO: implement something sensical instead of just a phony method
|
||||||
private static native int doNothing();
|
private static native int doNothing();
|
||||||
|
|
||||||
protected static class PtNodeReader extends DictDecoder.PtNodeReader {
|
protected static class PtNodeReader extends AbstractDictDecoder.PtNodeReader {
|
||||||
private static int readFrequency(final DictBuffer dictBuffer) {
|
private static int readFrequency(final DictBuffer dictBuffer) {
|
||||||
return dictBuffer.readUnsignedByte();
|
return dictBuffer.readUnsignedByte();
|
||||||
}
|
}
|
||||||
|
|
|
@ -37,7 +37,7 @@ import java.util.Arrays;
|
||||||
* An implementation of binary dictionary decoder for version 4 binary dictionary.
|
* An implementation of binary dictionary decoder for version 4 binary dictionary.
|
||||||
*/
|
*/
|
||||||
@UsedForTesting
|
@UsedForTesting
|
||||||
public class Ver4DictDecoder extends DictDecoder {
|
public class Ver4DictDecoder extends AbstractDictDecoder {
|
||||||
private static final String TAG = Ver4DictDecoder.class.getSimpleName();
|
private static final String TAG = Ver4DictDecoder.class.getSimpleName();
|
||||||
|
|
||||||
private static final int FILETYPE_TRIE = 1;
|
private static final int FILETYPE_TRIE = 1;
|
||||||
|
@ -157,8 +157,7 @@ public class Ver4DictDecoder extends DictDecoder {
|
||||||
new File[] { contentFile }, FormatSpec.SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE);
|
new File[] { contentFile }, FormatSpec.SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected static class PtNodeReader extends AbstractDictDecoder.PtNodeReader {
|
||||||
protected static class PtNodeReader extends DictDecoder.PtNodeReader {
|
|
||||||
protected static int readFrequency(final DictBuffer frequencyBuffer, final int terminalId) {
|
protected static int readFrequency(final DictBuffer frequencyBuffer, final int terminalId) {
|
||||||
frequencyBuffer.position(terminalId * FormatSpec.FREQUENCY_AND_FLAGS_SIZE + 1);
|
frequencyBuffer.position(terminalId * FormatSpec.FREQUENCY_AND_FLAGS_SIZE + 1);
|
||||||
return frequencyBuffer.readUnsignedByte();
|
return frequencyBuffer.readUnsignedByte();
|
||||||
|
|
Loading…
Reference in New Issue