am 1f6b9d7f: am 10100b2c: am 14d31d46: Add AbstractDictDecoder.

* commit '1f6b9d7fefff6c94e63af5df8bc6e64879ad9316':
  Add AbstractDictDecoder.
main
Yuichiro Hanada 2013-10-09 20:33:13 -07:00 committed by Android Git Automerger
commit 07dcb6acab
5 changed files with 226 additions and 186 deletions

View File

@ -0,0 +1,206 @@
/*
* Copyright (C) 2013 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.android.inputmethod.latin.makedict;
import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.TreeMap;
/**
* A base class of the binary dictionary decoder.
*/
public abstract class AbstractDictDecoder implements DictDecoder {
    /**
     * Reads the file header out of the given buffer.
     *
     * The fields are read in this order: format version, option flags, header size and
     * attributes. The version must lie between FormatSpec.MINIMUM_SUPPORTED_VERSION and
     * FormatSpec.MAXIMUM_SUPPORTED_VERSION (both inclusive), and the header size must not be
     * negative.
     *
     * @param dictBuffer the buffer to read the header from. Must not be null.
     * @return the header built from the values read.
     * @throws IOException propagated from opening the buffer or from reading the version.
     * @throws UnsupportedFormatException if the version is out of the supported range or the
     *         header size is negative.
     * @throws NullPointerException if dictBuffer is null.
     */
    protected FileHeader readHeader(final DictBuffer dictBuffer)
            throws IOException, UnsupportedFormatException {
        if (dictBuffer == null) {
            // The buffer is still opened here, as before, so that this side effect is kept for
            // existing callers. However opening the buffer can not change the (final) argument
            // of this method, so there is nothing to read from: fail right away with a clear
            // message rather than with an anonymous NullPointerException further down.
            openDictBuffer();
            throw new NullPointerException("dictBuffer must not be null: readHeader can't "
                    + "access the buffer opened by openDictBuffer().");
        }

        final int version = HeaderReader.readVersion(dictBuffer);
        if (version < FormatSpec.MINIMUM_SUPPORTED_VERSION
                || version > FormatSpec.MAXIMUM_SUPPORTED_VERSION) {
            throw new UnsupportedFormatException("Unsupported version : " + version);
        }

        // TODO: Remove this field.
        final int optionsFlags = HeaderReader.readOptionFlags(dictBuffer);

        final int headerSize = HeaderReader.readHeaderSize(dictBuffer);
        if (headerSize < 0) {
            throw new UnsupportedFormatException("header size can't be negative.");
        }

        final HashMap<String, String> attributes = HeaderReader.readAttributes(dictBuffer,
                headerSize);

        final FileHeader header = new FileHeader(headerSize,
                new FusionDictionary.DictionaryOptions(attributes,
                        0 != (optionsFlags & FormatSpec.GERMAN_UMLAUT_PROCESSING_FLAG),
                        0 != (optionsFlags & FormatSpec.FRENCH_LIGATURE_PROCESSING_FLAG)),
                new FormatOptions(version,
                        0 != (optionsFlags & FormatSpec.SUPPORTS_DYNAMIC_UPDATE)));
        return header;
    }

    /**
     * Opens the dictionary buffer if it is not open yet, then delegates to
     * BinaryDictIOUtils.getTerminalPosition.
     *
     * @param word the word to look up.
     * @return the value returned by BinaryDictIOUtils.getTerminalPosition for this decoder.
     */
    @Override @UsedForTesting
    public int getTerminalPosition(final String word)
            throws IOException, UnsupportedFormatException {
        if (!isDictBufferOpen()) {
            openDictBuffer();
        }
        return BinaryDictIOUtils.getTerminalPosition(this, word);
    }

    /**
     * Opens the dictionary buffer if it is not open yet, then delegates to
     * BinaryDictIOUtils.readUnigramsAndBigramsBinary, which is handed the three maps.
     *
     * @param words the map passed through as the words argument.
     * @param frequencies the map passed through as the frequencies argument.
     * @param bigrams the map passed through as the bigrams argument.
     */
    @Override @UsedForTesting
    public void readUnigramsAndBigramsBinary(final TreeMap<Integer, String> words,
            final TreeMap<Integer, Integer> frequencies,
            final TreeMap<Integer, ArrayList<PendingAttribute>> bigrams)
            throws IOException, UnsupportedFormatException {
        if (!isDictBufferOpen()) {
            openDictBuffer();
        }
        BinaryDictIOUtils.readUnigramsAndBigramsBinary(this, words, frequencies, bigrams);
    }

    /**
     * A utility class for reading a file header.
     *
     * Every method reads from the current position of the buffer it is given.
     */
    protected static class HeaderReader {
        /**
         * Reads the format version through BinaryDictDecoderUtils.checkFormatVersion.
         */
        protected static int readVersion(final DictBuffer dictBuffer)
                throws IOException, UnsupportedFormatException {
            return BinaryDictDecoderUtils.checkFormatVersion(dictBuffer);
        }

        /**
         * Reads the option flags, stored as an unsigned short.
         */
        protected static int readOptionFlags(final DictBuffer dictBuffer) {
            return dictBuffer.readUnsignedShort();
        }

        /**
         * Reads the header size, stored as an int.
         */
        protected static int readHeaderSize(final DictBuffer dictBuffer) {
            return dictBuffer.readInt();
        }

        /**
         * Reads key/value string pairs for as long as the buffer position is before
         * headerSize, then sets the buffer position to headerSize.
         *
         * @param dictBuffer the buffer to read from.
         * @param headerSize the buffer position at which reading stops.
         * @return the attributes read, as a map from key to value.
         */
        protected static HashMap<String, String> readAttributes(final DictBuffer dictBuffer,
                final int headerSize) {
            final HashMap<String, String> attributes = new HashMap<String, String>();
            while (dictBuffer.position() < headerSize) {
                // We can avoid an infinite loop here since dictBuffer.position() is always
                // increased by calling CharEncoding.readString.
                final String key = CharEncoding.readString(dictBuffer);
                final String value = CharEncoding.readString(dictBuffer);
                attributes.put(key, value);
            }
            // The last pair may have been read past headerSize, so reposition explicitly.
            dictBuffer.position(headerSize);
            return attributes;
        }
    }

    /**
     * A utility class for reading a PtNode.
     *
     * Every method reads from the current position of the buffer it is given.
     */
    protected static class PtNodeReader {
        /**
         * Reads the option flags of a PtNode, stored as an unsigned byte.
         */
        protected static int readPtNodeOptionFlags(final DictBuffer dictBuffer) {
            return dictBuffer.readUnsignedByte();
        }

        /**
         * Reads the parent address of a PtNode.
         *
         * The address is read as a signed 24-bit integer when the format supports dynamic
         * updates. Otherwise nothing is read and FormatSpec.NO_PARENT_ADDRESS is returned.
         */
        protected static int readParentAddress(final DictBuffer dictBuffer,
                final FormatOptions formatOptions) {
            if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) {
                return BinaryDictDecoderUtils.readSInt24(dictBuffer);
            } else {
                return FormatSpec.NO_PARENT_ADDRESS;
            }
        }

        /**
         * Reads the children address of a PtNode.
         *
         * When the format supports dynamic updates, the address is a signed 24-bit integer
         * and the value 0 is reported as FormatSpec.NO_CHILDREN_ADDRESS. Otherwise the address
         * type bits of optionFlags select how many bytes are read (one, two, three or none).
         *
         * @param dictBuffer the buffer to read from.
         * @param optionFlags the option flags of the PtNode being read.
         * @param formatOptions the format options of the dictionary.
         * @return the value read, or FormatSpec.NO_CHILDREN_ADDRESS when there is none.
         */
        protected static int readChildrenAddress(final DictBuffer dictBuffer, final int optionFlags,
                final FormatOptions formatOptions) {
            if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) {
                final int address = BinaryDictDecoderUtils.readSInt24(dictBuffer);
                if (address == 0) return FormatSpec.NO_CHILDREN_ADDRESS;
                return address;
            } else {
                switch (optionFlags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) {
                    case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_ONEBYTE:
                        return dictBuffer.readUnsignedByte();
                    case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_TWOBYTES:
                        return dictBuffer.readUnsignedShort();
                    case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_THREEBYTES:
                        return dictBuffer.readUnsignedInt24();
                    case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_NOADDRESS:
                    default:
                        return FormatSpec.NO_CHILDREN_ADDRESS;
                }
            }
        }

        /**
         * Reads shortcuts and returns the read length.
         *
         * An unsigned short (the size) is skipped first. Each entry is then a flags byte
         * followed by a string; entries are read until one has the
         * FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT bit cleared.
         *
         * @param dictBuffer the buffer to read from.
         * @param shortcutTargets the list the shortcut targets are appended to.
         * @return the difference between the buffer position after and before the read.
         */
        protected static int readShortcut(final DictBuffer dictBuffer,
                final ArrayList<WeightedString> shortcutTargets) {
            final int pointerBefore = dictBuffer.position();
            dictBuffer.readUnsignedShort(); // skip the size
            while (true) {
                final int targetFlags = dictBuffer.readUnsignedByte();
                final String word = CharEncoding.readString(dictBuffer);
                shortcutTargets.add(new WeightedString(word,
                        targetFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY));
                if (0 == (targetFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break;
            }
            return dictBuffer.position() - pointerBefore;
        }

        /**
         * Reads bigram entries and returns the number of bytes read.
         *
         * Each entry is a flags byte followed by an offset of one, two or three bytes. The
         * resulting address is baseAddress, plus the number of bytes read up to and including
         * the flags byte of the entry, plus or minus the offset depending on
         * FormatSpec.FLAG_BIGRAM_ATTR_OFFSET_NEGATIVE. Reading stops after an entry without the
         * FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT bit, or once
         * FormatSpec.MAX_BIGRAMS_IN_A_PTNODE entries have been read.
         *
         * @param dictBuffer the buffer to read from.
         * @param bigrams the list the bigrams are appended to.
         * @param baseAddress the value the offsets are relative to, as described above.
         * @return the number of bytes read.
         * @throws RuntimeException if an entry has none of the known offset sizes.
         */
        protected static int readBigramAddresses(final DictBuffer dictBuffer,
                final ArrayList<PendingAttribute> bigrams, final int baseAddress) {
            int readLength = 0;
            int bigramCount = 0;
            while (bigramCount++ < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
                final int bigramFlags = dictBuffer.readUnsignedByte();
                ++readLength;
                final int sign = 0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_ATTR_OFFSET_NEGATIVE)
                        ? 1 : -1;
                // The flags byte is already counted in readLength at this point.
                int bigramAddress = baseAddress + readLength;
                switch (bigramFlags & FormatSpec.MASK_BIGRAM_ATTR_ADDRESS_TYPE) {
                    case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_ONEBYTE:
                        bigramAddress += sign * dictBuffer.readUnsignedByte();
                        readLength += 1;
                        break;
                    case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_TWOBYTES:
                        bigramAddress += sign * dictBuffer.readUnsignedShort();
                        readLength += 2;
                        break;
                    case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_THREEBYTES:
                        bigramAddress += sign * dictBuffer.readUnsignedInt24();
                        readLength += 3;
                        break;
                    default:
                        throw new RuntimeException("Has bigrams with no address");
                }
                bigrams.add(new PendingAttribute(
                        bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY,
                        bigramAddress));
                if (0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break;
            }
            return readLength;
        }
    }
}

View File

@ -17,11 +17,9 @@
package com.android.inputmethod.latin.makedict; package com.android.inputmethod.latin.makedict;
import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import com.android.inputmethod.latin.utils.ByteArrayDictBuffer; import com.android.inputmethod.latin.utils.ByteArrayDictBuffer;
import java.io.File; import java.io.File;
@ -32,50 +30,17 @@ import java.io.RandomAccessFile;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
import java.nio.channels.FileChannel; import java.nio.channels.FileChannel;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashMap;
import java.util.TreeMap; import java.util.TreeMap;
/** /**
* The base class of binary dictionary decoders. * An interface of binary dictionary decoders.
*/ */
public abstract class DictDecoder { public interface DictDecoder {
protected FileHeader readHeader(final DictBuffer dictBuffer)
throws IOException, UnsupportedFormatException {
if (dictBuffer == null) {
openDictBuffer();
}
final int version = HeaderReader.readVersion(dictBuffer);
if (version < FormatSpec.MINIMUM_SUPPORTED_VERSION
|| version > FormatSpec.MAXIMUM_SUPPORTED_VERSION) {
throw new UnsupportedFormatException("Unsupported version : " + version);
}
// TODO: Remove this field.
final int optionsFlags = HeaderReader.readOptionFlags(dictBuffer);
final int headerSize = HeaderReader.readHeaderSize(dictBuffer);
if (headerSize < 0) {
throw new UnsupportedFormatException("header size can't be negative.");
}
final HashMap<String, String> attributes = HeaderReader.readAttributes(dictBuffer,
headerSize);
final FileHeader header = new FileHeader(headerSize,
new FusionDictionary.DictionaryOptions(attributes,
0 != (optionsFlags & FormatSpec.GERMAN_UMLAUT_PROCESSING_FLAG),
0 != (optionsFlags & FormatSpec.FRENCH_LIGATURE_PROCESSING_FLAG)),
new FormatOptions(version,
0 != (optionsFlags & FormatSpec.SUPPORTS_DYNAMIC_UPDATE)));
return header;
}
/** /**
* Reads and returns the file header. * Reads and returns the file header.
*/ */
public abstract FileHeader readHeader() throws IOException, UnsupportedFormatException; public FileHeader readHeader() throws IOException, UnsupportedFormatException;
/** /**
* Reads PtNode from nodeAddress. * Reads PtNode from nodeAddress.
@ -83,7 +48,7 @@ public abstract class DictDecoder {
* @param formatOptions the format options. * @param formatOptions the format options.
* @return PtNodeInfo. * @return PtNodeInfo.
*/ */
public abstract PtNodeInfo readPtNode(final int ptNodePos, final FormatOptions formatOptions); public PtNodeInfo readPtNode(final int ptNodePos, final FormatOptions formatOptions);
/** /**
* Reads a buffer and returns the memory representation of the dictionary. * Reads a buffer and returns the memory representation of the dictionary.
@ -98,7 +63,7 @@ public abstract class DictDecoder {
* @return the created (or merged) dictionary. * @return the created (or merged) dictionary.
*/ */
@UsedForTesting @UsedForTesting
public abstract FusionDictionary readDictionaryBinary(final FusionDictionary dict, public FusionDictionary readDictionaryBinary(final FusionDictionary dict,
final boolean deleteDictIfBroken) final boolean deleteDictIfBroken)
throws FileNotFoundException, IOException, UnsupportedFormatException; throws FileNotFoundException, IOException, UnsupportedFormatException;
@ -113,12 +78,7 @@ public abstract class DictDecoder {
*/ */
@UsedForTesting @UsedForTesting
public int getTerminalPosition(final String word) public int getTerminalPosition(final String word)
throws IOException, UnsupportedFormatException { throws IOException, UnsupportedFormatException;
if (!isDictBufferOpen()) {
openDictBuffer();
}
return BinaryDictIOUtils.getTerminalPosition(this, word);
}
/** /**
* Reads unigrams and bigrams from the binary file. * Reads unigrams and bigrams from the binary file.
@ -134,47 +94,42 @@ public abstract class DictDecoder {
public void readUnigramsAndBigramsBinary(final TreeMap<Integer, String> words, public void readUnigramsAndBigramsBinary(final TreeMap<Integer, String> words,
final TreeMap<Integer, Integer> frequencies, final TreeMap<Integer, Integer> frequencies,
final TreeMap<Integer, ArrayList<PendingAttribute>> bigrams) final TreeMap<Integer, ArrayList<PendingAttribute>> bigrams)
throws IOException, UnsupportedFormatException { throws IOException, UnsupportedFormatException;
if (!isDictBufferOpen()) {
openDictBuffer();
}
BinaryDictIOUtils.readUnigramsAndBigramsBinary(this, words, frequencies, bigrams);
}
/** /**
* Sets the position of the buffer to the given value. * Sets the position of the buffer to the given value.
* *
* @param newPos the new position * @param newPos the new position
*/ */
public abstract void setPosition(final int newPos); public void setPosition(final int newPos);
/** /**
* Gets the position of the buffer. * Gets the position of the buffer.
* *
* @return the position * @return the position
*/ */
public abstract int getPosition(); public int getPosition();
/** /**
* Reads and returns the PtNode count out of a buffer and forwards the pointer. * Reads and returns the PtNode count out of a buffer and forwards the pointer.
*/ */
public abstract int readPtNodeCount(); public int readPtNodeCount();
/** /**
* Reads the forward link and advances the position. * Reads the forward link and advances the position.
* *
* @return true if this method moves the file pointer, false otherwise. * @return true if this method moves the file pointer, false otherwise.
*/ */
public abstract boolean readAndFollowForwardLink(); public boolean readAndFollowForwardLink();
public abstract boolean hasNextPtNodeArray(); public boolean hasNextPtNodeArray();
/** /**
* Opens the dictionary file and makes DictBuffer. * Opens the dictionary file and makes DictBuffer.
*/ */
@UsedForTesting @UsedForTesting
public abstract void openDictBuffer() throws FileNotFoundException, IOException; public void openDictBuffer() throws FileNotFoundException, IOException;
@UsedForTesting @UsedForTesting
public abstract boolean isDictBufferOpen(); public boolean isDictBufferOpen();
// Constants for DictionaryBufferFactory. // Constants for DictionaryBufferFactory.
public static final int USE_READONLY_BYTEBUFFER = 0x01000000; public static final int USE_READONLY_BYTEBUFFER = 0x01000000;
@ -272,125 +227,5 @@ public abstract class DictDecoder {
} }
} }
/** public void skipPtNode(final FormatOptions formatOptions);
* A utility class for reading a file header.
*/
protected static class HeaderReader {
protected static int readVersion(final DictBuffer dictBuffer)
throws IOException, UnsupportedFormatException {
return BinaryDictDecoderUtils.checkFormatVersion(dictBuffer);
}
protected static int readOptionFlags(final DictBuffer dictBuffer) {
return dictBuffer.readUnsignedShort();
}
protected static int readHeaderSize(final DictBuffer dictBuffer) {
return dictBuffer.readInt();
}
protected static HashMap<String, String> readAttributes(final DictBuffer dictBuffer,
final int headerSize) {
final HashMap<String, String> attributes = new HashMap<String, String>();
while (dictBuffer.position() < headerSize) {
// We can avoid an infinite loop here since dictBuffer.position() is always
// increased by calling CharEncoding.readString.
final String key = CharEncoding.readString(dictBuffer);
final String value = CharEncoding.readString(dictBuffer);
attributes.put(key, value);
}
dictBuffer.position(headerSize);
return attributes;
}
}
/**
* A utility class for reading a PtNode.
*/
protected static class PtNodeReader {
protected static int readPtNodeOptionFlags(final DictBuffer dictBuffer) {
return dictBuffer.readUnsignedByte();
}
protected static int readParentAddress(final DictBuffer dictBuffer,
final FormatOptions formatOptions) {
if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) {
return BinaryDictDecoderUtils.readSInt24(dictBuffer);
} else {
return FormatSpec.NO_PARENT_ADDRESS;
}
}
protected static int readChildrenAddress(final DictBuffer dictBuffer, final int optionFlags,
final FormatOptions formatOptions) {
if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) {
final int address = BinaryDictDecoderUtils.readSInt24(dictBuffer);
if (address == 0) return FormatSpec.NO_CHILDREN_ADDRESS;
return address;
} else {
switch (optionFlags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) {
case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_ONEBYTE:
return dictBuffer.readUnsignedByte();
case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_TWOBYTES:
return dictBuffer.readUnsignedShort();
case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_THREEBYTES:
return dictBuffer.readUnsignedInt24();
case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_NOADDRESS:
default:
return FormatSpec.NO_CHILDREN_ADDRESS;
}
}
}
// Reads shortcuts and returns the read length.
protected static int readShortcut(final DictBuffer dictBuffer,
final ArrayList<WeightedString> shortcutTargets) {
final int pointerBefore = dictBuffer.position();
dictBuffer.readUnsignedShort(); // skip the size
while (true) {
final int targetFlags = dictBuffer.readUnsignedByte();
final String word = CharEncoding.readString(dictBuffer);
shortcutTargets.add(new WeightedString(word,
targetFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY));
if (0 == (targetFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break;
}
return dictBuffer.position() - pointerBefore;
}
protected static int readBigramAddresses(final DictBuffer dictBuffer,
final ArrayList<PendingAttribute> bigrams, final int baseAddress) {
int readLength = 0;
int bigramCount = 0;
while (bigramCount++ < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
final int bigramFlags = dictBuffer.readUnsignedByte();
++readLength;
final int sign = 0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_ATTR_OFFSET_NEGATIVE)
? 1 : -1;
int bigramAddress = baseAddress + readLength;
switch (bigramFlags & FormatSpec.MASK_BIGRAM_ATTR_ADDRESS_TYPE) {
case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_ONEBYTE:
bigramAddress += sign * dictBuffer.readUnsignedByte();
readLength += 1;
break;
case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_TWOBYTES:
bigramAddress += sign * dictBuffer.readUnsignedShort();
readLength += 2;
break;
case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_THREEBYTES:
bigramAddress += sign * dictBuffer.readUnsignedInt24();
readLength += 3;
break;
default:
throw new RuntimeException("Has bigrams with no address");
}
bigrams.add(new PendingAttribute(
bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY,
bigramAddress));
if (0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break;
}
return readLength;
}
}
public abstract void skipPtNode(final FormatOptions formatOptions);
} }

View File

@ -24,7 +24,7 @@ import java.util.ArrayList;
/** /**
* An interface of a binary dictionary updater. * An interface of a binary dictionary updater.
*/ */
public interface DictUpdater { public interface DictUpdater extends DictDecoder {
/** /**
* Deletes the word from the binary dictionary. * Deletes the word from the binary dictionary.

View File

@ -37,7 +37,7 @@ import java.util.Arrays;
* An implementation of DictDecoder for version 3 binary dictionary. * An implementation of DictDecoder for version 3 binary dictionary.
*/ */
@UsedForTesting @UsedForTesting
public class Ver3DictDecoder extends DictDecoder { public class Ver3DictDecoder extends AbstractDictDecoder {
private static final String TAG = Ver3DictDecoder.class.getSimpleName(); private static final String TAG = Ver3DictDecoder.class.getSimpleName();
static { static {
@ -47,7 +47,7 @@ public class Ver3DictDecoder extends DictDecoder {
// TODO: implement something sensical instead of just a phony method // TODO: implement something sensical instead of just a phony method
private static native int doNothing(); private static native int doNothing();
protected static class PtNodeReader extends DictDecoder.PtNodeReader { protected static class PtNodeReader extends AbstractDictDecoder.PtNodeReader {
private static int readFrequency(final DictBuffer dictBuffer) { private static int readFrequency(final DictBuffer dictBuffer) {
return dictBuffer.readUnsignedByte(); return dictBuffer.readUnsignedByte();
} }

View File

@ -37,7 +37,7 @@ import java.util.Arrays;
* An implementation of binary dictionary decoder for version 4 binary dictionary. * An implementation of binary dictionary decoder for version 4 binary dictionary.
*/ */
@UsedForTesting @UsedForTesting
public class Ver4DictDecoder extends DictDecoder { public class Ver4DictDecoder extends AbstractDictDecoder {
private static final String TAG = Ver4DictDecoder.class.getSimpleName(); private static final String TAG = Ver4DictDecoder.class.getSimpleName();
private static final int FILETYPE_TRIE = 1; private static final int FILETYPE_TRIE = 1;
@ -157,8 +157,7 @@ public class Ver4DictDecoder extends DictDecoder {
new File[] { contentFile }, FormatSpec.SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE); new File[] { contentFile }, FormatSpec.SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE);
} }
protected static class PtNodeReader extends AbstractDictDecoder.PtNodeReader {
protected static class PtNodeReader extends DictDecoder.PtNodeReader {
protected static int readFrequency(final DictBuffer frequencyBuffer, final int terminalId) { protected static int readFrequency(final DictBuffer frequencyBuffer, final int terminalId) {
frequencyBuffer.position(terminalId * FormatSpec.FREQUENCY_AND_FLAGS_SIZE + 1); frequencyBuffer.position(terminalId * FormatSpec.FREQUENCY_AND_FLAGS_SIZE + 1);
return frequencyBuffer.readUnsignedByte(); return frequencyBuffer.readUnsignedByte();