From 14d31d464037c31e7f7d382a8a86f6acf4694b06 Mon Sep 17 00:00:00 2001 From: Yuichiro Hanada Date: Tue, 1 Oct 2013 17:59:50 +0900 Subject: [PATCH] Add AbstractDictDecoder. Change-Id: Ie69d84e090f69dc3ea1f5de73ad8c954ecd2c6a7 --- .../latin/makedict/AbstractDictDecoder.java | 206 ++++++++++++++++++ .../latin/makedict/DictDecoder.java | 195 ++--------------- .../latin/makedict/DictUpdater.java | 2 +- .../latin/makedict/Ver3DictDecoder.java | 4 +- .../latin/makedict/Ver4DictDecoder.java | 5 +- 5 files changed, 226 insertions(+), 186 deletions(-) create mode 100644 java/src/com/android/inputmethod/latin/makedict/AbstractDictDecoder.java diff --git a/java/src/com/android/inputmethod/latin/makedict/AbstractDictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/AbstractDictDecoder.java new file mode 100644 index 000000000..9f7f502ea --- /dev/null +++ b/java/src/com/android/inputmethod/latin/makedict/AbstractDictDecoder.java @@ -0,0 +1,206 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.android.inputmethod.latin.makedict; + +import com.android.inputmethod.annotations.UsedForTesting; +import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding; +import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; +import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; +import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; +import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.TreeMap; + +/** + * A base class of the binary dictionary decoder. + */ +public abstract class AbstractDictDecoder implements DictDecoder { + protected FileHeader readHeader(final DictBuffer dictBuffer) + throws IOException, UnsupportedFormatException { + if (dictBuffer == null) { + openDictBuffer(); + } + + final int version = HeaderReader.readVersion(dictBuffer); + if (version < FormatSpec.MINIMUM_SUPPORTED_VERSION + || version > FormatSpec.MAXIMUM_SUPPORTED_VERSION) { + throw new UnsupportedFormatException("Unsupported version : " + version); + } + // TODO: Remove this field. + final int optionsFlags = HeaderReader.readOptionFlags(dictBuffer); + + final int headerSize = HeaderReader.readHeaderSize(dictBuffer); + + if (headerSize < 0) { + throw new UnsupportedFormatException("header size can't be negative."); + } + + final HashMap attributes = HeaderReader.readAttributes(dictBuffer, + headerSize); + + final FileHeader header = new FileHeader(headerSize, + new FusionDictionary.DictionaryOptions(attributes, + 0 != (optionsFlags & FormatSpec.GERMAN_UMLAUT_PROCESSING_FLAG), + 0 != (optionsFlags & FormatSpec.FRENCH_LIGATURE_PROCESSING_FLAG)), + new FormatOptions(version, + 0 != (optionsFlags & FormatSpec.SUPPORTS_DYNAMIC_UPDATE))); + return header; + } + + @Override @UsedForTesting + public int getTerminalPosition(final String word) + throws IOException, UnsupportedFormatException { + if (!isDictBufferOpen()) { + openDictBuffer(); + } + return BinaryDictIOUtils.getTerminalPosition(this, word); + } + + @Override @UsedForTesting + public void readUnigramsAndBigramsBinary(final TreeMap words, + final TreeMap frequencies, + final TreeMap> bigrams) + throws IOException, UnsupportedFormatException { + if (!isDictBufferOpen()) { + openDictBuffer(); + } + BinaryDictIOUtils.readUnigramsAndBigramsBinary(this, words, frequencies, bigrams); + } + + /** + * A utility class for reading a file header. + */ + protected static class HeaderReader { + protected static int readVersion(final DictBuffer dictBuffer) + throws IOException, UnsupportedFormatException { + return BinaryDictDecoderUtils.checkFormatVersion(dictBuffer); + } + + protected static int readOptionFlags(final DictBuffer dictBuffer) { + return dictBuffer.readUnsignedShort(); + } + + protected static int readHeaderSize(final DictBuffer dictBuffer) { + return dictBuffer.readInt(); + } + + protected static HashMap readAttributes(final DictBuffer dictBuffer, + final int headerSize) { + final HashMap attributes = new HashMap(); + while (dictBuffer.position() < headerSize) { + // We can avoid an infinite loop here since dictBuffer.position() is always + // increased by calling CharEncoding.readString. + final String key = CharEncoding.readString(dictBuffer); + final String value = CharEncoding.readString(dictBuffer); + attributes.put(key, value); + } + dictBuffer.position(headerSize); + return attributes; + } + } + + /** + * A utility class for reading a PtNode. + */ + protected static class PtNodeReader { + protected static int readPtNodeOptionFlags(final DictBuffer dictBuffer) { + return dictBuffer.readUnsignedByte(); + } + + protected static int readParentAddress(final DictBuffer dictBuffer, + final FormatOptions formatOptions) { + if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) { + return BinaryDictDecoderUtils.readSInt24(dictBuffer); + } else { + return FormatSpec.NO_PARENT_ADDRESS; + } + } + + protected static int readChildrenAddress(final DictBuffer dictBuffer, final int optionFlags, + final FormatOptions formatOptions) { + if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) { + final int address = BinaryDictDecoderUtils.readSInt24(dictBuffer); + if (address == 0) return FormatSpec.NO_CHILDREN_ADDRESS; + return address; + } else { + switch (optionFlags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) { + case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_ONEBYTE: + return dictBuffer.readUnsignedByte(); + case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_TWOBYTES: + return dictBuffer.readUnsignedShort(); + case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_THREEBYTES: + return dictBuffer.readUnsignedInt24(); + case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_NOADDRESS: + default: + return FormatSpec.NO_CHILDREN_ADDRESS; + } + } + } + + // Reads shortcuts and returns the read length. + protected static int readShortcut(final DictBuffer dictBuffer, + final ArrayList shortcutTargets) { + final int pointerBefore = dictBuffer.position(); + dictBuffer.readUnsignedShort(); // skip the size + while (true) { + final int targetFlags = dictBuffer.readUnsignedByte(); + final String word = CharEncoding.readString(dictBuffer); + shortcutTargets.add(new WeightedString(word, + targetFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY)); + if (0 == (targetFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break; + } + return dictBuffer.position() - pointerBefore; + } + + protected static int readBigramAddresses(final DictBuffer dictBuffer, + final ArrayList bigrams, final int baseAddress) { + int readLength = 0; + int bigramCount = 0; + while (bigramCount++ < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) { + final int bigramFlags = dictBuffer.readUnsignedByte(); + ++readLength; + final int sign = 0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_ATTR_OFFSET_NEGATIVE) + ? 1 : -1; + int bigramAddress = baseAddress + readLength; + switch (bigramFlags & FormatSpec.MASK_BIGRAM_ATTR_ADDRESS_TYPE) { + case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_ONEBYTE: + bigramAddress += sign * dictBuffer.readUnsignedByte(); + readLength += 1; + break; + case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_TWOBYTES: + bigramAddress += sign * dictBuffer.readUnsignedShort(); + readLength += 2; + break; + case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_THREEBYTES: + bigramAddress += sign * dictBuffer.readUnsignedInt24(); + readLength += 3; + break; + default: + throw new RuntimeException("Has bigrams with no address"); + } + bigrams.add(new PendingAttribute( + bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY, + bigramAddress)); + if (0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break; + } + return readLength; + } + } +} diff --git a/java/src/com/android/inputmethod/latin/makedict/DictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/DictDecoder.java index e251f7df7..3dbeee099 100644 --- a/java/src/com/android/inputmethod/latin/makedict/DictDecoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/DictDecoder.java @@ -17,11 +17,9 @@ package com.android.inputmethod.latin.makedict; import com.android.inputmethod.annotations.UsedForTesting; -import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; -import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import com.android.inputmethod.latin.utils.ByteArrayDictBuffer; import java.io.File; @@ -32,50 +30,17 @@ import java.io.RandomAccessFile; import java.nio.ByteBuffer; import java.nio.channels.FileChannel; import java.util.ArrayList; -import java.util.HashMap; import java.util.TreeMap; /** - * The base class of binary dictionary decoders. + * An interface of binary dictionary decoders. */ -public abstract class DictDecoder { - - protected FileHeader readHeader(final DictBuffer dictBuffer) - throws IOException, UnsupportedFormatException { - if (dictBuffer == null) { - openDictBuffer(); - } - - final int version = HeaderReader.readVersion(dictBuffer); - if (version < FormatSpec.MINIMUM_SUPPORTED_VERSION - || version > FormatSpec.MAXIMUM_SUPPORTED_VERSION) { - throw new UnsupportedFormatException("Unsupported version : " + version); - } - // TODO: Remove this field. - final int optionsFlags = HeaderReader.readOptionFlags(dictBuffer); - - final int headerSize = HeaderReader.readHeaderSize(dictBuffer); - - if (headerSize < 0) { - throw new UnsupportedFormatException("header size can't be negative."); - } - - final HashMap attributes = HeaderReader.readAttributes(dictBuffer, - headerSize); - - final FileHeader header = new FileHeader(headerSize, - new FusionDictionary.DictionaryOptions(attributes, - 0 != (optionsFlags & FormatSpec.GERMAN_UMLAUT_PROCESSING_FLAG), - 0 != (optionsFlags & FormatSpec.FRENCH_LIGATURE_PROCESSING_FLAG)), - new FormatOptions(version, - 0 != (optionsFlags & FormatSpec.SUPPORTS_DYNAMIC_UPDATE))); - return header; - } +public interface DictDecoder { /** * Reads and returns the file header. */ - public abstract FileHeader readHeader() throws IOException, UnsupportedFormatException; + public FileHeader readHeader() throws IOException, UnsupportedFormatException; /** * Reads PtNode from nodeAddress. @@ -83,7 +48,7 @@ public abstract class DictDecoder { * @param formatOptions the format options. * @return PtNodeInfo. */ - public abstract PtNodeInfo readPtNode(final int ptNodePos, final FormatOptions formatOptions); + public PtNodeInfo readPtNode(final int ptNodePos, final FormatOptions formatOptions); /** * Reads a buffer and returns the memory representation of the dictionary. @@ -98,7 +63,7 @@ public abstract class DictDecoder { * @return the created (or merged) dictionary. */ @UsedForTesting - public abstract FusionDictionary readDictionaryBinary(final FusionDictionary dict, + public FusionDictionary readDictionaryBinary(final FusionDictionary dict, final boolean deleteDictIfBroken) throws FileNotFoundException, IOException, UnsupportedFormatException; @@ -113,12 +78,7 @@ public abstract class DictDecoder { */ @UsedForTesting public int getTerminalPosition(final String word) - throws IOException, UnsupportedFormatException { - if (!isDictBufferOpen()) { - openDictBuffer(); - } - return BinaryDictIOUtils.getTerminalPosition(this, word); - } + throws IOException, UnsupportedFormatException; /** * Reads unigrams and bigrams from the binary file. @@ -134,47 +94,42 @@ public abstract class DictDecoder { public void readUnigramsAndBigramsBinary(final TreeMap words, final TreeMap frequencies, final TreeMap> bigrams) - throws IOException, UnsupportedFormatException { - if (!isDictBufferOpen()) { - openDictBuffer(); - } - BinaryDictIOUtils.readUnigramsAndBigramsBinary(this, words, frequencies, bigrams); - } + throws IOException, UnsupportedFormatException; /** * Sets the position of the buffer to the given value. * * @param newPos the new position */ - public abstract void setPosition(final int newPos); + public void setPosition(final int newPos); /** * Gets the position of the buffer. * * @return the position */ - public abstract int getPosition(); + public int getPosition(); /** * Reads and returns the PtNode count out of a buffer and forwards the pointer. */ - public abstract int readPtNodeCount(); + public int readPtNodeCount(); /** * Reads the forward link and advances the position. * * @return true if this method moves the file pointer, false otherwise. */ - public abstract boolean readAndFollowForwardLink(); - public abstract boolean hasNextPtNodeArray(); + public boolean readAndFollowForwardLink(); + public boolean hasNextPtNodeArray(); /** * Opens the dictionary file and makes DictBuffer. */ @UsedForTesting - public abstract void openDictBuffer() throws FileNotFoundException, IOException; + public void openDictBuffer() throws FileNotFoundException, IOException; @UsedForTesting - public abstract boolean isDictBufferOpen(); + public boolean isDictBufferOpen(); // Constants for DictionaryBufferFactory. public static final int USE_READONLY_BYTEBUFFER = 0x01000000; @@ -272,125 +227,5 @@ public abstract class DictDecoder { } } - /** - * A utility class for reading a file header. - */ - protected static class HeaderReader { - protected static int readVersion(final DictBuffer dictBuffer) - throws IOException, UnsupportedFormatException { - return BinaryDictDecoderUtils.checkFormatVersion(dictBuffer); - } - - protected static int readOptionFlags(final DictBuffer dictBuffer) { - return dictBuffer.readUnsignedShort(); - } - - protected static int readHeaderSize(final DictBuffer dictBuffer) { - return dictBuffer.readInt(); - } - - protected static HashMap readAttributes(final DictBuffer dictBuffer, - final int headerSize) { - final HashMap attributes = new HashMap(); - while (dictBuffer.position() < headerSize) { - // We can avoid an infinite loop here since dictBuffer.position() is always - // increased by calling CharEncoding.readString. - final String key = CharEncoding.readString(dictBuffer); - final String value = CharEncoding.readString(dictBuffer); - attributes.put(key, value); - } - dictBuffer.position(headerSize); - return attributes; - } - } - - /** - * A utility class for reading a PtNode. - */ - protected static class PtNodeReader { - protected static int readPtNodeOptionFlags(final DictBuffer dictBuffer) { - return dictBuffer.readUnsignedByte(); - } - - protected static int readParentAddress(final DictBuffer dictBuffer, - final FormatOptions formatOptions) { - if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) { - return BinaryDictDecoderUtils.readSInt24(dictBuffer); - } else { - return FormatSpec.NO_PARENT_ADDRESS; - } - } - - protected static int readChildrenAddress(final DictBuffer dictBuffer, final int optionFlags, - final FormatOptions formatOptions) { - if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) { - final int address = BinaryDictDecoderUtils.readSInt24(dictBuffer); - if (address == 0) return FormatSpec.NO_CHILDREN_ADDRESS; - return address; - } else { - switch (optionFlags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) { - case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_ONEBYTE: - return dictBuffer.readUnsignedByte(); - case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_TWOBYTES: - return dictBuffer.readUnsignedShort(); - case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_THREEBYTES: - return dictBuffer.readUnsignedInt24(); - case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_NOADDRESS: - default: - return FormatSpec.NO_CHILDREN_ADDRESS; - } - } - } - - // Reads shortcuts and returns the read length. - protected static int readShortcut(final DictBuffer dictBuffer, - final ArrayList shortcutTargets) { - final int pointerBefore = dictBuffer.position(); - dictBuffer.readUnsignedShort(); // skip the size - while (true) { - final int targetFlags = dictBuffer.readUnsignedByte(); - final String word = CharEncoding.readString(dictBuffer); - shortcutTargets.add(new WeightedString(word, - targetFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY)); - if (0 == (targetFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break; - } - return dictBuffer.position() - pointerBefore; - } - - protected static int readBigramAddresses(final DictBuffer dictBuffer, - final ArrayList bigrams, final int baseAddress) { - int readLength = 0; - int bigramCount = 0; - while (bigramCount++ < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) { - final int bigramFlags = dictBuffer.readUnsignedByte(); - ++readLength; - final int sign = 0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_ATTR_OFFSET_NEGATIVE) - ? 1 : -1; - int bigramAddress = baseAddress + readLength; - switch (bigramFlags & FormatSpec.MASK_BIGRAM_ATTR_ADDRESS_TYPE) { - case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_ONEBYTE: - bigramAddress += sign * dictBuffer.readUnsignedByte(); - readLength += 1; - break; - case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_TWOBYTES: - bigramAddress += sign * dictBuffer.readUnsignedShort(); - readLength += 2; - break; - case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_THREEBYTES: - bigramAddress += sign * dictBuffer.readUnsignedInt24(); - readLength += 3; - break; - default: - throw new RuntimeException("Has bigrams with no address"); - } - bigrams.add(new PendingAttribute( - bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY, - bigramAddress)); - if (0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break; - } - return readLength; - } - } - - public abstract void skipPtNode(final FormatOptions formatOptions); + public void skipPtNode(final FormatOptions formatOptions); } diff --git a/java/src/com/android/inputmethod/latin/makedict/DictUpdater.java b/java/src/com/android/inputmethod/latin/makedict/DictUpdater.java index 413d0301c..709ea3310 100644 --- a/java/src/com/android/inputmethod/latin/makedict/DictUpdater.java +++ b/java/src/com/android/inputmethod/latin/makedict/DictUpdater.java @@ -24,7 +24,7 @@ import java.util.ArrayList; /** * An interface of a binary dictionary updater. */ -public interface DictUpdater { +public interface DictUpdater extends DictDecoder { /** * Deletes the word from the binary dictionary. diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver3DictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver3DictDecoder.java index b87259c38..acab4f8a5 100644 --- a/java/src/com/android/inputmethod/latin/makedict/Ver3DictDecoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/Ver3DictDecoder.java @@ -37,7 +37,7 @@ import java.util.Arrays; * An implementation of DictDecoder for version 3 binary dictionary. */ @UsedForTesting -public class Ver3DictDecoder extends DictDecoder { +public class Ver3DictDecoder extends AbstractDictDecoder { private static final String TAG = Ver3DictDecoder.class.getSimpleName(); static { @@ -47,7 +47,7 @@ public class Ver3DictDecoder extends DictDecoder { // TODO: implement something sensical instead of just a phony method private static native int doNothing(); - protected static class PtNodeReader extends DictDecoder.PtNodeReader { + protected static class PtNodeReader extends AbstractDictDecoder.PtNodeReader { private static int readFrequency(final DictBuffer dictBuffer) { return dictBuffer.readUnsignedByte(); } diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java index 2d2da5fe0..bab24e301 100644 --- a/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java @@ -37,7 +37,7 @@ import java.util.Arrays; * An implementation of binary dictionary decoder for version 4 binary dictionary. */ @UsedForTesting -public class Ver4DictDecoder extends DictDecoder { +public class Ver4DictDecoder extends AbstractDictDecoder { private static final String TAG = Ver4DictDecoder.class.getSimpleName(); private static final int FILETYPE_TRIE = 1; @@ -157,8 +157,7 @@ public class Ver4DictDecoder extends DictDecoder { new File[] { contentFile }, FormatSpec.SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE); } - - protected static class PtNodeReader extends DictDecoder.PtNodeReader { + protected static class PtNodeReader extends AbstractDictDecoder.PtNodeReader { protected static int readFrequency(final DictBuffer frequencyBuffer, final int terminalId) { frequencyBuffer.position(terminalId * FormatSpec.FREQUENCY_AND_FLAGS_SIZE + 1); return frequencyBuffer.readUnsignedByte();