From 752a33640c0160a2f836f716bf60e4991c07da1c Mon Sep 17 00:00:00 2001 From: Yuichiro Hanada Date: Fri, 23 Aug 2013 23:30:16 +0900 Subject: [PATCH] [Refactor] Add DictDecoder.readUnigramsAndBigramsBinary. Change-Id: I259db91d837c67cbcb3b6dc504b21dca23a6a5be --- .../latin/makedict/BinaryDictIOUtils.java | 2 +- .../latin/makedict/DictDecoder.java | 17 +++++++ .../latin/makedict/Ver3DictDecoder.java | 13 +++++ .../latin/utils/UserHistoryDictIOUtils.java | 21 ++++---- .../BinaryDictDecoderEncoderTests.java | 48 +++++++++---------- 5 files changed, 66 insertions(+), 35 deletions(-) diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java index a08e28c8b..106f02519 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java @@ -148,7 +148,7 @@ public final class BinaryDictIOUtils { * @throws IOException if the file can't be read. * @throws UnsupportedFormatException if the format of the file is not recognized. */ - public static void readUnigramsAndBigramsBinary(final Ver3DictDecoder dictDecoder, + /* package */ static void readUnigramsAndBigramsBinary(final Ver3DictDecoder dictDecoder, final Map words, final Map frequencies, final Map> bigrams) throws IOException, UnsupportedFormatException { diff --git a/java/src/com/android/inputmethod/latin/makedict/DictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/DictDecoder.java index d5fcacc09..11a3f0b3a 100644 --- a/java/src/com/android/inputmethod/latin/makedict/DictDecoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/DictDecoder.java @@ -29,6 +29,8 @@ import java.io.IOException; import java.io.RandomAccessFile; import java.nio.ByteBuffer; import java.nio.channels.FileChannel; +import java.util.ArrayList; +import java.util.TreeMap; /** * An interface of binary dictionary decoder. @@ -71,6 +73,21 @@ public interface DictDecoder { public int getTerminalPosition(final String word) throws IOException, UnsupportedFormatException; + /** + * Reads unigrams and bigrams from the binary file. + * Doesn't store a full memory representation of the dictionary. + * + * @param words the map to store the address as a key and the word as a value. + * @param frequencies the map to store the address as a key and the frequency as a value. + * @param bigrams the map to store the address as a key and the list of address as a value. + * @throws IOException if the file can't be read. + * @throws UnsupportedFormatException if the format of the file is not recognized. + */ + public void readUnigramsAndBigramsBinary(final TreeMap words, + final TreeMap frequencies, + final TreeMap> bigrams) + throws IOException, UnsupportedFormatException; + // Flags for DictionaryBufferFactory. public static final int USE_READONLY_BYTEBUFFER = 0x01000000; public static final int USE_BYTEARRAY = 0x02000000; diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver3DictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver3DictDecoder.java index 77e6393ee..1fff9b49e 100644 --- a/java/src/com/android/inputmethod/latin/makedict/Ver3DictDecoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/Ver3DictDecoder.java @@ -31,6 +31,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; +import java.util.TreeMap; /** * An implementation of DictDecoder for version 3 binary dictionary. @@ -317,4 +318,16 @@ public class Ver3DictDecoder implements DictDecoder { } return BinaryDictIOUtils.getTerminalPosition(this, word); } + + @Override + public void readUnigramsAndBigramsBinary(final TreeMap words, + final TreeMap frequencies, + final TreeMap> bigrams) + throws IOException, UnsupportedFormatException { + if (mDictBuffer == null) { + openDictBuffer(); + } + BinaryDictIOUtils.readUnigramsAndBigramsBinary(this, words, frequencies, bigrams); + } + } diff --git a/java/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtils.java b/java/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtils.java index 9d3d8a5da..99788f6f2 100644 --- a/java/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtils.java +++ b/java/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtils.java @@ -32,7 +32,8 @@ import com.android.inputmethod.latin.personalization.UserHistoryDictionaryBigram import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; -import java.util.Map; +import java.util.Map.Entry; +import java.util.TreeMap; /** * Reads and writes Binary files for a UserHistoryDictionary. @@ -119,12 +120,11 @@ public final class UserHistoryDictIOUtils { */ public static void readDictionaryBinary(final Ver3DictDecoder dictDecoder, final OnAddWordListener dict) { - final Map unigrams = CollectionUtils.newTreeMap(); - final Map frequencies = CollectionUtils.newTreeMap(); - final Map> bigrams = CollectionUtils.newTreeMap(); + final TreeMap unigrams = CollectionUtils.newTreeMap(); + final TreeMap frequencies = CollectionUtils.newTreeMap(); + final TreeMap> bigrams = CollectionUtils.newTreeMap(); try { - BinaryDictIOUtils.readUnigramsAndBigramsBinary(dictDecoder, unigrams, frequencies, - bigrams); + dictDecoder.readUnigramsAndBigramsBinary(unigrams, frequencies, bigrams); } catch (IOException e) { Log.e(TAG, "IO exception while reading file", e); } catch (UnsupportedFormatException e) { @@ -139,10 +139,11 @@ public final class UserHistoryDictIOUtils { * Adds all unigrams and bigrams in maps to OnAddWordListener. */ @UsedForTesting - static void addWordsFromWordMap(final Map unigrams, - final Map frequencies, - final Map> bigrams, final OnAddWordListener to) { - for (Map.Entry entry : unigrams.entrySet()) { + static void addWordsFromWordMap(final TreeMap unigrams, + final TreeMap frequencies, + final TreeMap> bigrams, + final OnAddWordListener to) { + for (Entry entry : unigrams.entrySet()) { final String word1 = entry.getValue(); final int unigramFrequency = frequencies.get(entry.getKey()); to.setUnigram(word1, null, unigramFrequency); diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java index bb5b96a48..72ec5a302 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java +++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java @@ -39,10 +39,10 @@ import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.List; -import java.util.Map; import java.util.Map.Entry; import java.util.Random; import java.util.Set; +import java.util.TreeMap; /** * Unit tests for BinaryDictDecoderUtils and BinaryDictEncoderUtils. @@ -61,13 +61,13 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { private static final int USE_BYTE_ARRAY = 1; private static final int USE_BYTE_BUFFER = 2; - private static final List sWords = CollectionUtils.newArrayList(); + private static final ArrayList sWords = CollectionUtils.newArrayList(); private static final SparseArray> sEmptyBigrams = CollectionUtils.newSparseArray(); private static final SparseArray> sStarBigrams = CollectionUtils.newSparseArray(); private static final SparseArray> sChainBigrams = CollectionUtils.newSparseArray(); - private static final Map> sShortcuts = CollectionUtils.newHashMap(); + private static final HashMap> sShortcuts = CollectionUtils.newHashMap(); private static final FormatSpec.FormatOptions VERSION2 = new FormatSpec.FormatOptions(2); private static final FormatSpec.FormatOptions VERSION3_WITHOUT_DYNAMIC_UPDATE = @@ -177,7 +177,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { * Adds unigrams to the dictionary. */ private void addUnigrams(final int number, final FusionDictionary dict, - final List words, final Map> shortcutMap) { + final List words, final HashMap> shortcutMap) { for (int i = 0; i < number; ++i) { final String word = words.get(i); final ArrayList shortcuts = CollectionUtils.newArrayList(); @@ -234,7 +234,8 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { } private void checkDictionary(final FusionDictionary dict, final List words, - final SparseArray> bigrams, final Map> shortcutMap) { + final SparseArray> bigrams, + final HashMap> shortcutMap) { assertNotNull(dict); // check unigram @@ -255,7 +256,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { // check shortcut if (shortcutMap != null) { - for (final Map.Entry> entry : shortcutMap.entrySet()) { + for (final Entry> entry : shortcutMap.entrySet()) { assertTrue(words.contains(entry.getKey())); final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray, entry.getKey()); @@ -278,8 +279,8 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { // Tests for readDictionaryBinary and writeDictionaryBinary private long timeReadingAndCheckDict(final File file, final List words, - final SparseArray> bigrams, final Map> shortcutMap, - final int bufferType) { + final SparseArray> bigrams, + final HashMap> shortcutMap, final int bufferType) { long now, diff = -1; FusionDictionary dict = null; @@ -302,7 +303,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { // Tests for readDictionaryBinary and writeDictionaryBinary private String runReadAndWrite(final List words, - final SparseArray> bigrams, final Map> shortcuts, + final SparseArray> bigrams, final HashMap> shortcuts, final int bufferType, final FormatSpec.FormatOptions formatOptions, final String message) { File file = null; @@ -387,9 +388,9 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { private void checkWordMap(final List expectedWords, final SparseArray> expectedBigrams, - final Map resultWords, - final Map resultFrequencies, - final Map> resultBigrams) { + final TreeMap resultWords, + final TreeMap resultFrequencies, + final TreeMap> resultBigrams) { // check unigrams final Set actualWordsSet = new HashSet(resultWords.values()); final Set expectedWordsSet = new HashSet(expectedWords); @@ -400,7 +401,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { } // check bigrams - final Map> expBigrams = new HashMap>(); + final HashMap> expBigrams = new HashMap>(); for (int i = 0; i < expectedBigrams.size(); ++i) { final String word1 = expectedWords.get(expectedBigrams.keyAt(i)); for (int w2 : expectedBigrams.valueAt(i)) { @@ -411,7 +412,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { } } - final Map> actBigrams = new HashMap>(); + final HashMap> actBigrams = new HashMap>(); for (Entry> entry : resultBigrams.entrySet()) { final String word1 = resultWords.get(entry.getKey()); final int unigramFreq = resultFrequencies.get(entry.getKey()); @@ -435,10 +436,10 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { final SparseArray> bigrams, final int bufferType) { FileInputStream inStream = null; - final Map resultWords = CollectionUtils.newTreeMap(); - final Map> resultBigrams = + final TreeMap resultWords = CollectionUtils.newTreeMap(); + final TreeMap> resultBigrams = CollectionUtils.newTreeMap(); - final Map resultFreqs = CollectionUtils.newTreeMap(); + final TreeMap resultFreqs = CollectionUtils.newTreeMap(); long now = -1, diff = -1; try { @@ -446,8 +447,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { dictDecoder.openDictBuffer(); assertNotNull("Can't get buffer.", dictDecoder.getDictBuffer()); now = System.currentTimeMillis(); - BinaryDictIOUtils.readUnigramsAndBigramsBinary(dictDecoder, resultWords, resultFreqs, - resultBigrams); + dictDecoder.readUnigramsAndBigramsBinary(resultWords, resultFreqs, resultBigrams); diff = System.currentTimeMillis() - now; } catch (IOException e) { Log.e(TAG, "IOException", e); @@ -467,7 +467,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { return diff; } - private String runReadUnigramsAndBigramsBinary(final List words, + private String runReadUnigramsAndBigramsBinary(final ArrayList words, final SparseArray> bigrams, final int bufferType, final FormatSpec.FormatOptions formatOptions, final String message) { File file = null; @@ -496,8 +496,8 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { + " : " + message + " : " + outputOptions(bufferType, formatOptions); } - private void runReadUnigramsAndBigramsTests(final List results, final int bufferType, - final FormatSpec.FormatOptions formatOptions) { + private void runReadUnigramsAndBigramsTests(final ArrayList results, + final int bufferType, final FormatSpec.FormatOptions formatOptions) { results.add(runReadUnigramsAndBigramsBinary(sWords, sEmptyBigrams, bufferType, formatOptions, "unigram")); results.add(runReadUnigramsAndBigramsBinary(sWords, sChainBigrams, bufferType, @@ -507,7 +507,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { } public void testReadUnigramsAndBigramsBinaryWithByteBuffer() { - final List results = CollectionUtils.newArrayList(); + final ArrayList results = CollectionUtils.newArrayList(); runReadUnigramsAndBigramsTests(results, USE_BYTE_BUFFER, VERSION2); runReadUnigramsAndBigramsTests(results, USE_BYTE_BUFFER, VERSION3_WITHOUT_DYNAMIC_UPDATE); @@ -519,7 +519,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { } public void testReadUnigramsAndBigramsBinaryWithByteArray() { - final List results = CollectionUtils.newArrayList(); + final ArrayList results = CollectionUtils.newArrayList(); runReadUnigramsAndBigramsTests(results, USE_BYTE_ARRAY, VERSION2); runReadUnigramsAndBigramsTests(results, USE_BYTE_ARRAY, VERSION3_WITHOUT_DYNAMIC_UPDATE);