diff --git a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java index fa8fb2028..80daedd50 100644 --- a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java @@ -121,8 +121,7 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { private static boolean needsToMigrateDictionary(final int formatVersion) { // When we bump up the dictionary format version, the old version should be added to here // for supporting migration. Note that native code has to support reading such formats. - return formatVersion == FormatSpec.VERSION4_ONLY_FOR_TESTING - || formatVersion == FormatSpec.VERSION402; + return formatVersion == FormatSpec.VERSION402; } public boolean isValidDictionaryLocked() { diff --git a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java index 288261bf0..e422c4cd2 100644 --- a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java +++ b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java @@ -174,9 +174,6 @@ public final class FormatSpec { public static final int VERSION202 = 202; // format version for Fava Dictionaries. public static final int VERSION_DELIGHT3 = 86736212; - public static final int MINIMUM_SUPPORTED_VERSION_OF_CODE_POINT_TABLE = VERSION201; - // Dictionary version used for testing. - public static final int VERSION4_ONLY_FOR_TESTING = 399; public static final int VERSION402 = 402; public static final int VERSION403 = 403; public static final int VERSION4 = VERSION403; diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java index 89167f744..e92831c48 100644 --- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java +++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java @@ -42,8 +42,6 @@ import java.util.Random; public class BinaryDictionaryTests extends AndroidTestCase { private static final String TEST_DICT_FILE_EXTENSION = ".testDict"; private static final String TEST_LOCALE = "test"; - private static final int[] DICT_FORMAT_VERSIONS = - new int[] { FormatSpec.VERSION402, FormatSpec.VERSION403 }; private static final String DICTIONARY_ID = "TestBinaryDictionary"; private static boolean supportsNgram(final int formatVersion) { @@ -113,13 +111,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { } public void testIsValidDictionary() { - for (final int formatVersion : DICT_FORMAT_VERSIONS) { - testIsValidDictionary(formatVersion); - } - } - - private void testIsValidDictionary(final int formatVersion) { - final File dictFile = createEmptyDictionaryAndGetFile(formatVersion); + final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403); BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile); assertTrue("binaryDictionary must be valid for existing valid dictionary file.", binaryDictionary.isValidDictionary()); @@ -134,20 +126,14 @@ public class BinaryDictionaryTests extends AndroidTestCase { } public void testConstructingDictionaryOnMemory() { - for (final int formatVersion : DICT_FORMAT_VERSIONS) { - testConstructingDictionaryOnMemory(formatVersion); - } - } - - private void testConstructingDictionaryOnMemory(final int formatVersion) { - final File dictFile = createEmptyDictionaryAndGetFile(formatVersion); + final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403); FileUtils.deleteRecursively(dictFile); assertFalse(dictFile.exists()); final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), - true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, formatVersion, - new HashMap()); + true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, + FormatSpec.VERSION403, new HashMap()); assertTrue(binaryDictionary.isValidDictionary()); - assertEquals(formatVersion, binaryDictionary.getFormatVersion()); + assertEquals(FormatSpec.VERSION403, binaryDictionary.getFormatVersion()); final int probability = 100; addUnigramWord(binaryDictionary, "word", probability); assertEquals(probability, binaryDictionary.getFrequency("word")); @@ -155,19 +141,13 @@ public class BinaryDictionaryTests extends AndroidTestCase { binaryDictionary.flush(); assertTrue(dictFile.exists()); assertTrue(binaryDictionary.isValidDictionary()); - assertEquals(formatVersion, binaryDictionary.getFormatVersion()); + assertEquals(FormatSpec.VERSION403, binaryDictionary.getFormatVersion()); assertEquals(probability, binaryDictionary.getFrequency("word")); binaryDictionary.close(); } public void testAddTooLongWord() { - for (final int formatVersion : DICT_FORMAT_VERSIONS) { - testAddTooLongWord(formatVersion); - } - } - - private void testAddTooLongWord(final int formatVersion) { - final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(formatVersion); + final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403); final StringBuffer stringBuilder = new StringBuffer(); for (int i = 0; i < BinaryDictionary.DICTIONARY_MAX_WORD_LENGTH; i++) { stringBuilder.append('a'); @@ -234,13 +214,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { } public void testAddUnigramWord() { - for (final int formatVersion : DICT_FORMAT_VERSIONS) { - testAddUnigramWord(formatVersion); - } - } - - private void testAddUnigramWord(final int formatVersion) { - final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(formatVersion); + final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403); final int probability = 100; addUnigramWord(binaryDictionary, "aaa", probability); // Reallocate and create. @@ -267,16 +241,10 @@ public class BinaryDictionaryTests extends AndroidTestCase { } public void testRandomlyAddUnigramWord() { - for (final int formatVersion : DICT_FORMAT_VERSIONS) { - testRandomlyAddUnigramWord(formatVersion); - } - } - - private void testRandomlyAddUnigramWord(final int formatVersion) { final int wordCount = 1000; final int codePointSetSize = 50; final long seed = System.currentTimeMillis(); - final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(formatVersion); + final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403); final HashMap probabilityMap = new HashMap<>(); // Test a word that isn't contained within the dictionary. @@ -295,13 +263,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { } public void testAddBigramWords() { - for (final int formatVersion : DICT_FORMAT_VERSIONS) { - testAddBigramWords(formatVersion); - } - } - - private void testAddBigramWords(final int formatVersion) { - final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(formatVersion); + final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403); final int unigramProbability = 100; final int bigramProbability = 150; @@ -354,18 +316,12 @@ public class BinaryDictionaryTests extends AndroidTestCase { } public void testRandomlyAddBigramWords() { - for (final int formatVersion : DICT_FORMAT_VERSIONS) { - testRandomlyAddBigramWords(formatVersion); - } - } - - private void testRandomlyAddBigramWords(final int formatVersion) { final int wordCount = 100; final int bigramCount = 1000; final int codePointSetSize = 50; final long seed = System.currentTimeMillis(); final Random random = new Random(seed); - final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(formatVersion); + final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403); final ArrayList words = new ArrayList<>(); final ArrayList> bigramWords = new ArrayList<>(); @@ -406,15 +362,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { } public void testAddTrigramWords() { - for (final int formatVersion : DICT_FORMAT_VERSIONS) { - if (supportsNgram(formatVersion)) { - testAddTrigramWords(formatVersion); - } - } - } - - private void testAddTrigramWords(final int formatVersion) { - final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(formatVersion); + final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403); final int unigramProbability = 100; final int trigramProbability = 150; final int updatedTrigramProbability = 200; @@ -440,13 +388,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { } public void testFlushDictionary() { - for (final int formatVersion : DICT_FORMAT_VERSIONS) { - testFlushDictionary(formatVersion); - } - } - - private void testFlushDictionary(final int formatVersion) { - final File dictFile = createEmptyDictionaryAndGetFile(formatVersion); + final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403); BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile); final int probability = 100; @@ -480,13 +422,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { } public void testFlushWithGCDictionary() { - for (final int formatVersion : DICT_FORMAT_VERSIONS) { - testFlushWithGCDictionary(formatVersion); - } - } - - private void testFlushWithGCDictionary(final int formatVersion) { - final File dictFile = createEmptyDictionaryAndGetFile(formatVersion); + final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403); BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile); final int unigramProbability = 100; final int bigramProbability = 150; @@ -516,20 +452,13 @@ public class BinaryDictionaryTests extends AndroidTestCase { } public void testAddBigramWordsAndFlashWithGC() { - for (final int formatVersion : DICT_FORMAT_VERSIONS) { - testAddBigramWordsAndFlashWithGC(formatVersion); - } - } - - // TODO: Evaluate performance of GC - private void testAddBigramWordsAndFlashWithGC(final int formatVersion) { final int wordCount = 100; final int bigramCount = 1000; final int codePointSetSize = 30; final long seed = System.currentTimeMillis(); final Random random = new Random(seed); - final File dictFile = createEmptyDictionaryAndGetFile(formatVersion); + final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403); BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile); final ArrayList words = new ArrayList<>(); @@ -575,12 +504,6 @@ public class BinaryDictionaryTests extends AndroidTestCase { } public void testRandomOperationsAndFlashWithGC() { - for (final int formatVersion : DICT_FORMAT_VERSIONS) { - testRandomOperationsAndFlashWithGC(formatVersion); - } - } - - private void testRandomOperationsAndFlashWithGC(final int formatVersion) { final int maxUnigramCount = 5000; final int maxBigramCount = 10000; final HashMap attributeMap = new HashMap<>(); @@ -596,7 +519,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { final long seed = System.currentTimeMillis(); final Random random = new Random(seed); - final File dictFile = createEmptyDictionaryWithAttributesAndGetFile(formatVersion, + final File dictFile = createEmptyDictionaryWithAttributesAndGetFile(FormatSpec.VERSION403, attributeMap); BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile); @@ -675,19 +598,13 @@ public class BinaryDictionaryTests extends AndroidTestCase { } public void testAddManyUnigramsAndFlushWithGC() { - for (final int formatVersion : DICT_FORMAT_VERSIONS) { - testAddManyUnigramsAndFlushWithGC(formatVersion); - } - } - - private void testAddManyUnigramsAndFlushWithGC(final int formatVersion) { final int flashWithGCIterationCount = 3; final int codePointSetSize = 50; final long seed = System.currentTimeMillis(); final Random random = new Random(seed); - final File dictFile = createEmptyDictionaryAndGetFile(formatVersion); + final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403); final ArrayList words = new ArrayList<>(); final HashMap unigramProbabilities = new HashMap<>(); @@ -716,12 +633,6 @@ public class BinaryDictionaryTests extends AndroidTestCase { } public void testUnigramAndBigramCount() { - for (final int formatVersion : DICT_FORMAT_VERSIONS) { - testUnigramAndBigramCount(formatVersion); - } - } - - private void testUnigramAndBigramCount(final int formatVersion) { final int maxUnigramCount = 5000; final int maxBigramCount = 10000; final HashMap attributeMap = new HashMap<>(); @@ -734,7 +645,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { final int bigramCountPerIteration = 2000; final long seed = System.currentTimeMillis(); final Random random = new Random(seed); - final File dictFile = createEmptyDictionaryWithAttributesAndGetFile(formatVersion, + final File dictFile = createEmptyDictionaryWithAttributesAndGetFile(FormatSpec.VERSION403, attributeMap); final ArrayList words = new ArrayList<>(); @@ -778,19 +689,13 @@ public class BinaryDictionaryTests extends AndroidTestCase { } public void testGetWordProperties() { - for (final int formatVersion : DICT_FORMAT_VERSIONS) { - testGetWordProperties(formatVersion); - } - } - - private void testGetWordProperties(final int formatVersion) { final long seed = System.currentTimeMillis(); final Random random = new Random(seed); final int UNIGRAM_COUNT = 1000; final int BIGRAM_COUNT = 1000; final int codePointSetSize = 20; final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); - final File dictFile = createEmptyDictionaryAndGetFile(formatVersion); + final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403); final BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile); final WordProperty invalidWordProperty = binaryDictionary.getWordProperty("dummyWord", @@ -869,19 +774,13 @@ public class BinaryDictionaryTests extends AndroidTestCase { } public void testIterateAllWords() { - for (final int formatVersion : DICT_FORMAT_VERSIONS) { - testIterateAllWords(formatVersion); - } - } - - private void testIterateAllWords(final int formatVersion) { final long seed = System.currentTimeMillis(); final Random random = new Random(seed); final int UNIGRAM_COUNT = 1000; final int BIGRAM_COUNT = 1000; final int codePointSetSize = 20; final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); - final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(formatVersion); + final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403); final WordProperty invalidWordProperty = binaryDictionary.getWordProperty("dummyWord", false /* isBeginningOfSentence */); @@ -965,123 +864,8 @@ public class BinaryDictionaryTests extends AndroidTestCase { assertEquals(true, wordProperty.mIsPossiblyOffensive); } - public void testDictMigration() { - for (final int formatVersion : DICT_FORMAT_VERSIONS) { - testDictMigration(FormatSpec.VERSION4_ONLY_FOR_TESTING, formatVersion); - } - } - - private void testDictMigration(final int fromFormatVersion, final int toFormatVersion) { - final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(fromFormatVersion); - final int unigramProbability = 100; - addUnigramWord(binaryDictionary, "aaa", unigramProbability); - addUnigramWord(binaryDictionary, "bbb", unigramProbability); - final int bigramProbability = 150; - addBigramWords(binaryDictionary, "aaa", "bbb", bigramProbability); - binaryDictionary.addUnigramEntry("ccc", unigramProbability, - false /* isBeginningOfSentence */, false /* isNotAWord */, - false /* isPossiblyOffensive */, 0 /* timestamp */); - binaryDictionary.addUnigramEntry("ddd", unigramProbability, - false /* isBeginningOfSentence */, - true /* isNotAWord */, true /* isPossiblyOffensive */, 0 /* timestamp */); - binaryDictionary.addNgramEntry(NgramContext.BEGINNING_OF_SENTENCE, - "aaa", bigramProbability, 0 /* timestamp */); - assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa")); - assertEquals(unigramProbability, binaryDictionary.getFrequency("bbb")); - assertTrue(isValidBigram(binaryDictionary, "aaa", "bbb")); - assertEquals(fromFormatVersion, binaryDictionary.getFormatVersion()); - assertTrue(binaryDictionary.migrateTo(toFormatVersion)); - assertTrue(binaryDictionary.isValidDictionary()); - assertEquals(toFormatVersion, binaryDictionary.getFormatVersion()); - assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa")); - assertEquals(unigramProbability, binaryDictionary.getFrequency("bbb")); - assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bbb")); - assertEquals(bigramProbability, binaryDictionary.getNgramProbability( - NgramContext.BEGINNING_OF_SENTENCE, "aaa")); - assertTrue(isValidBigram(binaryDictionary, "aaa", "bbb")); - WordProperty wordProperty = binaryDictionary.getWordProperty("ccc", - false /* isBeginningOfSentence */); - wordProperty = binaryDictionary.getWordProperty("ddd", - false /* isBeginningOfSentence */); - assertTrue(wordProperty.mIsPossiblyOffensive); - assertTrue(wordProperty.mIsNotAWord); - } - - public void testLargeDictMigration() { - for (final int formatVersion : DICT_FORMAT_VERSIONS) { - testLargeDictMigration(FormatSpec.VERSION4_ONLY_FOR_TESTING, formatVersion); - } - } - - private void testLargeDictMigration(final int fromFormatVersion, final int toFormatVersion) { - final int UNIGRAM_COUNT = 3000; - final int BIGRAM_COUNT = 3000; - final int codePointSetSize = 50; - final long seed = System.currentTimeMillis(); - final Random random = new Random(seed); - final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(fromFormatVersion); - - final ArrayList words = new ArrayList<>(); - final ArrayList> bigrams = new ArrayList<>(); - final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); - final HashMap unigramProbabilities = new HashMap<>(); - final HashMap, Integer> bigramProbabilities = new HashMap<>(); - - for (int i = 0; i < UNIGRAM_COUNT; i++) { - final String word = CodePointUtils.generateWord(random, codePointSet); - final int unigramProbability = random.nextInt(0xFF); - addUnigramWord(binaryDictionary, word, unigramProbability); - if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) { - binaryDictionary.flushWithGC(); - } - words.add(word); - unigramProbabilities.put(word, unigramProbability); - } - - for (int i = 0; i < BIGRAM_COUNT; i++) { - final int word0Index = random.nextInt(words.size()); - final int word1Index = random.nextInt(words.size()); - if (word0Index == word1Index) { - continue; - } - final String word0 = words.get(word0Index); - final String word1 = words.get(word1Index); - final int unigramProbability = unigramProbabilities.get(word1); - final int bigramProbability = - random.nextInt(0xFF - unigramProbability) + unigramProbability; - addBigramWords(binaryDictionary, word0, word1, bigramProbability); - if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) { - binaryDictionary.flushWithGC(); - } - final Pair bigram = new Pair<>(word0, word1); - bigrams.add(bigram); - bigramProbabilities.put(bigram, bigramProbability); - } - assertTrue(binaryDictionary.migrateTo(toFormatVersion)); - - for (final String word : words) { - assertEquals((int)unigramProbabilities.get(word), binaryDictionary.getFrequency(word)); - } - assertEquals(unigramProbabilities.size(), Integer.parseInt( - binaryDictionary.getPropertyForGettingStats(BinaryDictionary.UNIGRAM_COUNT_QUERY))); - - for (final Pair bigram : bigrams) { - assertEquals((int)bigramProbabilities.get(bigram), - getBigramProbability(binaryDictionary, bigram.first, bigram.second)); - assertTrue(isValidBigram(binaryDictionary, bigram.first, bigram.second)); - } - assertEquals(bigramProbabilities.size(), Integer.parseInt( - binaryDictionary.getPropertyForGettingStats(BinaryDictionary.BIGRAM_COUNT_QUERY))); - } - public void testBeginningOfSentence() { - for (final int formatVersion : DICT_FORMAT_VERSIONS) { - testBeginningOfSentence(formatVersion); - } - } - - private void testBeginningOfSentence(final int formatVersion) { - final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(formatVersion); + final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403); final int dummyProbability = 0; final NgramContext beginningOfSentenceContext = NgramContext.BEGINNING_OF_SENTENCE; final int bigramProbability = 200; diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java index a432ca740..da1b32a8b 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java +++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java @@ -43,22 +43,12 @@ public final class BinaryDictIOUtils { */ public static DictDecoder getDictDecoder(final File dictFile, final long offset, final long length, final int bufferType) { - if (dictFile.isDirectory()) { - return new Ver4DictDecoder(dictFile); - } else if (dictFile.isFile()) { - return new Ver2DictDecoder(dictFile, offset, length, bufferType); - } - return null; + return new Ver4DictDecoder(dictFile); } public static DictDecoder getDictDecoder(final File dictFile, final long offset, final long length, final DictionaryBufferFactory factory) { - if (dictFile.isDirectory()) { - return new Ver4DictDecoder(dictFile); - } else if (dictFile.isFile()) { - return new Ver2DictDecoder(dictFile, offset, length, factory); - } - return null; + return new Ver4DictDecoder(dictFile); } public static DictDecoder getDictDecoder(final File dictFile, final long offset, diff --git a/tests/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java b/tests/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java deleted file mode 100644 index 7ee1df92b..000000000 --- a/tests/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java +++ /dev/null @@ -1,319 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.android.inputmethod.latin.makedict; - -import com.android.inputmethod.annotations.UsedForTesting; -import com.android.inputmethod.latin.BinaryDictionary; -import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding; -import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; - -import java.io.File; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; - -/** - * An implementation of DictDecoder for version 2 binary dictionary. - */ -// TODO: Separate logics that are used only for testing. -@UsedForTesting -public class Ver2DictDecoder extends AbstractDictDecoder { - /** - * A utility class for reading a PtNode. - */ - static class PtNodeReader { - static ProbabilityInfo readProbabilityInfo(final DictBuffer dictBuffer) { - // Ver2 dicts don't contain historical information. - return new ProbabilityInfo(dictBuffer.readUnsignedByte()); - } - - static int readPtNodeOptionFlags(final DictBuffer dictBuffer) { - return dictBuffer.readUnsignedByte(); - } - - static int readChildrenAddress(final DictBuffer dictBuffer, - final int ptNodeFlags) { - switch (ptNodeFlags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) { - case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_ONEBYTE: - return dictBuffer.readUnsignedByte(); - case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_TWOBYTES: - return dictBuffer.readUnsignedShort(); - case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_THREEBYTES: - return dictBuffer.readUnsignedInt24(); - case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_NOADDRESS: - default: - return FormatSpec.NO_CHILDREN_ADDRESS; - } - } - - // Reads shortcuts and returns the read length. - static int readShortcut(final DictBuffer dictBuffer, - final ArrayList shortcutTargets) { - final int pointerBefore = dictBuffer.position(); - dictBuffer.readUnsignedShort(); // skip the size - while (true) { - final int targetFlags = dictBuffer.readUnsignedByte(); - final String word = CharEncoding.readString(dictBuffer); - shortcutTargets.add(new WeightedString(word, - targetFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY)); - if (0 == (targetFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break; - } - return dictBuffer.position() - pointerBefore; - } - - static int readBigramAddresses(final DictBuffer dictBuffer, - final ArrayList bigrams, final int baseAddress) { - int readLength = 0; - int bigramCount = 0; - while (bigramCount++ < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) { - final int bigramFlags = dictBuffer.readUnsignedByte(); - ++readLength; - final int sign = 0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_ATTR_OFFSET_NEGATIVE) - ? 1 : -1; - int bigramAddress = baseAddress + readLength; - switch (bigramFlags & FormatSpec.MASK_BIGRAM_ATTR_ADDRESS_TYPE) { - case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_ONEBYTE: - bigramAddress += sign * dictBuffer.readUnsignedByte(); - readLength += 1; - break; - case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_TWOBYTES: - bigramAddress += sign * dictBuffer.readUnsignedShort(); - readLength += 2; - break; - case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_THREEBYTES: - bigramAddress += sign * dictBuffer.readUnsignedInt24(); - readLength += 3; - break; - default: - throw new RuntimeException("Has bigrams with no address"); - } - bigrams.add(new PendingAttribute( - bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY, - bigramAddress)); - if (0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break; - } - return readLength; - } - } - - protected final File mDictionaryBinaryFile; - protected final long mOffset; - protected final long mLength; - // TODO: Remove mBufferFactory and mDictBuffer from this class members because they are now - // used only for testing. - private final DictionaryBufferFactory mBufferFactory; - protected DictBuffer mDictBuffer; - - @UsedForTesting - /* package */ Ver2DictDecoder(final File file, final long offset, final long length, - final int factoryFlag) { - mDictionaryBinaryFile = file; - mOffset = offset; - mLength = length; - mDictBuffer = null; - if ((factoryFlag & MASK_DICTBUFFER) == USE_READONLY_BYTEBUFFER) { - mBufferFactory = new DictionaryBufferFromReadOnlyByteBufferFactory(); - } else if ((factoryFlag & MASK_DICTBUFFER) == USE_BYTEARRAY) { - mBufferFactory = new DictionaryBufferFromByteArrayFactory(); - } else if ((factoryFlag & MASK_DICTBUFFER) == USE_WRITABLE_BYTEBUFFER) { - mBufferFactory = new DictionaryBufferFromWritableByteBufferFactory(); - } else { - mBufferFactory = new DictionaryBufferFromReadOnlyByteBufferFactory(); - } - } - - /* package */ Ver2DictDecoder(final File file, final long offset, final long length, - final DictionaryBufferFactory factory) { - mDictionaryBinaryFile = file; - mOffset = offset; - mLength = length; - mBufferFactory = factory; - } - - @Override - public void openDictBuffer() throws FileNotFoundException, IOException { - mDictBuffer = mBufferFactory.getDictionaryBuffer(mDictionaryBinaryFile); - } - - @Override - public boolean isDictBufferOpen() { - return mDictBuffer != null; - } - - /* package */ DictBuffer getDictBuffer() { - return mDictBuffer; - } - - @UsedForTesting - /* package */ DictBuffer openAndGetDictBuffer() throws FileNotFoundException, IOException { - openDictBuffer(); - return getDictBuffer(); - } - - @Override - public DictionaryHeader readHeader() throws IOException, UnsupportedFormatException { - // dictType is not being used in dicttool. Passing an empty string. - final BinaryDictionary binaryDictionary = new BinaryDictionary( - mDictionaryBinaryFile.getAbsolutePath(), mOffset, mLength, - true /* useFullEditDistance */, null /* locale */, "" /* dictType */, - false /* isUpdatable */); - final DictionaryHeader header = binaryDictionary.getHeader(); - binaryDictionary.close(); - if (header == null) { - throw new IOException("Cannot read the dictionary header."); - } - if (header.mFormatOptions.mVersion != FormatSpec.VERSION2 && - header.mFormatOptions.mVersion != FormatSpec.VERSION201 && - header.mFormatOptions.mVersion != FormatSpec.VERSION202) { - throw new UnsupportedFormatException("File header has a wrong version : " - + header.mFormatOptions.mVersion); - } - if (!isDictBufferOpen()) { - openDictBuffer(); - } - // Advance buffer reading position to the head of dictionary body. - setPosition(header.mBodyOffset); - return header; - } - - // TODO: Make this buffer multi thread safe. - private final int[] mCharacterBuffer = new int[FormatSpec.MAX_WORD_LENGTH]; - @Override - public PtNodeInfo readPtNode(final int ptNodePos) { - int addressPointer = ptNodePos; - final int flags = PtNodeReader.readPtNodeOptionFlags(mDictBuffer); - addressPointer += FormatSpec.PTNODE_FLAGS_SIZE; - final int characters[]; - if (0 != (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS)) { - int index = 0; - int character = CharEncoding.readChar(mDictBuffer); - addressPointer += CharEncoding.getCharSize(character, null); - while (FormatSpec.INVALID_CHARACTER != character) { - // FusionDictionary is making sure that the length of the word is smaller than - // MAX_WORD_LENGTH. - // So we'll never write past the end of mCharacterBuffer. - mCharacterBuffer[index++] = character; - character = CharEncoding.readChar(mDictBuffer); - addressPointer += CharEncoding.getCharSize(character, null); - } - characters = Arrays.copyOfRange(mCharacterBuffer, 0, index); - } else { - final int character = CharEncoding.readChar(mDictBuffer); - addressPointer += CharEncoding.getCharSize(character, null); - characters = new int[] { character }; - } - final ProbabilityInfo probabilityInfo; - if (0 != (FormatSpec.FLAG_IS_TERMINAL & flags)) { - probabilityInfo = PtNodeReader.readProbabilityInfo(mDictBuffer); - addressPointer += FormatSpec.PTNODE_FREQUENCY_SIZE; - } else { - probabilityInfo = null; - } - int childrenAddress = PtNodeReader.readChildrenAddress(mDictBuffer, flags); - if (childrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) { - childrenAddress += addressPointer; - } - addressPointer += BinaryDictIOUtils.getChildrenAddressSize(flags); - final ArrayList shortcutTargets; - if (0 != (flags & FormatSpec.FLAG_HAS_SHORTCUT_TARGETS)) { - // readShortcut will add shortcuts to shortcutTargets. - shortcutTargets = new ArrayList<>(); - addressPointer += PtNodeReader.readShortcut(mDictBuffer, shortcutTargets); - } else { - shortcutTargets = null; - } - - final ArrayList bigrams; - if (0 != (flags & FormatSpec.FLAG_HAS_BIGRAMS)) { - bigrams = new ArrayList<>(); - addressPointer += PtNodeReader.readBigramAddresses(mDictBuffer, bigrams, - addressPointer); - if (bigrams.size() >= FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) { - throw new RuntimeException("Too many bigrams in a PtNode (" + bigrams.size() - + " but max is " + FormatSpec.MAX_BIGRAMS_IN_A_PTNODE + ")"); - } - } else { - bigrams = null; - } - return new PtNodeInfo(ptNodePos, addressPointer, flags, characters, probabilityInfo, - childrenAddress, shortcutTargets, bigrams); - } - - @Override - public FusionDictionary readDictionaryBinary(final boolean deleteDictIfBroken) - throws FileNotFoundException, IOException, UnsupportedFormatException { - // dictType is not being used in dicttool. Passing an empty string. - final BinaryDictionary binaryDictionary = new BinaryDictionary( - mDictionaryBinaryFile.getAbsolutePath(), 0 /* offset */, - mDictionaryBinaryFile.length() /* length */, true /* useFullEditDistance */, - null /* locale */, "" /* dictType */, false /* isUpdatable */); - final DictionaryHeader header = readHeader(); - final FusionDictionary fusionDict = - new FusionDictionary(new FusionDictionary.PtNodeArray(), header.mDictionaryOptions); - int token = 0; - final ArrayList wordProperties = new ArrayList<>(); - do { - final BinaryDictionary.GetNextWordPropertyResult result = - binaryDictionary.getNextWordProperty(token); - final WordProperty wordProperty = result.mWordProperty; - if (wordProperty == null) { - binaryDictionary.close(); - if (deleteDictIfBroken) { - mDictionaryBinaryFile.delete(); - } - return null; - } - wordProperties.add(wordProperty); - token = result.mNextToken; - } while (token != 0); - - // Insert unigrams into the fusion dictionary. - for (final WordProperty wordProperty : wordProperties) { - fusionDict.add(wordProperty.mWord, wordProperty.mProbabilityInfo, - wordProperty.mIsNotAWord, - wordProperty.mIsPossiblyOffensive); - } - // Insert bigrams into the fusion dictionary. - for (final WordProperty wordProperty : wordProperties) { - if (!wordProperty.mHasNgrams) { - continue; - } - final String word0 = wordProperty.mWord; - for (final WeightedString bigram : wordProperty.getBigrams()) { - fusionDict.setBigram(word0, bigram.mWord, bigram.mProbabilityInfo); - } - } - binaryDictionary.close(); - return fusionDict; - } - - @Override - public void setPosition(int newPos) { - mDictBuffer.position(newPos); - } - - @Override - public int getPosition() { - return mDictBuffer.position(); - } - - @Override - public int readPtNodeCount() { - return BinaryDictDecoderUtils.readPtNodeCount(mDictBuffer); - } -} diff --git a/tests/src/com/android/inputmethod/latin/makedict/Ver2DictDecoderTests.java b/tests/src/com/android/inputmethod/latin/makedict/Ver2DictDecoderTests.java deleted file mode 100644 index 3882c2c55..000000000 --- a/tests/src/com/android/inputmethod/latin/makedict/Ver2DictDecoderTests.java +++ /dev/null @@ -1,150 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.android.inputmethod.latin.makedict; - -import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; -import com.android.inputmethod.latin.makedict.DictDecoder.DictionaryBufferFactory; -import com.android.inputmethod.latin.makedict.DictDecoder.DictionaryBufferFromByteArrayFactory; -import com.android.inputmethod.latin.makedict.DictDecoder. - DictionaryBufferFromReadOnlyByteBufferFactory; -import com.android.inputmethod.latin.makedict.DictDecoder. - DictionaryBufferFromWritableByteBufferFactory; - -import android.test.AndroidTestCase; -import android.util.Log; - -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; - -/** - * Unit tests for Ver2DictDecoder - */ -public class Ver2DictDecoderTests extends AndroidTestCase { - private static final String TAG = Ver2DictDecoderTests.class.getSimpleName(); - - private final byte[] data = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; - - // Utilities for testing - public void writeDataToFile(final File file) { - FileOutputStream outStream = null; - try { - outStream = new FileOutputStream(file); - outStream.write(data); - } catch (IOException e) { - fail ("Can't write data to the test file"); - } finally { - if (outStream != null) { - try { - outStream.close(); - } catch (IOException e) { - Log.e(TAG, "Failed to close the output stream", e); - } - } - } - } - - public void runTestOpenBuffer(final String testName, final DictionaryBufferFactory factory) { - File testFile = null; - try { - testFile = File.createTempFile(testName, ".tmp", getContext().getCacheDir()); - } catch (IOException e) { - Log.e(TAG, "IOException while the creating temporary file", e); - } - - assertNotNull(testFile); - final Ver2DictDecoder dictDecoder = new Ver2DictDecoder(testFile, 0, testFile.length(), - factory); - try { - dictDecoder.openDictBuffer(); - } catch (Exception e) { - Log.e(TAG, "Failed to open the buffer", e); - } - - writeDataToFile(testFile); - - try { - dictDecoder.openDictBuffer(); - } catch (Exception e) { - Log.e(TAG, "Raised the exception while opening buffer", e); - } - - assertEquals(testFile.length(), dictDecoder.getDictBuffer().capacity()); - } - - public void testOpenBufferWithByteBuffer() { - runTestOpenBuffer("testOpenBufferWithByteBuffer", - new DictionaryBufferFromReadOnlyByteBufferFactory()); - } - - public void testOpenBufferWithByteArray() { - runTestOpenBuffer("testOpenBufferWithByteArray", - new DictionaryBufferFromByteArrayFactory()); - } - - public void testOpenBufferWithWritableByteBuffer() { - runTestOpenBuffer("testOpenBufferWithWritableByteBuffer", - new DictionaryBufferFromWritableByteBufferFactory()); - } - - public void runTestGetBuffer(final String testName, final DictionaryBufferFactory factory) { - File testFile = null; - try { - testFile = File.createTempFile(testName, ".tmp", getContext().getCacheDir()); - } catch (IOException e) { - Log.e(TAG, "IOException while the creating temporary file", e); - } - - final Ver2DictDecoder dictDecoder = new Ver2DictDecoder(testFile, 0, testFile.length(), - factory); - - // the default return value of getBuffer() must be null. - assertNull("the default return value of getBuffer() is not null", - dictDecoder.getDictBuffer()); - - writeDataToFile(testFile); - assertTrue(testFile.exists()); - Log.d(TAG, "file length = " + testFile.length()); - - DictBuffer dictBuffer = null; - try { - dictBuffer = dictDecoder.openAndGetDictBuffer(); - } catch (IOException e) { - Log.e(TAG, "Failed to open and get the buffer", e); - } - assertNotNull("the buffer must not be null", dictBuffer); - - for (int i = 0; i < data.length; ++i) { - assertEquals(data[i], dictBuffer.readUnsignedByte()); - } - } - - public void testGetBufferWithByteBuffer() { - runTestGetBuffer("testGetBufferWithByteBuffer", - new DictionaryBufferFromReadOnlyByteBufferFactory()); - } - - public void testGetBufferWithByteArray() { - runTestGetBuffer("testGetBufferWithByteArray", - new DictionaryBufferFromByteArrayFactory()); - } - - public void testGetBufferWithWritableByteBuffer() { - runTestGetBuffer("testGetBufferWithWritableByteBuffer", - new DictionaryBufferFromWritableByteBufferFactory()); - } -} diff --git a/tests/src/com/android/inputmethod/latin/makedict/Ver2DictEncoder.java b/tests/src/com/android/inputmethod/latin/makedict/Ver2DictEncoder.java deleted file mode 100644 index c63b972eb..000000000 --- a/tests/src/com/android/inputmethod/latin/makedict/Ver2DictEncoder.java +++ /dev/null @@ -1,279 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.android.inputmethod.latin.makedict; - -import com.android.inputmethod.annotations.UsedForTesting; -import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding; -import com.android.inputmethod.latin.makedict.BinaryDictEncoderUtils.CodePointTable; -import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; -import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode; -import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; - -import java.io.File; -import java.io.FileNotFoundException; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.OutputStream; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.Iterator; -import java.util.Map.Entry; - -/** - * An implementation of DictEncoder for version 2 binary dictionary. - */ -@UsedForTesting -public class Ver2DictEncoder implements DictEncoder { - - private final File mDictFile; - private OutputStream mOutStream; - private byte[] mBuffer; - private int mPosition; - private final int mCodePointTableMode; - public static final int CODE_POINT_TABLE_OFF = 0; - public static final int CODE_POINT_TABLE_ON = 1; - - @UsedForTesting - public Ver2DictEncoder(final File dictFile, final int codePointTableMode) { - mDictFile = dictFile; - mOutStream = null; - mBuffer = null; - mCodePointTableMode = codePointTableMode; - } - - // This constructor is used only by BinaryDictOffdeviceUtilsTests. - // If you want to use this in the production code, you should consider keeping consistency of - // the interface of Ver3DictDecoder by using factory. - @UsedForTesting - public Ver2DictEncoder(final OutputStream outStream) { - mDictFile = null; - mOutStream = outStream; - mCodePointTableMode = CODE_POINT_TABLE_OFF; - } - - private void openStream() throws FileNotFoundException { - mOutStream = new FileOutputStream(mDictFile); - } - - private void close() throws IOException { - if (mOutStream != null) { - mOutStream.close(); - mOutStream = null; - } - } - - // Package for testing - static CodePointTable makeCodePointTable(final FusionDictionary dict) { - final HashMap codePointOccurrenceCounts = new HashMap<>(); - for (final WordProperty word : dict) { - // Store per code point occurrence - final String wordString = word.mWord; - for (int i = 0; i < wordString.length(); ++i) { - final int codePoint = Character.codePointAt(wordString, i); - if (codePointOccurrenceCounts.containsKey(codePoint)) { - codePointOccurrenceCounts.put(codePoint, - codePointOccurrenceCounts.get(codePoint) + 1); - } else { - codePointOccurrenceCounts.put(codePoint, 1); - } - } - } - final ArrayList> codePointOccurrenceArray = - new ArrayList<>(codePointOccurrenceCounts.entrySet()); - // Descending order sort by occurrence (value side) - Collections.sort(codePointOccurrenceArray, new Comparator>() { - @Override - public int compare(final Entry a, final Entry b) { - if (a.getValue() != b.getValue()) { - return b.getValue().compareTo(a.getValue()); - } - return b.getKey().compareTo(a.getKey()); - } - }); - int currentCodePointTableIndex = FormatSpec.MINIMAL_ONE_BYTE_CHARACTER_VALUE; - // Temporary map for writing of nodes - final HashMap codePointToOneByteCodeMap = new HashMap<>(); - for (final Entry entry : codePointOccurrenceArray) { - // Put a relation from the original code point to the one byte code. - codePointToOneByteCodeMap.put(entry.getKey(), currentCodePointTableIndex); - if (FormatSpec.MAXIMAL_ONE_BYTE_CHARACTER_VALUE < ++currentCodePointTableIndex) { - break; - } - } - // codePointToOneByteCodeMap for writing the trie - // codePointOccurrenceArray for writing the header - return new CodePointTable(codePointToOneByteCodeMap, codePointOccurrenceArray); - } - - @Override - public void writeDictionary(final FusionDictionary dict, final FormatOptions formatOptions) - throws IOException, UnsupportedFormatException { - // We no longer support anything but the latest version of v2. - if (formatOptions.mVersion != FormatSpec.VERSION202) { - throw new UnsupportedFormatException( - "The given format options has wrong version number : " - + formatOptions.mVersion); - } - - if (mOutStream == null) { - openStream(); - } - - // Make code point conversion table ordered by occurrence of code points - // Version 201 or later have codePointTable - final CodePointTable codePointTable; - if (mCodePointTableMode == CODE_POINT_TABLE_OFF || formatOptions.mVersion - < FormatSpec.MINIMUM_SUPPORTED_VERSION_OF_CODE_POINT_TABLE) { - codePointTable = new CodePointTable(); - } else { - codePointTable = makeCodePointTable(dict); - } - - BinaryDictEncoderUtils.writeDictionaryHeader(mOutStream, dict, formatOptions, - codePointTable.mCodePointOccurrenceArray); - - // Addresses are limited to 3 bytes, but since addresses can be relative to each node - // array, the structure itself is not limited to 16MB. However, if it is over 16MB deciding - // the order of the PtNode arrays becomes a quite complicated problem, because though the - // dictionary itself does not have a size limit, each node array must still be within 16MB - // of all its children and parents. As long as this is ensured, the dictionary file may - // grow to any size. - - // Leave the choice of the optimal node order to the flattenTree function. - MakedictLog.i("Flattening the tree..."); - ArrayList flatNodes = BinaryDictEncoderUtils.flattenTree(dict.mRootNodeArray); - - MakedictLog.i("Computing addresses..."); - BinaryDictEncoderUtils.computeAddresses(dict, flatNodes, - codePointTable.mCodePointToOneByteCodeMap); - MakedictLog.i("Checking PtNode array..."); - if (MakedictLog.DBG) BinaryDictEncoderUtils.checkFlatPtNodeArrayList(flatNodes); - - // Create a buffer that matches the final dictionary size. - final PtNodeArray lastNodeArray = flatNodes.get(flatNodes.size() - 1); - final int bufferSize = lastNodeArray.mCachedAddressAfterUpdate + lastNodeArray.mCachedSize; - mBuffer = new byte[bufferSize]; - - MakedictLog.i("Writing file..."); - - for (PtNodeArray nodeArray : flatNodes) { - BinaryDictEncoderUtils.writePlacedPtNodeArray(dict, this, nodeArray, - codePointTable.mCodePointToOneByteCodeMap); - } - if (MakedictLog.DBG) BinaryDictEncoderUtils.showStatistics(flatNodes); - mOutStream.write(mBuffer, 0, mPosition); - - MakedictLog.i("Done"); - close(); - } - - @Override - public void setPosition(final int position) { - if (mBuffer == null || position < 0 || position >= mBuffer.length) return; - mPosition = position; - } - - @Override - public int getPosition() { - return mPosition; - } - - @Override - public void writePtNodeCount(final int ptNodeCount) { - final int countSize = BinaryDictIOUtils.getPtNodeCountSize(ptNodeCount); - if (countSize != 1 && countSize != 2) { - throw new RuntimeException("Strange size from getGroupCountSize : " + countSize); - } - final int encodedPtNodeCount = (countSize == 2) ? - (ptNodeCount | FormatSpec.LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE_FLAG) : ptNodeCount; - mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition, encodedPtNodeCount, - countSize); - } - - private void writePtNodeFlags(final PtNode ptNode, - final HashMap codePointToOneByteCodeMap) { - final int childrenPos = BinaryDictEncoderUtils.getChildrenPosition(ptNode, - codePointToOneByteCodeMap); - mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition, - BinaryDictEncoderUtils.makePtNodeFlags(ptNode, childrenPos), - FormatSpec.PTNODE_FLAGS_SIZE); - } - - private void writeCharacters(final int[] codePoints, final boolean hasSeveralChars, - final HashMap codePointToOneByteCodeMap) { - mPosition = CharEncoding.writeCharArray(codePoints, mBuffer, mPosition, - codePointToOneByteCodeMap); - if (hasSeveralChars) { - mBuffer[mPosition++] = FormatSpec.PTNODE_CHARACTERS_TERMINATOR; - } - } - - private void writeFrequency(final int frequency) { - if (frequency >= 0) { - mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition, frequency, - FormatSpec.PTNODE_FREQUENCY_SIZE); - } - } - - private void writeChildrenPosition(final PtNode ptNode, - final HashMap codePointToOneByteCodeMap) { - final int childrenPos = BinaryDictEncoderUtils.getChildrenPosition(ptNode, - codePointToOneByteCodeMap); - mPosition += BinaryDictEncoderUtils.writeChildrenPosition(mBuffer, mPosition, - childrenPos); - } - - /** - * Write a bigram attributes list to mBuffer. - * - * @param bigrams the bigram attributes list. - * @param dict the dictionary the node array is a part of (for relative offsets). - */ - private void writeBigrams(final ArrayList bigrams, - final FusionDictionary dict) { - if (bigrams == null) return; - - final Iterator bigramIterator = bigrams.iterator(); - while (bigramIterator.hasNext()) { - final WeightedString bigram = bigramIterator.next(); - final PtNode target = - FusionDictionary.findWordInTree(dict.mRootNodeArray, bigram.mWord); - final int addressOfBigram = target.mCachedAddressAfterUpdate; - final int unigramFrequencyForThisWord = target.getProbability(); - final int offset = addressOfBigram - - (mPosition + FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE); - final int bigramFlags = BinaryDictEncoderUtils.makeBigramFlags(bigramIterator.hasNext(), - offset, bigram.getProbability(), unigramFrequencyForThisWord, bigram.mWord); - mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition, bigramFlags, - FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE); - mPosition += BinaryDictEncoderUtils.writeChildrenPosition(mBuffer, mPosition, - Math.abs(offset)); - } - } - - @Override - public void writePtNode(final PtNode ptNode, final FusionDictionary dict, - final HashMap codePointToOneByteCodeMap) { - writePtNodeFlags(ptNode, codePointToOneByteCodeMap); - writeCharacters(ptNode.mChars, ptNode.hasSeveralChars(), codePointToOneByteCodeMap); - writeFrequency(ptNode.getProbability()); - writeChildrenPosition(ptNode, codePointToOneByteCodeMap); - writeBigrams(ptNode.mBigrams, dict); - } -}