/* * Copyright (C) 2012 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.android.inputmethod.latin.makedict; import android.test.AndroidTestCase; import android.test.suitebuilder.annotation.LargeTest; import android.util.Log; import android.util.Pair; import android.util.SparseArray; import com.android.inputmethod.latin.BinaryDictionary; import com.android.inputmethod.latin.common.CodePointUtils; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; import com.android.inputmethod.latin.utils.BinaryDictionaryUtils; import com.android.inputmethod.latin.utils.ByteArrayDictBuffer; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Locale; import java.util.Map.Entry; import java.util.Random; import java.util.Set; import java.util.TreeMap; /** * Unit tests for BinaryDictDecoderUtils and BinaryDictEncoderUtils. */ @LargeTest public class BinaryDictDecoderEncoderTests extends AndroidTestCase { private static final String TAG = BinaryDictDecoderEncoderTests.class.getSimpleName(); private static final int DEFAULT_MAX_UNIGRAMS = 300; private static final int DEFAULT_CODE_POINT_SET_SIZE = 50; private static final int LARGE_CODE_POINT_SET_SIZE = 300; private static final int UNIGRAM_FREQ = 10; private static final int BIGRAM_FREQ = 50; private static final int TOLERANCE_OF_BIGRAM_FREQ = 5; private static final ArrayList sWords = new ArrayList<>(); private static final ArrayList sWordsWithVariousCodePoints = new ArrayList<>(); private static final SparseArray> sEmptyBigrams = new SparseArray<>(); private static final SparseArray> sStarBigrams = new SparseArray<>(); private static final SparseArray> sChainBigrams = new SparseArray<>(); final Random mRandom; public BinaryDictDecoderEncoderTests() { this(System.currentTimeMillis(), DEFAULT_MAX_UNIGRAMS); } public BinaryDictDecoderEncoderTests(final long seed, final int maxUnigrams) { super(); BinaryDictionaryUtils.setCurrentTimeForTest(0); Log.e(TAG, "Testing dictionary: seed is " + seed); mRandom = new Random(seed); sWords.clear(); sWordsWithVariousCodePoints.clear(); generateWords(maxUnigrams, mRandom); for (int i = 0; i < sWords.size(); ++i) { sChainBigrams.put(i, new ArrayList()); if (i > 0) { sChainBigrams.get(i - 1).add(i); } } sStarBigrams.put(0, new ArrayList()); // MAX - 1 because we added one above already final int maxBigrams = Math.min(sWords.size(), FormatSpec.MAX_BIGRAMS_IN_A_PTNODE - 1); for (int i = 1; i < maxBigrams; ++i) { sStarBigrams.get(0).add(i); } } @Override protected void setUp() throws Exception { super.setUp(); BinaryDictionaryUtils.setCurrentTimeForTest(0); } @Override protected void tearDown() throws Exception { // Quit test mode. BinaryDictionaryUtils.setCurrentTimeForTest(-1); super.tearDown(); } private static void generateWords(final int number, final Random random) { final int[] codePointSet = CodePointUtils.generateCodePointSet(DEFAULT_CODE_POINT_SET_SIZE, random); final Set wordSet = new HashSet<>(); while (wordSet.size() < number) { wordSet.add(CodePointUtils.generateWord(random, codePointSet)); } sWords.addAll(wordSet); final int[] largeCodePointSet = CodePointUtils.generateCodePointSet( LARGE_CODE_POINT_SET_SIZE, random); wordSet.clear(); while (wordSet.size() < number) { wordSet.add(CodePointUtils.generateWord(random, largeCodePointSet)); } sWordsWithVariousCodePoints.addAll(wordSet); } /** * Adds unigrams to the dictionary. */ private static void addUnigrams(final int number, final FusionDictionary dict, final List words) { for (int i = 0; i < number; ++i) { final String word = words.get(i); final ArrayList shortcuts = new ArrayList<>(); dict.add(word, new ProbabilityInfo(UNIGRAM_FREQ), false /* isNotAWord */, false /* isPossiblyOffensive */); } } private static void addBigrams(final FusionDictionary dict, final List words, final SparseArray> bigrams) { for (int i = 0; i < bigrams.size(); ++i) { final int w1 = bigrams.keyAt(i); for (int w2 : bigrams.valueAt(i)) { dict.setBigram(words.get(w1), words.get(w2), new ProbabilityInfo(BIGRAM_FREQ)); } } } // The following is useful to dump the dictionary into a textual file, but it can't compile // on-device, so it's commented out. // private void dumpToCombinedFileForDebug(final FusionDictionary dict, final String filename) // throws IOException { // com.android.inputmethod.latin.dicttool.CombinedInputOutput.writeDictionaryCombined( // new java.io.FileWriter(new File(filename)), dict); // } private static long timeWritingDictToFile(final File file, final FusionDictionary dict, final FormatSpec.FormatOptions formatOptions) { long now = -1, diff = -1; try { final DictEncoder dictEncoder = BinaryDictUtils.getDictEncoder(file, formatOptions); now = System.currentTimeMillis(); // If you need to dump the dict to a textual file, uncomment the line below and the // function above // dumpToCombinedFileForDebug(file, "/tmp/foo"); dictEncoder.writeDictionary(dict, formatOptions); diff = System.currentTimeMillis() - now; } catch (IOException e) { Log.e(TAG, "IO exception while writing file", e); } catch (UnsupportedFormatException e) { Log.e(TAG, "UnsupportedFormatException", e); } return diff; } private static void checkDictionary(final FusionDictionary dict, final List words, final SparseArray> bigrams) { assertNotNull(dict); // check unigram for (final String word : words) { final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray, word); assertNotNull(ptNode); } // check bigram for (int i = 0; i < bigrams.size(); ++i) { final int w1 = bigrams.keyAt(i); for (final int w2 : bigrams.valueAt(i)) { final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray, words.get(w1)); assertNotNull(words.get(w1) + "," + words.get(w2), ptNode.getBigram(words.get(w2))); } } } private static String outputOptions(final int bufferType, final FormatSpec.FormatOptions formatOptions) { final String result = " : buffer type = " + ((bufferType == BinaryDictUtils.USE_BYTE_BUFFER) ? "byte buffer" : "byte array"); return result + " : version = " + formatOptions.mVersion; } // Tests for readDictionaryBinary and writeDictionaryBinary private static long timeReadingAndCheckDict(final File file, final List words, final SparseArray> bigrams, final int bufferType) { long now, diff = -1; FusionDictionary dict = null; try { final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(file, 0, file.length(), bufferType); now = System.currentTimeMillis(); dict = dictDecoder.readDictionaryBinary(false /* deleteDictIfBroken */); diff = System.currentTimeMillis() - now; } catch (IOException e) { Log.e(TAG, "IOException while reading dictionary", e); } catch (UnsupportedFormatException e) { Log.e(TAG, "Unsupported format", e); } checkDictionary(dict, words, bigrams); return diff; } // Tests for readDictionaryBinary and writeDictionaryBinary private String runReadAndWrite(final List words, final SparseArray> bigrams, final int bufferType, final FormatSpec.FormatOptions formatOptions, final String message) { final String dictName = "runReadAndWrite"; final String dictVersion = Long.toString(System.currentTimeMillis()); final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions, getContext().getCacheDir()); final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions)); addUnigrams(words.size(), dict, words); addBigrams(dict, words, bigrams); checkDictionary(dict, words, bigrams); final long write = timeWritingDictToFile(file, dict, formatOptions); final long read = timeReadingAndCheckDict(file, words, bigrams, bufferType); return "PROF: read=" + read + "ms, write=" + write + "ms :" + message + " : " + outputOptions(bufferType, formatOptions); } private void runReadAndWriteTests(final List results, final int bufferType, final FormatSpec.FormatOptions formatOptions) { results.add(runReadAndWrite(sWords, sEmptyBigrams, bufferType, formatOptions, "unigram")); results.add(runReadAndWrite(sWords, sChainBigrams, bufferType, formatOptions, "chain")); results.add(runReadAndWrite(sWords, sStarBigrams, bufferType, formatOptions, "star")); results.add(runReadAndWrite(sWords, sEmptyBigrams, bufferType, formatOptions, "unigram with shortcuts")); results.add(runReadAndWrite(sWords, sChainBigrams, bufferType, formatOptions, "chain with shortcuts")); results.add(runReadAndWrite(sWords, sStarBigrams, bufferType, formatOptions, "star with shortcuts")); results.add(runReadAndWrite(sWordsWithVariousCodePoints, sEmptyBigrams, bufferType, formatOptions, "unigram with various code points")); } public void testCharacterTableIsPresent() throws IOException, UnsupportedFormatException { final String[] wordSource = {"words", "used", "for", "testing", "a", "code point", "table"}; final List words = Arrays.asList(wordSource); final String correctCodePointTable = "toesdrniawuplgfcb "; final String dictName = "codePointTableTest"; final String dictVersion = Long.toString(System.currentTimeMillis()); final String codePointTableAttribute = DictionaryHeader.CODE_POINT_TABLE_KEY; final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, BinaryDictUtils.STATIC_OPTIONS, getContext().getCacheDir()); // Write a test dictionary final DictEncoder dictEncoder = new Ver2DictEncoder(file, Ver2DictEncoder.CODE_POINT_TABLE_ON); final FormatSpec.FormatOptions formatOptions = new FormatSpec.FormatOptions( FormatSpec.MINIMUM_SUPPORTED_STATIC_VERSION); final FusionDictionary sourcedict = new FusionDictionary(new PtNodeArray(), BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions)); addUnigrams(words.size(), sourcedict, words); dictEncoder.writeDictionary(sourcedict, formatOptions); // Read the dictionary final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(file, 0, file.length(), DictDecoder.USE_BYTEARRAY); final DictionaryHeader fileHeader = dictDecoder.readHeader(); // Check if codePointTable is present assertTrue("codePointTable is not present", fileHeader.mDictionaryOptions.mAttributes.containsKey(codePointTableAttribute)); final String codePointTable = fileHeader.mDictionaryOptions.mAttributes.get(codePointTableAttribute); // Check if codePointTable is correct assertEquals("codePointTable is incorrect", codePointTable, correctCodePointTable); } // Unit test for CharEncoding.readString and CharEncoding.writeString. public void testCharEncoding() { // the max length of a word in sWords is less than 50. // See generateWords. final byte[] buffer = new byte[50 * 3]; final DictBuffer dictBuffer = new ByteArrayDictBuffer(buffer); for (final String word : sWords) { Arrays.fill(buffer, (byte) 0); CharEncoding.writeString(buffer, 0, word, null); dictBuffer.position(0); final String str = CharEncoding.readString(dictBuffer); assertEquals(word, str); } } public void testReadAndWriteWithByteBuffer() { final List results = new ArrayList<>(); runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_BUFFER, BinaryDictUtils.STATIC_OPTIONS); runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_BUFFER, BinaryDictUtils.DYNAMIC_OPTIONS_WITHOUT_TIMESTAMP); runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_BUFFER, BinaryDictUtils.DYNAMIC_OPTIONS_WITH_TIMESTAMP); for (final String result : results) { Log.d(TAG, result); } } public void testReadAndWriteWithByteArray() { final List results = new ArrayList<>(); runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_ARRAY, BinaryDictUtils.STATIC_OPTIONS); runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_ARRAY, BinaryDictUtils.DYNAMIC_OPTIONS_WITHOUT_TIMESTAMP); runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_ARRAY, BinaryDictUtils.DYNAMIC_OPTIONS_WITH_TIMESTAMP); for (final String result : results) { Log.d(TAG, result); } } // Tests for readUnigramsAndBigramsBinary private static void checkWordMap(final List expectedWords, final SparseArray> expectedBigrams, final TreeMap resultWords, final TreeMap resultFrequencies, final TreeMap> resultBigrams, final boolean checkProbability) { // check unigrams final Set actualWordsSet = new HashSet<>(resultWords.values()); final Set expectedWordsSet = new HashSet<>(expectedWords); assertEquals(actualWordsSet, expectedWordsSet); if (checkProbability) { for (int freq : resultFrequencies.values()) { assertEquals(freq, UNIGRAM_FREQ); } } // check bigrams final HashMap> expBigrams = new HashMap<>(); for (int i = 0; i < expectedBigrams.size(); ++i) { final String word1 = expectedWords.get(expectedBigrams.keyAt(i)); for (int w2 : expectedBigrams.valueAt(i)) { if (expBigrams.get(word1) == null) { expBigrams.put(word1, new HashSet()); } expBigrams.get(word1).add(expectedWords.get(w2)); } } final HashMap> actBigrams = new HashMap<>(); for (Entry> entry : resultBigrams.entrySet()) { final String word1 = resultWords.get(entry.getKey()); final int unigramFreq = resultFrequencies.get(entry.getKey()); for (PendingAttribute attr : entry.getValue()) { final String word2 = resultWords.get(attr.mAddress); if (actBigrams.get(word1) == null) { actBigrams.put(word1, new HashSet()); } actBigrams.get(word1).add(word2); if (checkProbability) { final int bigramFreq = BinaryDictIOUtils.reconstructBigramFrequency( unigramFreq, attr.mFrequency); assertTrue(Math.abs(bigramFreq - BIGRAM_FREQ) < TOLERANCE_OF_BIGRAM_FREQ); } } } assertEquals(actBigrams, expBigrams); } private static long timeAndCheckReadUnigramsAndBigramsBinary(final File file, final List words, final SparseArray> bigrams, final int bufferType, final boolean checkProbability) { final TreeMap resultWords = new TreeMap<>(); final TreeMap> resultBigrams = new TreeMap<>(); final TreeMap resultFreqs = new TreeMap<>(); long now = -1, diff = -1; try { final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(file, 0, file.length(), bufferType); now = System.currentTimeMillis(); dictDecoder.readUnigramsAndBigramsBinary(resultWords, resultFreqs, resultBigrams); diff = System.currentTimeMillis() - now; } catch (IOException e) { Log.e(TAG, "IOException", e); } catch (UnsupportedFormatException e) { Log.e(TAG, "UnsupportedFormatException", e); } checkWordMap(words, bigrams, resultWords, resultFreqs, resultBigrams, checkProbability); return diff; } private String runReadUnigramsAndBigramsBinary(final ArrayList words, final SparseArray> bigrams, final int bufferType, final FormatSpec.FormatOptions formatOptions, final String message) { final String dictName = "runReadUnigrams"; final String dictVersion = Long.toString(System.currentTimeMillis()); final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions, getContext().getCacheDir()); // making the dictionary from lists of words. final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions)); addUnigrams(words.size(), dict, words); addBigrams(dict, words, bigrams); timeWritingDictToFile(file, dict, formatOptions); // Caveat: Currently, the Java code to read a v4 dictionary doesn't calculate the // probability when there's a timestamp for the entry. // TODO: Abandon the Java code, and implement the v4 dictionary reading code in native. long wordMap = timeAndCheckReadUnigramsAndBigramsBinary(file, words, bigrams, bufferType, !formatOptions.mHasTimestamp /* checkProbability */); long fullReading = timeReadingAndCheckDict(file, words, bigrams, bufferType); return "readDictionaryBinary=" + fullReading + ", readUnigramsAndBigramsBinary=" + wordMap + " : " + message + " : " + outputOptions(bufferType, formatOptions); } private void runReadUnigramsAndBigramsTests(final ArrayList results, final int bufferType, final FormatSpec.FormatOptions formatOptions) { results.add(runReadUnigramsAndBigramsBinary(sWords, sEmptyBigrams, bufferType, formatOptions, "unigram")); results.add(runReadUnigramsAndBigramsBinary(sWords, sChainBigrams, bufferType, formatOptions, "chain")); results.add(runReadUnigramsAndBigramsBinary(sWords, sStarBigrams, bufferType, formatOptions, "star")); } public void testReadUnigramsAndBigramsBinaryWithByteBuffer() { final ArrayList results = new ArrayList<>(); runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_BUFFER, BinaryDictUtils.STATIC_OPTIONS); for (final String result : results) { Log.d(TAG, result); } } public void testReadUnigramsAndBigramsBinaryWithByteArray() { final ArrayList results = new ArrayList<>(); runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_ARRAY, BinaryDictUtils.STATIC_OPTIONS); for (final String result : results) { Log.d(TAG, result); } } // Tests for getTerminalPosition private static String getWordFromBinary(final DictDecoder dictDecoder, final int address) { if (dictDecoder.getPosition() != 0) dictDecoder.setPosition(0); DictionaryHeader fileHeader = null; try { fileHeader = dictDecoder.readHeader(); } catch (IOException e) { return null; } catch (UnsupportedFormatException e) { return null; } if (fileHeader == null) return null; return BinaryDictDecoderUtils.getWordAtPosition(dictDecoder, fileHeader.mBodyOffset, address).mWord; } private static long checkGetTerminalPosition(final DictDecoder dictDecoder, final String word, final boolean contained) { long diff = -1; int position = -1; try { final long now = System.nanoTime(); position = dictDecoder.getTerminalPosition(word); diff = System.nanoTime() - now; } catch (IOException e) { Log.e(TAG, "IOException while getTerminalPosition", e); } catch (UnsupportedFormatException e) { Log.e(TAG, "UnsupportedFormatException while getTerminalPosition", e); } assertEquals(FormatSpec.NOT_VALID_WORD != position, contained); if (contained) assertEquals(getWordFromBinary(dictDecoder, position), word); return diff; } private void runGetTerminalPosition(final ArrayList words, final SparseArray> bigrams, final int bufferType, final FormatOptions formatOptions, final String message) { final String dictName = "testGetTerminalPosition"; final String dictVersion = Long.toString(System.currentTimeMillis()); final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions, getContext().getCacheDir()); final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions)); addUnigrams(sWords.size(), dict, sWords); addBigrams(dict, words, bigrams); timeWritingDictToFile(file, dict, formatOptions); final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(file, 0, file.length(), DictDecoder.USE_BYTEARRAY); try { dictDecoder.openDictBuffer(); } catch (IOException e) { Log.e(TAG, "IOException while opening the buffer", e); } catch (UnsupportedFormatException e) { Log.e(TAG, "IOException while opening the buffer", e); } assertTrue("Can't get the buffer", dictDecoder.isDictBufferOpen()); try { // too long word final String longWord = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz"; assertEquals(FormatSpec.NOT_VALID_WORD, dictDecoder.getTerminalPosition(longWord)); // null assertEquals(FormatSpec.NOT_VALID_WORD, dictDecoder.getTerminalPosition(null)); // empty string assertEquals(FormatSpec.NOT_VALID_WORD, dictDecoder.getTerminalPosition("")); } catch (IOException e) { } catch (UnsupportedFormatException e) { } // Test a word that is contained within the dictionary. long sum = 0; for (int i = 0; i < sWords.size(); ++i) { final long time = checkGetTerminalPosition(dictDecoder, sWords.get(i), true); sum += time == -1 ? 0 : time; } Log.d(TAG, "per search : " + (((double)sum) / sWords.size() / 1000000) + " : " + message + " : " + outputOptions(bufferType, formatOptions)); // Test a word that isn't contained within the dictionary. final int[] codePointSet = CodePointUtils.generateCodePointSet(DEFAULT_CODE_POINT_SET_SIZE, mRandom); for (int i = 0; i < 1000; ++i) { final String word = CodePointUtils.generateWord(mRandom, codePointSet); if (sWords.indexOf(word) != -1) continue; checkGetTerminalPosition(dictDecoder, word, false); } } private void runGetTerminalPositionTests(final int bufferType, final FormatOptions formatOptions) { runGetTerminalPosition(sWords, sEmptyBigrams, bufferType, formatOptions, "unigram"); } public void testGetTerminalPosition() { final ArrayList results = new ArrayList<>(); runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_ARRAY, BinaryDictUtils.STATIC_OPTIONS); runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_BUFFER, BinaryDictUtils.STATIC_OPTIONS); for (final String result : results) { Log.d(TAG, result); } } public void testVer2DictGetWordProperty() { final FormatOptions formatOptions = BinaryDictUtils.STATIC_OPTIONS; final ArrayList words = sWords; final String dictName = "testGetWordProperty"; final String dictVersion = Long.toString(System.currentTimeMillis()); final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions)); addUnigrams(words.size(), dict, words); addBigrams(dict, words, sEmptyBigrams); final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions, getContext().getCacheDir()); file.delete(); timeWritingDictToFile(file, dict, formatOptions); final BinaryDictionary binaryDictionary = new BinaryDictionary(file.getAbsolutePath(), 0 /* offset */, file.length(), true /* useFullEditDistance */, Locale.ENGLISH, dictName, false /* isUpdatable */); for (final String word : words) { final WordProperty wordProperty = binaryDictionary.getWordProperty(word, false /* isBeginningOfSentence */); assertEquals(word, wordProperty.mWord); assertEquals(UNIGRAM_FREQ, wordProperty.getProbability()); } } public void testVer2DictIteration() { final FormatOptions formatOptions = BinaryDictUtils.STATIC_OPTIONS; final ArrayList words = sWords; final SparseArray> bigrams = sEmptyBigrams; final String dictName = "testGetWordProperty"; final String dictVersion = Long.toString(System.currentTimeMillis()); final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions)); addUnigrams(words.size(), dict, words); addBigrams(dict, words, bigrams); final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions, getContext().getCacheDir()); timeWritingDictToFile(file, dict, formatOptions); Log.d(TAG, file.getAbsolutePath()); final BinaryDictionary binaryDictionary = new BinaryDictionary(file.getAbsolutePath(), 0 /* offset */, file.length(), true /* useFullEditDistance */, Locale.ENGLISH, dictName, false /* isUpdatable */); final HashSet wordSet = new HashSet<>(words); final HashSet> bigramSet = new HashSet<>(); for (int i = 0; i < words.size(); i++) { final List bigramList = bigrams.get(i); if (bigramList != null) { for (final Integer word1Index : bigramList) { final String word1 = words.get(word1Index); bigramSet.add(new Pair<>(words.get(i), word1)); } } } int token = 0; do { final BinaryDictionary.GetNextWordPropertyResult result = binaryDictionary.getNextWordProperty(token); final WordProperty wordProperty = result.mWordProperty; final String word0 = wordProperty.mWord; assertEquals(UNIGRAM_FREQ, wordProperty.mProbabilityInfo.mProbability); wordSet.remove(word0); if (wordProperty.mHasNgrams) { for (final WeightedString bigramTarget : wordProperty.getBigrams()) { final String word1 = bigramTarget.mWord; final Pair bigram = new Pair<>(word0, word1); assertTrue(bigramSet.contains(bigram)); bigramSet.remove(bigram); } } token = result.mNextToken; } while (token != 0); assertTrue(wordSet.isEmpty()); assertTrue(bigramSet.isEmpty()); } }