From 26bd46095a05843e7574dfcf7db53406f215525d Mon Sep 17 00:00:00 2001 From: Keisuke Kuroyanagi Date: Wed, 29 Jan 2014 20:19:24 +0900 Subject: [PATCH] Reading dictionary containing timestamps in Java Side. Just skipping historical information fields. Bug: 11281877 Change-Id: I43d2adaa576b7da11ed3ca54990265dbb6f53b08 --- .../latin/makedict/AbstractDictDecoder.java | 6 +- .../makedict/BinaryDictDecoderUtils.java | 1 - .../makedict/BinaryDictEncoderUtils.java | 11 +-- .../latin/makedict/FormatSpec.java | 12 ++-- .../latin/makedict/FusionDictionary.java | 1 + .../latin/makedict/Ver4DictDecoder.java | 69 ++++++++++--------- .../latin/BinaryDictionaryDecayingTests.java | 52 ++++++++++++++ .../BinaryDictDecoderEncoderTests.java | 39 +++++++---- .../latin/makedict/BinaryDictUtils.java | 9 ++- 9 files changed, 132 insertions(+), 68 deletions(-) diff --git a/java/src/com/android/inputmethod/latin/makedict/AbstractDictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/AbstractDictDecoder.java index f8fa68f45..370782b33 100644 --- a/java/src/com/android/inputmethod/latin/makedict/AbstractDictDecoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/AbstractDictDecoder.java @@ -48,7 +48,7 @@ public abstract class AbstractDictDecoder implements DictDecoder { throw new UnsupportedFormatException("Unsupported version : " + version); } // TODO: Remove this field. - final int optionsFlags = HeaderReader.readOptionFlags(headerBuffer); + HeaderReader.readOptionFlags(headerBuffer); final int headerSize = HeaderReader.readHeaderSize(headerBuffer); if (headerSize < 0) { throw new UnsupportedFormatException("header size can't be negative."); @@ -59,8 +59,8 @@ public abstract class AbstractDictDecoder implements DictDecoder { final FileHeader header = new FileHeader(headerSize, new FusionDictionary.DictionaryOptions(attributes), - new FormatOptions(version, - 0 != (optionsFlags & FormatSpec.CONTAINS_TIMESTAMP_FLAG))); + new FormatOptions(version, FileHeader.ATTRIBUTE_VALUE_TRUE.equals( + attributes.get(FileHeader.HAS_HISTORICAL_INFO_ATTRIBUTE)))); return header; } diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java index 9a24c47af..31747155e 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java @@ -499,7 +499,6 @@ public final class BinaryDictDecoderUtils { final int nodeArrayOriginPos = dictDecoder.getPosition(); do { // Scan the linked-list node. - final int nodeArrayHeadPos = dictDecoder.getPosition(); final int count = dictDecoder.readPtNodeCount(); int groupPos = dictDecoder.getPosition(); for (int i = count; i > 0; --i) { // Scan the array of PtNode. diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java index bb40e0dd5..eff8fc375 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java @@ -755,14 +755,6 @@ public class BinaryDictEncoderUtils { return discretizedFrequency > 0 ? discretizedFrequency : 0; } - /** - * Makes the 2-byte value for options flags. Unused at the moment, and always 0. - */ - private static final int makeOptionsValue(final FormatOptions formatOptions) { - // TODO: why doesn't this handle CONTAINS_TIMESTAMP_FLAG? - return 0; - } - /** * Makes the flag value for a shortcut. * @@ -949,7 +941,8 @@ public class BinaryDictEncoderUtils { headerBuffer.write((byte) (0xFF & version)); // Options flags - final int options = makeOptionsValue(formatOptions); + // TODO: Remove this field. + final int options = 0; headerBuffer.write((byte) (0xFF & (options >> 8))); headerBuffer.write((byte) (0xFF & options)); final int headerSizeOffset = headerBuffer.size(); diff --git a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java index 437fa942b..61c17fc46 100644 --- a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java +++ b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java @@ -192,10 +192,6 @@ public final class FormatSpec { static final int MINIMUM_SUPPORTED_VERSION = VERSION2; static final int MAXIMUM_SUPPORTED_VERSION = VERSION4; - // These options need to be the same numeric values as the one in the native reading code. - // TODO: Make the native reading code read this variable. - static final int CONTAINS_TIMESTAMP_FLAG = 0x10; - // TODO: Make this value adaptative to content data, store it in the header, and // use it in the reading code. static final int MAX_WORD_LENGTH = Constants.DICTIONARY_MAX_WORD_LENGTH; @@ -249,26 +245,26 @@ public final class FormatSpec { static final String TRIE_FILE_EXTENSION = ".trie"; public static final String HEADER_FILE_EXTENSION = ".header"; static final String FREQ_FILE_EXTENSION = ".freq"; - static final String UNIGRAM_TIMESTAMP_FILE_EXTENSION = ".timestamp"; // tat = Terminal Address Table static final String TERMINAL_ADDRESS_TABLE_FILE_EXTENSION = ".tat"; static final String BIGRAM_FILE_EXTENSION = ".bigram"; static final String SHORTCUT_FILE_EXTENSION = ".shortcut"; static final String LOOKUP_TABLE_FILE_SUFFIX = "_lookup"; static final String CONTENT_TABLE_FILE_SUFFIX = "_index"; + static final int FLAGS_IN_FREQ_FILE_SIZE = 1; static final int FREQUENCY_AND_FLAGS_SIZE = 2; static final int TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE = 3; static final int UNIGRAM_TIMESTAMP_SIZE = 4; + static final int UNIGRAM_COUNTER_SIZE = 1; + static final int UNIGRAM_LEVEL_SIZE = 1; // With the English main dictionary as of October 2013, the size of bigram address table is // is 345KB with the block size being 16. // This is 54% of that of full address table. static final int BIGRAM_ADDRESS_TABLE_BLOCK_SIZE = 16; - static final int BIGRAM_CONTENT_COUNT = 2; + static final int BIGRAM_CONTENT_COUNT = 1; static final int BIGRAM_FREQ_CONTENT_INDEX = 0; - static final int BIGRAM_TIMESTAMP_CONTENT_INDEX = 1; static final String BIGRAM_FREQ_CONTENT_ID = "_freq"; - static final String BIGRAM_TIMESTAMP_CONTENT_ID = "_timestamp"; static final int BIGRAM_TIMESTAMP_SIZE = 4; static final int BIGRAM_COUNTER_SIZE = 1; static final int BIGRAM_LEVEL_SIZE = 1; diff --git a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java index fdf2ae7b5..5b0e8399a 100644 --- a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java +++ b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java @@ -61,6 +61,7 @@ public final class FusionDictionary implements Iterable { mData = new ArrayList(); } public PtNodeArray(ArrayList data) { + Collections.sort(data, PTNODE_COMPARATOR); mData = data; } } diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java index 7071893d2..f23022992 100644 --- a/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java @@ -143,7 +143,7 @@ public class Ver4DictDecoder extends AbstractDictDecoder { mTerminalAddressTableBuffer = mBufferFactory.getDictionaryBuffer( getFile(FILETYPE_TERMINAL_ADDRESS_TABLE)); mBigramReader = new BigramContentReader(mDictDirectory.getName(), - mDictDirectory, mBufferFactory, false); + mDictDirectory, mBufferFactory); mBigramReader.openBuffers(); mShortcutReader = new ShortcutContentReader(mDictDirectory.getName(), mDictDirectory, mBufferFactory); @@ -184,39 +184,24 @@ public class Ver4DictDecoder extends AbstractDictDecoder { */ protected static class BigramContentReader extends SparseTableContentReader { public BigramContentReader(final String name, final File baseDir, - final DictionaryBufferFactory factory, final boolean hasTimestamp) { + final DictionaryBufferFactory factory) { super(name + FormatSpec.BIGRAM_FILE_EXTENSION, FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, baseDir, - getContentFilenames(name, hasTimestamp), getContentIds(hasTimestamp), factory); + getContentFilenames(name), getContentIds(), factory); } // TODO: Consolidate this method and BigramContentWriter.getContentFilenames. - protected static String[] getContentFilenames(final String name, - final boolean hasTimestamp) { - final String[] contentFilenames; - if (hasTimestamp) { - contentFilenames = new String[] { name + FormatSpec.BIGRAM_FILE_EXTENSION, - name + FormatSpec.BIGRAM_FILE_EXTENSION }; - } else { - contentFilenames = new String[] { name + FormatSpec.BIGRAM_FILE_EXTENSION }; - } - return contentFilenames; + protected static String[] getContentFilenames(final String name) { + return new String[] { name + FormatSpec.BIGRAM_FILE_EXTENSION }; } // TODO: Consolidate this method and BigramContentWriter.getContentIds. - protected static String[] getContentIds(final boolean hasTimestamp) { - final String[] contentIds; - if (hasTimestamp) { - contentIds = new String[] { FormatSpec.BIGRAM_FREQ_CONTENT_ID, - FormatSpec.BIGRAM_TIMESTAMP_CONTENT_ID }; - } else { - contentIds = new String[] { FormatSpec.BIGRAM_FREQ_CONTENT_ID }; - } - return contentIds; + protected static String[] getContentIds() { + return new String[] { FormatSpec.BIGRAM_FREQ_CONTENT_ID }; } public ArrayList readTargetsAndFrequencies(final int terminalId, - final DictBuffer terminalAddressTableBuffer) { + final DictBuffer terminalAddressTableBuffer, final FormatOptions options) { final ArrayList bigrams = CollectionUtils.newArrayList(); read(FormatSpec.BIGRAM_FREQ_CONTENT_INDEX, terminalId, new SparseTableContentReaderInterface() { @@ -226,14 +211,25 @@ public class Ver4DictDecoder extends AbstractDictDecoder { // If bigrams.size() reaches FormatSpec.MAX_BIGRAMS_IN_A_PTNODE, // remaining bigram entries are ignored. final int bigramFlags = buffer.readUnsignedByte(); + final int probability; + + if (options.mHasTimestamp) { + probability = buffer.readUnsignedByte(); + final int pos = buffer.position(); + // Skip historical info. + buffer.position(pos + FormatSpec.BIGRAM_TIMESTAMP_SIZE + + FormatSpec.BIGRAM_LEVEL_SIZE + + FormatSpec.BIGRAM_COUNTER_SIZE); + } else { + probability = bigramFlags + & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY; + } final int targetTerminalId = buffer.readUnsignedInt24(); terminalAddressTableBuffer.position(targetTerminalId * FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE); final int targetAddress = terminalAddressTableBuffer.readUnsignedInt24(); - bigrams.add(new PendingAttribute(bigramFlags - & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY, - targetAddress)); + bigrams.add(new PendingAttribute(probability, targetAddress)); if (0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) { break; @@ -286,8 +282,19 @@ public class Ver4DictDecoder extends AbstractDictDecoder { } protected static class PtNodeReader extends AbstractDictDecoder.PtNodeReader { - protected static int readFrequency(final DictBuffer frequencyBuffer, final int terminalId) { - frequencyBuffer.position(terminalId * FormatSpec.FREQUENCY_AND_FLAGS_SIZE + 1); + protected static int readFrequency(final DictBuffer frequencyBuffer, final int terminalId, + final FormatOptions formatOptions) { + final int readingPos; + if (formatOptions.mHasTimestamp) { + final int entrySize = FormatSpec.FREQUENCY_AND_FLAGS_SIZE + + FormatSpec.UNIGRAM_TIMESTAMP_SIZE + FormatSpec.UNIGRAM_LEVEL_SIZE + + FormatSpec.UNIGRAM_COUNTER_SIZE; + readingPos = terminalId * entrySize + FormatSpec.FLAGS_IN_FREQ_FILE_SIZE; + } else { + readingPos = terminalId * FormatSpec.FREQUENCY_AND_FLAGS_SIZE + + FormatSpec.FLAGS_IN_FREQ_FILE_SIZE; + } + frequencyBuffer.position(readingPos); return frequencyBuffer.readUnsignedByte(); } @@ -354,12 +361,12 @@ public class Ver4DictDecoder extends AbstractDictDecoder { } @Override - public PtNodeInfo readPtNode(int ptNodePos, FormatOptions options) { + public PtNodeInfo readPtNode(final int ptNodePos, final FormatOptions options) { final Ver4PtNodeInfo nodeInfo = readVer4PtNodeInfo(ptNodePos, options); final int frequency; if (0 != (FormatSpec.FLAG_IS_TERMINAL & nodeInfo.mFlags)) { - frequency = PtNodeReader.readFrequency(mFrequencyBuffer, nodeInfo.mTerminalId); + frequency = PtNodeReader.readFrequency(mFrequencyBuffer, nodeInfo.mTerminalId, options); } else { frequency = PtNode.NOT_A_TERMINAL; } @@ -367,7 +374,7 @@ public class Ver4DictDecoder extends AbstractDictDecoder { final ArrayList shortcutTargets = mShortcutReader.readShortcuts( nodeInfo.mTerminalId); final ArrayList bigrams = mBigramReader.readTargetsAndFrequencies( - nodeInfo.mTerminalId, mTerminalAddressTableBuffer); + nodeInfo.mTerminalId, mTerminalAddressTableBuffer, options); return new PtNodeInfo(ptNodePos, ptNodePos + nodeInfo.mNodeSize, nodeInfo.mFlags, nodeInfo.mCharacters, frequency, nodeInfo.mParentPos, nodeInfo.mChildrenPos, diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java index 1dc1f5a1c..c42765633 100644 --- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java +++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java @@ -21,7 +21,11 @@ import android.test.suitebuilder.annotation.LargeTest; import android.util.Pair; import com.android.inputmethod.latin.makedict.CodePointUtils; +import com.android.inputmethod.latin.makedict.DictDecoder; import com.android.inputmethod.latin.makedict.FormatSpec; +import com.android.inputmethod.latin.makedict.FusionDictionary; +import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode; +import com.android.inputmethod.latin.makedict.UnsupportedFormatException; import com.android.inputmethod.latin.utils.FileUtils; import java.io.File; @@ -98,6 +102,10 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase { getContext().getCacheDir()); FileUtils.deleteRecursively(file); Map attributeMap = new HashMap(); + attributeMap.put(FormatSpec.FileHeader.DICTIONARY_ID_ATTRIBUTE, dictId); + attributeMap.put(FormatSpec.FileHeader.DICTIONARY_LOCALE_ATTRIBUTE, dictId); + attributeMap.put(FormatSpec.FileHeader.DICTIONARY_VERSION_ATTRIBUTE, + String.valueOf(TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis()))); attributeMap.put(FormatSpec.FileHeader.USES_FORGETTING_CURVE_ATTRIBUTE, FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE); attributeMap.put(FormatSpec.FileHeader.HAS_HISTORICAL_INFO_ATTRIBUTE, @@ -119,6 +127,50 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase { return BinaryDictionary.setCurrentTimeForTest(-1); } + public void testReadDictInJavaSide() { + testReadDictInJavaSide(FormatSpec.VERSION4); + } + + private void testReadDictInJavaSide(final int formatVersion) { + setCurrentTimeForTestMode(mCurrentTime); + File dictFile = null; + try { + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); + } catch (IOException e) { + fail("IOException while writing an initial dictionary : " + e); + } + BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), + 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + addUnigramWord(binaryDictionary, "a", DUMMY_PROBABILITY); + addUnigramWord(binaryDictionary, "ab", DUMMY_PROBABILITY); + addUnigramWord(binaryDictionary, "aaa", DUMMY_PROBABILITY); + addBigramWords(binaryDictionary, "a", "aaa", DUMMY_PROBABILITY); + binaryDictionary.flushWithGC(); + binaryDictionary.close(); + + final DictDecoder dictDecoder = FormatSpec.getDictDecoder(dictFile); + try { + final FusionDictionary dict = dictDecoder.readDictionaryBinary(null, + false /* deleteDictIfBroken */); + PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray, "a"); + assertNotNull(ptNode); + assertTrue(ptNode.isTerminal()); + assertNotNull(ptNode.getBigram("aaa")); + ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray, "ab"); + assertNotNull(ptNode); + assertTrue(ptNode.isTerminal()); + ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray, "aaa"); + assertNotNull(ptNode); + assertTrue(ptNode.isTerminal()); + } catch (IOException e) { + fail("IOException while reading dictionary: " + e); + } catch (UnsupportedFormatException e) { + fail("Unsupported format: " + e); + } + dictFile.delete(); + } + public void testControlCurrentTime() { testControlCurrentTime(FormatSpec.VERSION4); } diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java index b5a71f0bf..8a1ac5233 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java +++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java @@ -17,11 +17,11 @@ package com.android.inputmethod.latin.makedict; import android.test.AndroidTestCase; -import android.test.MoreAsserts; import android.test.suitebuilder.annotation.LargeTest; import android.util.Log; import android.util.SparseArray; +import com.android.inputmethod.latin.BinaryDictionary; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; @@ -76,6 +76,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { public BinaryDictDecoderEncoderTests(final long seed, final int maxUnigrams) { super(); + BinaryDictionary.setCurrentTimeForTest(0); Log.e(TAG, "Testing dictionary: seed is " + seed); final Random random = new Random(seed); sWords.clear(); @@ -262,7 +263,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { getContext().getCacheDir()); final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), - BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion)); + BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions)); addUnigrams(words.size(), dict, words, shortcuts); addBigrams(dict, words, bigrams); checkDictionary(dict, words, bigrams, shortcuts); @@ -317,7 +318,6 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { BinaryDictUtils.VERSION4_OPTIONS_WITHOUT_TIMESTAMP); runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_BUFFER, BinaryDictUtils.VERSION4_OPTIONS_WITH_TIMESTAMP); - for (final String result : results) { Log.d(TAG, result); } @@ -344,14 +344,16 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { final SparseArray> expectedBigrams, final TreeMap resultWords, final TreeMap resultFrequencies, - final TreeMap> resultBigrams) { + final TreeMap> resultBigrams, + final boolean checkProbability) { // check unigrams final Set actualWordsSet = new HashSet(resultWords.values()); final Set expectedWordsSet = new HashSet(expectedWords); assertEquals(actualWordsSet, expectedWordsSet); - - for (int freq : resultFrequencies.values()) { - assertEquals(freq, UNIGRAM_FREQ); + if (checkProbability) { + for (int freq : resultFrequencies.values()) { + assertEquals(freq, UNIGRAM_FREQ); + } } // check bigrams @@ -377,16 +379,19 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { } actBigrams.get(word1).add(word2); - final int bigramFreq = BinaryDictIOUtils.reconstructBigramFrequency( - unigramFreq, attr.mFrequency); - assertTrue(Math.abs(bigramFreq - BIGRAM_FREQ) < TOLERANCE_OF_BIGRAM_FREQ); + if (checkProbability) { + final int bigramFreq = BinaryDictIOUtils.reconstructBigramFrequency( + unigramFreq, attr.mFrequency); + assertTrue(Math.abs(bigramFreq - BIGRAM_FREQ) < TOLERANCE_OF_BIGRAM_FREQ); + } } } assertEquals(actBigrams, expBigrams); } private long timeAndCheckReadUnigramsAndBigramsBinary(final File file, final List words, - final SparseArray> bigrams, final int bufferType) { + final SparseArray> bigrams, final int bufferType, + final boolean checkProbability) { final TreeMap resultWords = CollectionUtils.newTreeMap(); final TreeMap> resultBigrams = CollectionUtils.newTreeMap(); @@ -404,7 +409,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { Log.e(TAG, "UnsupportedFormatException", e); } - checkWordMap(words, bigrams, resultWords, resultFreqs, resultBigrams); + checkWordMap(words, bigrams, resultWords, resultFreqs, resultBigrams, checkProbability); return diff; } @@ -418,13 +423,17 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { // making the dictionary from lists of words. final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), - BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion)); + BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions)); addUnigrams(words.size(), dict, words, null /* shortcutMap */); addBigrams(dict, words, bigrams); timeWritingDictToFile(file, dict, formatOptions); - long wordMap = timeAndCheckReadUnigramsAndBigramsBinary(file, words, bigrams, bufferType); + // Caveat: Currently, the Java code to read a v4 dictionary doesn't calculate the + // probability when there's a timestamp for the entry. + // TODO: Abandon the Java code, and implement the v4 dictionary reading code in native. + long wordMap = timeAndCheckReadUnigramsAndBigramsBinary(file, words, bigrams, bufferType, + !formatOptions.mHasTimestamp /* checkProbability */); long fullReading = timeReadingAndCheckDict(file, words, bigrams, null /* shortcutMap */, bufferType); @@ -517,7 +526,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { getContext().getCacheDir()); final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), - BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion)); + BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions)); addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */); addBigrams(dict, words, bigrams); timeWritingDictToFile(file, dict, formatOptions); diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictUtils.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictUtils.java index f7a808c1e..f17596865 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictUtils.java +++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictUtils.java @@ -36,11 +36,18 @@ public class BinaryDictUtils { public static final FormatSpec.FormatOptions VERSION4_OPTIONS_WITH_TIMESTAMP = new FormatSpec.FormatOptions(FormatSpec.VERSION4, true /* hasTimestamp */); - public static DictionaryOptions makeDictionaryOptions(final String id, final String version) { + public static DictionaryOptions makeDictionaryOptions(final String id, final String version, + final FormatSpec.FormatOptions formatOptions) { final DictionaryOptions options = new DictionaryOptions(new HashMap()); options.mAttributes.put(FileHeader.DICTIONARY_LOCALE_ATTRIBUTE, "en_US"); options.mAttributes.put(FileHeader.DICTIONARY_ID_ATTRIBUTE, id); options.mAttributes.put(FileHeader.DICTIONARY_VERSION_ATTRIBUTE, version); + if (formatOptions.mHasTimestamp) { + options.mAttributes.put(FileHeader.HAS_HISTORICAL_INFO_ATTRIBUTE, + FileHeader.ATTRIBUTE_VALUE_TRUE); + options.mAttributes.put(FileHeader.USES_FORGETTING_CURVE_ATTRIBUTE, + FileHeader.ATTRIBUTE_VALUE_TRUE); + } return options; }