diff --git a/java/src/com/android/inputmethod/latin/makedict/AbstractDictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/AbstractDictDecoder.java index 9f7f502ea..fda97dafc 100644 --- a/java/src/com/android/inputmethod/latin/makedict/AbstractDictDecoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/AbstractDictDecoder.java @@ -60,7 +60,8 @@ public abstract class AbstractDictDecoder implements DictDecoder { 0 != (optionsFlags & FormatSpec.GERMAN_UMLAUT_PROCESSING_FLAG), 0 != (optionsFlags & FormatSpec.FRENCH_LIGATURE_PROCESSING_FLAG)), new FormatOptions(version, - 0 != (optionsFlags & FormatSpec.SUPPORTS_DYNAMIC_UPDATE))); + 0 != (optionsFlags & FormatSpec.SUPPORTS_DYNAMIC_UPDATE), + 0 != (optionsFlags & FormatSpec.CONTAINS_TIMESTAMP_FLAG))); return header; } diff --git a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java index 5a5d7af6b..605930ab4 100644 --- a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java +++ b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java @@ -37,13 +37,15 @@ public final class FormatSpec { * sion * * o | - * p | not used 4 bits - * t | has bigrams ? 1 bit, 1 = yes, 0 = no : CONTAINS_BIGRAMS_FLAG - * i | FRENCH_LIGATURE_PROCESSING_FLAG - * o | supports dynamic updates ? 1 bit, 1 = yes, 0 = no : SUPPORTS_DYNAMIC_UPDATE - * n | GERMAN_UMLAUT_PROCESSING_FLAG - * f | - * lags + * p | not used 3 bits + * t | each unigram and bigram entry has a time stamp? + * i | 1 bit, 1 = yes, 0 = no : CONTAINS_TIMESTAMP_FLAG + * o | has bigrams ? 1 bit, 1 = yes, 0 = no : CONTAINS_BIGRAMS_FLAG + * n | FRENCH_LIGATURE_PROCESSING_FLAG + * f | supports dynamic updates ? 1 bit, 1 = yes, 0 = no : SUPPORTS_DYNAMIC_UPDATE + * l | GERMAN_UMLAUT_PROCESSING_FLAG + * a | + * gs * * h | * e | size of the file header, 4bytes @@ -211,6 +213,8 @@ public final class FormatSpec { static final int SUPPORTS_DYNAMIC_UPDATE = 0x2; static final int FRENCH_LIGATURE_PROCESSING_FLAG = 0x4; static final int CONTAINS_BIGRAMS_FLAG = 0x8; + // TODO: Implement timestamps for unigram. + static final int CONTAINS_TIMESTAMP_FLAG = 0x10; // TODO: Make this value adaptative to content data, store it in the header, and // use it in the reading code. @@ -276,9 +280,14 @@ public final class FormatSpec { // is 584KB with the block size being 4. // This is 91% of that of full address table. static final int BIGRAM_ADDRESS_TABLE_BLOCK_SIZE = 4; - static final int BIGRAM_CONTENT_COUNT = 1; + static final int BIGRAM_CONTENT_COUNT = 2; static final int BIGRAM_FREQ_CONTENT_INDEX = 0; + static final int BIGRAM_TIMESTAMP_CONTENT_INDEX = 1; static final String BIGRAM_FREQ_CONTENT_ID = "_freq"; + static final String BIGRAM_TIMESTAMP_CONTENT_ID = "_timestamp"; + static final int BIGRAM_TIMESTAMP_SIZE = 4; + static final int BIGRAM_COUNTER_SIZE = 1; + static final int BIGRAM_LEVEL_SIZE = 1; static final int SHORTCUT_CONTENT_COUNT = 1; static final int SHORTCUT_CONTENT_INDEX = 0; @@ -321,6 +330,7 @@ public final class FormatSpec { public final int mVersion; public final boolean mSupportsDynamicUpdate; public final boolean mHasTerminalId; + public final boolean mHasTimestamp; @UsedForTesting public FormatOptions(final int version) { this(version, false); @@ -328,6 +338,11 @@ public final class FormatSpec { @UsedForTesting public FormatOptions(final int version, final boolean supportsDynamicUpdate) { + this(version, supportsDynamicUpdate, false /* hasTimestamp */); + } + + public FormatOptions(final int version, final boolean supportsDynamicUpdate, + final boolean hasTimestamp) { mVersion = version; if (version < FIRST_VERSION_WITH_DYNAMIC_UPDATE && supportsDynamicUpdate) { throw new RuntimeException("Dynamic updates are only supported with versions " @@ -335,6 +350,7 @@ public final class FormatSpec { } mSupportsDynamicUpdate = supportsDynamicUpdate; mHasTerminalId = (version >= FIRST_VERSION_WITH_TERMINAL_ID); + mHasTimestamp = hasTimestamp; } } diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java index 53729075f..734223ec2 100644 --- a/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java @@ -153,8 +153,12 @@ public class Ver4DictDecoder extends AbstractDictDecoder { final File contentFile = new File(mDictDirectory, mDictDirectory.getName() + FormatSpec.SHORTCUT_FILE_EXTENSION + FormatSpec.CONTENT_TABLE_FILE_SUFFIX + FormatSpec.SHORTCUT_CONTENT_ID); + final File timestampsFile = new File(mDictDirectory, mDictDirectory.getName() + + FormatSpec.SHORTCUT_FILE_EXTENSION + FormatSpec.CONTENT_TABLE_FILE_SUFFIX + + FormatSpec.SHORTCUT_CONTENT_ID); mShortcutAddressTable = SparseTable.readFromFiles(lookupIndexFile, - new File[] { contentFile }, FormatSpec.SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE); + new File[] { contentFile, timestampsFile }, + FormatSpec.SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE); } protected static class PtNodeReader extends AbstractDictDecoder.PtNodeReader { diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java index f9dcacf77..fe9894246 100644 --- a/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java @@ -69,16 +69,16 @@ public class Ver4DictEncoder implements DictEncoder { private final File[] mContentFiles; protected final OutputStream[] mContentOutStreams; - public SparseTableContentWriter(final String name, final int contentCount, - final int initialCapacity, final int blockSize, final File baseDir, - final String[] contentFilenames, final String[] contentIds) { + public SparseTableContentWriter(final String name, final int initialCapacity, + final int blockSize, final File baseDir, final String[] contentFilenames, + final String[] contentIds) { if (contentFilenames.length != contentIds.length) { throw new RuntimeException("The length of contentFilenames and the length of" + " contentIds are different " + contentFilenames.length + ", " + contentIds.length); } - mContentCount = contentCount; - mSparseTable = new SparseTable(initialCapacity, blockSize, contentCount); + mContentCount = contentFilenames.length; + mSparseTable = new SparseTable(initialCapacity, blockSize, mContentCount); mLookupTableFile = new File(baseDir, name + FormatSpec.LOOKUP_TABLE_FILE_SUFFIX); mAddressTableFiles = new File[mContentCount]; mContentFiles = new File[mContentCount]; @@ -113,16 +113,40 @@ public class Ver4DictEncoder implements DictEncoder { } private static class BigramContentWriter extends SparseTableContentWriter { + private final boolean mWriteTimestamp; public BigramContentWriter(final String name, final int initialCapacity, - final File baseDir) { - super(name + FormatSpec.BIGRAM_FILE_EXTENSION, FormatSpec.BIGRAM_CONTENT_COUNT, - initialCapacity, FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, baseDir, - new String[] { name + FormatSpec.BIGRAM_FILE_EXTENSION }, - new String[] { FormatSpec.BIGRAM_FREQ_CONTENT_ID }); + final File baseDir, final boolean writeTimestamp) { + super(name + FormatSpec.BIGRAM_FILE_EXTENSION, initialCapacity, + FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, baseDir, + getContentFilenames(name, writeTimestamp), getContentIds(writeTimestamp)); + mWriteTimestamp = writeTimestamp; } - public void writeBigramsForOneWord(final int terminalId, + private static String[] getContentFilenames(final String name, + final boolean writeTimestamp) { + final String[] contentFilenames; + if (writeTimestamp) { + contentFilenames = new String[] { name + FormatSpec.BIGRAM_FILE_EXTENSION, + name + FormatSpec.BIGRAM_FILE_EXTENSION }; + } else { + contentFilenames = new String[] { name + FormatSpec.BIGRAM_FILE_EXTENSION }; + } + return contentFilenames; + } + + private static String[] getContentIds(final boolean writeTimestamp) { + final String[] contentIds; + if (writeTimestamp) { + contentIds = new String[] { FormatSpec.BIGRAM_FREQ_CONTENT_ID, + FormatSpec.BIGRAM_TIMESTAMP_CONTENT_ID }; + } else { + contentIds = new String[] { FormatSpec.BIGRAM_FREQ_CONTENT_ID }; + } + return contentIds; + } + + public void writeBigramsForOneWord(final int terminalId, final int bigramCount, final Iterator bigramIterator, final FusionDictionary dict) throws IOException { write(FormatSpec.BIGRAM_FREQ_CONTENT_INDEX, terminalId, @@ -130,8 +154,16 @@ public class Ver4DictEncoder implements DictEncoder { @Override public void write(final OutputStream outStream) throws IOException { writeBigramsForOneWordInternal(outStream, bigramIterator, dict); - } - }); + }}); + if (mWriteTimestamp) { + write(FormatSpec.BIGRAM_TIMESTAMP_CONTENT_INDEX, terminalId, + new SparseTableContentWriterInterface() { + @Override + public void write(final OutputStream outStream) throws IOException { + initBigramTimestampsCountersAndLevelsForOneWordInternal(outStream, + bigramCount); + }}); + } } private void writeBigramsForOneWordInternal(final OutputStream outStream, @@ -151,13 +183,26 @@ public class Ver4DictEncoder implements DictEncoder { FormatSpec.PTNODE_ATTRIBUTE_MAX_ADDRESS_SIZE); } } + + private void initBigramTimestampsCountersAndLevelsForOneWordInternal( + final OutputStream outStream, final int bigramCount) throws IOException { + for (int i = 0; i < bigramCount; ++i) { + // TODO: Figure out what initial values should be. + BinaryDictEncoderUtils.writeUIntToStream(outStream, 0 /* value */, + FormatSpec.BIGRAM_TIMESTAMP_SIZE); + BinaryDictEncoderUtils.writeUIntToStream(outStream, 0 /* value */, + FormatSpec.BIGRAM_COUNTER_SIZE); + BinaryDictEncoderUtils.writeUIntToStream(outStream, 0 /* value */, + FormatSpec.BIGRAM_LEVEL_SIZE); + } + } } private static class ShortcutContentWriter extends SparseTableContentWriter { public ShortcutContentWriter(final String name, final int initialCapacity, final File baseDir) { - super(name + FormatSpec.SHORTCUT_FILE_EXTENSION, FormatSpec.SHORTCUT_CONTENT_COUNT, - initialCapacity, FormatSpec.SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE, baseDir, + super(name + FormatSpec.SHORTCUT_FILE_EXTENSION, initialCapacity, + FormatSpec.SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE, baseDir, new String[] { name + FormatSpec.SHORTCUT_FILE_EXTENSION }, new String[] { FormatSpec.SHORTCUT_CONTENT_ID }); } @@ -257,7 +302,8 @@ public class Ver4DictEncoder implements DictEncoder { if (MakedictLog.DBG) BinaryDictEncoderUtils.checkFlatPtNodeArrayList(flatNodes); writeTerminalData(flatNodes, terminalCount); - mBigramWriter = new BigramContentWriter(mBaseFilename, terminalCount, mDictDir); + mBigramWriter = new BigramContentWriter(mBaseFilename, terminalCount, mDictDir, + formatOptions.mHasTimestamp); writeBigrams(flatNodes, dict); mShortcutWriter = new ShortcutContentWriter(mBaseFilename, terminalCount, mDictDir); writeShortcuts(flatNodes); @@ -348,7 +394,7 @@ public class Ver4DictEncoder implements DictEncoder { for (final PtNodeArray nodeArray : flatNodes) { for (final PtNode ptNode : nodeArray.mData) { if (ptNode.mBigrams != null) { - mBigramWriter.writeBigramsForOneWord(ptNode.mTerminalId, + mBigramWriter.writeBigramsForOneWord(ptNode.mTerminalId, ptNode.mBigrams.size(), ptNode.mBigrams.iterator(), dict); } } diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java index 0189b3334..32c07e106 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java +++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java @@ -80,6 +80,9 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { new FormatSpec.FormatOptions(4, false /* supportsDynamicUpdate */); private static final FormatSpec.FormatOptions VERSION4_WITH_DYNAMIC_UPDATE = new FormatSpec.FormatOptions(4, true /* supportsDynamicUpdate */); + private static final FormatSpec.FormatOptions VERSION4_WITH_DYNAMIC_UPDATE_AND_TIMESTAMP = + new FormatSpec.FormatOptions(4, true /* supportsDynamicUpdate */, + true /* hasTimestamp */); private static final String TEST_DICT_FILE_EXTENSION = ".testDict"; @@ -363,6 +366,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { runReadAndWriteTests(results, USE_BYTE_BUFFER, VERSION3_WITH_DYNAMIC_UPDATE); runReadAndWriteTests(results, USE_BYTE_BUFFER, VERSION4_WITHOUT_DYNAMIC_UPDATE); runReadAndWriteTests(results, USE_BYTE_BUFFER, VERSION4_WITH_DYNAMIC_UPDATE); + runReadAndWriteTests(results, USE_BYTE_BUFFER, VERSION4_WITH_DYNAMIC_UPDATE_AND_TIMESTAMP); for (final String result : results) { Log.d(TAG, result); @@ -377,6 +381,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { runReadAndWriteTests(results, USE_BYTE_ARRAY, VERSION3_WITH_DYNAMIC_UPDATE); runReadAndWriteTests(results, USE_BYTE_ARRAY, VERSION4_WITHOUT_DYNAMIC_UPDATE); runReadAndWriteTests(results, USE_BYTE_ARRAY, VERSION4_WITH_DYNAMIC_UPDATE); + runReadAndWriteTests(results, USE_BYTE_ARRAY, VERSION4_WITH_DYNAMIC_UPDATE_AND_TIMESTAMP); for (final String result : results) { Log.d(TAG, result); @@ -508,6 +513,8 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { runReadUnigramsAndBigramsTests(results, USE_BYTE_BUFFER, VERSION3_WITH_DYNAMIC_UPDATE); runReadUnigramsAndBigramsTests(results, USE_BYTE_BUFFER, VERSION4_WITHOUT_DYNAMIC_UPDATE); runReadUnigramsAndBigramsTests(results, USE_BYTE_BUFFER, VERSION4_WITH_DYNAMIC_UPDATE); + runReadUnigramsAndBigramsTests(results, USE_BYTE_BUFFER, + VERSION4_WITH_DYNAMIC_UPDATE_AND_TIMESTAMP); for (final String result : results) { Log.d(TAG, result); @@ -522,6 +529,8 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { runReadUnigramsAndBigramsTests(results, USE_BYTE_ARRAY, VERSION3_WITH_DYNAMIC_UPDATE); runReadUnigramsAndBigramsTests(results, USE_BYTE_ARRAY, VERSION4_WITHOUT_DYNAMIC_UPDATE); runReadUnigramsAndBigramsTests(results, USE_BYTE_ARRAY, VERSION4_WITH_DYNAMIC_UPDATE); + runReadUnigramsAndBigramsTests(results, USE_BYTE_ARRAY, + VERSION4_WITH_DYNAMIC_UPDATE_AND_TIMESTAMP); for (final String result : results) { Log.d(TAG, result); @@ -634,12 +643,14 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { runGetTerminalPositionTests(USE_BYTE_ARRAY, VERSION3_WITH_DYNAMIC_UPDATE); runGetTerminalPositionTests(USE_BYTE_ARRAY, VERSION4_WITHOUT_DYNAMIC_UPDATE); runGetTerminalPositionTests(USE_BYTE_ARRAY, VERSION4_WITH_DYNAMIC_UPDATE); + runGetTerminalPositionTests(USE_BYTE_ARRAY, VERSION4_WITH_DYNAMIC_UPDATE_AND_TIMESTAMP); runGetTerminalPositionTests(USE_BYTE_BUFFER, VERSION2); runGetTerminalPositionTests(USE_BYTE_BUFFER, VERSION3_WITHOUT_DYNAMIC_UPDATE); runGetTerminalPositionTests(USE_BYTE_BUFFER, VERSION3_WITH_DYNAMIC_UPDATE); runGetTerminalPositionTests(USE_BYTE_BUFFER, VERSION4_WITHOUT_DYNAMIC_UPDATE); runGetTerminalPositionTests(USE_BYTE_BUFFER, VERSION4_WITH_DYNAMIC_UPDATE); + runGetTerminalPositionTests(USE_BYTE_BUFFER, VERSION4_WITH_DYNAMIC_UPDATE_AND_TIMESTAMP); for (final String result : results) { Log.d(TAG, result);