From c32962b8f1f9b7255fef84486b53cfc874835bbd Mon Sep 17 00:00:00 2001 From: Yuichiro Hanada Date: Fri, 4 Oct 2013 17:38:02 +0900 Subject: [PATCH] Add a time stamp for unigrams. Bug: 10920255 Change-Id: I26d2cce3c322a4ff39a614f8615f43fb7bd3baed --- .../latin/makedict/FormatSpec.java | 3 ++- .../latin/makedict/Ver4DictEncoder.java | 22 ++++++++++++++++--- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java index 605930ab4..6d5827023 100644 --- a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java +++ b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java @@ -213,7 +213,6 @@ public final class FormatSpec { static final int SUPPORTS_DYNAMIC_UPDATE = 0x2; static final int FRENCH_LIGATURE_PROCESSING_FLAG = 0x4; static final int CONTAINS_BIGRAMS_FLAG = 0x8; - // TODO: Implement timestamps for unigram. static final int CONTAINS_TIMESTAMP_FLAG = 0x10; // TODO: Make this value adaptative to content data, store it in the header, and @@ -267,6 +266,7 @@ public final class FormatSpec { // These values are used only by version 4 or later. static final String TRIE_FILE_EXTENSION = ".trie"; static final String FREQ_FILE_EXTENSION = ".freq"; + static final String UNIGRAM_TIMESTAMP_FILE_EXTENSION = ".timestamp"; // tat = Terminal Address Table static final String TERMINAL_ADDRESS_TABLE_FILE_EXTENSION = ".tat"; static final String BIGRAM_FILE_EXTENSION = ".bigram"; @@ -275,6 +275,7 @@ public final class FormatSpec { static final String CONTENT_TABLE_FILE_SUFFIX = "_index"; static final int FREQUENCY_AND_FLAGS_SIZE = 2; static final int TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE = 3; + static final int UNIGRAM_TIMESTAMP_SIZE = 4; // With the English main dictionary as of October 2013, the size of bigram address table is // is 584KB with the block size being 4. diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java index fe9894246..5d5ab0462 100644 --- a/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java @@ -45,6 +45,7 @@ public class Ver4DictEncoder implements DictEncoder { private int mHeaderSize; private OutputStream mTrieOutStream; private OutputStream mFreqOutStream; + private OutputStream mUnigramTimestampOutStream; private OutputStream mTerminalAddressTableOutStream; private File mDictDir; private String mBaseFilename; @@ -238,18 +239,20 @@ public class Ver4DictEncoder implements DictEncoder { mDictDir = new File(mDictPlacedDir, mBaseFilename); final File trieFile = new File(mDictDir, mBaseFilename + FormatSpec.TRIE_FILE_EXTENSION); final File freqFile = new File(mDictDir, mBaseFilename + FormatSpec.FREQ_FILE_EXTENSION); + final File timestampFile = new File(mDictDir, + mBaseFilename + FormatSpec.UNIGRAM_TIMESTAMP_FILE_EXTENSION); final File terminalAddressTableFile = new File(mDictDir, mBaseFilename + FormatSpec.TERMINAL_ADDRESS_TABLE_FILE_EXTENSION); if (!mDictDir.isDirectory()) { if (mDictDir.exists()) mDictDir.delete(); mDictDir.mkdirs(); } - if (!trieFile.exists()) trieFile.createNewFile(); - if (!freqFile.exists()) freqFile.createNewFile(); - if (!terminalAddressTableFile.exists()) terminalAddressTableFile.createNewFile(); mTrieOutStream = new FileOutputStream(trieFile); mFreqOutStream = new FileOutputStream(freqFile); mTerminalAddressTableOutStream = new FileOutputStream(terminalAddressTableFile); + if (formatOptions.mHasTimestamp) { + mUnigramTimestampOutStream = new FileOutputStream(timestampFile); + } } private void close() throws IOException { @@ -263,6 +266,9 @@ public class Ver4DictEncoder implements DictEncoder { if (mTerminalAddressTableOutStream != null) { mTerminalAddressTableOutStream.close(); } + if (mUnigramTimestampOutStream != null) { + mUnigramTimestampOutStream.close(); + } } finally { mTrieOutStream = null; mFreqOutStream = null; @@ -302,6 +308,9 @@ public class Ver4DictEncoder implements DictEncoder { if (MakedictLog.DBG) BinaryDictEncoderUtils.checkFlatPtNodeArrayList(flatNodes); writeTerminalData(flatNodes, terminalCount); + if (formatOptions.mHasTimestamp) { + initUnigramTimestamps(terminalCount); + } mBigramWriter = new BigramContentWriter(mBaseFilename, terminalCount, mDictDir, formatOptions.mHasTimestamp); writeBigrams(flatNodes, dict); @@ -454,4 +463,11 @@ public class Ver4DictEncoder implements DictEncoder { mFreqOutStream.write(freqBuf); mTerminalAddressTableOutStream.write(terminalAddressTableBuf); } + + private void initUnigramTimestamps(final int terminalCount) throws IOException { + // Initial value of time stamps for each word is 0. + final byte[] unigramTimestampBuf = + new byte[terminalCount * FormatSpec.UNIGRAM_TIMESTAMP_SIZE]; + mUnigramTimestampOutStream.write(unigramTimestampBuf); + } }