am 7abdab1c: Merge "Add a time stamp for unigrams."
* commit '7abdab1c6fe693f11cc5ee385b2ff4d5a84cfaf4': Add a time stamp for unigrams.
main
commit
075d8fa65a
|
@ -213,7 +213,6 @@ public final class FormatSpec {
|
|||
static final int SUPPORTS_DYNAMIC_UPDATE = 0x2;
|
||||
static final int FRENCH_LIGATURE_PROCESSING_FLAG = 0x4;
|
||||
static final int CONTAINS_BIGRAMS_FLAG = 0x8;
|
||||
// TODO: Implement timestamps for unigram.
|
||||
static final int CONTAINS_TIMESTAMP_FLAG = 0x10;
|
||||
|
||||
// TODO: Make this value adaptive to content data, store it in the header, and
|
||||
|
@ -267,6 +266,7 @@ public final class FormatSpec {
|
|||
// These values are used only by version 4 or later.
|
||||
static final String TRIE_FILE_EXTENSION = ".trie";
|
||||
static final String FREQ_FILE_EXTENSION = ".freq";
|
||||
static final String UNIGRAM_TIMESTAMP_FILE_EXTENSION = ".timestamp";
|
||||
// tat = Terminal Address Table
|
||||
static final String TERMINAL_ADDRESS_TABLE_FILE_EXTENSION = ".tat";
|
||||
static final String BIGRAM_FILE_EXTENSION = ".bigram";
|
||||
|
@ -275,6 +275,7 @@ public final class FormatSpec {
|
|||
static final String CONTENT_TABLE_FILE_SUFFIX = "_index";
|
||||
static final int FREQUENCY_AND_FLAGS_SIZE = 2;
|
||||
static final int TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE = 3;
|
||||
static final int UNIGRAM_TIMESTAMP_SIZE = 4;
|
||||
|
||||
// With the English main dictionary as of October 2013, the size of bigram address table is
|
||||
// 584KB with the block size being 4.
|
||||
|
|
|
@ -45,6 +45,7 @@ public class Ver4DictEncoder implements DictEncoder {
|
|||
private int mHeaderSize;
|
||||
private OutputStream mTrieOutStream;
|
||||
private OutputStream mFreqOutStream;
|
||||
private OutputStream mUnigramTimestampOutStream;
|
||||
private OutputStream mTerminalAddressTableOutStream;
|
||||
private File mDictDir;
|
||||
private String mBaseFilename;
|
||||
|
@ -238,18 +239,20 @@ public class Ver4DictEncoder implements DictEncoder {
|
|||
mDictDir = new File(mDictPlacedDir, mBaseFilename);
|
||||
final File trieFile = new File(mDictDir, mBaseFilename + FormatSpec.TRIE_FILE_EXTENSION);
|
||||
final File freqFile = new File(mDictDir, mBaseFilename + FormatSpec.FREQ_FILE_EXTENSION);
|
||||
final File timestampFile = new File(mDictDir,
|
||||
mBaseFilename + FormatSpec.UNIGRAM_TIMESTAMP_FILE_EXTENSION);
|
||||
final File terminalAddressTableFile = new File(mDictDir,
|
||||
mBaseFilename + FormatSpec.TERMINAL_ADDRESS_TABLE_FILE_EXTENSION);
|
||||
if (!mDictDir.isDirectory()) {
|
||||
if (mDictDir.exists()) mDictDir.delete();
|
||||
mDictDir.mkdirs();
|
||||
}
|
||||
if (!trieFile.exists()) trieFile.createNewFile();
|
||||
if (!freqFile.exists()) freqFile.createNewFile();
|
||||
if (!terminalAddressTableFile.exists()) terminalAddressTableFile.createNewFile();
|
||||
mTrieOutStream = new FileOutputStream(trieFile);
|
||||
mFreqOutStream = new FileOutputStream(freqFile);
|
||||
mTerminalAddressTableOutStream = new FileOutputStream(terminalAddressTableFile);
|
||||
if (formatOptions.mHasTimestamp) {
|
||||
mUnigramTimestampOutStream = new FileOutputStream(timestampFile);
|
||||
}
|
||||
}
|
||||
|
||||
private void close() throws IOException {
|
||||
|
@ -263,6 +266,9 @@ public class Ver4DictEncoder implements DictEncoder {
|
|||
if (mTerminalAddressTableOutStream != null) {
|
||||
mTerminalAddressTableOutStream.close();
|
||||
}
|
||||
if (mUnigramTimestampOutStream != null) {
|
||||
mUnigramTimestampOutStream.close();
|
||||
}
|
||||
} finally {
|
||||
mTrieOutStream = null;
|
||||
mFreqOutStream = null;
|
||||
|
@ -302,6 +308,9 @@ public class Ver4DictEncoder implements DictEncoder {
|
|||
if (MakedictLog.DBG) BinaryDictEncoderUtils.checkFlatPtNodeArrayList(flatNodes);
|
||||
|
||||
writeTerminalData(flatNodes, terminalCount);
|
||||
if (formatOptions.mHasTimestamp) {
|
||||
initUnigramTimestamps(terminalCount);
|
||||
}
|
||||
mBigramWriter = new BigramContentWriter(mBaseFilename, terminalCount, mDictDir,
|
||||
formatOptions.mHasTimestamp);
|
||||
writeBigrams(flatNodes, dict);
|
||||
|
@ -454,4 +463,11 @@ public class Ver4DictEncoder implements DictEncoder {
|
|||
mFreqOutStream.write(freqBuf);
|
||||
mTerminalAddressTableOutStream.write(terminalAddressTableBuf);
|
||||
}
|
||||
|
||||
/**
 * Writes the initial unigram timestamp data to {@code mUnigramTimestampOutStream}.
 *
 * <p>The data written is {@code terminalCount * FormatSpec.UNIGRAM_TIMESTAMP_SIZE} bytes,
 * all zero, i.e. one zeroed timestamp slot per terminal.
 *
 * <p>NOTE(review): {@code mUnigramTimestampOutStream} is dereferenced without a null check.
 * In the code visible here both the stream creation and the call to this method are guarded
 * by {@code formatOptions.mHasTimestamp}; confirm there are no other callers.
 *
 * @param terminalCount number of terminals to reserve a timestamp slot for
 * @throws IOException if writing to the timestamp output stream fails
 */
private void initUnigramTimestamps(final int terminalCount) throws IOException {
|
||||
// Initial value of time stamps for each word is 0. A freshly allocated Java byte array is
// zero-filled, so the buffer is written as-is without an explicit fill.
|
||||
final byte[] unigramTimestampBuf =
|
||||
new byte[terminalCount * FormatSpec.UNIGRAM_TIMESTAMP_SIZE];
|
||||
// Emit all timestamp slots in a single write.
mUnigramTimestampOutStream.write(unigramTimestampBuf);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue