From 22c5c450fecb856100059f4e5b34b847fb0acfa7 Mon Sep 17 00:00:00 2001 From: Yuichiro Hanada Date: Thu, 12 Sep 2013 20:53:44 +0900 Subject: [PATCH] Make Ver4DictEncoder write an address table of terminal nodes. Bug: 10920165 Change-Id: I86017456ea9fa5d6e12b57172c34f5ed4f88ef94 --- .../makedict/BinaryDictEncoderUtils.java | 4 +- .../latin/makedict/FormatSpec.java | 3 ++ .../latin/makedict/Ver4DictDecoder.java | 7 +++ .../latin/makedict/Ver4DictEncoder.java | 47 ++++++++++++++----- 4 files changed, 49 insertions(+), 12 deletions(-) diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java index 4dba8e5cf..3b1d2427b 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java @@ -882,8 +882,9 @@ public class BinaryDictEncoderUtils { * @param destination the stream to write the file header to. * @param dict the dictionary to write. * @param formatOptions file format options. + * @return the size of the header. */ - /* package */ static void writeDictionaryHeader(final OutputStream destination, + /* package */ static int writeDictionaryHeader(final OutputStream destination, final FusionDictionary dict, final FormatOptions formatOptions) throws IOException, UnsupportedFormatException { final int version = formatOptions.mVersion; @@ -932,5 +933,6 @@ public class BinaryDictEncoderUtils { destination.write(bytes); headerBuffer.close(); + return size; } } diff --git a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java index 51b89a02a..aa5129ccb 100644 --- a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java +++ b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java @@ -263,7 +263,10 @@ public final class FormatSpec { // These values are used only by version 4 or later. static final String TRIE_FILE_EXTENSION = ".trie"; static final String FREQ_FILE_EXTENSION = ".freq"; + // tat = Terminal Address Table + static final String TERMINAL_ADDRESS_TABLE_FILE_EXTENSION = ".tat"; static final int FREQUENCY_AND_FLAGS_SIZE = 2; + static final int TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE = 3; static final int NO_CHILDREN_ADDRESS = Integer.MIN_VALUE; static final int NO_PARENT_ADDRESS = 0; diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java index 36c5a2720..4c8ff8ea4 100644 --- a/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java @@ -41,11 +41,13 @@ public class Ver4DictDecoder extends DictDecoder { private static final int FILETYPE_TRIE = 1; private static final int FILETYPE_FREQUENCY = 2; + private static final int FILETYPE_TERMINAL_ADDRESS_TABLE = 3; private final File mDictDirectory; private final DictionaryBufferFactory mBufferFactory; private DictBuffer mDictBuffer; private DictBuffer mFrequencyBuffer; + private DictBuffer mTerminalAddressTableBuffer; @UsedForTesting /* package */ Ver4DictDecoder(final File dictDirectory, final int factoryFlag) { @@ -77,6 +79,9 @@ public class Ver4DictDecoder extends DictDecoder { } else if (fileType == FILETYPE_FREQUENCY) { return new File(mDictDirectory, mDictDirectory.getName() + FormatSpec.FREQ_FILE_EXTENSION); + } else if (fileType == FILETYPE_TERMINAL_ADDRESS_TABLE) { + return new File(mDictDirectory, + mDictDirectory.getName() + FormatSpec.TERMINAL_ADDRESS_TABLE_FILE_EXTENSION); } else { throw new RuntimeException("Unsupported kind of file : " + fileType); } @@ -87,6 +92,8 @@ public class Ver4DictDecoder extends DictDecoder { final String filename = mDictDirectory.getName(); mDictBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_TRIE)); mFrequencyBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_FREQUENCY)); + mTerminalAddressTableBuffer = mBufferFactory.getDictionaryBuffer( + getFile(FILETYPE_TERMINAL_ADDRESS_TABLE)); } @Override diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java index 75b75ae2e..4fb89671f 100644 --- a/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java @@ -41,10 +41,11 @@ import java.util.Iterator; public class Ver4DictEncoder implements DictEncoder { private final File mDictPlacedDir; private byte[] mTrieBuf; - private byte[] mFreqBuf; private int mTriePos; + private int mHeaderSize; private OutputStream mTrieOutStream; private OutputStream mFreqOutStream; + private OutputStream mTerminalAddressTableOutStream; @UsedForTesting public Ver4DictEncoder(final File dictPlacedDir) { @@ -58,14 +59,18 @@ public class Ver4DictEncoder implements DictEncoder { final File mDictDir = new File(mDictPlacedDir, filename); final File trieFile = new File(mDictDir, filename + FormatSpec.TRIE_FILE_EXTENSION); final File freqFile = new File(mDictDir, filename + FormatSpec.FREQ_FILE_EXTENSION); + final File terminalAddressTableFile = new File(mDictDir, + filename + FormatSpec.TERMINAL_ADDRESS_TABLE_FILE_EXTENSION); if (!mDictDir.isDirectory()) { if (mDictDir.exists()) mDictDir.delete(); mDictDir.mkdirs(); } if (!trieFile.exists()) trieFile.createNewFile(); if (!freqFile.exists()) freqFile.createNewFile(); + if (!terminalAddressTableFile.exists()) terminalAddressTableFile.createNewFile(); mTrieOutStream = new FileOutputStream(trieFile); mFreqOutStream = new FileOutputStream(freqFile); + mTerminalAddressTableOutStream = new FileOutputStream(terminalAddressTableFile); } private void close() throws IOException { @@ -76,9 +81,13 @@ public class Ver4DictEncoder implements DictEncoder { if (mFreqOutStream != null) { mFreqOutStream.close(); } + if (mTerminalAddressTableOutStream != null) { + mTerminalAddressTableOutStream.close(); + } } finally { mTrieOutStream = null; mFreqOutStream = null; + mTerminalAddressTableOutStream = null; } } @@ -97,7 +106,8 @@ public class Ver4DictEncoder implements DictEncoder { openStreams(formatOptions, dict.mOptions); } - BinaryDictEncoderUtils.writeDictionaryHeader(mTrieOutStream, dict, formatOptions); + mHeaderSize = BinaryDictEncoderUtils.writeDictionaryHeader(mTrieOutStream, dict, + formatOptions); MakedictLog.i("Flattening the tree..."); ArrayList flatNodes = BinaryDictEncoderUtils.flattenTree(dict.mRootNodeArray); @@ -112,10 +122,11 @@ public class Ver4DictEncoder implements DictEncoder { BinaryDictEncoderUtils.computeAddresses(dict, flatNodes, formatOptions); if (MakedictLog.DBG) BinaryDictEncoderUtils.checkFlatPtNodeArrayList(flatNodes); + writeTerminalData(flatNodes, terminalCount); + final PtNodeArray lastNodeArray = flatNodes.get(flatNodes.size() - 1); final int bufferSize = lastNodeArray.mCachedAddressAfterUpdate + lastNodeArray.mCachedSize; mTrieBuf = new byte[bufferSize]; - mFreqBuf = new byte[terminalCount * FormatSpec.FREQUENCY_AND_FLAGS_SIZE]; MakedictLog.i("Writing file..."); for (PtNodeArray nodeArray : flatNodes) { @@ -126,7 +137,6 @@ public class Ver4DictEncoder implements DictEncoder { MakedictLog.i("has " + terminalCount + " terminals."); } mTrieOutStream.write(mTrieBuf); - mFreqOutStream.write(mFreqBuf); MakedictLog.i("Done"); close(); @@ -185,12 +195,6 @@ public class Ver4DictEncoder implements DictEncoder { FormatSpec.PTNODE_TERMINAL_ID_SIZE); } - private void writeFrequency(final int frequency, final int terminalId) { - final int freqPos = terminalId * FormatSpec.FREQUENCY_AND_FLAGS_SIZE; - BinaryDictEncoderUtils.writeUIntToBuffer(mFreqBuf, freqPos, frequency, - FormatSpec.FREQUENCY_AND_FLAGS_SIZE); - } - private void writeChildrenPosition(PtNode ptNode, FormatOptions formatOptions) { final int childrenPos = BinaryDictEncoderUtils.getChildrenPosition(ptNode, formatOptions); if (formatOptions.mSupportsDynamicUpdate) { @@ -260,10 +264,31 @@ public class Ver4DictEncoder implements DictEncoder { writeCharacters(ptNode.mChars, ptNode.hasSeveralChars()); if (ptNode.isTerminal()) { writeTerminalId(ptNode.mTerminalId); - writeFrequency(ptNode.mFrequency, ptNode.mTerminalId); } writeChildrenPosition(ptNode, formatOptions); writeShortcuts(ptNode.mShortcutTargets); writeBigrams(ptNode.mBigrams, dict); } + + private void writeTerminalData(final ArrayList flatNodes, + final int terminalCount) throws IOException { + final byte[] freqBuf = new byte[terminalCount * FormatSpec.FREQUENCY_AND_FLAGS_SIZE]; + final byte[] terminalAddressTableBuf = + new byte[terminalCount * FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE]; + for (final PtNodeArray nodeArray : flatNodes) { + for (final PtNode ptNode : nodeArray.mData) { + if (ptNode.isTerminal()) { + BinaryDictEncoderUtils.writeUIntToBuffer(freqBuf, + ptNode.mTerminalId * FormatSpec.FREQUENCY_AND_FLAGS_SIZE, + ptNode.mFrequency, FormatSpec.FREQUENCY_AND_FLAGS_SIZE); + BinaryDictEncoderUtils.writeUIntToBuffer(terminalAddressTableBuf, + ptNode.mTerminalId * FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, + ptNode.mCachedAddressAfterUpdate + mHeaderSize, + FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE); + } + } + } + mFreqOutStream.write(freqBuf); + mTerminalAddressTableOutStream.write(terminalAddressTableBuf); + } }