From 2b7110ff1f55f33ad411efef4184ce822040a10b Mon Sep 17 00:00:00 2001 From: Yuichiro Hanada Date: Thu, 17 Oct 2013 19:10:56 +0900 Subject: [PATCH] (2/2) Implement insertWord in Ver4DictUpdater. Change-Id: I2328a9df0a009b564e8acaf4180f9b0c1ed0901a --- .../makedict/SparseTableContentReader.java | 16 +-- .../makedict/SparseTableContentUpdater.java | 123 +++++++++++++++++ .../latin/makedict/Ver4DictDecoder.java | 7 +- .../latin/makedict/Ver4DictUpdater.java | 128 +++++++++++++++++- .../makedict/BinaryDictIOUtilsTests.java | 3 +- 5 files changed, 257 insertions(+), 20 deletions(-) create mode 100644 java/src/com/android/inputmethod/latin/makedict/SparseTableContentUpdater.java diff --git a/java/src/com/android/inputmethod/latin/makedict/SparseTableContentReader.java b/java/src/com/android/inputmethod/latin/makedict/SparseTableContentReader.java index 00f401ea7..06088b651 100644 --- a/java/src/com/android/inputmethod/latin/makedict/SparseTableContentReader.java +++ b/java/src/com/android/inputmethod/latin/makedict/SparseTableContentReader.java @@ -40,16 +40,16 @@ public class SparseTableContentReader { public void read(final DictBuffer buffer); } - private final int mContentCount; - private final int mBlockSize; + protected final int mContentCount; + protected final int mBlockSize; protected final File mBaseDir; - private final File mLookupTableFile; - private final File[] mAddressTableFiles; - private final File[] mContentFiles; - private DictBuffer mLookupTableBuffer; - private final DictBuffer[] mAddressTableBuffers; + protected final File mLookupTableFile; + protected final File[] mAddressTableFiles; + protected final File[] mContentFiles; + protected DictBuffer mLookupTableBuffer; + protected final DictBuffer[] mAddressTableBuffers; private final DictBuffer[] mContentBuffers; - private final DictionaryBufferFactory mFactory; + protected final DictionaryBufferFactory mFactory; /** * Sole constructor of SparseTableContentReader. 
diff --git a/java/src/com/android/inputmethod/latin/makedict/SparseTableContentUpdater.java b/java/src/com/android/inputmethod/latin/makedict/SparseTableContentUpdater.java new file mode 100644 index 000000000..4518f21b9 --- /dev/null +++ b/java/src/com/android/inputmethod/latin/makedict/SparseTableContentUpdater.java @@ -0,0 +1,123 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.android.inputmethod.latin.makedict; + +import com.android.inputmethod.latin.makedict.DictDecoder.DictionaryBufferFactory; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; + +/** + * An auxiliary class for updating data associated with SparseTable. 
+ */
+public class SparseTableContentUpdater extends SparseTableContentReader {
+    /** Append-mode stream on the lookup table file; null until openStreamsAndBuffers(). */
+    protected OutputStream mLookupTableOutStream;
+    /** Append-mode streams on the address table files, one slot per content. */
+    protected OutputStream[] mAddressTableOutStreams;
+    /** Append-mode streams on the content files, one slot per content. */
+    protected OutputStream[] mContentOutStreams;
+
+    /**
+     * Creates an updater. All arguments are forwarded unchanged to SparseTableContentReader;
+     * no stream is opened until openStreamsAndBuffers() is called.
+     */
+    public SparseTableContentUpdater(final String name, final int blockSize,
+            final File baseDir, final String[] contentFilenames, final String[] contentIds,
+            final DictionaryBufferFactory factory) {
+        super(name, blockSize, baseDir, contentFilenames, contentIds, factory);
+        mAddressTableOutStreams = new OutputStream[mContentCount];
+        mContentOutStreams = new OutputStream[mContentCount];
+    }
+
+    /**
+     * Opens the buffers of the reader and one append-mode stream per table/content file.
+     *
+     * @throws IOException if a file cannot be opened; streams opened before the failure are
+     *         closed again before the exception is rethrown.
+     */
+    protected void openStreamsAndBuffers() throws IOException {
+        openBuffers();
+        try {
+            mLookupTableOutStream = new FileOutputStream(mLookupTableFile, true /* append */);
+            for (int i = 0; i < mContentCount; ++i) {
+                mAddressTableOutStreams[i] = new FileOutputStream(mAddressTableFiles[i],
+                        true /* append */);
+                mContentOutStreams[i] = new FileOutputStream(mContentFiles[i], true /* append */);
+            }
+        } catch (IOException e) {
+            // Release whatever was opened before the failure. The original error is the one
+            // worth reporting, so a secondary failure while cleaning up is dropped.
+            try {
+                close();
+            } catch (IOException ignored) {
+                // Intentionally ignored, see above.
+            }
+            throw e;
+        }
+    }
+
+    /**
+     * Set the contentIndex-th elements of contentId-th table.
+     *
+     * The lookup table and the address tables are extended with NOT_EXIST entries when the
+     * block that contains contentIndex does not exist yet.
+     *
+     * @param contentId the id of the content table.
+     * @param contentIndex the index where to set the value.
+     * @param value the value to set.
+     * @throws IOException if extending or reopening one of the tables fails.
+     */
+    protected void setContentValue(final int contentId, final int contentIndex, final int value)
+            throws IOException {
+        final int blockIndex = contentIndex / mBlockSize;
+        final int lookupPosition = blockIndex * SparseTable.SIZE_OF_INT_IN_BYTES;
+        if (lookupPosition >= mLookupTableBuffer.limit()) {
+            // Need to extend the lookup table
+            final int currentSize = mLookupTableBuffer.limit()
+                    / SparseTable.SIZE_OF_INT_IN_BYTES;
+            final int target = blockIndex + 1;
+            for (int i = currentSize; i < target; ++i) {
+                BinaryDictEncoderUtils.writeUIntToStream(mLookupTableOutStream,
+                        SparseTable.NOT_EXIST, SparseTable.SIZE_OF_INT_IN_BYTES);
+            }
+            // We need to reopen the byte buffer of the lookup table because a MappedByteBuffer in
+            // Java isn't expanded automatically when the underlying file is expanded.
+            reopenLookupTable();
+        }
+
+        mLookupTableBuffer.position(lookupPosition);
+        int posInAddressTable = mLookupTableBuffer.readInt();
+        if (posInAddressTable == SparseTable.NOT_EXIST) {
+            // Need to extend the address table
+            // Step back over the entry that was just read so that it can be overwritten.
+            mLookupTableBuffer.position(mLookupTableBuffer.position()
+                    - SparseTable.SIZE_OF_INT_IN_BYTES);
+            // NOTE(review): this entry is derived as limit() / mBlockSize, yet below it is used
+            // (plus a byte offset) as a byte position in the address table. Confirm that the
+            // unit matches what SparseTableContentReader expects for lookup entries.
+            posInAddressTable = mAddressTableBuffers[0].limit() / mBlockSize;
+            BinaryDictEncoderUtils.writeUIntToDictBuffer(mLookupTableBuffer,
+                    posInAddressTable, SparseTable.SIZE_OF_INT_IN_BYTES);
+            // Append one block of NOT_EXIST entries to every address table.
+            for (int i = 0; i < mContentCount; ++i) {
+                for (int j = 0; j < mBlockSize; ++j) {
+                    BinaryDictEncoderUtils.writeUIntToStream(mAddressTableOutStreams[i],
+                            SparseTable.NOT_EXIST, SparseTable.SIZE_OF_INT_IN_BYTES);
+                }
+            }
+            // We need to reopen the byte buffers of the address tables because a MappedByteBuffer
+            // in Java isn't expanded automatically when the underlying file is expanded.
+            reopenAddressTables();
+        }
+        posInAddressTable += (contentIndex % mBlockSize) * SparseTable.SIZE_OF_INT_IN_BYTES;
+
+        mAddressTableBuffers[contentId].position(posInAddressTable);
+        BinaryDictEncoderUtils.writeUIntToDictBuffer(mAddressTableBuffers[contentId],
+                value, SparseTable.SIZE_OF_INT_IN_BYTES);
+    }
+
+    /** Flushes the lookup table stream and maps the (possibly grown) file again. */
+    private void reopenLookupTable() throws IOException {
+        mLookupTableOutStream.flush();
+        mLookupTableBuffer = mFactory.getDictionaryBuffer(mLookupTableFile);
+    }
+
+    /** Flushes every address table stream and maps the (possibly grown) files again. */
+    private void reopenAddressTables() throws IOException {
+        for (int i = 0; i < mContentCount; ++i) {
+            mAddressTableOutStreams[i].flush();
+            mAddressTableBuffers[i] = mFactory.getDictionaryBuffer(mAddressTableFiles[i]);
+        }
+    }
+
+    /**
+     * Closes every stream that is currently open and forgets it. Streams that were never
+     * opened are skipped, and a failure on one stream does not prevent the others from being
+     * closed.
+     *
+     * @throws IOException the first failure met while closing, if any.
+     */
+    protected void close() throws IOException {
+        IOException firstFailure = closeStream(mLookupTableOutStream, null);
+        mLookupTableOutStream = null;
+        for (int i = 0; i < mAddressTableOutStreams.length; ++i) {
+            firstFailure = closeStream(mAddressTableOutStreams[i], firstFailure);
+            mAddressTableOutStreams[i] = null;
+        }
+        for (int i = 0; i < mContentOutStreams.length; ++i) {
+            firstFailure = closeStream(mContentOutStreams[i], firstFailure);
+            mContentOutStreams[i] = null;
+        }
+        if (firstFailure != null) {
+            throw firstFailure;
+        }
+    }
+
+    /**
+     * Closes stream when it is not null.
+     *
+     * @return pending when it is not null, otherwise the exception raised by this close, or
+     *         null when nothing failed.
+     */
+    private static IOException closeStream(final OutputStream stream,
+            final IOException pending) {
+        if (stream == null) {
+            return pending;
+        }
+        try {
+            stream.close();
+        } catch (IOException e) {
+            return pending != null ? pending : e;
+        }
+        return pending;
+    }
+}
diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java
b/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java index ae344a2cd..f0fed3fda 100644 --- a/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java @@ -46,7 +46,7 @@ public class Ver4DictDecoder extends AbstractDictDecoder { protected static final int FILETYPE_BIGRAM_FREQ = 4; protected static final int FILETYPE_SHORTCUT = 5; - private final File mDictDirectory; + protected final File mDictDirectory; protected final DictionaryBufferFactory mBufferFactory; protected DictBuffer mDictBuffer; protected DictBuffer mFrequencyBuffer; @@ -178,7 +178,8 @@ public class Ver4DictDecoder extends AbstractDictDecoder { } // TODO: Consolidate this method and BigramContentWriter.getContentFilenames. - private static String[] getContentFilenames(final String name, final boolean hasTimestamp) { + protected static String[] getContentFilenames(final String name, + final boolean hasTimestamp) { final String[] contentFilenames; if (hasTimestamp) { contentFilenames = new String[] { name + FormatSpec.BIGRAM_FILE_EXTENSION, @@ -190,7 +191,7 @@ public class Ver4DictDecoder extends AbstractDictDecoder { } // TODO: Consolidate this method and BigramContentWriter.getContentIds. 
- private static String[] getContentIds(final boolean hasTimestamp) { + protected static String[] getContentIds(final boolean hasTimestamp) { final String[] contentIds; if (hasTimestamp) { contentIds = new String[] { FormatSpec.BIGRAM_FREQ_CONTENT_ID, diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver4DictUpdater.java b/java/src/com/android/inputmethod/latin/makedict/Ver4DictUpdater.java index d1e723812..65860ee72 100644 --- a/java/src/com/android/inputmethod/latin/makedict/Ver4DictUpdater.java +++ b/java/src/com/android/inputmethod/latin/makedict/Ver4DictUpdater.java @@ -22,6 +22,7 @@ import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; +import com.android.inputmethod.latin.utils.CollectionUtils; import android.util.Log; @@ -31,6 +32,7 @@ import java.io.IOException; import java.io.OutputStream; import java.util.ArrayList; import java.util.Arrays; +import java.util.Iterator; /** * An implementation of DictUpdater for version 4 binary dictionary. 
@@ -50,6 +52,91 @@ public class Ver4DictUpdater extends Ver4DictDecoder implements DictUpdater {
         mFrequencyFile = getFile(FILETYPE_FREQUENCY);
     }
 
+    /**
+     * Appends bigram attribute lists to the bigram content file and records, in the sparse
+     * table, where the list of a given terminal starts.
+     */
+    private static class BigramContentUpdater extends SparseTableContentUpdater {
+        private final boolean mHasTimestamp;
+
+        public BigramContentUpdater(final String name, final File baseDir,
+                final boolean hasTimestamp) {
+            super(name + FormatSpec.BIGRAM_FILE_EXTENSION,
+                    FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, baseDir,
+                    BigramContentReader.getContentFilenames(name, hasTimestamp),
+                    BigramContentReader.getContentIds(hasTimestamp),
+                    new DictionaryBufferFromWritableByteBufferFactory());
+            mHasTimestamp = hasTimestamp;
+        }
+
+        /**
+         * Writes the bigram list of a terminal. A null or empty list stores NOT_EXIST for the
+         * terminal instead. The streams are opened on entry and always closed before this
+         * method returns or throws.
+         *
+         * @param terminalId the terminal id owning the bigrams; must not be negative.
+         * @param frequency passed to makeBigramFlags together with each entry's mFrequency.
+         * @param entries the attributes to write; mAddress of each entry is written as is.
+         * @throws RuntimeException if terminalId is negative.
+         * @throws IOException if one of the files cannot be opened or written.
+         */
+        public void insertBigramEntries(final int terminalId, final int frequency,
+                final ArrayList<PendingAttribute> entries) throws IOException {
+            if (terminalId < 0) {
+                throw new RuntimeException("Invalid terminal id : " + terminalId);
+            }
+            openStreamsAndBuffers();
+            try {
+                if (entries == null || entries.isEmpty()) {
+                    setContentValue(FormatSpec.BIGRAM_FREQ_CONTENT_INDEX, terminalId,
+                            SparseTable.NOT_EXIST);
+                    return;
+                }
+                // The list is appended, so it starts at the current end of the content file.
+                final int positionOfEntries =
+                        (int) mContentFiles[FormatSpec.BIGRAM_FREQ_CONTENT_INDEX].length();
+                setContentValue(FormatSpec.BIGRAM_FREQ_CONTENT_INDEX, terminalId,
+                        positionOfEntries);
+
+                final OutputStream contentStream =
+                        mContentOutStreams[FormatSpec.BIGRAM_FREQ_CONTENT_INDEX];
+                final Iterator<PendingAttribute> bigramIterator = entries.iterator();
+                while (bigramIterator.hasNext()) {
+                    final PendingAttribute entry = bigramIterator.next();
+                    // hasNext() is evaluated after next(): it tells whether more entries follow.
+                    final int flags = BinaryDictEncoderUtils.makeBigramFlags(
+                            bigramIterator.hasNext(), 0 /* offset */, entry.mFrequency,
+                            frequency, "" /* word */);
+                    BinaryDictEncoderUtils.writeUIntToStream(contentStream, flags,
+                            FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
+                    BinaryDictEncoderUtils.writeUIntToStream(contentStream, entry.mAddress,
+                            FormatSpec.PTNODE_ATTRIBUTE_MAX_ADDRESS_SIZE);
+                }
+            } finally {
+                // Also reached on the early return and on failures, so no stream is leaked.
+                close();
+            }
+        }
+    }
+
+    /**
+     * Appends shortcut lists to the shortcut content file and records, in the sparse table,
+     * where the list of a given terminal starts.
+     */
+    private static class ShortcutContentUpdater extends SparseTableContentUpdater {
+        public ShortcutContentUpdater(final String name, final File baseDir) {
+            super(name + FormatSpec.SHORTCUT_FILE_EXTENSION,
+                    FormatSpec.SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE, baseDir,
+                    new String[] { name + FormatSpec.SHORTCUT_FILE_EXTENSION },
+                    new String[] { FormatSpec.SHORTCUT_CONTENT_ID },
+                    new DictionaryBufferFromWritableByteBufferFactory());
+        }
+
+        /**
+         * Writes the shortcut list of a terminal. A null or empty list stores NOT_EXIST for
+         * the terminal instead. The streams are opened on entry and always closed before this
+         * method returns or throws.
+         *
+         * @param terminalId the terminal id owning the shortcuts; must not be negative.
+         * @param shortcuts the shortcut targets to write.
+         * @throws RuntimeException if terminalId is negative.
+         * @throws IOException if one of the files cannot be opened or written.
+         */
+        public void insertShortcuts(final int terminalId,
+                final ArrayList<WeightedString> shortcuts) throws IOException {
+            if (terminalId < 0) {
+                throw new RuntimeException("Invalid terminal id : " + terminalId);
+            }
+            openStreamsAndBuffers();
+            try {
+                if (shortcuts == null || shortcuts.isEmpty()) {
+                    setContentValue(FormatSpec.SHORTCUT_CONTENT_INDEX, terminalId,
+                            SparseTable.NOT_EXIST);
+                    return;
+                }
+
+                // The list is appended, so it starts at the current end of the content file.
+                final int positionOfShortcuts =
+                        (int) mContentFiles[FormatSpec.SHORTCUT_CONTENT_INDEX].length();
+                setContentValue(FormatSpec.SHORTCUT_CONTENT_INDEX, terminalId,
+                        positionOfShortcuts);
+
+                final OutputStream contentStream =
+                        mContentOutStreams[FormatSpec.SHORTCUT_CONTENT_INDEX];
+                final Iterator<WeightedString> shortcutIterator = shortcuts.iterator();
+                while (shortcutIterator.hasNext()) {
+                    final WeightedString target = shortcutIterator.next();
+                    // hasNext() is evaluated after next(): it tells whether more targets follow.
+                    final int shortcutFlags = BinaryDictEncoderUtils.makeShortcutFlags(
+                            shortcutIterator.hasNext(), target.mFrequency);
+                    BinaryDictEncoderUtils.writeUIntToStream(contentStream, shortcutFlags,
+                            FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
+                    CharEncoding.writeString(contentStream, target.mWord);
+                }
+            } finally {
+                // Also reached on the early return and on failures, so no stream is leaked.
+                close();
+            }
+        }
+    }
+
     @Override
     public void deleteWord(final String word) throws IOException, UnsupportedFormatException {
         if (mDictBuffer == null) openDictBuffer();
@@ -574,6 +661,7 @@ public class Ver4DictUpdater extends Ver4DictDecoder implements DictUpdater {
                 true /* append */);
         BinaryDictEncoderUtils.writeUIntToStream(frequencyStream, frequency,
                 FormatSpec.FREQUENCY_AND_FLAGS_SIZE);
+        frequencyStream.close();
     }
 
     private void insertTerminalPosition(final int posOfTerminal) throws IOException {
@@ -581,14 +669,37 @@ public
class Ver4DictUpdater extends Ver4DictDecoder implements DictUpdater {
                 getFile(FILETYPE_TERMINAL_ADDRESS_TABLE), true /* append */);
         BinaryDictEncoderUtils.writeUIntToStream(terminalPosStream, posOfTerminal,
                 FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE);
+        terminalPosStream.close();
     }
 
-    private void insertBigrams(final int terminalId, final ArrayList bigrams) {
-        // TODO: Implement.
+    /**
+     * Resolves every bigram target position to its terminal id and stores the resulting list
+     * for terminalId through a BigramContentUpdater. The dict buffer is opened on entry and
+     * close() is always called before this method returns or throws.
+     *
+     * @param terminalId the terminal id of the word owning the bigrams.
+     * @param frequency forwarded to insertBigramEntries and stored in every created entry.
+     * @param bigramAddresses attributes whose mAddress is a PtNode position in the dict buffer.
+     * @throws RuntimeException if a target PtNode is not a terminal.
+     */
+    private void insertBigrams(final int terminalId, final int frequency,
+            final ArrayList<PendingAttribute> bigramAddresses)
+            throws IOException, UnsupportedFormatException {
+        openDictBuffer();
+        try {
+            final BigramContentUpdater updater = new BigramContentUpdater(
+                    mDictDirectory.getName(), mDictDirectory, false);
+
+            // Convert addresses to terminal ids.
+            final ArrayList<PendingAttribute> bigrams = CollectionUtils.newArrayList();
+            mDictBuffer.position(0);
+            final FileHeader header = readHeader();
+            for (PendingAttribute attr : bigramAddresses) {
+                mDictBuffer.position(attr.mAddress);
+                final Ver4PtNodeInfo info = readVer4PtNodeInfo(attr.mAddress,
+                        header.mFormatOptions);
+                if (info.mTerminalId == PtNode.NOT_A_TERMINAL) {
+                    throw new RuntimeException(
+                            "We can't have a bigram target that's not a terminal.");
+                }
+                // NOTE(review): attr.mFrequency is not used; every entry is created with the
+                // frequency argument instead. Confirm that this is intended.
+                bigrams.add(new PendingAttribute(frequency, info.mTerminalId));
+            }
+            updater.insertBigramEntries(terminalId, frequency, bigrams);
+        } finally {
+            // Release the buffers and the stream even when a target cannot be resolved.
+            close();
+        }
     }
 
-    private void insertShortcuts(final int terminalId, final ArrayList shortcuts) {
-        // TODO: Implement.
+    /**
+     * Stores the shortcut list of terminalId through a ShortcutContentUpdater.
+     *
+     * @param terminalId the terminal id of the word owning the shortcuts.
+     * @param shortcuts the shortcut targets, passed unchanged to the updater.
+     */
+    private void insertShortcuts(final int terminalId,
+            final ArrayList<WeightedString> shortcuts)
+            throws IOException {
+        final ShortcutContentUpdater updater = new ShortcutContentUpdater(mDictDirectory.getName(),
+                mDictDirectory);
+        updater.insertShortcuts(terminalId, shortcuts);
     }
 
     private void openBuffersAndStream() throws IOException {
@@ -597,7 +708,10 @@ public class Ver4DictUpdater extends Ver4DictDecoder implements DictUpdater {
     }
 
     private void close() throws IOException {
-        mDictStream.close();
+        if (mDictStream != null) {
+            mDictStream.close();
+            mDictStream = null;
+        }
         mDictBuffer = null;
         mFrequencyBuffer = null;
         mTerminalAddressTableBuffer = null;
@@ -620,7 +734,7 @@ public class Ver4DictUpdater extends Ver4DictDecoder implements DictUpdater {
         mDictBuffer.put((byte) newFlags);
 
         updateFrequency(terminalId, frequency);
-        insertBigrams(terminalId,
+        insertBigrams(terminalId, frequency,
                 DynamicBinaryDictIOUtils.resolveBigramPositions(this, bigramStrings));
         insertShortcuts(terminalId, shortcuts);
     }
@@ -650,7 +764,7 @@ public class Ver4DictUpdater extends Ver4DictDecoder implements DictUpdater {
         insertTerminalPosition(posOfTerminal);
         close();
 
-        insertBigrams(newTerminalId,
+        insertBigrams(newTerminalId, frequency,
                 DynamicBinaryDictIOUtils.resolveBigramPositions(this, bigramStrings));
         insertShortcuts(newTerminalId, shortcuts);
     }
diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtilsTests.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtilsTests.java
index b831f1fb3..8bea3c074 100644
--- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtilsTests.java
+++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtilsTests.java
@@ -330,8 +330,7 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
 
     public void testInsertWordWithBigrams() {
         runTestInsertWordWithBigrams(BinaryDictUtils.VERSION3_WITH_DYNAMIC_UPDATE);
-        // TODO: Add a test for version 4.
- // runTestInsertWordWithBigrams(BinaryDictUtils.VERSION4_WITH_DYNAMIC_UPDATE); + runTestInsertWordWithBigrams(BinaryDictUtils.VERSION4_WITH_DYNAMIC_UPDATE); } private void runTestRandomWords(final FormatOptions formatOptions) {