(2/2) Implement insertWord in Ver4DictUpdater.

Change-Id: I2328a9df0a009b564e8acaf4180f9b0c1ed0901a
This commit is contained in:
Yuichiro Hanada 2013-10-17 19:10:56 +09:00
parent 19b247e79d
commit 2b7110ff1f
5 changed files with 257 additions and 20 deletions
java/src/com/android/inputmethod/latin/makedict
tests/src/com/android/inputmethod/latin/makedict

View file

@ -40,16 +40,16 @@ public class SparseTableContentReader {
public void read(final DictBuffer buffer); public void read(final DictBuffer buffer);
} }
private final int mContentCount; protected final int mContentCount;
private final int mBlockSize; protected final int mBlockSize;
protected final File mBaseDir; protected final File mBaseDir;
private final File mLookupTableFile; protected final File mLookupTableFile;
private final File[] mAddressTableFiles; protected final File[] mAddressTableFiles;
private final File[] mContentFiles; protected final File[] mContentFiles;
private DictBuffer mLookupTableBuffer; protected DictBuffer mLookupTableBuffer;
private final DictBuffer[] mAddressTableBuffers; protected final DictBuffer[] mAddressTableBuffers;
private final DictBuffer[] mContentBuffers; private final DictBuffer[] mContentBuffers;
private final DictionaryBufferFactory mFactory; protected final DictionaryBufferFactory mFactory;
/** /**
* Sole constructor of SparseTableContentReader. * Sole constructor of SparseTableContentReader.

View file

@ -0,0 +1,123 @@
/*
* Copyright (C) 2013 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.android.inputmethod.latin.makedict;
import com.android.inputmethod.latin.makedict.DictDecoder.DictionaryBufferFactory;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
/**
 * An auxiliary class for updating data associated with SparseTable.
 *
 * In addition to the buffers inherited from {@link SparseTableContentReader}, this class keeps
 * append-mode output streams on the lookup table file, on every address table file and on every
 * content file. Files are grown through the streams; existing entries are overwritten in place
 * through the buffers.
 */
public class SparseTableContentUpdater extends SparseTableContentReader {
    protected OutputStream mLookupTableOutStream;
    protected OutputStream[] mAddressTableOutStreams;
    protected OutputStream[] mContentOutStreams;

    /**
     * Forwards every argument to the {@link SparseTableContentReader} constructor and allocates
     * one address table stream slot and one content stream slot per content.
     */
    public SparseTableContentUpdater(final String name, final int blockSize,
            final File baseDir, final String[] contentFilenames, final String[] contentIds,
            final DictionaryBufferFactory factory) {
        super(name, blockSize, baseDir, contentFilenames, contentIds, factory);
        mAddressTableOutStreams = new OutputStream[mContentCount];
        mContentOutStreams = new OutputStream[mContentCount];
    }

    /**
     * Opens the buffers and then an append-mode stream on each underlying file.
     *
     * Streams that are still open from a previous call are closed first, and streams opened by
     * this call are released again if a later one fails to open, so no file handle is leaked.
     *
     * @throws IOException if a buffer or a stream cannot be opened.
     */
    protected void openStreamsAndBuffers() throws IOException {
        openBuffers();
        // A second call would otherwise overwrite the references to still-open streams.
        closeStreams();
        try {
            mLookupTableOutStream = new FileOutputStream(mLookupTableFile, true /* append */);
            for (int i = 0; i < mContentCount; ++i) {
                mAddressTableOutStreams[i] = new FileOutputStream(mAddressTableFiles[i],
                        true /* append */);
                mContentOutStreams[i] = new FileOutputStream(mContentFiles[i], true /* append */);
            }
        } catch (final IOException e) {
            // Release whatever was opened before the failure; the open failure is the one that
            // matters to the caller, so a secondary close failure is deliberately dropped.
            try {
                closeStreams();
            } catch (final IOException ignored) {
                // Intentionally ignored, see above.
            }
            throw e;
        }
    }

    /**
     * Set the contentIndex-th elements of contentId-th table.
     *
     * The lookup table and the address tables are extended with NOT_EXIST entries when they do
     * not cover contentIndex yet. {@link #openStreamsAndBuffers()} is expected to have been
     * called before this method, since both the streams and the buffers are used.
     *
     * @param contentId the id of the content table.
     * @param contentIndex the index where to set the value.
     * @param value the value to set.
     * @throws IllegalArgumentException if contentId or contentIndex is out of range.
     * @throws IOException if one of the underlying files cannot be extended.
     */
    protected void setContentValue(final int contentId, final int contentIndex, final int value)
            throws IOException {
        // Validate before touching any file so that a bad argument cannot leave the tables
        // extended but unset.
        if (contentId < 0 || contentId >= mContentCount) {
            throw new IllegalArgumentException("Invalid content id : " + contentId);
        }
        if (contentIndex < 0) {
            throw new IllegalArgumentException("Invalid content index : " + contentIndex);
        }

        final int blockId = contentIndex / mBlockSize;
        final int posInLookupTable = blockId * SparseTable.SIZE_OF_INT_IN_BYTES;
        if (posInLookupTable >= mLookupTableBuffer.limit()) {
            // Need to extend the lookup table
            final int currentSize = mLookupTableBuffer.limit()
                    / SparseTable.SIZE_OF_INT_IN_BYTES;
            final int target = blockId + 1;
            for (int i = currentSize; i < target; ++i) {
                BinaryDictEncoderUtils.writeUIntToStream(mLookupTableOutStream,
                        SparseTable.NOT_EXIST, SparseTable.SIZE_OF_INT_IN_BYTES);
            }
            // We need to reopen the byte buffer of the lookup table because a MappedByteBuffer in
            // Java isn't expanded automatically when the underlying file is expanded.
            reopenLookupTable();
        }

        mLookupTableBuffer.position(posInLookupTable);
        int posInAddressTable = mLookupTableBuffer.readInt();
        if (posInAddressTable == SparseTable.NOT_EXIST) {
            // Need to extend the address table
            // Step back over the int that was just read so that it can be overwritten.
            mLookupTableBuffer.position(mLookupTableBuffer.position()
                    - SparseTable.SIZE_OF_INT_IN_BYTES);
            // NOTE(review): the start of the new block is computed as limit() / mBlockSize,
            // whereas the offset added further down is expressed in bytes. Confirm against
            // SparseTableContentReader that both sides agree on the unit stored here.
            posInAddressTable = mAddressTableBuffers[0].limit() / mBlockSize;
            BinaryDictEncoderUtils.writeUIntToDictBuffer(mLookupTableBuffer,
                    posInAddressTable, SparseTable.SIZE_OF_INT_IN_BYTES);
            // Every address table gets a whole new block of NOT_EXIST entries.
            for (int i = 0; i < mContentCount; ++i) {
                for (int j = 0; j < mBlockSize; ++j) {
                    BinaryDictEncoderUtils.writeUIntToStream(mAddressTableOutStreams[i],
                            SparseTable.NOT_EXIST, SparseTable.SIZE_OF_INT_IN_BYTES);
                }
            }
            // We need to reopen the byte buffers of the address tables because a MappedByteBuffer
            // in Java isn't expanded automatically when the underlying file is expanded.
            reopenAddressTables();
        }
        posInAddressTable += (contentIndex % mBlockSize) * SparseTable.SIZE_OF_INT_IN_BYTES;
        mAddressTableBuffers[contentId].position(posInAddressTable);
        BinaryDictEncoderUtils.writeUIntToDictBuffer(mAddressTableBuffers[contentId],
                value, SparseTable.SIZE_OF_INT_IN_BYTES);
    }

    /** Flushes the lookup table stream and obtains a fresh buffer over the lookup table file. */
    private void reopenLookupTable() throws IOException {
        mLookupTableOutStream.flush();
        mLookupTableBuffer = mFactory.getDictionaryBuffer(mLookupTableFile);
    }

    /** Flushes every address table stream and obtains a fresh buffer over each file. */
    private void reopenAddressTables() throws IOException {
        for (int i = 0; i < mContentCount; ++i) {
            mAddressTableOutStreams[i].flush();
            mAddressTableBuffers[i] = mFactory.getDictionaryBuffer(mAddressTableFiles[i]);
        }
    }

    /**
     * Closes every output stream opened by {@link #openStreamsAndBuffers()}.
     *
     * Calling this when nothing is open is a no-op. All streams are closed even if one of them
     * fails; the first failure is rethrown once the others have been closed.
     *
     * @throws IOException if at least one stream failed to close.
     */
    protected void close() throws IOException {
        closeStreams();
    }

    /** Closes and forgets all streams, rethrowing the first close failure, if any. */
    private void closeStreams() throws IOException {
        IOException firstFailure = closeAndKeepFirstFailure(mLookupTableOutStream, null);
        mLookupTableOutStream = null;
        for (int i = 0; i < mAddressTableOutStreams.length; ++i) {
            firstFailure = closeAndKeepFirstFailure(mAddressTableOutStreams[i], firstFailure);
            mAddressTableOutStreams[i] = null;
        }
        for (int i = 0; i < mContentOutStreams.length; ++i) {
            firstFailure = closeAndKeepFirstFailure(mContentOutStreams[i], firstFailure);
            mContentOutStreams[i] = null;
        }
        if (firstFailure != null) {
            throw firstFailure;
        }
    }

    /**
     * Closes the stream if it is not null.
     *
     * @return firstFailure when it is not null, otherwise the exception thrown by this close,
     *         otherwise null.
     */
    private static IOException closeAndKeepFirstFailure(final OutputStream stream,
            final IOException firstFailure) {
        if (stream == null) {
            return firstFailure;
        }
        try {
            stream.close();
        } catch (final IOException e) {
            return firstFailure != null ? firstFailure : e;
        }
        return firstFailure;
    }
}

View file

@ -46,7 +46,7 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
protected static final int FILETYPE_BIGRAM_FREQ = 4; protected static final int FILETYPE_BIGRAM_FREQ = 4;
protected static final int FILETYPE_SHORTCUT = 5; protected static final int FILETYPE_SHORTCUT = 5;
private final File mDictDirectory; protected final File mDictDirectory;
protected final DictionaryBufferFactory mBufferFactory; protected final DictionaryBufferFactory mBufferFactory;
protected DictBuffer mDictBuffer; protected DictBuffer mDictBuffer;
protected DictBuffer mFrequencyBuffer; protected DictBuffer mFrequencyBuffer;
@ -178,7 +178,8 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
} }
// TODO: Consolidate this method and BigramContentWriter.getContentFilenames. // TODO: Consolidate this method and BigramContentWriter.getContentFilenames.
private static String[] getContentFilenames(final String name, final boolean hasTimestamp) { protected static String[] getContentFilenames(final String name,
final boolean hasTimestamp) {
final String[] contentFilenames; final String[] contentFilenames;
if (hasTimestamp) { if (hasTimestamp) {
contentFilenames = new String[] { name + FormatSpec.BIGRAM_FILE_EXTENSION, contentFilenames = new String[] { name + FormatSpec.BIGRAM_FILE_EXTENSION,
@ -190,7 +191,7 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
} }
// TODO: Consolidate this method and BigramContentWriter.getContentIds. // TODO: Consolidate this method and BigramContentWriter.getContentIds.
private static String[] getContentIds(final boolean hasTimestamp) { protected static String[] getContentIds(final boolean hasTimestamp) {
final String[] contentIds; final String[] contentIds;
if (hasTimestamp) { if (hasTimestamp) {
contentIds = new String[] { FormatSpec.BIGRAM_FREQ_CONTENT_ID, contentIds = new String[] { FormatSpec.BIGRAM_FREQ_CONTENT_ID,

View file

@ -22,6 +22,7 @@ import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import com.android.inputmethod.latin.utils.CollectionUtils;
import android.util.Log; import android.util.Log;
@ -31,6 +32,7 @@ import java.io.IOException;
import java.io.OutputStream; import java.io.OutputStream;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Iterator;
/** /**
* An implementation of DictUpdater for version 4 binary dictionary. * An implementation of DictUpdater for version 4 binary dictionary.
@ -50,6 +52,91 @@ public class Ver4DictUpdater extends Ver4DictDecoder implements DictUpdater {
mFrequencyFile = getFile(FILETYPE_FREQUENCY); mFrequencyFile = getFile(FILETYPE_FREQUENCY);
} }
/**
 * Writes the bigram entries of a terminal to the bigram content file and records where they
 * start through {@link SparseTableContentUpdater#setContentValue}.
 */
private static class BigramContentUpdater extends SparseTableContentUpdater {
    // Stored from the constructor argument; not read anywhere in this class yet.
    private final boolean mHasTimestamp;

    public BigramContentUpdater(final String name, final File baseDir,
            final boolean hasTimestamp) {
        super(name + FormatSpec.BIGRAM_FILE_EXTENSION,
                FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, baseDir,
                BigramContentReader.getContentFilenames(name, hasTimestamp),
                BigramContentReader.getContentIds(hasTimestamp),
                new DictionaryBufferFromWritableByteBufferFactory());
        mHasTimestamp = hasTimestamp;
    }

    /**
     * Appends the given bigram entries for a terminal and points the terminal at them.
     *
     * When entries is null or empty, the terminal's slot is set to SparseTable.NOT_EXIST and
     * nothing is appended. The streams opened for the update are always closed before this
     * method returns, whether it returns early, completes or throws.
     *
     * @param terminalId the terminal id the entries belong to; must not be negative.
     * @param frequency the frequency passed to makeBigramFlags together with each entry's own.
     * @param entries the entries to write; mFrequency and mAddress of each one are used.
     * @throws RuntimeException if terminalId is negative.
     * @throws IOException if the underlying files cannot be opened, written or closed.
     */
    public void insertBigramEntries(final int terminalId, final int frequency,
            final ArrayList<PendingAttribute> entries) throws IOException {
        if (terminalId < 0) {
            throw new RuntimeException("Invalid terminal id : " + terminalId);
        }
        // Opened outside the try block: close() is only meaningful once opening succeeded.
        openStreamsAndBuffers();
        try {
            if (entries == null || entries.isEmpty()) {
                setContentValue(FormatSpec.BIGRAM_FREQ_CONTENT_INDEX, terminalId,
                        SparseTable.NOT_EXIST);
                return;
            }
            // The streams are in append mode, so the new entries start at the current end of
            // the content file.
            final int positionOfEntries =
                    (int) mContentFiles[FormatSpec.BIGRAM_FREQ_CONTENT_INDEX].length();
            setContentValue(FormatSpec.BIGRAM_FREQ_CONTENT_INDEX, terminalId, positionOfEntries);

            final OutputStream contentStream =
                    mContentOutStreams[FormatSpec.BIGRAM_FREQ_CONTENT_INDEX];
            final Iterator<PendingAttribute> bigramIterator = entries.iterator();
            while (bigramIterator.hasNext()) {
                final PendingAttribute entry = bigramIterator.next();
                // hasNext() is evaluated after next(), so it tells whether more entries follow.
                final int flags = BinaryDictEncoderUtils.makeBigramFlags(
                        bigramIterator.hasNext(), 0 /* offset */, entry.mFrequency, frequency,
                        "" /* word */);
                BinaryDictEncoderUtils.writeUIntToStream(contentStream, flags,
                        FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
                BinaryDictEncoderUtils.writeUIntToStream(contentStream, entry.mAddress,
                        FormatSpec.PTNODE_ATTRIBUTE_MAX_ADDRESS_SIZE);
            }
        } finally {
            // Runs on the early return and on failures too, so that no stream is leaked.
            close();
        }
    }
}
/**
 * Writes the shortcut targets of a terminal to the shortcut content file and records where
 * they start through {@link SparseTableContentUpdater#setContentValue}.
 */
private static class ShortcutContentUpdater extends SparseTableContentUpdater {
    public ShortcutContentUpdater(final String name, final File baseDir) {
        super(name + FormatSpec.SHORTCUT_FILE_EXTENSION,
                FormatSpec.SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE, baseDir,
                new String[] { name + FormatSpec.SHORTCUT_FILE_EXTENSION },
                new String[] { FormatSpec.SHORTCUT_CONTENT_ID },
                new DictionaryBufferFromWritableByteBufferFactory());
    }

    /**
     * Appends the given shortcut targets for a terminal and points the terminal at them.
     *
     * When shortcuts is null or empty, the terminal's slot is set to SparseTable.NOT_EXIST and
     * nothing is appended. The streams opened for the update are always closed before this
     * method returns, whether it returns early, completes or throws.
     *
     * @param terminalId the terminal id the shortcuts belong to; must not be negative.
     * @param shortcuts the shortcut targets; mFrequency and mWord of each one are written.
     * @throws RuntimeException if terminalId is negative.
     * @throws IOException if the underlying files cannot be opened, written or closed.
     */
    public void insertShortcuts(final int terminalId,
            final ArrayList<WeightedString> shortcuts) throws IOException {
        if (terminalId < 0) {
            throw new RuntimeException("Invalid terminal id : " + terminalId);
        }
        // Opened outside the try block: close() is only meaningful once opening succeeded.
        openStreamsAndBuffers();
        try {
            if (shortcuts == null || shortcuts.isEmpty()) {
                setContentValue(FormatSpec.SHORTCUT_CONTENT_INDEX, terminalId,
                        SparseTable.NOT_EXIST);
                return;
            }
            // The streams are in append mode, so the new shortcuts start at the current end of
            // the content file.
            final int positionOfShortcuts =
                    (int) mContentFiles[FormatSpec.SHORTCUT_CONTENT_INDEX].length();
            setContentValue(FormatSpec.SHORTCUT_CONTENT_INDEX, terminalId, positionOfShortcuts);

            final OutputStream contentStream =
                    mContentOutStreams[FormatSpec.SHORTCUT_CONTENT_INDEX];
            final Iterator<WeightedString> shortcutIterator = shortcuts.iterator();
            while (shortcutIterator.hasNext()) {
                final WeightedString target = shortcutIterator.next();
                // hasNext() is evaluated after next(), so it tells whether more targets follow.
                final int shortcutFlags = BinaryDictEncoderUtils.makeShortcutFlags(
                        shortcutIterator.hasNext(), target.mFrequency);
                BinaryDictEncoderUtils.writeUIntToStream(contentStream, shortcutFlags,
                        FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
                CharEncoding.writeString(contentStream, target.mWord);
            }
        } finally {
            // Runs on the early return and on failures too, so that no stream is leaked.
            close();
        }
    }
}
@Override @Override
public void deleteWord(final String word) throws IOException, UnsupportedFormatException { public void deleteWord(final String word) throws IOException, UnsupportedFormatException {
if (mDictBuffer == null) openDictBuffer(); if (mDictBuffer == null) openDictBuffer();
@ -574,6 +661,7 @@ public class Ver4DictUpdater extends Ver4DictDecoder implements DictUpdater {
true /* append */); true /* append */);
BinaryDictEncoderUtils.writeUIntToStream(frequencyStream, frequency, BinaryDictEncoderUtils.writeUIntToStream(frequencyStream, frequency,
FormatSpec.FREQUENCY_AND_FLAGS_SIZE); FormatSpec.FREQUENCY_AND_FLAGS_SIZE);
frequencyStream.close();
} }
private void insertTerminalPosition(final int posOfTerminal) throws IOException { private void insertTerminalPosition(final int posOfTerminal) throws IOException {
@ -581,14 +669,37 @@ public class Ver4DictUpdater extends Ver4DictDecoder implements DictUpdater {
getFile(FILETYPE_TERMINAL_ADDRESS_TABLE), true /* append */); getFile(FILETYPE_TERMINAL_ADDRESS_TABLE), true /* append */);
BinaryDictEncoderUtils.writeUIntToStream(terminalPosStream, posOfTerminal, BinaryDictEncoderUtils.writeUIntToStream(terminalPosStream, posOfTerminal,
FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE); FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE);
terminalPosStream.close();
} }
private void insertBigrams(final int terminalId, final ArrayList<PendingAttribute> bigrams) { private void insertBigrams(final int terminalId, final int frequency,
// TODO: Implement. final ArrayList<PendingAttribute> bigramAddresses)
throws IOException, UnsupportedFormatException {
openDictBuffer();
final BigramContentUpdater updater = new BigramContentUpdater(mDictDirectory.getName(),
mDictDirectory, false);
// Convert addresses to terminal ids.
final ArrayList<PendingAttribute> bigrams = CollectionUtils.newArrayList();
mDictBuffer.position(0);
final FileHeader header = readHeader();
for (PendingAttribute attr : bigramAddresses) {
mDictBuffer.position(attr.mAddress);
final Ver4PtNodeInfo info = readVer4PtNodeInfo(attr.mAddress, header.mFormatOptions);
if (info.mTerminalId == PtNode.NOT_A_TERMINAL) {
throw new RuntimeException("We can't have a bigram target that's not a terminal.");
}
bigrams.add(new PendingAttribute(frequency, info.mTerminalId));
}
updater.insertBigramEntries(terminalId, frequency, bigrams);
close();
} }
private void insertShortcuts(final int terminalId, final ArrayList<WeightedString> shortcuts) { private void insertShortcuts(final int terminalId, final ArrayList<WeightedString> shortcuts)
// TODO: Implement. throws IOException {
final ShortcutContentUpdater updater = new ShortcutContentUpdater(mDictDirectory.getName(),
mDictDirectory);
updater.insertShortcuts(terminalId, shortcuts);
} }
private void openBuffersAndStream() throws IOException { private void openBuffersAndStream() throws IOException {
@ -597,7 +708,10 @@ public class Ver4DictUpdater extends Ver4DictDecoder implements DictUpdater {
} }
private void close() throws IOException { private void close() throws IOException {
if (mDictStream != null) {
mDictStream.close(); mDictStream.close();
mDictStream = null;
}
mDictBuffer = null; mDictBuffer = null;
mFrequencyBuffer = null; mFrequencyBuffer = null;
mTerminalAddressTableBuffer = null; mTerminalAddressTableBuffer = null;
@ -620,7 +734,7 @@ public class Ver4DictUpdater extends Ver4DictDecoder implements DictUpdater {
mDictBuffer.put((byte) newFlags); mDictBuffer.put((byte) newFlags);
updateFrequency(terminalId, frequency); updateFrequency(terminalId, frequency);
insertBigrams(terminalId, insertBigrams(terminalId, frequency,
DynamicBinaryDictIOUtils.resolveBigramPositions(this, bigramStrings)); DynamicBinaryDictIOUtils.resolveBigramPositions(this, bigramStrings));
insertShortcuts(terminalId, shortcuts); insertShortcuts(terminalId, shortcuts);
} }
@ -650,7 +764,7 @@ public class Ver4DictUpdater extends Ver4DictDecoder implements DictUpdater {
insertTerminalPosition(posOfTerminal); insertTerminalPosition(posOfTerminal);
close(); close();
insertBigrams(newTerminalId, insertBigrams(newTerminalId, frequency,
DynamicBinaryDictIOUtils.resolveBigramPositions(this, bigramStrings)); DynamicBinaryDictIOUtils.resolveBigramPositions(this, bigramStrings));
insertShortcuts(newTerminalId, shortcuts); insertShortcuts(newTerminalId, shortcuts);
} }

View file

@ -330,8 +330,7 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
public void testInsertWordWithBigrams() { public void testInsertWordWithBigrams() {
runTestInsertWordWithBigrams(BinaryDictUtils.VERSION3_WITH_DYNAMIC_UPDATE); runTestInsertWordWithBigrams(BinaryDictUtils.VERSION3_WITH_DYNAMIC_UPDATE);
// TODO: Add a test for version 4. runTestInsertWordWithBigrams(BinaryDictUtils.VERSION4_WITH_DYNAMIC_UPDATE);
// runTestInsertWordWithBigrams(BinaryDictUtils.VERSION4_WITH_DYNAMIC_UPDATE);
} }
private void runTestRandomWords(final FormatOptions formatOptions) { private void runTestRandomWords(final FormatOptions formatOptions) {