(2/2) Implement insertWord in Ver4DictUpdater.
Change-Id: I2328a9df0a009b564e8acaf4180f9b0c1ed0901a
This commit is contained in:
parent
19b247e79d
commit
2b7110ff1f
5 changed files with 257 additions and 20 deletions
|
@ -40,16 +40,16 @@ public class SparseTableContentReader {
|
|||
public void read(final DictBuffer buffer);
|
||||
}
|
||||
|
||||
private final int mContentCount;
|
||||
private final int mBlockSize;
|
||||
protected final int mContentCount;
|
||||
protected final int mBlockSize;
|
||||
protected final File mBaseDir;
|
||||
private final File mLookupTableFile;
|
||||
private final File[] mAddressTableFiles;
|
||||
private final File[] mContentFiles;
|
||||
private DictBuffer mLookupTableBuffer;
|
||||
private final DictBuffer[] mAddressTableBuffers;
|
||||
protected final File mLookupTableFile;
|
||||
protected final File[] mAddressTableFiles;
|
||||
protected final File[] mContentFiles;
|
||||
protected DictBuffer mLookupTableBuffer;
|
||||
protected final DictBuffer[] mAddressTableBuffers;
|
||||
private final DictBuffer[] mContentBuffers;
|
||||
private final DictionaryBufferFactory mFactory;
|
||||
protected final DictionaryBufferFactory mFactory;
|
||||
|
||||
/**
|
||||
* Sole constructor of SparseTableContentReader.
|
||||
|
|
|
@ -0,0 +1,123 @@
|
|||
/*
|
||||
* Copyright (C) 2013 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.android.inputmethod.latin.makedict;
|
||||
|
||||
import com.android.inputmethod.latin.makedict.DictDecoder.DictionaryBufferFactory;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
|
||||
/**
|
||||
* An auxiliary class for updating data associated with SparseTable.
|
||||
*/
|
||||
public class SparseTableContentUpdater extends SparseTableContentReader {
|
||||
protected OutputStream mLookupTableOutStream;
|
||||
protected OutputStream[] mAddressTableOutStreams;
|
||||
protected OutputStream[] mContentOutStreams;
|
||||
|
||||
public SparseTableContentUpdater(final String name, final int blockSize,
|
||||
final File baseDir, final String[] contentFilenames, final String[] contentIds,
|
||||
final DictionaryBufferFactory factory) {
|
||||
super(name, blockSize, baseDir, contentFilenames, contentIds, factory);
|
||||
mAddressTableOutStreams = new OutputStream[mContentCount];
|
||||
mContentOutStreams = new OutputStream[mContentCount];
|
||||
}
|
||||
|
||||
protected void openStreamsAndBuffers() throws IOException {
|
||||
openBuffers();
|
||||
mLookupTableOutStream = new FileOutputStream(mLookupTableFile, true /* append */);
|
||||
for (int i = 0; i < mContentCount; ++i) {
|
||||
mAddressTableOutStreams[i] = new FileOutputStream(mAddressTableFiles[i],
|
||||
true /* append */);
|
||||
mContentOutStreams[i] = new FileOutputStream(mContentFiles[i], true /* append */);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the contentIndex-th elements of contentId-th table.
|
||||
*
|
||||
* @param contentId the id of the content table.
|
||||
* @param contentIndex the index where to set the valie.
|
||||
* @param value the value to set.
|
||||
*/
|
||||
protected void setContentValue(final int contentId, final int contentIndex, final int value)
|
||||
throws IOException {
|
||||
if ((contentIndex / mBlockSize) * SparseTable.SIZE_OF_INT_IN_BYTES
|
||||
>= mLookupTableBuffer.limit()) {
|
||||
// Need to extend the lookup table
|
||||
final int currentSize = mLookupTableBuffer.limit()
|
||||
/ SparseTable.SIZE_OF_INT_IN_BYTES;
|
||||
final int target = contentIndex / mBlockSize + 1;
|
||||
for (int i = currentSize; i < target; ++i) {
|
||||
BinaryDictEncoderUtils.writeUIntToStream(mLookupTableOutStream,
|
||||
SparseTable.NOT_EXIST, SparseTable.SIZE_OF_INT_IN_BYTES);
|
||||
}
|
||||
// We need to reopen the byte buffer of the lookup table because a MappedByteBuffer in
|
||||
// Java isn't expanded automatically when the underlying file is expanded.
|
||||
reopenLookupTable();
|
||||
}
|
||||
|
||||
mLookupTableBuffer.position((contentIndex / mBlockSize) * SparseTable.SIZE_OF_INT_IN_BYTES);
|
||||
int posInAddressTable = mLookupTableBuffer.readInt();
|
||||
if (posInAddressTable == SparseTable.NOT_EXIST) {
|
||||
// Need to extend the address table
|
||||
mLookupTableBuffer.position(mLookupTableBuffer.position()
|
||||
- SparseTable.SIZE_OF_INT_IN_BYTES);
|
||||
posInAddressTable = mAddressTableBuffers[0].limit() / mBlockSize;
|
||||
BinaryDictEncoderUtils.writeUIntToDictBuffer(mLookupTableBuffer,
|
||||
posInAddressTable, SparseTable.SIZE_OF_INT_IN_BYTES);
|
||||
for (int i = 0; i < mContentCount; ++i) {
|
||||
for (int j = 0; j < mBlockSize; ++j) {
|
||||
BinaryDictEncoderUtils.writeUIntToStream(mAddressTableOutStreams[i],
|
||||
SparseTable.NOT_EXIST, SparseTable.SIZE_OF_INT_IN_BYTES);
|
||||
}
|
||||
}
|
||||
// We need to reopen the byte buffers of the address tables because a MappedByteBuffer
|
||||
// in Java isn't expanded automatically when the underlying file is expanded.
|
||||
reopenAddressTables();
|
||||
}
|
||||
posInAddressTable += (contentIndex % mBlockSize) * SparseTable.SIZE_OF_INT_IN_BYTES;
|
||||
|
||||
mAddressTableBuffers[contentId].position(posInAddressTable);
|
||||
BinaryDictEncoderUtils.writeUIntToDictBuffer(mAddressTableBuffers[contentId],
|
||||
value, SparseTable.SIZE_OF_INT_IN_BYTES);
|
||||
}
|
||||
|
||||
private void reopenLookupTable() throws IOException {
|
||||
mLookupTableOutStream.flush();
|
||||
mLookupTableBuffer = mFactory.getDictionaryBuffer(mLookupTableFile);
|
||||
}
|
||||
|
||||
private void reopenAddressTables() throws IOException {
|
||||
for (int i = 0; i < mContentCount; ++i) {
|
||||
mAddressTableOutStreams[i].flush();
|
||||
mAddressTableBuffers[i] = mFactory.getDictionaryBuffer(mAddressTableFiles[i]);
|
||||
}
|
||||
}
|
||||
|
||||
protected void close() throws IOException {
|
||||
mLookupTableOutStream.close();
|
||||
for (final OutputStream stream : mAddressTableOutStreams) {
|
||||
stream.close();
|
||||
}
|
||||
for (final OutputStream stream : mContentOutStreams) {
|
||||
stream.close();
|
||||
}
|
||||
}
|
||||
}
|
|
@ -46,7 +46,7 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
|
|||
protected static final int FILETYPE_BIGRAM_FREQ = 4;
|
||||
protected static final int FILETYPE_SHORTCUT = 5;
|
||||
|
||||
private final File mDictDirectory;
|
||||
protected final File mDictDirectory;
|
||||
protected final DictionaryBufferFactory mBufferFactory;
|
||||
protected DictBuffer mDictBuffer;
|
||||
protected DictBuffer mFrequencyBuffer;
|
||||
|
@ -178,7 +178,8 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
|
|||
}
|
||||
|
||||
// TODO: Consolidate this method and BigramContentWriter.getContentFilenames.
|
||||
private static String[] getContentFilenames(final String name, final boolean hasTimestamp) {
|
||||
protected static String[] getContentFilenames(final String name,
|
||||
final boolean hasTimestamp) {
|
||||
final String[] contentFilenames;
|
||||
if (hasTimestamp) {
|
||||
contentFilenames = new String[] { name + FormatSpec.BIGRAM_FILE_EXTENSION,
|
||||
|
@ -190,7 +191,7 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
|
|||
}
|
||||
|
||||
// TODO: Consolidate this method and BigramContentWriter.getContentIds.
|
||||
private static String[] getContentIds(final boolean hasTimestamp) {
|
||||
protected static String[] getContentIds(final boolean hasTimestamp) {
|
||||
final String[] contentIds;
|
||||
if (hasTimestamp) {
|
||||
contentIds = new String[] { FormatSpec.BIGRAM_FREQ_CONTENT_ID,
|
||||
|
|
|
@ -22,6 +22,7 @@ import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
|
|||
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
||||
import com.android.inputmethod.latin.utils.CollectionUtils;
|
||||
|
||||
import android.util.Log;
|
||||
|
||||
|
@ -31,6 +32,7 @@ import java.io.IOException;
|
|||
import java.io.OutputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Iterator;
|
||||
|
||||
/**
|
||||
* An implementation of DictUpdater for version 4 binary dictionary.
|
||||
|
@ -50,6 +52,91 @@ public class Ver4DictUpdater extends Ver4DictDecoder implements DictUpdater {
|
|||
mFrequencyFile = getFile(FILETYPE_FREQUENCY);
|
||||
}
|
||||
|
||||
private static class BigramContentUpdater extends SparseTableContentUpdater {
|
||||
private final boolean mHasTimestamp;
|
||||
|
||||
public BigramContentUpdater(final String name, final File baseDir,
|
||||
final boolean hasTimestamp) {
|
||||
super(name + FormatSpec.BIGRAM_FILE_EXTENSION,
|
||||
FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, baseDir,
|
||||
BigramContentReader.getContentFilenames(name, hasTimestamp),
|
||||
BigramContentReader.getContentIds(hasTimestamp),
|
||||
new DictionaryBufferFromWritableByteBufferFactory());
|
||||
mHasTimestamp = hasTimestamp;
|
||||
}
|
||||
|
||||
public void insertBigramEntries(final int terminalId, final int frequency,
|
||||
final ArrayList<PendingAttribute> entries) throws IOException {
|
||||
if (terminalId < 0) {
|
||||
throw new RuntimeException("Invalid terminal id : " + terminalId);
|
||||
}
|
||||
openStreamsAndBuffers();
|
||||
|
||||
if (entries == null || entries.isEmpty()) {
|
||||
setContentValue(FormatSpec.BIGRAM_FREQ_CONTENT_INDEX, terminalId,
|
||||
SparseTable.NOT_EXIST);
|
||||
return;
|
||||
}
|
||||
final int positionOfEntries =
|
||||
(int) mContentFiles[FormatSpec.BIGRAM_FREQ_CONTENT_INDEX].length();
|
||||
setContentValue(FormatSpec.BIGRAM_FREQ_CONTENT_INDEX, terminalId, positionOfEntries);
|
||||
|
||||
final Iterator<PendingAttribute> bigramIterator = entries.iterator();
|
||||
while (bigramIterator.hasNext()) {
|
||||
final PendingAttribute entry = bigramIterator.next();
|
||||
final int flags = BinaryDictEncoderUtils.makeBigramFlags(bigramIterator.hasNext(),
|
||||
0 /* offset */, entry.mFrequency, frequency, "" /* word */);
|
||||
BinaryDictEncoderUtils.writeUIntToStream(
|
||||
mContentOutStreams[FormatSpec.BIGRAM_FREQ_CONTENT_INDEX], flags,
|
||||
FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
|
||||
BinaryDictEncoderUtils.writeUIntToStream(
|
||||
mContentOutStreams[FormatSpec.BIGRAM_FREQ_CONTENT_INDEX], entry.mAddress,
|
||||
FormatSpec.PTNODE_ATTRIBUTE_MAX_ADDRESS_SIZE);
|
||||
}
|
||||
close();
|
||||
}
|
||||
}
|
||||
|
||||
private static class ShortcutContentUpdater extends SparseTableContentUpdater {
|
||||
public ShortcutContentUpdater(final String name, final File baseDir) {
|
||||
super(name + FormatSpec.SHORTCUT_FILE_EXTENSION,
|
||||
FormatSpec.SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE, baseDir,
|
||||
new String[] { name + FormatSpec.SHORTCUT_FILE_EXTENSION },
|
||||
new String[] { FormatSpec.SHORTCUT_CONTENT_ID },
|
||||
new DictionaryBufferFromWritableByteBufferFactory());
|
||||
}
|
||||
|
||||
public void insertShortcuts(final int terminalId,
|
||||
final ArrayList<WeightedString> shortcuts) throws IOException {
|
||||
if (terminalId < 0) {
|
||||
throw new RuntimeException("Invalid terminal id : " + terminalId);
|
||||
}
|
||||
openStreamsAndBuffers();
|
||||
if (shortcuts == null || shortcuts.isEmpty()) {
|
||||
setContentValue(FormatSpec.SHORTCUT_CONTENT_INDEX, terminalId,
|
||||
SparseTable.NOT_EXIST);
|
||||
return;
|
||||
}
|
||||
|
||||
final int positionOfShortcuts =
|
||||
(int) mContentFiles[FormatSpec.SHORTCUT_CONTENT_INDEX].length();
|
||||
setContentValue(FormatSpec.SHORTCUT_CONTENT_INDEX, terminalId, positionOfShortcuts);
|
||||
|
||||
final Iterator<WeightedString> shortcutIterator = shortcuts.iterator();
|
||||
while (shortcutIterator.hasNext()) {
|
||||
final WeightedString target = shortcutIterator.next();
|
||||
final int shortcutFlags = BinaryDictEncoderUtils.makeShortcutFlags(
|
||||
shortcutIterator.hasNext(), target.mFrequency);
|
||||
BinaryDictEncoderUtils.writeUIntToStream(
|
||||
mContentOutStreams[FormatSpec.SHORTCUT_CONTENT_INDEX], shortcutFlags,
|
||||
FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
|
||||
CharEncoding.writeString(mContentOutStreams[FormatSpec.SHORTCUT_CONTENT_INDEX],
|
||||
target.mWord);
|
||||
}
|
||||
close();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void deleteWord(final String word) throws IOException, UnsupportedFormatException {
|
||||
if (mDictBuffer == null) openDictBuffer();
|
||||
|
@ -574,6 +661,7 @@ public class Ver4DictUpdater extends Ver4DictDecoder implements DictUpdater {
|
|||
true /* append */);
|
||||
BinaryDictEncoderUtils.writeUIntToStream(frequencyStream, frequency,
|
||||
FormatSpec.FREQUENCY_AND_FLAGS_SIZE);
|
||||
frequencyStream.close();
|
||||
}
|
||||
|
||||
private void insertTerminalPosition(final int posOfTerminal) throws IOException {
|
||||
|
@ -581,14 +669,37 @@ public class Ver4DictUpdater extends Ver4DictDecoder implements DictUpdater {
|
|||
getFile(FILETYPE_TERMINAL_ADDRESS_TABLE), true /* append */);
|
||||
BinaryDictEncoderUtils.writeUIntToStream(terminalPosStream, posOfTerminal,
|
||||
FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE);
|
||||
terminalPosStream.close();
|
||||
}
|
||||
|
||||
private void insertBigrams(final int terminalId, final ArrayList<PendingAttribute> bigrams) {
|
||||
// TODO: Implement.
|
||||
private void insertBigrams(final int terminalId, final int frequency,
|
||||
final ArrayList<PendingAttribute> bigramAddresses)
|
||||
throws IOException, UnsupportedFormatException {
|
||||
openDictBuffer();
|
||||
final BigramContentUpdater updater = new BigramContentUpdater(mDictDirectory.getName(),
|
||||
mDictDirectory, false);
|
||||
|
||||
// Convert addresses to terminal ids.
|
||||
final ArrayList<PendingAttribute> bigrams = CollectionUtils.newArrayList();
|
||||
mDictBuffer.position(0);
|
||||
final FileHeader header = readHeader();
|
||||
for (PendingAttribute attr : bigramAddresses) {
|
||||
mDictBuffer.position(attr.mAddress);
|
||||
final Ver4PtNodeInfo info = readVer4PtNodeInfo(attr.mAddress, header.mFormatOptions);
|
||||
if (info.mTerminalId == PtNode.NOT_A_TERMINAL) {
|
||||
throw new RuntimeException("We can't have a bigram target that's not a terminal.");
|
||||
}
|
||||
bigrams.add(new PendingAttribute(frequency, info.mTerminalId));
|
||||
}
|
||||
updater.insertBigramEntries(terminalId, frequency, bigrams);
|
||||
close();
|
||||
}
|
||||
|
||||
private void insertShortcuts(final int terminalId, final ArrayList<WeightedString> shortcuts) {
|
||||
// TODO: Implement.
|
||||
private void insertShortcuts(final int terminalId, final ArrayList<WeightedString> shortcuts)
|
||||
throws IOException {
|
||||
final ShortcutContentUpdater updater = new ShortcutContentUpdater(mDictDirectory.getName(),
|
||||
mDictDirectory);
|
||||
updater.insertShortcuts(terminalId, shortcuts);
|
||||
}
|
||||
|
||||
private void openBuffersAndStream() throws IOException {
|
||||
|
@ -597,7 +708,10 @@ public class Ver4DictUpdater extends Ver4DictDecoder implements DictUpdater {
|
|||
}
|
||||
|
||||
private void close() throws IOException {
|
||||
if (mDictStream != null) {
|
||||
mDictStream.close();
|
||||
mDictStream = null;
|
||||
}
|
||||
mDictBuffer = null;
|
||||
mFrequencyBuffer = null;
|
||||
mTerminalAddressTableBuffer = null;
|
||||
|
@ -620,7 +734,7 @@ public class Ver4DictUpdater extends Ver4DictDecoder implements DictUpdater {
|
|||
mDictBuffer.put((byte) newFlags);
|
||||
|
||||
updateFrequency(terminalId, frequency);
|
||||
insertBigrams(terminalId,
|
||||
insertBigrams(terminalId, frequency,
|
||||
DynamicBinaryDictIOUtils.resolveBigramPositions(this, bigramStrings));
|
||||
insertShortcuts(terminalId, shortcuts);
|
||||
}
|
||||
|
@ -650,7 +764,7 @@ public class Ver4DictUpdater extends Ver4DictDecoder implements DictUpdater {
|
|||
insertTerminalPosition(posOfTerminal);
|
||||
close();
|
||||
|
||||
insertBigrams(newTerminalId,
|
||||
insertBigrams(newTerminalId, frequency,
|
||||
DynamicBinaryDictIOUtils.resolveBigramPositions(this, bigramStrings));
|
||||
insertShortcuts(newTerminalId, shortcuts);
|
||||
}
|
||||
|
|
|
@ -330,8 +330,7 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
|
|||
|
||||
public void testInsertWordWithBigrams() {
|
||||
runTestInsertWordWithBigrams(BinaryDictUtils.VERSION3_WITH_DYNAMIC_UPDATE);
|
||||
// TODO: Add a test for version 4.
|
||||
// runTestInsertWordWithBigrams(BinaryDictUtils.VERSION4_WITH_DYNAMIC_UPDATE);
|
||||
runTestInsertWordWithBigrams(BinaryDictUtils.VERSION4_WITH_DYNAMIC_UPDATE);
|
||||
}
|
||||
|
||||
private void runTestRandomWords(final FormatOptions formatOptions) {
|
||||
|
|
Loading…
Reference in a new issue