Add SparseTableContentReader.

Change-Id: I976afc5d320bc65835d659bae1d10d2cdc68262b
main
Yuichiro Hanada 2013-10-14 10:36:33 +09:00
parent 7b5f2b71f5
commit 098639d99f
2 changed files with 198 additions and 39 deletions

View File

@ -0,0 +1,120 @@
/*
* Copyright (C) 2013 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.android.inputmethod.latin.makedict;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
import com.android.inputmethod.latin.makedict.DictDecoder.DictionaryBufferFactory;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
/**
 * An auxiliary class for reading SparseTable and data written by SparseTableContentWriter.
 *
 * The reader works on one lookup table file plus, for every content, one address table file
 * and one content file. {@link #openBuffers()} must be called before any call to read.
 */
public class SparseTableContentReader {
    /**
     * An interface of a function which is passed to SparseTableContentReader.read.
     */
    public interface SparseTableContentReaderInterface {
        /**
         * Reads data.
         *
         * @param buffer the DictBuffer. The position of the buffer is set to the head of data.
         */
        public void read(final DictBuffer buffer);
    }

    private final int mContentCount;
    private final int mBlockSize;
    protected final File mBaseDir;
    private final File mLookupTableFile;
    private final File[] mAddressTableFiles;
    private final File[] mContentFiles;
    // Stays null until openBuffers() is called.
    private DictBuffer mLookupTableBuffer;
    private final DictBuffer[] mAddressTableBuffers;
    private final DictBuffer[] mContentBuffers;
    private final DictionaryBufferFactory mFactory;

    /**
     * Sole constructor of SparseTableContentReader.
     *
     * This only computes the file locations; no file is opened until openBuffers() is called.
     *
     * @param name the name of SparseTable.
     * @param blockSize the block size of the content table. Must be positive.
     * @param baseDir the directory which contains the files of the content table.
     * @param contentFilenames the file names of content files.
     * @param contentIds the ids of contents. These ids are used for a suffix of a name of
     * address files and content files.
     * @param factory the DictionaryBufferFactory which is used for opening the files.
     * @throws IllegalArgumentException if contentFilenames and contentIds differ in length or
     * if blockSize is not positive.
     */
    public SparseTableContentReader(final String name, final int blockSize, final File baseDir,
            final String[] contentFilenames, final String[] contentIds,
            final DictionaryBufferFactory factory) {
        if (contentFilenames.length != contentIds.length) {
            throw new IllegalArgumentException("The length of contentFilenames and the length of"
                    + " contentIds are different " + contentFilenames.length + ", "
                    + contentIds.length);
        }
        if (blockSize <= 0) {
            // read() divides by the block size, so reject unusable values up front.
            throw new IllegalArgumentException("blockSize must be positive but was " + blockSize);
        }
        mBlockSize = blockSize;
        mBaseDir = baseDir;
        mFactory = factory;
        mContentCount = contentFilenames.length;
        mLookupTableFile = new File(baseDir, name + FormatSpec.LOOKUP_TABLE_FILE_SUFFIX);
        mAddressTableFiles = new File[mContentCount];
        mContentFiles = new File[mContentCount];
        for (int i = 0; i < mContentCount; ++i) {
            mAddressTableFiles[i] = new File(mBaseDir,
                    name + FormatSpec.CONTENT_TABLE_FILE_SUFFIX + contentIds[i]);
            mContentFiles[i] = new File(mBaseDir, contentFilenames[i] + contentIds[i]);
        }
        mAddressTableBuffers = new DictBuffer[mContentCount];
        mContentBuffers = new DictBuffer[mContentCount];
    }

    /**
     * Opens the lookup table file and, for every content, its address table file and its
     * content file, using the factory given to the constructor.
     *
     * @throws FileNotFoundException propagated from the factory.
     * @throws IOException propagated from the factory.
     */
    public void openBuffers() throws FileNotFoundException, IOException {
        mLookupTableBuffer = mFactory.getDictionaryBuffer(mLookupTableFile);
        for (int i = 0; i < mContentCount; ++i) {
            mAddressTableBuffers[i] = mFactory.getDictionaryBuffer(mAddressTableFiles[i]);
            mContentBuffers[i] = mFactory.getDictionaryBuffer(mContentFiles[i]);
        }
    }

    /**
     * Looks up the data of the given index in the given content and, when it exists, hands the
     * content buffer positioned at that data to the reader.
     *
     * Nothing is read, and the reader is not invoked, when the index is negative, when its
     * block lies at or beyond the limit of the lookup table, or when either the lookup table
     * or the address table holds SparseTable.NOT_EXIST for it.
     *
     * @param contentIndex the index of the content in the arrays given to the constructor.
     * It is not range-checked here.
     * @param index the index of the entry to read.
     * @param reader the callback which consumes the data.
     * @throws IllegalStateException if openBuffers() has not been called yet.
     */
    protected void read(final int contentIndex, final int index,
            final SparseTableContentReaderInterface reader) {
        if (index < 0) {
            return;
        }
        if (mLookupTableBuffer == null) {
            throw new IllegalStateException("openBuffers() must be called before read()");
        }
        // One int per block in the lookup table.
        final int posInLookupTable = (index / mBlockSize) * SparseTable.SIZE_OF_INT_IN_BYTES;
        if (posInLookupTable >= mLookupTableBuffer.limit()) {
            return;
        }
        mLookupTableBuffer.position(posInLookupTable);
        final int posInAddressTable = mLookupTableBuffer.readInt();
        if (posInAddressTable == SparseTable.NOT_EXIST) {
            return;
        }
        // The lookup entry is the start of the block in the address table, counted in ints.
        final DictBuffer addressTableBuffer = mAddressTableBuffers[contentIndex];
        addressTableBuffer.position(
                (posInAddressTable + index % mBlockSize) * SparseTable.SIZE_OF_INT_IN_BYTES);
        final int address = addressTableBuffer.readInt();
        if (address == SparseTable.NOT_EXIST) {
            return;
        }
        final DictBuffer contentBuffer = mContentBuffers[contentIndex];
        contentBuffer.position(address);
        reader.read(contentBuffer);
    }
}

View File

@ -51,9 +51,8 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
protected DictBuffer mDictBuffer; protected DictBuffer mDictBuffer;
private DictBuffer mFrequencyBuffer; private DictBuffer mFrequencyBuffer;
private DictBuffer mTerminalAddressTableBuffer; private DictBuffer mTerminalAddressTableBuffer;
private DictBuffer mBigramBuffer;
private DictBuffer mShortcutBuffer; private DictBuffer mShortcutBuffer;
private SparseTable mBigramAddressTable; private BigramContentReader mBigramReader;
private SparseTable mShortcutAddressTable; private SparseTable mShortcutAddressTable;
@UsedForTesting @UsedForTesting
@ -108,8 +107,9 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
mFrequencyBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_FREQUENCY)); mFrequencyBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_FREQUENCY));
mTerminalAddressTableBuffer = mBufferFactory.getDictionaryBuffer( mTerminalAddressTableBuffer = mBufferFactory.getDictionaryBuffer(
getFile(FILETYPE_TERMINAL_ADDRESS_TABLE)); getFile(FILETYPE_TERMINAL_ADDRESS_TABLE));
mBigramBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_BIGRAM_FREQ)); mBigramReader = new BigramContentReader(mDictDirectory.getName(),
loadBigramAddressSparseTable(); mDictDirectory, mBufferFactory, false);
mBigramReader.openBuffers();
mShortcutBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_SHORTCUT)); mShortcutBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_SHORTCUT));
loadShortcutAddressSparseTable(); loadShortcutAddressSparseTable();
} }
@ -136,16 +136,6 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
return header; return header;
} }
/**
 * Builds mBigramAddressTable from the bigram lookup table file and the bigram frequency
 * address table file, both located in mDictDirectory and named after it.
 *
 * @throws IOException propagated from SparseTable.readFromFiles.
 */
private void loadBigramAddressSparseTable() throws IOException {
    // Both files share the "<directory name><bigram extension>" prefix.
    final String bigramFilePrefix = mDictDirectory.getName() + FormatSpec.BIGRAM_FILE_EXTENSION;
    final File lookupTableFile = new File(mDictDirectory,
            bigramFilePrefix + FormatSpec.LOOKUP_TABLE_FILE_SUFFIX);
    final File frequencyAddressFile = new File(mDictDirectory, bigramFilePrefix
            + FormatSpec.CONTENT_TABLE_FILE_SUFFIX + FormatSpec.BIGRAM_FREQ_CONTENT_ID);
    final File[] contentFiles = new File[] { frequencyAddressFile };
    mBigramAddressTable = SparseTable.readFromFiles(lookupTableFile, contentFiles,
            FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE);
}
// TODO: Let's have something like SparseTableContentsReader in this class. // TODO: Let's have something like SparseTableContentsReader in this class.
private void loadShortcutAddressSparseTable() throws IOException { private void loadShortcutAddressSparseTable() throws IOException {
final File lookupIndexFile = new File(mDictDirectory, mDictDirectory.getName() final File lookupIndexFile = new File(mDictDirectory, mDictDirectory.getName()
@ -161,6 +151,77 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
FormatSpec.SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE); FormatSpec.SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE);
} }
/**
 * A SparseTableContentReader specialised for the bigram content of a dictionary.
 */
protected static class BigramContentReader extends SparseTableContentReader {
    private final boolean mHasTimestamp;

    /**
     * @param name the base name of the files; the bigram file extension is appended to it.
     * @param baseDir the directory handed to the superclass as the location of the files.
     * @param factory the DictionaryBufferFactory handed to the superclass.
     * @param hasTimestamp whether a timestamp content is listed next to the frequency content.
     */
    public BigramContentReader(final String name, final File baseDir,
            final DictionaryBufferFactory factory, final boolean hasTimestamp) {
        super(name + FormatSpec.BIGRAM_FILE_EXTENSION,
                FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, baseDir,
                getContentFilenames(name, hasTimestamp), getContentIds(hasTimestamp), factory);
        mHasTimestamp = hasTimestamp;
    }

    // TODO: Consolidate this method and BigramContentWriter.getContentFilenames.
    // Returns one file name per content: two entries with a timestamp, one without.
    private static String[] getContentFilenames(final String name, final boolean hasTimestamp) {
        final String bigramFilename = name + FormatSpec.BIGRAM_FILE_EXTENSION;
        if (!hasTimestamp) {
            return new String[] { bigramFilename };
        }
        return new String[] { bigramFilename, bigramFilename };
    }

    // TODO: Consolidate this method and BigramContentWriter.getContentIds.
    // Returns the content ids in the same order as getContentFilenames.
    private static String[] getContentIds(final boolean hasTimestamp) {
        if (!hasTimestamp) {
            return new String[] { FormatSpec.BIGRAM_FREQ_CONTENT_ID };
        }
        return new String[] { FormatSpec.BIGRAM_FREQ_CONTENT_ID,
                FormatSpec.BIGRAM_TIMESTAMP_CONTENT_ID };
    }

    /**
     * Reads the bigram entries stored for a terminal from the frequency content.
     *
     * Each entry is a flag byte followed by a 3-byte target terminal id. The target terminal
     * id is converted to an address through terminalAddressTableBuffer, whose position is
     * changed by this call.
     *
     * @param terminalId the id of the terminal whose bigrams are read.
     * @param terminalAddressTableBuffer the buffer holding the address of every terminal.
     * @return the list of bigrams, or null when no bigram was read.
     * @throws RuntimeException if the list reaches FormatSpec.MAX_BIGRAMS_IN_A_PTNODE entries.
     */
    public ArrayList<PendingAttribute> readTargetsAndFrequencies(final int terminalId,
            final DictBuffer terminalAddressTableBuffer) {
        final ArrayList<PendingAttribute> bigrams = CollectionUtils.newArrayList();
        final SparseTableContentReaderInterface entryReader =
                new SparseTableContentReaderInterface() {
            @Override
            public void read(final DictBuffer contentBuffer) {
                // Stop at the first entry without the has-next flag, or once the cap is
                // reached; entries beyond the cap are not read.
                boolean hasNext = true;
                while (hasNext && bigrams.size() < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
                    final int flags = contentBuffer.readUnsignedByte();
                    final int targetTerminalId = contentBuffer.readUnsignedInt24();
                    final int posInTerminalTable =
                            targetTerminalId * FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE;
                    terminalAddressTableBuffer.position(posInTerminalTable);
                    final int targetAddress = terminalAddressTableBuffer.readUnsignedInt24();
                    final int frequency =
                            flags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY;
                    bigrams.add(new PendingAttribute(frequency, targetAddress));
                    hasNext = 0 != (flags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT);
                }
                if (bigrams.size() >= FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
                    throw new RuntimeException("Too many bigrams in a PtNode (" + bigrams.size()
                            + " but max is " + FormatSpec.MAX_BIGRAMS_IN_A_PTNODE + ")");
                }
            }
        };
        read(FormatSpec.BIGRAM_FREQ_CONTENT_INDEX, terminalId, entryReader);
        return bigrams.isEmpty() ? null : bigrams;
    }
}
protected static class PtNodeReader extends AbstractDictDecoder.PtNodeReader { protected static class PtNodeReader extends AbstractDictDecoder.PtNodeReader {
protected static int readFrequency(final DictBuffer frequencyBuffer, final int terminalId) { protected static int readFrequency(final DictBuffer frequencyBuffer, final int terminalId) {
frequencyBuffer.position(terminalId * FormatSpec.FREQUENCY_AND_FLAGS_SIZE + 1); frequencyBuffer.position(terminalId * FormatSpec.FREQUENCY_AND_FLAGS_SIZE + 1);
@ -240,32 +301,10 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
} }
addressPointer += BinaryDictIOUtils.getChildrenAddressSize(flags, options); addressPointer += BinaryDictIOUtils.getChildrenAddressSize(flags, options);
final ArrayList<WeightedString> shortcutTargets = readShortcuts(terminalId); final ArrayList<WeightedString> shortcutTargets = readShortcuts(terminalId);
final ArrayList<PendingAttribute> bigrams =
mBigramReader.readTargetsAndFrequencies(terminalId,
mTerminalAddressTableBuffer);
final ArrayList<PendingAttribute> bigrams;
if (0 != (flags & FormatSpec.FLAG_HAS_BIGRAMS)) {
bigrams = new ArrayList<PendingAttribute>();
final int posOfBigrams = mBigramAddressTable.get(0 /* contentTableIndex */, terminalId);
mBigramBuffer.position(posOfBigrams);
while (bigrams.size() < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
// If bigrams.size() reaches FormatSpec.MAX_BIGRAMS_IN_A_PTNODE,
// remaining bigram entries are ignored.
final int bigramFlags = mBigramBuffer.readUnsignedByte();
final int targetTerminalId = mBigramBuffer.readUnsignedInt24();
mTerminalAddressTableBuffer.position(
targetTerminalId * FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE);
final int targetAddress = mTerminalAddressTableBuffer.readUnsignedInt24();
bigrams.add(new PendingAttribute(
bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY,
targetAddress));
if (0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break;
}
if (bigrams.size() >= FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
throw new RuntimeException("Too many bigrams in a PtNode (" + bigrams.size()
+ " but max is " + FormatSpec.MAX_BIGRAMS_IN_A_PTNODE + ")");
}
} else {
bigrams = null;
}
return new PtNodeInfo(ptNodePos, addressPointer, flags, characters, frequency, return new PtNodeInfo(ptNodePos, addressPointer, flags, characters, frequency,
parentAddress, childrenAddress, shortcutTargets, bigrams); parentAddress, childrenAddress, shortcutTargets, bigrams);
} }