Add SparseTableContentReader.
Change-Id: I976afc5d320bc65835d659bae1d10d2cdc68262b
This commit is contained in:
parent
7b5f2b71f5
commit
098639d99f
2 changed files with 198 additions and 39 deletions
|
@ -0,0 +1,120 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2013 The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package com.android.inputmethod.latin.makedict;
|
||||||
|
|
||||||
|
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
|
||||||
|
import com.android.inputmethod.latin.makedict.DictDecoder.DictionaryBufferFactory;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An auxiliary class for reading SparseTable and data written by SparseTableContentWriter.
|
||||||
|
*/
|
||||||
|
public class SparseTableContentReader {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An interface of a function which is passed to SparseTableContentReader.read.
|
||||||
|
*/
|
||||||
|
public interface SparseTableContentReaderInterface {
|
||||||
|
/**
|
||||||
|
* Reads data.
|
||||||
|
*
|
||||||
|
* @param buffer the DictBuffer. The position of the buffer is set to the head of data.
|
||||||
|
*/
|
||||||
|
public void read(final DictBuffer buffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
private final int mContentCount;
|
||||||
|
private final int mBlockSize;
|
||||||
|
protected final File mBaseDir;
|
||||||
|
private final File mLookupTableFile;
|
||||||
|
private final File[] mAddressTableFiles;
|
||||||
|
private final File[] mContentFiles;
|
||||||
|
private DictBuffer mLookupTableBuffer;
|
||||||
|
private final DictBuffer[] mAddressTableBuffers;
|
||||||
|
private final DictBuffer[] mContentBuffers;
|
||||||
|
private final DictionaryBufferFactory mFactory;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sole constructor of SparseTableContentReader.
|
||||||
|
*
|
||||||
|
* @param name the name of SparseTable.
|
||||||
|
* @param blockSize the block size of the content table.
|
||||||
|
* @param baseDir the directory which contains the files of the content table.
|
||||||
|
* @param contentFilenames the file names of content files.
|
||||||
|
* @param contentIds the ids of contents. These ids are used for a suffix of a name of
|
||||||
|
* address files and content files.
|
||||||
|
* @param factory the DictionaryBufferFactory which is used for opening the files.
|
||||||
|
*/
|
||||||
|
public SparseTableContentReader(final String name, final int blockSize, final File baseDir,
|
||||||
|
final String[] contentFilenames, final String[] contentIds,
|
||||||
|
final DictionaryBufferFactory factory) {
|
||||||
|
if (contentFilenames.length != contentIds.length) {
|
||||||
|
throw new RuntimeException("The length of contentFilenames and the length of"
|
||||||
|
+ " contentIds are different " + contentFilenames.length + ", "
|
||||||
|
+ contentIds.length);
|
||||||
|
}
|
||||||
|
mBlockSize = blockSize;
|
||||||
|
mBaseDir = baseDir;
|
||||||
|
mFactory = factory;
|
||||||
|
mContentCount = contentFilenames.length;
|
||||||
|
mLookupTableFile = new File(baseDir, name + FormatSpec.LOOKUP_TABLE_FILE_SUFFIX);
|
||||||
|
mAddressTableFiles = new File[mContentCount];
|
||||||
|
mContentFiles = new File[mContentCount];
|
||||||
|
for (int i = 0; i < mContentCount; ++i) {
|
||||||
|
mAddressTableFiles[i] = new File(mBaseDir,
|
||||||
|
name + FormatSpec.CONTENT_TABLE_FILE_SUFFIX + contentIds[i]);
|
||||||
|
mContentFiles[i] = new File(mBaseDir, contentFilenames[i] + contentIds[i]);
|
||||||
|
}
|
||||||
|
mAddressTableBuffers = new DictBuffer[mContentCount];
|
||||||
|
mContentBuffers = new DictBuffer[mContentCount];
|
||||||
|
}
|
||||||
|
|
||||||
|
public void openBuffers() throws FileNotFoundException, IOException {
|
||||||
|
mLookupTableBuffer = mFactory.getDictionaryBuffer(mLookupTableFile);
|
||||||
|
for (int i = 0; i < mContentCount; ++i) {
|
||||||
|
mAddressTableBuffers[i] = mFactory.getDictionaryBuffer(mAddressTableFiles[i]);
|
||||||
|
mContentBuffers[i] = mFactory.getDictionaryBuffer(mContentFiles[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void read(final int contentIndex, final int index,
|
||||||
|
final SparseTableContentReaderInterface reader) {
|
||||||
|
if (index < 0 || (index / mBlockSize) * SparseTable.SIZE_OF_INT_IN_BYTES
|
||||||
|
>= mLookupTableBuffer.limit()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
mLookupTableBuffer.position((index / mBlockSize) * SparseTable.SIZE_OF_INT_IN_BYTES);
|
||||||
|
final int posInAddressTable = mLookupTableBuffer.readInt();
|
||||||
|
if (posInAddressTable == SparseTable.NOT_EXIST) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
mAddressTableBuffers[contentIndex].position(
|
||||||
|
(posInAddressTable + index % mBlockSize) * SparseTable.SIZE_OF_INT_IN_BYTES);
|
||||||
|
final int address = mAddressTableBuffers[contentIndex].readInt();
|
||||||
|
if (address == SparseTable.NOT_EXIST) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
mContentBuffers[contentIndex].position(address);
|
||||||
|
reader.read(mContentBuffers[contentIndex]);
|
||||||
|
}
|
||||||
|
}
|
|
@ -51,9 +51,8 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
|
||||||
protected DictBuffer mDictBuffer;
|
protected DictBuffer mDictBuffer;
|
||||||
private DictBuffer mFrequencyBuffer;
|
private DictBuffer mFrequencyBuffer;
|
||||||
private DictBuffer mTerminalAddressTableBuffer;
|
private DictBuffer mTerminalAddressTableBuffer;
|
||||||
private DictBuffer mBigramBuffer;
|
|
||||||
private DictBuffer mShortcutBuffer;
|
private DictBuffer mShortcutBuffer;
|
||||||
private SparseTable mBigramAddressTable;
|
private BigramContentReader mBigramReader;
|
||||||
private SparseTable mShortcutAddressTable;
|
private SparseTable mShortcutAddressTable;
|
||||||
|
|
||||||
@UsedForTesting
|
@UsedForTesting
|
||||||
|
@ -108,8 +107,9 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
|
||||||
mFrequencyBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_FREQUENCY));
|
mFrequencyBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_FREQUENCY));
|
||||||
mTerminalAddressTableBuffer = mBufferFactory.getDictionaryBuffer(
|
mTerminalAddressTableBuffer = mBufferFactory.getDictionaryBuffer(
|
||||||
getFile(FILETYPE_TERMINAL_ADDRESS_TABLE));
|
getFile(FILETYPE_TERMINAL_ADDRESS_TABLE));
|
||||||
mBigramBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_BIGRAM_FREQ));
|
mBigramReader = new BigramContentReader(mDictDirectory.getName(),
|
||||||
loadBigramAddressSparseTable();
|
mDictDirectory, mBufferFactory, false);
|
||||||
|
mBigramReader.openBuffers();
|
||||||
mShortcutBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_SHORTCUT));
|
mShortcutBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_SHORTCUT));
|
||||||
loadShortcutAddressSparseTable();
|
loadShortcutAddressSparseTable();
|
||||||
}
|
}
|
||||||
|
@ -136,16 +136,6 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
|
||||||
return header;
|
return header;
|
||||||
}
|
}
|
||||||
|
|
||||||
private void loadBigramAddressSparseTable() throws IOException {
|
|
||||||
final File lookupIndexFile = new File(mDictDirectory, mDictDirectory.getName()
|
|
||||||
+ FormatSpec.BIGRAM_FILE_EXTENSION + FormatSpec.LOOKUP_TABLE_FILE_SUFFIX);
|
|
||||||
final File freqsFile = new File(mDictDirectory, mDictDirectory.getName()
|
|
||||||
+ FormatSpec.BIGRAM_FILE_EXTENSION + FormatSpec.CONTENT_TABLE_FILE_SUFFIX
|
|
||||||
+ FormatSpec.BIGRAM_FREQ_CONTENT_ID);
|
|
||||||
mBigramAddressTable = SparseTable.readFromFiles(lookupIndexFile, new File[] { freqsFile },
|
|
||||||
FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE);
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: Let's have something like SparseTableContentsReader in this class.
|
// TODO: Let's have something like SparseTableContentsReader in this class.
|
||||||
private void loadShortcutAddressSparseTable() throws IOException {
|
private void loadShortcutAddressSparseTable() throws IOException {
|
||||||
final File lookupIndexFile = new File(mDictDirectory, mDictDirectory.getName()
|
final File lookupIndexFile = new File(mDictDirectory, mDictDirectory.getName()
|
||||||
|
@ -161,6 +151,77 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
|
||||||
FormatSpec.SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE);
|
FormatSpec.SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An auxiliary class for reading bigrams.
|
||||||
|
*/
|
||||||
|
protected static class BigramContentReader extends SparseTableContentReader {
|
||||||
|
private final boolean mHasTimestamp;
|
||||||
|
|
||||||
|
public BigramContentReader(final String name, final File baseDir,
|
||||||
|
final DictionaryBufferFactory factory, final boolean hasTimestamp) {
|
||||||
|
super(name + FormatSpec.BIGRAM_FILE_EXTENSION,
|
||||||
|
FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, baseDir,
|
||||||
|
getContentFilenames(name, hasTimestamp), getContentIds(hasTimestamp), factory);
|
||||||
|
mHasTimestamp = hasTimestamp;
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: Consolidate this method and BigramContentWriter.getContentFilenames.
|
||||||
|
private static String[] getContentFilenames(final String name, final boolean hasTimestamp) {
|
||||||
|
final String[] contentFilenames;
|
||||||
|
if (hasTimestamp) {
|
||||||
|
contentFilenames = new String[] { name + FormatSpec.BIGRAM_FILE_EXTENSION,
|
||||||
|
name + FormatSpec.BIGRAM_FILE_EXTENSION };
|
||||||
|
} else {
|
||||||
|
contentFilenames = new String[] { name + FormatSpec.BIGRAM_FILE_EXTENSION };
|
||||||
|
}
|
||||||
|
return contentFilenames;
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: Consolidate this method and BigramContentWriter.getContentIds.
|
||||||
|
private static String[] getContentIds(final boolean hasTimestamp) {
|
||||||
|
final String[] contentIds;
|
||||||
|
if (hasTimestamp) {
|
||||||
|
contentIds = new String[] { FormatSpec.BIGRAM_FREQ_CONTENT_ID,
|
||||||
|
FormatSpec.BIGRAM_TIMESTAMP_CONTENT_ID };
|
||||||
|
} else {
|
||||||
|
contentIds = new String[] { FormatSpec.BIGRAM_FREQ_CONTENT_ID };
|
||||||
|
}
|
||||||
|
return contentIds;
|
||||||
|
}
|
||||||
|
|
||||||
|
public ArrayList<PendingAttribute> readTargetsAndFrequencies(final int terminalId,
|
||||||
|
final DictBuffer terminalAddressTableBuffer) {
|
||||||
|
final ArrayList<PendingAttribute> bigrams = CollectionUtils.newArrayList();
|
||||||
|
read(FormatSpec.BIGRAM_FREQ_CONTENT_INDEX, terminalId,
|
||||||
|
new SparseTableContentReaderInterface() {
|
||||||
|
@Override
|
||||||
|
public void read(final DictBuffer buffer) {
|
||||||
|
while (bigrams.size() < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
|
||||||
|
// If bigrams.size() reaches FormatSpec.MAX_BIGRAMS_IN_A_PTNODE,
|
||||||
|
// remaining bigram entries are ignored.
|
||||||
|
final int bigramFlags = buffer.readUnsignedByte();
|
||||||
|
final int targetTerminalId = buffer.readUnsignedInt24();
|
||||||
|
terminalAddressTableBuffer.position(
|
||||||
|
targetTerminalId * FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE);
|
||||||
|
final int targetAddress = terminalAddressTableBuffer.readUnsignedInt24();
|
||||||
|
bigrams.add(new PendingAttribute(
|
||||||
|
bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY,
|
||||||
|
targetAddress));
|
||||||
|
if (0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (bigrams.size() >= FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
|
||||||
|
throw new RuntimeException("Too many bigrams in a PtNode (" + bigrams.size()
|
||||||
|
+ " but max is " + FormatSpec.MAX_BIGRAMS_IN_A_PTNODE + ")");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
if (bigrams.isEmpty()) return null;
|
||||||
|
return bigrams;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
protected static class PtNodeReader extends AbstractDictDecoder.PtNodeReader {
|
protected static class PtNodeReader extends AbstractDictDecoder.PtNodeReader {
|
||||||
protected static int readFrequency(final DictBuffer frequencyBuffer, final int terminalId) {
|
protected static int readFrequency(final DictBuffer frequencyBuffer, final int terminalId) {
|
||||||
frequencyBuffer.position(terminalId * FormatSpec.FREQUENCY_AND_FLAGS_SIZE + 1);
|
frequencyBuffer.position(terminalId * FormatSpec.FREQUENCY_AND_FLAGS_SIZE + 1);
|
||||||
|
@ -240,32 +301,10 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
|
||||||
}
|
}
|
||||||
addressPointer += BinaryDictIOUtils.getChildrenAddressSize(flags, options);
|
addressPointer += BinaryDictIOUtils.getChildrenAddressSize(flags, options);
|
||||||
final ArrayList<WeightedString> shortcutTargets = readShortcuts(terminalId);
|
final ArrayList<WeightedString> shortcutTargets = readShortcuts(terminalId);
|
||||||
|
final ArrayList<PendingAttribute> bigrams =
|
||||||
|
mBigramReader.readTargetsAndFrequencies(terminalId,
|
||||||
|
mTerminalAddressTableBuffer);
|
||||||
|
|
||||||
final ArrayList<PendingAttribute> bigrams;
|
|
||||||
if (0 != (flags & FormatSpec.FLAG_HAS_BIGRAMS)) {
|
|
||||||
bigrams = new ArrayList<PendingAttribute>();
|
|
||||||
final int posOfBigrams = mBigramAddressTable.get(0 /* contentTableIndex */, terminalId);
|
|
||||||
mBigramBuffer.position(posOfBigrams);
|
|
||||||
while (bigrams.size() < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
|
|
||||||
// If bigrams.size() reaches FormatSpec.MAX_BIGRAMS_IN_A_PTNODE,
|
|
||||||
// remaining bigram entries are ignored.
|
|
||||||
final int bigramFlags = mBigramBuffer.readUnsignedByte();
|
|
||||||
final int targetTerminalId = mBigramBuffer.readUnsignedInt24();
|
|
||||||
mTerminalAddressTableBuffer.position(
|
|
||||||
targetTerminalId * FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE);
|
|
||||||
final int targetAddress = mTerminalAddressTableBuffer.readUnsignedInt24();
|
|
||||||
bigrams.add(new PendingAttribute(
|
|
||||||
bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY,
|
|
||||||
targetAddress));
|
|
||||||
if (0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break;
|
|
||||||
}
|
|
||||||
if (bigrams.size() >= FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
|
|
||||||
throw new RuntimeException("Too many bigrams in a PtNode (" + bigrams.size()
|
|
||||||
+ " but max is " + FormatSpec.MAX_BIGRAMS_IN_A_PTNODE + ")");
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
bigrams = null;
|
|
||||||
}
|
|
||||||
return new PtNodeInfo(ptNodePos, addressPointer, flags, characters, frequency,
|
return new PtNodeInfo(ptNodePos, addressPointer, flags, characters, frequency,
|
||||||
parentAddress, childrenAddress, shortcutTargets, bigrams);
|
parentAddress, childrenAddress, shortcutTargets, bigrams);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue