Merge "Make SparseTable have multiple content tables."
This commit is contained in:
commit
2dcb63375d
4 changed files with 161 additions and 93 deletions
|
@ -17,6 +17,7 @@
|
|||
package com.android.inputmethod.latin.makedict;
|
||||
|
||||
import com.android.inputmethod.annotations.UsedForTesting;
|
||||
import com.android.inputmethod.latin.utils.CollectionUtils;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
|
@ -37,35 +38,39 @@ public class SparseTable {
|
|||
/**
|
||||
* mLookupTable is indexed by terminal ID, containing exactly one entry for every mBlockSize
|
||||
* terminals.
|
||||
* It contains at index i = j / mBlockSize the index in mContentTable where the values for
|
||||
* terminals with IDs j to j + mBlockSize - 1 are stored as an mBlockSize-sized integer array.
|
||||
* It contains at index i = j / mBlockSize the index in each ArrayList in mContentTables where
|
||||
* the values for terminals with IDs j to j + mBlockSize - 1 are stored as an mBlockSize-sized
|
||||
* integer array.
|
||||
*/
|
||||
private final ArrayList<Integer> mLookupTable;
|
||||
private final ArrayList<Integer> mContentTable;
|
||||
private final ArrayList<ArrayList<Integer>> mContentTables;
|
||||
|
||||
private final int mBlockSize;
|
||||
private final int mContentTableCount;
|
||||
public static final int NOT_EXIST = -1;
|
||||
public static final int SIZE_OF_INT_IN_BYTES = 4;
|
||||
|
||||
@UsedForTesting
|
||||
public SparseTable(final int initialCapacity, final int blockSize) {
|
||||
public SparseTable(final int initialCapacity, final int blockSize,
|
||||
final int contentTableCount) {
|
||||
mBlockSize = blockSize;
|
||||
final int lookupTableSize = initialCapacity / mBlockSize
|
||||
+ (initialCapacity % mBlockSize > 0 ? 1 : 0);
|
||||
mLookupTable = new ArrayList<Integer>(Collections.nCopies(lookupTableSize, NOT_EXIST));
|
||||
mContentTable = new ArrayList<Integer>();
|
||||
mContentTableCount = contentTableCount;
|
||||
mContentTables = CollectionUtils.newArrayList();
|
||||
for (int i = 0; i < mContentTableCount; ++i) {
|
||||
mContentTables.add(new ArrayList<Integer>());
|
||||
}
|
||||
}
|
||||
|
||||
@UsedForTesting
|
||||
public SparseTable(final int[] lookupTable, final int[] contentTable, final int blockSize) {
|
||||
public SparseTable(final ArrayList<Integer> lookupTable,
|
||||
final ArrayList<ArrayList<Integer>> contentTables, final int blockSize) {
|
||||
mBlockSize = blockSize;
|
||||
mLookupTable = new ArrayList<Integer>(lookupTable.length);
|
||||
for (int i = 0; i < lookupTable.length; ++i) {
|
||||
mLookupTable.add(lookupTable[i]);
|
||||
}
|
||||
mContentTable = new ArrayList<Integer>(contentTable.length);
|
||||
for (int i = 0; i < contentTable.length; ++i) {
|
||||
mContentTable.add(contentTable[i]);
|
||||
}
|
||||
mContentTableCount = contentTables.size();
|
||||
mLookupTable = lookupTable;
|
||||
mContentTables = contentTables;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -75,8 +80,8 @@ public class SparseTable {
|
|||
* Otherwise, IndexOutOfBoundsException will be raised.
|
||||
*/
|
||||
@UsedForTesting
|
||||
private static void convertByteArrayToIntegerArray(final byte[] byteArray,
|
||||
final ArrayList<Integer> integerArray) {
|
||||
private static ArrayList<Integer> convertByteArrayToIntegerArray(final byte[] byteArray) {
|
||||
final ArrayList<Integer> integerArray = new ArrayList<Integer>(byteArray.length / 4);
|
||||
for (int i = 0; i < byteArray.length; i += 4) {
|
||||
int value = 0;
|
||||
for (int j = i; j < i + 4; ++j) {
|
||||
|
@ -85,39 +90,43 @@ public class SparseTable {
|
|||
}
|
||||
integerArray.add(value);
|
||||
}
|
||||
return integerArray;
|
||||
}
|
||||
|
||||
@UsedForTesting
|
||||
public SparseTable(final byte[] lookupTable, final byte[] contentTable, final int blockSize) {
|
||||
mBlockSize = blockSize;
|
||||
mLookupTable = new ArrayList<Integer>(lookupTable.length / 4);
|
||||
mContentTable = new ArrayList<Integer>(contentTable.length / 4);
|
||||
convertByteArrayToIntegerArray(lookupTable, mLookupTable);
|
||||
convertByteArrayToIntegerArray(contentTable, mContentTable);
|
||||
}
|
||||
|
||||
@UsedForTesting
|
||||
public int get(final int index) {
|
||||
if (index < 0 || index / mBlockSize >= mLookupTable.size()
|
||||
|| mLookupTable.get(index / mBlockSize) == NOT_EXIST) {
|
||||
public int get(final int contentTableIndex, final int index) {
|
||||
if (!contains(index)) {
|
||||
return NOT_EXIST;
|
||||
}
|
||||
return mContentTable.get(mLookupTable.get(index / mBlockSize) + (index % mBlockSize));
|
||||
return mContentTables.get(contentTableIndex).get(
|
||||
mLookupTable.get(index / mBlockSize) + (index % mBlockSize));
|
||||
}
|
||||
|
||||
@UsedForTesting
|
||||
public void set(final int index, final int value) {
|
||||
if (mLookupTable.get(index / mBlockSize) == NOT_EXIST) {
|
||||
mLookupTable.set(index / mBlockSize, mContentTable.size());
|
||||
for (int i = 0; i < mBlockSize; ++i) {
|
||||
mContentTable.add(NOT_EXIST);
|
||||
}
|
||||
public ArrayList<Integer> getAll(final int index) {
|
||||
final ArrayList<Integer> ret = CollectionUtils.newArrayList();
|
||||
for (int i = 0; i < mContentTableCount; ++i) {
|
||||
ret.add(get(i, index));
|
||||
}
|
||||
mContentTable.set(mLookupTable.get(index / mBlockSize) + (index % mBlockSize), value);
|
||||
return ret;
|
||||
}
|
||||
|
||||
public void remove(final int index) {
|
||||
set(index, NOT_EXIST);
|
||||
@UsedForTesting
|
||||
public void set(final int contentTableIndex, final int index, final int value) {
|
||||
if (mLookupTable.get(index / mBlockSize) == NOT_EXIST) {
|
||||
mLookupTable.set(index / mBlockSize, mContentTables.get(contentTableIndex).size());
|
||||
for (int i = 0; i < mContentTableCount; ++i) {
|
||||
for (int j = 0; j < mBlockSize; ++j) {
|
||||
mContentTables.get(i).add(NOT_EXIST);
|
||||
}
|
||||
}
|
||||
}
|
||||
mContentTables.get(contentTableIndex).set(
|
||||
mLookupTable.get(index / mBlockSize) + (index % mBlockSize), value);
|
||||
}
|
||||
|
||||
public void remove(final int indexOfContent, final int index) {
|
||||
set(indexOfContent, index, NOT_EXIST);
|
||||
}
|
||||
|
||||
@UsedForTesting
|
||||
|
@ -127,7 +136,8 @@ public class SparseTable {
|
|||
|
||||
@UsedForTesting
|
||||
/* package */ int getContentTableSize() {
|
||||
return mContentTable.size();
|
||||
// This class always has at least one content table.
|
||||
return mContentTables.get(0).size();
|
||||
}
|
||||
|
||||
@UsedForTesting
|
||||
|
@ -136,36 +146,51 @@ public class SparseTable {
|
|||
}
|
||||
|
||||
public boolean contains(final int index) {
|
||||
return get(index) != NOT_EXIST;
|
||||
if (index < 0 || index / mBlockSize >= mLookupTable.size()
|
||||
|| mLookupTable.get(index / mBlockSize) == NOT_EXIST) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@UsedForTesting
|
||||
public void write(final OutputStream lookupOutStream, final OutputStream contentOutStream)
|
||||
public void write(final OutputStream lookupOutStream, final OutputStream[] contentOutStreams)
|
||||
throws IOException {
|
||||
if (contentOutStreams.length != mContentTableCount) {
|
||||
throw new RuntimeException(contentOutStreams.length + " streams are given, but the"
|
||||
+ " table has " + mContentTableCount + " content tables.");
|
||||
}
|
||||
for (final int index : mLookupTable) {
|
||||
BinaryDictEncoderUtils.writeUIntToStream(lookupOutStream, index, 4);
|
||||
BinaryDictEncoderUtils.writeUIntToStream(lookupOutStream, index, SIZE_OF_INT_IN_BYTES);
|
||||
}
|
||||
|
||||
for (final int index : mContentTable) {
|
||||
BinaryDictEncoderUtils.writeUIntToStream(contentOutStream, index, 4);
|
||||
for (int i = 0; i < contentOutStreams.length; ++i) {
|
||||
for (final int data : mContentTables.get(i)) {
|
||||
BinaryDictEncoderUtils.writeUIntToStream(contentOutStreams[i], data,
|
||||
SIZE_OF_INT_IN_BYTES);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@UsedForTesting
|
||||
public void writeToFiles(final File lookupTableFile, final File contentFile)
|
||||
public void writeToFiles(final File lookupTableFile, final File[] contentFiles)
|
||||
throws IOException {
|
||||
FileOutputStream lookupTableOutStream = null;
|
||||
FileOutputStream contentOutStream = null;
|
||||
FileOutputStream lookupTableOutStream = null;
|
||||
final FileOutputStream[] contentTableOutStreams = new FileOutputStream[mContentTableCount];
|
||||
try {
|
||||
lookupTableOutStream = new FileOutputStream(lookupTableFile);
|
||||
contentOutStream = new FileOutputStream(contentFile);
|
||||
write(lookupTableOutStream, contentOutStream);
|
||||
for (int i = 0; i < contentFiles.length; ++i) {
|
||||
contentTableOutStreams[i] = new FileOutputStream(contentFiles[i]);
|
||||
}
|
||||
write(lookupTableOutStream, contentTableOutStreams);
|
||||
} finally {
|
||||
if (lookupTableOutStream != null) {
|
||||
lookupTableOutStream.close();
|
||||
}
|
||||
if (contentOutStream != null) {
|
||||
contentOutStream.close();
|
||||
for (int i = 0; i < contentTableOutStreams.length; ++i) {
|
||||
if (contentTableOutStreams[i] != null) {
|
||||
contentTableOutStreams[i].close();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -185,10 +210,15 @@ public class SparseTable {
|
|||
}
|
||||
|
||||
@UsedForTesting
|
||||
public static SparseTable readFromFiles(final File lookupTableFile, final File contentFile,
|
||||
public static SparseTable readFromFiles(final File lookupTableFile, final File[] contentFiles,
|
||||
final int blockSize) throws IOException {
|
||||
final byte[] lookupTable = readFileToByteArray(lookupTableFile);
|
||||
final byte[] content = readFileToByteArray(contentFile);
|
||||
return new SparseTable(lookupTable, content, blockSize);
|
||||
final ArrayList<ArrayList<Integer>> contentTables =
|
||||
new ArrayList<ArrayList<Integer>>(contentFiles.length);
|
||||
for (int i = 0; i < contentFiles.length; ++i) {
|
||||
contentTables.add(convertByteArrayToIntegerArray(readFileToByteArray(contentFiles[i])));
|
||||
}
|
||||
return new SparseTable(convertByteArrayToIntegerArray(readFileToByteArray(lookupTableFile)),
|
||||
contentTables, blockSize);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -131,7 +131,7 @@ public class Ver4DictDecoder extends DictDecoder {
|
|||
mDictDirectory.getName() + FormatSpec.BIGRAM_LOOKUP_TABLE_FILE_EXTENSION);
|
||||
final File contentFile = new File(mDictDirectory,
|
||||
mDictDirectory.getName() + FormatSpec.BIGRAM_ADDRESS_TABLE_FILE_EXTENSION);
|
||||
mBigramAddressTable = SparseTable.readFromFiles(lookupIndexFile, contentFile,
|
||||
mBigramAddressTable = SparseTable.readFromFiles(lookupIndexFile, new File[] { contentFile },
|
||||
FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE);
|
||||
}
|
||||
|
||||
|
@ -208,7 +208,7 @@ public class Ver4DictDecoder extends DictDecoder {
|
|||
final ArrayList<PendingAttribute> bigrams;
|
||||
if (0 != (flags & FormatSpec.FLAG_HAS_BIGRAMS)) {
|
||||
bigrams = new ArrayList<PendingAttribute>();
|
||||
final int posOfBigrams = mBigramAddressTable.get(terminalId);
|
||||
final int posOfBigrams = mBigramAddressTable.get(0 /* contentTableIndex */, terminalId);
|
||||
mBigramBuffer.position(posOfBigrams);
|
||||
while (bigrams.size() < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
|
||||
// If bigrams.size() reaches FormatSpec.MAX_BIGRAMS_IN_A_PTNODE,
|
||||
|
|
|
@ -136,7 +136,7 @@ public class Ver4DictEncoder implements DictEncoder {
|
|||
|
||||
writeTerminalData(flatNodes, terminalCount);
|
||||
mBigramAddressTable = new SparseTable(terminalCount,
|
||||
FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE);
|
||||
FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, 1 /* contentTableCount */);
|
||||
writeBigrams(flatNodes, dict);
|
||||
writeBigramAddressSparseTable();
|
||||
|
||||
|
@ -231,8 +231,7 @@ public class Ver4DictEncoder implements DictEncoder {
|
|||
while (shortcutIterator.hasNext()) {
|
||||
final WeightedString target = shortcutIterator.next();
|
||||
final int shortcutFlags = BinaryDictEncoderUtils.makeShortcutFlags(
|
||||
shortcutIterator.hasNext(),
|
||||
target.mFrequency);
|
||||
shortcutIterator.hasNext(), target.mFrequency);
|
||||
mTrieBuf[mTriePos++] = (byte)shortcutFlags;
|
||||
final int shortcutShift = CharEncoding.writeString(mTrieBuf, mTriePos,
|
||||
target.mWord);
|
||||
|
@ -254,7 +253,8 @@ public class Ver4DictEncoder implements DictEncoder {
|
|||
for (final PtNode ptNode : nodeArray.mData) {
|
||||
if (ptNode.mBigrams != null) {
|
||||
final int startPos = bigramBuffer.size();
|
||||
mBigramAddressTable.set(ptNode.mTerminalId, startPos);
|
||||
mBigramAddressTable.set(0 /* contentTableIndex */, ptNode.mTerminalId,
|
||||
startPos);
|
||||
final Iterator<WeightedString> bigramIterator = ptNode.mBigrams.iterator();
|
||||
while (bigramIterator.hasNext()) {
|
||||
final WeightedString bigram = bigramIterator.next();
|
||||
|
@ -280,7 +280,7 @@ public class Ver4DictEncoder implements DictEncoder {
|
|||
new File(mDictDir, mBaseFilename + FormatSpec.BIGRAM_LOOKUP_TABLE_FILE_EXTENSION);
|
||||
final File contentFile =
|
||||
new File(mDictDir, mBaseFilename + FormatSpec.BIGRAM_ADDRESS_TABLE_FILE_EXTENSION);
|
||||
mBigramAddressTable.writeToFiles(lookupIndexFile, contentFile);
|
||||
mBigramAddressTable.writeToFiles(lookupIndexFile, new File[] { contentFile });
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -21,7 +21,6 @@ import android.test.suitebuilder.annotation.LargeTest;
|
|||
import android.util.Log;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
@ -36,9 +35,6 @@ import java.util.Random;
|
|||
public class SparseTableTests extends AndroidTestCase {
|
||||
private static final String TAG = SparseTableTests.class.getSimpleName();
|
||||
|
||||
private static final int[] SMALL_INDEX = { SparseTable.NOT_EXIST, 0 };
|
||||
private static final int[] BIG_INDEX = { SparseTable.NOT_EXIST, 1, 2, 3, 4, 5, 6, 7};
|
||||
|
||||
private final Random mRandom;
|
||||
private final ArrayList<Integer> mRandomIndex;
|
||||
|
||||
|
@ -59,26 +55,15 @@ public class SparseTableTests extends AndroidTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
public void testInitializeWithArray() {
|
||||
final SparseTable table = new SparseTable(SMALL_INDEX, BIG_INDEX, BLOCK_SIZE);
|
||||
for (int i = 0; i < 8; ++i) {
|
||||
assertEquals(SparseTable.NOT_EXIST, table.get(i));
|
||||
}
|
||||
assertEquals(SparseTable.NOT_EXIST, table.get(8));
|
||||
for (int i = 9; i < 16; ++i) {
|
||||
assertEquals(i - 8, table.get(i));
|
||||
}
|
||||
}
|
||||
|
||||
public void testSet() {
|
||||
final SparseTable table = new SparseTable(16, BLOCK_SIZE);
|
||||
table.set(3, 6);
|
||||
table.set(8, 16);
|
||||
final SparseTable table = new SparseTable(16, BLOCK_SIZE, 1);
|
||||
table.set(0, 3, 6);
|
||||
table.set(0, 8, 16);
|
||||
for (int i = 0; i < 16; ++i) {
|
||||
if (i == 3 || i == 8) {
|
||||
assertEquals(i * 2, table.get(i));
|
||||
assertEquals(i * 2, table.get(0, i));
|
||||
} else {
|
||||
assertEquals(SparseTable.NOT_EXIST, table.get(i));
|
||||
assertEquals(SparseTable.NOT_EXIST, table.get(0, i));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -94,11 +79,11 @@ public class SparseTableTests extends AndroidTestCase {
|
|||
}
|
||||
|
||||
private void runTestRandomSet() {
|
||||
final SparseTable table = new SparseTable(DEFAULT_SIZE, BLOCK_SIZE);
|
||||
final SparseTable table = new SparseTable(DEFAULT_SIZE, BLOCK_SIZE, 1);
|
||||
int elementCount = 0;
|
||||
for (int i = 0; i < DEFAULT_SIZE; ++i) {
|
||||
if (mRandomIndex.get(i) != SparseTable.NOT_EXIST) {
|
||||
table.set(i, mRandomIndex.get(i));
|
||||
table.set(0, i, mRandomIndex.get(i));
|
||||
elementCount++;
|
||||
}
|
||||
}
|
||||
|
@ -107,7 +92,7 @@ public class SparseTableTests extends AndroidTestCase {
|
|||
+ table.getContentTableSize());
|
||||
Log.d(TAG, "the table has " + elementCount + " elements");
|
||||
for (int i = 0; i < DEFAULT_SIZE; ++i) {
|
||||
assertEquals(table.get(i), (int)mRandomIndex.get(i));
|
||||
assertEquals(table.get(0, i), (int)mRandomIndex.get(i));
|
||||
}
|
||||
|
||||
// flush and reload
|
||||
|
@ -120,16 +105,13 @@ public class SparseTableTests extends AndroidTestCase {
|
|||
final File contentFile = File.createTempFile("testRandomSet", ".big");
|
||||
lookupOutStream = new FileOutputStream(lookupIndexFile);
|
||||
contentOutStream = new FileOutputStream(contentFile);
|
||||
table.write(lookupOutStream, contentOutStream);
|
||||
lookupInStream = new FileInputStream(lookupIndexFile);
|
||||
contentInStream = new FileInputStream(contentFile);
|
||||
final byte[] lookupArray = new byte[(int) lookupIndexFile.length()];
|
||||
final byte[] contentArray = new byte[(int) contentFile.length()];
|
||||
lookupInStream.read(lookupArray);
|
||||
contentInStream.read(contentArray);
|
||||
final SparseTable newTable = new SparseTable(lookupArray, contentArray, BLOCK_SIZE);
|
||||
table.write(lookupOutStream, new OutputStream[] { contentOutStream });
|
||||
lookupOutStream.flush();
|
||||
contentOutStream.flush();
|
||||
final SparseTable newTable = SparseTable.readFromFiles(lookupIndexFile,
|
||||
new File[] { contentFile }, BLOCK_SIZE);
|
||||
for (int i = 0; i < DEFAULT_SIZE; ++i) {
|
||||
assertEquals(table.get(i), newTable.get(i));
|
||||
assertEquals(table.get(0, i), newTable.get(0, i));
|
||||
}
|
||||
} catch (IOException e) {
|
||||
Log.d(TAG, "IOException while flushing and realoding", e);
|
||||
|
@ -157,4 +139,60 @@ public class SparseTableTests extends AndroidTestCase {
|
|||
runTestRandomSet();
|
||||
}
|
||||
}
|
||||
|
||||
public void testMultipleContents() {
|
||||
final int numOfContents = 5;
|
||||
generateRandomIndex(DEFAULT_SIZE, 20);
|
||||
final SparseTable table = new SparseTable(DEFAULT_SIZE, BLOCK_SIZE, numOfContents);
|
||||
for (int i = 0; i < mRandomIndex.size(); ++i) {
|
||||
if (mRandomIndex.get(i) != SparseTable.NOT_EXIST) {
|
||||
for (int j = 0; j < numOfContents; ++j) {
|
||||
table.set(j, i, mRandomIndex.get(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
OutputStream lookupOutStream = null;
|
||||
OutputStream[] contentsOutStream = new OutputStream[numOfContents];
|
||||
try {
|
||||
final File lookupIndexFile = File.createTempFile("testMultipleContents", "small");
|
||||
lookupOutStream = new FileOutputStream(lookupIndexFile);
|
||||
final File[] contentFiles = new File[numOfContents];
|
||||
for (int i = 0; i < numOfContents; ++i) {
|
||||
contentFiles[i] = File.createTempFile("testMultipleContents", "big" + i);
|
||||
contentsOutStream[i] = new FileOutputStream(contentFiles[i]);
|
||||
}
|
||||
table.write(lookupOutStream, contentsOutStream);
|
||||
lookupOutStream.flush();
|
||||
for (int i = 0; i < numOfContents; ++i) {
|
||||
contentsOutStream[i].flush();
|
||||
}
|
||||
final SparseTable newTable = SparseTable.readFromFiles(lookupIndexFile, contentFiles,
|
||||
BLOCK_SIZE);
|
||||
for (int i = 0; i < numOfContents; ++i) {
|
||||
for (int j = 0; j < DEFAULT_SIZE; ++j) {
|
||||
assertEquals(table.get(i, j), newTable.get(i, j));
|
||||
}
|
||||
}
|
||||
} catch (IOException e) {
|
||||
Log.d(TAG, "IOException while flushing and reloading", e);
|
||||
} finally {
|
||||
if (lookupOutStream != null) {
|
||||
try {
|
||||
lookupOutStream.close();
|
||||
} catch (IOException e) {
|
||||
Log.d(TAG, "IOException while closing the stream", e);
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < numOfContents; ++i) {
|
||||
if (contentsOutStream[i] != null) {
|
||||
try {
|
||||
contentsOutStream[i].close();
|
||||
} catch (IOException e) {
|
||||
Log.d(TAG, "IOException while closing the stream.", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue