Merge "Use native logic to read Ver4 dict."
commit
6422f77e94
|
AbstractDictDecoder.java
@@ -22,6 +22,7 @@ import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
 import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
 import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
 
+import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.HashMap;
@@ -223,4 +224,49 @@ public abstract class AbstractDictDecoder implements DictDecoder {
     public boolean hasValidRawBinaryDictionary() {
         return checkHeader() == SUCCESS;
     }
+
+    // Placeholder implementations below. These are actually unused.
+    @Override
+    public void openDictBuffer() throws FileNotFoundException, IOException,
+            UnsupportedFormatException {
+    }
+
+    @Override
+    public boolean isDictBufferOpen() {
+        return false;
+    }
+
+    @Override
+    public PtNodeInfo readPtNode(final int ptNodePos, final FormatOptions options) {
+        return null;
+    }
+
+    @Override
+    public void setPosition(int newPos) {
+    }
+
+    @Override
+    public int getPosition() {
+        return 0;
+    }
+
+    @Override
+    public int readPtNodeCount() {
+        return 0;
+    }
+
+    @Override
+    public boolean readAndFollowForwardLink() {
+        return false;
+    }
+
+    @Override
+    public boolean hasNextPtNodeArray() {
+        return false;
+    }
+
+    @Override
+    @UsedForTesting
+    public void skipPtNode(final FormatOptions formatOptions) {
+    }
 }
FusionDictionary.java
@@ -436,25 +436,25 @@ public final class FusionDictionary implements Iterable<WordProperty> {
     /**
      * Helper method to add a new bigram to the dictionary.
      *
-     * @param word1 the previous word of the context
-     * @param word2 the next word of the context
+     * @param word0 the previous word of the context
+     * @param word1 the next word of the context
      * @param frequency the bigram frequency
      */
-    public void setBigram(final String word1, final String word2, final int frequency) {
-        PtNode ptNode = findWordInTree(mRootNodeArray, word1);
-        if (ptNode != null) {
-            final PtNode ptNode2 = findWordInTree(mRootNodeArray, word2);
-            if (ptNode2 == null) {
-                add(getCodePoints(word2), 0, null, false /* isNotAWord */,
+    public void setBigram(final String word0, final String word1, final int frequency) {
+        PtNode ptNode0 = findWordInTree(mRootNodeArray, word0);
+        if (ptNode0 != null) {
+            final PtNode ptNode1 = findWordInTree(mRootNodeArray, word1);
+            if (ptNode1 == null) {
+                add(getCodePoints(word1), 0, null, false /* isNotAWord */,
                         false /* isBlacklistEntry */);
                 // The PtNode for the first word may have moved by the above insertion,
                 // if word1 and word2 share a common stem that happens not to have been
                 // a cutting point until now. In this case, we need to refresh ptNode.
-                ptNode = findWordInTree(mRootNodeArray, word1);
+                ptNode0 = findWordInTree(mRootNodeArray, word0);
             }
-            ptNode.addBigram(word2, frequency);
+            ptNode0.addBigram(word1, frequency);
         } else {
-            throw new RuntimeException("First word of bigram not found");
+            throw new RuntimeException("First word of bigram not found " + word0);
         }
     }
 
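For orientation, a minimal sketch of how setBigram is driven when a FusionDictionary is built by hand. It is not part of this change; the words, probabilities and the DictionaryOptions value (called options below) are illustrative assumptions, while the constructor, add() and setBigram() signatures follow the code in this commit.

    // Illustrative sketch only. "options" stands for a DictionaryOptions obtained elsewhere,
    // e.g. header.mDictionaryOptions as in the Ver4DictDecoder change further down.
    final FusionDictionary dict =
            new FusionDictionary(new FusionDictionary.PtNodeArray(), options);
    dict.add("this", 100 /* probability */, null /* shortcuts */, false /* isNotAWord */);
    dict.add("word", 80, null, false);
    // The first word must already be a unigram or setBigram throws; a missing second
    // word would be added automatically with probability 0.
    dict.setBigram("this", "word", 50 /* bigram frequency */);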
SparseTable.java (deleted)
@@ -1,223 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.android.inputmethod.latin.makedict;
-
-import com.android.inputmethod.annotations.UsedForTesting;
-import com.android.inputmethod.latin.utils.CollectionUtils;
-
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.OutputStream;
-import java.util.ArrayList;
-import java.util.Collections;
-
-/**
- * SparseTable is an extensible map from integer to integer.
- * This holds one value for every mBlockSize keys, so it uses 1/mBlockSize'th of the full index
- * memory.
- */
-@UsedForTesting
-public class SparseTable {
-
-    /**
-     * mLookupTable is indexed by terminal ID, containing exactly one entry for every mBlockSize
-     * terminals.
-     * It contains at index i = j / mBlockSize the index in each ArrayList in mContentsTables where
-     * the values for terminals with IDs j to j + mBlockSize - 1 are stored as an mBlockSize-sized
-     * integer array.
-     */
-    private final ArrayList<Integer> mLookupTable;
-    private final ArrayList<ArrayList<Integer>> mContentTables;
-
-    private final int mBlockSize;
-    private final int mContentTableCount;
-    public static final int NOT_EXIST = -1;
-    public static final int SIZE_OF_INT_IN_BYTES = 4;
-
-    @UsedForTesting
-    public SparseTable(final int initialCapacity, final int blockSize,
-            final int contentTableCount) {
-        mBlockSize = blockSize;
-        final int lookupTableSize = initialCapacity / mBlockSize
-                + (initialCapacity % mBlockSize > 0 ? 1 : 0);
-        mLookupTable = new ArrayList<Integer>(Collections.nCopies(lookupTableSize, NOT_EXIST));
-        mContentTableCount = contentTableCount;
-        mContentTables = CollectionUtils.newArrayList();
-        for (int i = 0; i < mContentTableCount; ++i) {
-            mContentTables.add(new ArrayList<Integer>());
-        }
-    }
-
-    @UsedForTesting
-    public SparseTable(final ArrayList<Integer> lookupTable,
-            final ArrayList<ArrayList<Integer>> contentTables, final int blockSize) {
-        mBlockSize = blockSize;
-        mContentTableCount = contentTables.size();
-        mLookupTable = lookupTable;
-        mContentTables = contentTables;
-    }
-
-    /**
-     * Converts an byte array to an int array considering each set of 4 bytes is an int stored in
-     * big-endian.
-     * The length of byteArray must be a multiple of four.
-     * Otherwise, IndexOutOfBoundsException will be raised.
-     */
-    @UsedForTesting
-    private static ArrayList<Integer> convertByteArrayToIntegerArray(final byte[] byteArray) {
-        final ArrayList<Integer> integerArray = new ArrayList<Integer>(byteArray.length / 4);
-        for (int i = 0; i < byteArray.length; i += 4) {
-            int value = 0;
-            for (int j = i; j < i + 4; ++j) {
-                value <<= 8;
-                value |= byteArray[j] & 0xFF;
-            }
-            integerArray.add(value);
-        }
-        return integerArray;
-    }
-
-    @UsedForTesting
-    public int get(final int contentTableIndex, final int index) {
-        if (!contains(index)) {
-            return NOT_EXIST;
-        }
-        return mContentTables.get(contentTableIndex).get(
-                mLookupTable.get(index / mBlockSize) + (index % mBlockSize));
-    }
-
-    @UsedForTesting
-    public ArrayList<Integer> getAll(final int index) {
-        final ArrayList<Integer> ret = CollectionUtils.newArrayList();
-        for (int i = 0; i < mContentTableCount; ++i) {
-            ret.add(get(i, index));
-        }
-        return ret;
-    }
-
-    @UsedForTesting
-    public void set(final int contentTableIndex, final int index, final int value) {
-        if (mLookupTable.get(index / mBlockSize) == NOT_EXIST) {
-            mLookupTable.set(index / mBlockSize, mContentTables.get(contentTableIndex).size());
-            for (int i = 0; i < mContentTableCount; ++i) {
-                for (int j = 0; j < mBlockSize; ++j) {
-                    mContentTables.get(i).add(NOT_EXIST);
-                }
-            }
-        }
-        mContentTables.get(contentTableIndex).set(
-                mLookupTable.get(index / mBlockSize) + (index % mBlockSize), value);
-    }
-
-    public void remove(final int indexOfContent, final int index) {
-        set(indexOfContent, index, NOT_EXIST);
-    }
-
-    @UsedForTesting
-    public int size() {
-        return mLookupTable.size() * mBlockSize;
-    }
-
-    @UsedForTesting
-    /* package */ int getContentTableSize() {
-        // This class always has at least one content table.
-        return mContentTables.get(0).size();
-    }
-
-    @UsedForTesting
-    /* package */ int getLookupTableSize() {
-        return mLookupTable.size();
-    }
-
-    public boolean contains(final int index) {
-        if (index < 0 || index / mBlockSize >= mLookupTable.size()
-                || mLookupTable.get(index / mBlockSize) == NOT_EXIST) {
-            return false;
-        }
-        return true;
-    }
-
-    @UsedForTesting
-    public void write(final OutputStream lookupOutStream, final OutputStream[] contentOutStreams)
-            throws IOException {
-        if (contentOutStreams.length != mContentTableCount) {
-            throw new RuntimeException(contentOutStreams.length + " streams are given, but the"
-                    + " table has " + mContentTableCount + " content tables.");
-        }
-        for (final int index : mLookupTable) {
-            BinaryDictEncoderUtils.writeUIntToStream(lookupOutStream, index, SIZE_OF_INT_IN_BYTES);
-        }
-
-        for (int i = 0; i < contentOutStreams.length; ++i) {
-            for (final int data : mContentTables.get(i)) {
-                BinaryDictEncoderUtils.writeUIntToStream(contentOutStreams[i], data,
-                        SIZE_OF_INT_IN_BYTES);
-            }
-        }
-    }
-
-    @UsedForTesting
-    public void writeToFiles(final File lookupTableFile, final File[] contentFiles)
-            throws IOException {
-        FileOutputStream lookupTableOutStream = null;
-        final FileOutputStream[] contentTableOutStreams = new FileOutputStream[mContentTableCount];
-        try {
-            lookupTableOutStream = new FileOutputStream(lookupTableFile);
-            for (int i = 0; i < contentFiles.length; ++i) {
-                contentTableOutStreams[i] = new FileOutputStream(contentFiles[i]);
-            }
-            write(lookupTableOutStream, contentTableOutStreams);
-        } finally {
-            if (lookupTableOutStream != null) {
-                lookupTableOutStream.close();
-            }
-            for (int i = 0; i < contentTableOutStreams.length; ++i) {
-                if (contentTableOutStreams[i] != null) {
-                    contentTableOutStreams[i].close();
-                }
-            }
-        }
-    }
-
-    private static byte[] readFileToByteArray(final File file) throws IOException {
-        final byte[] contents = new byte[(int) file.length()];
-        FileInputStream inStream = null;
-        try {
-            inStream = new FileInputStream(file);
-            inStream.read(contents);
-        } finally {
-            if (inStream != null) {
-                inStream.close();
-            }
-        }
-        return contents;
-    }
-
-    @UsedForTesting
-    public static SparseTable readFromFiles(final File lookupTableFile, final File[] contentFiles,
-            final int blockSize) throws IOException {
-        final ArrayList<ArrayList<Integer>> contentTables =
-                new ArrayList<ArrayList<Integer>>(contentFiles.length);
-        for (int i = 0; i < contentFiles.length; ++i) {
-            contentTables.add(convertByteArrayToIntegerArray(readFileToByteArray(contentFiles[i])));
-        }
-        return new SparseTable(convertByteArrayToIntegerArray(readFileToByteArray(lookupTableFile)),
-                contentTables, blockSize);
-    }
-}
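The indexing scheme described in the class comment above can be illustrated with a small sketch (not part of the commit; the sizes and values are arbitrary, the API is the one shown in the deleted file):

    // With blockSize = 4, keys 4..7 share one lookup-table slot. Setting key 6 allocates
    // one 4-entry block per content table and stores the value at base + (6 % 4).
    final SparseTable table = new SparseTable(16 /* initialCapacity */, 4 /* blockSize */,
            1 /* contentTableCount */);
    table.set(0 /* contentTableIndex */, 6 /* key */, 42 /* value */);
    assert table.get(0, 6) == 42;
    assert table.get(0, 7) == SparseTable.NOT_EXIST;  // allocated with the block, never set
    assert table.get(0, 12) == SparseTable.NOT_EXIST; // block never allocated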
SparseTableContentReader.java (deleted)
@@ -1,126 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.android.inputmethod.latin.makedict;
-
-import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
-import com.android.inputmethod.latin.makedict.DictDecoder.DictionaryBufferFactory;
-
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-
-/**
- * An auxiliary class for reading SparseTable and data written by SparseTableContentWriter.
- */
-public class SparseTableContentReader {
-
-    /**
-     * An interface of a function which is passed to SparseTableContentReader.read.
-     */
-    public interface SparseTableContentReaderInterface {
-        /**
-         * Reads data.
-         *
-         * @param buffer the DictBuffer. The position of the buffer is set to the head of data.
-         */
-        public void read(final DictBuffer buffer);
-    }
-
-    protected final int mContentCount;
-    protected final int mBlockSize;
-    protected final File mBaseDir;
-    protected final File mLookupTableFile;
-    protected final File[] mAddressTableFiles;
-    protected final File[] mContentFiles;
-    protected DictBuffer mLookupTableBuffer;
-    protected final DictBuffer[] mAddressTableBuffers;
-    private final DictBuffer[] mContentBuffers;
-    protected final DictionaryBufferFactory mFactory;
-
-    /**
-     * Sole constructor of SparseTableContentReader.
-     *
-     * @param name the name of SparseTable.
-     * @param blockSize the block size of the content table.
-     * @param baseDir the directory which contains the files of the content table.
-     * @param contentFilenames the file names of content files.
-     * @param contentSuffixes the ids of contents. These ids are used for a suffix of a name of
-     * address files and content files.
-     * @param factory the DictionaryBufferFactory which is used for opening the files.
-     */
-    public SparseTableContentReader(final String name, final int blockSize, final File baseDir,
-            final String[] contentFilenames, final String[] contentSuffixes,
-            final DictionaryBufferFactory factory) {
-        if (contentFilenames.length != contentSuffixes.length) {
-            throw new RuntimeException("The length of contentFilenames and the length of"
-                    + " contentSuffixes are different " + contentFilenames.length + ", "
-                    + contentSuffixes.length);
-        }
-        mBlockSize = blockSize;
-        mBaseDir = baseDir;
-        mFactory = factory;
-        mContentCount = contentFilenames.length;
-        mLookupTableFile = new File(baseDir, name + FormatSpec.LOOKUP_TABLE_FILE_SUFFIX);
-        mAddressTableFiles = new File[mContentCount];
-        mContentFiles = new File[mContentCount];
-        for (int i = 0; i < mContentCount; ++i) {
-            mAddressTableFiles[i] = new File(mBaseDir,
-                    name + FormatSpec.CONTENT_TABLE_FILE_SUFFIX + contentSuffixes[i]);
-            mContentFiles[i] = new File(mBaseDir, contentFilenames[i] + contentSuffixes[i]);
-        }
-        mAddressTableBuffers = new DictBuffer[mContentCount];
-        mContentBuffers = new DictBuffer[mContentCount];
-    }
-
-    public void openBuffers() throws FileNotFoundException, IOException {
-        mLookupTableBuffer = mFactory.getDictionaryBuffer(mLookupTableFile);
-        for (int i = 0; i < mContentCount; ++i) {
-            mAddressTableBuffers[i] = mFactory.getDictionaryBuffer(mAddressTableFiles[i]);
-            mContentBuffers[i] = mFactory.getDictionaryBuffer(mContentFiles[i]);
-        }
-    }
-
-    /**
-     * Calls the read() callback of the reader with the appropriate buffer appropriately positioned.
-     * @param contentNumber the index in the original contentFilenames[] array.
-     * @param terminalId the terminal ID to read.
-     * @param reader the reader on which to call the callback.
-     */
-    protected void read(final int contentNumber, final int terminalId,
-            final SparseTableContentReaderInterface reader) {
-        if (terminalId < 0 || (terminalId / mBlockSize) * SparseTable.SIZE_OF_INT_IN_BYTES
-                >= mLookupTableBuffer.limit()) {
-            return;
-        }
-
-        mLookupTableBuffer.position((terminalId / mBlockSize) * SparseTable.SIZE_OF_INT_IN_BYTES);
-        final int indexInAddressTable = mLookupTableBuffer.readInt();
-        if (indexInAddressTable == SparseTable.NOT_EXIST) {
-            return;
-        }
-
-        mAddressTableBuffers[contentNumber].position(SparseTable.SIZE_OF_INT_IN_BYTES
-                * ((indexInAddressTable * mBlockSize) + (terminalId % mBlockSize)));
-        final int address = mAddressTableBuffers[contentNumber].readInt();
-        if (address == SparseTable.NOT_EXIST) {
-            return;
-        }
-
-        mContentBuffers[contentNumber].position(address);
-        reader.read(mContentBuffers[contentNumber]);
-    }
-}
SparseTableContentWriter.java (deleted)
@@ -1,93 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.android.inputmethod.latin.makedict;
-
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.OutputStream;
-
-/**
- * An auxiliary class for writing data associated with SparseTable to files.
- */
-public class SparseTableContentWriter {
-    public interface SparseTableContentWriterInterface {
-        public void write(final OutputStream outStream) throws IOException;
-    }
-
-    private final int mContentCount;
-    private final SparseTable mSparseTable;
-    private final File mLookupTableFile;
-    protected final File mBaseDir;
-    private final File[] mAddressTableFiles;
-    private final File[] mContentFiles;
-    protected final OutputStream[] mContentOutStreams;
-
-    /**
-     * Sole constructor of SparseTableContentWriter.
-     *
-     * @param name the name of SparseTable.
-     * @param initialCapacity the initial capacity of SparseTable.
-     * @param blockSize the block size of the content table.
-     * @param baseDir the directory which contains the files of the content table.
-     * @param contentFilenames the file names of content files.
-     * @param contentIds the ids of contents. These ids are used for a suffix of a name of address
-     * files and content files.
-     */
-    public SparseTableContentWriter(final String name, final int initialCapacity,
-            final int blockSize, final File baseDir, final String[] contentFilenames,
-            final String[] contentIds) {
-        if (contentFilenames.length != contentIds.length) {
-            throw new RuntimeException("The length of contentFilenames and the length of"
-                    + " contentIds are different " + contentFilenames.length + ", "
-                    + contentIds.length);
-        }
-        mContentCount = contentFilenames.length;
-        mSparseTable = new SparseTable(initialCapacity, blockSize, mContentCount);
-        mLookupTableFile = new File(baseDir, name + FormatSpec.LOOKUP_TABLE_FILE_SUFFIX);
-        mAddressTableFiles = new File[mContentCount];
-        mContentFiles = new File[mContentCount];
-        mBaseDir = baseDir;
-        for (int i = 0; i < mContentCount; ++i) {
-            mAddressTableFiles[i] = new File(mBaseDir,
-                    name + FormatSpec.CONTENT_TABLE_FILE_SUFFIX + contentIds[i]);
-            mContentFiles[i] = new File(mBaseDir, contentFilenames[i] + contentIds[i]);
-        }
-        mContentOutStreams = new OutputStream[mContentCount];
-    }
-
-    public void openStreams() throws FileNotFoundException {
-        for (int i = 0; i < mContentCount; ++i) {
-            mContentOutStreams[i] = new FileOutputStream(mContentFiles[i]);
-        }
-    }
-
-    protected void write(final int contentIndex, final int index,
-            final SparseTableContentWriterInterface writer) throws IOException {
-        mSparseTable.set(contentIndex, index, (int) mContentFiles[contentIndex].length());
-        writer.write(mContentOutStreams[contentIndex]);
-        mContentOutStreams[contentIndex].flush();
-    }
-
-    public void closeStreams() throws IOException {
-        mSparseTable.writeToFiles(mLookupTableFile, mAddressTableFiles);
-        for (int i = 0; i < mContentCount; ++i) {
-            mContentOutStreams[i].close();
-        }
-    }
-}
Ver4DictDecoder.java
@@ -17,20 +17,15 @@
 package com.android.inputmethod.latin.makedict;
 
 import com.android.inputmethod.annotations.UsedForTesting;
-import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
-import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
-import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
-import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
+import com.android.inputmethod.latin.BinaryDictionary;
 import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
 import com.android.inputmethod.latin.utils.CollectionUtils;
-import android.util.Log;
+import com.android.inputmethod.latin.utils.FileUtils;
 
 import java.io.File;
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.Arrays;
 
 /**
  * An implementation of binary dictionary decoder for version 4 binary dictionary.
@@ -39,421 +34,74 @@ import java.util.Arrays;
 public class Ver4DictDecoder extends AbstractDictDecoder {
     private static final String TAG = Ver4DictDecoder.class.getSimpleName();
 
-    protected static final int FILETYPE_TRIE = 1;
-    protected static final int FILETYPE_FREQUENCY = 2;
-    protected static final int FILETYPE_TERMINAL_ADDRESS_TABLE = 3;
-    protected static final int FILETYPE_BIGRAM_FREQ = 4;
-    protected static final int FILETYPE_SHORTCUT = 5;
-    protected static final int FILETYPE_HEADER = 6;
-
-    protected final File mDictDirectory;
-    protected final DictionaryBufferFactory mBufferFactory;
-    protected DictBuffer mDictBuffer;
-    protected DictBuffer mHeaderBuffer;
-    protected DictBuffer mFrequencyBuffer;
-    protected DictBuffer mTerminalAddressTableBuffer;
-    private BigramContentReader mBigramReader;
-    private ShortcutContentReader mShortcutReader;
-
-    /**
-     * Raw PtNode info straight out of a trie file in version 4 dictionary.
-     */
-    protected static final class Ver4PtNodeInfo {
-        public final int mFlags;
-        public final int[] mCharacters;
-        public final int mTerminalId;
-        public final int mChildrenPos;
-        public final int mParentPos;
-        public final int mNodeSize;
-        public int mStartIndexOfCharacters;
-        public int mEndIndexOfCharacters; // exclusive
-
-        public Ver4PtNodeInfo(final int flags, final int[] characters, final int terminalId,
-                final int childrenPos, final int parentPos, final int nodeSize) {
-            mFlags = flags;
-            mCharacters = characters;
-            mTerminalId = terminalId;
-            mChildrenPos = childrenPos;
-            mParentPos = parentPos;
-            mNodeSize = nodeSize;
-            mStartIndexOfCharacters = 0;
-            mEndIndexOfCharacters = characters.length;
-        }
-    }
-
-    @UsedForTesting
-    /* package */ Ver4DictDecoder(final File dictDirectory, final int factoryFlag) {
-        mDictDirectory = dictDirectory;
-        mDictBuffer = mHeaderBuffer = mFrequencyBuffer = null;
-
-        if ((factoryFlag & MASK_DICTBUFFER) == USE_READONLY_BYTEBUFFER) {
-            mBufferFactory = new DictionaryBufferFromReadOnlyByteBufferFactory();
-        } else if ((factoryFlag & MASK_DICTBUFFER) == USE_BYTEARRAY) {
-            mBufferFactory = new DictionaryBufferFromByteArrayFactory();
-        } else if ((factoryFlag & MASK_DICTBUFFER) == USE_WRITABLE_BYTEBUFFER) {
-            mBufferFactory = new DictionaryBufferFromWritableByteBufferFactory();
-        } else {
-            mBufferFactory = new DictionaryBufferFromReadOnlyByteBufferFactory();
-        }
-    }
-
-    @UsedForTesting
-    /* package */ Ver4DictDecoder(final File dictDirectory, final DictionaryBufferFactory factory) {
-        mDictDirectory = dictDirectory;
-        mBufferFactory = factory;
-        mDictBuffer = mHeaderBuffer = mFrequencyBuffer = null;
-    }
-
-    protected File getFile(final int fileType) throws UnsupportedFormatException {
-        if (fileType == FILETYPE_TRIE) {
-            return new File(mDictDirectory,
-                    mDictDirectory.getName() + FormatSpec.TRIE_FILE_EXTENSION);
-        } else if (fileType == FILETYPE_HEADER) {
-            return new File(mDictDirectory,
-                    mDictDirectory.getName() + FormatSpec.HEADER_FILE_EXTENSION);
-        } else if (fileType == FILETYPE_FREQUENCY) {
-            return new File(mDictDirectory,
-                    mDictDirectory.getName() + FormatSpec.FREQ_FILE_EXTENSION);
-        } else if (fileType == FILETYPE_TERMINAL_ADDRESS_TABLE) {
-            return new File(mDictDirectory,
-                    mDictDirectory.getName() + FormatSpec.TERMINAL_ADDRESS_TABLE_FILE_EXTENSION);
-        } else if (fileType == FILETYPE_BIGRAM_FREQ) {
-            return new File(mDictDirectory,
-                    mDictDirectory.getName() + FormatSpec.BIGRAM_FILE_EXTENSION
-                            + FormatSpec.BIGRAM_FREQ_CONTENT_ID);
-        } else if (fileType == FILETYPE_SHORTCUT) {
-            return new File(mDictDirectory,
-                    mDictDirectory.getName() + FormatSpec.SHORTCUT_FILE_EXTENSION
-                            + FormatSpec.SHORTCUT_CONTENT_ID);
-        } else {
-            throw new UnsupportedFormatException("Unsupported kind of file : " + fileType);
-        }
-    }
-
-    @Override
-    public void openDictBuffer() throws FileNotFoundException, IOException,
-            UnsupportedFormatException {
-        if (!mDictDirectory.isDirectory()) {
-            throw new UnsupportedFormatException("Format 4 dictionary needs a directory");
-        }
-        mHeaderBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_HEADER));
-        mDictBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_TRIE));
-        mFrequencyBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_FREQUENCY));
-        mTerminalAddressTableBuffer = mBufferFactory.getDictionaryBuffer(
-                getFile(FILETYPE_TERMINAL_ADDRESS_TABLE));
-        mBigramReader = new BigramContentReader(mDictDirectory.getName(),
-                mDictDirectory, mBufferFactory);
-        mBigramReader.openBuffers();
-        mShortcutReader = new ShortcutContentReader(mDictDirectory.getName(), mDictDirectory,
-                mBufferFactory);
-        mShortcutReader.openBuffers();
-    }
-
-    @Override
-    public boolean isDictBufferOpen() {
-        return mDictBuffer != null;
-    }
-
-    @UsedForTesting
-    /* package */ DictBuffer getHeaderBuffer() {
-        return mHeaderBuffer;
-    }
-
-    @UsedForTesting
-    /* package */ DictBuffer getDictBuffer() {
-        return mDictBuffer;
-    }
-
-    @Override
-    public DictionaryHeader readHeader() throws IOException, UnsupportedFormatException {
-        if (mHeaderBuffer == null) {
-            openDictBuffer();
-        }
-        mHeaderBuffer.position(0);
-        final DictionaryHeader header = super.readHeader(mHeaderBuffer);
-        final int version = header.mFormatOptions.mVersion;
-        if (version != FormatSpec.VERSION4) {
-            throw new UnsupportedFormatException("File header has a wrong version : " + version);
-        }
-        return header;
-    }
-
-    /**
-     * An auxiliary class for reading bigrams.
-     */
-    protected static class BigramContentReader extends SparseTableContentReader {
-        public BigramContentReader(final String name, final File baseDir,
-                final DictionaryBufferFactory factory) {
-            super(name + FormatSpec.BIGRAM_FILE_EXTENSION,
-                    FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, baseDir,
-                    getContentFilenames(name), getContentIds(), factory);
-        }
-
-        // TODO: Consolidate this method and BigramContentWriter.getContentFilenames.
-        protected static String[] getContentFilenames(final String name) {
-            return new String[] { name + FormatSpec.BIGRAM_FILE_EXTENSION };
-        }
-
-        // TODO: Consolidate this method and BigramContentWriter.getContentIds.
-        protected static String[] getContentIds() {
-            return new String[] { FormatSpec.BIGRAM_FREQ_CONTENT_ID };
-        }
-
-        public ArrayList<PendingAttribute> readTargetsAndFrequencies(final int terminalId,
-                final DictBuffer terminalAddressTableBuffer, final FormatOptions options) {
-            final ArrayList<PendingAttribute> bigrams = CollectionUtils.newArrayList();
-            read(FormatSpec.BIGRAM_FREQ_CONTENT_INDEX, terminalId,
-                    new SparseTableContentReaderInterface() {
-                        @Override
-                        public void read(final DictBuffer buffer) {
-                            while (bigrams.size() < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
-                                // If bigrams.size() reaches FormatSpec.MAX_BIGRAMS_IN_A_PTNODE,
-                                // remaining bigram entries are ignored.
-                                final int bigramFlags = buffer.readUnsignedByte();
-                                final int probability;
-
-                                if (options.mHasTimestamp) {
-                                    probability = buffer.readUnsignedByte();
-                                    // Skip timestamp
-                                    buffer.readInt();
-                                    // Skip level
-                                    buffer.readUnsignedByte();
-                                    // Skip count
-                                    buffer.readUnsignedByte();
-                                } else {
-                                    probability = bigramFlags
-                                            & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY;
-                                }
-                                final int targetTerminalId = buffer.readUnsignedInt24();
-                                terminalAddressTableBuffer.position(targetTerminalId
-                                        * FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE);
-                                final int targetAddress =
-                                        terminalAddressTableBuffer.readUnsignedInt24();
-                                bigrams.add(new PendingAttribute(probability, targetAddress));
-                                if (0 == (bigramFlags
-                                        & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) {
-                                    break;
-                                }
-                            }
-                            if (bigrams.size() >= FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
-                                throw new RuntimeException("Too many bigrams in a PtNode ("
-                                        + bigrams.size() + " but max is "
-                                        + FormatSpec.MAX_BIGRAMS_IN_A_PTNODE + ")");
-                            }
-                        }
-                    });
-            if (bigrams.isEmpty()) return null;
-            return bigrams;
-        }
-    }
-
-    /**
-     * An auxiliary class for reading shortcuts.
-     */
-    protected static class ShortcutContentReader extends SparseTableContentReader {
-        public ShortcutContentReader(final String name, final File baseDir,
-                final DictionaryBufferFactory factory) {
-            super(name + FormatSpec.SHORTCUT_FILE_EXTENSION,
-                    FormatSpec.SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE, baseDir,
-                    new String[] { name + FormatSpec.SHORTCUT_FILE_EXTENSION },
-                    new String[] { FormatSpec.SHORTCUT_CONTENT_ID }, factory);
-        }
-
-        public ArrayList<WeightedString> readShortcuts(final int terminalId) {
-            final ArrayList<WeightedString> shortcuts = CollectionUtils.newArrayList();
-            read(FormatSpec.SHORTCUT_CONTENT_INDEX, terminalId,
-                    new SparseTableContentReaderInterface() {
-                        @Override
-                        public void read(final DictBuffer buffer) {
-                            while (true) {
-                                final int flags = buffer.readUnsignedByte();
-                                final String word = CharEncoding.readString(buffer);
-                                shortcuts.add(new WeightedString(word,
-                                        flags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY));
-                                if (0 == (flags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) {
-                                    break;
-                                }
-                            }
-                        }
-                    });
-            if (shortcuts.isEmpty()) return null;
-            return shortcuts;
-        }
-    }
-
-    protected static class PtNodeReader extends AbstractDictDecoder.PtNodeReader {
-        protected static int readFrequency(final DictBuffer frequencyBuffer, final int terminalId,
-                final FormatOptions formatOptions) {
-            final int readingPos;
-            if (formatOptions.mHasTimestamp) {
-                final int entrySize = FormatSpec.FREQUENCY_AND_FLAGS_SIZE
-                        + FormatSpec.UNIGRAM_TIMESTAMP_SIZE + FormatSpec.UNIGRAM_LEVEL_SIZE
-                        + FormatSpec.UNIGRAM_COUNTER_SIZE;
-                readingPos = terminalId * entrySize + FormatSpec.FLAGS_IN_FREQ_FILE_SIZE;
-            } else {
-                readingPos = terminalId * FormatSpec.FREQUENCY_AND_FLAGS_SIZE
-                        + FormatSpec.FLAGS_IN_FREQ_FILE_SIZE;
-            }
-            frequencyBuffer.position(readingPos);
-            return frequencyBuffer.readUnsignedByte();
-        }
-
-        protected static int readTerminalId(final DictBuffer dictBuffer) {
-            return dictBuffer.readInt();
-        }
-    }
-
-    private final int[] mCharacterBufferForReadingVer4PtNodeInfo
-            = new int[FormatSpec.MAX_WORD_LENGTH];
-
-    /**
-     * Reads PtNode from ptNodePos in the trie file and returns Ver4PtNodeInfo.
-     *
-     * @param ptNodePos the position of PtNode.
-     * @param options the format options.
-     * @return Ver4PtNodeInfo.
-     */
-    // TODO: Make this buffer thread safe.
-    // TODO: Support words longer than FormatSpec.MAX_WORD_LENGTH.
-    protected Ver4PtNodeInfo readVer4PtNodeInfo(final int ptNodePos, final FormatOptions options) {
-        int readingPos = ptNodePos;
-        final int flags = PtNodeReader.readPtNodeOptionFlags(mDictBuffer);
-        readingPos += FormatSpec.PTNODE_FLAGS_SIZE;
-
-        final int parentPos = PtNodeReader.readParentAddress(mDictBuffer, options);
-        if (BinaryDictIOUtils.supportsDynamicUpdate(options)) {
-            readingPos += FormatSpec.PARENT_ADDRESS_SIZE;
-        }
-
-        final int characters[];
-        if (0 != (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS)) {
-            int index = 0;
-            int character = CharEncoding.readChar(mDictBuffer);
-            readingPos += CharEncoding.getCharSize(character);
-            while (FormatSpec.INVALID_CHARACTER != character
-                    && index < FormatSpec.MAX_WORD_LENGTH) {
-                mCharacterBufferForReadingVer4PtNodeInfo[index++] = character;
-                character = CharEncoding.readChar(mDictBuffer);
-                readingPos += CharEncoding.getCharSize(character);
-            }
-            characters = Arrays.copyOfRange(mCharacterBufferForReadingVer4PtNodeInfo, 0, index);
-        } else {
-            final int character = CharEncoding.readChar(mDictBuffer);
-            readingPos += CharEncoding.getCharSize(character);
-            characters = new int[] { character };
-        }
-        final int terminalId;
-        if (0 != (FormatSpec.FLAG_IS_TERMINAL & flags)) {
-            terminalId = PtNodeReader.readTerminalId(mDictBuffer);
-            readingPos += FormatSpec.PTNODE_TERMINAL_ID_SIZE;
-        } else {
-            terminalId = PtNode.NOT_A_TERMINAL;
-        }
-
-        int childrenPos = PtNodeReader.readChildrenAddress(mDictBuffer, flags, options);
-        if (childrenPos != FormatSpec.NO_CHILDREN_ADDRESS) {
-            childrenPos += readingPos;
-        }
-        readingPos += BinaryDictIOUtils.getChildrenAddressSize(flags, options);
-
-        return new Ver4PtNodeInfo(flags, characters, terminalId, childrenPos, parentPos,
-                readingPos - ptNodePos);
-    }
-
-    @Override
-    public PtNodeInfo readPtNode(final int ptNodePos, final FormatOptions options) {
-        final Ver4PtNodeInfo nodeInfo = readVer4PtNodeInfo(ptNodePos, options);
-
-        final int frequency;
-        if (0 != (FormatSpec.FLAG_IS_TERMINAL & nodeInfo.mFlags)) {
-            frequency = PtNodeReader.readFrequency(mFrequencyBuffer, nodeInfo.mTerminalId, options);
-        } else {
-            frequency = PtNode.NOT_A_TERMINAL;
-        }
-
-        final ArrayList<WeightedString> shortcutTargets = mShortcutReader.readShortcuts(
-                nodeInfo.mTerminalId);
-        final ArrayList<PendingAttribute> bigrams = mBigramReader.readTargetsAndFrequencies(
-                nodeInfo.mTerminalId, mTerminalAddressTableBuffer, options);
-
-        return new PtNodeInfo(ptNodePos, ptNodePos + nodeInfo.mNodeSize, nodeInfo.mFlags,
-                nodeInfo.mCharacters, frequency, nodeInfo.mParentPos, nodeInfo.mChildrenPos,
-                shortcutTargets, bigrams);
-    }
-
-    private void deleteDictFiles() {
-        final File[] files = mDictDirectory.listFiles();
-        for (int i = 0; i < files.length; ++i) {
-            files[i].delete();
-        }
-    }
-
-    @Override
-    public FusionDictionary readDictionaryBinary(final FusionDictionary dict,
-            final boolean deleteDictIfBroken)
-            throws FileNotFoundException, IOException, UnsupportedFormatException {
-        if (mDictBuffer == null) {
-            openDictBuffer();
-        }
-        try {
-            return BinaryDictDecoderUtils.readDictionaryBinary(this, dict);
-        } catch (IOException e) {
-            Log.e(TAG, "The dictionary " + mDictDirectory.getName() + " is broken.", e);
-            if (deleteDictIfBroken) {
-                deleteDictFiles();
-            }
-            throw e;
-        } catch (UnsupportedFormatException e) {
-            Log.e(TAG, "The dictionary " + mDictDirectory.getName() + " is broken.", e);
-            if (deleteDictIfBroken) {
-                deleteDictFiles();
-            }
-            throw e;
-        }
-    }
-
-    @Override
-    public void setPosition(int newPos) {
-        mDictBuffer.position(newPos);
-    }
-
-    @Override
-    public int getPosition() {
-        return mDictBuffer.position();
-    }
-
-    @Override
-    public int readPtNodeCount() {
-        return BinaryDictDecoderUtils.readPtNodeCount(mDictBuffer);
-    }
-
-    @Override
-    public boolean readAndFollowForwardLink() {
-        final int forwardLinkPos = mDictBuffer.position();
-        int nextRelativePos = BinaryDictDecoderUtils.readSInt24(mDictBuffer);
-        if (nextRelativePos != FormatSpec.NO_FORWARD_LINK_ADDRESS) {
-            final int nextPos = forwardLinkPos + nextRelativePos;
-            if (nextPos >= 0 && nextPos < mDictBuffer.limit()) {
-                mDictBuffer.position(nextPos);
-                return true;
-            }
-        }
-        return false;
-    }
-
-    @Override
-    public boolean hasNextPtNodeArray() {
-        return mDictBuffer.position() != FormatSpec.NO_FORWARD_LINK_ADDRESS;
-    }
-
-    @Override
-    @UsedForTesting
-    public void skipPtNode(final FormatOptions formatOptions) {
-        final int flags = PtNodeReader.readPtNodeOptionFlags(mDictBuffer);
-        PtNodeReader.readParentAddress(mDictBuffer, formatOptions);
-        BinaryDictIOUtils.skipString(mDictBuffer,
-                (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS) != 0);
-        if ((flags & FormatSpec.FLAG_IS_TERMINAL) != 0) PtNodeReader.readTerminalId(mDictBuffer);
-        PtNodeReader.readChildrenAddress(mDictBuffer, flags, formatOptions);
-    }
+    final File mDictDirectory;
+    final BinaryDictionary mBinaryDictionary;
+
+    @UsedForTesting
+    /* package */ Ver4DictDecoder(final File dictDirectory, final int factoryFlag) {
+        this(dictDirectory, null /* factory */);
+    }
+
+    @UsedForTesting
+    /* package */ Ver4DictDecoder(final File dictDirectory, final DictionaryBufferFactory factory) {
+        mDictDirectory = dictDirectory;
+        mBinaryDictionary = new BinaryDictionary(dictDirectory.getAbsolutePath(),
+                0 /* offset */, 0 /* length */, true /* useFullEditDistance */, null /* locale */,
+                "" /* dictType */, true /* isUpdatable */);
+    }
+
+    @Override
+    public DictionaryHeader readHeader() throws IOException, UnsupportedFormatException {
+        return mBinaryDictionary.getHeader();
+    }
+
+    @Override
+    public FusionDictionary readDictionaryBinary(final FusionDictionary dict,
+            final boolean deleteDictIfBroken)
+            throws FileNotFoundException, IOException, UnsupportedFormatException {
+        final DictionaryHeader header = readHeader();
+        final FusionDictionary fusionDict = dict != null ? dict :
+                new FusionDictionary(new FusionDictionary.PtNodeArray(), header.mDictionaryOptions);
+        int token = 0;
+        final ArrayList<WordProperty> wordProperties = CollectionUtils.newArrayList();
+        do {
+            final BinaryDictionary.GetNextWordPropertyResult result =
+                    mBinaryDictionary.getNextWordProperty(token);
+            final WordProperty wordProperty = result.mWordProperty;
+            if (wordProperty == null) {
+                if (deleteDictIfBroken) {
+                    mBinaryDictionary.close();
+                    FileUtils.deleteRecursively(mDictDirectory);
+                }
+                return null;
+            }
+            wordProperties.add(wordProperty);
+            token = result.mNextToken;
+        } while (token != 0);
+
+        // Insert unigrams to the fusion dictionary.
+        for (final WordProperty wordProperty : wordProperties) {
+            // TODO: Support probability that is -1.
+            final int probability = wordProperty.getProbability() < 0 ?
+                    0 : wordProperty.getProbability();
+            if (wordProperty.mIsBlacklistEntry) {
+                fusionDict.addBlacklistEntry(wordProperty.mWord, wordProperty.mShortcutTargets,
+                        wordProperty.mIsNotAWord);
+            } else {
+                fusionDict.add(wordProperty.mWord, probability,
+                        wordProperty.mShortcutTargets, wordProperty.mIsNotAWord);
+            }
+        }
+        // Insert bigrams to the fusion dictionary.
+        for (final WordProperty wordProperty : wordProperties) {
+            if (wordProperty.mBigrams == null) {
+                continue;
+            }
+            final String word0 = wordProperty.mWord;
+            for (final WeightedString bigram : wordProperty.mBigrams) {
+                fusionDict.setBigram(word0, bigram.mWord, bigram.getProbability());
+            }
+        }
+        return fusionDict;
+    }
 }
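A rough sketch of how the rewritten decoder is driven, for readers following the change. It is illustrative only: the dictionary directory and the calling method (which must declare the thrown exceptions) are assumptions; the constructor and the two methods are the ones shown in the hunk above.

    // dictDirectory is assumed to contain a version 4 dictionary created elsewhere.
    final Ver4DictDecoder decoder = new Ver4DictDecoder(dictDirectory, 0 /* factoryFlag */);
    final DictionaryHeader header = decoder.readHeader();  // now backed by BinaryDictionary.getHeader()
    final FusionDictionary dict = decoder.readDictionaryBinary(null /* dict */,
            false /* deleteDictIfBroken */);                // iterates getNextWordProperty() natively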
BinaryDictDecoderEncoderTests.java
@@ -468,10 +468,6 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
 
         runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_BUFFER,
                 BinaryDictUtils.VERSION2_OPTIONS);
-        runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_BUFFER,
-                BinaryDictUtils.VERSION4_OPTIONS_WITHOUT_TIMESTAMP);
-        runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_BUFFER,
-                BinaryDictUtils.VERSION4_OPTIONS_WITH_TIMESTAMP);
 
         for (final String result : results) {
             Log.d(TAG, result);
@@ -483,10 +479,6 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
 
         runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_ARRAY,
                 BinaryDictUtils.VERSION2_OPTIONS);
-        runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_ARRAY,
-                BinaryDictUtils.VERSION4_OPTIONS_WITHOUT_TIMESTAMP);
-        runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_ARRAY,
-                BinaryDictUtils.VERSION4_OPTIONS_WITH_TIMESTAMP);
 
         for (final String result : results) {
             Log.d(TAG, result);
@@ -597,17 +589,8 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
 
         runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_ARRAY,
                 BinaryDictUtils.VERSION2_OPTIONS);
-        runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_ARRAY,
-                BinaryDictUtils.VERSION4_OPTIONS_WITHOUT_TIMESTAMP);
-        runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_ARRAY,
-                BinaryDictUtils.VERSION4_OPTIONS_WITH_TIMESTAMP);
-
         runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_BUFFER,
                 BinaryDictUtils.VERSION2_OPTIONS);
-        runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_BUFFER,
-                BinaryDictUtils.VERSION4_OPTIONS_WITHOUT_TIMESTAMP);
-        runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_BUFFER,
-                BinaryDictUtils.VERSION4_OPTIONS_WITH_TIMESTAMP);
 
         for (final String result : results) {
             Log.d(TAG, result);
SparseTableTests.java (deleted)
@@ -1,195 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.android.inputmethod.latin.makedict;
-
-import android.test.AndroidTestCase;
-import android.test.suitebuilder.annotation.LargeTest;
-import android.util.Log;
-
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.OutputStream;
-import java.util.ArrayList;
-import java.util.Random;
-
-/**
- * Unit tests for SparseTable.
- */
-@LargeTest
-public class SparseTableTests extends AndroidTestCase {
-    private static final String TAG = SparseTableTests.class.getSimpleName();
-
-    private final Random mRandom;
-    private final ArrayList<Integer> mRandomIndex;
-
-    private static final int DEFAULT_SIZE = 10000;
-    private static final int BLOCK_SIZE = 8;
-
-    public SparseTableTests() {
-        this(System.currentTimeMillis(), DEFAULT_SIZE);
-    }
-
-    public SparseTableTests(final long seed, final int tableSize) {
-        super();
-        Log.d(TAG, "Seed for test is " + seed + ", size is " + tableSize);
-        mRandom = new Random(seed);
-        mRandomIndex = new ArrayList<Integer>(tableSize);
-        for (int i = 0; i < tableSize; ++i) {
-            mRandomIndex.add(SparseTable.NOT_EXIST);
-        }
-    }
-
-    public void testSet() {
-        final SparseTable table = new SparseTable(16, BLOCK_SIZE, 1);
-        table.set(0, 3, 6);
-        table.set(0, 8, 16);
-        for (int i = 0; i < 16; ++i) {
-            if (i == 3 || i == 8) {
-                assertEquals(i * 2, table.get(0, i));
-            } else {
-                assertEquals(SparseTable.NOT_EXIST, table.get(0, i));
-            }
-        }
-    }
-
-    private void generateRandomIndex(final int size, final int prop) {
-        for (int i = 0; i < size; ++i) {
-            if (mRandom.nextInt(100) < prop) {
-                mRandomIndex.set(i, mRandom.nextInt());
-            } else {
-                mRandomIndex.set(i, SparseTable.NOT_EXIST);
-            }
-        }
-    }
-
-    private void runTestRandomSet() {
-        final SparseTable table = new SparseTable(DEFAULT_SIZE, BLOCK_SIZE, 1);
-        int elementCount = 0;
-        for (int i = 0; i < DEFAULT_SIZE; ++i) {
-            if (mRandomIndex.get(i) != SparseTable.NOT_EXIST) {
-                table.set(0, i, mRandomIndex.get(i));
-                elementCount++;
-            }
-        }
-
-        Log.d(TAG, "table size = " + table.getLookupTableSize() + " + "
-                + table.getContentTableSize());
-        Log.d(TAG, "the table has " + elementCount + " elements");
-        for (int i = 0; i < DEFAULT_SIZE; ++i) {
-            assertEquals(table.get(0, i), (int)mRandomIndex.get(i));
-        }
-
-        // flush and reload
-        OutputStream lookupOutStream = null;
-        OutputStream contentOutStream = null;
-        try {
-            final File lookupIndexFile = File.createTempFile("testRandomSet", ".small");
-            final File contentFile = File.createTempFile("testRandomSet", ".big");
-            lookupOutStream = new FileOutputStream(lookupIndexFile);
-            contentOutStream = new FileOutputStream(contentFile);
-            table.write(lookupOutStream, new OutputStream[] { contentOutStream });
-            lookupOutStream.flush();
-            contentOutStream.flush();
-            final SparseTable newTable = SparseTable.readFromFiles(lookupIndexFile,
-                    new File[] { contentFile }, BLOCK_SIZE);
-            for (int i = 0; i < DEFAULT_SIZE; ++i) {
-                assertEquals(table.get(0, i), newTable.get(0, i));
-            }
-        } catch (IOException e) {
-            Log.d(TAG, "IOException while flushing and realoding", e);
-        } finally {
-            if (lookupOutStream != null) {
-                try {
-                    lookupOutStream.close();
-                } catch (IOException e) {
-                    Log.d(TAG, "IOException while closing the stream", e);
-                }
-            }
-            if (contentOutStream != null) {
-                try {
-                    contentOutStream.close();
-                } catch (IOException e) {
-                    Log.d(TAG, "IOException while closing contentStream.", e);
-                }
-            }
-        }
-    }
-
-    public void testRandomSet() {
-        for (int i = 0; i <= 100; i += 10) {
-            generateRandomIndex(DEFAULT_SIZE, i);
-            runTestRandomSet();
-        }
-    }
-
-    public void testMultipleContents() {
-        final int numOfContents = 5;
-        generateRandomIndex(DEFAULT_SIZE, 20);
-        final SparseTable table = new SparseTable(DEFAULT_SIZE, BLOCK_SIZE, numOfContents);
-        for (int i = 0; i < mRandomIndex.size(); ++i) {
-            if (mRandomIndex.get(i) != SparseTable.NOT_EXIST) {
-                for (int j = 0; j < numOfContents; ++j) {
-                    table.set(j, i, mRandomIndex.get(i));
-                }
-            }
-        }
-
-        OutputStream lookupOutStream = null;
-        OutputStream[] contentsOutStream = new OutputStream[numOfContents];
-        try {
-            final File lookupIndexFile = File.createTempFile("testMultipleContents", "small");
-            lookupOutStream = new FileOutputStream(lookupIndexFile);
-            final File[] contentFiles = new File[numOfContents];
-            for (int i = 0; i < numOfContents; ++i) {
-                contentFiles[i] = File.createTempFile("testMultipleContents", "big" + i);
-                contentsOutStream[i] = new FileOutputStream(contentFiles[i]);
-            }
-            table.write(lookupOutStream, contentsOutStream);
-            lookupOutStream.flush();
-            for (int i = 0; i < numOfContents; ++i) {
-                contentsOutStream[i].flush();
-            }
-            final SparseTable newTable = SparseTable.readFromFiles(lookupIndexFile, contentFiles,
-                    BLOCK_SIZE);
-            for (int i = 0; i < numOfContents; ++i) {
-                for (int j = 0; j < DEFAULT_SIZE; ++j) {
-                    assertEquals(table.get(i, j), newTable.get(i, j));
-                }
-            }
-        } catch (IOException e) {
-            Log.d(TAG, "IOException while flushing and reloading", e);
-        } finally {
-            if (lookupOutStream != null) {
-                try {
-                    lookupOutStream.close();
-                } catch (IOException e) {
-                    Log.d(TAG, "IOException while closing the stream", e);
-                }
-            }
-            for (int i = 0; i < numOfContents; ++i) {
-                if (contentsOutStream[i] != null) {
-                    try {
-                        contentsOutStream[i].close();
-                    } catch (IOException e) {
-                        Log.d(TAG, "IOException while closing the stream.", e);
-                    }
-                }
-            }
-        }
-    }
-}