Make BinaryDictIOUtils and DynamicBinaryDictIOUtils use BinaryDictReader.

Change-Id: I191dfe0e05ff3c2c5af99e8beebbb73b097748a3
This commit is contained in:
Yuichiro Hanada 2013-08-14 12:56:51 +09:00
parent c2f2d0d211
commit 3a73b37b30
4 changed files with 64 additions and 101 deletions

View file

@ -162,15 +162,16 @@ public final class BinaryDictIOUtils {
* Gets the address of the last CharGroup of the exact matching word in the dictionary.
* If no match is found, returns NOT_VALID_WORD.
*
* @param buffer the buffer to read.
* @param reader the reader.
* @param word the word we search for.
* @return the address of the terminal node.
* @throws IOException if the file can't be read.
* @throws UnsupportedFormatException if the format of the file is not recognized.
*/
@UsedForTesting
public static int getTerminalPosition(final FusionDictionaryBufferInterface buffer,
public static int getTerminalPosition(final BinaryDictReader reader,
final String word) throws IOException, UnsupportedFormatException {
final FusionDictionaryBufferInterface buffer = reader.getBuffer();
if (word == null) return FormatSpec.NOT_VALID_WORD;
if (buffer.position() != 0) buffer.position(0);
@ -507,18 +508,19 @@ public final class BinaryDictIOUtils {
}
/**
* Find a word from the buffer.
* Find a word using the BinaryDictReader.
*
* @param buffer the buffer representing the body of the dictionary file.
* @param reader the reader
* @param word the word searched
* @return the found group
* @throws IOException
* @throws UnsupportedFormatException
*/
@UsedForTesting
public static CharGroupInfo findWordFromBuffer(final FusionDictionaryBufferInterface buffer,
public static CharGroupInfo findWordByBinaryDictReader(final BinaryDictReader reader,
final String word) throws IOException, UnsupportedFormatException {
int position = getTerminalPosition(buffer, word);
int position = getTerminalPosition(reader, word);
final FusionDictionaryBufferInterface buffer = reader.getBuffer();
if (position != FormatSpec.NOT_VALID_WORD) {
buffer.position(0);
final FileHeader header = BinaryDictDecoder.readHeader(buffer);

View file

@ -49,17 +49,18 @@ public final class DynamicBinaryDictIOUtils {
/**
* Delete the word from the binary file.
*
* @param buffer the buffer to write.
* @param reader the reader.
* @param word the word we delete
* @throws IOException
* @throws UnsupportedFormatException
*/
@UsedForTesting
public static void deleteWord(final FusionDictionaryBufferInterface buffer,
final String word) throws IOException, UnsupportedFormatException {
public static void deleteWord(final BinaryDictReader reader, final String word)
throws IOException, UnsupportedFormatException {
final FusionDictionaryBufferInterface buffer = reader.getBuffer();
buffer.position(0);
final FileHeader header = BinaryDictDecoder.readHeader(buffer);
final int wordPosition = BinaryDictIOUtils.getTerminalPosition(buffer, word);
final int wordPosition = BinaryDictIOUtils.getTerminalPosition(reader, word);
if (wordPosition == FormatSpec.NOT_VALID_WORD) return;
buffer.position(wordPosition);
@ -235,7 +236,7 @@ public final class DynamicBinaryDictIOUtils {
/**
* Insert a word into a binary dictionary.
*
* @param buffer the buffer containing the existing dictionary.
* @param reader the reader.
* @param destination a stream to the underlying file, with the pointer at the end of the file.
* @param word the word to insert.
* @param frequency the frequency of the new word.
@ -248,16 +249,16 @@ public final class DynamicBinaryDictIOUtils {
// TODO: Support batch insertion.
// TODO: Remove @UsedForTesting once UserHistoryDictionary is implemented by BinaryDictionary.
@UsedForTesting
public static void insertWord(final FusionDictionaryBufferInterface buffer,
final OutputStream destination, final String word, final int frequency,
final ArrayList<WeightedString> bigramStrings,
public static void insertWord(final BinaryDictReader reader, final OutputStream destination,
final String word, final int frequency, final ArrayList<WeightedString> bigramStrings,
final ArrayList<WeightedString> shortcuts, final boolean isNotAWord,
final boolean isBlackListEntry)
throws IOException, UnsupportedFormatException {
final ArrayList<PendingAttribute> bigrams = new ArrayList<PendingAttribute>();
final FusionDictionaryBufferInterface buffer = reader.getBuffer();
if (bigramStrings != null) {
for (final WeightedString bigram : bigramStrings) {
int position = BinaryDictIOUtils.getTerminalPosition(buffer, bigram.mWord);
int position = BinaryDictIOUtils.getTerminalPosition(reader, bigram.mWord);
if (position == FormatSpec.NOT_VALID_WORD) {
// TODO: figure out what is the correct thing to do here.
} else {

View file

@ -493,8 +493,8 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
}
// Tests for getTerminalPosition
private String getWordFromBinary(final FusionDictionaryBufferInterface buffer,
final int address) {
private String getWordFromBinary(final BinaryDictReader reader, final int address) {
final FusionDictionaryBufferInterface buffer = reader.getBuffer();
if (buffer.position() != 0) buffer.position(0);
FileHeader header = null;
@ -510,14 +510,14 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
address - header.mHeaderSize, header.mFormatOptions).mWord;
}
private long runGetTerminalPosition(final FusionDictionaryBufferInterface buffer,
final String word, int index, boolean contained) {
private long runGetTerminalPosition(final BinaryDictReader reader, final String word, int index,
boolean contained) {
final int expectedFrequency = (UNIGRAM_FREQ + index) % 255;
long diff = -1;
int position = -1;
try {
final long now = System.nanoTime();
position = BinaryDictIOUtils.getTerminalPosition(buffer, word);
position = BinaryDictIOUtils.getTerminalPosition(reader, word);
diff = System.nanoTime() - now;
} catch (IOException e) {
Log.e(TAG, "IOException while getTerminalPosition", e);
@ -526,7 +526,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
}
assertEquals(FormatSpec.NOT_VALID_WORD != position, contained);
if (contained) assertEquals(getWordFromBinary(buffer, position), word);
if (contained) assertEquals(getWordFromBinary(reader, position), word);
return diff;
}
@ -547,29 +547,27 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
timeWritingDictToFile(file, dict, VERSION3_WITH_DYNAMIC_UPDATE);
final BinaryDictReader reader = new BinaryDictReader(file);
FusionDictionaryBufferInterface buffer = null;
try {
buffer = reader.openAndGetBuffer(
new BinaryDictReader.FusionDictionaryBufferFromByteArrayFactory());
reader.openBuffer(new BinaryDictReader.FusionDictionaryBufferFromByteArrayFactory());
} catch (IOException e) {
// ignore
Log.e(TAG, "IOException while opening the buffer", e);
}
assertNotNull("Can't get the buffer", buffer);
assertNotNull("Can't get the buffer", reader.getBuffer());
try {
// too long word
final String longWord = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz";
assertEquals(FormatSpec.NOT_VALID_WORD,
BinaryDictIOUtils.getTerminalPosition(buffer, longWord));
BinaryDictIOUtils.getTerminalPosition(reader, longWord));
// null
assertEquals(FormatSpec.NOT_VALID_WORD,
BinaryDictIOUtils.getTerminalPosition(buffer, null));
BinaryDictIOUtils.getTerminalPosition(reader, null));
// empty string
assertEquals(FormatSpec.NOT_VALID_WORD,
BinaryDictIOUtils.getTerminalPosition(buffer, ""));
BinaryDictIOUtils.getTerminalPosition(reader, ""));
} catch (IOException e) {
} catch (UnsupportedFormatException e) {
}
@ -577,7 +575,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
// Test a word that is contained within the dictionary.
long sum = 0;
for (int i = 0; i < sWords.size(); ++i) {
final long time = runGetTerminalPosition(buffer, sWords.get(i), i, true);
final long time = runGetTerminalPosition(reader, sWords.get(i), i, true);
sum += time == -1 ? 0 : time;
}
Log.d(TAG, "per a search : " + (((double)sum) / sWords.size() / 1000000));
@ -588,7 +586,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
for (int i = 0; i < 1000; ++i) {
final String word = generateWord(random, codePointSet);
if (sWords.indexOf(word) != -1) continue;
runGetTerminalPosition(buffer, word, i, false);
runGetTerminalPosition(reader, word, i, false);
}
}
@ -608,28 +606,27 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
timeWritingDictToFile(file, dict, VERSION3_WITH_DYNAMIC_UPDATE);
final BinaryDictReader reader = new BinaryDictReader(file);
FusionDictionaryBufferInterface buffer = null;
try {
buffer = reader.openAndGetBuffer(
reader.openBuffer(
new BinaryDictReader.FusionDictionaryBufferFromByteArrayFactory());
} catch (IOException e) {
// ignore
Log.e(TAG, "IOException while opening the buffer", e);
}
assertNotNull("Can't get the buffer", buffer);
assertNotNull("Can't get the buffer", reader.getBuffer());
try {
MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD,
BinaryDictIOUtils.getTerminalPosition(buffer, sWords.get(0)));
DynamicBinaryDictIOUtils.deleteWord(buffer, sWords.get(0));
BinaryDictIOUtils.getTerminalPosition(reader, sWords.get(0)));
DynamicBinaryDictIOUtils.deleteWord(reader, sWords.get(0));
assertEquals(FormatSpec.NOT_VALID_WORD,
BinaryDictIOUtils.getTerminalPosition(buffer, sWords.get(0)));
BinaryDictIOUtils.getTerminalPosition(reader, sWords.get(0)));
MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD,
BinaryDictIOUtils.getTerminalPosition(buffer, sWords.get(5)));
DynamicBinaryDictIOUtils.deleteWord(buffer, sWords.get(5));
BinaryDictIOUtils.getTerminalPosition(reader, sWords.get(5)));
DynamicBinaryDictIOUtils.deleteWord(reader, sWords.get(5));
assertEquals(FormatSpec.NOT_VALID_WORD,
BinaryDictIOUtils.getTerminalPosition(buffer, sWords.get(5)));
BinaryDictIOUtils.getTerminalPosition(reader, sWords.get(5)));
} catch (IOException e) {
} catch (UnsupportedFormatException e) {
}

View file

@ -21,8 +21,9 @@ import android.test.MoreAsserts;
import android.test.suitebuilder.annotation.LargeTest;
import android.util.Log;
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.ByteBufferWrapper;
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.FusionDictionaryBufferInterface;
import com.android.inputmethod.latin.makedict.BinaryDictReader.
FusionDictionaryBufferFromWritableByteBufferFactory;
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
@ -33,8 +34,6 @@ import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Random;
@ -137,12 +136,12 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
private int getWordPosition(final File file, final String word) {
int position = FormatSpec.NOT_VALID_WORD;
final BinaryDictReader reader = new BinaryDictReader(file);
FileInputStream inStream = null;
try {
inStream = new FileInputStream(file);
final FusionDictionaryBufferInterface buffer = new ByteBufferWrapper(
inStream.getChannel().map(FileChannel.MapMode.READ_ONLY, 0, file.length()));
position = BinaryDictIOUtils.getTerminalPosition(buffer, word);
reader.openBuffer(new BinaryDictReader.FusionDictionaryBufferFromByteBufferFactory());
position = BinaryDictIOUtils.getTerminalPosition(reader, word);
} catch (IOException e) {
} catch (UnsupportedFormatException e) {
} finally {
@ -158,23 +157,13 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
}
private CharGroupInfo findWordFromFile(final File file, final String word) {
FileInputStream inStream = null;
final BinaryDictReader reader = new BinaryDictReader(file);
CharGroupInfo info = null;
try {
inStream = new FileInputStream(file);
final FusionDictionaryBufferInterface buffer = new ByteBufferWrapper(
inStream.getChannel().map(FileChannel.MapMode.READ_ONLY, 0, file.length()));
info = BinaryDictIOUtils.findWordFromBuffer(buffer, word);
reader.openBuffer(new BinaryDictReader.FusionDictionaryBufferFromByteBufferFactory());
info = BinaryDictIOUtils.findWordByBinaryDictReader(reader, word);
} catch (IOException e) {
} catch (UnsupportedFormatException e) {
} finally {
if (inStream != null) {
try {
inStream.close();
} catch (IOException e) {
// do nothing
}
}
}
return info;
}
@ -183,42 +172,33 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
private long insertAndCheckWord(final File file, final String word, final int frequency,
final boolean exist, final ArrayList<WeightedString> bigrams,
final ArrayList<WeightedString> shortcuts) {
RandomAccessFile raFile = null;
final BinaryDictReader reader = new BinaryDictReader(file);
BufferedOutputStream outStream = null;
FusionDictionaryBufferInterface buffer = null;
long amountOfTime = -1;
try {
raFile = new RandomAccessFile(file, "rw");
buffer = new ByteBufferWrapper(raFile.getChannel().map(
FileChannel.MapMode.READ_WRITE, 0, file.length()));
reader.openBuffer(new FusionDictionaryBufferFromWritableByteBufferFactory());
outStream = new BufferedOutputStream(new FileOutputStream(file, true));
if (!exist) {
assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word));
}
final long now = System.nanoTime();
DynamicBinaryDictIOUtils.insertWord(buffer, outStream, word, frequency, bigrams,
DynamicBinaryDictIOUtils.insertWord(reader, outStream, word, frequency, bigrams,
shortcuts, false, false);
amountOfTime = System.nanoTime() - now;
outStream.flush();
MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word));
outStream.close();
raFile.close();
} catch (IOException e) {
Log.e(TAG, "Raised an IOException while inserting a word", e);
} catch (UnsupportedFormatException e) {
Log.e(TAG, "Raised an UnsupportedFormatException error while inserting a word", e);
} finally {
if (outStream != null) {
try {
outStream.close();
} catch (IOException e) {
// do nothing
}
}
if (raFile != null) {
try {
raFile.close();
} catch (IOException e) {
// do nothing
Log.e(TAG, "Failed to close the output stream", e);
}
}
}
@ -226,45 +206,28 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
}
private void deleteWord(final File file, final String word) {
RandomAccessFile raFile = null;
FusionDictionaryBufferInterface buffer = null;
final BinaryDictReader reader = new BinaryDictReader(file);
try {
raFile = new RandomAccessFile(file, "rw");
buffer = new ByteBufferWrapper(raFile.getChannel().map(
FileChannel.MapMode.READ_WRITE, 0, file.length()));
DynamicBinaryDictIOUtils.deleteWord(buffer, word);
reader.openBuffer(new FusionDictionaryBufferFromWritableByteBufferFactory());
DynamicBinaryDictIOUtils.deleteWord(reader, word);
} catch (IOException e) {
} catch (UnsupportedFormatException e) {
} finally {
if (raFile != null) {
try {
raFile.close();
} catch (IOException e) {
// do nothing
}
}
}
}
private void checkReverseLookup(final File file, final String word, final int position) {
FileInputStream inStream = null;
final BinaryDictReader reader = new BinaryDictReader(file);
try {
inStream = new FileInputStream(file);
final FusionDictionaryBufferInterface buffer = new ByteBufferWrapper(
inStream.getChannel().map(FileChannel.MapMode.READ_ONLY, 0, file.length()));
final FusionDictionaryBufferInterface buffer = reader.openAndGetBuffer(
new BinaryDictReader.FusionDictionaryBufferFromByteBufferFactory());
final FileHeader header = BinaryDictDecoder.readHeader(buffer);
assertEquals(word, BinaryDictDecoder.getWordAtAddress(buffer, header.mHeaderSize,
position - header.mHeaderSize, header.mFormatOptions).mWord);
assertEquals(word,
BinaryDictDecoder.getWordAtAddress(reader.getBuffer(), header.mHeaderSize,
position - header.mHeaderSize, header.mFormatOptions).mWord);
} catch (IOException e) {
Log.e(TAG, "Raised an IOException while looking up a word", e);
} catch (UnsupportedFormatException e) {
} finally {
if (inStream != null) {
try {
inStream.close();
} catch (IOException e) {
// do nothing
}
}
Log.e(TAG, "Raised an UnsupportedFormatException error while looking up a word", e);
}
}