Make BinaryDictIOUtils and DynamicBinaryDictIOUtils use BinaryDictReader.

Change-Id: I191dfe0e05ff3c2c5af99e8beebbb73b097748a3
main
Yuichiro Hanada 2013-08-14 12:56:51 +09:00
parent c2f2d0d211
commit 3a73b37b30
4 changed files with 64 additions and 101 deletions

View File

@ -162,15 +162,16 @@ public final class BinaryDictIOUtils {
* Gets the address of the last CharGroup of the exact matching word in the dictionary. * Gets the address of the last CharGroup of the exact matching word in the dictionary.
* If no match is found, returns NOT_VALID_WORD. * If no match is found, returns NOT_VALID_WORD.
* *
* @param buffer the buffer to read. * @param reader the reader.
* @param word the word we search for. * @param word the word we search for.
* @return the address of the terminal node. * @return the address of the terminal node.
* @throws IOException if the file can't be read. * @throws IOException if the file can't be read.
* @throws UnsupportedFormatException if the format of the file is not recognized. * @throws UnsupportedFormatException if the format of the file is not recognized.
*/ */
@UsedForTesting @UsedForTesting
public static int getTerminalPosition(final FusionDictionaryBufferInterface buffer, public static int getTerminalPosition(final BinaryDictReader reader,
final String word) throws IOException, UnsupportedFormatException { final String word) throws IOException, UnsupportedFormatException {
final FusionDictionaryBufferInterface buffer = reader.getBuffer();
if (word == null) return FormatSpec.NOT_VALID_WORD; if (word == null) return FormatSpec.NOT_VALID_WORD;
if (buffer.position() != 0) buffer.position(0); if (buffer.position() != 0) buffer.position(0);
@ -507,18 +508,19 @@ public final class BinaryDictIOUtils {
} }
/** /**
* Find a word from the buffer. * Find a word using the BinaryDictReader.
* *
* @param buffer the buffer representing the body of the dictionary file. * @param reader the reader
* @param word the word searched * @param word the word searched
* @return the found group * @return the found group
* @throws IOException * @throws IOException
* @throws UnsupportedFormatException * @throws UnsupportedFormatException
*/ */
@UsedForTesting @UsedForTesting
public static CharGroupInfo findWordFromBuffer(final FusionDictionaryBufferInterface buffer, public static CharGroupInfo findWordByBinaryDictReader(final BinaryDictReader reader,
final String word) throws IOException, UnsupportedFormatException { final String word) throws IOException, UnsupportedFormatException {
int position = getTerminalPosition(buffer, word); int position = getTerminalPosition(reader, word);
final FusionDictionaryBufferInterface buffer = reader.getBuffer();
if (position != FormatSpec.NOT_VALID_WORD) { if (position != FormatSpec.NOT_VALID_WORD) {
buffer.position(0); buffer.position(0);
final FileHeader header = BinaryDictDecoder.readHeader(buffer); final FileHeader header = BinaryDictDecoder.readHeader(buffer);

View File

@ -49,17 +49,18 @@ public final class DynamicBinaryDictIOUtils {
/** /**
* Delete the word from the binary file. * Delete the word from the binary file.
* *
* @param buffer the buffer to write. * @param reader the reader.
* @param word the word we delete * @param word the word we delete
* @throws IOException * @throws IOException
* @throws UnsupportedFormatException * @throws UnsupportedFormatException
*/ */
@UsedForTesting @UsedForTesting
public static void deleteWord(final FusionDictionaryBufferInterface buffer, public static void deleteWord(final BinaryDictReader reader, final String word)
final String word) throws IOException, UnsupportedFormatException { throws IOException, UnsupportedFormatException {
final FusionDictionaryBufferInterface buffer = reader.getBuffer();
buffer.position(0); buffer.position(0);
final FileHeader header = BinaryDictDecoder.readHeader(buffer); final FileHeader header = BinaryDictDecoder.readHeader(buffer);
final int wordPosition = BinaryDictIOUtils.getTerminalPosition(buffer, word); final int wordPosition = BinaryDictIOUtils.getTerminalPosition(reader, word);
if (wordPosition == FormatSpec.NOT_VALID_WORD) return; if (wordPosition == FormatSpec.NOT_VALID_WORD) return;
buffer.position(wordPosition); buffer.position(wordPosition);
@ -235,7 +236,7 @@ public final class DynamicBinaryDictIOUtils {
/** /**
* Insert a word into a binary dictionary. * Insert a word into a binary dictionary.
* *
* @param buffer the buffer containing the existing dictionary. * @param reader the reader.
* @param destination a stream to the underlying file, with the pointer at the end of the file. * @param destination a stream to the underlying file, with the pointer at the end of the file.
* @param word the word to insert. * @param word the word to insert.
* @param frequency the frequency of the new word. * @param frequency the frequency of the new word.
@ -248,16 +249,16 @@ public final class DynamicBinaryDictIOUtils {
// TODO: Support batch insertion. // TODO: Support batch insertion.
// TODO: Remove @UsedForTesting once UserHistoryDictionary is implemented by BinaryDictionary. // TODO: Remove @UsedForTesting once UserHistoryDictionary is implemented by BinaryDictionary.
@UsedForTesting @UsedForTesting
public static void insertWord(final FusionDictionaryBufferInterface buffer, public static void insertWord(final BinaryDictReader reader, final OutputStream destination,
final OutputStream destination, final String word, final int frequency, final String word, final int frequency, final ArrayList<WeightedString> bigramStrings,
final ArrayList<WeightedString> bigramStrings,
final ArrayList<WeightedString> shortcuts, final boolean isNotAWord, final ArrayList<WeightedString> shortcuts, final boolean isNotAWord,
final boolean isBlackListEntry) final boolean isBlackListEntry)
throws IOException, UnsupportedFormatException { throws IOException, UnsupportedFormatException {
final ArrayList<PendingAttribute> bigrams = new ArrayList<PendingAttribute>(); final ArrayList<PendingAttribute> bigrams = new ArrayList<PendingAttribute>();
final FusionDictionaryBufferInterface buffer = reader.getBuffer();
if (bigramStrings != null) { if (bigramStrings != null) {
for (final WeightedString bigram : bigramStrings) { for (final WeightedString bigram : bigramStrings) {
int position = BinaryDictIOUtils.getTerminalPosition(buffer, bigram.mWord); int position = BinaryDictIOUtils.getTerminalPosition(reader, bigram.mWord);
if (position == FormatSpec.NOT_VALID_WORD) { if (position == FormatSpec.NOT_VALID_WORD) {
// TODO: figure out what is the correct thing to do here. // TODO: figure out what is the correct thing to do here.
} else { } else {

View File

@ -493,8 +493,8 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
} }
// Tests for getTerminalPosition // Tests for getTerminalPosition
private String getWordFromBinary(final FusionDictionaryBufferInterface buffer, private String getWordFromBinary(final BinaryDictReader reader, final int address) {
final int address) { final FusionDictionaryBufferInterface buffer = reader.getBuffer();
if (buffer.position() != 0) buffer.position(0); if (buffer.position() != 0) buffer.position(0);
FileHeader header = null; FileHeader header = null;
@ -510,14 +510,14 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
address - header.mHeaderSize, header.mFormatOptions).mWord; address - header.mHeaderSize, header.mFormatOptions).mWord;
} }
private long runGetTerminalPosition(final FusionDictionaryBufferInterface buffer, private long runGetTerminalPosition(final BinaryDictReader reader, final String word, int index,
final String word, int index, boolean contained) { boolean contained) {
final int expectedFrequency = (UNIGRAM_FREQ + index) % 255; final int expectedFrequency = (UNIGRAM_FREQ + index) % 255;
long diff = -1; long diff = -1;
int position = -1; int position = -1;
try { try {
final long now = System.nanoTime(); final long now = System.nanoTime();
position = BinaryDictIOUtils.getTerminalPosition(buffer, word); position = BinaryDictIOUtils.getTerminalPosition(reader, word);
diff = System.nanoTime() - now; diff = System.nanoTime() - now;
} catch (IOException e) { } catch (IOException e) {
Log.e(TAG, "IOException while getTerminalPosition", e); Log.e(TAG, "IOException while getTerminalPosition", e);
@ -526,7 +526,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
} }
assertEquals(FormatSpec.NOT_VALID_WORD != position, contained); assertEquals(FormatSpec.NOT_VALID_WORD != position, contained);
if (contained) assertEquals(getWordFromBinary(buffer, position), word); if (contained) assertEquals(getWordFromBinary(reader, position), word);
return diff; return diff;
} }
@ -547,29 +547,27 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
timeWritingDictToFile(file, dict, VERSION3_WITH_DYNAMIC_UPDATE); timeWritingDictToFile(file, dict, VERSION3_WITH_DYNAMIC_UPDATE);
final BinaryDictReader reader = new BinaryDictReader(file); final BinaryDictReader reader = new BinaryDictReader(file);
FusionDictionaryBufferInterface buffer = null;
try { try {
buffer = reader.openAndGetBuffer( reader.openBuffer(new BinaryDictReader.FusionDictionaryBufferFromByteArrayFactory());
new BinaryDictReader.FusionDictionaryBufferFromByteArrayFactory());
} catch (IOException e) { } catch (IOException e) {
// ignore // ignore
Log.e(TAG, "IOException while opening the buffer", e); Log.e(TAG, "IOException while opening the buffer", e);
} }
assertNotNull("Can't get the buffer", buffer); assertNotNull("Can't get the buffer", reader.getBuffer());
try { try {
// too long word // too long word
final String longWord = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz"; final String longWord = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz";
assertEquals(FormatSpec.NOT_VALID_WORD, assertEquals(FormatSpec.NOT_VALID_WORD,
BinaryDictIOUtils.getTerminalPosition(buffer, longWord)); BinaryDictIOUtils.getTerminalPosition(reader, longWord));
// null // null
assertEquals(FormatSpec.NOT_VALID_WORD, assertEquals(FormatSpec.NOT_VALID_WORD,
BinaryDictIOUtils.getTerminalPosition(buffer, null)); BinaryDictIOUtils.getTerminalPosition(reader, null));
// empty string // empty string
assertEquals(FormatSpec.NOT_VALID_WORD, assertEquals(FormatSpec.NOT_VALID_WORD,
BinaryDictIOUtils.getTerminalPosition(buffer, "")); BinaryDictIOUtils.getTerminalPosition(reader, ""));
} catch (IOException e) { } catch (IOException e) {
} catch (UnsupportedFormatException e) { } catch (UnsupportedFormatException e) {
} }
@ -577,7 +575,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
// Test a word that is contained within the dictionary. // Test a word that is contained within the dictionary.
long sum = 0; long sum = 0;
for (int i = 0; i < sWords.size(); ++i) { for (int i = 0; i < sWords.size(); ++i) {
final long time = runGetTerminalPosition(buffer, sWords.get(i), i, true); final long time = runGetTerminalPosition(reader, sWords.get(i), i, true);
sum += time == -1 ? 0 : time; sum += time == -1 ? 0 : time;
} }
Log.d(TAG, "per a search : " + (((double)sum) / sWords.size() / 1000000)); Log.d(TAG, "per a search : " + (((double)sum) / sWords.size() / 1000000));
@ -588,7 +586,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
for (int i = 0; i < 1000; ++i) { for (int i = 0; i < 1000; ++i) {
final String word = generateWord(random, codePointSet); final String word = generateWord(random, codePointSet);
if (sWords.indexOf(word) != -1) continue; if (sWords.indexOf(word) != -1) continue;
runGetTerminalPosition(buffer, word, i, false); runGetTerminalPosition(reader, word, i, false);
} }
} }
@ -608,28 +606,27 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
timeWritingDictToFile(file, dict, VERSION3_WITH_DYNAMIC_UPDATE); timeWritingDictToFile(file, dict, VERSION3_WITH_DYNAMIC_UPDATE);
final BinaryDictReader reader = new BinaryDictReader(file); final BinaryDictReader reader = new BinaryDictReader(file);
FusionDictionaryBufferInterface buffer = null;
try { try {
buffer = reader.openAndGetBuffer( reader.openBuffer(
new BinaryDictReader.FusionDictionaryBufferFromByteArrayFactory()); new BinaryDictReader.FusionDictionaryBufferFromByteArrayFactory());
} catch (IOException e) { } catch (IOException e) {
// ignore // ignore
Log.e(TAG, "IOException while opening the buffer", e); Log.e(TAG, "IOException while opening the buffer", e);
} }
assertNotNull("Can't get the buffer", buffer); assertNotNull("Can't get the buffer", reader.getBuffer());
try { try {
MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD,
BinaryDictIOUtils.getTerminalPosition(buffer, sWords.get(0))); BinaryDictIOUtils.getTerminalPosition(reader, sWords.get(0)));
DynamicBinaryDictIOUtils.deleteWord(buffer, sWords.get(0)); DynamicBinaryDictIOUtils.deleteWord(reader, sWords.get(0));
assertEquals(FormatSpec.NOT_VALID_WORD, assertEquals(FormatSpec.NOT_VALID_WORD,
BinaryDictIOUtils.getTerminalPosition(buffer, sWords.get(0))); BinaryDictIOUtils.getTerminalPosition(reader, sWords.get(0)));
MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD,
BinaryDictIOUtils.getTerminalPosition(buffer, sWords.get(5))); BinaryDictIOUtils.getTerminalPosition(reader, sWords.get(5)));
DynamicBinaryDictIOUtils.deleteWord(buffer, sWords.get(5)); DynamicBinaryDictIOUtils.deleteWord(reader, sWords.get(5));
assertEquals(FormatSpec.NOT_VALID_WORD, assertEquals(FormatSpec.NOT_VALID_WORD,
BinaryDictIOUtils.getTerminalPosition(buffer, sWords.get(5))); BinaryDictIOUtils.getTerminalPosition(reader, sWords.get(5)));
} catch (IOException e) { } catch (IOException e) {
} catch (UnsupportedFormatException e) { } catch (UnsupportedFormatException e) {
} }

View File

@ -21,8 +21,9 @@ import android.test.MoreAsserts;
import android.test.suitebuilder.annotation.LargeTest; import android.test.suitebuilder.annotation.LargeTest;
import android.util.Log; import android.util.Log;
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.ByteBufferWrapper;
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.FusionDictionaryBufferInterface; import com.android.inputmethod.latin.makedict.BinaryDictDecoder.FusionDictionaryBufferInterface;
import com.android.inputmethod.latin.makedict.BinaryDictReader.
FusionDictionaryBufferFromWritableByteBufferFactory;
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
@ -33,8 +34,6 @@ import java.io.File;
import java.io.FileInputStream; import java.io.FileInputStream;
import java.io.FileOutputStream; import java.io.FileOutputStream;
import java.io.IOException; import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.channels.FileChannel;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashMap; import java.util.HashMap;
import java.util.Random; import java.util.Random;
@ -137,12 +136,12 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
private int getWordPosition(final File file, final String word) { private int getWordPosition(final File file, final String word) {
int position = FormatSpec.NOT_VALID_WORD; int position = FormatSpec.NOT_VALID_WORD;
final BinaryDictReader reader = new BinaryDictReader(file);
FileInputStream inStream = null; FileInputStream inStream = null;
try { try {
inStream = new FileInputStream(file); inStream = new FileInputStream(file);
final FusionDictionaryBufferInterface buffer = new ByteBufferWrapper( reader.openBuffer(new BinaryDictReader.FusionDictionaryBufferFromByteBufferFactory());
inStream.getChannel().map(FileChannel.MapMode.READ_ONLY, 0, file.length())); position = BinaryDictIOUtils.getTerminalPosition(reader, word);
position = BinaryDictIOUtils.getTerminalPosition(buffer, word);
} catch (IOException e) { } catch (IOException e) {
} catch (UnsupportedFormatException e) { } catch (UnsupportedFormatException e) {
} finally { } finally {
@ -158,23 +157,13 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
} }
private CharGroupInfo findWordFromFile(final File file, final String word) { private CharGroupInfo findWordFromFile(final File file, final String word) {
FileInputStream inStream = null; final BinaryDictReader reader = new BinaryDictReader(file);
CharGroupInfo info = null; CharGroupInfo info = null;
try { try {
inStream = new FileInputStream(file); reader.openBuffer(new BinaryDictReader.FusionDictionaryBufferFromByteBufferFactory());
final FusionDictionaryBufferInterface buffer = new ByteBufferWrapper( info = BinaryDictIOUtils.findWordByBinaryDictReader(reader, word);
inStream.getChannel().map(FileChannel.MapMode.READ_ONLY, 0, file.length()));
info = BinaryDictIOUtils.findWordFromBuffer(buffer, word);
} catch (IOException e) { } catch (IOException e) {
} catch (UnsupportedFormatException e) { } catch (UnsupportedFormatException e) {
} finally {
if (inStream != null) {
try {
inStream.close();
} catch (IOException e) {
// do nothing
}
}
} }
return info; return info;
} }
@ -183,42 +172,33 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
private long insertAndCheckWord(final File file, final String word, final int frequency, private long insertAndCheckWord(final File file, final String word, final int frequency,
final boolean exist, final ArrayList<WeightedString> bigrams, final boolean exist, final ArrayList<WeightedString> bigrams,
final ArrayList<WeightedString> shortcuts) { final ArrayList<WeightedString> shortcuts) {
RandomAccessFile raFile = null; final BinaryDictReader reader = new BinaryDictReader(file);
BufferedOutputStream outStream = null; BufferedOutputStream outStream = null;
FusionDictionaryBufferInterface buffer = null;
long amountOfTime = -1; long amountOfTime = -1;
try { try {
raFile = new RandomAccessFile(file, "rw"); reader.openBuffer(new FusionDictionaryBufferFromWritableByteBufferFactory());
buffer = new ByteBufferWrapper(raFile.getChannel().map(
FileChannel.MapMode.READ_WRITE, 0, file.length()));
outStream = new BufferedOutputStream(new FileOutputStream(file, true)); outStream = new BufferedOutputStream(new FileOutputStream(file, true));
if (!exist) { if (!exist) {
assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word)); assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word));
} }
final long now = System.nanoTime(); final long now = System.nanoTime();
DynamicBinaryDictIOUtils.insertWord(buffer, outStream, word, frequency, bigrams, DynamicBinaryDictIOUtils.insertWord(reader, outStream, word, frequency, bigrams,
shortcuts, false, false); shortcuts, false, false);
amountOfTime = System.nanoTime() - now; amountOfTime = System.nanoTime() - now;
outStream.flush(); outStream.flush();
MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word)); MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word));
outStream.close(); outStream.close();
raFile.close();
} catch (IOException e) { } catch (IOException e) {
Log.e(TAG, "Raised an IOException while inserting a word", e);
} catch (UnsupportedFormatException e) { } catch (UnsupportedFormatException e) {
Log.e(TAG, "Raised an UnsupportedFormatException error while inserting a word", e);
} finally { } finally {
if (outStream != null) { if (outStream != null) {
try { try {
outStream.close(); outStream.close();
} catch (IOException e) { } catch (IOException e) {
// do nothing Log.e(TAG, "Failed to close the output stream", e);
}
}
if (raFile != null) {
try {
raFile.close();
} catch (IOException e) {
// do nothing
} }
} }
} }
@ -226,45 +206,28 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
} }
private void deleteWord(final File file, final String word) { private void deleteWord(final File file, final String word) {
RandomAccessFile raFile = null; final BinaryDictReader reader = new BinaryDictReader(file);
FusionDictionaryBufferInterface buffer = null;
try { try {
raFile = new RandomAccessFile(file, "rw"); reader.openBuffer(new FusionDictionaryBufferFromWritableByteBufferFactory());
buffer = new ByteBufferWrapper(raFile.getChannel().map( DynamicBinaryDictIOUtils.deleteWord(reader, word);
FileChannel.MapMode.READ_WRITE, 0, file.length()));
DynamicBinaryDictIOUtils.deleteWord(buffer, word);
} catch (IOException e) { } catch (IOException e) {
} catch (UnsupportedFormatException e) { } catch (UnsupportedFormatException e) {
} finally {
if (raFile != null) {
try {
raFile.close();
} catch (IOException e) {
// do nothing
}
}
} }
} }
private void checkReverseLookup(final File file, final String word, final int position) { private void checkReverseLookup(final File file, final String word, final int position) {
FileInputStream inStream = null; final BinaryDictReader reader = new BinaryDictReader(file);
try { try {
inStream = new FileInputStream(file); final FusionDictionaryBufferInterface buffer = reader.openAndGetBuffer(
final FusionDictionaryBufferInterface buffer = new ByteBufferWrapper( new BinaryDictReader.FusionDictionaryBufferFromByteBufferFactory());
inStream.getChannel().map(FileChannel.MapMode.READ_ONLY, 0, file.length()));
final FileHeader header = BinaryDictDecoder.readHeader(buffer); final FileHeader header = BinaryDictDecoder.readHeader(buffer);
assertEquals(word, BinaryDictDecoder.getWordAtAddress(buffer, header.mHeaderSize, assertEquals(word,
BinaryDictDecoder.getWordAtAddress(reader.getBuffer(), header.mHeaderSize,
position - header.mHeaderSize, header.mFormatOptions).mWord); position - header.mHeaderSize, header.mFormatOptions).mWord);
} catch (IOException e) { } catch (IOException e) {
Log.e(TAG, "Raised an IOException while looking up a word", e);
} catch (UnsupportedFormatException e) { } catch (UnsupportedFormatException e) {
} finally { Log.e(TAG, "Raised an UnsupportedFormatException error while looking up a word", e);
if (inStream != null) {
try {
inStream.close();
} catch (IOException e) {
// do nothing
}
}
} }
} }