Fix writeCharGroup: compute each bigram offset relative to the position just after its flags byte.

Change-Id: Ib841afaba0a20c3b300eb7d3e9133243f9f3ae58
main
Yuichiro Hanada 2012-10-05 11:43:21 +09:00
parent feac8a91f9
commit d2579c4832
2 changed files with 106 additions and 21 deletions

View File

@ -535,10 +535,13 @@ public final class BinaryDictIOUtils {
// TODO: Consolidate this code with the code that computes the size of the bigram list // TODO: Consolidate this code with the code that computes the size of the bigram list
// in BinaryDictionaryInputOutput#computeActualNodeSize // in BinaryDictionaryInputOutput#computeActualNodeSize
for (int i = 0; i < info.mBigrams.size(); ++i) { for (int i = 0; i < info.mBigrams.size(); ++i) {
final int bigramOffset = info.mBigrams.get(i).mAddress - info.mOriginalAddress;
final int bigramFrequency = info.mBigrams.get(i).mFrequency; final int bigramFrequency = info.mBigrams.get(i).mFrequency;
int bigramFlags = (i < info.mBigrams.size() - 1) int bigramFlags = (i < info.mBigrams.size() - 1)
? FormatSpec.FLAG_ATTRIBUTE_HAS_NEXT : 0; ? FormatSpec.FLAG_ATTRIBUTE_HAS_NEXT : 0;
size++;
final int bigramOffset = info.mBigrams.get(i).mAddress - (info.mOriginalAddress
+ size);
bigramFlags |= (bigramOffset < 0) ? FormatSpec.FLAG_ATTRIBUTE_OFFSET_NEGATIVE : 0; bigramFlags |= (bigramOffset < 0) ? FormatSpec.FLAG_ATTRIBUTE_OFFSET_NEGATIVE : 0;
switch (BinaryDictInputOutput.getByteSize(bigramOffset)) { switch (BinaryDictInputOutput.getByteSize(bigramOffset)) {
case 1: case 1:
@ -553,7 +556,6 @@ public final class BinaryDictIOUtils {
} }
bigramFlags |= bigramFrequency & FormatSpec.FLAG_ATTRIBUTE_FREQUENCY; bigramFlags |= bigramFrequency & FormatSpec.FLAG_ATTRIBUTE_FREQUENCY;
destination.write((byte)bigramFlags); destination.write((byte)bigramFlags);
size++;
size += writeVariableAddress(destination, Math.abs(bigramOffset)); size += writeVariableAddress(destination, Math.abs(bigramOffset));
} }
} }
@ -717,7 +719,7 @@ public final class BinaryDictIOUtils {
if (position == FormatSpec.NOT_VALID_WORD) { if (position == FormatSpec.NOT_VALID_WORD) {
// TODO: figure out what is the correct thing to do here. // TODO: figure out what is the correct thing to do here.
} else { } else {
bigrams.add(new PendingAttribute(position, bigram.mFrequency)); bigrams.add(new PendingAttribute(bigram.mFrequency, position));
} }
} }
} }
@ -947,4 +949,25 @@ public final class BinaryDictIOUtils {
} }
} }
} }
/**
 * Looks up a word in the dictionary buffer and reads the character group stored at its
 * terminal position.
 *
 * The word's terminal position is resolved first. When the word is present, the file header
 * is read from the start of the buffer to obtain the format options, then the buffer is moved
 * to the terminal position and the group is decoded there.
 *
 * @param buffer the buffer holding the dictionary file; its position is changed by this call.
 * @param word the word searched
 * @return the group found at the word's terminal position, or null when the lookup yields
 *         FormatSpec.NOT_VALID_WORD
 * @throws IOException propagated from the lookup and read helpers this method calls
 * @throws UnsupportedFormatException propagated from the lookup and read helpers this
 *         method calls
 */
public static CharGroupInfo findWordFromBuffer(final FusionDictionaryBufferInterface buffer,
        final String word) throws IOException, UnsupportedFormatException {
    final int terminalPosition = getTerminalPosition(buffer, word);
    if (terminalPosition == FormatSpec.NOT_VALID_WORD) {
        // Nothing to read for a word that is not in the dictionary.
        return null;
    }

    // The header sits at the start of the buffer; it carries the format options that
    // readCharGroup needs.
    buffer.position(0);
    final FileHeader fileHeader = BinaryDictInputOutput.readHeader(buffer);

    buffer.position(terminalPosition);
    return BinaryDictInputOutput.readCharGroup(buffer, terminalPosition,
            fileHeader.mFormatOptions);
}
} }

View File

@ -28,6 +28,7 @@ import android.test.AndroidTestCase;
import android.test.MoreAsserts; import android.test.MoreAsserts;
import android.util.Log; import android.util.Log;
import java.io.BufferedOutputStream;
import java.io.BufferedWriter; import java.io.BufferedWriter;
import java.io.File; import java.io.File;
import java.io.FileInputStream; import java.io.FileInputStream;
@ -41,7 +42,7 @@ import java.util.Arrays;
import java.util.HashMap; import java.util.HashMap;
import java.util.Random; import java.util.Random;
public class BinaryDictIOUtilsTests extends AndroidTestCase{ public class BinaryDictIOUtilsTests extends AndroidTestCase {
private static final String TAG = BinaryDictIOUtilsTests.class.getSimpleName(); private static final String TAG = BinaryDictIOUtilsTests.class.getSimpleName();
private static final FormatSpec.FormatOptions FORMAT_OPTIONS = private static final FormatSpec.FormatOptions FORMAT_OPTIONS =
new FormatSpec.FormatOptions(3, true); new FormatSpec.FormatOptions(3, true);
@ -150,26 +151,50 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase{
return position; return position;
} }
/**
 * Maps the given dictionary file read-only and looks up a word in it.
 *
 * Read and format errors are logged rather than silently dropped, so a failed lookup can be
 * told apart from a word that is simply absent. In both cases null is returned.
 *
 * @param file the dictionary file to search.
 * @param word the word searched
 * @return the result of BinaryDictIOUtils.findWordFromBuffer, or null when an IOException or
 *         UnsupportedFormatException occurred
 */
private CharGroupInfo findWordFromFile(final File file, final String word) {
    FileInputStream inStream = null;
    CharGroupInfo info = null;
    try {
        inStream = new FileInputStream(file);
        final FusionDictionaryBufferInterface buffer = new ByteBufferWrapper(
                inStream.getChannel().map(FileChannel.MapMode.READ_ONLY, 0, file.length()));
        info = BinaryDictIOUtils.findWordFromBuffer(buffer, word);
    } catch (IOException e) {
        // Keep the best-effort contract (return null) but leave a trace of the failure.
        Log.e(TAG, "IOException while looking up a word in " + file, e);
    } catch (UnsupportedFormatException e) {
        Log.e(TAG, "UnsupportedFormatException while looking up a word in " + file, e);
    } finally {
        if (inStream != null) {
            try {
                inStream.close();
            } catch (IOException e) {
                // Nothing useful can be done if closing a read-only stream fails.
            }
        }
    }
    return info;
}
// return amount of time to insert a word // return amount of time to insert a word
private long insertAndCheckWord(final File file, final String word, final int frequency, private long insertAndCheckWord(final File file, final String word, final int frequency,
final boolean exist) { final boolean exist, final ArrayList<WeightedString> bigrams,
final ArrayList<WeightedString> shortcuts) {
RandomAccessFile raFile = null; RandomAccessFile raFile = null;
FileOutputStream outStream = null; BufferedOutputStream outStream = null;
FusionDictionaryBufferInterface buffer = null; FusionDictionaryBufferInterface buffer = null;
long amountOfTime = -1; long amountOfTime = -1;
try { try {
raFile = new RandomAccessFile(file, "rw"); raFile = new RandomAccessFile(file, "rw");
buffer = new ByteBufferWrapper(raFile.getChannel().map( buffer = new ByteBufferWrapper(raFile.getChannel().map(
FileChannel.MapMode.READ_WRITE, 0, file.length())); FileChannel.MapMode.READ_WRITE, 0, file.length()));
outStream = new FileOutputStream(file, true); outStream = new BufferedOutputStream(new FileOutputStream(file, true));
if (!exist) { if (!exist) {
assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word)); assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word));
} }
final long now = System.nanoTime(); final long now = System.nanoTime();
BinaryDictIOUtils.insertWord(buffer, outStream, word, frequency, null, null, false, BinaryDictIOUtils.insertWord(buffer, outStream, word, frequency, bigrams, shortcuts,
false); false, false);
amountOfTime = System.nanoTime() - now; amountOfTime = System.nanoTime() - now;
outStream.flush();
MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word)); MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word));
outStream.close(); outStream.close();
raFile.close(); raFile.close();
@ -215,8 +240,6 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase{
} }
} }
private void checkReverseLookup(final File file, final String word, final int position) { private void checkReverseLookup(final File file, final String word, final int position) {
FileInputStream inStream = null; FileInputStream inStream = null;
try { try {
@ -242,7 +265,7 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase{
public void testInsertWord() { public void testInsertWord() {
File file = null; File file = null;
try { try {
file = File.createTempFile("testInsertWord", ".dict"); file = File.createTempFile("testInsertWord", ".dict", getContext().getCacheDir());
} catch (IOException e) { } catch (IOException e) {
fail("IOException while creating temporary file: " + e); fail("IOException while creating temporary file: " + e);
} }
@ -263,36 +286,74 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase{
} }
MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, getWordPosition(file, "abcd")); MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, getWordPosition(file, "abcd"));
insertAndCheckWord(file, "abcde", 10, false); insertAndCheckWord(file, "abcde", 10, false, null, null);
insertAndCheckWord(file, "abcdefghijklmn", 10, false); insertAndCheckWord(file, "abcdefghijklmn", 10, false, null, null);
checkReverseLookup(file, "abcdefghijklmn", getWordPosition(file, "abcdefghijklmn")); checkReverseLookup(file, "abcdefghijklmn", getWordPosition(file, "abcdefghijklmn"));
insertAndCheckWord(file, "abcdabcd", 10, false); insertAndCheckWord(file, "abcdabcd", 10, false, null, null);
checkReverseLookup(file, "abcdabcd", getWordPosition(file, "abcdabcd")); checkReverseLookup(file, "abcdabcd", getWordPosition(file, "abcdabcd"));
// update the existing word. // update the existing word.
insertAndCheckWord(file, "abcdabcd", 15, true); insertAndCheckWord(file, "abcdabcd", 15, true, null, null);
// split 1 // split 1
insertAndCheckWord(file, "ab", 20, false); insertAndCheckWord(file, "ab", 20, false, null, null);
// split 2 // split 2
insertAndCheckWord(file, "ami", 30, false); insertAndCheckWord(file, "ami", 30, false, null, null);
deleteWord(file, "ami"); deleteWord(file, "ami");
assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, "ami")); assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, "ami"));
insertAndCheckWord(file, "abcdabfg", 30, false); insertAndCheckWord(file, "abcdabfg", 30, false, null, null);
deleteWord(file, "abcd"); deleteWord(file, "abcd");
assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, "abcd")); assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, "abcd"));
} }
/**
 * Checks that a word inserted with a bigram can be read back with that bigram pointing at
 * the position of the bigram's target word.
 *
 * The test writes a small initial dictionary, inserts "banana", inserts "recursive" with
 * "banana" as its only bigram, then reads "recursive" back and compares the stored bigram
 * address with the position found for "banana".
 */
public void testInsertWordWithBigrams() {
    File file = null;
    try {
        file = File.createTempFile("testInsertWordWithBigrams", ".dict",
                getContext().getCacheDir());
    } catch (IOException e) {
        fail("IOException while creating temporary file: " + e);
    }

    // set an initial dictionary.
    final FusionDictionary dict = new FusionDictionary(new Node(),
            new FusionDictionary.DictionaryOptions(new HashMap<String,String>(), false, false));
    dict.add("abcd", 10, null, false);
    dict.add("efgh", 15, null, false);

    try {
        final FileOutputStream out = new FileOutputStream(file);
        BinaryDictInputOutput.writeDictionaryBinary(out, dict, FORMAT_OPTIONS);
        out.close();
    } catch (IOException e) {
        fail("IOException while writing an initial dictionary : " + e);
    } catch (UnsupportedFormatException e) {
        fail("UnsupportedFormatException while writing an initial dictionary : " + e);
    }

    final ArrayList<WeightedString> banana = new ArrayList<WeightedString>();
    banana.add(new WeightedString("banana", 10));

    // The bigram target has to be in the dictionary before a word can refer to it.
    insertAndCheckWord(file, "banana", 0, false, null, null);
    insertAndCheckWord(file, "recursive", 60, true, banana, null);

    final CharGroupInfo info = findWordFromFile(file, "recursive");
    final int bananaPos = getWordPosition(file, "banana");
    // findWordFromFile returns null on lookup failure; fail with an assertion, not an NPE.
    assertNotNull(info);
    assertNotNull(info.mBigrams);
    // assertEquals takes (expected, actual); this order keeps failure messages accurate.
    assertEquals(1, info.mBigrams.size());
    assertEquals(bananaPos, info.mBigrams.get(0).mAddress);
}
public void testRandomWords() { public void testRandomWords() {
File file = null; File file = null;
try { try {
file = File.createTempFile("testRandomWord", ".dict"); file = File.createTempFile("testRandomWord", ".dict", getContext().getCacheDir());
} catch (IOException e) { } catch (IOException e) {
} }
assertNotNull(file); assertNotNull(file);
@ -317,7 +378,8 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase{
long minTimeToInsert = 100000000; // 1000000000 is an upper bound for minTimeToInsert. long minTimeToInsert = 100000000; // 1000000000 is an upper bound for minTimeToInsert.
int cnt = 0; int cnt = 0;
for (final String word : sWords) { for (final String word : sWords) {
final long diff = insertAndCheckWord(file, word, cnt%255, false); final long diff = insertAndCheckWord(file, word,
cnt % FormatSpec.MAX_TERMINAL_FREQUENCY, false, null, null);
maxTimeToInsert = Math.max(maxTimeToInsert, diff); maxTimeToInsert = Math.max(maxTimeToInsert, diff);
minTimeToInsert = Math.min(minTimeToInsert, diff); minTimeToInsert = Math.min(minTimeToInsert, diff);
sum += diff; sum += diff;