From d2579c4832325bb703e275674706886dce50915d Mon Sep 17 00:00:00 2001 From: Yuichiro Hanada Date: Fri, 5 Oct 2012 11:43:21 +0900 Subject: [PATCH] fix writeCharGroup. Change-Id: Ib841afaba0a20c3b300eb7d3e9133243f9f3ae58 --- .../latin/makedict/BinaryDictIOUtils.java | 29 +++++- .../makedict/BinaryDictIOUtilsTests.java | 98 +++++++++++++++---- 2 files changed, 106 insertions(+), 21 deletions(-) diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java index e5ec449ea..40e089f3a 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java @@ -535,10 +535,13 @@ public final class BinaryDictIOUtils { // TODO: Consolidate this code with the code that computes the size of the bigram list // in BinaryDictionaryInputOutput#computeActualNodeSize for (int i = 0; i < info.mBigrams.size(); ++i) { - final int bigramOffset = info.mBigrams.get(i).mAddress - info.mOriginalAddress; + final int bigramFrequency = info.mBigrams.get(i).mFrequency; int bigramFlags = (i < info.mBigrams.size() - 1) ? FormatSpec.FLAG_ATTRIBUTE_HAS_NEXT : 0; + size++; + final int bigramOffset = info.mBigrams.get(i).mAddress - (info.mOriginalAddress + + size); bigramFlags |= (bigramOffset < 0) ? FormatSpec.FLAG_ATTRIBUTE_OFFSET_NEGATIVE : 0; switch (BinaryDictInputOutput.getByteSize(bigramOffset)) { case 1: @@ -553,7 +556,6 @@ public final class BinaryDictIOUtils { } bigramFlags |= bigramFrequency & FormatSpec.FLAG_ATTRIBUTE_FREQUENCY; destination.write((byte)bigramFlags); - size++; size += writeVariableAddress(destination, Math.abs(bigramOffset)); } } @@ -717,7 +719,7 @@ public final class BinaryDictIOUtils { if (position == FormatSpec.NOT_VALID_WORD) { // TODO: figure out what is the correct thing to do here. } else { - bigrams.add(new PendingAttribute(position, bigram.mFrequency)); + bigrams.add(new PendingAttribute(bigram.mFrequency, position)); } } } @@ -947,4 +949,25 @@ public final class BinaryDictIOUtils { } } } + + /** + * Find a word from the buffer. + * + * @param buffer the buffer representing the body of the dictionary file. + * @param word the word searched + * @return the found group + * @throws IOException + * @throws UnsupportedFormatException + */ + public static CharGroupInfo findWordFromBuffer(final FusionDictionaryBufferInterface buffer, + final String word) throws IOException, UnsupportedFormatException { + int position = getTerminalPosition(buffer, word); + if (position != FormatSpec.NOT_VALID_WORD) { + buffer.position(0); + final FileHeader header = BinaryDictInputOutput.readHeader(buffer); + buffer.position(position); + return BinaryDictInputOutput.readCharGroup(buffer, position, header.mFormatOptions); + } + return null; + } } diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtilsTests.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtilsTests.java index 7607b58eb..318516845 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtilsTests.java +++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtilsTests.java @@ -28,6 +28,7 @@ import android.test.AndroidTestCase; import android.test.MoreAsserts; import android.util.Log; +import java.io.BufferedOutputStream; import java.io.BufferedWriter; import java.io.File; import java.io.FileInputStream; @@ -41,7 +42,7 @@ import java.util.Arrays; import java.util.HashMap; import java.util.Random; -public class BinaryDictIOUtilsTests extends AndroidTestCase{ +public class BinaryDictIOUtilsTests extends AndroidTestCase { private static final String TAG = BinaryDictIOUtilsTests.class.getSimpleName(); private static final FormatSpec.FormatOptions FORMAT_OPTIONS = new FormatSpec.FormatOptions(3, true); @@ -150,26 +151,50 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase{ return position; } + private CharGroupInfo findWordFromFile(final File file, final String word) { + FileInputStream inStream = null; + CharGroupInfo info = null; + try { + inStream = new FileInputStream(file); + final FusionDictionaryBufferInterface buffer = new ByteBufferWrapper( + inStream.getChannel().map(FileChannel.MapMode.READ_ONLY, 0, file.length())); + info = BinaryDictIOUtils.findWordFromBuffer(buffer, word); + } catch (IOException e) { + } catch (UnsupportedFormatException e) { + } finally { + if (inStream != null) { + try { + inStream.close(); + } catch (IOException e) { + // do nothing + } + } + } + return info; + } + // return amount of time to insert a word private long insertAndCheckWord(final File file, final String word, final int frequency, - final boolean exist) { + final boolean exist, final ArrayList bigrams, + final ArrayList shortcuts) { RandomAccessFile raFile = null; - FileOutputStream outStream = null; + BufferedOutputStream outStream = null; FusionDictionaryBufferInterface buffer = null; long amountOfTime = -1; try { raFile = new RandomAccessFile(file, "rw"); buffer = new ByteBufferWrapper(raFile.getChannel().map( FileChannel.MapMode.READ_WRITE, 0, file.length())); - outStream = new FileOutputStream(file, true); + outStream = new BufferedOutputStream(new FileOutputStream(file, true)); if (!exist) { assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word)); } final long now = System.nanoTime(); - BinaryDictIOUtils.insertWord(buffer, outStream, word, frequency, null, null, false, - false); + BinaryDictIOUtils.insertWord(buffer, outStream, word, frequency, bigrams, shortcuts, + false, false); amountOfTime = System.nanoTime() - now; + outStream.flush(); MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word)); outStream.close(); raFile.close(); @@ -215,8 +240,6 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase{ } } - - private void checkReverseLookup(final File file, final String word, final int position) { FileInputStream inStream = null; try { @@ -242,7 +265,7 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase{ public void testInsertWord() { File file = null; try { - file = File.createTempFile("testInsertWord", ".dict"); + file = File.createTempFile("testInsertWord", ".dict", getContext().getCacheDir()); } catch (IOException e) { fail("IOException while creating temporary file: " + e); } @@ -263,36 +286,74 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase{ } MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, getWordPosition(file, "abcd")); - insertAndCheckWord(file, "abcde", 10, false); + insertAndCheckWord(file, "abcde", 10, false, null, null); - insertAndCheckWord(file, "abcdefghijklmn", 10, false); + insertAndCheckWord(file, "abcdefghijklmn", 10, false, null, null); checkReverseLookup(file, "abcdefghijklmn", getWordPosition(file, "abcdefghijklmn")); - insertAndCheckWord(file, "abcdabcd", 10, false); + insertAndCheckWord(file, "abcdabcd", 10, false, null, null); checkReverseLookup(file, "abcdabcd", getWordPosition(file, "abcdabcd")); // update the existing word. - insertAndCheckWord(file, "abcdabcd", 15, true); + insertAndCheckWord(file, "abcdabcd", 15, true, null, null); // split 1 - insertAndCheckWord(file, "ab", 20, false); + insertAndCheckWord(file, "ab", 20, false, null, null); // split 2 - insertAndCheckWord(file, "ami", 30, false); + insertAndCheckWord(file, "ami", 30, false, null, null); deleteWord(file, "ami"); assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, "ami")); - insertAndCheckWord(file, "abcdabfg", 30, false); + insertAndCheckWord(file, "abcdabfg", 30, false, null, null); deleteWord(file, "abcd"); assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, "abcd")); } + public void testInsertWordWithBigrams() { + File file = null; + try { + file = File.createTempFile("testInsertWordWithBigrams", ".dict", + getContext().getCacheDir()); + } catch (IOException e) { + fail("IOException while creating temporary file: " + e); + } + + // set an initial dictionary. + final FusionDictionary dict = new FusionDictionary(new Node(), + new FusionDictionary.DictionaryOptions(new HashMap(), false, false)); + dict.add("abcd", 10, null, false); + dict.add("efgh", 15, null, false); + + try { + final FileOutputStream out = new FileOutputStream(file); + BinaryDictInputOutput.writeDictionaryBinary(out, dict, FORMAT_OPTIONS); + out.close(); + } catch (IOException e) { + fail("IOException while writing an initial dictionary : " + e); + } catch (UnsupportedFormatException e) { + fail("UnsupportedFormatException while writing an initial dictionary : " + e); + } + + final ArrayList banana = new ArrayList(); + banana.add(new WeightedString("banana", 10)); + + insertAndCheckWord(file, "banana", 0, false, null, null); + insertAndCheckWord(file, "recursive", 60, true, banana, null); + + final CharGroupInfo info = findWordFromFile(file, "recursive"); + int bananaPos = getWordPosition(file, "banana"); + assertNotNull(info.mBigrams); + assertEquals(info.mBigrams.size(), 1); + assertEquals(info.mBigrams.get(0).mAddress, bananaPos); + } + public void testRandomWords() { File file = null; try { - file = File.createTempFile("testRandomWord", ".dict"); + file = File.createTempFile("testRandomWord", ".dict", getContext().getCacheDir()); } catch (IOException e) { } assertNotNull(file); @@ -317,7 +378,8 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase{ long minTimeToInsert = 100000000; // 1000000000 is an upper bound for minTimeToInsert. int cnt = 0; for (final String word : sWords) { - final long diff = insertAndCheckWord(file, word, cnt%255, false); + final long diff = insertAndCheckWord(file, word, + cnt % FormatSpec.MAX_TERMINAL_FREQUENCY, false, null, null); maxTimeToInsert = Math.max(maxTimeToInsert, diff); minTimeToInsert = Math.min(minTimeToInsert, diff); sum += diff;