From 3c6d9fe14840fd2c455ec65b6481ed78f99a5460 Mon Sep 17 00:00:00 2001 From: Yuichiro Hanada Date: Mon, 1 Oct 2012 14:50:58 +0900 Subject: [PATCH] Add insertWord. bug: 6669677 Change-Id: Ide55a4931071de9cd42c1cddae63ddd531d2feba --- .../latin/makedict/BinaryDictIOUtils.java | 301 ++++++++++++++++ .../latin/makedict/BinaryDictInputOutput.java | 3 +- .../latin/makedict/FusionDictionary.java | 2 +- .../makedict/BinaryDictIOUtilsTests.java | 335 ++++++++++++++++++ 4 files changed, 639 insertions(+), 2 deletions(-) create mode 100644 tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtilsTests.java diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java index 096ca0992..e5ec449ea 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java @@ -27,12 +27,15 @@ import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import java.io.IOException; import java.io.OutputStream; import java.util.ArrayList; +import java.util.Arrays; import java.util.Iterator; import java.util.Map; import java.util.Stack; public final class BinaryDictIOUtils { private static final boolean DBG = false; + private static final int MSB24 = 0x800000; + private static final int SINT24_MAX = 0x7FFFFF; private static final int MAX_JUMPS = 10000; private BinaryDictIOUtils() { @@ -646,4 +649,302 @@ public final class BinaryDictIOUtils { writeSInt24ToStream(destination, FormatSpec.NO_FORWARD_LINK_ADDRESS); return size + FormatSpec.FORWARD_LINK_ADDRESS_SIZE; } + + /** + * Move a group that is referred to by oldGroupOrigin to the tail of the file. + * And set the children address to the byte after the group. + * + * @param nodeOrigin the address of the tail of the file. + * @param characters + * @param length + * @param flags + * @param frequency + * @param parentAddress + * @param shortcutTargets + * @param bigrams + * @param destination the stream representing the tail of the file. + * @param buffer the buffer representing the (constant-size) body of the file. + * @param oldNodeOrigin + * @param oldGroupOrigin + * @param formatOptions + * @return the size written, in bytes. + * @throws IOException + */ + private static int moveGroup(final int nodeOrigin, final int[] characters, final int length, + final int flags, final int frequency, final int parentAddress, + final ArrayList shortcutTargets, + final ArrayList bigrams, final OutputStream destination, + final FusionDictionaryBufferInterface buffer, final int oldNodeOrigin, + final int oldGroupOrigin, final FormatOptions formatOptions) throws IOException { + int size = 0; + final int newGroupOrigin = nodeOrigin + 1; + final int[] writtenCharacters = Arrays.copyOfRange(characters, 0, length); + final CharGroupInfo tmpInfo = new CharGroupInfo(newGroupOrigin, -1 /* endAddress */, + flags, writtenCharacters, frequency, parentAddress, FormatSpec.NO_CHILDREN_ADDRESS, + shortcutTargets, bigrams); + size = computeGroupSize(tmpInfo, formatOptions); + final CharGroupInfo newInfo = new CharGroupInfo(newGroupOrigin, newGroupOrigin + size, + flags, writtenCharacters, frequency, parentAddress, + nodeOrigin + 1 + size + FormatSpec.FORWARD_LINK_ADDRESS_SIZE, shortcutTargets, + bigrams); + moveCharGroup(destination, buffer, newInfo, oldNodeOrigin, oldGroupOrigin, formatOptions); + return 1 + size + FormatSpec.FORWARD_LINK_ADDRESS_SIZE; + } + + /** + * Insert a word into a binary dictionary. + * + * @param buffer + * @param destination + * @param word + * @param frequency + * @param bigramStrings + * @param shortcuts + * @throws IOException + * @throws UnsupportedFormatException + */ + // TODO: Support batch insertion. + public static void insertWord(final FusionDictionaryBufferInterface buffer, + final OutputStream destination, final String word, final int frequency, + final ArrayList bigramStrings, + final ArrayList shortcuts, final boolean isNotAWord, + final boolean isBlackListEntry) + throws IOException, UnsupportedFormatException { + final ArrayList bigrams = new ArrayList(); + if (bigramStrings != null) { + for (final WeightedString bigram : bigramStrings) { + int position = getTerminalPosition(buffer, bigram.mWord); + if (position == FormatSpec.NOT_VALID_WORD) { + // TODO: figure out what is the correct thing to do here. + } else { + bigrams.add(new PendingAttribute(position, bigram.mFrequency)); + } + } + } + + final boolean isTerminal = true; + final boolean hasBigrams = !bigrams.isEmpty(); + final boolean hasShortcuts = shortcuts != null && !shortcuts.isEmpty(); + + // find the insert position of the word. + if (buffer.position() != 0) buffer.position(0); + final FileHeader header = BinaryDictInputOutput.readHeader(buffer); + + int wordPos = 0, address = buffer.position(), nodeOriginAddress = buffer.position(); + final int[] codePoints = FusionDictionary.getCodePoints(word); + final int wordLen = codePoints.length; + + for (int depth = 0; depth < Constants.Dictionary.MAX_WORD_LENGTH; ++depth) { + if (wordPos >= wordLen) break; + nodeOriginAddress = buffer.position(); + int nodeParentAddress = -1; + final int charGroupCount = BinaryDictInputOutput.readCharGroupCount(buffer); + boolean foundNextGroup = false; + + for (int i = 0; i < charGroupCount; ++i) { + address = buffer.position(); + final CharGroupInfo currentInfo = BinaryDictInputOutput.readCharGroup(buffer, + buffer.position(), header.mFormatOptions); + final boolean isMovedGroup = BinaryDictInputOutput.isMovedGroup(currentInfo.mFlags, + header.mFormatOptions); + if (isMovedGroup) continue; + nodeParentAddress = (currentInfo.mParentAddress == FormatSpec.NO_PARENT_ADDRESS) + ? FormatSpec.NO_PARENT_ADDRESS : currentInfo.mParentAddress + address; + boolean matched = true; + for (int p = 0; p < currentInfo.mCharacters.length; ++p) { + if (wordPos + p >= wordLen) { + /* + * splitting + * before + * abcd - ef + * + * insert "abc" + * + * after + * abc - d - ef + */ + final int newNodeAddress = buffer.limit(); + final int flags = BinaryDictInputOutput.makeCharGroupFlags(p > 1, + isTerminal, 0, hasShortcuts, hasBigrams, false /* isNotAWord */, + false /* isBlackListEntry */, header.mFormatOptions); + int written = moveGroup(newNodeAddress, currentInfo.mCharacters, p, flags, + frequency, nodeParentAddress, shortcuts, bigrams, destination, + buffer, nodeOriginAddress, address, header.mFormatOptions); + + final int[] characters2 = Arrays.copyOfRange(currentInfo.mCharacters, p, + currentInfo.mCharacters.length); + if (currentInfo.mChildrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) { + updateParentAddresses(buffer, currentInfo.mChildrenAddress, + newNodeAddress + written + 1, header.mFormatOptions); + } + final CharGroupInfo newInfo2 = new CharGroupInfo( + newNodeAddress + written + 1, -1 /* endAddress */, + currentInfo.mFlags, characters2, currentInfo.mFrequency, + newNodeAddress + 1, currentInfo.mChildrenAddress, + currentInfo.mShortcutTargets, currentInfo.mBigrams); + writeNode(destination, new CharGroupInfo[] { newInfo2 }); + return; + } else if (codePoints[wordPos + p] != currentInfo.mCharacters[p]) { + if (p > 0) { + /* + * splitting + * before + * ab - cd + * + * insert "ac" + * + * after + * a - b - cd + * | + * - c + */ + + final int newNodeAddress = buffer.limit(); + final int childrenAddress = currentInfo.mChildrenAddress; + + // move prefix + final int prefixFlags = BinaryDictInputOutput.makeCharGroupFlags(p > 1, + false /* isTerminal */, 0 /* childrenAddressSize*/, + false /* hasShortcut */, false /* hasBigrams */, + false /* isNotAWord */, false /* isBlackListEntry */, + header.mFormatOptions); + int written = moveGroup(newNodeAddress, currentInfo.mCharacters, p, + prefixFlags, -1 /* frequency */, nodeParentAddress, null, null, + destination, buffer, nodeOriginAddress, address, + header.mFormatOptions); + + final int[] suffixCharacters = Arrays.copyOfRange( + currentInfo.mCharacters, p, currentInfo.mCharacters.length); + if (currentInfo.mChildrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) { + updateParentAddresses(buffer, currentInfo.mChildrenAddress, + newNodeAddress + written + 1, header.mFormatOptions); + } + final int suffixFlags = BinaryDictInputOutput.makeCharGroupFlags( + suffixCharacters.length > 1, + (currentInfo.mFlags & FormatSpec.FLAG_IS_TERMINAL) != 0, + 0 /* childrenAddressSize */, + (currentInfo.mFlags & FormatSpec.FLAG_HAS_SHORTCUT_TARGETS) + != 0, + (currentInfo.mFlags & FormatSpec.FLAG_HAS_BIGRAMS) != 0, + isNotAWord, isBlackListEntry, header.mFormatOptions); + final CharGroupInfo suffixInfo = new CharGroupInfo( + newNodeAddress + written + 1, -1 /* endAddress */, suffixFlags, + suffixCharacters, currentInfo.mFrequency, newNodeAddress + 1, + currentInfo.mChildrenAddress, currentInfo.mShortcutTargets, + currentInfo.mBigrams); + written += computeGroupSize(suffixInfo, header.mFormatOptions) + 1; + + final int[] newCharacters = Arrays.copyOfRange(codePoints, wordPos + p, + codePoints.length); + final int flags = BinaryDictInputOutput.makeCharGroupFlags( + newCharacters.length > 1, isTerminal, + 0 /* childrenAddressSize */, hasShortcuts, hasBigrams, + isNotAWord, isBlackListEntry, header.mFormatOptions); + final CharGroupInfo newInfo = new CharGroupInfo( + newNodeAddress + written, -1 /* endAddress */, flags, + newCharacters, frequency, newNodeAddress + 1, + FormatSpec.NO_CHILDREN_ADDRESS, shortcuts, bigrams); + writeNode(destination, new CharGroupInfo[] { suffixInfo, newInfo }); + return; + } + matched = false; + break; + } + } + + if (matched) { + if (wordPos + currentInfo.mCharacters.length == wordLen) { + // the word exists in the dictionary. + // only update group. + final int newNodeAddress = buffer.limit(); + final boolean hasMultipleChars = currentInfo.mCharacters.length > 1; + final int flags = BinaryDictInputOutput.makeCharGroupFlags(hasMultipleChars, + isTerminal, 0 /* childrenAddressSize */, hasShortcuts, hasBigrams, + isNotAWord, isBlackListEntry, header.mFormatOptions); + final CharGroupInfo newInfo = new CharGroupInfo(newNodeAddress + 1, + -1 /* endAddress */, flags, currentInfo.mCharacters, frequency, + nodeParentAddress, currentInfo.mChildrenAddress, shortcuts, + bigrams); + moveCharGroup(destination, buffer, newInfo, nodeOriginAddress, address, + header.mFormatOptions); + return; + } + wordPos += currentInfo.mCharacters.length; + if (currentInfo.mChildrenAddress == FormatSpec.NO_CHILDREN_ADDRESS) { + /* + * found the prefix of the word. + * make new node and link to the node from this group. + * + * before + * ab - cd + * + * insert "abcde" + * + * after + * ab - cd - e + */ + final int newNodeAddress = buffer.limit(); + updateChildrenAddress(buffer, address, newNodeAddress, + header.mFormatOptions); + final int newGroupAddress = newNodeAddress + 1; + final boolean hasMultipleChars = (wordLen - wordPos) > 1; + final int flags = BinaryDictInputOutput.makeCharGroupFlags(hasMultipleChars, + isTerminal, 0 /* childrenAddressSize */, hasShortcuts, hasBigrams, + isNotAWord, isBlackListEntry, header.mFormatOptions); + final int[] characters = Arrays.copyOfRange(codePoints, wordPos, wordLen); + final CharGroupInfo newInfo = new CharGroupInfo(newGroupAddress, -1, flags, + characters, frequency, address, FormatSpec.NO_CHILDREN_ADDRESS, + shortcuts, bigrams); + writeNode(destination, new CharGroupInfo[] { newInfo }); + return; + } + buffer.position(currentInfo.mChildrenAddress); + foundNextGroup = true; + break; + } + } + + if (foundNextGroup) continue; + + // reached the end of the array. + final int linkAddressPosition = buffer.position(); + int nextLink = buffer.readUnsignedInt24(); + if ((nextLink & MSB24) != 0) { + nextLink = -(nextLink & SINT24_MAX); + } + if (nextLink == FormatSpec.NO_FORWARD_LINK_ADDRESS) { + /* + * expand this node. + * + * before + * ab - cd + * + * insert "abef" + * + * after + * ab - cd + * | + * - ef + */ + + // change the forward link address. + final int newNodeAddress = buffer.limit(); + buffer.position(linkAddressPosition); + writeSInt24ToBuffer(buffer, newNodeAddress); + + final int[] characters = Arrays.copyOfRange(codePoints, wordPos, wordLen); + final int flags = BinaryDictInputOutput.makeCharGroupFlags(characters.length > 1, + isTerminal, 0 /* childrenAddressSize */, hasShortcuts, hasBigrams, + isNotAWord, isBlackListEntry, header.mFormatOptions); + final CharGroupInfo newInfo = new CharGroupInfo(newNodeAddress + 1, + -1 /* endAddress */, flags, characters, frequency, nodeParentAddress, + FormatSpec.NO_CHILDREN_ADDRESS, shortcuts, bigrams); + writeNode(destination, new CharGroupInfo[]{ newInfo }); + return; + } else { + depth--; + buffer.position(nextLink); + } + } + } } diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java index 624e72f0c..2d39094ff 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java @@ -411,7 +411,8 @@ public final class BinaryDictInputOutput { * Helper method to check whether the group is moved. */ public static boolean isMovedGroup(final int flags, final FormatOptions options) { - return options.mSupportsDynamicUpdate && ((flags & FormatSpec.FLAG_IS_MOVED) == 1); + return options.mSupportsDynamicUpdate + && ((flags & FormatSpec.MASK_GROUP_ADDRESS_TYPE) == FormatSpec.FLAG_IS_MOVED); } /** diff --git a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java index 3193ef457..6f1faa192 100644 --- a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java +++ b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java @@ -279,7 +279,7 @@ public final class FusionDictionary implements Iterable { /** * Helper method to convert a String to an int array. */ - static private int[] getCodePoints(final String word) { + static int[] getCodePoints(final String word) { // TODO: this is a copy-paste of the contents of StringUtils.toCodePointArray, // which is not visible from the makedict package. Factor this code. final char[] characters = word.toCharArray(); diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtilsTests.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtilsTests.java new file mode 100644 index 000000000..7607b58eb --- /dev/null +++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtilsTests.java @@ -0,0 +1,335 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.android.inputmethod.latin.makedict; + +import com.android.inputmethod.latin.CollectionUtils; +import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.ByteBufferWrapper; +import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.CharEncoding; +import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.FusionDictionaryBufferInterface; +import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; +import com.android.inputmethod.latin.makedict.FusionDictionary.Node; +import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; + +import android.test.AndroidTestCase; +import android.test.MoreAsserts; +import android.util.Log; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.FileWriter; +import java.io.IOException; +import java.io.RandomAccessFile; +import java.nio.channels.FileChannel; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Random; + +public class BinaryDictIOUtilsTests extends AndroidTestCase{ + private static final String TAG = BinaryDictIOUtilsTests.class.getSimpleName(); + private static final FormatSpec.FormatOptions FORMAT_OPTIONS = + new FormatSpec.FormatOptions(3, true); + private static final int MAX_UNIGRAMS = 1500; + + private static final ArrayList sWords = CollectionUtils.newArrayList(); + + private static final String[] CHARACTERS = { + "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", + "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", + "\u00FC" /* ü */, "\u00E2" /* â */, "\u00F1" /* ñ */, // accented characters + "\u4E9C" /* 亜 */, "\u4F0A" /* 伊 */, "\u5B87" /* 宇 */, // kanji + "\uD841\uDE28" /* 𠘨 */, "\uD840\uDC0B" /* 𠀋 */, "\uD861\uDeD7" /* 𨛗 */ // surrogate pair + }; + + public BinaryDictIOUtilsTests() { + super(); + final Random random = new Random(123456); + sWords.clear(); + for (int i = 0; i < MAX_UNIGRAMS; ++i) { + sWords.add(generateWord(random.nextInt())); + } + } + + // Utilities for test + private String generateWord(final int value) { + final int lengthOfChars = CHARACTERS.length; + StringBuilder builder = new StringBuilder(""); + long lvalue = Math.abs((long)value); + while (lvalue > 0) { + builder.append(CHARACTERS[(int)(lvalue % lengthOfChars)]); + lvalue /= lengthOfChars; + } + if (builder.toString().equals("")) return "a"; + return builder.toString(); + } + + private static void printCharGroup(final CharGroupInfo info) { + Log.d(TAG, " CharGroup at " + info.mOriginalAddress); + Log.d(TAG, " flags = " + info.mFlags); + Log.d(TAG, " parentAddress = " + info.mParentAddress); + Log.d(TAG, " characters = " + new String(info.mCharacters, 0, + info.mCharacters.length)); + if (info.mFrequency != -1) Log.d(TAG, " frequency = " + info.mFrequency); + if (info.mChildrenAddress == FormatSpec.NO_CHILDREN_ADDRESS) { + Log.d(TAG, " children address = no children address"); + } else { + Log.d(TAG, " children address = " + info.mChildrenAddress); + } + if (info.mShortcutTargets != null) { + for (final WeightedString ws : info.mShortcutTargets) { + Log.d(TAG, " shortcuts = " + ws.mWord); + } + } + if (info.mBigrams != null) { + for (final PendingAttribute attr : info.mBigrams) { + Log.d(TAG, " bigram = " + attr.mAddress); + } + } + Log.d(TAG, " end address = " + info.mEndAddress); + } + + private static void printNode(final FusionDictionaryBufferInterface buffer, + final FormatSpec.FormatOptions formatOptions) { + Log.d(TAG, "Node at " + buffer.position()); + final int count = BinaryDictInputOutput.readCharGroupCount(buffer); + Log.d(TAG, " charGroupCount = " + count); + for (int i = 0; i < count; ++i) { + final CharGroupInfo currentInfo = BinaryDictInputOutput.readCharGroup(buffer, + buffer.position(), formatOptions); + printCharGroup(currentInfo); + } + if (formatOptions.mSupportsDynamicUpdate) { + final int forwardLinkAddress = buffer.readUnsignedInt24(); + Log.d(TAG, " forwardLinkAddress = " + forwardLinkAddress); + } + } + + private static void printBinaryFile(final FusionDictionaryBufferInterface buffer) + throws IOException, UnsupportedFormatException { + FileHeader header = BinaryDictInputOutput.readHeader(buffer); + while (buffer.position() < buffer.limit()) { + printNode(buffer, header.mFormatOptions); + } + } + + private int getWordPosition(final File file, final String word) { + int position = FormatSpec.NOT_VALID_WORD; + FileInputStream inStream = null; + try { + inStream = new FileInputStream(file); + final FusionDictionaryBufferInterface buffer = new ByteBufferWrapper( + inStream.getChannel().map(FileChannel.MapMode.READ_ONLY, 0, file.length())); + position = BinaryDictIOUtils.getTerminalPosition(buffer, word); + } catch (IOException e) { + } catch (UnsupportedFormatException e) { + } finally { + if (inStream != null) { + try { + inStream.close(); + } catch (IOException e) { + // do nothing + } + } + } + return position; + } + + // return amount of time to insert a word + private long insertAndCheckWord(final File file, final String word, final int frequency, + final boolean exist) { + RandomAccessFile raFile = null; + FileOutputStream outStream = null; + FusionDictionaryBufferInterface buffer = null; + long amountOfTime = -1; + try { + raFile = new RandomAccessFile(file, "rw"); + buffer = new ByteBufferWrapper(raFile.getChannel().map( + FileChannel.MapMode.READ_WRITE, 0, file.length())); + outStream = new FileOutputStream(file, true); + + if (!exist) { + assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word)); + } + final long now = System.nanoTime(); + BinaryDictIOUtils.insertWord(buffer, outStream, word, frequency, null, null, false, + false); + amountOfTime = System.nanoTime() - now; + MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word)); + outStream.close(); + raFile.close(); + } catch (IOException e) { + } catch (UnsupportedFormatException e) { + } finally { + if (outStream != null) { + try { + outStream.close(); + } catch (IOException e) { + // do nothing + } + } + if (raFile != null) { + try { + raFile.close(); + } catch (IOException e) { + // do nothing + } + } + } + return amountOfTime; + } + + private void deleteWord(final File file, final String word) { + RandomAccessFile raFile = null; + FusionDictionaryBufferInterface buffer = null; + try { + raFile = new RandomAccessFile(file, "rw"); + buffer = new ByteBufferWrapper(raFile.getChannel().map( + FileChannel.MapMode.READ_WRITE, 0, file.length())); + BinaryDictIOUtils.deleteWord(buffer, word); + } catch (IOException e) { + } catch (UnsupportedFormatException e) { + } finally { + if (raFile != null) { + try { + raFile.close(); + } catch (IOException e) { + // do nothing + } + } + } + } + + + + private void checkReverseLookup(final File file, final String word, final int position) { + FileInputStream inStream = null; + try { + inStream = new FileInputStream(file); + final FusionDictionaryBufferInterface buffer = new ByteBufferWrapper( + inStream.getChannel().map(FileChannel.MapMode.READ_ONLY, 0, file.length())); + final FileHeader header = BinaryDictInputOutput.readHeader(buffer); + assertEquals(word, BinaryDictInputOutput.getWordAtAddress(buffer, header.mHeaderSize, + position - header.mHeaderSize, header.mFormatOptions)); + } catch (IOException e) { + } catch (UnsupportedFormatException e) { + } finally { + if (inStream != null) { + try { + inStream.close(); + } catch (IOException e) { + // do nothing + } + } + } + } + + public void testInsertWord() { + File file = null; + try { + file = File.createTempFile("testInsertWord", ".dict"); + } catch (IOException e) { + fail("IOException while creating temporary file: " + e); + } + + // set an initial dictionary. + final FusionDictionary dict = new FusionDictionary(new Node(), + new FusionDictionary.DictionaryOptions(new HashMap(), false, false)); + dict.add("abcd", 10, null, false); + + try { + final FileOutputStream out = new FileOutputStream(file); + BinaryDictInputOutput.writeDictionaryBinary(out, dict, FORMAT_OPTIONS); + out.close(); + } catch (IOException e) { + fail("IOException while writing an initial dictionary : " + e); + } catch (UnsupportedFormatException e) { + fail("UnsupportedFormatException while writing an initial dictionary : " + e); + } + + MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, getWordPosition(file, "abcd")); + insertAndCheckWord(file, "abcde", 10, false); + + insertAndCheckWord(file, "abcdefghijklmn", 10, false); + checkReverseLookup(file, "abcdefghijklmn", getWordPosition(file, "abcdefghijklmn")); + + insertAndCheckWord(file, "abcdabcd", 10, false); + checkReverseLookup(file, "abcdabcd", getWordPosition(file, "abcdabcd")); + + // update the existing word. + insertAndCheckWord(file, "abcdabcd", 15, true); + + // split 1 + insertAndCheckWord(file, "ab", 20, false); + + // split 2 + insertAndCheckWord(file, "ami", 30, false); + + deleteWord(file, "ami"); + assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, "ami")); + + insertAndCheckWord(file, "abcdabfg", 30, false); + + deleteWord(file, "abcd"); + assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, "abcd")); + } + + public void testRandomWords() { + File file = null; + try { + file = File.createTempFile("testRandomWord", ".dict"); + } catch (IOException e) { + } + assertNotNull(file); + + // set an initial dictionary. + final FusionDictionary dict = new FusionDictionary(new Node(), + new FusionDictionary.DictionaryOptions(new HashMap(), false, + false)); + dict.add("initial", 10, null, false); + + try { + final FileOutputStream out = new FileOutputStream(file); + BinaryDictInputOutput.writeDictionaryBinary(out, dict, FORMAT_OPTIONS); + out.close(); + } catch (IOException e) { + assertTrue(false); + } catch (UnsupportedFormatException e) { + assertTrue(false); + } + + long maxTimeToInsert = 0, sum = 0; + long minTimeToInsert = 100000000; // 1000000000 is an upper bound for minTimeToInsert. + int cnt = 0; + for (final String word : sWords) { + final long diff = insertAndCheckWord(file, word, cnt%255, false); + maxTimeToInsert = Math.max(maxTimeToInsert, diff); + minTimeToInsert = Math.min(minTimeToInsert, diff); + sum += diff; + cnt++; + } + cnt = 0; + for (final String word : sWords) { + MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word)); + } + + Log.d(TAG, "max = " + ((double)maxTimeToInsert/1000000) + " ms."); + Log.d(TAG, "min = " + ((double)minTimeToInsert/1000000) + " ms."); + Log.d(TAG, "avg = " + ((double)sum/MAX_UNIGRAMS/1000000) + " ms."); + } +}