From c2fd53ee0e610c9f143537aa3c5a4b0ab6b14e6a Mon Sep 17 00:00:00 2001 From: Keisuke Kuroyanagi Date: Tue, 17 Dec 2013 18:17:51 +0900 Subject: [PATCH] Remove ver4 dict updater. Change-Id: I468994c98d091be621b9fb3fbe6405c67fc6a465 --- .../BinaryDictDecoderEncoderTests.java | 31 - .../makedict/BinaryDictIOUtilsTests.java | 380 --------- .../latin/makedict/BinaryDictUtils.java | 10 - .../latin/makedict/DictUpdater.java | 50 -- .../makedict/SparseTableContentUpdater.java | 123 --- .../latin/makedict/Ver4DictUpdater.java | 790 ------------------ .../inputmethod/latin/dicttool/Test.java | 5 +- 7 files changed, 2 insertions(+), 1387 deletions(-) delete mode 100644 tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtilsTests.java delete mode 100644 tests/src/com/android/inputmethod/latin/makedict/DictUpdater.java delete mode 100644 tests/src/com/android/inputmethod/latin/makedict/SparseTableContentUpdater.java delete mode 100644 tests/src/com/android/inputmethod/latin/makedict/Ver4DictUpdater.java diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java index 8c5da254b..b5a71f0bf 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java +++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java @@ -592,35 +592,4 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { Log.d(TAG, result); } } - - private void runTestDeleteWord(final FormatOptions formatOptions) - throws IOException, UnsupportedFormatException { - final String dictName = "testDeleteWord"; - final String dictVersion = Long.toString(System.currentTimeMillis()); - final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions, - getContext().getCacheDir()); - - final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), - BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion)); - addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */); - timeWritingDictToFile(file, dict, formatOptions); - - final DictUpdater dictUpdater = BinaryDictUtils.getDictUpdater(file, formatOptions); - MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, - dictUpdater.getTerminalPosition(sWords.get(0))); - dictUpdater.deleteWord(sWords.get(0)); - assertEquals(FormatSpec.NOT_VALID_WORD, - dictUpdater.getTerminalPosition(sWords.get(0))); - - MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, - dictUpdater.getTerminalPosition(sWords.get(5))); - dictUpdater.deleteWord(sWords.get(5)); - assertEquals(FormatSpec.NOT_VALID_WORD, - dictUpdater.getTerminalPosition(sWords.get(5))); - } - - public void testDeleteWord() throws IOException, UnsupportedFormatException { - runTestDeleteWord(BinaryDictUtils.VERSION4_OPTIONS_WITHOUT_TIMESTAMP); - runTestDeleteWord(BinaryDictUtils.VERSION4_OPTIONS_WITH_TIMESTAMP); - } } diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtilsTests.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtilsTests.java deleted file mode 100644 index 9ed50c4b3..000000000 --- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtilsTests.java +++ /dev/null @@ -1,380 +0,0 @@ -/* - * Copyright (C) 2012 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.android.inputmethod.latin.makedict; - -import android.test.AndroidTestCase; -import android.test.MoreAsserts; -import android.test.suitebuilder.annotation.LargeTest; -import android.util.Log; - -import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; -import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; -import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; -import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; -import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; -import com.android.inputmethod.latin.utils.CollectionUtils; - -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Random; - -@LargeTest -public class BinaryDictIOUtilsTests extends AndroidTestCase { - private static final String TAG = BinaryDictIOUtilsTests.class.getSimpleName(); - - private static final ArrayList sWords = CollectionUtils.newArrayList(); - public static final int DEFAULT_MAX_UNIGRAMS = 1500; - private final int mMaxUnigrams; - - private static final String[] CHARACTERS = { - "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", - "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", - "\u00FC" /* ü */, "\u00E2" /* â */, "\u00F1" /* ñ */, // accented characters - "\u4E9C" /* 亜 */, "\u4F0A" /* 伊 */, "\u5B87" /* 宇 */, // kanji - "\uD841\uDE28" /* 𠘨 */, "\uD840\uDC0B" /* 𠀋 */, "\uD861\uDED7" /* 𨛗 */ // surrogate pair - }; - - public BinaryDictIOUtilsTests() { - // 1500 is the default max unigrams - this(System.currentTimeMillis(), DEFAULT_MAX_UNIGRAMS); - } - - public BinaryDictIOUtilsTests(final long seed, final int maxUnigrams) { - super(); - Log.d(TAG, "Seed for test is " + seed + ", maxUnigrams is " + maxUnigrams); - mMaxUnigrams = maxUnigrams; - final Random random = new Random(seed); - sWords.clear(); - for (int i = 0; i < maxUnigrams; ++i) { - sWords.add(generateWord(random.nextInt())); - } - } - - // Utilities for test - private String generateWord(final int value) { - final int lengthOfChars = CHARACTERS.length; - StringBuilder builder = new StringBuilder(""); - long lvalue = Math.abs((long)value); - while (lvalue > 0) { - builder.append(CHARACTERS[(int)(lvalue % lengthOfChars)]); - lvalue /= lengthOfChars; - } - if (builder.toString().equals("")) return "a"; - return builder.toString(); - } - - private static void printPtNode(final PtNodeInfo info) { - Log.d(TAG, " PtNode at " + info.mOriginalAddress); - Log.d(TAG, " flags = " + info.mFlags); - Log.d(TAG, " parentAddress = " + info.mParentAddress); - Log.d(TAG, " characters = " + new String(info.mCharacters, 0, - info.mCharacters.length)); - if (info.mFrequency != -1) Log.d(TAG, " frequency = " + info.mFrequency); - if (info.mChildrenAddress == FormatSpec.NO_CHILDREN_ADDRESS) { - Log.d(TAG, " children address = no children address"); - } else { - Log.d(TAG, " children address = " + info.mChildrenAddress); - } - if (info.mShortcutTargets != null) { - for (final WeightedString ws : info.mShortcutTargets) { - Log.d(TAG, " shortcuts = " + ws.mWord); - } - } - if (info.mBigrams != null) { - for (final PendingAttribute attr : info.mBigrams) { - Log.d(TAG, " bigram = " + attr.mAddress); - } - } - Log.d(TAG, " end address = " + info.mEndAddress); - } - - private static void printNode(final Ver2DictDecoder dictDecoder, - final FormatSpec.FormatOptions formatOptions) { - final DictBuffer dictBuffer = dictDecoder.getDictBuffer(); - Log.d(TAG, "Node at " + dictBuffer.position()); - final int count = BinaryDictDecoderUtils.readPtNodeCount(dictBuffer); - Log.d(TAG, " ptNodeCount = " + count); - for (int i = 0; i < count; ++i) { - final PtNodeInfo currentInfo = dictDecoder.readPtNode(dictBuffer.position(), - formatOptions); - printPtNode(currentInfo); - } - if (formatOptions.supportsDynamicUpdate()) { - final int forwardLinkAddress = dictBuffer.readUnsignedInt24(); - Log.d(TAG, " forwardLinkAddress = " + forwardLinkAddress); - } - } - - @SuppressWarnings("unused") - private static void printBinaryFile(final Ver2DictDecoder dictDecoder) - throws IOException, UnsupportedFormatException { - final FileHeader fileHeader = dictDecoder.readHeader(); - final DictBuffer dictBuffer = dictDecoder.getDictBuffer(); - while (dictBuffer.position() < dictBuffer.limit()) { - printNode(dictDecoder, fileHeader.mFormatOptions); - } - } - - private int getWordPosition(final File file, final String word) { - int position = FormatSpec.NOT_VALID_WORD; - - try { - final DictDecoder dictDecoder = FormatSpec.getDictDecoder(file, - DictDecoder.USE_READONLY_BYTEBUFFER); - position = dictDecoder.getTerminalPosition(word); - } catch (IOException e) { - } catch (UnsupportedFormatException e) { - } - return position; - } - - /** - * Find a word using the DictDecoder. - * - * @param dictDecoder the dict decoder - * @param word the word searched - * @return the found ptNodeInfo - * @throws IOException - * @throws UnsupportedFormatException - */ - private static PtNodeInfo findWordByDictDecoder(final DictDecoder dictDecoder, - final String word) throws IOException, UnsupportedFormatException { - int position = dictDecoder.getTerminalPosition(word); - if (position != FormatSpec.NOT_VALID_WORD) { - dictDecoder.setPosition(0); - final FileHeader header = dictDecoder.readHeader(); - dictDecoder.setPosition(position); - return dictDecoder.readPtNode(position, header.mFormatOptions); - } - return null; - } - - private PtNodeInfo findWordFromFile(final File file, final String word) { - final DictDecoder dictDecoder = FormatSpec.getDictDecoder(file); - PtNodeInfo info = null; - try { - dictDecoder.openDictBuffer(); - info = findWordByDictDecoder(dictDecoder, word); - } catch (IOException e) { - } catch (UnsupportedFormatException e) { - } - return info; - } - - // return amount of time to insert a word - private long insertAndCheckWord(final File file, final String word, final int frequency, - final boolean exist, final ArrayList bigrams, - final ArrayList shortcuts, final FormatOptions formatOptions) { - long amountOfTime = -1; - try { - final DictUpdater dictUpdater = BinaryDictUtils.getDictUpdater(file, formatOptions); - - if (!exist) { - assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word)); - } - final long now = System.nanoTime(); - dictUpdater.insertWord(word, frequency, bigrams, shortcuts, false, false); - amountOfTime = System.nanoTime() - now; - MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word)); - } catch (IOException e) { - Log.e(TAG, "Raised an IOException while inserting a word", e); - } catch (UnsupportedFormatException e) { - Log.e(TAG, "Raised an UnsupportedFormatException error while inserting a word", e); - } - return amountOfTime; - } - - private void deleteWord(final File file, final String word, final FormatOptions formatOptions) { - try { - final DictUpdater dictUpdater = BinaryDictUtils.getDictUpdater(file, formatOptions); - dictUpdater.deleteWord(word); - } catch (IOException e) { - Log.e(TAG, "Raised an IOException while deleting a word", e); - } catch (UnsupportedFormatException e) { - Log.e(TAG, "Raised an UnsupportedFormatException while deleting a word", e); - } - } - - private void checkReverseLookup(final File file, final String word, final int position) { - - try { - final DictDecoder dictDecoder = FormatSpec.getDictDecoder(file); - final FileHeader fileHeader = dictDecoder.readHeader(); - assertEquals(word, - BinaryDictDecoderUtils.getWordAtPosition(dictDecoder, fileHeader.mBodyOffset, - position, fileHeader.mFormatOptions).mWord); - } catch (IOException e) { - Log.e(TAG, "Raised an IOException while looking up a word", e); - } catch (UnsupportedFormatException e) { - Log.e(TAG, "Raised an UnsupportedFormatException error while looking up a word", e); - } - } - - private void runTestInsertWord(final FormatOptions formatOptions) { - final String testName = "testInsertWord"; - final String version = Long.toString(System.currentTimeMillis()); - final File file = BinaryDictUtils.getDictFile(testName, version, formatOptions, - getContext().getCacheDir()); - - // set an initial dictionary. - final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), - BinaryDictUtils.makeDictionaryOptions(testName, version)); - dict.add("abcd", 10, null, false); - - try { - final DictEncoder dictEncoder = BinaryDictUtils.getDictEncoder(file, formatOptions); - dictEncoder.writeDictionary(dict, formatOptions); - } catch (IOException e) { - fail("IOException while writing an initial dictionary : " + e); - } catch (UnsupportedFormatException e) { - fail("UnsupportedFormatException while writing an initial dictionary : " + e); - } - - MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, getWordPosition(file, "abcd")); - insertAndCheckWord(file, "abcde", 10, false, null, null, formatOptions); - checkReverseLookup(file, "abcde", getWordPosition(file, "abcde")); - - insertAndCheckWord(file, "abcdefghijklmn", 10, false, null, null, formatOptions); - checkReverseLookup(file, "abcdefghijklmn", getWordPosition(file, "abcdefghijklmn")); - - insertAndCheckWord(file, "abcdabcd", 10, false, null, null, formatOptions); - checkReverseLookup(file, "abcdabcd", getWordPosition(file, "abcdabcd")); - - // update the existing word. - insertAndCheckWord(file, "abcdabcd", 15, true, null, null, formatOptions); - checkReverseLookup(file, "abcdabcd", getWordPosition(file, "abcdabcd")); - - // Testing splitOnly - insertAndCheckWord(file, "ab", 20, false, null, null, formatOptions); - checkReverseLookup(file, "ab", getWordPosition(file, "ab")); - checkReverseLookup(file, "abcdabcd", getWordPosition(file, "abcdabcd")); - checkReverseLookup(file, "abcde", getWordPosition(file, "abcde")); - checkReverseLookup(file, "abcdefghijklmn", getWordPosition(file, "abcdefghijklmn")); - - // Testing splitAndBranch - insertAndCheckWord(file, "ami", 30, false, null, null, formatOptions); - checkReverseLookup(file, "ami", getWordPosition(file, "ami")); - checkReverseLookup(file, "ab", getWordPosition(file, "ab")); - checkReverseLookup(file, "abcdabcd", getWordPosition(file, "abcdabcd")); - checkReverseLookup(file, "abcde", getWordPosition(file, "abcde")); - checkReverseLookup(file, "abcdefghijklmn", getWordPosition(file, "abcdefghijklmn")); - checkReverseLookup(file, "ami", getWordPosition(file, "ami")); - - insertAndCheckWord(file, "abcdefzzzz", 40, false, null, null, formatOptions); - checkReverseLookup(file, "abcdefzzzz", getWordPosition(file, "abcdefzzzz")); - - deleteWord(file, "ami", formatOptions); - assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, "ami")); - - insertAndCheckWord(file, "abcdabfg", 30, false, null, null, formatOptions); - - deleteWord(file, "abcd", formatOptions); - assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, "abcd")); - } - - public void testInsertWord() { - runTestInsertWord(BinaryDictUtils.VERSION4_OPTIONS_WITHOUT_TIMESTAMP); - runTestInsertWord(BinaryDictUtils.VERSION4_OPTIONS_WITH_TIMESTAMP); - } - - private void runTestInsertWordWithBigrams(final FormatOptions formatOptions) { - final String testName = "testInsertWordWithBigrams"; - final String version = Long.toString(System.currentTimeMillis()); - File file = BinaryDictUtils.getDictFile(testName, version, formatOptions, - getContext().getCacheDir()); - - // set an initial dictionary. - final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), - BinaryDictUtils.makeDictionaryOptions(testName, version)); - dict.add("abcd", 10, null, false); - dict.add("efgh", 15, null, false); - - try { - final DictEncoder dictEncoder = BinaryDictUtils.getDictEncoder(file, formatOptions); - dictEncoder.writeDictionary(dict, formatOptions); - } catch (IOException e) { - fail("IOException while writing an initial dictionary : " + e); - } catch (UnsupportedFormatException e) { - fail("UnsupportedFormatException while writing an initial dictionary : " + e); - } - - final ArrayList banana = new ArrayList(); - banana.add(new WeightedString("banana", 10)); - - insertAndCheckWord(file, "banana", 0, false, null, null, formatOptions); - insertAndCheckWord(file, "recursive", 60, true, banana, null, formatOptions); - - final PtNodeInfo info = findWordFromFile(file, "recursive"); - int bananaPos = getWordPosition(file, "banana"); - assertNotNull(info.mBigrams); - assertEquals(info.mBigrams.size(), 1); - assertEquals(info.mBigrams.get(0).mAddress, bananaPos); - } - - public void testInsertWordWithBigrams() { - runTestInsertWordWithBigrams(BinaryDictUtils.VERSION4_OPTIONS_WITHOUT_TIMESTAMP); - runTestInsertWordWithBigrams(BinaryDictUtils.VERSION4_OPTIONS_WITH_TIMESTAMP); - } - - private void runTestRandomWords(final FormatOptions formatOptions) { - final String testName = "testRandomWord"; - final String version = Long.toString(System.currentTimeMillis()); - final File file = BinaryDictUtils.getDictFile(testName, version, formatOptions, - getContext().getCacheDir()); - - // set an initial dictionary. - final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), - BinaryDictUtils.makeDictionaryOptions(testName, version)); - dict.add("initial", 10, null, false); - - try { - final DictEncoder dictEncoder = BinaryDictUtils.getDictEncoder(file, formatOptions); - dictEncoder.writeDictionary(dict, formatOptions); - } catch (IOException e) { - assertTrue(false); - } catch (UnsupportedFormatException e) { - assertTrue(false); - } - - long maxTimeToInsert = 0, sum = 0; - long minTimeToInsert = 100000000; // 1000000000 is an upper bound for minTimeToInsert. - int cnt = 0; - for (final String word : sWords) { - final long diff = insertAndCheckWord(file, word, - cnt % FormatSpec.MAX_TERMINAL_FREQUENCY, false, null, null, formatOptions); - maxTimeToInsert = Math.max(maxTimeToInsert, diff); - minTimeToInsert = Math.min(minTimeToInsert, diff); - sum += diff; - cnt++; - } - cnt = 0; - for (final String word : sWords) { - MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word)); - } - - Log.d(TAG, "Test version " + formatOptions.mVersion); - Log.d(TAG, "max = " + ((double)maxTimeToInsert/1000000) + " ms."); - Log.d(TAG, "min = " + ((double)minTimeToInsert/1000000) + " ms."); - Log.d(TAG, "avg = " + ((double)sum/mMaxUnigrams/1000000) + " ms."); - } - - public void testRandomWords() { - runTestRandomWords(BinaryDictUtils.VERSION4_OPTIONS_WITHOUT_TIMESTAMP); - runTestRandomWords(BinaryDictUtils.VERSION4_OPTIONS_WITH_TIMESTAMP); - } -} diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictUtils.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictUtils.java index 67d77e05a..f7a808c1e 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictUtils.java +++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictUtils.java @@ -69,14 +69,4 @@ public class BinaryDictUtils { + formatOptions.mVersion); } } - - public static DictUpdater getDictUpdater(final File file, final FormatOptions formatOptions) - throws UnsupportedFormatException { - if (formatOptions.mVersion == FormatSpec.VERSION4) { - return new Ver4DictUpdater(file, DictDecoder.USE_WRITABLE_BYTEBUFFER); - } else { - throw new UnsupportedFormatException("The format option has a wrong version : " - + formatOptions.mVersion); - } - } } diff --git a/tests/src/com/android/inputmethod/latin/makedict/DictUpdater.java b/tests/src/com/android/inputmethod/latin/makedict/DictUpdater.java deleted file mode 100644 index 709ea3310..000000000 --- a/tests/src/com/android/inputmethod/latin/makedict/DictUpdater.java +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.android.inputmethod.latin.makedict; - -import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; - -import java.io.IOException; -import java.util.ArrayList; - -/** - * An interface of a binary dictionary updater. - */ -public interface DictUpdater extends DictDecoder { - - /** - * Deletes the word from the binary dictionary. - * - * @param word the word to be deleted. - */ - public void deleteWord(final String word) throws IOException, UnsupportedFormatException; - - /** - * Inserts a word into a binary dictionary. - * - * @param word the word to be inserted. - * @param frequency the frequency of the new word. - * @param bigramStrings bigram list, or null if none. - * @param shortcuts shortcut list, or null if none. - * @param isBlackListEntry whether this should be a blacklist entry. - */ - // TODO: Support batch insertion. - public void insertWord(final String word, final int frequency, - final ArrayList bigramStrings, - final ArrayList shortcuts, final boolean isNotAWord, - final boolean isBlackListEntry) throws IOException, UnsupportedFormatException; -} diff --git a/tests/src/com/android/inputmethod/latin/makedict/SparseTableContentUpdater.java b/tests/src/com/android/inputmethod/latin/makedict/SparseTableContentUpdater.java deleted file mode 100644 index 4518f21b9..000000000 --- a/tests/src/com/android/inputmethod/latin/makedict/SparseTableContentUpdater.java +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.android.inputmethod.latin.makedict; - -import com.android.inputmethod.latin.makedict.DictDecoder.DictionaryBufferFactory; - -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.OutputStream; - -/** - * An auxiliary class for updating data associated with SparseTable. - */ -public class SparseTableContentUpdater extends SparseTableContentReader { - protected OutputStream mLookupTableOutStream; - protected OutputStream[] mAddressTableOutStreams; - protected OutputStream[] mContentOutStreams; - - public SparseTableContentUpdater(final String name, final int blockSize, - final File baseDir, final String[] contentFilenames, final String[] contentIds, - final DictionaryBufferFactory factory) { - super(name, blockSize, baseDir, contentFilenames, contentIds, factory); - mAddressTableOutStreams = new OutputStream[mContentCount]; - mContentOutStreams = new OutputStream[mContentCount]; - } - - protected void openStreamsAndBuffers() throws IOException { - openBuffers(); - mLookupTableOutStream = new FileOutputStream(mLookupTableFile, true /* append */); - for (int i = 0; i < mContentCount; ++i) { - mAddressTableOutStreams[i] = new FileOutputStream(mAddressTableFiles[i], - true /* append */); - mContentOutStreams[i] = new FileOutputStream(mContentFiles[i], true /* append */); - } - } - - /** - * Set the contentIndex-th elements of contentId-th table. - * - * @param contentId the id of the content table. - * @param contentIndex the index where to set the valie. - * @param value the value to set. - */ - protected void setContentValue(final int contentId, final int contentIndex, final int value) - throws IOException { - if ((contentIndex / mBlockSize) * SparseTable.SIZE_OF_INT_IN_BYTES - >= mLookupTableBuffer.limit()) { - // Need to extend the lookup table - final int currentSize = mLookupTableBuffer.limit() - / SparseTable.SIZE_OF_INT_IN_BYTES; - final int target = contentIndex / mBlockSize + 1; - for (int i = currentSize; i < target; ++i) { - BinaryDictEncoderUtils.writeUIntToStream(mLookupTableOutStream, - SparseTable.NOT_EXIST, SparseTable.SIZE_OF_INT_IN_BYTES); - } - // We need to reopen the byte buffer of the lookup table because a MappedByteBuffer in - // Java isn't expanded automatically when the underlying file is expanded. - reopenLookupTable(); - } - - mLookupTableBuffer.position((contentIndex / mBlockSize) * SparseTable.SIZE_OF_INT_IN_BYTES); - int posInAddressTable = mLookupTableBuffer.readInt(); - if (posInAddressTable == SparseTable.NOT_EXIST) { - // Need to extend the address table - mLookupTableBuffer.position(mLookupTableBuffer.position() - - SparseTable.SIZE_OF_INT_IN_BYTES); - posInAddressTable = mAddressTableBuffers[0].limit() / mBlockSize; - BinaryDictEncoderUtils.writeUIntToDictBuffer(mLookupTableBuffer, - posInAddressTable, SparseTable.SIZE_OF_INT_IN_BYTES); - for (int i = 0; i < mContentCount; ++i) { - for (int j = 0; j < mBlockSize; ++j) { - BinaryDictEncoderUtils.writeUIntToStream(mAddressTableOutStreams[i], - SparseTable.NOT_EXIST, SparseTable.SIZE_OF_INT_IN_BYTES); - } - } - // We need to reopen the byte buffers of the address tables because a MappedByteBuffer - // in Java isn't expanded automatically when the underlying file is expanded. - reopenAddressTables(); - } - posInAddressTable += (contentIndex % mBlockSize) * SparseTable.SIZE_OF_INT_IN_BYTES; - - mAddressTableBuffers[contentId].position(posInAddressTable); - BinaryDictEncoderUtils.writeUIntToDictBuffer(mAddressTableBuffers[contentId], - value, SparseTable.SIZE_OF_INT_IN_BYTES); - } - - private void reopenLookupTable() throws IOException { - mLookupTableOutStream.flush(); - mLookupTableBuffer = mFactory.getDictionaryBuffer(mLookupTableFile); - } - - private void reopenAddressTables() throws IOException { - for (int i = 0; i < mContentCount; ++i) { - mAddressTableOutStreams[i].flush(); - mAddressTableBuffers[i] = mFactory.getDictionaryBuffer(mAddressTableFiles[i]); - } - } - - protected void close() throws IOException { - mLookupTableOutStream.close(); - for (final OutputStream stream : mAddressTableOutStreams) { - stream.close(); - } - for (final OutputStream stream : mContentOutStreams) { - stream.close(); - } - } -} diff --git a/tests/src/com/android/inputmethod/latin/makedict/Ver4DictUpdater.java b/tests/src/com/android/inputmethod/latin/makedict/Ver4DictUpdater.java deleted file mode 100644 index 119755ff3..000000000 --- a/tests/src/com/android/inputmethod/latin/makedict/Ver4DictUpdater.java +++ /dev/null @@ -1,790 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.android.inputmethod.latin.makedict; - -import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding; -import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; -import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; -import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode; -import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; -import com.android.inputmethod.latin.utils.CollectionUtils; - -import android.util.Log; - -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.OutputStream; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Iterator; - -/** - * An implementation of DictUpdater for version 4 binary dictionary. - */ -public class Ver4DictUpdater extends Ver4DictDecoder implements DictUpdater { - private static final String TAG = Ver4DictUpdater.class.getSimpleName(); - private static final int MAX_JUMPS = 10000; - - private OutputStream mDictStream; - private final File mFrequencyFile; - - public Ver4DictUpdater(final File dictDirectory, final int factoryType) - throws UnsupportedFormatException { - // DictUpdater must have an updatable DictBuffer. - super(dictDirectory, ((factoryType & MASK_DICTBUFFER) == USE_BYTEARRAY) - ? USE_BYTEARRAY : USE_WRITABLE_BYTEBUFFER); - mFrequencyFile = getFile(FILETYPE_FREQUENCY); - } - - private static class BigramContentUpdater extends SparseTableContentUpdater { - public BigramContentUpdater(final String name, final File baseDir, - final boolean hasTimestamp) { - super(name + FormatSpec.BIGRAM_FILE_EXTENSION, - FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, baseDir, - BigramContentReader.getContentFilenames(name, hasTimestamp), - BigramContentReader.getContentIds(hasTimestamp), - new DictionaryBufferFromWritableByteBufferFactory()); - } - - public void insertBigramEntries(final int terminalId, final int frequency, - final ArrayList entries) throws IOException { - if (terminalId < 0) { - throw new RuntimeException("Invalid terminal id : " + terminalId); - } - openStreamsAndBuffers(); - - if (entries == null || entries.isEmpty()) { - setContentValue(FormatSpec.BIGRAM_FREQ_CONTENT_INDEX, terminalId, - SparseTable.NOT_EXIST); - return; - } - final int positionOfEntries = - (int) mContentFiles[FormatSpec.BIGRAM_FREQ_CONTENT_INDEX].length(); - setContentValue(FormatSpec.BIGRAM_FREQ_CONTENT_INDEX, terminalId, positionOfEntries); - - final Iterator bigramIterator = entries.iterator(); - while (bigramIterator.hasNext()) { - final PendingAttribute entry = bigramIterator.next(); - final int flags = BinaryDictEncoderUtils.makeBigramFlags(bigramIterator.hasNext(), - 0 /* offset */, entry.mFrequency, frequency, "" /* word */); - BinaryDictEncoderUtils.writeUIntToStream( - mContentOutStreams[FormatSpec.BIGRAM_FREQ_CONTENT_INDEX], flags, - FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE); - BinaryDictEncoderUtils.writeUIntToStream( - mContentOutStreams[FormatSpec.BIGRAM_FREQ_CONTENT_INDEX], entry.mAddress, - FormatSpec.PTNODE_ATTRIBUTE_MAX_ADDRESS_SIZE); - } - close(); - } - } - - private static class ShortcutContentUpdater extends SparseTableContentUpdater { - public ShortcutContentUpdater(final String name, final File baseDir) { - super(name + FormatSpec.SHORTCUT_FILE_EXTENSION, - FormatSpec.SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE, baseDir, - new String[] { name + FormatSpec.SHORTCUT_FILE_EXTENSION }, - new String[] { FormatSpec.SHORTCUT_CONTENT_ID }, - new DictionaryBufferFromWritableByteBufferFactory()); - } - - public void insertShortcuts(final int terminalId, - final ArrayList shortcuts) throws IOException { - if (terminalId < 0) { - throw new RuntimeException("Invalid terminal id : " + terminalId); - } - openStreamsAndBuffers(); - if (shortcuts == null || shortcuts.isEmpty()) { - setContentValue(FormatSpec.SHORTCUT_CONTENT_INDEX, terminalId, - SparseTable.NOT_EXIST); - return; - } - - final int positionOfShortcuts = - (int) mContentFiles[FormatSpec.SHORTCUT_CONTENT_INDEX].length(); - setContentValue(FormatSpec.SHORTCUT_CONTENT_INDEX, terminalId, positionOfShortcuts); - - final Iterator shortcutIterator = shortcuts.iterator(); - while (shortcutIterator.hasNext()) { - final WeightedString target = shortcutIterator.next(); - final int shortcutFlags = BinaryDictEncoderUtils.makeShortcutFlags( - shortcutIterator.hasNext(), target.mFrequency); - BinaryDictEncoderUtils.writeUIntToStream( - mContentOutStreams[FormatSpec.SHORTCUT_CONTENT_INDEX], shortcutFlags, - FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE); - CharEncoding.writeString(mContentOutStreams[FormatSpec.SHORTCUT_CONTENT_INDEX], - target.mWord); - } - close(); - } - } - - @Override - public void deleteWord(final String word) throws IOException, UnsupportedFormatException { - if (mDictBuffer == null) { - openDictBuffer(); - readHeader(); - } - final int wordPos = getTerminalPosition(word); - if (wordPos != FormatSpec.NOT_VALID_WORD) { - mDictBuffer.position(wordPos); - final int flags = PtNodeReader.readPtNodeOptionFlags(mDictBuffer); - mDictBuffer.position(wordPos); - mDictBuffer.put((byte)markAsDeleted(flags)); - } - } - - private int getNewTerminalId() { - // The size of frequency file is FormatSpec.FREQUENCY_AND_FLAGS_SIZE * number of terminals - // because each terminal always has a frequency. - // So we can get a fresh terminal id by this logic. - // CAVEAT: we are reading the file size from the disk each time: beware of race conditions, - // even on one thread. - return (int) (mFrequencyFile.length() / FormatSpec.FREQUENCY_AND_FLAGS_SIZE); - } - - private void updateParentPosIfNotMoved(final int nodePos, final int newParentPos, - final FormatOptions formatOptions) { - final int originalPos = getPosition(); - setPosition(nodePos); - final int flags = PtNodeReader.readPtNodeOptionFlags(mDictBuffer); - if (!BinaryDictIOUtils.isMovedPtNode(flags, formatOptions)) { - final int parentOffset = newParentPos - nodePos; - BinaryDictIOUtils.writeSInt24ToBuffer(mDictBuffer, parentOffset); - } - setPosition(originalPos); - } - - private void updateParentPositions(final int nodeArrayPos, final int newParentPos, - final FormatOptions formatOptions) { - final int originalPos = mDictBuffer.position(); - mDictBuffer.position(nodeArrayPos); - int jumpCount = 0; - do { - final int count = readPtNodeCount(); - for (int i = 0; i < count; ++i) { - updateParentPosIfNotMoved(getPosition(), newParentPos, formatOptions); - skipPtNode(formatOptions); - } - if (!readAndFollowForwardLink()) break; - } while (jumpCount++ < MAX_JUMPS); - setPosition(originalPos); - } - - private void updateChildrenPos(final int nodePos, final int newChildrenPos, - final FormatOptions options) { - final int originalPos = getPosition(); - setPosition(nodePos); - final int flags = PtNodeReader.readPtNodeOptionFlags(mDictBuffer); - PtNodeReader.readParentAddress(mDictBuffer, options); - BinaryDictIOUtils.skipString(mDictBuffer, - (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS) != 0); - if ((flags & FormatSpec.FLAG_IS_TERMINAL) != 0) PtNodeReader.readTerminalId(mDictBuffer); - final int basePos = getPosition(); - BinaryDictIOUtils.writeSInt24ToBuffer(mDictBuffer, newChildrenPos - basePos); - setPosition(originalPos); - } - - private void updateTerminalPosition(final int terminalId, final int position) { - if (terminalId == PtNode.NOT_A_TERMINAL - || terminalId * FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE - >= mTerminalAddressTableBuffer.limit()) return; - mTerminalAddressTableBuffer.position(terminalId - * FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE); - BinaryDictEncoderUtils.writeUIntToDictBuffer(mTerminalAddressTableBuffer, position, - FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE); - } - - private void updateForwardLink(final int nodeArrayPos, final int newForwardLink, - final FormatOptions formatOptions) { - final int originalPos = getPosition(); - setPosition(nodeArrayPos); - int jumpCount = 0; - while (jumpCount++ < MAX_JUMPS) { - final int ptNodeCount = readPtNodeCount(); - for (int i = 0; i < ptNodeCount; ++i) { - skipPtNode(formatOptions); - } - final int forwardLinkPos = getPosition(); - if (!readAndFollowForwardLink()) { - setPosition(forwardLinkPos); - BinaryDictIOUtils.writeSInt24ToBuffer(mDictBuffer, newForwardLink - forwardLinkPos); - break; - } - } - setPosition(originalPos); - } - - private void markPtNodeAsMoved(final int nodePos, final int newNodePos, - final FormatOptions options) { - final int originalPos = getPosition(); - updateParentPosIfNotMoved(nodePos, newNodePos, options); - setPosition(nodePos); - final int currentFlags = PtNodeReader.readPtNodeOptionFlags(mDictBuffer); - setPosition(nodePos); - mDictBuffer.put((byte) (FormatSpec.FLAG_IS_MOVED - | (currentFlags & (~FormatSpec.MASK_MOVE_AND_DELETE_FLAG)))); - final int offset = newNodePos - nodePos; - BinaryDictIOUtils.writeSInt24ToBuffer(mDictBuffer, offset); - setPosition(originalPos); - } - - /** - * Writes a PtNode to an output stream from a Ver4PtNodeInfo. - * - * @param nodePos the position of the head of the PtNode. - * @param info the PtNode info to be written. - * @return the size written, in bytes. - */ - private int writePtNode(final int nodePos, final Ver4PtNodeInfo info) throws IOException { - int written = 0; - - // Write flags. - mDictStream.write((byte) (info.mFlags & 0xFF)); - written += FormatSpec.PTNODE_FLAGS_SIZE; - - // Write the parent position. - final int parentOffset = info.mParentPos == FormatSpec.NO_PARENT_ADDRESS ? - FormatSpec.NO_PARENT_ADDRESS : info.mParentPos - nodePos; - BinaryDictIOUtils.writeSInt24ToStream(mDictStream, parentOffset); - written += FormatSpec.PARENT_ADDRESS_SIZE; - - // Write a string. - if (((info.mFlags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS) != 0) - != (info.mEndIndexOfCharacters - info.mStartIndexOfCharacters > 1)) { - throw new RuntimeException("Inconsistent flags : hasMultipleChars = " - + ((info.mFlags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS) != 0) + ", length = " - + (info.mEndIndexOfCharacters - info.mStartIndexOfCharacters)); - } - written += CharEncoding.writeCodePoints(mDictStream, info.mCharacters, - info.mStartIndexOfCharacters, info.mEndIndexOfCharacters); - - // Write the terminal id. - if ((info.mFlags & FormatSpec.FLAG_IS_TERMINAL) != 0) { - BinaryDictEncoderUtils.writeUIntToStream(mDictStream, info.mTerminalId, - FormatSpec.PTNODE_TERMINAL_ID_SIZE); - written += FormatSpec.PTNODE_TERMINAL_ID_SIZE; - } - - // Write the children position. - final int childrenOffset = info.mChildrenPos == FormatSpec.NO_CHILDREN_ADDRESS - ? 0 : info.mChildrenPos - (nodePos + written); - BinaryDictIOUtils.writeSInt24ToStream(mDictStream, childrenOffset); - written += FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE; - - return written; - } - - /** - * Helper method to split and move PtNode. - * - * @param ptNodeArrayPos the position of PtNodeArray which contains the split and moved PtNode. - * @param splittedPtNodeToMovePos the position of the split and moved PtNode. - * @param newParent the parent PtNode after splitting. - * @param newChildren the children PtNodes after splitting. - * @param newParentStartPos where to write the new parent. - * @param formatOptions the format options. - */ - private void writeSplittedPtNodes(final int ptNodeArrayPos, final int splittedPtNodeToMovePos, - final Ver4PtNodeInfo newParent, final Ver4PtNodeInfo[] newChildren, - final int newParentStartPos, - final FormatOptions formatOptions) throws IOException { - updateTerminalPosition(newParent.mTerminalId, - newParentStartPos + 1 /* size of PtNodeCount */); - int written = writePtNodeArray(newParentStartPos, new Ver4PtNodeInfo[] { newParent }, - FormatSpec.NO_FORWARD_LINK_ADDRESS); - final int childrenStartPos = newParentStartPos + written; - writePtNodeArray(childrenStartPos, newChildren, FormatSpec.NO_FORWARD_LINK_ADDRESS); - int childrenNodePos = childrenStartPos + 1 /* size of PtNodeCount */; - for (final Ver4PtNodeInfo info : newChildren) { - updateTerminalPosition(info.mTerminalId, childrenNodePos); - childrenNodePos += computePtNodeSize(info.mCharacters, info.mStartIndexOfCharacters, - info.mEndIndexOfCharacters, - (info.mFlags & FormatSpec.FLAG_IS_TERMINAL) != 0); - } - - // Mark as moved. - markPtNodeAsMoved(splittedPtNodeToMovePos, newParentStartPos + 1 /* size of PtNodeCount */, - formatOptions); - updateForwardLink(ptNodeArrayPos, newParentStartPos, formatOptions); - } - - /** - * Writes a node array to the stream. - * - * @param nodeArrayPos the position of the head of the node array. - * @param infos an array of Ver4PtNodeInfo to be written. - * @return the written length in bytes. - */ - private int writePtNodeArray(final int nodeArrayPos, final Ver4PtNodeInfo[] infos, - final int forwardLink) throws IOException { - int written = BinaryDictIOUtils.writePtNodeCount(mDictStream, infos.length); - for (int i = 0; i < infos.length; ++i) { - written += writePtNode(nodeArrayPos + written, infos[i]); - } - BinaryDictIOUtils.writeSInt24ToStream(mDictStream, forwardLink); - written += FormatSpec.FORWARD_LINK_ADDRESS_SIZE; - return written; - } - - private int computePtNodeSize(final int[] codePoints, final int startIndex, final int endIndex, - final boolean isTerminal) { - return FormatSpec.PTNODE_FLAGS_SIZE + FormatSpec.PARENT_ADDRESS_SIZE - + CharEncoding.getCharArraySize(codePoints, startIndex, endIndex) - + (endIndex - startIndex > 1 ? FormatSpec.PTNODE_TERMINATOR_SIZE : 0) - + (isTerminal ? FormatSpec.PTNODE_TERMINAL_ID_SIZE : 0) - + FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE; - } - - private void writeNewSinglePtNodeWithAttributes(final int[] codePoints, - final boolean hasShortcuts, final int terminalId, final boolean hasBigrams, - final boolean isNotAWord, final boolean isBlackListEntry, final int parentPos, - final FormatOptions formatOptions) throws IOException { - final int newNodeArrayPos = mDictBuffer.limit(); - final int newNodeFlags = BinaryDictEncoderUtils.makePtNodeFlags(codePoints.length > 1, - terminalId != PtNode.NOT_A_TERMINAL, FormatSpec.FLAG_IS_NOT_MOVED, hasShortcuts, - hasBigrams, isNotAWord, isBlackListEntry, formatOptions); - final Ver4PtNodeInfo info = new Ver4PtNodeInfo(newNodeFlags, codePoints, terminalId, - FormatSpec.NO_CHILDREN_ADDRESS, parentPos, 0 /* nodeSize */); - writePtNodeArray(newNodeArrayPos, new Ver4PtNodeInfo[] { info }, - FormatSpec.NO_FORWARD_LINK_ADDRESS); - } - - private int setMultipleCharsInFlags(final int currentFlags, final boolean hasMultipleChars) { - final int flags; - if (hasMultipleChars) { - flags = currentFlags | FormatSpec.FLAG_HAS_MULTIPLE_CHARS; - } else { - flags = currentFlags & (~FormatSpec.FLAG_HAS_MULTIPLE_CHARS); - } - return flags; - } - - private int setIsNotAWordInFlags(final int currentFlags, final boolean isNotAWord) { - final int flags; - if (isNotAWord) { - flags = currentFlags | FormatSpec.FLAG_IS_NOT_A_WORD; - } else { - flags = currentFlags & (~FormatSpec.FLAG_IS_NOT_A_WORD); - } - return flags; - } - - private int setIsBlackListEntryInFlags(final int currentFlags, final boolean isBlackListEntry) { - final int flags; - if (isBlackListEntry) { - flags = currentFlags | FormatSpec.FLAG_IS_BLACKLISTED; - } else { - flags = currentFlags & (~FormatSpec.FLAG_IS_BLACKLISTED); - } - return flags; - } - - /** - * Splits a PtNode. - * - * abcd - ef - * - * -> inserting "abc" - * - * abc - d - ef - * - * @param nodeArrayToSplitPos the position of PtNodeArray which contains the PtNode to split. - * @param nodeToSplitPos the position of the PtNode to split. - * @param nodeToSplitInfo the information of the PtNode to split. - * @param indexToSplit the index where to split in the code points array. - * @param parentOfNodeToSplitPos the absolute position of a parent of the node to split. - * @param newTerminalId the terminal id of the inserted node (corresponds to "d"). - * @param hasShortcuts whether the inserted word should have shortcuts. - * @param hasBigrams whether the inserted word should have bigrams. - * @param isNotAWord whether the inserted word should be not a word. - * @param isBlackListEntry whether the inserted word should be a black list entry. - * @param formatOptions the format options. - */ - private void splitOnly(final int nodeArrayToSplitPos, final int nodeToSplitPos, - final Ver4PtNodeInfo nodeToSplitInfo, final int indexToSplit, - final int parentOfNodeToSplitPos, final int newTerminalId, final boolean hasShortcuts, - final boolean hasBigrams, final boolean isNotAWord, final boolean isBlackListEntry, - final FormatOptions formatOptions) throws IOException { - final int parentNodeArrayStartPos = mDictBuffer.limit(); - final int parentNodeStartPos = parentNodeArrayStartPos + 1 /* size of PtNodeCount */; - final int parentFlags = BinaryDictEncoderUtils.makePtNodeFlags(indexToSplit > 1, - true /* isTerminal */, FormatSpec.FLAG_IS_NOT_MOVED, hasShortcuts, hasBigrams, - isNotAWord, isBlackListEntry, formatOptions); - final Ver4PtNodeInfo parentInfo = new Ver4PtNodeInfo(parentFlags, - nodeToSplitInfo.mCharacters, newTerminalId, parentNodeStartPos - + computePtNodeSize(nodeToSplitInfo.mCharacters, 0, indexToSplit, true) - + FormatSpec.FORWARD_LINK_ADDRESS_SIZE, - parentOfNodeToSplitPos, 0 /* nodeSize */); - parentInfo.mStartIndexOfCharacters = 0; - parentInfo.mEndIndexOfCharacters = indexToSplit; - - // Write the child. - final int childrenFlags = setMultipleCharsInFlags(nodeToSplitInfo.mFlags, - nodeToSplitInfo.mCharacters.length - indexToSplit > 1); - final Ver4PtNodeInfo childrenInfo = new Ver4PtNodeInfo(childrenFlags, - nodeToSplitInfo.mCharacters, nodeToSplitInfo.mTerminalId, - nodeToSplitInfo.mChildrenPos, parentNodeStartPos, 0 /* nodeSize */); - childrenInfo.mStartIndexOfCharacters = indexToSplit; - childrenInfo.mEndIndexOfCharacters = nodeToSplitInfo.mCharacters.length; - if (nodeToSplitInfo.mChildrenPos != FormatSpec.NO_CHILDREN_ADDRESS) { - updateParentPositions(nodeToSplitInfo.mChildrenPos, - parentInfo.mChildrenPos + 1 /* size of PtNodeCount */, formatOptions); - } - - writeSplittedPtNodes(nodeArrayToSplitPos, nodeToSplitPos, parentInfo, - new Ver4PtNodeInfo[] { childrenInfo }, parentNodeArrayStartPos, formatOptions); - } - - /** - * Split and branch a PtNode. - * - * ab - cd - * - * -> inserting "ac" - * - * a - b - cd - * | - * - c - * - * @param nodeArrayToSplitPos the position of PtNodeArray which contains the PtNode to split. - * @param nodeToSplitPos the position of the PtNode to split. - * @param nodeToSplitInfo the information of the PtNode to split. - * @param indexToSplit the index where to split in the code points array. - * @param parentOfNodeToSplitPos the absolute position of parent of the node to split. - * @param newWordSuffixCodePoints the suffix of the newly inserted word (corresponds to "c"). - * @param startIndexOfNewWordSuffixCodePoints the start index in newWordSuffixCodePoints where - * the suffix starts. - * @param newTerminalId the terminal id of the inserted node (correspond to "c"). - * @param hasShortcuts whether the inserted word should have shortcuts. - * @param hasBigrams whether the inserted word should have bigrams. - * @param isNotAWord whether the inserted word should be not a word. - * @param isBlackListEntry whether the inserted word should be a black list entry. - * @param formatOptions the format options. - */ - private void splitAndBranch(final int nodeArrayToSplitPos, final int nodeToSplitPos, - final Ver4PtNodeInfo nodeToSplitInfo, final int indexToSplit, - final int parentOfNodeToSplitPos, final int[] newWordSuffixCodePoints, - final int startIndexOfNewWordSuffixCodePoints, - final int newTerminalId, - final boolean hasShortcuts, final boolean hasBigrams, final boolean isNotAWord, - final boolean isBlackListEntry, final FormatOptions formatOptions) throws IOException { - final int parentNodeArrayStartPos = mDictBuffer.limit(); - final int parentNodeStartPos = parentNodeArrayStartPos + 1 /* size of PtNodeCount */; - final int parentFlags = BinaryDictEncoderUtils.makePtNodeFlags( - indexToSplit > 1, - false /* isTerminal */, FormatSpec.FLAG_IS_NOT_MOVED, - false /* hasShortcut */, false /* hasBigrams */, - false /* isNotAWord */, false /* isBlackListEntry */, formatOptions); - final Ver4PtNodeInfo parentInfo = new Ver4PtNodeInfo(parentFlags, - nodeToSplitInfo.mCharacters, PtNode.NOT_A_TERMINAL, - parentNodeStartPos - + computePtNodeSize(nodeToSplitInfo.mCharacters, 0, indexToSplit, false) - + FormatSpec.FORWARD_LINK_ADDRESS_SIZE, - parentOfNodeToSplitPos, 0 /* nodeSize */); - parentInfo.mStartIndexOfCharacters = 0; - parentInfo.mEndIndexOfCharacters = indexToSplit; - - final int childrenNodeArrayStartPos = parentNodeStartPos - + computePtNodeSize(nodeToSplitInfo.mCharacters, 0, indexToSplit, false) - + FormatSpec.FORWARD_LINK_ADDRESS_SIZE; - final int firstChildrenFlags = BinaryDictEncoderUtils.makePtNodeFlags( - newWordSuffixCodePoints.length - startIndexOfNewWordSuffixCodePoints > 1, - true /* isTerminal */, FormatSpec.FLAG_IS_NOT_MOVED, hasShortcuts, hasBigrams, - isNotAWord, isBlackListEntry, formatOptions); - final Ver4PtNodeInfo firstChildrenInfo = new Ver4PtNodeInfo(firstChildrenFlags, - newWordSuffixCodePoints, newTerminalId, - FormatSpec.NO_CHILDREN_ADDRESS, parentNodeStartPos, - 0 /* nodeSize */); - firstChildrenInfo.mStartIndexOfCharacters = startIndexOfNewWordSuffixCodePoints; - firstChildrenInfo.mEndIndexOfCharacters = newWordSuffixCodePoints.length; - - final int secondChildrenStartPos = childrenNodeArrayStartPos + 1 /* size of ptNodeCount */ - + computePtNodeSize(newWordSuffixCodePoints, startIndexOfNewWordSuffixCodePoints, - newWordSuffixCodePoints.length, true /* isTerminal */); - final int secondChildrenFlags = setMultipleCharsInFlags(nodeToSplitInfo.mFlags, - nodeToSplitInfo.mCharacters.length - indexToSplit > 1); - final Ver4PtNodeInfo secondChildrenInfo = new Ver4PtNodeInfo(secondChildrenFlags, - nodeToSplitInfo.mCharacters, nodeToSplitInfo.mTerminalId, - nodeToSplitInfo.mChildrenPos, parentNodeStartPos, 0 /* nodeSize */); - secondChildrenInfo.mStartIndexOfCharacters = indexToSplit; - secondChildrenInfo.mEndIndexOfCharacters = nodeToSplitInfo.mCharacters.length; - if (nodeToSplitInfo.mChildrenPos != FormatSpec.NO_CHILDREN_ADDRESS) { - updateParentPositions(nodeToSplitInfo.mChildrenPos, secondChildrenStartPos, - formatOptions); - } - - writeSplittedPtNodes(nodeArrayToSplitPos, nodeToSplitPos, parentInfo, - new Ver4PtNodeInfo[] { firstChildrenInfo, secondChildrenInfo }, - parentNodeArrayStartPos, formatOptions); - } - - /** - * Inserts a word into the trie file and returns the position of inserted terminal node. - * If the insertion is failed, returns FormatSpec.NOT_VALID_WORD. - */ - private int insertWordToTrie(final String word, final int newTerminalId, - final boolean isNotAWord, final boolean isBlackListEntry, final boolean hasBigrams, - final boolean hasShortcuts) throws IOException, UnsupportedFormatException { - setPosition(0); - final FileHeader header = readHeader(); - - final int[] codePoints = FusionDictionary.getCodePoints(word); - final int wordLen = codePoints.length; - - int wordPos = 0; - for (int depth = 0; depth < FormatSpec.MAX_WORD_LENGTH; /* nop */) { - final int nodeArrayPos = getPosition(); - final int ptNodeCount = readPtNodeCount(); - boolean goToChildren = false; - int parentPos = FormatSpec.NO_PARENT_ADDRESS; - for (int i = 0; i < ptNodeCount; ++i) { - final int nodePos = getPosition(); - final Ver4PtNodeInfo nodeInfo = readVer4PtNodeInfo(nodePos, header.mFormatOptions); - if (BinaryDictIOUtils.isMovedPtNode(nodeInfo.mFlags, header.mFormatOptions)) { - continue; - } - if (nodeInfo.mParentPos != FormatSpec.NO_PARENT_ADDRESS) { - parentPos = nodePos + nodeInfo.mParentPos; - } - - final boolean firstCharacterMatched = - codePoints[wordPos] == nodeInfo.mCharacters[0]; - boolean allCharactersMatched = true; - int firstDifferentCharacterIndex = -1; - for (int p = 0; p < nodeInfo.mCharacters.length; ++p) { - if (wordPos + p >= codePoints.length) break; - if (codePoints[wordPos + p] != nodeInfo.mCharacters[p]) { - if (firstDifferentCharacterIndex == -1) { - firstDifferentCharacterIndex = p; - } - allCharactersMatched = false; - } - } - - if (!firstCharacterMatched) { - // Go to the next sibling node. - continue; - } - - if (!allCharactersMatched) { - final int parentNodeArrayStartPos = mDictBuffer.limit(); - splitAndBranch(nodeArrayPos, nodePos, nodeInfo, firstDifferentCharacterIndex, - parentPos, codePoints, wordPos + firstDifferentCharacterIndex, - newTerminalId, hasShortcuts, hasBigrams, isNotAWord, - isBlackListEntry, header.mFormatOptions); - - return parentNodeArrayStartPos + computePtNodeSize(codePoints, wordPos, - wordPos + firstDifferentCharacterIndex, false) - + FormatSpec.FORWARD_LINK_ADDRESS_SIZE + 1 /* size of PtNodeCount */; - } - - if (wordLen - wordPos < nodeInfo.mCharacters.length) { - final int parentNodeArrayStartPos = mDictBuffer.limit(); - splitOnly(nodeArrayPos, nodePos, nodeInfo, wordLen - wordPos, parentPos, - newTerminalId, hasShortcuts, hasBigrams, isNotAWord, isBlackListEntry, - header.mFormatOptions); - - // Return the position of the inserted word. - return parentNodeArrayStartPos + 1 /* size of PtNodeCount */; - } - - wordPos += nodeInfo.mCharacters.length; - if (wordPos == wordLen) { - // This dictionary already contains the word. - Log.e(TAG, "Something went wrong. If the word is already contained, " - + " there is no need to insert new PtNode."); - return FormatSpec.NOT_VALID_WORD; - } - if (nodeInfo.mChildrenPos == FormatSpec.NO_CHILDREN_ADDRESS) { - // There are no children. - // We need to add a new node as a child of this node. - final int newNodeArrayPos = mDictBuffer.limit(); - final int[] newNodeCodePoints = Arrays.copyOfRange(codePoints, wordPos, - codePoints.length); - writeNewSinglePtNodeWithAttributes(newNodeCodePoints, hasShortcuts, - newTerminalId, hasBigrams, isNotAWord, isBlackListEntry, nodePos, - header.mFormatOptions); - updateChildrenPos(nodePos, newNodeArrayPos, header.mFormatOptions); - return newNodeArrayPos + 1 /* size of PtNodeCount */; - } else { - // Found the matched node. - // Go to the children of this node. - setPosition(nodeInfo.mChildrenPos); - goToChildren = true; - depth++; - break; - } - } - - if (goToChildren) continue; - if (!readAndFollowForwardLink()) { - // Add a new node that contains [wordPos, word.length()-1]. - // and update the forward link. - final int newNodeArrayPos = mDictBuffer.limit(); - final int[] newCodePoints = Arrays.copyOfRange(codePoints, wordPos, - codePoints.length); - writeNewSinglePtNodeWithAttributes(newCodePoints, hasShortcuts, newTerminalId, - hasBigrams, isNotAWord, isBlackListEntry, parentPos, header.mFormatOptions); - updateForwardLink(nodeArrayPos, newNodeArrayPos, header.mFormatOptions); - return newNodeArrayPos + 1 /* size of PtNodeCount */; - } - } - return FormatSpec.NOT_VALID_WORD; - } - - private void updateFrequency(final int terminalId, final int frequency) { - mFrequencyBuffer.position(terminalId * FormatSpec.FREQUENCY_AND_FLAGS_SIZE); - BinaryDictEncoderUtils.writeUIntToDictBuffer(mFrequencyBuffer, frequency, - FormatSpec.FREQUENCY_AND_FLAGS_SIZE); - } - - private void insertFrequency(final int frequency) throws IOException { - final OutputStream frequencyStream = new FileOutputStream(mFrequencyFile, - true /* append */); - BinaryDictEncoderUtils.writeUIntToStream(frequencyStream, frequency, - FormatSpec.FREQUENCY_AND_FLAGS_SIZE); - frequencyStream.close(); - } - - private void insertTerminalPosition(final int posOfTerminal) throws IOException, - UnsupportedFormatException { - final OutputStream terminalPosStream = new FileOutputStream( - getFile(FILETYPE_TERMINAL_ADDRESS_TABLE), true /* append */); - BinaryDictEncoderUtils.writeUIntToStream(terminalPosStream, posOfTerminal, - FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE); - terminalPosStream.close(); - } - - private void insertBigrams(final int terminalId, final int frequency, - final ArrayList bigramAddresses) - throws IOException, UnsupportedFormatException { - openDictBuffer(); - final BigramContentUpdater updater = new BigramContentUpdater(mDictDirectory.getName(), - mDictDirectory, false); - - // Convert addresses to terminal ids. - final ArrayList bigrams = CollectionUtils.newArrayList(); - mDictBuffer.position(0); - final FileHeader header = readHeader(); - for (PendingAttribute attr : bigramAddresses) { - mDictBuffer.position(attr.mAddress); - final Ver4PtNodeInfo info = readVer4PtNodeInfo(attr.mAddress, header.mFormatOptions); - if (info.mTerminalId == PtNode.NOT_A_TERMINAL) { - throw new RuntimeException("We can't have a bigram target that's not a terminal."); - } - bigrams.add(new PendingAttribute(frequency, info.mTerminalId)); - } - updater.insertBigramEntries(terminalId, frequency, bigrams); - close(); - } - - private void insertShortcuts(final int terminalId, final ArrayList shortcuts) - throws IOException { - final ShortcutContentUpdater updater = new ShortcutContentUpdater(mDictDirectory.getName(), - mDictDirectory); - updater.insertShortcuts(terminalId, shortcuts); - } - - private void openBuffersAndStream() throws IOException, UnsupportedFormatException { - openDictBuffer(); - mDictStream = new FileOutputStream(getFile(FILETYPE_TRIE), true /* append */); - } - - private void close() throws IOException { - if (mDictStream != null) { - mDictStream.close(); - mDictStream = null; - } - mDictBuffer = null; - mFrequencyBuffer = null; - mTerminalAddressTableBuffer = null; - } - - private void updateAttributes(final int posOfWord, final int frequency, - final ArrayList bigramStrings, - final ArrayList shortcuts, final boolean isNotAWord, - final boolean isBlackListEntry) throws IOException, UnsupportedFormatException { - mDictBuffer.position(0); - final FileHeader header = readHeader(); - mDictBuffer.position(posOfWord); - final Ver4PtNodeInfo info = readVer4PtNodeInfo(posOfWord, header.mFormatOptions); - final int terminalId = info.mTerminalId; - - // Update the flags. - final int newFlags = setIsNotAWordInFlags( - setIsBlackListEntryInFlags(info.mFlags, isBlackListEntry), isNotAWord); - mDictBuffer.position(posOfWord); - mDictBuffer.put((byte) newFlags); - - updateFrequency(terminalId, frequency); - insertBigrams(terminalId, frequency, resolveBigramPositions(this, bigramStrings)); - insertShortcuts(terminalId, shortcuts); - } - - @Override - public void insertWord(final String word, final int frequency, - final ArrayList bigramStrings, final ArrayList shortcuts, - final boolean isNotAWord, final boolean isBlackListEntry) - throws IOException, UnsupportedFormatException { - final int newTerminalId = getNewTerminalId(); - - openBuffersAndStream(); - final int posOfWord = getTerminalPosition(word); - if (posOfWord != FormatSpec.NOT_VALID_WORD) { - // The word is already contained in the dictionary. - updateAttributes(posOfWord, frequency, bigramStrings, shortcuts, isNotAWord, - isBlackListEntry); - close(); - return; - } - - // Insert new PtNode into trie. - final int posOfTerminal = insertWordToTrie(word, newTerminalId, isNotAWord, - isBlackListEntry, bigramStrings != null && !bigramStrings.isEmpty(), - shortcuts != null && !shortcuts.isEmpty()); - insertFrequency(frequency); - insertTerminalPosition(posOfTerminal); - close(); - - insertBigrams(newTerminalId, frequency, resolveBigramPositions(this, bigramStrings)); - insertShortcuts(newTerminalId, shortcuts); - } - - /** - * Converts a list of WeightedString to a list of PendingAttribute. - */ - private static ArrayList resolveBigramPositions(final DictUpdater dictUpdater, - final ArrayList bigramStrings) - throws IOException, UnsupportedFormatException { - if (bigramStrings == null) return CollectionUtils.newArrayList(); - final ArrayList bigrams = CollectionUtils.newArrayList(); - for (final WeightedString bigram : bigramStrings) { - final int pos = dictUpdater.getTerminalPosition(bigram.mWord); - if (pos == FormatSpec.NOT_VALID_WORD) { - // TODO: figure out what is the correct thing to do here. - } else { - bigrams.add(new PendingAttribute(bigram.mFrequency, pos)); - } - } - return bigrams; - } - - private static int markAsDeleted(final int flags) { - return (flags & (~FormatSpec.MASK_CHILDREN_ADDRESS_TYPE)) | FormatSpec.FLAG_IS_DELETED; - } -} diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Test.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Test.java index 9174238da..48817b1b1 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Test.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Test.java @@ -18,7 +18,6 @@ package com.android.inputmethod.latin.dicttool; import com.android.inputmethod.latin.makedict.BinaryDictDecoderEncoderTests; import com.android.inputmethod.latin.makedict.BinaryDictEncoderFlattenTreeTests; -import com.android.inputmethod.latin.makedict.BinaryDictIOUtilsTests; import com.android.inputmethod.latin.makedict.FusionDictionaryTest; import java.lang.reflect.Constructor; @@ -31,15 +30,15 @@ import java.util.ArrayList; */ public class Test extends Dicttool.Command { public static final String COMMAND = "test"; + private static final int DEFAULT_MAX_UNIGRAMS = 1500; private long mSeed = System.currentTimeMillis(); - private int mMaxUnigrams = BinaryDictIOUtilsTests.DEFAULT_MAX_UNIGRAMS; + private int mMaxUnigrams = DEFAULT_MAX_UNIGRAMS; private static final Class[] sClassesToTest = { BinaryDictOffdeviceUtilsTests.class, FusionDictionaryTest.class, BinaryDictDecoderEncoderTests.class, BinaryDictEncoderFlattenTreeTests.class, - BinaryDictIOUtilsTests.class }; private ArrayList mAllTestMethods = new ArrayList(); private ArrayList mUsedTestMethods = new ArrayList();