parent
42334bb493
commit
c2fd53ee0e
|
@ -592,35 +592,4 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
|||
Log.d(TAG, result);
|
||||
}
|
||||
}
|
||||
|
||||
private void runTestDeleteWord(final FormatOptions formatOptions)
|
||||
throws IOException, UnsupportedFormatException {
|
||||
final String dictName = "testDeleteWord";
|
||||
final String dictVersion = Long.toString(System.currentTimeMillis());
|
||||
final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions,
|
||||
getContext().getCacheDir());
|
||||
|
||||
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
|
||||
BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion));
|
||||
addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */);
|
||||
timeWritingDictToFile(file, dict, formatOptions);
|
||||
|
||||
final DictUpdater dictUpdater = BinaryDictUtils.getDictUpdater(file, formatOptions);
|
||||
MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD,
|
||||
dictUpdater.getTerminalPosition(sWords.get(0)));
|
||||
dictUpdater.deleteWord(sWords.get(0));
|
||||
assertEquals(FormatSpec.NOT_VALID_WORD,
|
||||
dictUpdater.getTerminalPosition(sWords.get(0)));
|
||||
|
||||
MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD,
|
||||
dictUpdater.getTerminalPosition(sWords.get(5)));
|
||||
dictUpdater.deleteWord(sWords.get(5));
|
||||
assertEquals(FormatSpec.NOT_VALID_WORD,
|
||||
dictUpdater.getTerminalPosition(sWords.get(5)));
|
||||
}
|
||||
|
||||
public void testDeleteWord() throws IOException, UnsupportedFormatException {
|
||||
runTestDeleteWord(BinaryDictUtils.VERSION4_OPTIONS_WITHOUT_TIMESTAMP);
|
||||
runTestDeleteWord(BinaryDictUtils.VERSION4_OPTIONS_WITH_TIMESTAMP);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,380 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 2012 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.android.inputmethod.latin.makedict;
|
||||
|
||||
import android.test.AndroidTestCase;
|
||||
import android.test.MoreAsserts;
|
||||
import android.test.suitebuilder.annotation.LargeTest;
|
||||
import android.util.Log;
|
||||
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
|
||||
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
|
||||
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
||||
import com.android.inputmethod.latin.utils.CollectionUtils;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Random;
|
||||
|
||||
@LargeTest
|
||||
public class BinaryDictIOUtilsTests extends AndroidTestCase {
|
||||
private static final String TAG = BinaryDictIOUtilsTests.class.getSimpleName();
|
||||
|
||||
private static final ArrayList<String> sWords = CollectionUtils.newArrayList();
|
||||
public static final int DEFAULT_MAX_UNIGRAMS = 1500;
|
||||
private final int mMaxUnigrams;
|
||||
|
||||
private static final String[] CHARACTERS = {
|
||||
"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
|
||||
"n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z",
|
||||
"\u00FC" /* ü */, "\u00E2" /* â */, "\u00F1" /* ñ */, // accented characters
|
||||
"\u4E9C" /* 亜 */, "\u4F0A" /* 伊 */, "\u5B87" /* 宇 */, // kanji
|
||||
"\uD841\uDE28" /* 𠘨 */, "\uD840\uDC0B" /* 𠀋 */, "\uD861\uDED7" /* 𨛗 */ // surrogate pair
|
||||
};
|
||||
|
||||
/**
 * Creates the test with the current time as random seed and
 * {@link #DEFAULT_MAX_UNIGRAMS} generated words.
 */
public BinaryDictIOUtilsTests() {
    this(System.currentTimeMillis(), DEFAULT_MAX_UNIGRAMS);
}
|
||||
|
||||
/**
 * Creates the test and refills the shared word list with pseudo-random words.
 *
 * @param seed the seed of the random generator; it is logged so a run can be reproduced.
 * @param maxUnigrams the number of words to generate.
 */
public BinaryDictIOUtilsTests(final long seed, final int maxUnigrams) {
    super();
    Log.d(TAG, "Seed for test is " + seed + ", maxUnigrams is " + maxUnigrams);
    mMaxUnigrams = maxUnigrams;
    final Random generator = new Random(seed);
    // sWords is static: every construction discards the previous content.
    sWords.clear();
    for (int remaining = maxUnigrams; remaining > 0; --remaining) {
        sWords.add(generateWord(generator.nextInt()));
    }
}
|
||||
|
||||
// Utilities for test
|
||||
private String generateWord(final int value) {
|
||||
final int lengthOfChars = CHARACTERS.length;
|
||||
StringBuilder builder = new StringBuilder("");
|
||||
long lvalue = Math.abs((long)value);
|
||||
while (lvalue > 0) {
|
||||
builder.append(CHARACTERS[(int)(lvalue % lengthOfChars)]);
|
||||
lvalue /= lengthOfChars;
|
||||
}
|
||||
if (builder.toString().equals("")) return "a";
|
||||
return builder.toString();
|
||||
}
|
||||
|
||||
private static void printPtNode(final PtNodeInfo info) {
|
||||
Log.d(TAG, " PtNode at " + info.mOriginalAddress);
|
||||
Log.d(TAG, " flags = " + info.mFlags);
|
||||
Log.d(TAG, " parentAddress = " + info.mParentAddress);
|
||||
Log.d(TAG, " characters = " + new String(info.mCharacters, 0,
|
||||
info.mCharacters.length));
|
||||
if (info.mFrequency != -1) Log.d(TAG, " frequency = " + info.mFrequency);
|
||||
if (info.mChildrenAddress == FormatSpec.NO_CHILDREN_ADDRESS) {
|
||||
Log.d(TAG, " children address = no children address");
|
||||
} else {
|
||||
Log.d(TAG, " children address = " + info.mChildrenAddress);
|
||||
}
|
||||
if (info.mShortcutTargets != null) {
|
||||
for (final WeightedString ws : info.mShortcutTargets) {
|
||||
Log.d(TAG, " shortcuts = " + ws.mWord);
|
||||
}
|
||||
}
|
||||
if (info.mBigrams != null) {
|
||||
for (final PendingAttribute attr : info.mBigrams) {
|
||||
Log.d(TAG, " bigram = " + attr.mAddress);
|
||||
}
|
||||
}
|
||||
Log.d(TAG, " end address = " + info.mEndAddress);
|
||||
}
|
||||
|
||||
private static void printNode(final Ver2DictDecoder dictDecoder,
|
||||
final FormatSpec.FormatOptions formatOptions) {
|
||||
final DictBuffer dictBuffer = dictDecoder.getDictBuffer();
|
||||
Log.d(TAG, "Node at " + dictBuffer.position());
|
||||
final int count = BinaryDictDecoderUtils.readPtNodeCount(dictBuffer);
|
||||
Log.d(TAG, " ptNodeCount = " + count);
|
||||
for (int i = 0; i < count; ++i) {
|
||||
final PtNodeInfo currentInfo = dictDecoder.readPtNode(dictBuffer.position(),
|
||||
formatOptions);
|
||||
printPtNode(currentInfo);
|
||||
}
|
||||
if (formatOptions.supportsDynamicUpdate()) {
|
||||
final int forwardLinkAddress = dictBuffer.readUnsignedInt24();
|
||||
Log.d(TAG, " forwardLinkAddress = " + forwardLinkAddress);
|
||||
}
|
||||
}
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
private static void printBinaryFile(final Ver2DictDecoder dictDecoder)
|
||||
throws IOException, UnsupportedFormatException {
|
||||
final FileHeader fileHeader = dictDecoder.readHeader();
|
||||
final DictBuffer dictBuffer = dictDecoder.getDictBuffer();
|
||||
while (dictBuffer.position() < dictBuffer.limit()) {
|
||||
printNode(dictDecoder, fileHeader.mFormatOptions);
|
||||
}
|
||||
}
|
||||
|
||||
private int getWordPosition(final File file, final String word) {
|
||||
int position = FormatSpec.NOT_VALID_WORD;
|
||||
|
||||
try {
|
||||
final DictDecoder dictDecoder = FormatSpec.getDictDecoder(file,
|
||||
DictDecoder.USE_READONLY_BYTEBUFFER);
|
||||
position = dictDecoder.getTerminalPosition(word);
|
||||
} catch (IOException e) {
|
||||
} catch (UnsupportedFormatException e) {
|
||||
}
|
||||
return position;
|
||||
}
|
||||
|
||||
/**
|
||||
* Find a word using the DictDecoder.
|
||||
*
|
||||
* @param dictDecoder the dict decoder
|
||||
* @param word the word searched
|
||||
* @return the found ptNodeInfo
|
||||
* @throws IOException
|
||||
* @throws UnsupportedFormatException
|
||||
*/
|
||||
private static PtNodeInfo findWordByDictDecoder(final DictDecoder dictDecoder,
|
||||
final String word) throws IOException, UnsupportedFormatException {
|
||||
int position = dictDecoder.getTerminalPosition(word);
|
||||
if (position != FormatSpec.NOT_VALID_WORD) {
|
||||
dictDecoder.setPosition(0);
|
||||
final FileHeader header = dictDecoder.readHeader();
|
||||
dictDecoder.setPosition(position);
|
||||
return dictDecoder.readPtNode(position, header.mFormatOptions);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private PtNodeInfo findWordFromFile(final File file, final String word) {
|
||||
final DictDecoder dictDecoder = FormatSpec.getDictDecoder(file);
|
||||
PtNodeInfo info = null;
|
||||
try {
|
||||
dictDecoder.openDictBuffer();
|
||||
info = findWordByDictDecoder(dictDecoder, word);
|
||||
} catch (IOException e) {
|
||||
} catch (UnsupportedFormatException e) {
|
||||
}
|
||||
return info;
|
||||
}
|
||||
|
||||
// return amount of time to insert a word
|
||||
private long insertAndCheckWord(final File file, final String word, final int frequency,
|
||||
final boolean exist, final ArrayList<WeightedString> bigrams,
|
||||
final ArrayList<WeightedString> shortcuts, final FormatOptions formatOptions) {
|
||||
long amountOfTime = -1;
|
||||
try {
|
||||
final DictUpdater dictUpdater = BinaryDictUtils.getDictUpdater(file, formatOptions);
|
||||
|
||||
if (!exist) {
|
||||
assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word));
|
||||
}
|
||||
final long now = System.nanoTime();
|
||||
dictUpdater.insertWord(word, frequency, bigrams, shortcuts, false, false);
|
||||
amountOfTime = System.nanoTime() - now;
|
||||
MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word));
|
||||
} catch (IOException e) {
|
||||
Log.e(TAG, "Raised an IOException while inserting a word", e);
|
||||
} catch (UnsupportedFormatException e) {
|
||||
Log.e(TAG, "Raised an UnsupportedFormatException error while inserting a word", e);
|
||||
}
|
||||
return amountOfTime;
|
||||
}
|
||||
|
||||
private void deleteWord(final File file, final String word, final FormatOptions formatOptions) {
|
||||
try {
|
||||
final DictUpdater dictUpdater = BinaryDictUtils.getDictUpdater(file, formatOptions);
|
||||
dictUpdater.deleteWord(word);
|
||||
} catch (IOException e) {
|
||||
Log.e(TAG, "Raised an IOException while deleting a word", e);
|
||||
} catch (UnsupportedFormatException e) {
|
||||
Log.e(TAG, "Raised an UnsupportedFormatException while deleting a word", e);
|
||||
}
|
||||
}
|
||||
|
||||
private void checkReverseLookup(final File file, final String word, final int position) {
|
||||
|
||||
try {
|
||||
final DictDecoder dictDecoder = FormatSpec.getDictDecoder(file);
|
||||
final FileHeader fileHeader = dictDecoder.readHeader();
|
||||
assertEquals(word,
|
||||
BinaryDictDecoderUtils.getWordAtPosition(dictDecoder, fileHeader.mBodyOffset,
|
||||
position, fileHeader.mFormatOptions).mWord);
|
||||
} catch (IOException e) {
|
||||
Log.e(TAG, "Raised an IOException while looking up a word", e);
|
||||
} catch (UnsupportedFormatException e) {
|
||||
Log.e(TAG, "Raised an UnsupportedFormatException error while looking up a word", e);
|
||||
}
|
||||
}
|
||||
|
||||
private void runTestInsertWord(final FormatOptions formatOptions) {
|
||||
final String testName = "testInsertWord";
|
||||
final String version = Long.toString(System.currentTimeMillis());
|
||||
final File file = BinaryDictUtils.getDictFile(testName, version, formatOptions,
|
||||
getContext().getCacheDir());
|
||||
|
||||
// set an initial dictionary.
|
||||
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
|
||||
BinaryDictUtils.makeDictionaryOptions(testName, version));
|
||||
dict.add("abcd", 10, null, false);
|
||||
|
||||
try {
|
||||
final DictEncoder dictEncoder = BinaryDictUtils.getDictEncoder(file, formatOptions);
|
||||
dictEncoder.writeDictionary(dict, formatOptions);
|
||||
} catch (IOException e) {
|
||||
fail("IOException while writing an initial dictionary : " + e);
|
||||
} catch (UnsupportedFormatException e) {
|
||||
fail("UnsupportedFormatException while writing an initial dictionary : " + e);
|
||||
}
|
||||
|
||||
MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, getWordPosition(file, "abcd"));
|
||||
insertAndCheckWord(file, "abcde", 10, false, null, null, formatOptions);
|
||||
checkReverseLookup(file, "abcde", getWordPosition(file, "abcde"));
|
||||
|
||||
insertAndCheckWord(file, "abcdefghijklmn", 10, false, null, null, formatOptions);
|
||||
checkReverseLookup(file, "abcdefghijklmn", getWordPosition(file, "abcdefghijklmn"));
|
||||
|
||||
insertAndCheckWord(file, "abcdabcd", 10, false, null, null, formatOptions);
|
||||
checkReverseLookup(file, "abcdabcd", getWordPosition(file, "abcdabcd"));
|
||||
|
||||
// update the existing word.
|
||||
insertAndCheckWord(file, "abcdabcd", 15, true, null, null, formatOptions);
|
||||
checkReverseLookup(file, "abcdabcd", getWordPosition(file, "abcdabcd"));
|
||||
|
||||
// Testing splitOnly
|
||||
insertAndCheckWord(file, "ab", 20, false, null, null, formatOptions);
|
||||
checkReverseLookup(file, "ab", getWordPosition(file, "ab"));
|
||||
checkReverseLookup(file, "abcdabcd", getWordPosition(file, "abcdabcd"));
|
||||
checkReverseLookup(file, "abcde", getWordPosition(file, "abcde"));
|
||||
checkReverseLookup(file, "abcdefghijklmn", getWordPosition(file, "abcdefghijklmn"));
|
||||
|
||||
// Testing splitAndBranch
|
||||
insertAndCheckWord(file, "ami", 30, false, null, null, formatOptions);
|
||||
checkReverseLookup(file, "ami", getWordPosition(file, "ami"));
|
||||
checkReverseLookup(file, "ab", getWordPosition(file, "ab"));
|
||||
checkReverseLookup(file, "abcdabcd", getWordPosition(file, "abcdabcd"));
|
||||
checkReverseLookup(file, "abcde", getWordPosition(file, "abcde"));
|
||||
checkReverseLookup(file, "abcdefghijklmn", getWordPosition(file, "abcdefghijklmn"));
|
||||
checkReverseLookup(file, "ami", getWordPosition(file, "ami"));
|
||||
|
||||
insertAndCheckWord(file, "abcdefzzzz", 40, false, null, null, formatOptions);
|
||||
checkReverseLookup(file, "abcdefzzzz", getWordPosition(file, "abcdefzzzz"));
|
||||
|
||||
deleteWord(file, "ami", formatOptions);
|
||||
assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, "ami"));
|
||||
|
||||
insertAndCheckWord(file, "abcdabfg", 30, false, null, null, formatOptions);
|
||||
|
||||
deleteWord(file, "abcd", formatOptions);
|
||||
assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, "abcd"));
|
||||
}
|
||||
|
||||
public void testInsertWord() {
|
||||
runTestInsertWord(BinaryDictUtils.VERSION4_OPTIONS_WITHOUT_TIMESTAMP);
|
||||
runTestInsertWord(BinaryDictUtils.VERSION4_OPTIONS_WITH_TIMESTAMP);
|
||||
}
|
||||
|
||||
private void runTestInsertWordWithBigrams(final FormatOptions formatOptions) {
|
||||
final String testName = "testInsertWordWithBigrams";
|
||||
final String version = Long.toString(System.currentTimeMillis());
|
||||
File file = BinaryDictUtils.getDictFile(testName, version, formatOptions,
|
||||
getContext().getCacheDir());
|
||||
|
||||
// set an initial dictionary.
|
||||
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
|
||||
BinaryDictUtils.makeDictionaryOptions(testName, version));
|
||||
dict.add("abcd", 10, null, false);
|
||||
dict.add("efgh", 15, null, false);
|
||||
|
||||
try {
|
||||
final DictEncoder dictEncoder = BinaryDictUtils.getDictEncoder(file, formatOptions);
|
||||
dictEncoder.writeDictionary(dict, formatOptions);
|
||||
} catch (IOException e) {
|
||||
fail("IOException while writing an initial dictionary : " + e);
|
||||
} catch (UnsupportedFormatException e) {
|
||||
fail("UnsupportedFormatException while writing an initial dictionary : " + e);
|
||||
}
|
||||
|
||||
final ArrayList<WeightedString> banana = new ArrayList<WeightedString>();
|
||||
banana.add(new WeightedString("banana", 10));
|
||||
|
||||
insertAndCheckWord(file, "banana", 0, false, null, null, formatOptions);
|
||||
insertAndCheckWord(file, "recursive", 60, true, banana, null, formatOptions);
|
||||
|
||||
final PtNodeInfo info = findWordFromFile(file, "recursive");
|
||||
int bananaPos = getWordPosition(file, "banana");
|
||||
assertNotNull(info.mBigrams);
|
||||
assertEquals(info.mBigrams.size(), 1);
|
||||
assertEquals(info.mBigrams.get(0).mAddress, bananaPos);
|
||||
}
|
||||
|
||||
public void testInsertWordWithBigrams() {
|
||||
runTestInsertWordWithBigrams(BinaryDictUtils.VERSION4_OPTIONS_WITHOUT_TIMESTAMP);
|
||||
runTestInsertWordWithBigrams(BinaryDictUtils.VERSION4_OPTIONS_WITH_TIMESTAMP);
|
||||
}
|
||||
|
||||
private void runTestRandomWords(final FormatOptions formatOptions) {
|
||||
final String testName = "testRandomWord";
|
||||
final String version = Long.toString(System.currentTimeMillis());
|
||||
final File file = BinaryDictUtils.getDictFile(testName, version, formatOptions,
|
||||
getContext().getCacheDir());
|
||||
|
||||
// set an initial dictionary.
|
||||
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
|
||||
BinaryDictUtils.makeDictionaryOptions(testName, version));
|
||||
dict.add("initial", 10, null, false);
|
||||
|
||||
try {
|
||||
final DictEncoder dictEncoder = BinaryDictUtils.getDictEncoder(file, formatOptions);
|
||||
dictEncoder.writeDictionary(dict, formatOptions);
|
||||
} catch (IOException e) {
|
||||
assertTrue(false);
|
||||
} catch (UnsupportedFormatException e) {
|
||||
assertTrue(false);
|
||||
}
|
||||
|
||||
long maxTimeToInsert = 0, sum = 0;
|
||||
long minTimeToInsert = 100000000; // 1000000000 is an upper bound for minTimeToInsert.
|
||||
int cnt = 0;
|
||||
for (final String word : sWords) {
|
||||
final long diff = insertAndCheckWord(file, word,
|
||||
cnt % FormatSpec.MAX_TERMINAL_FREQUENCY, false, null, null, formatOptions);
|
||||
maxTimeToInsert = Math.max(maxTimeToInsert, diff);
|
||||
minTimeToInsert = Math.min(minTimeToInsert, diff);
|
||||
sum += diff;
|
||||
cnt++;
|
||||
}
|
||||
cnt = 0;
|
||||
for (final String word : sWords) {
|
||||
MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word));
|
||||
}
|
||||
|
||||
Log.d(TAG, "Test version " + formatOptions.mVersion);
|
||||
Log.d(TAG, "max = " + ((double)maxTimeToInsert/1000000) + " ms.");
|
||||
Log.d(TAG, "min = " + ((double)minTimeToInsert/1000000) + " ms.");
|
||||
Log.d(TAG, "avg = " + ((double)sum/mMaxUnigrams/1000000) + " ms.");
|
||||
}
|
||||
|
||||
public void testRandomWords() {
|
||||
runTestRandomWords(BinaryDictUtils.VERSION4_OPTIONS_WITHOUT_TIMESTAMP);
|
||||
runTestRandomWords(BinaryDictUtils.VERSION4_OPTIONS_WITH_TIMESTAMP);
|
||||
}
|
||||
}
|
|
@ -69,14 +69,4 @@ public class BinaryDictUtils {
|
|||
+ formatOptions.mVersion);
|
||||
}
|
||||
}
|
||||
|
||||
public static DictUpdater getDictUpdater(final File file, final FormatOptions formatOptions)
|
||||
throws UnsupportedFormatException {
|
||||
if (formatOptions.mVersion == FormatSpec.VERSION4) {
|
||||
return new Ver4DictUpdater(file, DictDecoder.USE_WRITABLE_BYTEBUFFER);
|
||||
} else {
|
||||
throw new UnsupportedFormatException("The format option has a wrong version : "
|
||||
+ formatOptions.mVersion);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,50 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 2013 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.android.inputmethod.latin.makedict;
|
||||
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
|
||||
/**
|
||||
* An interface of a binary dictionary updater.
|
||||
*/
|
||||
public interface DictUpdater extends DictDecoder {
|
||||
|
||||
/**
|
||||
* Deletes the word from the binary dictionary.
|
||||
*
|
||||
* @param word the word to be deleted.
|
||||
*/
|
||||
public void deleteWord(final String word) throws IOException, UnsupportedFormatException;
|
||||
|
||||
/**
|
||||
* Inserts a word into a binary dictionary.
|
||||
*
|
||||
* @param word the word to be inserted.
|
||||
* @param frequency the frequency of the new word.
|
||||
* @param bigramStrings bigram list, or null if none.
|
||||
* @param shortcuts shortcut list, or null if none.
|
||||
* @param isBlackListEntry whether this should be a blacklist entry.
|
||||
*/
|
||||
// TODO: Support batch insertion.
|
||||
public void insertWord(final String word, final int frequency,
|
||||
final ArrayList<WeightedString> bigramStrings,
|
||||
final ArrayList<WeightedString> shortcuts, final boolean isNotAWord,
|
||||
final boolean isBlackListEntry) throws IOException, UnsupportedFormatException;
|
||||
}
|
|
@ -1,123 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 2013 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.android.inputmethod.latin.makedict;
|
||||
|
||||
import com.android.inputmethod.latin.makedict.DictDecoder.DictionaryBufferFactory;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
|
||||
/**
|
||||
* An auxiliary class for updating data associated with SparseTable.
|
||||
*/
|
||||
public class SparseTableContentUpdater extends SparseTableContentReader {
|
||||
protected OutputStream mLookupTableOutStream;
|
||||
protected OutputStream[] mAddressTableOutStreams;
|
||||
protected OutputStream[] mContentOutStreams;
|
||||
|
||||
public SparseTableContentUpdater(final String name, final int blockSize,
|
||||
final File baseDir, final String[] contentFilenames, final String[] contentIds,
|
||||
final DictionaryBufferFactory factory) {
|
||||
super(name, blockSize, baseDir, contentFilenames, contentIds, factory);
|
||||
mAddressTableOutStreams = new OutputStream[mContentCount];
|
||||
mContentOutStreams = new OutputStream[mContentCount];
|
||||
}
|
||||
|
||||
protected void openStreamsAndBuffers() throws IOException {
|
||||
openBuffers();
|
||||
mLookupTableOutStream = new FileOutputStream(mLookupTableFile, true /* append */);
|
||||
for (int i = 0; i < mContentCount; ++i) {
|
||||
mAddressTableOutStreams[i] = new FileOutputStream(mAddressTableFiles[i],
|
||||
true /* append */);
|
||||
mContentOutStreams[i] = new FileOutputStream(mContentFiles[i], true /* append */);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the contentIndex-th elements of contentId-th table.
|
||||
*
|
||||
* @param contentId the id of the content table.
|
||||
* @param contentIndex the index where to set the valie.
|
||||
* @param value the value to set.
|
||||
*/
|
||||
protected void setContentValue(final int contentId, final int contentIndex, final int value)
|
||||
throws IOException {
|
||||
if ((contentIndex / mBlockSize) * SparseTable.SIZE_OF_INT_IN_BYTES
|
||||
>= mLookupTableBuffer.limit()) {
|
||||
// Need to extend the lookup table
|
||||
final int currentSize = mLookupTableBuffer.limit()
|
||||
/ SparseTable.SIZE_OF_INT_IN_BYTES;
|
||||
final int target = contentIndex / mBlockSize + 1;
|
||||
for (int i = currentSize; i < target; ++i) {
|
||||
BinaryDictEncoderUtils.writeUIntToStream(mLookupTableOutStream,
|
||||
SparseTable.NOT_EXIST, SparseTable.SIZE_OF_INT_IN_BYTES);
|
||||
}
|
||||
// We need to reopen the byte buffer of the lookup table because a MappedByteBuffer in
|
||||
// Java isn't expanded automatically when the underlying file is expanded.
|
||||
reopenLookupTable();
|
||||
}
|
||||
|
||||
mLookupTableBuffer.position((contentIndex / mBlockSize) * SparseTable.SIZE_OF_INT_IN_BYTES);
|
||||
int posInAddressTable = mLookupTableBuffer.readInt();
|
||||
if (posInAddressTable == SparseTable.NOT_EXIST) {
|
||||
// Need to extend the address table
|
||||
mLookupTableBuffer.position(mLookupTableBuffer.position()
|
||||
- SparseTable.SIZE_OF_INT_IN_BYTES);
|
||||
posInAddressTable = mAddressTableBuffers[0].limit() / mBlockSize;
|
||||
BinaryDictEncoderUtils.writeUIntToDictBuffer(mLookupTableBuffer,
|
||||
posInAddressTable, SparseTable.SIZE_OF_INT_IN_BYTES);
|
||||
for (int i = 0; i < mContentCount; ++i) {
|
||||
for (int j = 0; j < mBlockSize; ++j) {
|
||||
BinaryDictEncoderUtils.writeUIntToStream(mAddressTableOutStreams[i],
|
||||
SparseTable.NOT_EXIST, SparseTable.SIZE_OF_INT_IN_BYTES);
|
||||
}
|
||||
}
|
||||
// We need to reopen the byte buffers of the address tables because a MappedByteBuffer
|
||||
// in Java isn't expanded automatically when the underlying file is expanded.
|
||||
reopenAddressTables();
|
||||
}
|
||||
posInAddressTable += (contentIndex % mBlockSize) * SparseTable.SIZE_OF_INT_IN_BYTES;
|
||||
|
||||
mAddressTableBuffers[contentId].position(posInAddressTable);
|
||||
BinaryDictEncoderUtils.writeUIntToDictBuffer(mAddressTableBuffers[contentId],
|
||||
value, SparseTable.SIZE_OF_INT_IN_BYTES);
|
||||
}
|
||||
|
||||
private void reopenLookupTable() throws IOException {
|
||||
mLookupTableOutStream.flush();
|
||||
mLookupTableBuffer = mFactory.getDictionaryBuffer(mLookupTableFile);
|
||||
}
|
||||
|
||||
private void reopenAddressTables() throws IOException {
|
||||
for (int i = 0; i < mContentCount; ++i) {
|
||||
mAddressTableOutStreams[i].flush();
|
||||
mAddressTableBuffers[i] = mFactory.getDictionaryBuffer(mAddressTableFiles[i]);
|
||||
}
|
||||
}
|
||||
|
||||
protected void close() throws IOException {
|
||||
mLookupTableOutStream.close();
|
||||
for (final OutputStream stream : mAddressTableOutStreams) {
|
||||
stream.close();
|
||||
}
|
||||
for (final OutputStream stream : mContentOutStreams) {
|
||||
stream.close();
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,790 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 2013 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.android.inputmethod.latin.makedict;
|
||||
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
|
||||
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
|
||||
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
||||
import com.android.inputmethod.latin.utils.CollectionUtils;
|
||||
|
||||
import android.util.Log;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Iterator;
|
||||
|
||||
/**
|
||||
* An implementation of DictUpdater for version 4 binary dictionary.
|
||||
*/
|
||||
public class Ver4DictUpdater extends Ver4DictDecoder implements DictUpdater {
|
||||
private static final String TAG = Ver4DictUpdater.class.getSimpleName();
|
||||
private static final int MAX_JUMPS = 10000;
|
||||
|
||||
private OutputStream mDictStream;
|
||||
private final File mFrequencyFile;
|
||||
|
||||
/**
 * Creates an updater on the given dictionary directory.
 *
 * @param dictDirectory the dictionary directory, passed to the decoder constructor.
 * @param factoryType the requested buffer type; anything other than USE_BYTEARRAY (after
 *        masking with MASK_DICTBUFFER) is replaced by USE_WRITABLE_BYTEBUFFER.
 */
public Ver4DictUpdater(final File dictDirectory, final int factoryType)
        throws UnsupportedFormatException {
    // DictUpdater must have an updatable DictBuffer.
    super(dictDirectory, ((factoryType & MASK_DICTBUFFER) != USE_BYTEARRAY)
            ? USE_WRITABLE_BYTEBUFFER : USE_BYTEARRAY);
    mFrequencyFile = getFile(FILETYPE_FREQUENCY);
}
|
||||
|
||||
private static class BigramContentUpdater extends SparseTableContentUpdater {
|
||||
public BigramContentUpdater(final String name, final File baseDir,
|
||||
final boolean hasTimestamp) {
|
||||
super(name + FormatSpec.BIGRAM_FILE_EXTENSION,
|
||||
FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, baseDir,
|
||||
BigramContentReader.getContentFilenames(name, hasTimestamp),
|
||||
BigramContentReader.getContentIds(hasTimestamp),
|
||||
new DictionaryBufferFromWritableByteBufferFactory());
|
||||
}
|
||||
|
||||
public void insertBigramEntries(final int terminalId, final int frequency,
|
||||
final ArrayList<PendingAttribute> entries) throws IOException {
|
||||
if (terminalId < 0) {
|
||||
throw new RuntimeException("Invalid terminal id : " + terminalId);
|
||||
}
|
||||
openStreamsAndBuffers();
|
||||
|
||||
if (entries == null || entries.isEmpty()) {
|
||||
setContentValue(FormatSpec.BIGRAM_FREQ_CONTENT_INDEX, terminalId,
|
||||
SparseTable.NOT_EXIST);
|
||||
return;
|
||||
}
|
||||
final int positionOfEntries =
|
||||
(int) mContentFiles[FormatSpec.BIGRAM_FREQ_CONTENT_INDEX].length();
|
||||
setContentValue(FormatSpec.BIGRAM_FREQ_CONTENT_INDEX, terminalId, positionOfEntries);
|
||||
|
||||
final Iterator<PendingAttribute> bigramIterator = entries.iterator();
|
||||
while (bigramIterator.hasNext()) {
|
||||
final PendingAttribute entry = bigramIterator.next();
|
||||
final int flags = BinaryDictEncoderUtils.makeBigramFlags(bigramIterator.hasNext(),
|
||||
0 /* offset */, entry.mFrequency, frequency, "" /* word */);
|
||||
BinaryDictEncoderUtils.writeUIntToStream(
|
||||
mContentOutStreams[FormatSpec.BIGRAM_FREQ_CONTENT_INDEX], flags,
|
||||
FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
|
||||
BinaryDictEncoderUtils.writeUIntToStream(
|
||||
mContentOutStreams[FormatSpec.BIGRAM_FREQ_CONTENT_INDEX], entry.mAddress,
|
||||
FormatSpec.PTNODE_ATTRIBUTE_MAX_ADDRESS_SIZE);
|
||||
}
|
||||
close();
|
||||
}
|
||||
}
|
||||
|
||||
private static class ShortcutContentUpdater extends SparseTableContentUpdater {
|
||||
public ShortcutContentUpdater(final String name, final File baseDir) {
|
||||
super(name + FormatSpec.SHORTCUT_FILE_EXTENSION,
|
||||
FormatSpec.SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE, baseDir,
|
||||
new String[] { name + FormatSpec.SHORTCUT_FILE_EXTENSION },
|
||||
new String[] { FormatSpec.SHORTCUT_CONTENT_ID },
|
||||
new DictionaryBufferFromWritableByteBufferFactory());
|
||||
}
|
||||
|
||||
public void insertShortcuts(final int terminalId,
|
||||
final ArrayList<WeightedString> shortcuts) throws IOException {
|
||||
if (terminalId < 0) {
|
||||
throw new RuntimeException("Invalid terminal id : " + terminalId);
|
||||
}
|
||||
openStreamsAndBuffers();
|
||||
if (shortcuts == null || shortcuts.isEmpty()) {
|
||||
setContentValue(FormatSpec.SHORTCUT_CONTENT_INDEX, terminalId,
|
||||
SparseTable.NOT_EXIST);
|
||||
return;
|
||||
}
|
||||
|
||||
final int positionOfShortcuts =
|
||||
(int) mContentFiles[FormatSpec.SHORTCUT_CONTENT_INDEX].length();
|
||||
setContentValue(FormatSpec.SHORTCUT_CONTENT_INDEX, terminalId, positionOfShortcuts);
|
||||
|
||||
final Iterator<WeightedString> shortcutIterator = shortcuts.iterator();
|
||||
while (shortcutIterator.hasNext()) {
|
||||
final WeightedString target = shortcutIterator.next();
|
||||
final int shortcutFlags = BinaryDictEncoderUtils.makeShortcutFlags(
|
||||
shortcutIterator.hasNext(), target.mFrequency);
|
||||
BinaryDictEncoderUtils.writeUIntToStream(
|
||||
mContentOutStreams[FormatSpec.SHORTCUT_CONTENT_INDEX], shortcutFlags,
|
||||
FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
|
||||
CharEncoding.writeString(mContentOutStreams[FormatSpec.SHORTCUT_CONTENT_INDEX],
|
||||
target.mWord);
|
||||
}
|
||||
close();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void deleteWord(final String word) throws IOException, UnsupportedFormatException {
|
||||
if (mDictBuffer == null) {
|
||||
openDictBuffer();
|
||||
readHeader();
|
||||
}
|
||||
final int wordPos = getTerminalPosition(word);
|
||||
if (wordPos != FormatSpec.NOT_VALID_WORD) {
|
||||
mDictBuffer.position(wordPos);
|
||||
final int flags = PtNodeReader.readPtNodeOptionFlags(mDictBuffer);
|
||||
mDictBuffer.position(wordPos);
|
||||
mDictBuffer.put((byte)markAsDeleted(flags));
|
||||
}
|
||||
}
|
||||
|
||||
private int getNewTerminalId() {
|
||||
// The size of frequency file is FormatSpec.FREQUENCY_AND_FLAGS_SIZE * number of terminals
|
||||
// because each terminal always has a frequency.
|
||||
// So we can get a fresh terminal id by this logic.
|
||||
// CAVEAT: we are reading the file size from the disk each time: beware of race conditions,
|
||||
// even on one thread.
|
||||
return (int) (mFrequencyFile.length() / FormatSpec.FREQUENCY_AND_FLAGS_SIZE);
|
||||
}
|
||||
|
||||
private void updateParentPosIfNotMoved(final int nodePos, final int newParentPos,
|
||||
final FormatOptions formatOptions) {
|
||||
final int originalPos = getPosition();
|
||||
setPosition(nodePos);
|
||||
final int flags = PtNodeReader.readPtNodeOptionFlags(mDictBuffer);
|
||||
if (!BinaryDictIOUtils.isMovedPtNode(flags, formatOptions)) {
|
||||
final int parentOffset = newParentPos - nodePos;
|
||||
BinaryDictIOUtils.writeSInt24ToBuffer(mDictBuffer, parentOffset);
|
||||
}
|
||||
setPosition(originalPos);
|
||||
}
|
||||
|
||||
private void updateParentPositions(final int nodeArrayPos, final int newParentPos,
|
||||
final FormatOptions formatOptions) {
|
||||
final int originalPos = mDictBuffer.position();
|
||||
mDictBuffer.position(nodeArrayPos);
|
||||
int jumpCount = 0;
|
||||
do {
|
||||
final int count = readPtNodeCount();
|
||||
for (int i = 0; i < count; ++i) {
|
||||
updateParentPosIfNotMoved(getPosition(), newParentPos, formatOptions);
|
||||
skipPtNode(formatOptions);
|
||||
}
|
||||
if (!readAndFollowForwardLink()) break;
|
||||
} while (jumpCount++ < MAX_JUMPS);
|
||||
setPosition(originalPos);
|
||||
}
|
||||
|
||||
private void updateChildrenPos(final int nodePos, final int newChildrenPos,
|
||||
final FormatOptions options) {
|
||||
final int originalPos = getPosition();
|
||||
setPosition(nodePos);
|
||||
final int flags = PtNodeReader.readPtNodeOptionFlags(mDictBuffer);
|
||||
PtNodeReader.readParentAddress(mDictBuffer, options);
|
||||
BinaryDictIOUtils.skipString(mDictBuffer,
|
||||
(flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS) != 0);
|
||||
if ((flags & FormatSpec.FLAG_IS_TERMINAL) != 0) PtNodeReader.readTerminalId(mDictBuffer);
|
||||
final int basePos = getPosition();
|
||||
BinaryDictIOUtils.writeSInt24ToBuffer(mDictBuffer, newChildrenPos - basePos);
|
||||
setPosition(originalPos);
|
||||
}
|
||||
|
||||
private void updateTerminalPosition(final int terminalId, final int position) {
|
||||
if (terminalId == PtNode.NOT_A_TERMINAL
|
||||
|| terminalId * FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE
|
||||
>= mTerminalAddressTableBuffer.limit()) return;
|
||||
mTerminalAddressTableBuffer.position(terminalId
|
||||
* FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE);
|
||||
BinaryDictEncoderUtils.writeUIntToDictBuffer(mTerminalAddressTableBuffer, position,
|
||||
FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE);
|
||||
}
|
||||
|
||||
private void updateForwardLink(final int nodeArrayPos, final int newForwardLink,
|
||||
final FormatOptions formatOptions) {
|
||||
final int originalPos = getPosition();
|
||||
setPosition(nodeArrayPos);
|
||||
int jumpCount = 0;
|
||||
while (jumpCount++ < MAX_JUMPS) {
|
||||
final int ptNodeCount = readPtNodeCount();
|
||||
for (int i = 0; i < ptNodeCount; ++i) {
|
||||
skipPtNode(formatOptions);
|
||||
}
|
||||
final int forwardLinkPos = getPosition();
|
||||
if (!readAndFollowForwardLink()) {
|
||||
setPosition(forwardLinkPos);
|
||||
BinaryDictIOUtils.writeSInt24ToBuffer(mDictBuffer, newForwardLink - forwardLinkPos);
|
||||
break;
|
||||
}
|
||||
}
|
||||
setPosition(originalPos);
|
||||
}
|
||||
|
||||
private void markPtNodeAsMoved(final int nodePos, final int newNodePos,
|
||||
final FormatOptions options) {
|
||||
final int originalPos = getPosition();
|
||||
updateParentPosIfNotMoved(nodePos, newNodePos, options);
|
||||
setPosition(nodePos);
|
||||
final int currentFlags = PtNodeReader.readPtNodeOptionFlags(mDictBuffer);
|
||||
setPosition(nodePos);
|
||||
mDictBuffer.put((byte) (FormatSpec.FLAG_IS_MOVED
|
||||
| (currentFlags & (~FormatSpec.MASK_MOVE_AND_DELETE_FLAG))));
|
||||
final int offset = newNodePos - nodePos;
|
||||
BinaryDictIOUtils.writeSInt24ToBuffer(mDictBuffer, offset);
|
||||
setPosition(originalPos);
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes a PtNode to an output stream from a Ver4PtNodeInfo.
|
||||
*
|
||||
* @param nodePos the position of the head of the PtNode.
|
||||
* @param info the PtNode info to be written.
|
||||
* @return the size written, in bytes.
|
||||
*/
|
||||
private int writePtNode(final int nodePos, final Ver4PtNodeInfo info) throws IOException {
|
||||
int written = 0;
|
||||
|
||||
// Write flags.
|
||||
mDictStream.write((byte) (info.mFlags & 0xFF));
|
||||
written += FormatSpec.PTNODE_FLAGS_SIZE;
|
||||
|
||||
// Write the parent position.
|
||||
final int parentOffset = info.mParentPos == FormatSpec.NO_PARENT_ADDRESS ?
|
||||
FormatSpec.NO_PARENT_ADDRESS : info.mParentPos - nodePos;
|
||||
BinaryDictIOUtils.writeSInt24ToStream(mDictStream, parentOffset);
|
||||
written += FormatSpec.PARENT_ADDRESS_SIZE;
|
||||
|
||||
// Write a string.
|
||||
if (((info.mFlags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS) != 0)
|
||||
!= (info.mEndIndexOfCharacters - info.mStartIndexOfCharacters > 1)) {
|
||||
throw new RuntimeException("Inconsistent flags : hasMultipleChars = "
|
||||
+ ((info.mFlags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS) != 0) + ", length = "
|
||||
+ (info.mEndIndexOfCharacters - info.mStartIndexOfCharacters));
|
||||
}
|
||||
written += CharEncoding.writeCodePoints(mDictStream, info.mCharacters,
|
||||
info.mStartIndexOfCharacters, info.mEndIndexOfCharacters);
|
||||
|
||||
// Write the terminal id.
|
||||
if ((info.mFlags & FormatSpec.FLAG_IS_TERMINAL) != 0) {
|
||||
BinaryDictEncoderUtils.writeUIntToStream(mDictStream, info.mTerminalId,
|
||||
FormatSpec.PTNODE_TERMINAL_ID_SIZE);
|
||||
written += FormatSpec.PTNODE_TERMINAL_ID_SIZE;
|
||||
}
|
||||
|
||||
// Write the children position.
|
||||
final int childrenOffset = info.mChildrenPos == FormatSpec.NO_CHILDREN_ADDRESS
|
||||
? 0 : info.mChildrenPos - (nodePos + written);
|
||||
BinaryDictIOUtils.writeSInt24ToStream(mDictStream, childrenOffset);
|
||||
written += FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE;
|
||||
|
||||
return written;
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper method to split and move PtNode.
|
||||
*
|
||||
* @param ptNodeArrayPos the position of PtNodeArray which contains the split and moved PtNode.
|
||||
* @param splittedPtNodeToMovePos the position of the split and moved PtNode.
|
||||
* @param newParent the parent PtNode after splitting.
|
||||
* @param newChildren the children PtNodes after splitting.
|
||||
* @param newParentStartPos where to write the new parent.
|
||||
* @param formatOptions the format options.
|
||||
*/
|
||||
private void writeSplittedPtNodes(final int ptNodeArrayPos, final int splittedPtNodeToMovePos,
|
||||
final Ver4PtNodeInfo newParent, final Ver4PtNodeInfo[] newChildren,
|
||||
final int newParentStartPos,
|
||||
final FormatOptions formatOptions) throws IOException {
|
||||
updateTerminalPosition(newParent.mTerminalId,
|
||||
newParentStartPos + 1 /* size of PtNodeCount */);
|
||||
int written = writePtNodeArray(newParentStartPos, new Ver4PtNodeInfo[] { newParent },
|
||||
FormatSpec.NO_FORWARD_LINK_ADDRESS);
|
||||
final int childrenStartPos = newParentStartPos + written;
|
||||
writePtNodeArray(childrenStartPos, newChildren, FormatSpec.NO_FORWARD_LINK_ADDRESS);
|
||||
int childrenNodePos = childrenStartPos + 1 /* size of PtNodeCount */;
|
||||
for (final Ver4PtNodeInfo info : newChildren) {
|
||||
updateTerminalPosition(info.mTerminalId, childrenNodePos);
|
||||
childrenNodePos += computePtNodeSize(info.mCharacters, info.mStartIndexOfCharacters,
|
||||
info.mEndIndexOfCharacters,
|
||||
(info.mFlags & FormatSpec.FLAG_IS_TERMINAL) != 0);
|
||||
}
|
||||
|
||||
// Mark as moved.
|
||||
markPtNodeAsMoved(splittedPtNodeToMovePos, newParentStartPos + 1 /* size of PtNodeCount */,
|
||||
formatOptions);
|
||||
updateForwardLink(ptNodeArrayPos, newParentStartPos, formatOptions);
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes a node array to the stream.
|
||||
*
|
||||
* @param nodeArrayPos the position of the head of the node array.
|
||||
* @param infos an array of Ver4PtNodeInfo to be written.
|
||||
* @return the written length in bytes.
|
||||
*/
|
||||
private int writePtNodeArray(final int nodeArrayPos, final Ver4PtNodeInfo[] infos,
|
||||
final int forwardLink) throws IOException {
|
||||
int written = BinaryDictIOUtils.writePtNodeCount(mDictStream, infos.length);
|
||||
for (int i = 0; i < infos.length; ++i) {
|
||||
written += writePtNode(nodeArrayPos + written, infos[i]);
|
||||
}
|
||||
BinaryDictIOUtils.writeSInt24ToStream(mDictStream, forwardLink);
|
||||
written += FormatSpec.FORWARD_LINK_ADDRESS_SIZE;
|
||||
return written;
|
||||
}
|
||||
|
||||
private int computePtNodeSize(final int[] codePoints, final int startIndex, final int endIndex,
|
||||
final boolean isTerminal) {
|
||||
return FormatSpec.PTNODE_FLAGS_SIZE + FormatSpec.PARENT_ADDRESS_SIZE
|
||||
+ CharEncoding.getCharArraySize(codePoints, startIndex, endIndex)
|
||||
+ (endIndex - startIndex > 1 ? FormatSpec.PTNODE_TERMINATOR_SIZE : 0)
|
||||
+ (isTerminal ? FormatSpec.PTNODE_TERMINAL_ID_SIZE : 0)
|
||||
+ FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE;
|
||||
}
|
||||
|
||||
private void writeNewSinglePtNodeWithAttributes(final int[] codePoints,
|
||||
final boolean hasShortcuts, final int terminalId, final boolean hasBigrams,
|
||||
final boolean isNotAWord, final boolean isBlackListEntry, final int parentPos,
|
||||
final FormatOptions formatOptions) throws IOException {
|
||||
final int newNodeArrayPos = mDictBuffer.limit();
|
||||
final int newNodeFlags = BinaryDictEncoderUtils.makePtNodeFlags(codePoints.length > 1,
|
||||
terminalId != PtNode.NOT_A_TERMINAL, FormatSpec.FLAG_IS_NOT_MOVED, hasShortcuts,
|
||||
hasBigrams, isNotAWord, isBlackListEntry, formatOptions);
|
||||
final Ver4PtNodeInfo info = new Ver4PtNodeInfo(newNodeFlags, codePoints, terminalId,
|
||||
FormatSpec.NO_CHILDREN_ADDRESS, parentPos, 0 /* nodeSize */);
|
||||
writePtNodeArray(newNodeArrayPos, new Ver4PtNodeInfo[] { info },
|
||||
FormatSpec.NO_FORWARD_LINK_ADDRESS);
|
||||
}
|
||||
|
||||
private int setMultipleCharsInFlags(final int currentFlags, final boolean hasMultipleChars) {
|
||||
final int flags;
|
||||
if (hasMultipleChars) {
|
||||
flags = currentFlags | FormatSpec.FLAG_HAS_MULTIPLE_CHARS;
|
||||
} else {
|
||||
flags = currentFlags & (~FormatSpec.FLAG_HAS_MULTIPLE_CHARS);
|
||||
}
|
||||
return flags;
|
||||
}
|
||||
|
||||
private int setIsNotAWordInFlags(final int currentFlags, final boolean isNotAWord) {
|
||||
final int flags;
|
||||
if (isNotAWord) {
|
||||
flags = currentFlags | FormatSpec.FLAG_IS_NOT_A_WORD;
|
||||
} else {
|
||||
flags = currentFlags & (~FormatSpec.FLAG_IS_NOT_A_WORD);
|
||||
}
|
||||
return flags;
|
||||
}
|
||||
|
||||
private int setIsBlackListEntryInFlags(final int currentFlags, final boolean isBlackListEntry) {
|
||||
final int flags;
|
||||
if (isBlackListEntry) {
|
||||
flags = currentFlags | FormatSpec.FLAG_IS_BLACKLISTED;
|
||||
} else {
|
||||
flags = currentFlags & (~FormatSpec.FLAG_IS_BLACKLISTED);
|
||||
}
|
||||
return flags;
|
||||
}
|
||||
|
||||
/**
|
||||
* Splits a PtNode.
|
||||
*
|
||||
* abcd - ef
|
||||
*
|
||||
* -> inserting "abc"
|
||||
*
|
||||
* abc - d - ef
|
||||
*
|
||||
* @param nodeArrayToSplitPos the position of PtNodeArray which contains the PtNode to split.
|
||||
* @param nodeToSplitPos the position of the PtNode to split.
|
||||
* @param nodeToSplitInfo the information of the PtNode to split.
|
||||
* @param indexToSplit the index where to split in the code points array.
|
||||
* @param parentOfNodeToSplitPos the absolute position of a parent of the node to split.
|
||||
* @param newTerminalId the terminal id of the inserted node (corresponds to "d").
|
||||
* @param hasShortcuts whether the inserted word should have shortcuts.
|
||||
* @param hasBigrams whether the inserted word should have bigrams.
|
||||
* @param isNotAWord whether the inserted word should be not a word.
|
||||
* @param isBlackListEntry whether the inserted word should be a black list entry.
|
||||
* @param formatOptions the format options.
|
||||
*/
|
||||
private void splitOnly(final int nodeArrayToSplitPos, final int nodeToSplitPos,
|
||||
final Ver4PtNodeInfo nodeToSplitInfo, final int indexToSplit,
|
||||
final int parentOfNodeToSplitPos, final int newTerminalId, final boolean hasShortcuts,
|
||||
final boolean hasBigrams, final boolean isNotAWord, final boolean isBlackListEntry,
|
||||
final FormatOptions formatOptions) throws IOException {
|
||||
final int parentNodeArrayStartPos = mDictBuffer.limit();
|
||||
final int parentNodeStartPos = parentNodeArrayStartPos + 1 /* size of PtNodeCount */;
|
||||
final int parentFlags = BinaryDictEncoderUtils.makePtNodeFlags(indexToSplit > 1,
|
||||
true /* isTerminal */, FormatSpec.FLAG_IS_NOT_MOVED, hasShortcuts, hasBigrams,
|
||||
isNotAWord, isBlackListEntry, formatOptions);
|
||||
final Ver4PtNodeInfo parentInfo = new Ver4PtNodeInfo(parentFlags,
|
||||
nodeToSplitInfo.mCharacters, newTerminalId, parentNodeStartPos
|
||||
+ computePtNodeSize(nodeToSplitInfo.mCharacters, 0, indexToSplit, true)
|
||||
+ FormatSpec.FORWARD_LINK_ADDRESS_SIZE,
|
||||
parentOfNodeToSplitPos, 0 /* nodeSize */);
|
||||
parentInfo.mStartIndexOfCharacters = 0;
|
||||
parentInfo.mEndIndexOfCharacters = indexToSplit;
|
||||
|
||||
// Write the child.
|
||||
final int childrenFlags = setMultipleCharsInFlags(nodeToSplitInfo.mFlags,
|
||||
nodeToSplitInfo.mCharacters.length - indexToSplit > 1);
|
||||
final Ver4PtNodeInfo childrenInfo = new Ver4PtNodeInfo(childrenFlags,
|
||||
nodeToSplitInfo.mCharacters, nodeToSplitInfo.mTerminalId,
|
||||
nodeToSplitInfo.mChildrenPos, parentNodeStartPos, 0 /* nodeSize */);
|
||||
childrenInfo.mStartIndexOfCharacters = indexToSplit;
|
||||
childrenInfo.mEndIndexOfCharacters = nodeToSplitInfo.mCharacters.length;
|
||||
if (nodeToSplitInfo.mChildrenPos != FormatSpec.NO_CHILDREN_ADDRESS) {
|
||||
updateParentPositions(nodeToSplitInfo.mChildrenPos,
|
||||
parentInfo.mChildrenPos + 1 /* size of PtNodeCount */, formatOptions);
|
||||
}
|
||||
|
||||
writeSplittedPtNodes(nodeArrayToSplitPos, nodeToSplitPos, parentInfo,
|
||||
new Ver4PtNodeInfo[] { childrenInfo }, parentNodeArrayStartPos, formatOptions);
|
||||
}
|
||||
|
||||
/**
|
||||
* Split and branch a PtNode.
|
||||
*
|
||||
* ab - cd
|
||||
*
|
||||
* -> inserting "ac"
|
||||
*
|
||||
* a - b - cd
|
||||
* |
|
||||
* - c
|
||||
*
|
||||
* @param nodeArrayToSplitPos the position of PtNodeArray which contains the PtNode to split.
|
||||
* @param nodeToSplitPos the position of the PtNode to split.
|
||||
* @param nodeToSplitInfo the information of the PtNode to split.
|
||||
* @param indexToSplit the index where to split in the code points array.
|
||||
* @param parentOfNodeToSplitPos the absolute position of parent of the node to split.
|
||||
* @param newWordSuffixCodePoints the suffix of the newly inserted word (corresponds to "c").
|
||||
* @param startIndexOfNewWordSuffixCodePoints the start index in newWordSuffixCodePoints where
|
||||
* the suffix starts.
|
||||
* @param newTerminalId the terminal id of the inserted node (correspond to "c").
|
||||
* @param hasShortcuts whether the inserted word should have shortcuts.
|
||||
* @param hasBigrams whether the inserted word should have bigrams.
|
||||
* @param isNotAWord whether the inserted word should be not a word.
|
||||
* @param isBlackListEntry whether the inserted word should be a black list entry.
|
||||
* @param formatOptions the format options.
|
||||
*/
|
||||
private void splitAndBranch(final int nodeArrayToSplitPos, final int nodeToSplitPos,
|
||||
final Ver4PtNodeInfo nodeToSplitInfo, final int indexToSplit,
|
||||
final int parentOfNodeToSplitPos, final int[] newWordSuffixCodePoints,
|
||||
final int startIndexOfNewWordSuffixCodePoints,
|
||||
final int newTerminalId,
|
||||
final boolean hasShortcuts, final boolean hasBigrams, final boolean isNotAWord,
|
||||
final boolean isBlackListEntry, final FormatOptions formatOptions) throws IOException {
|
||||
final int parentNodeArrayStartPos = mDictBuffer.limit();
|
||||
final int parentNodeStartPos = parentNodeArrayStartPos + 1 /* size of PtNodeCount */;
|
||||
final int parentFlags = BinaryDictEncoderUtils.makePtNodeFlags(
|
||||
indexToSplit > 1,
|
||||
false /* isTerminal */, FormatSpec.FLAG_IS_NOT_MOVED,
|
||||
false /* hasShortcut */, false /* hasBigrams */,
|
||||
false /* isNotAWord */, false /* isBlackListEntry */, formatOptions);
|
||||
final Ver4PtNodeInfo parentInfo = new Ver4PtNodeInfo(parentFlags,
|
||||
nodeToSplitInfo.mCharacters, PtNode.NOT_A_TERMINAL,
|
||||
parentNodeStartPos
|
||||
+ computePtNodeSize(nodeToSplitInfo.mCharacters, 0, indexToSplit, false)
|
||||
+ FormatSpec.FORWARD_LINK_ADDRESS_SIZE,
|
||||
parentOfNodeToSplitPos, 0 /* nodeSize */);
|
||||
parentInfo.mStartIndexOfCharacters = 0;
|
||||
parentInfo.mEndIndexOfCharacters = indexToSplit;
|
||||
|
||||
final int childrenNodeArrayStartPos = parentNodeStartPos
|
||||
+ computePtNodeSize(nodeToSplitInfo.mCharacters, 0, indexToSplit, false)
|
||||
+ FormatSpec.FORWARD_LINK_ADDRESS_SIZE;
|
||||
final int firstChildrenFlags = BinaryDictEncoderUtils.makePtNodeFlags(
|
||||
newWordSuffixCodePoints.length - startIndexOfNewWordSuffixCodePoints > 1,
|
||||
true /* isTerminal */, FormatSpec.FLAG_IS_NOT_MOVED, hasShortcuts, hasBigrams,
|
||||
isNotAWord, isBlackListEntry, formatOptions);
|
||||
final Ver4PtNodeInfo firstChildrenInfo = new Ver4PtNodeInfo(firstChildrenFlags,
|
||||
newWordSuffixCodePoints, newTerminalId,
|
||||
FormatSpec.NO_CHILDREN_ADDRESS, parentNodeStartPos,
|
||||
0 /* nodeSize */);
|
||||
firstChildrenInfo.mStartIndexOfCharacters = startIndexOfNewWordSuffixCodePoints;
|
||||
firstChildrenInfo.mEndIndexOfCharacters = newWordSuffixCodePoints.length;
|
||||
|
||||
final int secondChildrenStartPos = childrenNodeArrayStartPos + 1 /* size of ptNodeCount */
|
||||
+ computePtNodeSize(newWordSuffixCodePoints, startIndexOfNewWordSuffixCodePoints,
|
||||
newWordSuffixCodePoints.length, true /* isTerminal */);
|
||||
final int secondChildrenFlags = setMultipleCharsInFlags(nodeToSplitInfo.mFlags,
|
||||
nodeToSplitInfo.mCharacters.length - indexToSplit > 1);
|
||||
final Ver4PtNodeInfo secondChildrenInfo = new Ver4PtNodeInfo(secondChildrenFlags,
|
||||
nodeToSplitInfo.mCharacters, nodeToSplitInfo.mTerminalId,
|
||||
nodeToSplitInfo.mChildrenPos, parentNodeStartPos, 0 /* nodeSize */);
|
||||
secondChildrenInfo.mStartIndexOfCharacters = indexToSplit;
|
||||
secondChildrenInfo.mEndIndexOfCharacters = nodeToSplitInfo.mCharacters.length;
|
||||
if (nodeToSplitInfo.mChildrenPos != FormatSpec.NO_CHILDREN_ADDRESS) {
|
||||
updateParentPositions(nodeToSplitInfo.mChildrenPos, secondChildrenStartPos,
|
||||
formatOptions);
|
||||
}
|
||||
|
||||
writeSplittedPtNodes(nodeArrayToSplitPos, nodeToSplitPos, parentInfo,
|
||||
new Ver4PtNodeInfo[] { firstChildrenInfo, secondChildrenInfo },
|
||||
parentNodeArrayStartPos, formatOptions);
|
||||
}
|
||||
|
||||
/**
|
||||
* Inserts a word into the trie file and returns the position of inserted terminal node.
|
||||
* If the insertion is failed, returns FormatSpec.NOT_VALID_WORD.
|
||||
*/
|
||||
private int insertWordToTrie(final String word, final int newTerminalId,
|
||||
final boolean isNotAWord, final boolean isBlackListEntry, final boolean hasBigrams,
|
||||
final boolean hasShortcuts) throws IOException, UnsupportedFormatException {
|
||||
setPosition(0);
|
||||
final FileHeader header = readHeader();
|
||||
|
||||
final int[] codePoints = FusionDictionary.getCodePoints(word);
|
||||
final int wordLen = codePoints.length;
|
||||
|
||||
int wordPos = 0;
|
||||
for (int depth = 0; depth < FormatSpec.MAX_WORD_LENGTH; /* nop */) {
|
||||
final int nodeArrayPos = getPosition();
|
||||
final int ptNodeCount = readPtNodeCount();
|
||||
boolean goToChildren = false;
|
||||
int parentPos = FormatSpec.NO_PARENT_ADDRESS;
|
||||
for (int i = 0; i < ptNodeCount; ++i) {
|
||||
final int nodePos = getPosition();
|
||||
final Ver4PtNodeInfo nodeInfo = readVer4PtNodeInfo(nodePos, header.mFormatOptions);
|
||||
if (BinaryDictIOUtils.isMovedPtNode(nodeInfo.mFlags, header.mFormatOptions)) {
|
||||
continue;
|
||||
}
|
||||
if (nodeInfo.mParentPos != FormatSpec.NO_PARENT_ADDRESS) {
|
||||
parentPos = nodePos + nodeInfo.mParentPos;
|
||||
}
|
||||
|
||||
final boolean firstCharacterMatched =
|
||||
codePoints[wordPos] == nodeInfo.mCharacters[0];
|
||||
boolean allCharactersMatched = true;
|
||||
int firstDifferentCharacterIndex = -1;
|
||||
for (int p = 0; p < nodeInfo.mCharacters.length; ++p) {
|
||||
if (wordPos + p >= codePoints.length) break;
|
||||
if (codePoints[wordPos + p] != nodeInfo.mCharacters[p]) {
|
||||
if (firstDifferentCharacterIndex == -1) {
|
||||
firstDifferentCharacterIndex = p;
|
||||
}
|
||||
allCharactersMatched = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (!firstCharacterMatched) {
|
||||
// Go to the next sibling node.
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!allCharactersMatched) {
|
||||
final int parentNodeArrayStartPos = mDictBuffer.limit();
|
||||
splitAndBranch(nodeArrayPos, nodePos, nodeInfo, firstDifferentCharacterIndex,
|
||||
parentPos, codePoints, wordPos + firstDifferentCharacterIndex,
|
||||
newTerminalId, hasShortcuts, hasBigrams, isNotAWord,
|
||||
isBlackListEntry, header.mFormatOptions);
|
||||
|
||||
return parentNodeArrayStartPos + computePtNodeSize(codePoints, wordPos,
|
||||
wordPos + firstDifferentCharacterIndex, false)
|
||||
+ FormatSpec.FORWARD_LINK_ADDRESS_SIZE + 1 /* size of PtNodeCount */;
|
||||
}
|
||||
|
||||
if (wordLen - wordPos < nodeInfo.mCharacters.length) {
|
||||
final int parentNodeArrayStartPos = mDictBuffer.limit();
|
||||
splitOnly(nodeArrayPos, nodePos, nodeInfo, wordLen - wordPos, parentPos,
|
||||
newTerminalId, hasShortcuts, hasBigrams, isNotAWord, isBlackListEntry,
|
||||
header.mFormatOptions);
|
||||
|
||||
// Return the position of the inserted word.
|
||||
return parentNodeArrayStartPos + 1 /* size of PtNodeCount */;
|
||||
}
|
||||
|
||||
wordPos += nodeInfo.mCharacters.length;
|
||||
if (wordPos == wordLen) {
|
||||
// This dictionary already contains the word.
|
||||
Log.e(TAG, "Something went wrong. If the word is already contained, "
|
||||
+ " there is no need to insert new PtNode.");
|
||||
return FormatSpec.NOT_VALID_WORD;
|
||||
}
|
||||
if (nodeInfo.mChildrenPos == FormatSpec.NO_CHILDREN_ADDRESS) {
|
||||
// There are no children.
|
||||
// We need to add a new node as a child of this node.
|
||||
final int newNodeArrayPos = mDictBuffer.limit();
|
||||
final int[] newNodeCodePoints = Arrays.copyOfRange(codePoints, wordPos,
|
||||
codePoints.length);
|
||||
writeNewSinglePtNodeWithAttributes(newNodeCodePoints, hasShortcuts,
|
||||
newTerminalId, hasBigrams, isNotAWord, isBlackListEntry, nodePos,
|
||||
header.mFormatOptions);
|
||||
updateChildrenPos(nodePos, newNodeArrayPos, header.mFormatOptions);
|
||||
return newNodeArrayPos + 1 /* size of PtNodeCount */;
|
||||
} else {
|
||||
// Found the matched node.
|
||||
// Go to the children of this node.
|
||||
setPosition(nodeInfo.mChildrenPos);
|
||||
goToChildren = true;
|
||||
depth++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (goToChildren) continue;
|
||||
if (!readAndFollowForwardLink()) {
|
||||
// Add a new node that contains [wordPos, word.length()-1].
|
||||
// and update the forward link.
|
||||
final int newNodeArrayPos = mDictBuffer.limit();
|
||||
final int[] newCodePoints = Arrays.copyOfRange(codePoints, wordPos,
|
||||
codePoints.length);
|
||||
writeNewSinglePtNodeWithAttributes(newCodePoints, hasShortcuts, newTerminalId,
|
||||
hasBigrams, isNotAWord, isBlackListEntry, parentPos, header.mFormatOptions);
|
||||
updateForwardLink(nodeArrayPos, newNodeArrayPos, header.mFormatOptions);
|
||||
return newNodeArrayPos + 1 /* size of PtNodeCount */;
|
||||
}
|
||||
}
|
||||
return FormatSpec.NOT_VALID_WORD;
|
||||
}
|
||||
|
||||
private void updateFrequency(final int terminalId, final int frequency) {
|
||||
mFrequencyBuffer.position(terminalId * FormatSpec.FREQUENCY_AND_FLAGS_SIZE);
|
||||
BinaryDictEncoderUtils.writeUIntToDictBuffer(mFrequencyBuffer, frequency,
|
||||
FormatSpec.FREQUENCY_AND_FLAGS_SIZE);
|
||||
}
|
||||
|
||||
private void insertFrequency(final int frequency) throws IOException {
|
||||
final OutputStream frequencyStream = new FileOutputStream(mFrequencyFile,
|
||||
true /* append */);
|
||||
BinaryDictEncoderUtils.writeUIntToStream(frequencyStream, frequency,
|
||||
FormatSpec.FREQUENCY_AND_FLAGS_SIZE);
|
||||
frequencyStream.close();
|
||||
}
|
||||
|
||||
private void insertTerminalPosition(final int posOfTerminal) throws IOException,
|
||||
UnsupportedFormatException {
|
||||
final OutputStream terminalPosStream = new FileOutputStream(
|
||||
getFile(FILETYPE_TERMINAL_ADDRESS_TABLE), true /* append */);
|
||||
BinaryDictEncoderUtils.writeUIntToStream(terminalPosStream, posOfTerminal,
|
||||
FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE);
|
||||
terminalPosStream.close();
|
||||
}
|
||||
|
||||
private void insertBigrams(final int terminalId, final int frequency,
|
||||
final ArrayList<PendingAttribute> bigramAddresses)
|
||||
throws IOException, UnsupportedFormatException {
|
||||
openDictBuffer();
|
||||
final BigramContentUpdater updater = new BigramContentUpdater(mDictDirectory.getName(),
|
||||
mDictDirectory, false);
|
||||
|
||||
// Convert addresses to terminal ids.
|
||||
final ArrayList<PendingAttribute> bigrams = CollectionUtils.newArrayList();
|
||||
mDictBuffer.position(0);
|
||||
final FileHeader header = readHeader();
|
||||
for (PendingAttribute attr : bigramAddresses) {
|
||||
mDictBuffer.position(attr.mAddress);
|
||||
final Ver4PtNodeInfo info = readVer4PtNodeInfo(attr.mAddress, header.mFormatOptions);
|
||||
if (info.mTerminalId == PtNode.NOT_A_TERMINAL) {
|
||||
throw new RuntimeException("We can't have a bigram target that's not a terminal.");
|
||||
}
|
||||
bigrams.add(new PendingAttribute(frequency, info.mTerminalId));
|
||||
}
|
||||
updater.insertBigramEntries(terminalId, frequency, bigrams);
|
||||
close();
|
||||
}
|
||||
|
||||
private void insertShortcuts(final int terminalId, final ArrayList<WeightedString> shortcuts)
|
||||
throws IOException {
|
||||
final ShortcutContentUpdater updater = new ShortcutContentUpdater(mDictDirectory.getName(),
|
||||
mDictDirectory);
|
||||
updater.insertShortcuts(terminalId, shortcuts);
|
||||
}
|
||||
|
||||
private void openBuffersAndStream() throws IOException, UnsupportedFormatException {
|
||||
openDictBuffer();
|
||||
mDictStream = new FileOutputStream(getFile(FILETYPE_TRIE), true /* append */);
|
||||
}
|
||||
|
||||
private void close() throws IOException {
|
||||
if (mDictStream != null) {
|
||||
mDictStream.close();
|
||||
mDictStream = null;
|
||||
}
|
||||
mDictBuffer = null;
|
||||
mFrequencyBuffer = null;
|
||||
mTerminalAddressTableBuffer = null;
|
||||
}
|
||||
|
||||
private void updateAttributes(final int posOfWord, final int frequency,
|
||||
final ArrayList<WeightedString> bigramStrings,
|
||||
final ArrayList<WeightedString> shortcuts, final boolean isNotAWord,
|
||||
final boolean isBlackListEntry) throws IOException, UnsupportedFormatException {
|
||||
mDictBuffer.position(0);
|
||||
final FileHeader header = readHeader();
|
||||
mDictBuffer.position(posOfWord);
|
||||
final Ver4PtNodeInfo info = readVer4PtNodeInfo(posOfWord, header.mFormatOptions);
|
||||
final int terminalId = info.mTerminalId;
|
||||
|
||||
// Update the flags.
|
||||
final int newFlags = setIsNotAWordInFlags(
|
||||
setIsBlackListEntryInFlags(info.mFlags, isBlackListEntry), isNotAWord);
|
||||
mDictBuffer.position(posOfWord);
|
||||
mDictBuffer.put((byte) newFlags);
|
||||
|
||||
updateFrequency(terminalId, frequency);
|
||||
insertBigrams(terminalId, frequency, resolveBigramPositions(this, bigramStrings));
|
||||
insertShortcuts(terminalId, shortcuts);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void insertWord(final String word, final int frequency,
|
||||
final ArrayList<WeightedString> bigramStrings, final ArrayList<WeightedString> shortcuts,
|
||||
final boolean isNotAWord, final boolean isBlackListEntry)
|
||||
throws IOException, UnsupportedFormatException {
|
||||
final int newTerminalId = getNewTerminalId();
|
||||
|
||||
openBuffersAndStream();
|
||||
final int posOfWord = getTerminalPosition(word);
|
||||
if (posOfWord != FormatSpec.NOT_VALID_WORD) {
|
||||
// The word is already contained in the dictionary.
|
||||
updateAttributes(posOfWord, frequency, bigramStrings, shortcuts, isNotAWord,
|
||||
isBlackListEntry);
|
||||
close();
|
||||
return;
|
||||
}
|
||||
|
||||
// Insert new PtNode into trie.
|
||||
final int posOfTerminal = insertWordToTrie(word, newTerminalId, isNotAWord,
|
||||
isBlackListEntry, bigramStrings != null && !bigramStrings.isEmpty(),
|
||||
shortcuts != null && !shortcuts.isEmpty());
|
||||
insertFrequency(frequency);
|
||||
insertTerminalPosition(posOfTerminal);
|
||||
close();
|
||||
|
||||
insertBigrams(newTerminalId, frequency, resolveBigramPositions(this, bigramStrings));
|
||||
insertShortcuts(newTerminalId, shortcuts);
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a list of WeightedString to a list of PendingAttribute.
|
||||
*/
|
||||
private static ArrayList<PendingAttribute> resolveBigramPositions(final DictUpdater dictUpdater,
|
||||
final ArrayList<WeightedString> bigramStrings)
|
||||
throws IOException, UnsupportedFormatException {
|
||||
if (bigramStrings == null) return CollectionUtils.newArrayList();
|
||||
final ArrayList<PendingAttribute> bigrams = CollectionUtils.newArrayList();
|
||||
for (final WeightedString bigram : bigramStrings) {
|
||||
final int pos = dictUpdater.getTerminalPosition(bigram.mWord);
|
||||
if (pos == FormatSpec.NOT_VALID_WORD) {
|
||||
// TODO: figure out what is the correct thing to do here.
|
||||
} else {
|
||||
bigrams.add(new PendingAttribute(bigram.mFrequency, pos));
|
||||
}
|
||||
}
|
||||
return bigrams;
|
||||
}
|
||||
|
||||
private static int markAsDeleted(final int flags) {
|
||||
return (flags & (~FormatSpec.MASK_CHILDREN_ADDRESS_TYPE)) | FormatSpec.FLAG_IS_DELETED;
|
||||
}
|
||||
}
|
|
@ -18,7 +18,6 @@ package com.android.inputmethod.latin.dicttool;
|
|||
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoderEncoderTests;
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictEncoderFlattenTreeTests;
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictIOUtilsTests;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionaryTest;
|
||||
|
||||
import java.lang.reflect.Constructor;
|
||||
|
@ -31,15 +30,15 @@ import java.util.ArrayList;
|
|||
*/
|
||||
public class Test extends Dicttool.Command {
|
||||
public static final String COMMAND = "test";
|
||||
private static final int DEFAULT_MAX_UNIGRAMS = 1500;
|
||||
private long mSeed = System.currentTimeMillis();
|
||||
private int mMaxUnigrams = BinaryDictIOUtilsTests.DEFAULT_MAX_UNIGRAMS;
|
||||
private int mMaxUnigrams = DEFAULT_MAX_UNIGRAMS;
|
||||
|
||||
private static final Class<?>[] sClassesToTest = {
|
||||
BinaryDictOffdeviceUtilsTests.class,
|
||||
FusionDictionaryTest.class,
|
||||
BinaryDictDecoderEncoderTests.class,
|
||||
BinaryDictEncoderFlattenTreeTests.class,
|
||||
BinaryDictIOUtilsTests.class
|
||||
};
|
||||
private ArrayList<Method> mAllTestMethods = new ArrayList<Method>();
|
||||
private ArrayList<String> mUsedTestMethods = new ArrayList<String>();
|
||||
|
|
Loading…
Reference in New Issue