am c15bbb52: Retire Delight2 migration code to speed up tests.

* commit 'c15bbb52a37be751fed2ba7e765dfd7727306308':
  Retire Delight2 migration code to speed up tests.
This commit is contained in:
Dan Zivkovic 2015-03-20 18:54:20 +00:00 committed by Android Git Automerger
commit 8eecbbd618
7 changed files with 24 additions and 1002 deletions

View file

@ -121,8 +121,7 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
private static boolean needsToMigrateDictionary(final int formatVersion) {
// When we bump up the dictionary format version, the old version should be added to here
// for supporting migration. Note that native code has to support reading such formats.
return formatVersion == FormatSpec.VERSION4_ONLY_FOR_TESTING
|| formatVersion == FormatSpec.VERSION402;
return formatVersion == FormatSpec.VERSION402;
}
public boolean isValidDictionaryLocked() {

View file

@ -174,9 +174,6 @@ public final class FormatSpec {
public static final int VERSION202 = 202;
// format version for Fava Dictionaries.
public static final int VERSION_DELIGHT3 = 86736212;
public static final int MINIMUM_SUPPORTED_VERSION_OF_CODE_POINT_TABLE = VERSION201;
// Dictionary version used for testing.
public static final int VERSION4_ONLY_FOR_TESTING = 399;
public static final int VERSION402 = 402;
public static final int VERSION403 = 403;
public static final int VERSION4 = VERSION403;

View file

@ -42,8 +42,6 @@ import java.util.Random;
public class BinaryDictionaryTests extends AndroidTestCase {
private static final String TEST_DICT_FILE_EXTENSION = ".testDict";
private static final String TEST_LOCALE = "test";
private static final int[] DICT_FORMAT_VERSIONS =
new int[] { FormatSpec.VERSION402, FormatSpec.VERSION403 };
private static final String DICTIONARY_ID = "TestBinaryDictionary";
private static boolean supportsNgram(final int formatVersion) {
@ -113,13 +111,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
public void testIsValidDictionary() {
for (final int formatVersion : DICT_FORMAT_VERSIONS) {
testIsValidDictionary(formatVersion);
}
}
private void testIsValidDictionary(final int formatVersion) {
final File dictFile = createEmptyDictionaryAndGetFile(formatVersion);
final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403);
BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile);
assertTrue("binaryDictionary must be valid for existing valid dictionary file.",
binaryDictionary.isValidDictionary());
@ -134,20 +126,14 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
public void testConstructingDictionaryOnMemory() {
for (final int formatVersion : DICT_FORMAT_VERSIONS) {
testConstructingDictionaryOnMemory(formatVersion);
}
}
private void testConstructingDictionaryOnMemory(final int formatVersion) {
final File dictFile = createEmptyDictionaryAndGetFile(formatVersion);
final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403);
FileUtils.deleteRecursively(dictFile);
assertFalse(dictFile.exists());
final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, formatVersion,
new HashMap<String, String>());
true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE,
FormatSpec.VERSION403, new HashMap<String, String>());
assertTrue(binaryDictionary.isValidDictionary());
assertEquals(formatVersion, binaryDictionary.getFormatVersion());
assertEquals(FormatSpec.VERSION403, binaryDictionary.getFormatVersion());
final int probability = 100;
addUnigramWord(binaryDictionary, "word", probability);
assertEquals(probability, binaryDictionary.getFrequency("word"));
@ -155,19 +141,13 @@ public class BinaryDictionaryTests extends AndroidTestCase {
binaryDictionary.flush();
assertTrue(dictFile.exists());
assertTrue(binaryDictionary.isValidDictionary());
assertEquals(formatVersion, binaryDictionary.getFormatVersion());
assertEquals(FormatSpec.VERSION403, binaryDictionary.getFormatVersion());
assertEquals(probability, binaryDictionary.getFrequency("word"));
binaryDictionary.close();
}
public void testAddTooLongWord() {
for (final int formatVersion : DICT_FORMAT_VERSIONS) {
testAddTooLongWord(formatVersion);
}
}
private void testAddTooLongWord(final int formatVersion) {
final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(formatVersion);
final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403);
final StringBuffer stringBuilder = new StringBuffer();
for (int i = 0; i < BinaryDictionary.DICTIONARY_MAX_WORD_LENGTH; i++) {
stringBuilder.append('a');
@ -234,13 +214,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
public void testAddUnigramWord() {
for (final int formatVersion : DICT_FORMAT_VERSIONS) {
testAddUnigramWord(formatVersion);
}
}
private void testAddUnigramWord(final int formatVersion) {
final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(formatVersion);
final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403);
final int probability = 100;
addUnigramWord(binaryDictionary, "aaa", probability);
// Reallocate and create.
@ -267,16 +241,10 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
public void testRandomlyAddUnigramWord() {
for (final int formatVersion : DICT_FORMAT_VERSIONS) {
testRandomlyAddUnigramWord(formatVersion);
}
}
private void testRandomlyAddUnigramWord(final int formatVersion) {
final int wordCount = 1000;
final int codePointSetSize = 50;
final long seed = System.currentTimeMillis();
final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(formatVersion);
final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403);
final HashMap<String, Integer> probabilityMap = new HashMap<>();
// Test a word that isn't contained within the dictionary.
@ -295,13 +263,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
public void testAddBigramWords() {
for (final int formatVersion : DICT_FORMAT_VERSIONS) {
testAddBigramWords(formatVersion);
}
}
private void testAddBigramWords(final int formatVersion) {
final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(formatVersion);
final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403);
final int unigramProbability = 100;
final int bigramProbability = 150;
@ -354,18 +316,12 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
public void testRandomlyAddBigramWords() {
for (final int formatVersion : DICT_FORMAT_VERSIONS) {
testRandomlyAddBigramWords(formatVersion);
}
}
private void testRandomlyAddBigramWords(final int formatVersion) {
final int wordCount = 100;
final int bigramCount = 1000;
final int codePointSetSize = 50;
final long seed = System.currentTimeMillis();
final Random random = new Random(seed);
final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(formatVersion);
final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403);
final ArrayList<String> words = new ArrayList<>();
final ArrayList<Pair<String, String>> bigramWords = new ArrayList<>();
@ -406,15 +362,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
public void testAddTrigramWords() {
for (final int formatVersion : DICT_FORMAT_VERSIONS) {
if (supportsNgram(formatVersion)) {
testAddTrigramWords(formatVersion);
}
}
}
private void testAddTrigramWords(final int formatVersion) {
final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(formatVersion);
final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403);
final int unigramProbability = 100;
final int trigramProbability = 150;
final int updatedTrigramProbability = 200;
@ -440,13 +388,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
public void testFlushDictionary() {
for (final int formatVersion : DICT_FORMAT_VERSIONS) {
testFlushDictionary(formatVersion);
}
}
private void testFlushDictionary(final int formatVersion) {
final File dictFile = createEmptyDictionaryAndGetFile(formatVersion);
final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403);
BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile);
final int probability = 100;
@ -480,13 +422,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
public void testFlushWithGCDictionary() {
for (final int formatVersion : DICT_FORMAT_VERSIONS) {
testFlushWithGCDictionary(formatVersion);
}
}
private void testFlushWithGCDictionary(final int formatVersion) {
final File dictFile = createEmptyDictionaryAndGetFile(formatVersion);
final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403);
BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile);
final int unigramProbability = 100;
final int bigramProbability = 150;
@ -516,20 +452,13 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
public void testAddBigramWordsAndFlashWithGC() {
for (final int formatVersion : DICT_FORMAT_VERSIONS) {
testAddBigramWordsAndFlashWithGC(formatVersion);
}
}
// TODO: Evaluate performance of GC
private void testAddBigramWordsAndFlashWithGC(final int formatVersion) {
final int wordCount = 100;
final int bigramCount = 1000;
final int codePointSetSize = 30;
final long seed = System.currentTimeMillis();
final Random random = new Random(seed);
final File dictFile = createEmptyDictionaryAndGetFile(formatVersion);
final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403);
BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile);
final ArrayList<String> words = new ArrayList<>();
@ -575,12 +504,6 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
public void testRandomOperationsAndFlashWithGC() {
for (final int formatVersion : DICT_FORMAT_VERSIONS) {
testRandomOperationsAndFlashWithGC(formatVersion);
}
}
private void testRandomOperationsAndFlashWithGC(final int formatVersion) {
final int maxUnigramCount = 5000;
final int maxBigramCount = 10000;
final HashMap<String, String> attributeMap = new HashMap<>();
@ -596,7 +519,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
final long seed = System.currentTimeMillis();
final Random random = new Random(seed);
final File dictFile = createEmptyDictionaryWithAttributesAndGetFile(formatVersion,
final File dictFile = createEmptyDictionaryWithAttributesAndGetFile(FormatSpec.VERSION403,
attributeMap);
BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile);
@ -675,19 +598,13 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
public void testAddManyUnigramsAndFlushWithGC() {
for (final int formatVersion : DICT_FORMAT_VERSIONS) {
testAddManyUnigramsAndFlushWithGC(formatVersion);
}
}
private void testAddManyUnigramsAndFlushWithGC(final int formatVersion) {
final int flashWithGCIterationCount = 3;
final int codePointSetSize = 50;
final long seed = System.currentTimeMillis();
final Random random = new Random(seed);
final File dictFile = createEmptyDictionaryAndGetFile(formatVersion);
final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403);
final ArrayList<String> words = new ArrayList<>();
final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
@ -716,12 +633,6 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
public void testUnigramAndBigramCount() {
for (final int formatVersion : DICT_FORMAT_VERSIONS) {
testUnigramAndBigramCount(formatVersion);
}
}
private void testUnigramAndBigramCount(final int formatVersion) {
final int maxUnigramCount = 5000;
final int maxBigramCount = 10000;
final HashMap<String, String> attributeMap = new HashMap<>();
@ -734,7 +645,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
final int bigramCountPerIteration = 2000;
final long seed = System.currentTimeMillis();
final Random random = new Random(seed);
final File dictFile = createEmptyDictionaryWithAttributesAndGetFile(formatVersion,
final File dictFile = createEmptyDictionaryWithAttributesAndGetFile(FormatSpec.VERSION403,
attributeMap);
final ArrayList<String> words = new ArrayList<>();
@ -778,19 +689,13 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
public void testGetWordProperties() {
for (final int formatVersion : DICT_FORMAT_VERSIONS) {
testGetWordProperties(formatVersion);
}
}
private void testGetWordProperties(final int formatVersion) {
final long seed = System.currentTimeMillis();
final Random random = new Random(seed);
final int UNIGRAM_COUNT = 1000;
final int BIGRAM_COUNT = 1000;
final int codePointSetSize = 20;
final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
final File dictFile = createEmptyDictionaryAndGetFile(formatVersion);
final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403);
final BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile);
final WordProperty invalidWordProperty = binaryDictionary.getWordProperty("dummyWord",
@ -869,19 +774,13 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
public void testIterateAllWords() {
for (final int formatVersion : DICT_FORMAT_VERSIONS) {
testIterateAllWords(formatVersion);
}
}
private void testIterateAllWords(final int formatVersion) {
final long seed = System.currentTimeMillis();
final Random random = new Random(seed);
final int UNIGRAM_COUNT = 1000;
final int BIGRAM_COUNT = 1000;
final int codePointSetSize = 20;
final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(formatVersion);
final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403);
final WordProperty invalidWordProperty = binaryDictionary.getWordProperty("dummyWord",
false /* isBeginningOfSentence */);
@ -965,123 +864,8 @@ public class BinaryDictionaryTests extends AndroidTestCase {
assertEquals(true, wordProperty.mIsPossiblyOffensive);
}
public void testDictMigration() {
for (final int formatVersion : DICT_FORMAT_VERSIONS) {
testDictMigration(FormatSpec.VERSION4_ONLY_FOR_TESTING, formatVersion);
}
}
private void testDictMigration(final int fromFormatVersion, final int toFormatVersion) {
final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(fromFormatVersion);
final int unigramProbability = 100;
addUnigramWord(binaryDictionary, "aaa", unigramProbability);
addUnigramWord(binaryDictionary, "bbb", unigramProbability);
final int bigramProbability = 150;
addBigramWords(binaryDictionary, "aaa", "bbb", bigramProbability);
binaryDictionary.addUnigramEntry("ccc", unigramProbability,
false /* isBeginningOfSentence */, false /* isNotAWord */,
false /* isPossiblyOffensive */, 0 /* timestamp */);
binaryDictionary.addUnigramEntry("ddd", unigramProbability,
false /* isBeginningOfSentence */,
true /* isNotAWord */, true /* isPossiblyOffensive */, 0 /* timestamp */);
binaryDictionary.addNgramEntry(NgramContext.BEGINNING_OF_SENTENCE,
"aaa", bigramProbability, 0 /* timestamp */);
assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa"));
assertEquals(unigramProbability, binaryDictionary.getFrequency("bbb"));
assertTrue(isValidBigram(binaryDictionary, "aaa", "bbb"));
assertEquals(fromFormatVersion, binaryDictionary.getFormatVersion());
assertTrue(binaryDictionary.migrateTo(toFormatVersion));
assertTrue(binaryDictionary.isValidDictionary());
assertEquals(toFormatVersion, binaryDictionary.getFormatVersion());
assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa"));
assertEquals(unigramProbability, binaryDictionary.getFrequency("bbb"));
assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bbb"));
assertEquals(bigramProbability, binaryDictionary.getNgramProbability(
NgramContext.BEGINNING_OF_SENTENCE, "aaa"));
assertTrue(isValidBigram(binaryDictionary, "aaa", "bbb"));
WordProperty wordProperty = binaryDictionary.getWordProperty("ccc",
false /* isBeginningOfSentence */);
wordProperty = binaryDictionary.getWordProperty("ddd",
false /* isBeginningOfSentence */);
assertTrue(wordProperty.mIsPossiblyOffensive);
assertTrue(wordProperty.mIsNotAWord);
}
public void testLargeDictMigration() {
for (final int formatVersion : DICT_FORMAT_VERSIONS) {
testLargeDictMigration(FormatSpec.VERSION4_ONLY_FOR_TESTING, formatVersion);
}
}
private void testLargeDictMigration(final int fromFormatVersion, final int toFormatVersion) {
final int UNIGRAM_COUNT = 3000;
final int BIGRAM_COUNT = 3000;
final int codePointSetSize = 50;
final long seed = System.currentTimeMillis();
final Random random = new Random(seed);
final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(fromFormatVersion);
final ArrayList<String> words = new ArrayList<>();
final ArrayList<Pair<String, String>> bigrams = new ArrayList<>();
final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>();
for (int i = 0; i < UNIGRAM_COUNT; i++) {
final String word = CodePointUtils.generateWord(random, codePointSet);
final int unigramProbability = random.nextInt(0xFF);
addUnigramWord(binaryDictionary, word, unigramProbability);
if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
binaryDictionary.flushWithGC();
}
words.add(word);
unigramProbabilities.put(word, unigramProbability);
}
for (int i = 0; i < BIGRAM_COUNT; i++) {
final int word0Index = random.nextInt(words.size());
final int word1Index = random.nextInt(words.size());
if (word0Index == word1Index) {
continue;
}
final String word0 = words.get(word0Index);
final String word1 = words.get(word1Index);
final int unigramProbability = unigramProbabilities.get(word1);
final int bigramProbability =
random.nextInt(0xFF - unigramProbability) + unigramProbability;
addBigramWords(binaryDictionary, word0, word1, bigramProbability);
if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
binaryDictionary.flushWithGC();
}
final Pair<String, String> bigram = new Pair<>(word0, word1);
bigrams.add(bigram);
bigramProbabilities.put(bigram, bigramProbability);
}
assertTrue(binaryDictionary.migrateTo(toFormatVersion));
for (final String word : words) {
assertEquals((int)unigramProbabilities.get(word), binaryDictionary.getFrequency(word));
}
assertEquals(unigramProbabilities.size(), Integer.parseInt(
binaryDictionary.getPropertyForGettingStats(BinaryDictionary.UNIGRAM_COUNT_QUERY)));
for (final Pair<String, String> bigram : bigrams) {
assertEquals((int)bigramProbabilities.get(bigram),
getBigramProbability(binaryDictionary, bigram.first, bigram.second));
assertTrue(isValidBigram(binaryDictionary, bigram.first, bigram.second));
}
assertEquals(bigramProbabilities.size(), Integer.parseInt(
binaryDictionary.getPropertyForGettingStats(BinaryDictionary.BIGRAM_COUNT_QUERY)));
}
public void testBeginningOfSentence() {
for (final int formatVersion : DICT_FORMAT_VERSIONS) {
testBeginningOfSentence(formatVersion);
}
}
private void testBeginningOfSentence(final int formatVersion) {
final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(formatVersion);
final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403);
final int dummyProbability = 0;
final NgramContext beginningOfSentenceContext = NgramContext.BEGINNING_OF_SENTENCE;
final int bigramProbability = 200;

View file

@ -43,22 +43,12 @@ public final class BinaryDictIOUtils {
*/
public static DictDecoder getDictDecoder(final File dictFile, final long offset,
final long length, final int bufferType) {
if (dictFile.isDirectory()) {
return new Ver4DictDecoder(dictFile);
} else if (dictFile.isFile()) {
return new Ver2DictDecoder(dictFile, offset, length, bufferType);
}
return null;
return new Ver4DictDecoder(dictFile);
}
public static DictDecoder getDictDecoder(final File dictFile, final long offset,
final long length, final DictionaryBufferFactory factory) {
if (dictFile.isDirectory()) {
return new Ver4DictDecoder(dictFile);
} else if (dictFile.isFile()) {
return new Ver2DictDecoder(dictFile, offset, length, factory);
}
return null;
return new Ver4DictDecoder(dictFile);
}
public static DictDecoder getDictDecoder(final File dictFile, final long offset,

View file

@ -1,319 +0,0 @@
/*
* Copyright (C) 2013 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.android.inputmethod.latin.makedict;
import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.BinaryDictionary;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
/**
 * An implementation of DictDecoder for version 2 binary dictionary.
 *
 * <p>Low-level PtNode parsing is done directly on a {@link DictBuffer}; header and whole-dictionary
 * reads are delegated to a temporary {@link BinaryDictionary} opened on the same file, which is
 * always closed before the calling method returns or throws.
 */
// TODO: Separate logics that are used only for testing.
@UsedForTesting
public class Ver2DictDecoder extends AbstractDictDecoder {
    /**
     * A utility class for reading a PtNode.
     */
    static class PtNodeReader {
        /**
         * Reads the probability of a terminal PtNode.
         *
         * @param dictBuffer the buffer, positioned on the probability byte.
         * @return a ProbabilityInfo built from the single unsigned byte that was read.
         */
        static ProbabilityInfo readProbabilityInfo(final DictBuffer dictBuffer) {
            // Ver2 dicts don't contain historical information.
            return new ProbabilityInfo(dictBuffer.readUnsignedByte());
        }

        /**
         * Reads the option flags of a PtNode.
         *
         * @param dictBuffer the buffer, positioned on the flags byte.
         * @return the flags, read as one unsigned byte.
         */
        static int readPtNodeOptionFlags(final DictBuffer dictBuffer) {
            return dictBuffer.readUnsignedByte();
        }

        /**
         * Reads the children address field, whose width is selected by the PtNode flags.
         *
         * @param dictBuffer the buffer, positioned on the children address field.
         * @param ptNodeFlags the option flags of the PtNode being read.
         * @return the raw value read (1, 2 or 3 bytes), or
         *         {@link FormatSpec#NO_CHILDREN_ADDRESS} when the flags select no address; in
         *         that case nothing is read from the buffer.
         */
        static int readChildrenAddress(final DictBuffer dictBuffer,
                final int ptNodeFlags) {
            switch (ptNodeFlags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) {
                case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_ONEBYTE:
                    return dictBuffer.readUnsignedByte();
                case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_TWOBYTES:
                    return dictBuffer.readUnsignedShort();
                case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_THREEBYTES:
                    return dictBuffer.readUnsignedInt24();
                case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_NOADDRESS:
                default:
                    return FormatSpec.NO_CHILDREN_ADDRESS;
            }
        }

        /**
         * Reads shortcuts and returns the read length.
         *
         * @param dictBuffer the buffer, positioned at the start of the shortcut list.
         * @param shortcutTargets the list every decoded shortcut is appended to.
         * @return the number of buffer positions consumed, including the skipped size field.
         */
        static int readShortcut(final DictBuffer dictBuffer,
                final ArrayList<WeightedString> shortcutTargets) {
            final int pointerBefore = dictBuffer.position();
            dictBuffer.readUnsignedShort(); // skip the size
            int targetFlags;
            // At least one entry is always read; the HAS_NEXT bit of each entry decides whether
            // another one follows.
            do {
                targetFlags = dictBuffer.readUnsignedByte();
                final String word = CharEncoding.readString(dictBuffer);
                shortcutTargets.add(new WeightedString(word,
                        targetFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY));
            } while (0 != (targetFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT));
            return dictBuffer.position() - pointerBefore;
        }

        /**
         * Reads the bigram list of a PtNode, resolving each relative offset to an address.
         *
         * @param dictBuffer the buffer, positioned at the start of the bigram list.
         * @param bigrams the list every decoded bigram attribute is appended to.
         * @param baseAddress the address of the start of the bigram list.
         * @return the number of bytes read.
         * @throws RuntimeException if an entry's flags select none of the known address widths.
         */
        static int readBigramAddresses(final DictBuffer dictBuffer,
                final ArrayList<PendingAttribute> bigrams, final int baseAddress) {
            int readLength = 0;
            // Never read more than MAX_BIGRAMS_IN_A_PTNODE entries, even if HAS_NEXT stays set.
            for (int bigramCount = 0; bigramCount < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE;
                    bigramCount++) {
                final int bigramFlags = dictBuffer.readUnsignedByte();
                ++readLength;
                final int sign = 0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_ATTR_OFFSET_NEGATIVE)
                        ? 1 : -1;
                // Offsets are relative to the position just after the flags byte.
                int bigramAddress = baseAddress + readLength;
                switch (bigramFlags & FormatSpec.MASK_BIGRAM_ATTR_ADDRESS_TYPE) {
                    case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_ONEBYTE:
                        bigramAddress += sign * dictBuffer.readUnsignedByte();
                        readLength += 1;
                        break;
                    case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_TWOBYTES:
                        bigramAddress += sign * dictBuffer.readUnsignedShort();
                        readLength += 2;
                        break;
                    case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_THREEBYTES:
                        bigramAddress += sign * dictBuffer.readUnsignedInt24();
                        readLength += 3;
                        break;
                    default:
                        throw new RuntimeException("Has bigrams with no address");
                }
                bigrams.add(new PendingAttribute(
                        bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY,
                        bigramAddress));
                if (0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break;
            }
            return readLength;
        }
    }

    protected final File mDictionaryBinaryFile;
    protected final long mOffset;
    protected final long mLength;
    // TODO: Remove mBufferFactory and mDictBuffer from this class members because they are now
    // used only for testing.
    private final DictionaryBufferFactory mBufferFactory;
    protected DictBuffer mDictBuffer;

    /**
     * Creates a decoder whose buffer factory is selected by a flag.
     *
     * @param file the dictionary file.
     * @param offset the offset passed to {@link BinaryDictionary} when reading the header.
     * @param length the length passed to {@link BinaryDictionary} when reading the header.
     * @param factoryFlag flags whose {@code MASK_DICTBUFFER} bits select the buffer factory;
     *        unrecognized values fall back to the read-only byte buffer factory.
     */
    @UsedForTesting
    /* package */ Ver2DictDecoder(final File file, final long offset, final long length,
            final int factoryFlag) {
        mDictionaryBinaryFile = file;
        mOffset = offset;
        mLength = length;
        mDictBuffer = null;
        final int bufferType = factoryFlag & MASK_DICTBUFFER;
        if (bufferType == USE_READONLY_BYTEBUFFER) {
            mBufferFactory = new DictionaryBufferFromReadOnlyByteBufferFactory();
        } else if (bufferType == USE_BYTEARRAY) {
            mBufferFactory = new DictionaryBufferFromByteArrayFactory();
        } else if (bufferType == USE_WRITABLE_BYTEBUFFER) {
            mBufferFactory = new DictionaryBufferFromWritableByteBufferFactory();
        } else {
            mBufferFactory = new DictionaryBufferFromReadOnlyByteBufferFactory();
        }
    }

    /**
     * Creates a decoder that obtains its buffer from the given factory.
     *
     * @param file the dictionary file.
     * @param offset the offset passed to {@link BinaryDictionary} when reading the header.
     * @param length the length passed to {@link BinaryDictionary} when reading the header.
     * @param factory the factory used by {@link #openDictBuffer()}.
     */
    /* package */ Ver2DictDecoder(final File file, final long offset, final long length,
            final DictionaryBufferFactory factory) {
        mDictionaryBinaryFile = file;
        mOffset = offset;
        mLength = length;
        mBufferFactory = factory;
    }

    /** Opens (or re-opens) the buffer on the dictionary file using the configured factory. */
    @Override
    public void openDictBuffer() throws FileNotFoundException, IOException {
        mDictBuffer = mBufferFactory.getDictionaryBuffer(mDictionaryBinaryFile);
    }

    /** @return whether a buffer is currently held by this decoder. */
    @Override
    public boolean isDictBufferOpen() {
        return mDictBuffer != null;
    }

    /** @return the current buffer, or null if none has been opened. */
    /* package */ DictBuffer getDictBuffer() {
        return mDictBuffer;
    }

    /** Opens the buffer and returns it. */
    @UsedForTesting
    /* package */ DictBuffer openAndGetDictBuffer() throws FileNotFoundException, IOException {
        openDictBuffer();
        return getDictBuffer();
    }

    /**
     * Reads the dictionary header and moves the buffer to the head of the dictionary body.
     *
     * <p>The buffer is opened first if it is not open yet.
     *
     * @return the header of the dictionary file.
     * @throws IOException if the header cannot be read.
     * @throws UnsupportedFormatException if the header version is not 2, 201 or 202.
     */
    @Override
    public DictionaryHeader readHeader() throws IOException, UnsupportedFormatException {
        // dictType is not being used in dicttool. Passing an empty string.
        final BinaryDictionary binaryDictionary = new BinaryDictionary(
                mDictionaryBinaryFile.getAbsolutePath(), mOffset, mLength,
                true /* useFullEditDistance */, null /* locale */, "" /* dictType */,
                false /* isUpdatable */);
        final DictionaryHeader header;
        try {
            header = binaryDictionary.getHeader();
        } finally {
            // Release the temporary dictionary even when reading the header throws.
            binaryDictionary.close();
        }
        if (header == null) {
            throw new IOException("Cannot read the dictionary header.");
        }
        final int version = header.mFormatOptions.mVersion;
        if (version != FormatSpec.VERSION2 &&
                version != FormatSpec.VERSION201 &&
                version != FormatSpec.VERSION202) {
            throw new UnsupportedFormatException("File header has a wrong version : "
                    + version);
        }
        if (!isDictBufferOpen()) {
            openDictBuffer();
        }
        // Advance buffer reading position to the head of dictionary body.
        setPosition(header.mBodyOffset);
        return header;
    }

    // TODO: Make this buffer multi thread safe.
    private final int[] mCharacterBuffer = new int[FormatSpec.MAX_WORD_LENGTH];

    /**
     * Reads one PtNode from the current position of the buffer.
     *
     * <p>The buffer must already be open; {@code ptNodePos} is only used to compute addresses
     * and is not used to reposition the buffer.
     *
     * @param ptNodePos the address of the PtNode being read.
     * @return the decoded PtNode information.
     * @throws RuntimeException if the bigram list is malformed or reaches the per-node cap.
     */
    @Override
    public PtNodeInfo readPtNode(final int ptNodePos) {
        int addressPointer = ptNodePos;
        final int flags = PtNodeReader.readPtNodeOptionFlags(mDictBuffer);
        addressPointer += FormatSpec.PTNODE_FLAGS_SIZE;
        final int characters[];
        if (0 != (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS)) {
            int index = 0;
            int character = CharEncoding.readChar(mDictBuffer);
            addressPointer += CharEncoding.getCharSize(character, null);
            while (FormatSpec.INVALID_CHARACTER != character) {
                // FusionDictionary is making sure that the length of the word is smaller than
                // MAX_WORD_LENGTH.
                // So we'll never write past the end of mCharacterBuffer.
                mCharacterBuffer[index++] = character;
                character = CharEncoding.readChar(mDictBuffer);
                addressPointer += CharEncoding.getCharSize(character, null);
            }
            characters = Arrays.copyOfRange(mCharacterBuffer, 0, index);
        } else {
            final int character = CharEncoding.readChar(mDictBuffer);
            addressPointer += CharEncoding.getCharSize(character, null);
            characters = new int[] { character };
        }
        final ProbabilityInfo probabilityInfo;
        if (0 != (FormatSpec.FLAG_IS_TERMINAL & flags)) {
            probabilityInfo = PtNodeReader.readProbabilityInfo(mDictBuffer);
            addressPointer += FormatSpec.PTNODE_FREQUENCY_SIZE;
        } else {
            probabilityInfo = null;
        }
        int childrenAddress = PtNodeReader.readChildrenAddress(mDictBuffer, flags);
        if (childrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) {
            // The stored value is relative to the position of the address field itself.
            childrenAddress += addressPointer;
        }
        addressPointer += BinaryDictIOUtils.getChildrenAddressSize(flags);
        final ArrayList<WeightedString> shortcutTargets;
        if (0 != (flags & FormatSpec.FLAG_HAS_SHORTCUT_TARGETS)) {
            // readShortcut will add shortcuts to shortcutTargets.
            shortcutTargets = new ArrayList<>();
            addressPointer += PtNodeReader.readShortcut(mDictBuffer, shortcutTargets);
        } else {
            shortcutTargets = null;
        }
        final ArrayList<PendingAttribute> bigrams;
        if (0 != (flags & FormatSpec.FLAG_HAS_BIGRAMS)) {
            bigrams = new ArrayList<>();
            addressPointer += PtNodeReader.readBigramAddresses(mDictBuffer, bigrams,
                    addressPointer);
            // readBigramAddresses stops after MAX_BIGRAMS_IN_A_PTNODE entries, so a list that
            // reaches the cap is rejected here.
            if (bigrams.size() >= FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
                throw new RuntimeException("Too many bigrams in a PtNode (" + bigrams.size()
                        + " but max is " + FormatSpec.MAX_BIGRAMS_IN_A_PTNODE + ")");
            }
        } else {
            bigrams = null;
        }
        return new PtNodeInfo(ptNodePos, addressPointer, flags, characters, probabilityInfo,
                childrenAddress, shortcutTargets, bigrams);
    }

    /**
     * Reads the whole dictionary file into a {@link FusionDictionary}.
     *
     * <p>Words are enumerated through a temporary {@link BinaryDictionary}, which is closed on
     * every exit path, including when reading the header or a word property throws.
     *
     * @param deleteDictIfBroken whether to delete the dictionary file when a word property
     *        cannot be read.
     * @return the dictionary contents, or null if a word property could not be read.
     * @throws FileNotFoundException if the dictionary buffer cannot be opened.
     * @throws IOException if the header cannot be read.
     * @throws UnsupportedFormatException if the header version is not supported.
     */
    @Override
    public FusionDictionary readDictionaryBinary(final boolean deleteDictIfBroken)
            throws FileNotFoundException, IOException, UnsupportedFormatException {
        // dictType is not being used in dicttool. Passing an empty string.
        final BinaryDictionary binaryDictionary = new BinaryDictionary(
                mDictionaryBinaryFile.getAbsolutePath(), 0 /* offset */,
                mDictionaryBinaryFile.length() /* length */, true /* useFullEditDistance */,
                null /* locale */, "" /* dictType */, false /* isUpdatable */);
        final ArrayList<WordProperty> wordProperties = new ArrayList<>();
        final DictionaryHeader header;
        boolean isBroken = false;
        try {
            header = readHeader();
            int token = 0;
            do {
                final BinaryDictionary.GetNextWordPropertyResult result =
                        binaryDictionary.getNextWordProperty(token);
                final WordProperty wordProperty = result.mWordProperty;
                if (wordProperty == null) {
                    isBroken = true;
                    break;
                }
                wordProperties.add(wordProperty);
                token = result.mNextToken;
            } while (token != 0);
        } finally {
            // All needed data has been copied into wordProperties (or reading failed), so the
            // temporary dictionary is released here regardless of how the block exits.
            binaryDictionary.close();
        }
        if (isBroken) {
            if (deleteDictIfBroken) {
                mDictionaryBinaryFile.delete();
            }
            return null;
        }

        final FusionDictionary fusionDict =
                new FusionDictionary(new FusionDictionary.PtNodeArray(), header.mDictionaryOptions);
        // Insert unigrams into the fusion dictionary.
        for (final WordProperty wordProperty : wordProperties) {
            fusionDict.add(wordProperty.mWord, wordProperty.mProbabilityInfo,
                    wordProperty.mIsNotAWord,
                    wordProperty.mIsPossiblyOffensive);
        }
        // Insert bigrams into the fusion dictionary.
        for (final WordProperty wordProperty : wordProperties) {
            if (!wordProperty.mHasNgrams) {
                continue;
            }
            final String word0 = wordProperty.mWord;
            for (final WeightedString bigram : wordProperty.getBigrams()) {
                fusionDict.setBigram(word0, bigram.mWord, bigram.mProbabilityInfo);
            }
        }
        return fusionDict;
    }

    /** Moves the buffer to {@code newPos}; the buffer must already be open. */
    @Override
    public void setPosition(int newPos) {
        mDictBuffer.position(newPos);
    }

    /** @return the current position of the buffer; the buffer must already be open. */
    @Override
    public int getPosition() {
        return mDictBuffer.position();
    }

    /** @return the PtNode count read from the current position of the buffer. */
    @Override
    public int readPtNodeCount() {
        return BinaryDictDecoderUtils.readPtNodeCount(mDictBuffer);
    }
}

View file

@ -1,150 +0,0 @@
/*
* Copyright (C) 2013 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.android.inputmethod.latin.makedict;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
import com.android.inputmethod.latin.makedict.DictDecoder.DictionaryBufferFactory;
import com.android.inputmethod.latin.makedict.DictDecoder.DictionaryBufferFromByteArrayFactory;
import com.android.inputmethod.latin.makedict.DictDecoder.
DictionaryBufferFromReadOnlyByteBufferFactory;
import com.android.inputmethod.latin.makedict.DictDecoder.
DictionaryBufferFromWritableByteBufferFactory;
import android.test.AndroidTestCase;
import android.util.Log;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
/**
 * Unit tests for Ver2DictDecoder
 *
 * Each scenario is run once per {@link DictionaryBufferFactory} implementation (read-only byte
 * buffer, byte array, writable byte buffer) against a temporary file created in the test
 * context's cache directory. The temporary file is removed when the scenario finishes.
 */
public class Ver2DictDecoderTests extends AndroidTestCase {
    private static final String TAG = Ver2DictDecoderTests.class.getSimpleName();

    // Fixture bytes written to the temporary file and expected back from the dictionary buffer.
    private final byte[] data = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };

    // Utilities for testing

    /**
     * Writes the fixture bytes to {@code file}, failing the running test on an I/O error.
     *
     * @param file the file to overwrite with the fixture bytes.
     */
    public void writeDataToFile(final File file) {
        FileOutputStream outStream = null;
        try {
            outStream = new FileOutputStream(file);
            outStream.write(data);
        } catch (IOException e) {
            fail ("Can't write data to the test file");
        } finally {
            if (outStream != null) {
                try {
                    outStream.close();
                } catch (IOException e) {
                    Log.e(TAG, "Failed to close the output stream", e);
                }
            }
        }
    }

    /**
     * Creates an empty temporary file in the cache directory of the test context.
     *
     * A creation failure is logged and then reported as an assertion failure, so callers never
     * receive {@code null}.
     *
     * @param testName used as the prefix of the temporary file name.
     * @return the newly created file.
     */
    private File createTempTestFile(final String testName) {
        File testFile = null;
        try {
            testFile = File.createTempFile(testName, ".tmp", getContext().getCacheDir());
        } catch (IOException e) {
            Log.e(TAG, "IOException while the creating temporary file", e);
        }
        assertNotNull(testFile);
        return testFile;
    }

    /**
     * Opens the dictionary buffer on an empty file, fills the file, opens the buffer again and
     * checks that the buffer capacity equals the file length.
     *
     * @param testName used as the prefix of the temporary file name.
     * @param factory the buffer factory under test.
     */
    public void runTestOpenBuffer(final String testName, final DictionaryBufferFactory factory) {
        final File testFile = createTempTestFile(testName);
        try {
            final Ver2DictDecoder dictDecoder = new Ver2DictDecoder(testFile, 0,
                    testFile.length(), factory);
            // First attempt happens while the file is still empty; a failure here is only logged.
            try {
                dictDecoder.openDictBuffer();
            } catch (Exception e) {
                Log.e(TAG, "Failed to open the buffer", e);
            }
            writeDataToFile(testFile);
            try {
                dictDecoder.openDictBuffer();
            } catch (Exception e) {
                Log.e(TAG, "Raised the exception while opening buffer", e);
            }
            assertEquals(testFile.length(), dictDecoder.getDictBuffer().capacity());
        } finally {
            // Do not leave temporary files behind in the cache directory.
            testFile.delete();
        }
    }

    public void testOpenBufferWithByteBuffer() {
        runTestOpenBuffer("testOpenBufferWithByteBuffer",
                new DictionaryBufferFromReadOnlyByteBufferFactory());
    }

    public void testOpenBufferWithByteArray() {
        runTestOpenBuffer("testOpenBufferWithByteArray",
                new DictionaryBufferFromByteArrayFactory());
    }

    public void testOpenBufferWithWritableByteBuffer() {
        runTestOpenBuffer("testOpenBufferWithWritableByteBuffer",
                new DictionaryBufferFromWritableByteBufferFactory());
    }

    /**
     * Checks that the decoder exposes no buffer before opening, and that after the file has been
     * filled, the opened buffer yields the fixture bytes in order.
     *
     * @param testName used as the prefix of the temporary file name.
     * @param factory the buffer factory under test.
     */
    public void runTestGetBuffer(final String testName, final DictionaryBufferFactory factory) {
        // The helper asserts non-null, so a failed creation is reported as a test failure rather
        // than as a NullPointerException on testFile.length() below.
        final File testFile = createTempTestFile(testName);
        try {
            final Ver2DictDecoder dictDecoder = new Ver2DictDecoder(testFile, 0,
                    testFile.length(), factory);
            // the default return value of getBuffer() must be null.
            assertNull("the default return value of getBuffer() is not null",
                    dictDecoder.getDictBuffer());
            writeDataToFile(testFile);
            assertTrue(testFile.exists());
            Log.d(TAG, "file length = " + testFile.length());
            DictBuffer dictBuffer = null;
            try {
                dictBuffer = dictDecoder.openAndGetDictBuffer();
            } catch (IOException e) {
                Log.e(TAG, "Failed to open and get the buffer", e);
            }
            assertNotNull("the buffer must not be null", dictBuffer);
            for (int i = 0; i < data.length; ++i) {
                assertEquals(data[i], dictBuffer.readUnsignedByte());
            }
        } finally {
            // Do not leave temporary files behind in the cache directory.
            testFile.delete();
        }
    }

    public void testGetBufferWithByteBuffer() {
        runTestGetBuffer("testGetBufferWithByteBuffer",
                new DictionaryBufferFromReadOnlyByteBufferFactory());
    }

    public void testGetBufferWithByteArray() {
        runTestGetBuffer("testGetBufferWithByteArray",
                new DictionaryBufferFromByteArrayFactory());
    }

    public void testGetBufferWithWritableByteBuffer() {
        runTestGetBuffer("testGetBufferWithWritableByteBuffer",
                new DictionaryBufferFromWritableByteBufferFactory());
    }
}

View file

@ -1,279 +0,0 @@
/*
* Copyright (C) 2013 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.android.inputmethod.latin.makedict;
import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
import com.android.inputmethod.latin.makedict.BinaryDictEncoderUtils.CodePointTable;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map.Entry;
/**
 * An implementation of DictEncoder for version 2 binary dictionary.
 *
 * The encoder serializes the PtNode arrays into an in-memory buffer and then writes that buffer
 * to the output stream in one go. The stream is closed when {@link #writeDictionary} returns,
 * whether it succeeded or threw.
 */
@UsedForTesting
public class Ver2DictEncoder implements DictEncoder {
    private final File mDictFile;
    private OutputStream mOutStream;
    private byte[] mBuffer;
    private int mPosition;
    private final int mCodePointTableMode;
    public static final int CODE_POINT_TABLE_OFF = 0;
    public static final int CODE_POINT_TABLE_ON = 1;

    /**
     * Creates an encoder that writes to {@code dictFile}. The file is opened lazily by
     * {@link #writeDictionary}.
     *
     * @param dictFile the destination file.
     * @param codePointTableMode {@link #CODE_POINT_TABLE_ON} or {@link #CODE_POINT_TABLE_OFF}.
     */
    @UsedForTesting
    public Ver2DictEncoder(final File dictFile, final int codePointTableMode) {
        mDictFile = dictFile;
        mOutStream = null;
        mBuffer = null;
        mCodePointTableMode = codePointTableMode;
    }

    // This constructor is used only by BinaryDictOffdeviceUtilsTests.
    // If you want to use this in the production code, you should consider keeping consistency of
    // the interface of Ver3DictDecoder by using factory.
    @UsedForTesting
    public Ver2DictEncoder(final OutputStream outStream) {
        mDictFile = null;
        mOutStream = outStream;
        mCodePointTableMode = CODE_POINT_TABLE_OFF;
    }

    /** Opens a file output stream on the destination file. */
    private void openStream() throws FileNotFoundException {
        mOutStream = new FileOutputStream(mDictFile);
    }

    /** Closes the output stream if one is open, and forgets it. */
    private void close() throws IOException {
        if (mOutStream != null) {
            mOutStream.close();
            mOutStream = null;
        }
    }

    /**
     * Builds the code point table for {@code dict}.
     *
     * Code points are counted over every word of the dictionary, then sorted by descending
     * occurrence count (ties broken by descending code point). One-byte codes are handed out in
     * that order, starting at {@code FormatSpec.MINIMAL_ONE_BYTE_CHARACTER_VALUE}, until
     * {@code FormatSpec.MAXIMAL_ONE_BYTE_CHARACTER_VALUE} has been passed.
     *
     * @param dict the dictionary whose words are scanned.
     * @return a table holding the code-point-to-one-byte-code map and the sorted occurrences.
     */
    // Package for testing
    static CodePointTable makeCodePointTable(final FusionDictionary dict) {
        final HashMap<Integer, Integer> codePointOccurrenceCounts = new HashMap<>();
        for (final WordProperty word : dict) {
            // Store per code point occurrence
            final String wordString = word.mWord;
            // Step by whole code points: advancing one char at a time would also count the low
            // surrogate of every supplementary character as a code point of its own.
            for (int i = 0; i < wordString.length(); ) {
                final int codePoint = Character.codePointAt(wordString, i);
                final Integer previousCount = codePointOccurrenceCounts.get(codePoint);
                codePointOccurrenceCounts.put(codePoint,
                        previousCount == null ? 1 : previousCount + 1);
                i += Character.charCount(codePoint);
            }
        }
        final ArrayList<Entry<Integer, Integer>> codePointOccurrenceArray =
                new ArrayList<>(codePointOccurrenceCounts.entrySet());
        // Descending order sort by occurrence (value side)
        Collections.sort(codePointOccurrenceArray, new Comparator<Entry<Integer, Integer>>() {
            @Override
            public int compare(final Entry<Integer, Integer> a, final Entry<Integer, Integer> b) {
                // Compare the boxed counts by value. A reference comparison (!=) would treat
                // equal counts held in distinct Integer objects as different and skip the
                // tie-break on the code point.
                final int byOccurrence = b.getValue().compareTo(a.getValue());
                if (byOccurrence != 0) {
                    return byOccurrence;
                }
                return b.getKey().compareTo(a.getKey());
            }
        });
        int currentCodePointTableIndex = FormatSpec.MINIMAL_ONE_BYTE_CHARACTER_VALUE;
        // Temporary map for writing of nodes
        final HashMap<Integer, Integer> codePointToOneByteCodeMap = new HashMap<>();
        for (final Entry<Integer, Integer> entry : codePointOccurrenceArray) {
            // Put a relation from the original code point to the one byte code.
            codePointToOneByteCodeMap.put(entry.getKey(), currentCodePointTableIndex);
            if (FormatSpec.MAXIMAL_ONE_BYTE_CHARACTER_VALUE < ++currentCodePointTableIndex) {
                break;
            }
        }
        // codePointToOneByteCodeMap for writing the trie
        // codePointOccurrenceArray for writing the header
        return new CodePointTable(codePointToOneByteCodeMap, codePointOccurrenceArray);
    }

    /**
     * Writes {@code dict} to the output in version 202 format.
     *
     * The output stream is closed before this method returns, including when writing fails
     * part-way through.
     *
     * @param dict the dictionary to serialize.
     * @param formatOptions the format options; the version must be {@code FormatSpec.VERSION202}.
     * @throws IOException if opening, writing or closing the output fails.
     * @throws UnsupportedFormatException if the requested version is not version 202.
     */
    @Override
    public void writeDictionary(final FusionDictionary dict, final FormatOptions formatOptions)
            throws IOException, UnsupportedFormatException {
        // We no longer support anything but the latest version of v2.
        if (formatOptions.mVersion != FormatSpec.VERSION202) {
            throw new UnsupportedFormatException(
                    "The given format options has wrong version number : "
                    + formatOptions.mVersion);
        }
        if (mOutStream == null) {
            openStream();
        }
        try {
            // Make code point conversion table ordered by occurrence of code points
            // Version 201 or later have codePointTable
            final CodePointTable codePointTable;
            if (mCodePointTableMode == CODE_POINT_TABLE_OFF || formatOptions.mVersion
                    < FormatSpec.MINIMUM_SUPPORTED_VERSION_OF_CODE_POINT_TABLE) {
                codePointTable = new CodePointTable();
            } else {
                codePointTable = makeCodePointTable(dict);
            }
            BinaryDictEncoderUtils.writeDictionaryHeader(mOutStream, dict, formatOptions,
                    codePointTable.mCodePointOccurrenceArray);
            // Addresses are limited to 3 bytes, but since addresses can be relative to each node
            // array, the structure itself is not limited to 16MB. However, if it is over 16MB
            // deciding the order of the PtNode arrays becomes a quite complicated problem,
            // because though the dictionary itself does not have a size limit, each node array
            // must still be within 16MB of all its children and parents. As long as this is
            // ensured, the dictionary file may grow to any size.
            // Leave the choice of the optimal node order to the flattenTree function.
            MakedictLog.i("Flattening the tree...");
            ArrayList<PtNodeArray> flatNodes =
                    BinaryDictEncoderUtils.flattenTree(dict.mRootNodeArray);
            MakedictLog.i("Computing addresses...");
            BinaryDictEncoderUtils.computeAddresses(dict, flatNodes,
                    codePointTable.mCodePointToOneByteCodeMap);
            MakedictLog.i("Checking PtNode array...");
            if (MakedictLog.DBG) BinaryDictEncoderUtils.checkFlatPtNodeArrayList(flatNodes);
            // Create a buffer that matches the final dictionary size.
            final PtNodeArray lastNodeArray = flatNodes.get(flatNodes.size() - 1);
            final int bufferSize =
                    lastNodeArray.mCachedAddressAfterUpdate + lastNodeArray.mCachedSize;
            mBuffer = new byte[bufferSize];
            MakedictLog.i("Writing file...");
            for (PtNodeArray nodeArray : flatNodes) {
                BinaryDictEncoderUtils.writePlacedPtNodeArray(dict, this, nodeArray,
                        codePointTable.mCodePointToOneByteCodeMap);
            }
            if (MakedictLog.DBG) BinaryDictEncoderUtils.showStatistics(flatNodes);
            mOutStream.write(mBuffer, 0, mPosition);
            MakedictLog.i("Done");
        } finally {
            // Release the stream even when encoding fails, so the file handle is not leaked.
            close();
        }
    }

    /**
     * Moves the write position inside the buffer.
     *
     * The request is silently ignored when no buffer has been allocated yet or when
     * {@code position} is outside {@code [0, mBuffer.length)}.
     */
    @Override
    public void setPosition(final int position) {
        if (mBuffer == null || position < 0 || position >= mBuffer.length) return;
        mPosition = position;
    }

    /** Returns the current write position inside the buffer. */
    @Override
    public int getPosition() {
        return mPosition;
    }

    /**
     * Writes a PtNode count at the current position and advances the position.
     *
     * When the count needs two bytes, the large-array flag is OR-ed into the written value.
     *
     * @param ptNodeCount the number of PtNodes in the array.
     * @throws RuntimeException if the computed size of the count field is neither 1 nor 2.
     */
    @Override
    public void writePtNodeCount(final int ptNodeCount) {
        final int countSize = BinaryDictIOUtils.getPtNodeCountSize(ptNodeCount);
        if (countSize != 1 && countSize != 2) {
            throw new RuntimeException("Strange size from getGroupCountSize : " + countSize);
        }
        final int encodedPtNodeCount = (countSize == 2) ?
                (ptNodeCount | FormatSpec.LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE_FLAG) : ptNodeCount;
        mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition, encodedPtNodeCount,
                countSize);
    }

    /** Writes the flags of {@code ptNode} at the current position and advances the position. */
    private void writePtNodeFlags(final PtNode ptNode,
            final HashMap<Integer, Integer> codePointToOneByteCodeMap) {
        final int childrenPos = BinaryDictEncoderUtils.getChildrenPosition(ptNode,
                codePointToOneByteCodeMap);
        mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition,
                BinaryDictEncoderUtils.makePtNodeFlags(ptNode, childrenPos),
                FormatSpec.PTNODE_FLAGS_SIZE);
    }

    /**
     * Writes the characters of a PtNode, followed by the terminator byte when the node has
     * several characters.
     */
    private void writeCharacters(final int[] codePoints, final boolean hasSeveralChars,
            final HashMap<Integer, Integer> codePointToOneByteCodeMap) {
        mPosition = CharEncoding.writeCharArray(codePoints, mBuffer, mPosition,
                codePointToOneByteCodeMap);
        if (hasSeveralChars) {
            mBuffer[mPosition++] = FormatSpec.PTNODE_CHARACTERS_TERMINATOR;
        }
    }

    /** Writes {@code frequency} at the current position; negative values are not written. */
    private void writeFrequency(final int frequency) {
        if (frequency >= 0) {
            mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition, frequency,
                    FormatSpec.PTNODE_FREQUENCY_SIZE);
        }
    }

    /**
     * Writes the children position of {@code ptNode} and advances the position by the value the
     * utility returns (presumably the number of bytes written; confirm against
     * BinaryDictEncoderUtils).
     */
    private void writeChildrenPosition(final PtNode ptNode,
            final HashMap<Integer, Integer> codePointToOneByteCodeMap) {
        final int childrenPos = BinaryDictEncoderUtils.getChildrenPosition(ptNode,
                codePointToOneByteCodeMap);
        mPosition += BinaryDictEncoderUtils.writeChildrenPosition(mBuffer, mPosition,
                childrenPos);
    }

    /**
     * Write a bigram attributes list to mBuffer.
     *
     * @param bigrams the bigram attributes list.
     * @param dict the dictionary the node array is a part of (for relative offsets).
     */
    private void writeBigrams(final ArrayList<WeightedString> bigrams,
            final FusionDictionary dict) {
        if (bigrams == null) return;
        // An explicit iterator is used because the flags of each entry depend on whether
        // another entry follows it.
        final Iterator<WeightedString> bigramIterator = bigrams.iterator();
        while (bigramIterator.hasNext()) {
            final WeightedString bigram = bigramIterator.next();
            final PtNode target =
                    FusionDictionary.findWordInTree(dict.mRootNodeArray, bigram.mWord);
            final int addressOfBigram = target.mCachedAddressAfterUpdate;
            final int unigramFrequencyForThisWord = target.getProbability();
            // The offset is relative to the position right after the attribute flags.
            final int offset = addressOfBigram
                    - (mPosition + FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
            final int bigramFlags = BinaryDictEncoderUtils.makeBigramFlags(bigramIterator.hasNext(),
                    offset, bigram.getProbability(), unigramFrequencyForThisWord, bigram.mWord);
            mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition, bigramFlags,
                    FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
            mPosition += BinaryDictEncoderUtils.writeChildrenPosition(mBuffer, mPosition,
                    Math.abs(offset));
        }
    }

    /**
     * Writes one PtNode at the current position: flags, characters, frequency, children
     * position and bigrams, in that order.
     */
    @Override
    public void writePtNode(final PtNode ptNode, final FusionDictionary dict,
            final HashMap<Integer, Integer> codePointToOneByteCodeMap) {
        writePtNodeFlags(ptNode, codePointToOneByteCodeMap);
        writeCharacters(ptNode.mChars, ptNode.hasSeveralChars(), codePointToOneByteCodeMap);
        writeFrequency(ptNode.getProbability());
        writeChildrenPosition(ptNode, codePointToOneByteCodeMap);
        writeBigrams(ptNode.mBigrams, dict);
    }
}